commit d67477a27ccf7f219af8464d6b93a50b9b9784cc
Author: Cosmin Vlaicu
Date:   Thu Dec 12 20:25:55 2024 +0200

    Initial detect and track module based on CV alone.

diff --git a/crop-helper.py b/crop-helper.py
new file mode 100644
index 0000000..a9c1631
--- /dev/null
+++ b/crop-helper.py
@@ -0,0 +1,51 @@
+import cv2 as cv
+import numpy as np
+
+points = []
+
+def click_event(event, x, y, flags, param):
+    global points, img_copy
+    if event == cv.EVENT_LBUTTONDOWN:
+        points.append((x, y))
+        cv.circle(img_copy, (x, y), 5, (0, 0, 255), -1)
+        cv.imshow("Image", img_copy)
+
+        if len(points) == 4:
+            for i in range(4):
+                cv.line(img_copy, points[i], points[(i+1) % 4], (0, 255, 0), 2)
+            cv.imshow("Image", img_copy)
+            crop_box()
+
+def crop_box():
+    global points, img
+    src_pts = np.array(points, dtype="float32")
+
+    width = int(max(np.linalg.norm(src_pts[0] - src_pts[1]), np.linalg.norm(src_pts[2] - src_pts[3])))
+    height = int(max(np.linalg.norm(src_pts[1] - src_pts[2]), np.linalg.norm(src_pts[3] - src_pts[0])))
+
+    dst_pts = np.array([
+        [0, 0],
+        [width - 1, 0],
+        [width - 1, height - 1],
+        [0, height - 1]
+    ], dtype="float32")
+
+    M = cv.getPerspectiveTransform(src_pts, dst_pts)
+
+    cropped_img = cv.warpPerspective(img, M, (width, height))
+
+    cv.imshow("Cropped Image", cropped_img)
+    cv.imwrite("cropped_image.png", cropped_img)
+
+    cv.waitKey(0)
+    cv.destroyAllWindows()
+
+img_path = "/Users/vlacosmi/Documents/Personal/detect_picture/detect-flying-kitties-main/video-frames/frame_0953.png"
+img = cv.imread(img_path)
+img_copy = img.copy()
+
+cv.imshow("Image", img_copy)
+cv.setMouseCallback("Image", click_event)
+
+cv.waitKey(0)
+cv.destroyAllWindows()
diff --git a/cropped_image.png b/cropped_image.png
new file mode 100644
index 0000000..853d2e3
Binary files /dev/null and b/cropped_image.png differ
diff --git a/detect.py b/detect.py
new file mode 100644
index 0000000..04be903
--- /dev/null
+++ b/detect.py
@@ -0,0 +1,49 @@
+import os
+import cv2 as cv
+from detector import Detector
+import matplotlib.pyplot as plt
+
+SHOW_FAILED = False
+
+def show_image(img, title, colorspace, results_file_name, show=False):
+    if not show:
+        return
+    dpi = 96
+    figsize = (img.shape[1] / dpi, img.shape[0] / dpi)
+    fig, ax = plt.subplots(figsize=figsize, dpi=dpi)
+    if colorspace == "RGB":
+        plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB), interpolation="spline16")
+    if colorspace == "gray":
+        plt.imshow(img, cmap="gray")
+    plt.title(title, fontsize=12)
+    ax.axis("off")
+    plt.savefig(results_file_name)
+    plt.close()
+
+def main():
+    input_folder = "data/video-frames/"
+    success_output_folder = "data/success-video-frames/"
+    failures_output_folder = "data/failures-video-frames/"
+
+    os.makedirs(input_folder, exist_ok=True)
+    os.makedirs(success_output_folder, exist_ok=True)
+    os.makedirs(failures_output_folder, exist_ok=True)
+
+    target_image = cv.imread("cropped_image.png")
+
+    detector = Detector(target_image, min_matches=4, angle_tolerance=20, length_ratio_tolerance=2.5)
+
+    filenames = sorted([f for f in os.listdir(input_folder) if f.endswith(".png")])
+
+    for filename in filenames:
+        img = cv.imread(os.path.join(input_folder, filename))
+        bbox, modified_img = detector.match_feature_find_object(img)
+        if bbox is not None:
+            show_image(modified_img, "Success feature matching", "RGB", os.path.join(success_output_folder, filename), show=True)
+        else:
+            show_image(modified_img, "Failed feature matching", "RGB", os.path.join(failures_output_folder, filename), show=SHOW_FAILED)
+
+
+
+if __name__ == "__main__":
+    main()
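Aside, not part of the commit: the batch loop in detect.py assumes the cropped template matches at least one frame. A quicker way to validate the crop first is a single-frame smoke test against the same Detector API; the frame path below is an assumption.

# Sketch only, not in this commit: single-frame sanity check for the
# cropped template. The frame path is a hypothetical example.
import cv2 as cv
from detector import Detector

target = cv.imread("cropped_image.png")                 # produced by crop-helper.py
frame = cv.imread("data/video-frames/frame_0000.png")   # assumed frame path

detector = Detector(target, min_matches=4, angle_tolerance=20, length_ratio_tolerance=2.5)
bbox, annotated = detector.match_feature_find_object(frame)
print("bbox:", bbox)  # None means matching or rectangle validation failed

if bbox is not None:
    cv.imshow("match", annotated)
    cv.waitKey(0)
    cv.destroyAllWindows()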
diff --git a/detect_and_track.py b/detect_and_track.py
new file mode 100644
index 0000000..931f78c
--- /dev/null
+++ b/detect_and_track.py
@@ -0,0 +1,72 @@
+import os
+import cv2 as cv
+import numpy as np
+from detector import Detector
+from tracker import Tracker
+import matplotlib.pyplot as plt
+
+
+def show_image(img, title, colorspace, results_file_name, show=False):
+    """Displays or saves the processed image."""
+    if not show:
+        return
+    dpi = 96
+    figsize = (img.shape[1] / dpi, img.shape[0] / dpi)
+    fig, ax = plt.subplots(figsize=figsize, dpi=dpi)
+    if colorspace == "RGB":
+        plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB), interpolation="spline16")
+    if colorspace == "gray":
+        plt.imshow(img, cmap="gray")
+    plt.title(title, fontsize=12)
+    ax.axis("off")
+    plt.savefig(results_file_name)
+    plt.close()
+
+
+def draw_bounding_box(img, bbox, color=(0, 255, 0)):
+    """Draws a bounding box (as a polygon) on the image."""
+    x, y, w, h = bbox
+    box_points = np.array([
+        [x, y],
+        [x + w, y],
+        [x + w, y + h],
+        [x, y + h]
+    ], dtype=np.int32)
+    cv.polylines(img, [box_points], isClosed=True, color=color, thickness=2, lineType=cv.LINE_AA)
+
+
+def main():
+    input_folder = "data/video-frames/"
+    output_folder = "data/detect_and_track/results/"
+
+    os.makedirs(input_folder, exist_ok=True)
+    os.makedirs(output_folder, exist_ok=True)
+
+    query_img = cv.imread("cropped_image.png")
+    detector = Detector(query_img, min_matches=4, angle_tolerance=20, length_ratio_tolerance=2.5)
+    tracker = Tracker()
+
+    filenames = sorted([f for f in os.listdir(input_folder) if f.endswith(".png")])
+
+    for filename in filenames:
+        img = cv.imread(os.path.join(input_folder, filename))
+
+        bbox, detected_img = detector.match_feature_find_object(img)
+        if bbox is not None:
+            tracker.start_tracking(img, bbox)
+            draw_bounding_box(img, bbox, color=(0, 255, 0))
+            modified_img = detected_img
+        else:
+            if tracker.is_tracking:
+                bbox = tracker.update(img)
+                if bbox is not None:
+                    draw_bounding_box(img, bbox, color=(255, 0, 0))
+                else:
+                    tracker.is_tracking = False
+            modified_img = img
+
+        show_image(modified_img, "Feature matching and object recognition", "RGB", os.path.join(output_folder, filename), show=(bbox is not None))
+
+
+if __name__ == "__main__":
+    main()
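Aside: main() above only re-runs detection once the tracker reports failure, so correlation-tracker drift can accumulate uncorrected while tracking succeeds. A hedged variation is to force a re-detection every N frames; the sketch below is a drop-in replacement for the loop in main() (it assumes the same detector, tracker, input_folder and filenames), and REDETECT_EVERY is an assumed tuning knob, not part of this commit.

# Sketch of a periodic re-detection loop (not in this commit); assumes the
# same detector, tracker, input_folder and filenames as main() above.
REDETECT_EVERY = 30  # assumed cadence, tune for the footage

for i, filename in enumerate(filenames):
    img = cv.imread(os.path.join(input_folder, filename))
    bbox = None
    if i % REDETECT_EVERY == 0 or not tracker.is_tracking:
        bbox, _ = detector.match_feature_find_object(img)
        if bbox is not None:
            tracker.start_tracking(img, bbox)  # re-anchor the tracker
    if bbox is None and tracker.is_tracking:
        bbox = tracker.update(img)             # fall back to tracking
    if bbox is not None:
        draw_bounding_box(img, bbox)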
diff --git a/detector.py b/detector.py
new file mode 100644
index 0000000..9ed54af
--- /dev/null
+++ b/detector.py
@@ -0,0 +1,83 @@
+import cv2 as cv
+import numpy as np
+
+class Detector:
+    def __init__(self, target_image, min_matches=4, angle_tolerance=30, length_ratio_tolerance=2.5, nfeatures=30000):
+        self.orb = cv.ORB_create(nfeatures=nfeatures)
+        self.target_features, self.target_des = self.orb.detectAndCompute(target_image, None)
+        self.target_image_h, self.target_image_w = target_image.shape[:2]
+        self.min_matches = min_matches
+        self.angle_tolerance = angle_tolerance
+        self.length_ratio_tolerance = length_ratio_tolerance
+
+    def valid_rectangle(self, points):
+        def angle_between(p1, p2, p3):
+            v1 = p1 - p2
+            v2 = p3 - p2
+            angle = np.arccos(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)))
+            return np.degrees(angle)
+
+        if len(points) != 4:
+            return False
+
+        angles = [
+            angle_between(points[i], points[(i + 1) % 4], points[(i + 2) % 4])
+            for i in range(4)
+        ]
+        if not all(abs(angle - 90) <= self.angle_tolerance for angle in angles):
+            print(f'Not a valid rectangle, angle criterion: {angles}')
+            return False
+
+        side_lengths = [
+            np.linalg.norm(points[i] - points[(i + 1) % 4]) for i in range(4)
+        ]
+        max_length = max(side_lengths)
+        min_length = min(side_lengths)
+        if max_length / min_length > self.length_ratio_tolerance:
+            print('Not a valid rectangle, length criterion')
+            return False
+
+        return True
+
+    def match_feature_find_object(self, train_img):
+        features2, des2 = self.orb.detectAndCompute(train_img, None)
+        if des2 is None:  # no features in this frame, nothing to match
+            return None, train_img
+
+        bf = cv.BFMatcher(cv.NORM_HAMMING)
+        matches = bf.knnMatch(self.target_des, des2, k=2)
+
+        good = []
+        good_without_lists = []
+        matches = [match for match in matches if len(match) == 2]
+        for m, n in matches:
+            if m.distance < 0.8 * n.distance:
+                good.append([m])
+                good_without_lists.append(m)
+
+        if len(good) >= self.min_matches:
+            src_pts = np.float32([self.target_features[m.queryIdx].pt for m in good_without_lists]).reshape(-1, 1, 2)
+            dst_pts = np.float32([features2[m.trainIdx].pt for m in good_without_lists]).reshape(-1, 1, 2)
+
+            M, _ = cv.findHomography(src_pts, dst_pts, cv.RANSAC, 5.0)
+
+            h, w = self.target_image_h, self.target_image_w
+            pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0]]).reshape(-1, 1, 2)
+
+            try:
+                dst = cv.perspectiveTransform(pts, M)
+            except cv.error as e:
+                print(f"Perspective transform failed: {e}")
+                return None, train_img
+
+            rectangle_points = np.squeeze(dst)
+            if not self.valid_rectangle(rectangle_points):
+                return None, train_img
+
+            train_img = cv.polylines(train_img, [np.int32(dst)], True, (0, 255, 0), 2, cv.LINE_AA)
+            x, y, w, h = cv.boundingRect(np.int32(dst))
+            return (x, y, w, h), train_img
+
+        else:
+            print(f"Not enough good matches are found - {len(good)}/{self.min_matches}")
+            return None, train_img
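Aside: the geometric filter above rejects degenerate homographies by checking corner angles and side-length ratios. A toy check with made-up coordinates, assuming cropped_image.png exists so the constructor can run:

# Sketch, not in this commit: exercising valid_rectangle with toy quads.
import numpy as np
import cv2 as cv
from detector import Detector

det = Detector(cv.imread("cropped_image.png"))

square = np.array([[0, 0], [100, 0], [100, 100], [0, 100]], dtype=np.float32)
sliver = np.array([[0, 0], [300, 0], [300, 10], [0, 10]], dtype=np.float32)

print(det.valid_rectangle(square))  # True: 90-degree corners, equal sides
print(det.valid_rectangle(sliver))  # False: side ratio 30 > length_ratio_tolerance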
diff --git a/mov_to_png.py b/mov_to_png.py
new file mode 100644
index 0000000..1dea046
--- /dev/null
+++ b/mov_to_png.py
@@ -0,0 +1,41 @@
+import cv2
+import os
+
+def extract_frames_from_video(video_path, output_folder, frame_interval=30):
+    os.makedirs(output_folder, exist_ok=True)
+
+    video_capture = cv2.VideoCapture(video_path)
+
+    if not video_capture.isOpened():
+        print(f"Error: Could not open video {video_path}")
+        return
+
+    frame_count = 0
+    saved_count = 0
+
+    while True:
+        success, frame = video_capture.read()
+
+        if not success:
+            print("Finished extracting frames.")
+            break
+
+        if frame_count % frame_interval == 0:
+            frame_file_path = os.path.join(output_folder, f"frame_{saved_count:04d}.png")
+            cv2.imwrite(frame_file_path, frame)
+            print(f"Saved {frame_file_path}")
+            saved_count += 1
+
+        frame_count += 1
+
+    video_capture.release()
+
+def main():
+    video_path = "/Users/vlacosmi/Downloads/drone-detect/capture_6_dec/fhd50fps/nvcamtest_6833_s00_00001.mp4"
+    output_folder = "video-frames"
+    frame_interval = 1
+
+    extract_frames_from_video(video_path, output_folder, frame_interval)
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..e075dae
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+pillow==10.4.0
+opencv-python==4.10.0.84
+numpy==2.1.1
+matplotlib==3.9.2
+dlib
diff --git a/tracker.py b/tracker.py
new file mode 100644
index 0000000..eef6ab3
--- /dev/null
+++ b/tracker.py
@@ -0,0 +1,30 @@
+import dlib
+import cv2 as cv
+
+
+class Tracker:
+    def __init__(self):
+        self.tracker = dlib.correlation_tracker()
+        self.is_tracking = False
+
+    def start_tracking(self, img, bbox):
+        """Initialize the tracker with the given bounding box."""
+        x, y, w, h = bbox
+        self.tracker.start_track(img, dlib.rectangle(x, y, x + w, y + h))
+        self.is_tracking = True
+
+    def update(self, img):
+        """Update the tracker with the new frame."""
+        if not self.is_tracking:
+            return None
+
+        self.tracker.update(img)
+        pos = self.tracker.get_position()
+        x1, y1, x2, y2 = int(pos.left()), int(pos.top()), int(pos.right()), int(pos.bottom())
+
+        # Validate the bounding box
+        if (x2 - x1) > 0 and (y2 - y1) > 0:
+            return (x1, y1, x2 - x1, y2 - y1)  # Valid bounding box
+        else:
+            self.is_tracking = False  # Stop tracking if the box is invalid
+            return None
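Aside: this commit operates on pre-extracted frames, but the same Detector/Tracker pair can run straight off a video stream, skipping the mov_to_png.py step. A sketch, assuming a hypothetical input path and the module APIs above:

# Sketch, not in this commit: detect-or-track directly on a video stream.
import cv2 as cv
from detector import Detector
from tracker import Tracker

cap = cv.VideoCapture("input.mp4")  # hypothetical path
detector = Detector(cv.imread("cropped_image.png"))
tracker = Tracker()

while True:
    ok, frame = cap.read()
    if not ok:
        break
    bbox, _ = detector.match_feature_find_object(frame)
    if bbox is not None:
        tracker.start_tracking(frame, bbox)  # fresh detection wins
    elif tracker.is_tracking:
        bbox = tracker.update(frame)         # otherwise coast on the tracker
    if bbox is not None:
        x, y, w, h = bbox
        cv.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv.imshow("detect-and-track", frame)
    if cv.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv.destroyAllWindows()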