commit d67477a27ccf7f219af8464d6b93a50b9b9784cc
Author: Cosmin Vlaicu
Date:   Thu Dec 12 20:25:55 2024 +0200

    Initial detect and track module based on CV alone.

diff --git a/crop-helper.py b/crop-helper.py
new file mode 100644
index 0000000..a9c1631
--- /dev/null
+++ b/crop-helper.py
@@ -0,0 +1,51 @@
+import cv2 as cv
+import numpy as np
+
+points = []
+
+def click_event(event, x, y, flags, param):
+    global points, img_copy
+    if event == cv.EVENT_LBUTTONDOWN:
+        points.append((x, y))
+        cv.circle(img_copy, (x, y), 5, (0, 0, 255), -1)
+        cv.imshow("Image", img_copy)
+
+        if len(points) == 4:
+            for i in range(4):
+                cv.line(img_copy, points[i], points[(i+1) % 4], (0, 255, 0), 2)
+            cv.imshow("Image", img_copy)
+            crop_box()
+
+def crop_box():
+    global points, img
+    src_pts = np.array(points, dtype="float32")
+
+    width = int(max(np.linalg.norm(src_pts[0] - src_pts[1]), np.linalg.norm(src_pts[2] - src_pts[3])))
+    height = int(max(np.linalg.norm(src_pts[1] - src_pts[2]), np.linalg.norm(src_pts[3] - src_pts[0])))
+
+    dst_pts = np.array([
+        [0, 0],
+        [width - 1, 0],
+        [width - 1, height - 1],
+        [0, height - 1]
+    ], dtype="float32")
+
+    M = cv.getPerspectiveTransform(src_pts, dst_pts)
+
+    cropped_img = cv.warpPerspective(img, M, (width, height))
+
+    cv.imshow("Cropped Image", cropped_img)
+    cv.imwrite("cropped_image.png", cropped_img)
+
+    cv.waitKey(0)
+    cv.destroyAllWindows()
+
+img_path = "/Users/vlacosmi/Documents/Personal/detect_picture/detect-flying-kitties-main/video-frames/frame_0953.png"
+img = cv.imread(img_path)
+img_copy = img.copy()
+
+cv.imshow("Image", img_copy)
+cv.setMouseCallback("Image", click_event)
+
+cv.waitKey(0)
+cv.destroyAllWindows()
diff --git a/cropped_image.png b/cropped_image.png
new file mode 100644
index 0000000..853d2e3
Binary files /dev/null and b/cropped_image.png differ
diff --git a/detect.py b/detect.py
new file mode 100644
index 0000000..04be903
--- /dev/null
+++ b/detect.py
@@ -0,0 +1,49 @@
+import os
+import cv2 as cv
+from detector import Detector
+import matplotlib.pyplot as plt
+
+SHOW_FAILED = False
+
+def show_image(img, title, colorspace, results_file_name, show=False):
+    if not show:
+        return
+    dpi = 96
+    figsize = (img.shape[1] / dpi, img.shape[0] / dpi)
+    fig, ax = plt.subplots(figsize=figsize, dpi=dpi)
+    if colorspace == "RGB":
+        plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB), interpolation="spline16")
+    if colorspace == "gray":
+        plt.imshow(img, cmap="gray")
+    plt.title(title, fontsize=12)
+    ax.axis("off")
+    plt.savefig(results_file_name)
+    plt.close()
+
+def main():
+    input_folder = "data/video-frames/"
+    success_output_folder = "data/success-video-frames/"
+    failures_output_folder = "data/failures-video-frames/"
+
+    os.makedirs(input_folder, exist_ok=True)
+    os.makedirs(success_output_folder, exist_ok=True)
+    os.makedirs(failures_output_folder, exist_ok=True)
+
+    target_image = cv.imread("cropped_image.png")
+
+    detector = Detector(target_image, min_matches=4, angle_tolerance=20, length_ratio_tolerance=2.5)
+
+    filenames = sorted([f for f in os.listdir(input_folder) if f.endswith(".png")])
+
+    for filename in filenames:
+        img = cv.imread(os.path.join(input_folder, filename))
+        bbox, modified_img = detector.match_feature_find_object(img)
+        if bbox is not None:
+            show_image(modified_img, "Success feature matching", "RGB", os.path.join(success_output_folder, filename), show=True)
+        else:
+            show_image(modified_img, "Failed feature matching", "RGB", os.path.join(failures_output_folder, filename), show=SHOW_FAILED)
+
+
+
+if __name__ == "__main__":
+    main()
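Aside, not part of the commit: the batch loop in detect.py assumes the cropped template matches at least one frame. A quicker way to validate the crop first is a single-frame smoke test against the same Detector API; the frame path below is an assumption.

# Sketch only, not in this commit: single-frame sanity check for the
# cropped template. The frame path is a hypothetical example.
import cv2 as cv
from detector import Detector

target = cv.imread("cropped_image.png")                 # produced by crop-helper.py
frame = cv.imread("data/video-frames/frame_0000.png")   # assumed frame path

detector = Detector(target, min_matches=4, angle_tolerance=20, length_ratio_tolerance=2.5)
bbox, annotated = detector.match_feature_find_object(frame)
print("bbox:", bbox)  # None means matching or rectangle validation failed

if bbox is not None:
    cv.imshow("match", annotated)
    cv.waitKey(0)
    cv.destroyAllWindows()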
diff --git a/detect_and_track.py b/detect_and_track.py
new file mode 100644
index 0000000..931f78c
--- /dev/null
+++ b/detect_and_track.py
@@ -0,0 +1,72 @@
+import os
+import cv2 as cv
+import numpy as np
+from detector import Detector
+from tracker import Tracker
+import matplotlib.pyplot as plt
+
+
+def show_image(img, title, colorspace, results_file_name, show=False):
+    """Displays or saves the processed image."""
+    if not show:
+        return
+    dpi = 96
+    figsize = (img.shape[1] / dpi, img.shape[0] / dpi)
+    fig, ax = plt.subplots(figsize=figsize, dpi=dpi)
+    if colorspace == "RGB":
+        plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB), interpolation="spline16")
+    if colorspace == "gray":
+        plt.imshow(img, cmap="gray")
+    plt.title(title, fontsize=12)
+    ax.axis("off")
+    plt.savefig(results_file_name)
+    plt.close()
+
+
+def draw_bounding_box(img, bbox, color=(0, 255, 0)):
+    """Draws a bounding box (as a polygon) on the image."""
+    x, y, w, h = bbox
+    box_points = np.array([
+        [x, y],
+        [x + w, y],
+        [x + w, y + h],
+        [x, y + h]
+    ], dtype=np.int32)
+    cv.polylines(img, [box_points], isClosed=True, color=color, thickness=2, lineType=cv.LINE_AA)
+
+
+def main():
+    input_folder = "data/video-frames/"
+    output_folder = "data/detect_and_track/results/"
+
+    os.makedirs(input_folder, exist_ok=True)
+    os.makedirs(output_folder, exist_ok=True)
+
+    query_img = cv.imread("cropped_image.png")
+    detector = Detector(query_img, min_matches=4, angle_tolerance=20, length_ratio_tolerance=2.5)
+    tracker = Tracker()
+
+    filenames = sorted([f for f in os.listdir(input_folder) if f.endswith(".png")])
+
+    for filename in filenames:
+        img = cv.imread(os.path.join(input_folder, filename))
+
+        bbox, detected_img = detector.match_feature_find_object(img)
+        if bbox is not None:
+            tracker.start_tracking(img, bbox)
+            draw_bounding_box(img, bbox, color=(0, 255, 0))
+            modified_img = detected_img
+        else:
+            if tracker.is_tracking:
+                bbox = tracker.update(img)
+                if bbox is not None:
+                    draw_bounding_box(img, bbox, color=(255, 0, 0))
+                else:
+                    tracker.is_tracking = False
+            modified_img = img
+
+        show_image(modified_img, "Feature matching and object recognition", "RGB", os.path.join(output_folder, filename), show=(bbox is not None))
+
+
+if __name__ == "__main__":
+    main()
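Aside: main() above only re-runs detection once the tracker reports failure, so correlation-tracker drift can accumulate uncorrected while tracking succeeds. A hedged variation is to force a re-detection every N frames; the sketch below is a drop-in replacement for the loop in main() (it assumes the same detector, tracker, input_folder and filenames), and REDETECT_EVERY is an assumed tuning knob, not part of this commit.

# Sketch of a periodic re-detection loop (not in this commit); assumes the
# same detector, tracker, input_folder and filenames as main() above.
REDETECT_EVERY = 30  # assumed cadence, tune for the footage

for i, filename in enumerate(filenames):
    img = cv.imread(os.path.join(input_folder, filename))
    bbox = None
    if i % REDETECT_EVERY == 0 or not tracker.is_tracking:
        bbox, _ = detector.match_feature_find_object(img)
        if bbox is not None:
            tracker.start_tracking(img, bbox)  # re-anchor the tracker
    if bbox is None and tracker.is_tracking:
        bbox = tracker.update(img)             # fall back to tracking
    if bbox is not None:
        draw_bounding_box(img, bbox)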
diff --git a/detector.py b/detector.py
new file mode 100644
index 0000000..9ed54af
--- /dev/null
+++ b/detector.py
@@ -0,0 +1,83 @@
+import cv2 as cv
+import numpy as np
+
+class Detector:
+    def __init__(self, target_image, min_matches=4, angle_tolerance=30, length_ratio_tolerance=2.5, nfeatures=30000):
+        self.orb = cv.ORB_create(nfeatures=nfeatures)
+        self.target_features, self.target_des = self.orb.detectAndCompute(target_image, None)
+        self.target_image_h, self.target_image_w = target_image.shape[:2]
+        self.min_matches = min_matches
+        self.angle_tolerance = angle_tolerance
+        self.length_ratio_tolerance = length_ratio_tolerance
+
+    def valid_rectangle(self, points):
+        def angle_between(p1, p2, p3):
+            v1 = p1 - p2
+            v2 = p3 - p2
+            angle = np.arccos(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)))
+            return np.degrees(angle)
+
+        if len(points) != 4:
+            return False
+
+        angles = [
+            angle_between(points[i], points[(i + 1) % 4], points[(i + 2) % 4])
+            for i in range(4)
+        ]
+        if not all(abs(angle - 90) <= self.angle_tolerance for angle in angles):
+            print(f'Not a valid rectangle, angle criterion: {angles}')
+            return False
+
+        side_lengths = [
+            np.linalg.norm(points[i] - points[(i + 1) % 4]) for i in range(4)
+        ]
+        max_length = max(side_lengths)
+        min_length = min(side_lengths)
+        if max_length / min_length > self.length_ratio_tolerance:
+            print('Not a valid rectangle, length criterion')
+            return False
+
+        return True
+
+    def match_feature_find_object(self, train_img):
+        features2, des2 = self.orb.detectAndCompute(train_img, None)
+        if des2 is None:  # no features in this frame, nothing to match
+            return None, train_img
+
+        bf = cv.BFMatcher(cv.NORM_HAMMING)
+        matches = bf.knnMatch(self.target_des, des2, k=2)
+
+        good = []
+        good_without_lists = []
+        matches = [match for match in matches if len(match) == 2]
+        for m, n in matches:
+            if m.distance < 0.8 * n.distance:
+                good.append([m])
+                good_without_lists.append(m)
+
+        if len(good) >= self.min_matches:
+            src_pts = np.float32([self.target_features[m.queryIdx].pt for m in good_without_lists]).reshape(-1, 1, 2)
+            dst_pts = np.float32([features2[m.trainIdx].pt for m in good_without_lists]).reshape(-1, 1, 2)
+
+            M, _ = cv.findHomography(src_pts, dst_pts, cv.RANSAC, 5.0)
+
+            h, w = self.target_image_h, self.target_image_w
+            pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0]]).reshape(-1, 1, 2)
+
+            try:
+                dst = cv.perspectiveTransform(pts, M)
+            except cv.error as e:
+                print(f"Perspective transform failed: {e}")
+                return None, train_img
+
+            rectangle_points = np.squeeze(dst)
+            if not self.valid_rectangle(rectangle_points):
+                return None, train_img
+
+            train_img = cv.polylines(train_img, [np.int32(dst)], True, (0, 255, 0), 2, cv.LINE_AA)
+            x, y, w, h = cv.boundingRect(np.int32(dst))
+            return (x, y, w, h), train_img
+
+        else:
+            print(f"Not enough good matches are found - {len(good)}/{self.min_matches}")
+            return None, train_img
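Aside: the geometric filter above rejects degenerate homographies by checking corner angles and side-length ratios. A toy check with made-up coordinates, assuming cropped_image.png exists so the constructor can run:

# Sketch, not in this commit: exercising valid_rectangle with toy quads.
import numpy as np
import cv2 as cv
from detector import Detector

det = Detector(cv.imread("cropped_image.png"))

square = np.array([[0, 0], [100, 0], [100, 100], [0, 100]], dtype=np.float32)
sliver = np.array([[0, 0], [300, 0], [300, 10], [0, 10]], dtype=np.float32)

print(det.valid_rectangle(square))  # True: 90-degree corners, equal sides
print(det.valid_rectangle(sliver))  # False: side ratio 30 > length_ratio_tolerance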
diff --git a/mov_to_png.py b/mov_to_png.py
new file mode 100644
index 0000000..1dea046
--- /dev/null
+++ b/mov_to_png.py
@@ -0,0 +1,41 @@
+import cv2
+import os
+
+def extract_frames_from_video(video_path, output_folder, frame_interval=30):
+    os.makedirs(output_folder, exist_ok=True)
+
+    video_capture = cv2.VideoCapture(video_path)
+
+    if not video_capture.isOpened():
+        print(f"Error: Could not open video {video_path}")
+        return
+
+    frame_count = 0
+    saved_count = 0
+
+    while True:
+        success, frame = video_capture.read()
+
+        if not success:
+            print("Finished extracting frames.")
+            break
+
+        if frame_count % frame_interval == 0:
+            frame_file_path = os.path.join(output_folder, f"frame_{saved_count:04d}.png")
+            cv2.imwrite(frame_file_path, frame)
+            print(f"Saved {frame_file_path}")
+            saved_count += 1
+
+        frame_count += 1
+
+    video_capture.release()
+
+def main():
+    video_path = "/Users/vlacosmi/Downloads/drone-detect/capture_6_dec/fhd50fps/nvcamtest_6833_s00_00001.mp4"
+    output_folder = "video-frames"
+    frame_interval = 1
+
+    extract_frames_from_video(video_path, output_folder, frame_interval)
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..e075dae
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+pillow==10.4.0
+opencv-python==4.10.0.84
+numpy==2.1.1
+matplotlib==3.9.2
+dlib
diff --git a/tracker.py b/tracker.py
new file mode 100644
index 0000000..eef6ab3
--- /dev/null
+++ b/tracker.py
@@ -0,0 +1,30 @@
+import dlib
+import cv2 as cv
+
+
+class Tracker:
+    def __init__(self):
+        self.tracker = dlib.correlation_tracker()
+        self.is_tracking = False
+
+    def start_tracking(self, img, bbox):
+        """Initialize the tracker with the given bounding box."""
+        x, y, w, h = bbox
+        self.tracker.start_track(img, dlib.rectangle(x, y, x + w, y + h))
+        self.is_tracking = True
+
+    def update(self, img):
+        """Update the tracker with the new frame."""
+        if not self.is_tracking:
+            return None
+
+        self.tracker.update(img)
+        pos = self.tracker.get_position()
+        x1, y1, x2, y2 = int(pos.left()), int(pos.top()), int(pos.right()), int(pos.bottom())
+
+        # Validate the bounding box
+        if (x2 - x1) > 0 and (y2 - y1) > 0:
+            return (x1, y1, x2 - x1, y2 - y1)  # Valid bounding box
+        else:
+            self.is_tracking = False  # Stop tracking if the box is invalid
+            return None
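Aside: this commit operates on pre-extracted frames, but the same Detector/Tracker pair can run straight off a video stream, skipping the mov_to_png.py step. A sketch, assuming a hypothetical input path and the module APIs above:

# Sketch, not in this commit: detect-or-track directly on a video stream.
import cv2 as cv
from detector import Detector
from tracker import Tracker

cap = cv.VideoCapture("input.mp4")  # hypothetical path
detector = Detector(cv.imread("cropped_image.png"))
tracker = Tracker()

while True:
    ok, frame = cap.read()
    if not ok:
        break
    bbox, _ = detector.match_feature_find_object(frame)
    if bbox is not None:
        tracker.start_tracking(frame, bbox)  # fresh detection wins
    elif tracker.is_tracking:
        bbox = tracker.update(frame)         # otherwise coast on the tracker
    if bbox is not None:
        x, y, w, h = bbox
        cv.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv.imshow("detect-and-track", frame)
    if cv.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv.destroyAllWindows()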