Initial detect and track module based on CV alone.

Cosmin Vlaicu 2024-12-12 20:25:55 +02:00
commit d67477a27c
8 changed files with 328 additions and 0 deletions

51
crop-helper.py Normal file

@@ -0,0 +1,51 @@
import cv2 as cv
import numpy as np

points = []

def click_event(event, x, y, flags, param):
    """Collect four corner clicks, draw them, then trigger the crop."""
    global points, img_copy
    if event == cv.EVENT_LBUTTONDOWN:
        points.append((x, y))
        cv.circle(img_copy, (x, y), 5, (0, 0, 255), -1)
        cv.imshow("Image", img_copy)
        if len(points) == 4:
            for i in range(4):
                cv.line(img_copy, points[i], points[(i + 1) % 4], (0, 255, 0), 2)
            cv.imshow("Image", img_copy)
            crop_box()

def crop_box():
    """Warp the quadrilateral defined by the four clicked points into an axis-aligned crop."""
    global points, img
    src_pts = np.array(points, dtype="float32")
    width = int(max(np.linalg.norm(src_pts[0] - src_pts[1]), np.linalg.norm(src_pts[2] - src_pts[3])))
    height = int(max(np.linalg.norm(src_pts[1] - src_pts[2]), np.linalg.norm(src_pts[3] - src_pts[0])))
    dst_pts = np.array([
        [0, 0],
        [width - 1, 0],
        [width - 1, height - 1],
        [0, height - 1]
    ], dtype="float32")
    M = cv.getPerspectiveTransform(src_pts, dst_pts)
    cropped_img = cv.warpPerspective(img, M, (width, height))
    cv.imshow("Cropped Image", cropped_img)
    cv.imwrite("cropped_image.png", cropped_img)
    cv.waitKey(0)
    cv.destroyAllWindows()

img_path = "/Users/vlacosmi/Documents/Personal/detect_picture/detect-flying-kitties-main/video-frames/frame_0953.png"
img = cv.imread(img_path)
img_copy = img.copy()
cv.imshow("Image", img_copy)
cv.setMouseCallback("Image", click_event)
cv.waitKey(0)
cv.destroyAllWindows()
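
The crop depends on the four clicks arriving in clockwise order starting from the top-left corner, since crop_box maps them onto (0,0), (w-1,0), (w-1,h-1), (0,h-1). As a minimal non-interactive sketch of the same warp (the corner coordinates and file paths below are placeholders, not values from this repo):

import cv2 as cv
import numpy as np

# Hypothetical corner coordinates, clockwise from the top-left of the region to crop.
src_pts = np.array([(120, 80), (520, 95), (530, 360), (110, 340)], dtype="float32")

img = cv.imread("frame.png")  # placeholder input frame
width = int(max(np.linalg.norm(src_pts[0] - src_pts[1]), np.linalg.norm(src_pts[2] - src_pts[3])))
height = int(max(np.linalg.norm(src_pts[1] - src_pts[2]), np.linalg.norm(src_pts[3] - src_pts[0])))
dst_pts = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]], dtype="float32")
M = cv.getPerspectiveTransform(src_pts, dst_pts)
cv.imwrite("cropped_image.png", cv.warpPerspective(img, M, (width, height)))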

BIN
cropped_image.png Normal file

Binary file not shown. (Size: 18 KiB)

49
detect.py Normal file

@@ -0,0 +1,49 @@
import os
import cv2 as cv
from detector import Detector
import matplotlib.pyplot as plt

SHOW_FAILED = False

def show_image(img, title, colorspace, results_file_name, show=False):
    """Save the image to results_file_name with a title; no-op when show is False."""
    if not show:
        return
    dpi = 96
    figsize = (img.shape[1] / dpi, img.shape[0] / dpi)
    fig, ax = plt.subplots(figsize=figsize, dpi=dpi)
    if colorspace == "RGB":
        plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB), interpolation="spline16")
    if colorspace == "gray":
        plt.imshow(img, cmap="gray")
    plt.title(title, fontsize=12)
    ax.axis("off")
    plt.savefig(results_file_name)
    plt.close()

def main():
    input_folder = "data/video-frames/"
    success_output_folder = "data/success-video-frames/"
    failures_output_folder = "data/failures-video-frames/"
    os.makedirs(input_folder, exist_ok=True)
    os.makedirs(success_output_folder, exist_ok=True)
    os.makedirs(failures_output_folder, exist_ok=True)
    target_image = cv.imread("cropped_image.png")
    detector = Detector(target_image, min_matches=4, angle_tolerance=20, length_ratio_tolerance=2.5)
    filenames = sorted([f for f in os.listdir(input_folder) if f.endswith("png")])
    for filename in filenames:
        img = cv.imread(os.path.join(input_folder, filename))
        bbox, modified_img = detector.match_feature_find_object(img)
        if bbox is not None:
            show_image(modified_img, "Success feature matching", "RGB", os.path.join(success_output_folder, filename), show=True)
        else:
            show_image(modified_img, "Failed feature matching", "RGB", os.path.join(failures_output_folder, filename), show=SHOW_FAILED)

if __name__ == "__main__":
    main()

72
detect_and_track.py Normal file

@@ -0,0 +1,72 @@
import os
import cv2 as cv
import numpy as np
from detector import Detector
from tracker import Tracker
import matplotlib.pyplot as plt

def show_image(img, title, colorspace, results_file_name, show=False):
    """Displays or saves the processed image."""
    if not show:
        return
    dpi = 96
    figsize = (img.shape[1] / dpi, img.shape[0] / dpi)
    fig, ax = plt.subplots(figsize=figsize, dpi=dpi)
    if colorspace == "RGB":
        plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB), interpolation="spline16")
    if colorspace == "gray":
        plt.imshow(img, cmap="gray")
    plt.title(title, fontsize=12)
    ax.axis("off")
    plt.savefig(results_file_name)
    plt.close()

def draw_bounding_box(img, bbox, color=(0, 255, 0)):
    """Draws a bounding box (as a polygon) on the image."""
    x, y, w, h = bbox
    box_points = np.array([
        [x, y],
        [x + w, y],
        [x + w, y + h],
        [x, y + h]
    ], dtype=np.int32)
    cv.polylines(img, [box_points], isClosed=True, color=color, thickness=2, lineType=cv.LINE_AA)

def main():
    input_folder = "data/video-frames/"
    output_folder = "data/detect_and_track/results/"
    os.makedirs(input_folder, exist_ok=True)
    os.makedirs(output_folder, exist_ok=True)
    query_img = cv.imread("cropped_image.png")
    detector = Detector(query_img, min_matches=4, angle_tolerance=20, length_ratio_tolerance=2.5)
    tracker = Tracker()
    filenames = sorted([f for f in os.listdir(input_folder) if f.endswith("png")])
    for filename in filenames:
        img = cv.imread(os.path.join(input_folder, filename))
        bbox, detected_img = detector.match_feature_find_object(img)
        if bbox is not None:
            # Detection succeeded: (re)initialize the tracker and draw the detection in green.
            tracker.start_tracking(img, bbox)
            draw_bounding_box(img, bbox, color=(0, 255, 0))
            modified_img = detected_img
        else:
            # Detection failed: fall back to the correlation tracker and draw its box in blue.
            if tracker.is_tracking:
                bbox = tracker.update(img)
                if bbox:
                    draw_bounding_box(img, bbox, color=(255, 0, 0))
                else:
                    tracker.is_tracking = False
            modified_img = img
        show_image(modified_img, "Feature matching and object recognition", "RGB", os.path.join(output_folder, filename), show=(bbox is not None))

if __name__ == "__main__":
    main()
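
The same detect-then-track fallback could also be driven straight from a video stream rather than pre-extracted frames. A minimal sketch, assuming a local video file (the "input.mp4" path is a placeholder) and the Detector and Tracker classes from this commit:

import cv2 as cv
from detector import Detector
from tracker import Tracker

query_img = cv.imread("cropped_image.png")
detector = Detector(query_img, min_matches=4, angle_tolerance=20, length_ratio_tolerance=2.5)
tracker = Tracker()

cap = cv.VideoCapture("input.mp4")  # placeholder video path
while True:
    ok, frame = cap.read()
    if not ok:
        break
    bbox, annotated = detector.match_feature_find_object(frame)
    color = (0, 255, 0)                      # green for a fresh detection
    if bbox is not None:
        tracker.start_tracking(frame, bbox)  # re-seed the tracker on every successful detection
    elif tracker.is_tracking:
        bbox = tracker.update(frame)         # fall back to correlation tracking
        color = (255, 0, 0)                  # blue for a tracked-only box
    if bbox:
        x, y, w, h = bbox
        cv.rectangle(annotated, (x, y), (x + w, y + h), color, 2)
    cv.imshow("detect_and_track", annotated)
    if cv.waitKey(1) & 0xFF == ord("q"):
        break
cap.release()
cv.destroyAllWindows()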

81
detector.py Normal file

@@ -0,0 +1,81 @@
import cv2 as cv
import numpy as np

class Detector:
    def __init__(self, target_image, min_matches=4, angle_tolerance=30, length_ratio_tolerance=2.5, nfeatures=30000):
        self.orb = cv.ORB_create(nfeatures=nfeatures)
        self.target_features, self.target_des = self.orb.detectAndCompute(target_image, None)
        self.target_image_h, self.target_image_w = target_image.shape[:2]
        self.min_matches = min_matches
        self.angle_tolerance = angle_tolerance
        self.length_ratio_tolerance = length_ratio_tolerance

    def valid_rectangle(self, points):
        """Reject projected quadrilaterals whose angles or side lengths are too far from a rectangle."""
        def angle_between(p1, p2, p3):
            v1 = p1 - p2
            v2 = p3 - p2
            angle = np.arccos(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)))
            return np.degrees(angle)

        if len(points) != 4:
            return False
        angles = [
            angle_between(points[i], points[(i + 1) % 4], points[(i + 2) % 4])
            for i in range(4)
        ]
        if not all(abs(angle - 90) <= self.angle_tolerance for angle in angles):
            print(f"Not a valid rectangle, angle criterion: {angles}")
            return False
        side_lengths = [
            np.linalg.norm(points[i] - points[(i + 1) % 4]) for i in range(4)
        ]
        max_length = max(side_lengths)
        min_length = min(side_lengths)
        if max_length / min_length > self.length_ratio_tolerance:
            print("Not a valid rectangle, length criterion")
            return False
        return True

    def match_feature_find_object(self, train_img):
        """Match ORB features against the target image and return (bbox, annotated image), or (None, image)."""
        features2, des2 = self.orb.detectAndCompute(train_img, None)
        bf = cv.BFMatcher(cv.NORM_HAMMING)
        matches = bf.knnMatch(self.target_des, des2, k=2)
        good = []
        good_without_lists = []
        # Lowe's ratio test; knnMatch can return fewer than two neighbours, so filter those out first.
        matches = [match for match in matches if len(match) == 2]
        for m, n in matches:
            if m.distance < 0.8 * n.distance:
                good.append([m])
                good_without_lists.append(m)
        if len(good) >= self.min_matches:
            src_pts = np.float32([self.target_features[m.queryIdx].pt for m in good_without_lists]).reshape(-1, 1, 2)
            dst_pts = np.float32([features2[m.trainIdx].pt for m in good_without_lists]).reshape(-1, 1, 2)
            M, _ = cv.findHomography(src_pts, dst_pts, cv.RANSAC, 5.0)
            h, w = self.target_image_h, self.target_image_w
            pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0]]).reshape(-1, 1, 2)
            try:
                dst = cv.perspectiveTransform(pts, M)
            except cv.error as e:
                print(f"Perspective transform failed: {e}")
                return None, train_img
            rectangle_points = np.squeeze(dst)
            if not self.valid_rectangle(rectangle_points):
                return None, train_img
            train_img = cv.polylines(train_img, [np.int32(dst)], True, (0, 255, 0), 2, cv.LINE_AA)
            x, y, w, h = cv.boundingRect(np.int32(dst))
            return (x, y, w, h), train_img
        else:
            print(f"Not enough good matches are found - {len(good)}/{self.min_matches}")
            return None, train_img
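
For a quick standalone check of the Detector API, independent of the batch scripts above, a minimal sketch (the frame path is a placeholder; cropped_image.png is assumed to exist from crop-helper.py):

import cv2 as cv
from detector import Detector

target = cv.imread("cropped_image.png")
frame = cv.imread("data/video-frames/frame_0000.png")  # placeholder frame path

detector = Detector(target, min_matches=4)
bbox, annotated = detector.match_feature_find_object(frame)
if bbox is not None:
    x, y, w, h = bbox
    print(f"Object found at x={x}, y={y}, w={w}, h={h}")
    cv.imwrite("detection_check.png", annotated)  # frame with the projected outline drawn in green
else:
    print("Object not found in this frame")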

41
mov_to_png.py Normal file

@@ -0,0 +1,41 @@
import cv2
import os

def extract_frames_from_video(video_path, output_folder, frame_interval=30):
    os.makedirs(output_folder, exist_ok=True)
    video_capture = cv2.VideoCapture(video_path)
    if not video_capture.isOpened():
        print(f"Error: Could not open video {video_path}")
        return
    frame_count = 0
    saved_count = 0
    while True:
        success, frame = video_capture.read()
        if not success:
            print("Finished extracting frames.")
            break
        if frame_count % frame_interval == 0:
            frame_file_path = os.path.join(output_folder, f"frame_{saved_count:04d}.png")
            cv2.imwrite(frame_file_path, frame)
            print(f"Saved {frame_file_path}")
            saved_count += 1
        frame_count += 1
    video_capture.release()

def main():
    video_path = "/Users/vlacosmi/Downloads/drone-detect/capture_6_dec/fhd50fps/nvcamtest_6833_s00_00001.mp4"
    output_folder = "video-frames"
    frame_interval = 1
    extract_frames_from_video(video_path, output_folder, frame_interval)

if __name__ == "__main__":
    main()

4
requirements.txt Normal file

@@ -0,0 +1,4 @@
pillow==10.4.0
opencv-python==4.10.0.84
numpy==2.1.1
matplotlib==3.9.2
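
One gap worth noting: tracker.py (further down in this commit) imports dlib, which is not listed here, so the tracking path will fail on a clean install from this file alone. A hedged sketch of the missing line, left unpinned because no dlib version is recorded in this commit:

dlib  # required by tracker.py; pin a version once one has been validated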

30
tracker.py Normal file

@@ -0,0 +1,30 @@
import dlib
import cv2 as cv

class Tracker:
    def __init__(self):
        self.tracker = dlib.correlation_tracker()
        self.is_tracking = False

    def start_tracking(self, img, bbox):
        """Initialize the tracker with the given bounding box."""
        x, y, w, h = bbox
        self.tracker.start_track(img, dlib.rectangle(x, y, x + w, y + h))
        self.is_tracking = True

    def update(self, img):
        """Update the tracker with the new frame."""
        if not self.is_tracking:
            return None
        self.tracker.update(img)
        pos = self.tracker.get_position()
        x1, y1, x2, y2 = int(pos.left()), int(pos.top()), int(pos.right()), int(pos.bottom())
        # Validate the bounding box
        if (x2 - x1) > 0 and (y2 - y1) > 0:
            return (x1, y1, x2 - x1, y2 - y1)  # Valid bounding box
        else:
            self.is_tracking = False  # Stop tracking if the box is invalid
            return None
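
A minimal usage sketch of the Tracker on its own, assuming dlib is installed (it is not listed in requirements.txt) and two consecutive frames are on disk; the frame paths and seed box are placeholders:

import cv2 as cv
from tracker import Tracker

frame0 = cv.imread("data/video-frames/frame_0000.png")  # placeholder paths
frame1 = cv.imread("data/video-frames/frame_0001.png")

tracker = Tracker()
tracker.start_tracking(frame0, (100, 150, 80, 60))  # hypothetical (x, y, w, h) seed box
bbox = tracker.update(frame1)
if bbox:
    print(f"Tracked box in next frame: {bbox}")
else:
    print("Tracker lost the object")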