Detecting objects in a large image with YOLOv8n by splitting the image into grid blocks (tiles) for training and prediction

Question

The code below loads my trained YOLOv8 model weights and uses them to predict objects in an image. I am having trouble detecting objects when the input is a large spherical image. Can anyone help me apply a sliding-window approach correctly in this code?

import cv2
import numpy as np
from ultralytics import YOLO
import matplotlib.pyplot as plt
import pyvips

# Sliding-window object detection with a custom Ultralytics YOLOv8 model.
#
# The image is cut into overlapping windows, the model is run on every window,
# and each detection is shifted back into full-image coordinates before it is
# reported and drawn.

# Path to the trained YOLOv8 weights.
# Replace with the correct path to your own ``best.pt`` checkpoint.
MODEL_PATH = 'D:/yolo_objectdetection/runs/detect/train26/weights/best.pt'

# Path to the large input image (described in the question as 1333x800).
IMAGE_PATH = 'D:/yolo_objectdetection/newdata/images/train/Track_C-Sphere-66.jpg'

# Window size as (width, height); adjust to the size of the objects to detect.
window_size = (1000, 1000)

# Window stride as (x, y); a stride smaller than the window gives overlap.
stride = (500, 500)


def window_origins(length, window, step):
    """Return the start offsets of sliding windows that cover ``length`` pixels.

    Args:
        length: Size of the image along one axis, in pixels.
        window: Size of the window along the same axis, in pixels.
        step: Distance between consecutive window starts, in pixels.

    Returns:
        A sorted list of start offsets. If the image is not larger than the
        window, the single offset ``0`` is returned so the whole axis is still
        processed. Otherwise the offsets advance by ``step`` and a final offset
        ``length - window`` is appended when needed so the far edge of the
        image is always covered.

    Raises:
        ValueError: If any argument is not a positive number.
    """
    if length <= 0 or window <= 0 or step <= 0:
        raise ValueError("length, window and step must all be positive")
    if length <= window:
        # The original ``range(0, length - window, step)`` was empty here,
        # so small images were silently skipped.
        return [0]
    last = length - window
    origins = list(range(0, last + 1, step))
    if origins[-1] != last:
        # Add one extra window flush with the edge instead of leaving a
        # strip of the image unprocessed.
        origins.append(last)
    return origins


def detect_with_sliding_window(model, image, window_size, stride,
                               conf=0.7, iou=0.9):
    """Run ``model`` on every sliding window of ``image``.

    Args:
        model: Object exposing ``predict(...)`` like ``ultralytics.YOLO``.
        image: Image array as returned by ``cv2.imread`` (rows, columns, ...).
        window_size: ``(width, height)`` of each window in pixels.
        stride: ``(x_step, y_step)`` between window starts in pixels.
        conf: Confidence threshold forwarded to ``model.predict``.
        iou: IoU threshold forwarded to ``model.predict``.

    Returns:
        A tuple ``(detections, annotated)``. ``detections`` is a list of
        ``(class_name, [x1, y1, x2, y2], confidence)`` tuples with the box in
        full-image pixel coordinates. ``annotated`` is a copy of ``image``
        with the boxes and class names drawn on it.
    """
    height, width = image.shape[:2]
    win_w, win_h = window_size
    step_x, step_y = stride

    # Draw on a copy: windows are views into ``image``, so drawing on them
    # would put boxes/text into pixels that later overlapping windows feed
    # to the model.
    annotated = image.copy()
    detections = []

    for y in window_origins(height, win_h, step_y):
        for x in window_origins(width, win_w, step_x):
            # Extract the current window from the untouched image.
            window = image[y:y + win_h, x:x + win_w]

            results = model.predict(window, show=False, save=True,
                                    save_txt=True, conf=conf, iou=iou)
            result = results[0]

            for box in result.boxes:
                class_name = result.names[int(box.cls[0].item())]
                x1, y1, x2, y2 = (round(v) for v in box.xyxy[0].tolist())
                # Boxes come back relative to the window; shift them by the
                # window origin to get full-image coordinates.
                cords = [x1 + x, y1 + y, x2 + x, y2 + y]
                score = round(box.conf[0].item(), 2)

                print("Object type:", class_name)
                print("Coordinates:", cords)
                print("Probability:", score)
                print("---")

                detections.append((class_name, cords, score))
                cv2.putText(annotated, class_name, (cords[0], cords[1] - 5),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), 1,
                            cv2.LINE_AA)
                cv2.rectangle(annotated, (cords[0], cords[1]),
                              (cords[2], cords[3]), color=(255, 255, 0),
                              thickness=2)

    # NOTE: an object lying in the overlap of two windows is reported once
    # per window; merge duplicates (e.g. with NMS) if that matters.
    return detections, annotated


if __name__ == "__main__":
    # Load the custom YOLOv8 model.
    custom_yolo_model = YOLO(MODEL_PATH)

    # cv2.imread returns None instead of raising when the file is unreadable.
    image = cv2.imread(IMAGE_PATH)
    if image is None:
        raise FileNotFoundError("Could not read image: " + IMAGE_PATH)

    detected_objects, annotated_image = detect_with_sliding_window(
        custom_yolo_model, image, window_size, stride)

    # OpenCV stores pixels as BGR while Matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB))
    plt.show()

Detecting objects in a large image with YOLOv8n by splitting the image into grid blocks (tiles) for training and prediction

0 Answers0