0

I found a code here that can return the features corresponding to each bounding box detected after applying a Faster RCNN model using Detectron2 framework.

from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import ColorMode, Visualizer
from detectron2.data import MetadataCatalog
import cv2
import time
from detectron2.data.detection_utils import read_image
import torch
import os
from detectron2.modeling.postprocessing import detector_postprocess
from detectron2.modeling import build_model

#setup config
cfg = get_cfg()
cfg.merge_from_file("C:/Users/preet/detectron_repo/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml")
cfg.MODEL.WEIGHTS = "detectron2://COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x/139173657/model_final_68b088.pkl"

cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5   # set the testing threshold for this model
#build model
model = build_model(cfg)

model.eval() #make sure its in eval mode

image = cv2.imread('sample.jpg')
height, width = image.shape[:2]
image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
inputs = [{"image": image, "height": height, "width": width}]

with torch.no_grad():
    images = model.preprocess_image(inputs)  # don't forget to preprocess
    features = model.backbone(images.tensor)  # set of cnn features
    proposals, _ = model.proposal_generator(images, features, None)  # RPN

    features_ = [features[f] for f in model.roi_heads.box_in_features]
    box_features = model.roi_heads.box_pooler(features_, [x.proposal_boxes for x in proposals])
    box_features = model.roi_heads.box_head(box_features)  # features of all 1k candidates
    predictions = model.roi_heads.box_predictor(box_features)
    pred_instances, pred_inds = model.roi_heads.box_predictor.inference(predictions, proposals)
    pred_instances = model.roi_heads.forward_with_given_boxes(features, pred_instances)

    # output boxes, masks, scores, etc
    pred_instances = model._postprocess(pred_instances, inputs, images.image_sizes)  # scale box to orig size
    # features of the proposed boxes
    feats = box_features[pred_inds]
    print("features:", feats)

When I print feats, I get a tensor of size (0,1024). But when I use DefaultPredictor of Detectron2, the model detects 11 objects in the same image. So the feats tensor should have a size of (11,1024). What is the problem here?

Here I have returned the features of the bounding boxes corresponding to a single image only, but I have to do the same for a lot of images. How to do that?

Preetom Saha Arko
  • 2,588
  • 4
  • 21
  • 37

0 Answers0