I found a code here that can return the features corresponding to each bounding box detected after applying a Faster RCNN model using Detectron2 framework.
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import ColorMode, Visualizer
from detectron2.data import MetadataCatalog
import cv2
import time
from detectron2.data.detection_utils import read_image
import torch
import os
from detectron2.modeling.postprocessing import detector_postprocess
from detectron2.modeling import build_model
#setup config
cfg = get_cfg()
cfg.merge_from_file("C:/Users/preet/detectron_repo/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml")
cfg.MODEL.WEIGHTS = "detectron2://COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x/139173657/model_final_68b088.pkl"
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5 # set the testing threshold for this model
#build model
model = build_model(cfg)
model.eval() #make sure its in eval mode
image = cv2.imread('sample.jpg')
height, width = image.shape[:2]
image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
inputs = [{"image": image, "height": height, "width": width}]
with torch.no_grad():
images = model.preprocess_image(inputs) # don't forget to preprocess
features = model.backbone(images.tensor) # set of cnn features
proposals, _ = model.proposal_generator(images, features, None) # RPN
features_ = [features[f] for f in model.roi_heads.box_in_features]
box_features = model.roi_heads.box_pooler(features_, [x.proposal_boxes for x in proposals])
box_features = model.roi_heads.box_head(box_features) # features of all 1k candidates
predictions = model.roi_heads.box_predictor(box_features)
pred_instances, pred_inds = model.roi_heads.box_predictor.inference(predictions, proposals)
pred_instances = model.roi_heads.forward_with_given_boxes(features, pred_instances)
# output boxes, masks, scores, etc
pred_instances = model._postprocess(pred_instances, inputs, images.image_sizes) # scale box to orig size
# features of the proposed boxes
feats = box_features[pred_inds]
print("features:", feats)
When I print feats
, I get a tensor of size (0,1024). But when I use DefaultPredictor
of Detectron2, the model detects 11 objects in the same image. So the feats
tensor should have a size of (11,1024). What is the problem here?
Here I have returned the features of the bounding boxes corresponding to a single image only, but I have to do the same for a lot of images. How to do that?