0

The MediaPipe documentation states: "x and y: Landmark coordinates normalized to [0.0, 1.0] by the image width and height respectively." However, I'm getting values outside that range.

mediapipe 0.10.1, Python 3.8.10

#!/usr/bin/env python3

import numpy as np
import cv2
import mediapipe as mp
import time

class HumanPoseDetection:
    """Run the MediaPipe pose landmarker on webcam frames.

    Each asynchronous result is stored in ``self.result`` and every landmark
    whose ``x`` or ``y`` lies outside ``[0, 1]`` is reported on stdout.
    """

    def __init__(self):
        """Build the landmarker options in LIVE_STREAM mode.

        The landmarker itself is only created inside :meth:`detect_pose`.
        """
        # TODO: change the path
        model_path = "/home/user/models/pose_landmarker_full.task"
        BaseOptions = mp.tasks.BaseOptions
        self.PoseLandmarker = mp.tasks.vision.PoseLandmarker
        PoseLandmarkerOptions = mp.tasks.vision.PoseLandmarkerOptions
        # Holds the result *class* as a placeholder; `callback` overwrites it
        # with the landmark list of the first detected pose.
        self.result = mp.tasks.vision.PoseLandmarkerResult
        VisionRunningMode = mp.tasks.vision.RunningMode

        self.options = PoseLandmarkerOptions(
            base_options=BaseOptions(model_asset_path=model_path),
            running_mode=VisionRunningMode.LIVE_STREAM,
            result_callback=self.callback,
        )

    @staticmethod
    def _is_normalized(landmark):
        """Return True when both landmark.x and landmark.y lie in [0, 1]."""
        return 0 <= landmark.x <= 1 and 0 <= landmark.y <= 1

    @staticmethod
    def _next_timestamp_ms(previous_ms, now_ms):
        """Return a timestamp strictly greater than ``previous_ms``.

        ``now_ms`` is used as-is when it is already newer; otherwise the
        previous value is bumped by one millisecond so that the sequence fed
        to the landmarker never repeats or goes backwards.
        """
        return max(now_ms, previous_ms + 1)

    def callback(self, result, output_image, timestamp_ms):
        """Store the first pose's landmarks and warn about out-of-range ones.

        Args:
            result: Landmarker result; only ``result.pose_landmarks`` is read.
            output_image: Unused; part of the callback signature.
            timestamp_ms: Unused; part of the callback signature.
        """
        if not result.pose_landmarks:
            return

        self.result = result.pose_landmarks[0]
        # NOTE(review): out-of-range values are presumably landmarks the model
        # places outside the visible frame -- confirm against MediaPipe docs.
        for elem in self.result:
            if not self._is_normalized(elem):
                print("Warning out of range values: {}".format(elem))

    def detect_pose(self):
        """Feed webcam frames (device 0) to the landmarker until capture ends.

        Each frame is converted from BGR to RGB, resized to 224x224 and
        submitted with ``detect_async``; results arrive through
        :meth:`callback`. The loop stops when the capture is closed or a
        frame cannot be read, and the capture device is always released.
        """
        cap = cv2.VideoCapture(0)
        last_timestamp_ms = -1
        try:
            with self.PoseLandmarker.create_from_options(self.options) as landmarker:
                while cap.isOpened():
                    ok, image = cap.read()
                    if not ok:
                        # No frame was grabbed; `image` is unusable, so stop
                        # instead of crashing inside cvtColor.
                        break
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    image = cv2.resize(image, (224, 224))
                    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image)
                    # Wall-clock milliseconds can repeat or jump backwards;
                    # force a strictly increasing sequence.
                    last_timestamp_ms = self._next_timestamp_ms(
                        last_timestamp_ms, int(time.time() * 1000)
                    )
                    landmarker.detect_async(mp_image, last_timestamp_ms)
        finally:
            cap.release()

if __name__ == "__main__":
    # Script entry point: build the detector and start the webcam loop.
    HumanPoseDetection().detect_pose()

A workaround proposed here is to clamp the value with min. However, in my case I need the normalized x and y, not the pixel coordinates, and this workaround doesn't seem to be accurate either:

# Scale the normalized coordinates to pixel indices, capped at the last valid
# index (width - 1 / height - 1). Only the upper bound is clamped here;
# negative normalized values are not handled by this snippet.
x_px = min(math.floor(normalized_x * image_width), image_width - 1)
y_px = min(math.floor(normalized_y * image_height), image_height - 1)

Can you please tell me how I can solve this issue? Thanks in advance.

Related Issue

Bilal
  • 3,191
  • 4
  • 21
  • 49

0 Answers0