The MediaPipe documentation states: "x and y: Landmark coordinates normalized to [0.0, 1.0] by the image width and height respectively." However, I'm getting values outside that range.
Versions: mediapipe 0.10.1, Python 3.8.10
#!/usr/bin/env python3
import numpy as np
import cv2
import mediapipe as mp
import time
class HumanPoseDetection:
    """Run the MediaPipe Pose Landmarker on webcam frames in live-stream mode.

    The landmarks of the first detected pose are stored in ``self.result`` by
    ``callback``. Any landmark whose normalized ``x`` or ``y`` lies outside
    ``[0, 1]`` is reported on stdout.
    """

    def __init__(self):
        """Build the landmarker options; the landmarker is created in ``detect_pose``."""
        # TODO: change the path
        model_path = "/home/user/models/pose_landmarker_full.task"

        BaseOptions = mp.tasks.BaseOptions
        self.PoseLandmarker = mp.tasks.vision.PoseLandmarker
        PoseLandmarkerOptions = mp.tasks.vision.PoseLandmarkerOptions
        # Placeholder value; ``callback`` replaces it with a landmark list
        # as soon as a pose is detected.
        self.result = mp.tasks.vision.PoseLandmarkerResult
        VisionRunningMode = mp.tasks.vision.RunningMode

        self.options = PoseLandmarkerOptions(
            base_options=BaseOptions(model_asset_path=model_path),
            running_mode=VisionRunningMode.LIVE_STREAM,
            result_callback=self.callback,
        )

    def callback(self, result, output_image, timestamp_ms):
        """Store the first pose's landmarks and warn about out-of-range ones.

        Args:
            result: Detection result; only ``result.pose_landmarks`` is read.
            output_image: Unused; part of the callback signature.
            timestamp_ms: Unused; part of the callback signature.
        """
        if not result.pose_landmarks:
            # No pose in this frame: keep the previously stored result.
            return

        self.result = result.pose_landmarks[0]
        for elem in self.result:
            # NOTE(review): out-of-range values presumably mean the landmark is
            # estimated to lie outside the visible frame - confirm against the
            # MediaPipe documentation before clamping or discarding them.
            if not (0 <= elem.x <= 1 and 0 <= elem.y <= 1):
                print("Warning out of range values: {}".format(elem))

    def detect_pose(self):
        """Feed webcam frames (device 0) to the landmarker until capture stops.

        Each frame is converted from BGR to RGB, resized to 224x224 and
        submitted asynchronously; results arrive through ``callback``.
        The loop ends when the capture closes or a frame cannot be read.
        """
        cap = cv2.VideoCapture(0)
        last_timestamp_ms = -1
        try:
            with self.PoseLandmarker.create_from_options(self.options) as landmarker:
                while cap.isOpened():
                    ok, image = cap.read()
                    if not ok:
                        # A failed grab yields no image; stop instead of
                        # handing None to cv2.cvtColor.
                        break

                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    image = cv2.resize(image, (224, 224))
                    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image)

                    # Live-stream mode expects increasing timestamps, while the
                    # wall clock can repeat a millisecond or step backwards.
                    frame_timestamp_ms = max(
                        int(time.time() * 1000), last_timestamp_ms + 1
                    )
                    last_timestamp_ms = frame_timestamp_ms
                    landmarker.detect_async(mp_image, frame_timestamp_ms)
        finally:
            # Always give the camera back, even if detection raises.
            cap.release()
if __name__ == "__main__":
    # Script entry point: build the detector and run the webcam loop.
    HPD_ = HumanPoseDetection()
    HPD_.detect_pose()
A workaround proposed here is to use `min`; however, in my case I need the normalized x and y, not the pixel coordinates! Also, this workaround doesn't seem to be accurate:
# Scale the normalized coordinates to pixel indices and cap them at the last
# column/row. Only the upper bound is capped; negative inputs stay negative.
x_px = min(math.floor(normalized_x * image_width), image_width - 1)
y_px = min(math.floor(normalized_y * image_height), image_height - 1)
Can you please tell me how I can solve this issue? Thanks in advance.