In this lesson we show how to create Python classes that parse the data coming from MediaPipe: hand landmarks, pose landmarks, and the bounding boxes of detected faces. Putting the difficult parsing inside these classes leaves the main program with a simple way to access and use all of the data.
import cv2
print(cv2.__version__)


class mpFace:
    """Find faces in a frame and return their bounding boxes in pixels."""

    import mediapipe as mp

    def __init__(self):
        # Detector with MediaPipe's default settings.
        self.myFace = self.mp.solutions.face_detection.FaceDetection()

    def Marks(self, frame):
        """Return a list of (topLeft, bottomRight) pixel corners, one per face.

        `frame` is a BGR image; the relative coordinates reported by the
        detector are scaled by the frame's own width and height.
        The list is empty when no face is detected.
        """
        frameHeight, frameWidth = frame.shape[:2]
        frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = self.myFace.process(frameRGB)
        faceBoundBoxs = []
        if results.detections is not None:
            for face in results.detections:
                bBox = face.location_data.relative_bounding_box
                topLeft = (int(bBox.xmin * frameWidth),
                           int(bBox.ymin * frameHeight))
                bottomRight = (int((bBox.xmin + bBox.width) * frameWidth),
                               int((bBox.ymin + bBox.height) * frameHeight))
                faceBoundBoxs.append((topLeft, bottomRight))
        return faceBoundBoxs


class mpPose:
    """Find body pose landmarks in a frame and return them in pixels."""

    import mediapipe as mp

    def __init__(self, still=False, upperBody=False, smoothData=True,
                 tol1=.5, tol2=.5):
        # Keyword arguments are used because the positional order of
        # Pose() differs between MediaPipe releases.
        # NOTE(review): mapping `still` -> static_image_mode is inferred from
        # the parameter order; confirm against the original lesson code.
        options = {
            'static_image_mode': still,
            'smooth_landmarks': smoothData,
            'min_detection_confidence': tol1,
            'min_tracking_confidence': tol2,
        }
        if upperBody:
            # NOTE(review): `upper_body_only` appears to exist only in older
            # MediaPipe releases, so it is forwarded only when requested;
            # a release without it will raise TypeError here.
            options['upper_body_only'] = True
        self.myPose = self.mp.solutions.pose.Pose(**options)

    def Marks(self, frame):
        """Return a list of (x, y) pixel positions, one per pose landmark.

        `frame` is a BGR image. The list is empty when no pose is detected.
        """
        frameHeight, frameWidth = frame.shape[:2]
        frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = self.myPose.process(frameRGB)
        if not results.pose_landmarks:
            return []
        return [(int(lm.x * frameWidth), int(lm.y * frameHeight))
                for lm in results.pose_landmarks.landmark]


class mpHands:
    """Find hands in a frame and return their landmarks and handedness."""

    import mediapipe as mp

    def __init__(self, maxHands=2, tol1=.5, tol2=.5):
        # Keyword arguments are used because the positional order of
        # Hands() differs between MediaPipe releases.
        # NOTE(review): static_image_mode=False is assumed (video stream);
        # the first argument was lost from the published listing.
        self.hands = self.mp.solutions.hands.Hands(
            static_image_mode=False,
            max_num_hands=maxHands,
            min_detection_confidence=tol1,
            min_tracking_confidence=tol2,
        )

    def Marks(self, frame):
        """Return (myHands, handsType) for the hands found in `frame`.

        myHands is a list with one entry per hand, each entry a list of
        (x, y) pixel positions for that hand's landmarks. handsType holds the
        classification label reported for each hand, in the same order.
        Both lists are empty when no hand is detected.
        """
        frameHeight, frameWidth = frame.shape[:2]
        myHands = []
        handsType = []
        frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = self.hands.process(frameRGB)
        if results.multi_hand_landmarks is not None:
            for hand in results.multi_handedness:
                handsType.append(hand.classification[0].label)
            for handLandMarks in results.multi_hand_landmarks:
                myHands.append(
                    [(int(landMark.x * frameWidth),
                      int(landMark.y * frameHeight))
                     for landMark in handLandMarks.landmark])
        return myHands, handsType


width = 1280
height = 720
cam = cv2.VideoCapture(3, cv2.CAP_DSHOW)
cam.set(cv2.CAP_PROP_FRAME_WIDTH, width)
cam.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
cam.set(cv2.CAP_PROP_FPS, 30)
cam.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))

findHands = mpHands(2)
findFace = mpFace()
findPose = mpPose()

font = cv2.FONT_HERSHEY_SIMPLEX
fontColor = (0, 0, 255)

try:
    while True:
        ignore, frame = cam.read()
        if not ignore:
            # No frame was delivered; stop instead of crashing in resize.
            break
        frame = cv2.resize(frame, (width, height))
        handsLM, handsType = findHands.Marks(frame)
        faceLoc = findFace.Marks(frame)
        poseLM = findPose.Marks(frame)
        if poseLM != []:
            # Mark pose landmarks 13-16 with filled green circles.
            for ind in [13, 14, 15, 16]:
                cv2.circle(frame, poseLM[ind], 20, (0, 255, 0), -1)
        for face in faceLoc:
            cv2.rectangle(frame, face[0], face[1], (255, 0, 0), 3)
        for hand, handType in zip(handsLM, handsType):
            # The on-screen label is the classification label itself,
            # drawn at landmark 8 of the hand.
            cv2.putText(frame, handType, hand[8], font, 2, fontColor, 2)
        cv2.imshow('my WEBcam', frame)
        cv2.moveWindow('my WEBcam', 0, 0)
        if cv2.waitKey(1) & 0xff == ord('q'):
            break
finally:
    # Always hand the camera back, even if a frame raised an error.
    cam.release()