In this video lesson we show you how to improve the accuracy of the gesture recognition program we developed in the last lesson. We do this by normalizing the hand-landmark distance matrix to a standard size: every distance is divided by the size of the palm. As a result, you get accurate results regardless of how far your hand is from the camera. For your convenience, the code we develop in this lesson is included below. Enjoy!
import time

import cv2
import numpy as np

print(cv2.__version__)


class mpHands:
    """Wrapper around MediaPipe Hands that returns landmarks in pixel coordinates."""

    import mediapipe as mp

    def __init__(self, maxHands=2, tol1=.5, tol2=.5):
        """Create the MediaPipe hand tracker.

        Args:
            maxHands: Maximum number of hands to track.
            tol1: Minimum detection confidence.
            tol2: Minimum tracking confidence.
        """
        # Keyword arguments on purpose: current MediaPipe releases have a
        # ``model_complexity`` parameter in third position, so passing the
        # thresholds positionally would bind tol1 to the wrong parameter.
        self.hands = self.mp.solutions.hands.Hands(
            static_image_mode=False,
            max_num_hands=maxHands,
            min_detection_confidence=tol1,
            min_tracking_confidence=tol2,
        )

    def Marks(self, frame):
        """Return the landmarks of every hand found in a BGR frame.

        Args:
            frame: BGR image as delivered by ``cv2.VideoCapture.read``.

        Returns:
            A list with one entry per detected hand; each entry is a list of
            ``(x, y)`` integer pixel tuples. Empty when no hand is found.
        """
        # Scale by the frame's own size rather than by module-level globals,
        # so the result is right for any frame that is passed in.
        frameHeight, frameWidth = frame.shape[:2]
        frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = self.hands.process(frameRGB)
        if results.multi_hand_landmarks is None:
            return []
        return [
            [(int(landMark.x * frameWidth), int(landMark.y * frameHeight))
             for landMark in handLandMarks.landmark]
            for handLandMarks in results.multi_hand_landmarks
        ]


def findDistances(handData):
    """Build the palm-normalized landmark-to-landmark distance matrix.

    Every pairwise distance is divided by the distance between landmarks
    0 and 9 of the same hand, which makes the matrix independent of how far
    the hand is from the camera.

    Args:
        handData: Sequence of ``(x, y)`` points for one hand (at least 10).

    Returns:
        Square ``float`` array with one row and column per landmark.

    Raises:
        ZeroDivisionError: If landmarks 0 and 9 coincide, so there is no
            palm size to normalize by.
    """
    points = np.asarray(handData, dtype=float)
    palmSize = float(np.linalg.norm(points[0] - points[9]))
    if palmSize == 0:
        # NumPy would silently yield inf/nan here; fail loudly instead.
        raise ZeroDivisionError('palm size is zero; cannot normalize distances')
    # Broadcasting yields every row/column difference vector in one step.
    differences = points[:, np.newaxis, :] - points[np.newaxis, :, :]
    return np.linalg.norm(differences, axis=-1) / palmSize


def findError(gestureMatrix, unknownMatrix, keyPoints):
    """Sum the absolute differences of two distance matrices over keyPoints.

    Args:
        gestureMatrix: Distance matrix of a known gesture.
        unknownMatrix: Distance matrix of the gesture being classified.
        keyPoints: Landmark indices; all row/column pairs of them are compared.

    Returns:
        The summed absolute difference.
    """
    selection = np.ix_(keyPoints, keyPoints)
    known = np.asarray(gestureMatrix)[selection]
    unknown = np.asarray(unknownMatrix)[selection]
    error = np.abs(known - unknown).sum()
    # Live read-out of the error, useful when choosing a tolerance.
    print(error)
    return error


def findGesture(unknownGesture, knownGestures, keyPoints, gestNames, tol):
    """Name the known gesture closest to unknownGesture.

    Args:
        unknownGesture: Distance matrix of the gesture to classify.
        knownGestures: Distance matrices of the trained gestures, in the same
            order as gestNames.
        keyPoints: Landmark indices used for the comparison.
        gestNames: Names of the trained gestures.
        tol: The best match is accepted only if its error is below this value.

    Returns:
        The name of the best match, or ``'Unknown'`` when there are no
        gestures or no error is below tol.
    """
    if not gestNames:
        return 'Unknown'
    errorArray = [
        findError(knownGestures[i], unknownGesture, keyPoints)
        for i in range(len(gestNames))
    ]
    # min() keeps the first index on ties, like a strict '<' scan.
    minIndex = min(range(len(errorArray)), key=errorArray.__getitem__)
    if errorArray[minIndex] < tol:
        return gestNames[minIndex]
    return 'Unknown'


# Size every captured frame is resized to before processing and display.
width = 1280
height = 720
# Landmark indices compared when matching gestures and circled on screen.
keyPoints = [0, 4, 5, 9, 13, 17, 8, 12, 16, 20]
# A gesture is recognized only when its summed error is below this value.
tol = 10


def main():
    """Train gestures from the camera, then recognize them live.

    Training: show a gesture and press 't' to store it, once per requested
    gesture. Afterwards the closest stored gesture is drawn on every frame.
    Press 'q' to quit.
    """
    cam = cv2.VideoCapture(4, cv2.CAP_DSHOW)
    try:
        if not cam.isOpened():
            raise SystemExit('Could not open camera 4')
        cam.set(cv2.CAP_PROP_FRAME_WIDTH, width)
        cam.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
        cam.set(cv2.CAP_PROP_FPS, 30)
        cam.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
        findHands = mpHands(1)
        time.sleep(5)

        numGest = int(input('How Many Gestures Do You Want? '))
        if numGest < 1:
            raise SystemExit('At least one gesture is required')
        gestNames = []
        for i in range(numGest):
            prompt = 'Name of Gesture #' + str(i + 1) + ' '
            gestNames.append(input(prompt))
        print(gestNames)

        knownGestures = []
        trainCnt = 0
        promptedFor = -1  # index of the gesture whose prompt was last printed
        while True:
            ok, frame = cam.read()
            if not ok:
                print('Could not read a frame from the camera')
                break
            frame = cv2.resize(frame, (width, height))
            handData = findHands.Marks(frame)
            train = trainCnt < numGest

            if train:
                # Prompt once per gesture instead of on every frame.
                if handData and promptedFor != trainCnt:
                    print('Please Show Gesture ', gestNames[trainCnt], ': Press t when Ready')
                    promptedFor = trainCnt
            elif handData:
                try:
                    unknownGesture = findDistances(handData[0])
                except ZeroDivisionError:
                    # Degenerate hand on this frame: nothing to classify.
                    pass
                else:
                    myGesture = findGesture(unknownGesture, knownGestures, keyPoints, gestNames, tol)
                    cv2.putText(frame, myGesture, (100, 175), cv2.FONT_HERSHEY_SIMPLEX, 3, (255, 0, 0), 8)

            for hand in handData:
                for ind in keyPoints:
                    cv2.circle(frame, hand[ind], 25, (255, 0, 255), 3)
            cv2.imshow('my WEBcam', frame)
            cv2.moveWindow('my WEBcam', 0, 0)

            # Poll the keyboard once per frame so a single key press cannot be
            # consumed by one check and missed by the other.
            key = cv2.waitKey(1) & 0xff
            if key == ord('q'):
                break
            if train and key == ord('t') and handData:
                try:
                    knownGestures.append(findDistances(handData[0]))
                except ZeroDivisionError:
                    print('Could not measure the hand; please try again')
                else:
                    trainCnt = trainCnt + 1
    finally:
        # Always free the camera and close the window, even on errors.
        cam.release()
        cv2.destroyAllWindows()


if __name__ == '__main__':
    main()
s lesson. Enjoy!