In this lesson we demonstrate how to use MediaPipe and Python to create an AI system that can accurately recognize hand gestures. This is follow-on work to what we developed in Lesson 25. The code we developed is presented below.
import time
import cv2
print(cv2.__version__)
import numpy as np

class mpHands:
    import mediapipe as mp
    def __init__(self,maxHands=2,tol1=.5,tol2=.5):
        self.hands=self.mp.solutions.hands.Hands(False,maxHands,tol1,tol2)
    def Marks(self,frame):
        myHands=[]
        frameRGB=cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
        results=self.hands.process(frameRGB)
        if results.multi_hand_landmarks != None:
            for handLandMarks in results.multi_hand_landmarks:
                myHand=[]
                for landMark in handLandMarks.landmark:
                    myHand.append((int(landMark.x*width),int(landMark.y*height)))
                myHands.append(myHand)
        return myHands

def findDistances(handData):
    distMatrix=np.zeros([len(handData),len(handData)],dtype='float')
    for row in range(0,len(handData)):
        for column in range(0,len(handData)):
            distMatrix[row][column]=((handData[row][0]-handData[column][0])**2+(handData[row][1]-handData[column][1])**2)**(1./2.)
    return distMatrix

def findError(gestureMatrix,unknownMatrix,keyPoints):
    error=0
    for row in keyPoints:
        for column in keyPoints:
            error=error+abs(gestureMatrix[row][column]-unknownMatrix[row][column])
    return error

def findGesture(unknownGesture,knownGestures,keyPoints,gestNames,tol):
    errorArray=[]
    for i in range(0,len(gestNames),1):
        error=findError(knownGestures[i],unknownGesture,keyPoints)
        errorArray.append(error)
    errorMin=errorArray[0]
    minIndex=0
    for i in range(0,len(errorArray),1):
        if errorArray[i]<errorMin:
            errorMin=errorArray[i]
            minIndex=i
    if errorMin<tol:
        gesture=gestNames[minIndex]
    if errorMin>=tol:
        gesture='Unknown'
    return gesture

width=1280
height=720
cam=cv2.VideoCapture(0,cv2.CAP_DSHOW)
cam.set(cv2.CAP_PROP_FRAME_WIDTH, width)
cam.set(cv2.CAP_PROP_FRAME_HEIGHT,height)
cam.set(cv2.CAP_PROP_FPS, 30)
cam.set(cv2.CAP_PROP_FOURCC,cv2.VideoWriter_fourcc(*'MJPG'))
findHands=mpHands(1)
time.sleep(5)
keyPoints=[0,4,5,9,13,17,8,12,16,20]
train=True
tol=1500
trainCnt=0
knownGestures=[]
numGest=int(input('How Many Gestures Do You Want? '))
gestNames=[]
for i in range(0,numGest,1):
    prompt='Name of Gesture #'+str(i+1)+' '
    name=input(prompt)
    gestNames.append(name)
print(gestNames)
while True:
    ignore, frame = cam.read()
    frame=cv2.resize(frame,(width,height))
    handData=findHands.Marks(frame)
    if train==True:
        if handData!=[]:
            print('Please Show Gesture ',gestNames[trainCnt],': Press t when Ready')
            if cv2.waitKey(1) & 0xff==ord('t'):
                knownGesture=findDistances(handData[0])
                knownGestures.append(knownGesture)
                trainCnt=trainCnt+1
                if trainCnt==numGest:
                    train=False
    if train == False:
        if handData!=[]:
            unknownGesture=findDistances(handData[0])
            myGesture=findGesture(unknownGesture,knownGestures,keyPoints,gestNames,tol)
            #error=findError(knownGesture,unknownGesture,keyPoints)
            cv2.putText(frame,myGesture,(100,175),cv2.FONT_HERSHEY_SIMPLEX,3,(255,0,0),8)
    for hand in handData:
        for ind in keyPoints:
            cv2.circle(frame,hand[ind],25,(255,0,255),3)
    cv2.imshow('my WEBcam', frame)
    cv2.moveWindow('my WEBcam',0,0)
    if cv2.waitKey(1) & 0xff ==ord('q'):
        break
cam.release()
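The heart of the recognizer is the distance-matrix comparison: each hand pose is reduced to the pairwise pixel distances between the 21 MediaPipe landmarks, a trained gesture is stored as that matrix, and an unknown pose is labeled with whichever stored gesture gives the smallest summed absolute difference over the key points (wrist 0, fingertips 4, 8, 12, 16, 20, and the knuckles 5, 9, 13, 17), as long as that error is below the tolerance. The snippet below is a minimal, self-contained sketch of that same matching idea that runs without a webcam or MediaPipe. The five-point "hands", the tolerance value, and the NumPy-vectorized helpers (dist_matrix, match_error) are illustrative stand-ins I made up for this sketch, not part of the lesson program, which uses findDistances, findError, and findGesture above.

# Minimal sketch of the distance-matrix matching idea, on synthetic data only.
# The landmark coordinates and tolerance below are invented for illustration.
import numpy as np

def dist_matrix(points):
    # Pairwise Euclidean distances between 2-D points (mirrors findDistances).
    pts=np.asarray(points,dtype=float)
    diff=pts[:,None,:]-pts[None,:,:]
    return np.sqrt((diff**2).sum(axis=2))

def match_error(known,unknown,key_points):
    # Sum of absolute differences over the selected landmark pairs (mirrors findError).
    idx=np.ix_(key_points,key_points)
    return np.abs(known[idx]-unknown[idx]).sum()

# Two made-up "gestures" with 5 landmarks each (a real hand has 21).
open_hand=[(0,0),(100,0),(0,100),(100,100),(50,150)]
fist=[(0,0),(30,0),(0,30),(30,30),(15,45)]
key_points=[0,1,2,3,4]
known={'open':dist_matrix(open_hand),'fist':dist_matrix(fist)}

# An unknown pose: a slightly jittered open hand.
unknown=dist_matrix([(2,1),(98,3),(1,103),(101,99),(52,148)])

errors={name:match_error(mat,unknown,key_points) for name,mat in known.items()}
best=min(errors,key=errors.get)
tol=200   # arbitrary tolerance for this toy example
print(errors)
print('Gesture:',best if errors[best]<tol else 'Unknown')

Because the matching works on raw pixel distances, it is sensitive to how far the hand is from the camera: show each gesture at roughly the same distance you trained it at, or loosen the tolerance (tol=1500 in the program above) if recognition seems too strict.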