In this video lesson we show you how you can control objects in the real world using OpenCV, Python, MediaPipe and our old friend, the Arduino. On the Python side, we recognize hand gestures, and then we pass the recognized gesture to the Arduino, which lights LEDs in response to the hand signal that is seen. This is a simple example, but a very powerful method. Instead of LEDs, you could operate servos, stepper motors or relays to control all manner of different devices. For your convenience, this is the code we used on the Arduino side:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 | String cmd; int LED1=9; int LED2=6; int LED3=5; int LED4=3; int LED5=10; void setup() { pinMode(LED1,OUTPUT); pinMode(LED2,OUTPUT); pinMode(LED3,OUTPUT); pinMode(LED4,OUTPUT); pinMode(LED5,OUTPUT); Serial.begin(115200); } void loop() { while (Serial.available()==0){ } cmd = Serial.readStringUntil('\r'); if (cmd=="One"){ digitalWrite(LED1,HIGH); digitalWrite(LED2,LOW); digitalWrite(LED3,LOW); digitalWrite(LED4,LOW); digitalWrite(LED5,LOW); } if (cmd=="Two"){ digitalWrite(LED1,HIGH); digitalWrite(LED2,HIGH); digitalWrite(LED3,LOW); digitalWrite(LED4,LOW); digitalWrite(LED5,LOW); } if (cmd=="Three"){ digitalWrite(LED1,HIGH); digitalWrite(LED2,HIGH); digitalWrite(LED3,HIGH); digitalWrite(LED4,LOW); digitalWrite(LED5,LOW); } if (cmd=="Four"){ digitalWrite(LED1,HIGH); digitalWrite(LED2,HIGH); digitalWrite(LED3,HIGH); digitalWrite(LED4,HIGH); digitalWrite(LED5,LOW); } if (cmd=="Five"){ digitalWrite(LED1,HIGH); digitalWrite(LED2,HIGH); digitalWrite(LED3,HIGH); digitalWrite(LED4,HIGH); digitalWrite(LED5,HIGH); } if (cmd=="Pinky"){ digitalWrite(LED1,LOW); digitalWrite(LED2,LOW); digitalWrite(LED3,LOW); digitalWrite(LED4,HIGH); digitalWrite(LED5,LOW); } if (cmd=="Thumb"){ digitalWrite(LED1,LOW); digitalWrite(LED2,LOW); digitalWrite(LED3,LOW); digitalWrite(LED4,LOW); digitalWrite(LED5,HIGH); } if (cmd=="Inside"){ digitalWrite(LED1,LOW); digitalWrite(LED2,HIGH); digitalWrite(LED3,HIGH); digitalWrite(LED4,LOW); digitalWrite(LED5,LOW); } if (cmd=="Outside"){ digitalWrite(LED1,HIGH); digitalWrite(LED2,LOW); digitalWrite(LED3,LOW); digitalWrite(LED4,HIGH); digitalWrite(LED5,LOW); } if (cmd=="Unknown"){ digitalWrite(LED1,LOW); digitalWrite(LED2,LOW); digitalWrite(LED3,LOW); digitalWrite(LED4,LOW); 
digitalWrite(LED5,LOW); } } |
And on the Python side, we used the following code:
"""Recognize hand gestures from a webcam and send their names to an Arduino.

The script runs in one of two modes chosen at start-up:

* Train (1): the user names a number of gestures, shows each one to the
  camera and presses "t" to record it. The recordings are pickled to disk
  and the script then switches to recognition.
* Recognize (0): previously pickled gestures are loaded and every frame that
  contains a hand is matched against them. The best match (or 'Unknown') is
  drawn on the frame and written to the serial port followed by '\\r'.

Press "q" in the video window to quit.
"""
import pickle
import time

import cv2
import numpy as np
import serial

print(cv2.__version__)

arduinoData = serial.Serial('COM3', 115200)


class mpHands:
    """Small wrapper around the MediaPipe Hands solution."""

    import mediapipe as mp

    def __init__(self, maxHands=2, tol1=.5, tol2=.5):
        """Create the detector.

        maxHands -- maximum number of hands to report.
        tol1     -- minimum detection confidence.
        tol2     -- minimum tracking confidence.
        """
        # Keyword arguments are required here: newer MediaPipe releases have
        # an extra positional parameter (model_complexity) before the two
        # confidence values, so passing them by position mis-assigns them.
        self.hands = self.mp.solutions.hands.Hands(
            static_image_mode=False,
            max_num_hands=maxHands,
            min_detection_confidence=tol1,
            min_tracking_confidence=tol2,
        )

    def Marks(self, frame):
        """Return one list of (x, y) pixel landmarks per detected hand.

        frame -- BGR image; landmark coordinates are scaled to its size.
        Returns an empty list when no hand is found.
        """
        # Scale by the frame actually passed in rather than module globals.
        frameHeight, frameWidth = frame.shape[:2]
        frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = self.hands.process(frameRGB)
        if results.multi_hand_landmarks is None:
            return []
        return [
            [(int(landMark.x * frameWidth), int(landMark.y * frameHeight))
             for landMark in handLandMarks.landmark]
            for handLandMarks in results.multi_hand_landmarks
        ]


def findDistances(handData):
    """Return the matrix of pairwise landmark distances for one hand.

    handData -- sequence of (x, y) points for a single hand.

    Every distance is divided by the distance between landmarks 0 and 9
    (used as the palm size) so the result does not depend on how far the
    hand is from the camera. If that reference distance is zero an all-zero
    matrix is returned instead of dividing by zero.
    """
    points = np.asarray(handData, dtype='float')
    count = len(points)
    palmSize = float(np.hypot(points[0][0] - points[9][0],
                              points[0][1] - points[9][1]))
    if palmSize == 0:
        return np.zeros([count, count], dtype='float')
    # Broadcast to get every point-to-point offset in one step.
    offsets = points[:, np.newaxis, :] - points[np.newaxis, :, :]
    return np.sqrt((offsets ** 2).sum(axis=-1)) / palmSize


def findError(gestureMatrix, unknownMatrix, keyPoints):
    """Return the summed absolute difference between two distance matrices.

    Only the rows and columns listed in keyPoints are compared. The error is
    also printed, which is useful when choosing a tolerance.
    """
    selection = np.ix_(keyPoints, keyPoints)
    known = np.asarray(gestureMatrix, dtype='float')[selection]
    unknown = np.asarray(unknownMatrix, dtype='float')[selection]
    error = float(np.abs(known - unknown).sum())
    print(error)
    return error


def findGesture(unknownGesture, knownGestures, keyPoints, gestNames, tol):
    """Return the name of the known gesture closest to unknownGesture.

    Returns 'Unknown' when there are no known gestures or when the smallest
    error is not below tol (this includes a NaN error).
    """
    errorArray = [
        findError(known, unknownGesture, keyPoints)
        for _, known in zip(gestNames, knownGestures)
    ]
    if not errorArray:
        return 'Unknown'
    # min() keeps the first of several equal minima.
    minIndex = min(range(len(errorArray)), key=errorArray.__getitem__)
    if errorArray[minIndex] < tol:
        return gestNames[minIndex]
    return 'Unknown'


width = 1280
height = 720
cam = cv2.VideoCapture(3, cv2.CAP_DSHOW)
try:
    cam.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    cam.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
    cam.set(cv2.CAP_PROP_FPS, 30)
    cam.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
    findHands = mpHands(1)
    time.sleep(5)
    keyPoints = [0, 4, 5, 9, 13, 17, 8, 12, 16, 20]

    train = int(input('Enter 1 to Train, Enter 0 to Recognize '))
    if train == 1:
        trainCnt = 0
        knownGestures = []
        numGest = int(input('How Many Gestures Do You Want? '))
        if numGest < 1:
            raise SystemExit('At least one gesture is needed for training.')
        gestNames = []
        for i in range(numGest):
            prompt = 'Name of Gesture #' + str(i + 1) + ' '
            gestNames.append(input(prompt))
        print(gestNames)
        trainName = input('Filename for training data? (Press Enter for Default) ')
        if trainName == '':
            trainName = 'default'
        trainName = trainName + '.pkl'
    if train == 0:
        trainName = input('What Training Data Do You Want to Use? (Press Enter for Default) ')
        if trainName == '':
            trainName = 'default'
        trainName = trainName + '.pkl'
        # NOTE: pickle can execute arbitrary code while loading. Only open
        # training files that this script wrote itself.
        with open(trainName, 'rb') as f:
            gestNames = pickle.load(f)
            knownGestures = pickle.load(f)

    tol = 10
    while True:
        grabbed, frame = cam.read()
        if not grabbed or frame is None:
            # A failed grab would otherwise crash inside cv2.resize.
            print('Could not read a frame from the camera.')
            break
        frame = cv2.resize(frame, (width, height))
        handData = findHands.Marks(frame)

        if train == 1 and handData:
            print('Please Show Gesture ', gestNames[trainCnt], ': Press t when Ready')
        if train == 0 and handData:
            unknownGesture = findDistances(handData[0])
            myGesture = findGesture(unknownGesture, knownGestures, keyPoints, gestNames, tol)
            cv2.putText(frame, myGesture, (100, 175), cv2.FONT_HERSHEY_SIMPLEX, 3, (255, 0, 0), 8)
            # The receiver reads up to the carriage return.
            arduinoData.write((myGesture + '\r').encode())

        for hand in handData:
            for ind in keyPoints:
                cv2.circle(frame, hand[ind], 25, (255, 0, 255), 3)
        cv2.imshow('my WEBcam', frame)
        cv2.moveWindow('my WEBcam', 0, 0)

        # Poll the keyboard once per frame so that a key press cannot be
        # swallowed by a check for a different key.
        key = cv2.waitKey(1) & 0xff
        if key == ord('q'):
            break
        if train == 1 and key == ord('t') and handData:
            knownGestures.append(findDistances(handData[0]))
            trainCnt = trainCnt + 1
            if trainCnt == numGest:
                # All gestures recorded: save them and start recognizing.
                train = 0
                with open(trainName, 'wb') as f:
                    pickle.dump(gestNames, f)
                    pickle.dump(knownGestures, f)
finally:
    cam.release()
    cv2.destroyAllWindows()
    arduinoData.close()