Signals

import csv

import copy
import argparse
import itertools
from collections import Counter
from collections import deque

import cv2
import numpy as np
import mediapipe as mp

from utils import CvFpsCalc


from model import KeyPointClassifier
from model import PointHistoryClassifier

def get_args():
    parser = argparse.ArgumentParser()
    # This function defines a command-line argument parser using Python's
    # argparse module. It is typically used to parse command-line arguments
    # when running a script or program.

    parser.add_argument("--device", type=int, default=0)

    parser.add_argument("--width", help='cap width', type=int, default=960)
    parser.add_argument("--height", help='cap height', type=int, default=540)

    parser.add_argument('--use_static_image_mode', action='store_true')
    parser.add_argument("--min_detection_confidence",
                        help='min_detection_confidence',
                        type=float,
                        default=0.7)
    parser.add_argument("--min_tracking_confidence",
                        help='min_tracking_confidence',
                        type=float,  # a float threshold in the 0.0-1.0 range
                        default=0.5)

    args = parser.parse_args()

    return args
# Example: python script.py --width 1280 --height 720 --use_static_image_mode

def main():
    # Argument parsing (falls back to the defaults above if none are given)
    args = get_args()

    cap_device = args.device
    cap_width = args.width
    cap_height = args.height

    use_static_image_mode = args.use_static_image_mode
    min_detection_confidence = args.min_detection_confidence
    min_tracking_confidence = args.min_tracking_confidence

    use_brect = True
    # An option for toggling the bounding box around the detected hand

    # Camera preparation
    cap = cv2.VideoCapture(cap_device)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, cap_width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, cap_height)

    # Load the MediaPipe hands model
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands(
        static_image_mode=use_static_image_mode,
        # When static_image_mode is True, the model treats every frame as an
        # independent image rather than a continuous stream of frames.
        max_num_hands=2,
        min_detection_confidence=min_detection_confidence,
        min_tracking_confidence=min_tracking_confidence,
        # When an instance of the model is created it first has to detect the
        # hands and then track them: min_detection_confidence is the minimum
        # confidence needed to first detect a hand, while
        # min_tracking_confidence is the threshold for tracking it as the
        # hand moves around after the initial detection.
    )
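    # What hands.process() returns (per MediaPipe's Hands API):
    # results.multi_hand_landmarks is a list with one entry per detected
    # hand, each holding 21 normalized (x, y, z) landmarks, and
    # results.multi_handedness carries the matching 'Left'/'Right'
    # classification for each of those hands.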

    keypoint_classifier = KeyPointClassifier()

    point_history_classifier = PointHistoryClassifier()

    # Read the labels used by the trained classifiers
    with open('model/keypoint_classifier/keypoint_classifier_label.csv',
              encoding='utf-8-sig') as f:
        keypoint_classifier_labels = csv.reader(f)
        keypoint_classifier_labels = [
            str(row[0]) for row in keypoint_classifier_labels if row
        ]
    with open(
            'model/point_history_classifier/point_history_classifier_label.csv',
            encoding='utf-8-sig') as f:
        point_history_classifier_labels = csv.reader(f)
        point_history_classifier_labels = [
            str(row[0]) for row in point_history_classifier_labels if row
        ]
    # `if row` skips empty rows (csv.reader yields empty lists, never None)
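    # The label files hold one class name per row; the row index is the
    # class id. A hypothetical keypoint_classifier_label.csv might read:
    #   Open
    #   Close
    #   Pointer
    # in which case hand_sign_id == 2 below would map to the "Pointer" row.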

    # FPS Measurement -----------------------------------------------------
    cvFpsCalc = CvFpsCalc(buffer_len=10)
    # The buffer smooths out the FPS calculation: it lets us display an
    # average FPS over a certain number of frames rather than the
    # instantaneous FPS.

    # Coordinate history ---------------------------------------------------
    history_length = 16
    point_history = deque(maxlen=history_length)
    # deque (double-ended queue) is part of Python's collections module and
    # provides a versatile data structure.
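    # A minimal sketch of the bounded-deque behaviour relied on here:
    #   d = deque(maxlen=3)
    #   d.extend([1, 2, 3]); d.append(4)  # d is now deque([2, 3, 4], maxlen=3)
    # Appending past maxlen silently drops the oldest element from the left.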

    # Finger gesture history ################################################
    finger_gesture_history = deque(maxlen=history_length)
    # This deque stores historical information about hand landmarks/points
    # and automatically discards the oldest elements once the specified
    # maximum length is reached.
    mode = 0  # default

    while True:
        fps = cvFpsCalc.get()

        # Process key input (ESC or q: end)
        key = cv2.waitKey(10)
        if key == 27 or key == ord('q'):  # ESC, q
            break
        number, mode = select_mode(key, mode)

        # Camera capture #####################################################
        ret, image = cap.read()
        if not ret:
            break
        image = cv2.flip(image, 1)  # Mirror display
        debug_image = copy.deepcopy(image)
        '''
        In Python, you can create a deep copy of an image using the copy
        module or NumPy's copy function. A deep copy creates a new object
        that is a copy of the original object, recursively copying all
        nested objects within it, so drawing on debug_image leaves the
        original frame untouched.'''
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # MediaPipe expects RGB as input

        image.flags.writeable = False
        # Setting the writeable flag to False lets the frame be passed to
        # the model (with the thresholds set earlier) by reference
        results = hands.process(image)
        # Re-enable writes so we can render/draw on the image afterwards
        image.flags.writeable = True

        if results.multi_hand_landmarks is not None:
            # results.multi_hand_landmarks gives, for every landmark, x
            # (position on the x axis), y, and z (depth from the camera).
            # This condition checks whether any hand landmarks were detected
            # in the current frame; if so, we loop through each result.
            for hand_landmarks, handedness in zip(
                    results.multi_hand_landmarks, results.multi_handedness):
                # Handedness refers to whether a detected hand is the left or
                # the right hand. Distinguishing the two is essential,
                # especially in applications where specific gestures or
                # actions depend on the hand's orientation.

                # Bounding box calculation
                brect = calc_bounding_rect(debug_image, hand_landmarks)
                # Landmark calculation
                landmark_list = calc_landmark_list(debug_image,
                                                   hand_landmarks)

                # Conversion to relative / normalized coordinates
                pre_processed_landmark_list = pre_process_landmark(
                    landmark_list)
                pre_processed_point_history_list = pre_process_point_history(
                    debug_image, point_history)
                # Write to the dataset file
                logging_csv(number, mode, pre_processed_landmark_list,
                            pre_processed_point_history_list)

                # Hand sign classification
                hand_sign_id = keypoint_classifier(pre_processed_landmark_list)
                if hand_sign_id == 2:  # Point gesture
                    point_history.append(landmark_list[8])  # index fingertip
                else:
                    point_history.append([0, 0])

                # Finger gesture classification
                finger_gesture_id = 0
                point_history_len = len(pre_processed_point_history_list)
                if point_history_len == (history_length * 2):
                    finger_gesture_id = point_history_classifier(
                        pre_processed_point_history_list)

                # Take the most common gesture ID over the recent detections
                finger_gesture_history.append(finger_gesture_id)
                most_common_fg_id = Counter(
                    finger_gesture_history).most_common()

                # Drawing part
                debug_image = draw_bounding_rect(use_brect, debug_image,
                                                 brect)
                debug_image = draw_landmarks(debug_image, landmark_list)
                debug_image = draw_info_text(
                    debug_image,
                    brect,
                    handedness,
                    keypoint_classifier_labels[hand_sign_id],
                    point_history_classifier_labels[most_common_fg_id[0][0]],
                )
        else:
            point_history.append([0, 0])

        debug_image = draw_point_history(debug_image, point_history)
        debug_image = draw_info(debug_image, fps, mode, number)

        # Screen reflection ##################################################
        cv2.imshow('Hand Gesture Recognition', debug_image)

    cap.release()
    cv2.destroyAllWindows()

def select_mode(key, mode):
    number = -1
    if 48 <= key <= 57:  # keys '0' ~ '9'
        number = key - 48
    if key == 110:  # n
        mode = 0
    if key == 107:  # k
        mode = 1
    if key == 104:  # h
        mode = 2
    return number, mode
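# For example, pressing 'k' (keycode 107) switches to mode 1 (keypoint
# logging); pressing '3' afterwards returns number = 3, so the logging_csv
# call in the main loop appends that frame's landmarks under class 3.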

def calc_bounding_rect(image, landmarks):
    image_width, image_height = image.shape[1], image.shape[0]

    landmark_array = np.empty((0, 2), int)

    for _, landmark in enumerate(landmarks.landmark):
        landmark_x = min(int(landmark.x * image_width), image_width - 1)
        # clamped so the point never falls outside the frame
        landmark_y = min(int(landmark.y * image_height), image_height - 1)

        landmark_point = [np.array((landmark_x, landmark_y))]

        landmark_array = np.append(landmark_array, landmark_point, axis=0)

    x, y, w, h = cv2.boundingRect(landmark_array)
    # cv2.boundingRect calculates the bounding rectangle around the set of
    # landmarks as (x, y, width, height); this function returns it in the
    # corner form [x, y, x + w, y + h].
    return [x, y, x + w, y + h]
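# For example (hypothetical points): landmarks spanning pixels (100, 50) to
# (200, 180) give cv2.boundingRect -> (100, 50, 101, 131), since the width
# and height count both end pixels, so this function returns the corners
# [100, 50, 201, 181].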

def calc_landmark_list(image, landmarks):
    image_width, image_height = image.shape[1], image.shape[0]

    landmark_point = []

    # Keypoints
    for _, landmark in enumerate(landmarks.landmark):
        landmark_x = min(int(landmark.x * image_width), image_width - 1)
        landmark_y = min(int(landmark.y * image_height), image_height - 1)
        # landmark_z = landmark.z

        landmark_point.append([landmark_x, landmark_y])

    return landmark_point

def pre_process_landmark(landmark_list):
    temp_landmark_list = copy.deepcopy(landmark_list)

    # Convert to relative coordinates
    base_x, base_y = 0, 0
    for index, landmark_point in enumerate(temp_landmark_list):
        if index == 0:
            base_x, base_y = landmark_point[0], landmark_point[1]

        temp_landmark_list[index][0] = temp_landmark_list[index][0] - base_x
        temp_landmark_list[index][1] = temp_landmark_list[index][1] - base_y

    # Convert to a one-dimensional list
    temp_landmark_list = list(
        itertools.chain.from_iterable(temp_landmark_list))

    # Normalization
    max_value = max(list(map(abs, temp_landmark_list)))

    def normalize_(n):
        return n / max_value

    temp_landmark_list = list(map(normalize_, temp_landmark_list))

    return temp_landmark_list
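# A worked example of the normalization (hypothetical pixel coordinates):
# with the wrist (landmark 0) at (100, 200) and one landmark at (150, 260),
# the relative coordinates are (0, 0) and (50, 60); flattened this becomes
# [0, 0, 50, 60]; the max absolute value is 60, so the result is
# [0.0, 0.0, 0.8333..., 1.0].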

def pre_process_point_history(image, point_history):
    image_width, image_height = image.shape[1], image.shape[0]

    temp_point_history = copy.deepcopy(point_history)

    # Convert to relative coordinates
    base_x, base_y = 0, 0
    for index, point in enumerate(temp_point_history):
        if index == 0:
            base_x, base_y = point[0], point[1]

        temp_point_history[index][0] = (temp_point_history[index][0] -
                                        base_x) / image_width
        temp_point_history[index][1] = (temp_point_history[index][1] -
                                        base_y) / image_height

    # Convert to a one-dimensional list
    temp_point_history = list(
        itertools.chain.from_iterable(temp_point_history))

    return temp_point_history

def logging_csv(number, mode, landmark_list, point_history_list):
    # Append the dataset coordinates to the matching CSV
    if mode == 0:
        pass
    if mode == 1 and (0 <= number <= 9):
        csv_path = 'model/keypoint_classifier/keypoint.csv'
        with open(csv_path, 'a', newline="") as f:
            writer = csv.writer(f)
            writer.writerow([number, *landmark_list])
    if mode == 2 and (0 <= number <= 9):
        csv_path = 'model/point_history_classifier/point_history.csv'
        with open(csv_path, 'a', newline="") as f:
            writer = csv.writer(f)
            writer.writerow([number, *point_history_list])
    return
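# Each appended row is the class id followed by the flattened features, e.g.
# (using the hypothetical values from the normalization example above):
#   3,0.0,0.0,0.8333333333333334,1.0,...
# Keypoint rows carry 42 feature values (21 landmarks x 2 coordinates) and
# point-history rows carry 32 (16 points x 2), plus the leading class id.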

def draw_landmarks(image, landmark_point):
    # Sketching the design plan: each connection is drawn twice, a thick
    # black line (6 px thickness) underneath a thin white line (2 px), to
    # get a white line with black borders.
    if len(landmark_point) > 0:  # check that the landmarks actually exist
        # Thumb, index, middle, ring and little finger segments, then the
        # palm outline, following MediaPipe's hand topology
        connections = [
            (2, 3), (3, 4),                  # Thumb
            (5, 6), (6, 7), (7, 8),          # Index finger
            (9, 10), (10, 11), (11, 12),     # Middle finger
            (13, 14), (14, 15), (15, 16),    # Ring finger
            (17, 18), (18, 19), (19, 20),    # Little finger
            (0, 1), (1, 2), (2, 5), (5, 9),  # Palm
            (9, 13), (13, 17), (17, 0),      # Palm (continued)
        ]
        for start, end in connections:
            cv2.line(image, tuple(landmark_point[start]),
                     tuple(landmark_point[end]), (0, 0, 0), 6)
            cv2.line(image, tuple(landmark_point[start]),
                     tuple(landmark_point[end]), (255, 255, 255), 2)

        # Key points: 0 is the wrist; 1-4 are the thumb cmc/mcp/ip/tip; the
        # remaining fingers each run mcp/pip/dip/tip (index 5-8, middle
        # 9-12, ring 13-16, little 17-20). Fingertips (4, 8, 12, 16, 20)
        # get a larger radius; thickness -1 means a completely filled circle.
        for index, landmark in enumerate(landmark_point):
            radius = 8 if index in (4, 8, 12, 16, 20) else 5
            cv2.circle(image, (landmark[0], landmark[1]), radius,
                       (255, 255, 255), -1)
            cv2.circle(image, (landmark[0], landmark[1]), radius,
                       (0, 0, 0), 1)

    return image
    # Colors are BGR tuples (white is (255, 255, 255), black is (0, 0, 0)).

def draw_bounding_rect(use_brect, image, brect):
    if use_brect:  # only draw when the option is enabled
        # Outer rectangle
        cv2.rectangle(image, (brect[0], brect[1]), (brect[2], brect[3]),
                      (0, 0, 0), 1)

    return image

def draw_info_text(image, brect, handedness, hand_sign_text,
                   finger_gesture_text):
    cv2.rectangle(image, (brect[0], brect[1]), (brect[2], brect[1] - 22),
                  (0, 0, 0), -1)

    info_text = handedness.classification[0].label[0:]
    if hand_sign_text != "":
        info_text = info_text + ':' + hand_sign_text
    cv2.putText(image, info_text, (brect[0] + 5, brect[1] - 4),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1,
                cv2.LINE_AA)

    if finger_gesture_text != "":
        cv2.putText(image, "Finger Gesture:" + finger_gesture_text, (10, 60),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0), 4, cv2.LINE_AA)
        cv2.putText(image, "Finger Gesture:" + finger_gesture_text, (10, 60),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), 2,
                    cv2.LINE_AA)

    return image

def draw_point_history(image, point_history):
    for index, point in enumerate(point_history):
        if point[0] != 0 and point[1] != 0:
            cv2.circle(image, (point[0], point[1]), 1 + int(index / 2),
                       (152, 251, 152), 2)

    return image

def draw_info(image, fps, mode, number):
    cv2.putText(image, "FPS:" + str(fps), (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                1.0, (0, 0, 0), 4, cv2.LINE_AA)
    # cv2.LINE_AA stands for anti-aliased line, which renders smoother text;
    # 1.0 is the font scale factor
    cv2.putText(image, "FPS:" + str(fps), (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                1.0, (255, 255, 255), 2, cv2.LINE_AA)

    mode_string = ['Logging Key Point', 'Logging Point History']

    if 1 <= mode <= 2:  # mode only ever takes 0, 1 or 2 (see select_mode)
        cv2.putText(image, "MODE:" + mode_string[mode - 1], (10, 90),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1,
                    cv2.LINE_AA)
        if 0 <= number <= 9:
            cv2.putText(image, "NUM:" + str(number), (10, 110),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1,
                        cv2.LINE_AA)
    return image

if __name__ == '__main__':
    main()

# Volume controller:

import mediapipe as mp
import cv2
import math
import numpy as np

# pycaw handles the controlling of our system volume (Windows Core Audio).
# from cvzone.HandTrackingModule import HandDetector  # alternative detector
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

mpHands = mp.solutions.hands
mpDraw = mp.solutions.drawing_utils

class HandDetector:
    def __init__(self, max_num_hands=2, min_detection_confidence=0.5,
                 min_tracking_confidence=0.5):
        self.hands = mpHands.Hands(
            max_num_hands=max_num_hands,
            min_detection_confidence=min_detection_confidence,
            min_tracking_confidence=min_tracking_confidence)

    def findHandLandMarks(self, image, handNumber=0, draw=True):
        originalImage = image
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # mediapipe needs RGB
        results = self.hands.process(image)
        landMarkList = []

        if results.multi_hand_landmarks:  # None if no hand is found
            # results.multi_hand_landmarks returns landmarks for all hands
            hand = results.multi_hand_landmarks[handNumber]

            for id, landMark in enumerate(hand.landmark):
                # landMark holds the x, y, z ratios of a single landmark
                imgH, imgW, imgC = originalImage.shape  # height, width, channels
                xPos, yPos = int(landMark.x * imgW), int(landMark.y * imgH)
                landMarkList.append([id, xPos, yPos])

            if draw:
                mpDraw.draw_landmarks(originalImage, hand,
                                      mpHands.HAND_CONNECTIONS)
                # image = draw_landmarks(originalImage, hand.landmark)

        return landMarkList
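# Each entry of landMarkList has the form [id, xPos, yPos] in pixel
# coordinates, e.g. a hypothetical [8, 421, 187] for the index fingertip;
# the actual values depend on the frame size and where the hand sits in it.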

webcamFeed = cv2.VideoCapture(0)

handDetector = HandDetector(min_detection_confidence=0.7)

# Volume-related initializations
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(
    IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))
print(volume.GetVolumeRange())  # e.g. (-65.25, 0.0, ...): min/max volume in dB

while True:
    status, image = webcamFeed.read()
    handLandmarks = handDetector.findHandLandMarks(image=image, draw=True)

    if len(handLandmarks) != 0:
        # For volume control we need the 4th (thumb tip) and 8th (index
        # fingertip) landmarks
        x1, y1 = handLandmarks[4][1], handLandmarks[4][2]
        x2, y2 = handLandmarks[8][1], handLandmarks[8][2]
        length = math.hypot(x2 - x1, y2 - y1)
        print(length)

        # Hand range (length): 50-250
        # Volume range: (-65.25, 0.0)
        volumeValue = np.interp(length, [50, 250], [-65.25, 0.0])
        # converts the finger distance proportionally into the volume range
        volume.SetMasterVolumeLevel(volumeValue, None)

        cv2.circle(image, (x1, y1), 15, (255, 0, 255), cv2.FILLED)
        cv2.circle(image, (x2, y2), 15, (255, 0, 255), cv2.FILLED)
        cv2.line(image, (x1, y1), (x2, y2), (255, 0, 255), 3)

    cv2.imshow("Volume", image)
    key = cv2.waitKey(1) & 0xFF  # mask to the low byte of the keycode
    if key == ord('q'):
        break

webcamFeed.release()
cv2.destroyAllWindows()
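# A quick check of the np.interp mapping used above (assumed sample values):
# np.interp holds the endpoint values outside the input range, so
#   np.interp(50,  [50, 250], [-65.25, 0.0]) -> -65.25   (quietest)
#   np.interp(150, [50, 250], [-65.25, 0.0]) -> -32.625  (midpoint)
#   np.interp(300, [50, 250], [-65.25, 0.0]) -> 0.0      (max volume)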

# Counter using MediaPipe

from HandTrackingModule import FindHands
import mediapipe as mp
import cv2
import os
import time

cap = cv2.VideoCapture(0)
cap.set(3, 1280)  # property 3 is CAP_PROP_FRAME_WIDTH
cap.set(4, 720)   # property 4 is CAP_PROP_FRAME_HEIGHT
folderPath = "sources"
folder = os.listdir(folderPath)
print(folder)

overlayList = []
for imPath in folder:
    # os.path.join builds the path portably, avoiding raw '\' escapes
    image = cv2.imread(os.path.join(folderPath, imPath))
    print(os.path.join(folderPath, imPath))
    overlayList.append(image)
previous = 0

# detector = htm.imgDetector(detectionCon=0.7)
detector = FindHands()

while True:
    ret, img = cap.read()

    # img = htm.FindHands(img)
    # h, w, c = overlayList[0].shape
    # img[0:h, 0:w] = overlayList[0]

    hand1_positions = detector.getPosition(img, range(21), draw=True)
    hand2_positions = detector.getPosition(img, range(21), hand_no=1,
                                           draw=True)
    for pos in hand1_positions:
        cv2.circle(img, pos, 5, (0, 255, 0), cv2.FILLED)
    for pos in hand2_positions:
        cv2.circle(img, pos, 5, (255, 0, 0), cv2.FILLED)
    # print("Index finger up:", detector.index_finger_up(img))
    # print("Middle finger up:", detector.middle_finger_up(img))
    # print("Ring finger up:", detector.ring_finger_up(img))

    if detector.little_finger_up(img) != "NO HAND FOUND":
        # print("Little finger up:", detector.little_finger_up(img))
        # Note: the thumb is never checked, so any single raised finger
        # (or none at all) is reported as "ONE".
        if (detector.index_finger_up(img) == True and
                detector.middle_finger_up(img) == False and
                detector.ring_finger_up(img) == False and
                detector.little_finger_up(img) == False):
            print("ONE")
        if (detector.index_finger_up(img) == False and
                detector.middle_finger_up(img) == True and
                detector.ring_finger_up(img) == False and
                detector.little_finger_up(img) == False):
            print("ONE")
        if (detector.index_finger_up(img) == False and
                detector.middle_finger_up(img) == False and
                detector.ring_finger_up(img) == True and
                detector.little_finger_up(img) == False):
            print("ONE")
        if (detector.index_finger_up(img) == False and
                detector.middle_finger_up(img) == False and
                detector.ring_finger_up(img) == False and
                detector.little_finger_up(img) == True):
            print("ONE")
        if (detector.index_finger_up(img) == False and
                detector.middle_finger_up(img) == False and
                detector.ring_finger_up(img) == False and
                detector.little_finger_up(img) == False):
            print("ONE")
        if (detector.index_finger_up(img) == True and
                detector.middle_finger_up(img) == True and
                detector.ring_finger_up(img) == False and
                detector.little_finger_up(img) == False):
            print("TWO")
        if (detector.index_finger_up(img) == True and
                detector.middle_finger_up(img) == True and
                detector.ring_finger_up(img) == True and
                detector.little_finger_up(img) == False):
            print("THREE")
        if (detector.index_finger_up(img) == True and
                detector.middle_finger_up(img) == True and
                detector.ring_finger_up(img) == True and
                detector.little_finger_up(img) == True):
            print("FOUR")

    current = time.time()
    fps = 1 / (current - previous)
    previous = current

    cv2.putText(img, f'FPS:{int(fps)}', (430, 70), cv2.FONT_HERSHEY_SIMPLEX,
                1, (255, 0, 0), 2)

    cv2.imshow("Image", img)
    if cv2.waitKey(1) & 0xFF == ord('q'):  # press 'q' to quit
        break

cap.release()
cv2.destroyAllWindows()
