Computer vision
How to leverage vision models for robotics.
The junior dev kit comes with a stereoscopic camera, ideal for 3D vision and AI workflows.
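Before wiring vision into robot motion, it helps to confirm that frames can be read from the camera. The sketch below is a minimal check, assuming the camera is exposed as a standard video device at index 0 (the same index the examples on this page use); adjust the index if your setup differs.

import cv2

# Minimal sketch: read and display frames from the camera at device index 0.
cap = cv2.VideoCapture(0)

while cap.isOpened():
    success, frame = cap.read()
    if not success:
        continue
    cv2.imshow("Camera feed", frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):  # press q to quit
        break

cap.release()
cv2.destroyAllWindows()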
Wave back
Using OpenCV and MediaPipe, we can detect a hand in the camera feed and make the robot wave back in just a few lines of code.
wave.py
import cv2
import sys
import time
import signal
import requests
import mediapipe as mp # type: ignore
# Configurations
PI_IP: str = "127.0.0.1"
PI_PORT: int = 8080
# Initialize MediaPipe Hand tracking
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=False, max_num_hands=1, min_detection_confidence=0.7
)


# Handle Ctrl+C to exit the program gracefully
def signal_handler(sig, frame):
    print("\nExiting gracefully...")
    cap.release()
    cv2.destroyAllWindows()
    hands.close()
    sys.exit(0)


signal.signal(signal.SIGINT, signal_handler)


# Function to call the API
def call_to_api(endpoint: str, data: dict = {}):
    response = requests.post(f"http://{PI_IP}:{PI_PORT}/move/{endpoint}", json=data)
    return response.json()


def wave_motion():
    # Alternate the y offset and gripper open/close to mimic a waving motion
    points = 5
    for _ in range(2):
        for i in range(points):
            call_to_api(
                "absolute",
                {
                    "x": 0,
                    "y": 0.02 * (-1) ** i,
                    "z": 0,
                    "rx": -90,
                    "ry": 0,
                    "rz": 0,
                    "open": i % 2 == 0,
                },
            )
            time.sleep(0.2)


call_to_api("init")
cap = cv2.VideoCapture(0)
last_wave_time: float = 0
WAVE_COOLDOWN: float = 3  # seconds to wait between waves

try:
    while True:
        success, image = cap.read()
        if not success:
            continue

        results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        current_time = time.time()

        if (
            results.multi_hand_landmarks
            and current_time - last_wave_time > WAVE_COOLDOWN
        ):
            wave_motion()
            last_wave_time = current_time

        cv2.imshow("Hand Detection", image)
        cv2.waitKey(1)
except KeyboardInterrupt:
    print("\nExiting gracefully...")
finally:
    cap.release()
    cv2.destroyAllWindows()
    hands.close()
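All three examples on this page drive the robot through the same HTTP control endpoints. If the robot does not move, you can check that the control server is reachable by calling the endpoints directly, independent of any camera code. This is a minimal sketch using the same /move/init and /move/absolute endpoints and payload shape as the script above; the host and port are assumptions you should match to your setup.

import requests

PI_IP = "127.0.0.1"  # assumed: adjust to your robot's address
PI_PORT = 8080

# Reset the arm to its initial position
print(requests.post(f"http://{PI_IP}:{PI_PORT}/move/init", json={}).json())

# Send a single absolute move, using the same payload shape as wave.py
payload = {"x": 0, "y": 0.02, "z": 0, "rx": -90, "ry": 0, "rz": 0, "open": 1}
print(requests.post(f"http://{PI_IP}:{PI_PORT}/move/absolute", json=payload).json())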
Hand tracking
This is a simple implementation of hand tracking using the MediaPipe library. The robot follows the hand's position and opens or closes its gripper based on how closed the hand is.
hand_tracking.py
import sys
import cv2
import time
import signal
import requests
import numpy as np
import mediapipe as mp # type: ignore
# Configurations
PI_IP: str = "127.0.0.1"
PI_PORT: int = 8080
# Initialize MediaPipe Hand tracking
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.7,
)
mp_draw = mp.solutions.drawing_utils


# Handle Ctrl+C to exit the program gracefully
def signal_handler(sig, frame):
    print("\nExiting gracefully...")
    cap.release()
    cv2.destroyAllWindows()
    hands.close()
    sys.exit(0)


signal.signal(signal.SIGINT, signal_handler)


# Function to call the API
def call_to_api(endpoint: str, data: dict = {}):
    response = requests.post(f"http://{PI_IP}:{PI_PORT}/move/{endpoint}", json=data)
    return response.json()


def calculate_hand_closure(hand_landmarks):
    """
    Calculate if the hand is closed based on thumb and index finger distance
    Returns a value between 0 (open) and 1 (closed)
    """
    thumb_tip = hand_landmarks.landmark[4]
    index_tip = hand_landmarks.landmark[8]
    distance = np.sqrt(
        (thumb_tip.x - index_tip.x) ** 2
        + (thumb_tip.y - index_tip.y) ** 2
        + (thumb_tip.z - index_tip.z) ** 2
    )
    # Normalize distance (these values might need adjustment based on the hand size)
    normalized = np.clip(1.0 - (distance * 5), 0, 1)
    return normalized


# 1 - Initialize the robot
call_to_api("init")
print("Initializing robot")
time.sleep(2)

# Initialize webcam
cap = cv2.VideoCapture(0)

# Get camera frame dimensions
frame_width: int = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height: int = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the workspace boundaries (in meters)
WORKSPACE_Y = 0.4
WORKSPACE_Z = 0.2


def map_to_robot_coordinates(hand_x, hand_y):
    """
    Map normalized hand coordinates to robot workspace coordinates
    We match the hand x coordinate to the robot y coordinate
    And the hand y coordinate to the robot z coordinate
    """
    robot_y = ((0.5 - hand_x) * 2) * (WORKSPACE_Y / 2)
    robot_z = ((0.5 - hand_y) * 2) * (WORKSPACE_Z / 2)
    return robot_y, robot_z


# Previous position for smoothing, this helps make the robot movements less jerky
prev_pos = {"y": 0, "z": 0}
smoothing_factor = 0.5

try:
    while cap.isOpened():
        success, image = cap.read()
        if not success:
            print("Failed to capture frame")
            continue

        image = cv2.flip(image, 1)  # The front camera is inverted
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert to RGB image
        results = hands.process(rgb_image)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_draw.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                palm = hand_landmarks.landmark[0]
                hand_closed = calculate_hand_closure(hand_landmarks)
                robot_y, robot_z = map_to_robot_coordinates(palm.x, palm.y)

                # Exponential smoothing: blend the previous target with the new one
                robot_y = prev_pos["y"] * smoothing_factor + robot_y * (
                    1 - smoothing_factor
                )
                robot_z = prev_pos["z"] * smoothing_factor + robot_z * (
                    1 - smoothing_factor
                )
                prev_pos = {"y": robot_y, "z": robot_z}

                call_to_api(
                    "absolute",
                    {
                        "x": -0.05,
                        "y": robot_y,
                        "z": robot_z,
                        "rx": 0,
                        "ry": 0,
                        "rz": 0,
                        "open": 1 - hand_closed,
                    },
                )

                cv2.putText(
                    image,
                    f"Position: (y:{robot_y:.3f}, z:{robot_z:.3f})",
                    (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1,
                    (0, 255, 0),
                    2,
                )
                cv2.putText(
                    image,
                    f"Grip: {'Closed' if hand_closed > 0.5 else 'Open'}",
                    (10, 70),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1,
                    (0, 255, 0),
                    2,
                )

        cv2.imshow("Hand Tracking", image)
        cv2.waitKey(1)
except KeyboardInterrupt:
    print("\nExiting gracefully...")
finally:
    cap.release()
    cv2.destroyAllWindows()
    hands.close()
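The smoothing step in the loop is a simple exponential moving average: each new target is a blend of the previous commanded position and the newly mapped hand position. The standalone sketch below shows the same idea on a hypothetical stream of raw positions, illustrating how the factor trades responsiveness for stability.

# Standalone sketch of the smoothing used above: exponential moving average.
# The raw_positions values are hypothetical, purely for illustration.
smoothing_factor = 0.5  # higher = smoother but laggier, lower = more responsive

raw_positions = [0.00, 0.10, 0.12, 0.30, 0.05]
smoothed = 0.0
for raw in raw_positions:
    smoothed = smoothed * smoothing_factor + raw * (1 - smoothing_factor)
    print(f"raw={raw:.2f} -> smoothed={smoothed:.2f}")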
Rock paper scissors
Create fun interactions with your robot using this simple implementation of a Rock Paper Scissors game.
rock_paper_scissors.py
import cv2
import time
import random
import requests
import numpy as np
import mediapipe as mp # type: ignore
# Robot API Configuration
PI_IP = "127.0.0.1"
PI_PORT = 8080
class RockPaperScissorsGame:
    def __init__(self):
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=1,
            min_detection_confidence=0.7,
            min_tracking_confidence=0.7,
        )
        self.cap = cv2.VideoCapture(0)
        self.gestures = {
            "rock": self.make_rock_gesture,
            "paper": self.make_paper_gesture,
            "scissors": self.make_scissors_gesture,
        }

    def call_to_api(self, endpoint: str, data: dict = {}):
        response = requests.post(
            f"http://{PI_IP}:{PI_PORT}/move/{endpoint}", json=data
        )
        return response.json()

    def detect_gesture(self, hand_landmarks):
        # Get relevant finger landmarks
        thumb_tip = hand_landmarks.landmark[4]
        index_tip = hand_landmarks.landmark[8]
        middle_tip = hand_landmarks.landmark[12]
        ring_tip = hand_landmarks.landmark[16]
        pinky_tip = hand_landmarks.landmark[20]

        # Get wrist position for reference
        wrist = hand_landmarks.landmark[0]

        # Calculate distances from wrist
        fingers_extended = []
        for tip in [thumb_tip, index_tip, middle_tip, ring_tip, pinky_tip]:
            distance = np.sqrt((tip.x - wrist.x) ** 2 + (tip.y - wrist.y) ** 2)
            fingers_extended.append(distance > 0.2)  # Threshold for extended fingers

        # Determine gesture
        if not any(fingers_extended[1:]):  # All fingers closed
            return "rock"
        elif all(fingers_extended):  # All fingers open
            return "paper"
        elif (
            fingers_extended[1]
            and fingers_extended[2]
            and not fingers_extended[3]
            and not fingers_extended[4]
        ):  # Only index and middle extended
            return "scissors"
        return None

    def make_rock_gesture(self):
        # Move to closed fist position
        self.call_to_api(
            "absolute",
            {"x": 0, "y": 0, "z": 0.05, "rx": 0, "ry": 0, "rz": 0, "open": 0},
        )

    def make_paper_gesture(self):
        # Move to open hand position
        self.call_to_api(
            "absolute",
            {"x": 0, "y": 0, "z": 0.05, "rx": 0, "ry": 0, "rz": 0, "open": 1},
        )

    def make_scissors_gesture(self):
        # Move to scissors position
        self.call_to_api(
            "absolute",
            {"x": 0, "y": 0, "z": 0.05, "rx": 0, "ry": -45, "rz": 0, "open": 0.5},
        )

    def move_up_down(self, times=3):
        for step in range(times + 1):
            self.call_to_api(
                "absolute",
                {"x": 0, "y": 0, "z": 0.04, "rx": 0, "ry": 0, "rz": 0, "open": 0},
            )
            time.sleep(0.25)
            self.call_to_api(
                "absolute",
                {"x": 0, "y": 0, "z": -0.04, "rx": 0, "ry": 0, "rz": 0, "open": 0},
            )
            time.sleep(0.25)
            print(times - step)

    def determine_winner(self, player_gesture, robot_gesture):
        if player_gesture == robot_gesture:
            return "Tie!"
        winners = {"rock": "scissors", "paper": "rock", "scissors": "paper"}
        return (
            "Player wins!"
            if winners[player_gesture] == robot_gesture
            else "Robot wins!"
        )

    def play_game(self):
        print("Initializing robot...")
        self.call_to_api("init")
        time.sleep(1)

        print("Robot performing countdown...")
        self.move_up_down(times=3)

        ret, frame = self.cap.read()
        if not ret:
            print("Failed to capture image.")
            return

        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = self.hands.process(rgb_frame)

        if results.multi_hand_landmarks:
            player_gesture = self.detect_gesture(results.multi_hand_landmarks[0])
            if player_gesture:
                robot_gesture = random.choice(["rock", "paper", "scissors"])
                print(f"\nPlayer chose: {player_gesture}")
                print(f"Robot chose: {robot_gesture}")
                self.gestures[robot_gesture]()  # Robot makes its gesture
                result = self.determine_winner(player_gesture, robot_gesture)
                print(result)
                time.sleep(2)
            else:
                print("Gesture not detected. Please try again.")
        else:
            print("No hand detected. Please try again.")

        self.cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    game = RockPaperScissorsGame()
    game.play_game()
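Note that play_game releases the camera when a round ends, so the script as written plays a single round. If you want several rounds, one simple option is to create a fresh game object per round. A sketch, assuming the script above is saved as rock_paper_scissors.py:

# Sketch: play several rounds by creating a fresh game per round,
# since play_game() releases the camera when it finishes.
from rock_paper_scissors import RockPaperScissorsGame

for round_number in range(1, 4):
    print(f"\n--- Round {round_number} ---")
    RockPaperScissorsGame().play_game()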