The dev kit comes with a stereoscopic camera, ideal for 3D vision and AI workflows. All code examples can be found in our open source repo here.
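
As a minimal sketch (not one of the repo examples), assuming the camera enumerates as an ordinary video device and delivers a single side-by-side frame, you can preview the left and right views with OpenCV:

import cv2

# Open the camera (device index 0 is an assumption; adjust for your setup)
cap = cv2.VideoCapture(0)

success, frame = cap.read()
if success:
    # Assuming the stereo camera delivers one side-by-side frame,
    # split it down the middle into left and right views
    height, width = frame.shape[:2]
    left_view = frame[:, : width // 2]
    right_view = frame[:, width // 2 :]
    cv2.imshow("Left", left_view)
    cv2.imshow("Right", right_view)
    cv2.waitKey(0)

cap.release()
cv2.destroyAllWindows()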

Wave back

Using OpenCV and MediaPipe we can detect a hand in front of the camera and make the robot wave back with just a short script.

wave.py
import cv2
import sys
import time
import signal
import requests
import mediapipe as mp  # type: ignore

# Configurations
PI_IP: str = "127.0.0.1"
PI_PORT: int = 8080

# Initialize MediaPipe Hand tracking
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=False, max_num_hands=1, min_detection_confidence=0.7
)


# Handle Ctrl+C to exit the program gracefully
def signal_handler(sig, frame):
    print("\nExiting gracefully...")
    cap.release()
    cv2.destroyAllWindows()
    hands.close()
    sys.exit(0)


signal.signal(signal.SIGINT, signal_handler)


# Function to call the API
def call_to_api(endpoint: str, data: dict | None = None):
    response = requests.post(f"http://{PI_IP}:{PI_PORT}/move/{endpoint}", json=data or {})
    return response.json()


def wave_motion():
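    # Tilt the wrist (rx=-90) and sweep the end effector back and forth
    # along y, toggling the gripper open/closed on each step to mimic a wave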
    points = 5
    for _ in range(2):
        for i in range(points):
            call_to_api(
                "absolute",
                {
                    "x": 0,
                    "y": 2 * (-1) ** i,
                    "z": 0,
                    "rx": -90,
                    "ry": 0,
                    "rz": 0,
                    "open": i % 2 == 0,
                },
            )
            time.sleep(0.2)


call_to_api("init")
cap = cv2.VideoCapture(0)
last_wave_time: float = 0
WAVE_COOLDOWN: float = 3

try:
    while True:
        success, image = cap.read()
        if not success:
            continue

        results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        current_time = time.time()
        if (
            results.multi_hand_landmarks
            and current_time - last_wave_time > WAVE_COOLDOWN
        ):
            wave_motion()
            last_wave_time = current_time

        cv2.imshow("Hand Detection", image)
        cv2.waitKey(1)

except KeyboardInterrupt:
    print("\nExiting gracefully...")
finally:
    cap.release()
    cv2.destroyAllWindows()
    hands.close()
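
To try it, make sure the robot's HTTP API is reachable at PI_IP:PI_PORT, then run python wave.py on the machine with the webcam; when a hand appears in frame the robot waves, then waits for WAVE_COOLDOWN seconds before it will wave again.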

Hand tracking

This is a simple implementation of hand tracking using the MediaPipe library. The robot follows the position of your hand, and the gripper opens or closes as you pinch your thumb and index finger together.

hand_tracking.py
import sys
import cv2
import time
import signal
import requests
import numpy as np
import mediapipe as mp  # type: ignore

# Configurations
PI_IP: str = "127.0.0.1"
PI_PORT: int = 8080

# Initialize MediaPipe Hand tracking
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.7,
)
mp_draw = mp.solutions.drawing_utils


# Handle Ctrl+C to exit the program gracefully
def signal_handler(sig, frame):
    print("\nExiting gracefully...")
    cap.release()
    cv2.destroyAllWindows()
    hands.close()
    sys.exit(0)


signal.signal(signal.SIGINT, signal_handler)


# Function to call the API
def call_to_api(endpoint: str, data: dict | None = None):
    response = requests.post(f"http://{PI_IP}:{PI_PORT}/move/{endpoint}", json=data or {})
    return response.json()


def calculate_hand_closure(hand_landmarks):
    """
    Calculate if the hand is closed based on thumb and index finger distance
    Returns a value between 0 (open) and 1 (closed)
    """
    thumb_tip = hand_landmarks.landmark[4]
    index_tip = hand_landmarks.landmark[8]

    distance = np.sqrt(
        (thumb_tip.x - index_tip.x) ** 2
        + (thumb_tip.y - index_tip.y) ** 2
        + (thumb_tip.z - index_tip.z) ** 2
    )

    # Normalize distance (these values might need adjustment based on the hand size)
    normalized = np.clip(1.0 - (distance * 5), 0, 1)
    return normalized


# Initialize the robot
call_to_api("init")
print("Initializing robot")
time.sleep(2)

# Initialize webcam
cap = cv2.VideoCapture(0)

# Get camera frame dimensions
frame_width: int = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height: int = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the workspace boundaries (in meters)
WORKSPACE_Y = 0.4
WORKSPACE_Z = 0.2


def map_to_robot_coordinates(hand_x, hand_y):
    """
    Map normalized hand coordinates to robot workspace coordinates
    We match the hand x coordinate to the robot y coordinate
    And the hand y coordinate to the robot z coordinate
    """
    robot_y = ((0.5 - hand_x) * 2) * (WORKSPACE_Y / 2) * 100
    robot_z = ((0.5 - hand_y) * 2) * (WORKSPACE_Z / 2) * 100
    return robot_y, robot_z


# Previous position for smoothing, this helps make the robot movements less jerky
prev_pos = {"y": 0, "z": 0}
smoothing_factor = 0.5

try:
    while cap.isOpened():
        success, image = cap.read()
        if not success:
            print("Failed to capture frame")
            continue

        image = cv2.flip(image, 1)  # The front camera is inverted
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert to RGB image
        results = hands.process(rgb_image)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_draw.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                palm = hand_landmarks.landmark[0]
                hand_closed = calculate_hand_closure(hand_landmarks)

                robot_y, robot_z = map_to_robot_coordinates(palm.x, palm.y)

                robot_y = prev_pos["y"] * smoothing_factor + robot_y * (
                    1 - smoothing_factor
                )
                robot_z = prev_pos["z"] * smoothing_factor + robot_z * (
                    1 - smoothing_factor
                )

                prev_pos = {"y": robot_y, "z": robot_z}

                call_to_api(
                    "absolute",
                    {
                        "x": -5,
                        "y": robot_y,
                        "z": robot_z,
                        "rx": 0,
                        "ry": 0,
                        "rz": 0,
                        "open": 1 - hand_closed,
                    },
                )

                cv2.putText(
                    image,
                    f"Position: (y:{robot_y:.3f}, z:{robot_z:.3f})",
                    (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1,
                    (0, 255, 0),
                    2,
                )
                cv2.putText(
                    image,
                    f"Grip: {'Closed' if hand_closed > 0.5 else 'Open'}",
                    (10, 70),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1,
                    (0, 255, 0),
                    2,
                )

        cv2.imshow("Hand Tracking", image)
        cv2.waitKey(1)

except KeyboardInterrupt:
    print("\nExiting gracefully...")
finally:
    cap.release()
    cv2.destroyAllWindows()
    hands.close()

Rock paper scissors

Create fun interactions with your robot using this simple implementation of a Rock Paper Scissors game.

rock_paper_scissors.py
import cv2
import time
import random
import requests
import numpy as np
import mediapipe as mp  # type: ignore

# Robot API Configuration
PI_IP = "127.0.0.1"
PI_PORT = 8080


class RockPaperScissorsGame:
    def __init__(self):
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=1,
            min_detection_confidence=0.7,
            min_tracking_confidence=0.7,
        )
        self.cap = cv2.VideoCapture(0)
        self.gestures = {
            "rock": self.make_rock_gesture,
            "paper": self.make_paper_gesture,
            "scissors": self.make_scissors_gesture,
        }

    def call_to_api(self, endpoint: str, data: dict | None = None):
        response = requests.post(f"http://{PI_IP}:{PI_PORT}/move/{endpoint}", json=data or {})
        return response.json()

    def detect_gesture(self, hand_landmarks):
        # Get relevant finger landmarks
        thumb_tip = hand_landmarks.landmark[4]
        index_tip = hand_landmarks.landmark[8]
        middle_tip = hand_landmarks.landmark[12]
        ring_tip = hand_landmarks.landmark[16]
        pinky_tip = hand_landmarks.landmark[20]

        # Get wrist position for reference
        wrist = hand_landmarks.landmark[0]

        # Calculate distances from wrist
        fingers_extended = []
        for tip in [thumb_tip, index_tip, middle_tip, ring_tip, pinky_tip]:
            distance = np.sqrt((tip.x - wrist.x) ** 2 + (tip.y - wrist.y) ** 2)
            fingers_extended.append(distance > 0.2)  # Threshold for extended fingers

        # Determine gesture
        if not any(fingers_extended[1:]):  # All fingers except the thumb closed
            return "rock"
        elif all(fingers_extended):  # All fingers open
            return "paper"
        elif (
            fingers_extended[1]
            and fingers_extended[2]
            and not fingers_extended[3]
            and not fingers_extended[4]
        ):  # Only index and middle extended
            return "scissors"
        return None

    def make_rock_gesture(self):
        # Move to closed fist position
        self.call_to_api(
            "absolute",
            {"x": 0, "y": 0, "z": 5, "rx": 0, "ry": 0, "rz": 0, "open": 0},
        )

    def make_paper_gesture(self):
        # Move to open hand position
        self.call_to_api(
            "absolute",
            {"x": 0, "y": 0, "z": 5, "rx": 0, "ry": 0, "rz": 0, "open": 1},
        )

    def make_scissors_gesture(self):
        # Move to scissors position
        self.call_to_api(
            "absolute",
            {"x": 0, "y": 0, "z": 5, "rx": 0, "ry": -45, "rz": 0, "open": 0.5},
        )

    def move_up_down(self, times=3):
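        # Bob the arm up and down while printing a countdown,
        # so the player knows when to reveal their gesture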
        for step in range(times + 1):
            self.call_to_api(
                "absolute",
                {"x": 0, "y": 0, "z": 4, "rx": 0, "ry": 0, "rz": 0, "open": 0},
            )
            time.sleep(0.25)
            self.call_to_api(
                "absolute",
                {"x": 0, "y": 0, "z": -4, "rx": 0, "ry": 0, "rz": 0, "open": 0},
            )
            time.sleep(0.25)
            print(times - step)

    def determine_winner(self, player_gesture, robot_gesture):
        if player_gesture == robot_gesture:
            return "Tie!"
        winners = {"rock": "scissors", "paper": "rock", "scissors": "paper"}
        return (
            "Player wins!"
            if winners[player_gesture] == robot_gesture
            else "Robot wins!"
        )

    def play_game(self):
        print("Initializing robot...")
        self.call_to_api("init")
        time.sleep(1)

        print("Robot performing countdown...")
        self.move_up_down(times=3)

        ret, frame = self.cap.read()
        if not ret:
            print("Failed to capture image.")
            return

        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = self.hands.process(rgb_frame)

        if results.multi_hand_landmarks:
            player_gesture = self.detect_gesture(results.multi_hand_landmarks[0])

            if player_gesture:
                robot_gesture = random.choice(["rock", "paper", "scissors"])
                print(f"\nPlayer chose: {player_gesture}")
                print(f"Robot chose: {robot_gesture}")

                self.gestures[robot_gesture]()  # Robot makes its gesture
                result = self.determine_winner(player_gesture, robot_gesture)
                print(result)
                time.sleep(2)
            else:
                print("Gesture not detected. Please try again.")
        else:
            print("No hand detected. Please try again.")

        self.cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    game = RockPaperScissorsGame()
    game.play_game()