The dev kit comes with a stereoscopic camera, ideal for 3D vision and AI workflows. All code examples can be found in our open source repo here.
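
As a minimal sketch (not one of the repo examples), assuming the camera enumerates as an ordinary video device and delivers a single side-by-side frame, you can preview the left and right views with OpenCV:

import cv2

# Open the camera (device index 0 is an assumption; adjust for your setup)
cap = cv2.VideoCapture(0)

success, frame = cap.read()
if success:
    # Assuming the stereo camera delivers one side-by-side frame,
    # split it down the middle into left and right views
    height, width = frame.shape[:2]
    left_view = frame[:, : width // 2]
    right_view = frame[:, width // 2 :]
    cv2.imshow("Left", left_view)
    cv2.imshow("Right", right_view)
    cv2.waitKey(0)

cap.release()
cv2.destroyAllWindows()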

Wave back

Using OpenCV and MediaPipe we can detect a hand in front of the camera and make the robot wave back with just a short script.

wave.py
import cv2
import sys
import time
import signal
import requests
import mediapipe as mp  # type: ignore

# Configurations
PI_IP: str = "127.0.0.1"
PI_PORT: int = 8080

# Initialize MediaPipe Hand tracking
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=False, max_num_hands=1, min_detection_confidence=0.7
)


# Handle Ctrl+C to exit the program gracefully
def signal_handler(sig, frame):
    print("\nExiting gracefully...")
    cap.release()
    cv2.destroyAllWindows()
    hands.close()
    sys.exit(0)


signal.signal(signal.SIGINT, signal_handler)


# Function to call the API
def call_to_api(endpoint: str, data: dict | None = None):
    response = requests.post(f"http://{PI_IP}:{PI_PORT}/move/{endpoint}", json=data or {})
    return response.json()


def wave_motion():
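    # Tilt the wrist (rx=-90) and sweep the end effector back and forth
    # along y, toggling the gripper open/closed on each step to mimic a wave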
    points = 5
    for _ in range(2):
        for i in range(points):
            call_to_api(
                "absolute",
                {
                    "x": 0,
                    "y": 2 * (-1) ** i,
                    "z": 0,
                    "rx": -90,
                    "ry": 0,
                    "rz": 0,
                    "open": i % 2 == 0,
                },
            )
            time.sleep(0.2)


call_to_api("init")
cap = cv2.VideoCapture(0)
last_wave_time: float = 0
WAVE_COOLDOWN: float = 3

try:
    while True:
        success, image = cap.read()
        if not success:
            continue

        results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        current_time = time.time()
        if (
            results.multi_hand_landmarks
            and current_time - last_wave_time > WAVE_COOLDOWN
        ):
            wave_motion()
            last_wave_time = current_time

        cv2.imshow("Hand Detection", image)
        cv2.waitKey(1)

except KeyboardInterrupt:
    print("\nExiting gracefully...")
finally:
    cap.release()
    cv2.destroyAllWindows()
    hands.close()
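
To try it, make sure the robot's HTTP API is reachable at PI_IP:PI_PORT, then run python wave.py on the machine with the webcam; when a hand appears in frame the robot waves, then waits for WAVE_COOLDOWN seconds before it will wave again.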

Hand tracking

This is a simple implementation of hand tracking using the MediaPipe library. The robot follows the position of your hand, and the gripper opens or closes as you pinch your thumb and index finger together.

hand_tracking.py
import sys
import cv2
import time
import signal
import requests
import numpy as np
import mediapipe as mp  # type: ignore

# Configurations
PI_IP: str = "127.0.0.1"
PI_PORT: int = 8080

# Initialize MediaPipe Hand tracking
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.7,
)
mp_draw = mp.solutions.drawing_utils


# Handle Ctrl+C to exit the program gracefully
def signal_handler(sig, frame):
    print("\nExiting gracefully...")
    cap.release()
    cv2.destroyAllWindows()
    hands.close()
    sys.exit(0)


signal.signal(signal.SIGINT, signal_handler)


# Function to call the API
def call_to_api(endpoint: str, data: dict | None = None):
    response = requests.post(f"http://{PI_IP}:{PI_PORT}/move/{endpoint}", json=data or {})
    return response.json()


def calculate_hand_closure(hand_landmarks):
    """
    Calculate if the hand is closed based on thumb and index finger distance
    Returns a value between 0 (open) and 1 (closed)
    """
    thumb_tip = hand_landmarks.landmark[4]
    index_tip = hand_landmarks.landmark[8]

    distance = np.sqrt(
        (thumb_tip.x - index_tip.x) ** 2
        + (thumb_tip.y - index_tip.y) ** 2
        + (thumb_tip.z - index_tip.z) ** 2
    )

    # Normalize distance (these values might need adjustment based on the hand size)
    normalized = np.clip(1.0 - (distance * 5), 0, 1)
    return normalized


# Initialize the robot
call_to_api("init")
print("Initializing robot")
time.sleep(2)

# Initialize webcam
cap = cv2.VideoCapture(0)

# Get camera frame dimensions
frame_width: int = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height: int = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the workspace boundaries (in meters)
WORKSPACE_Y = 0.4
WORKSPACE_Z = 0.2


def map_to_robot_coordinates(hand_x, hand_y):
    """
    Map normalized hand coordinates to robot workspace coordinates
    We match the hand x coordinate to the robot y coordinate
    And the hand y coordinate to the robot z coordinate
    """
    robot_y = ((0.5 - hand_x) * 2) * (WORKSPACE_Y / 2) * 100
    robot_z = ((0.5 - hand_y) * 2) * (WORKSPACE_Z / 2) * 100
    return robot_y, robot_z


# Previous position for smoothing, this helps make the robot movements less jerky
prev_pos = {"y": 0, "z": 0}
smoothing_factor = 0.5

try:
    while cap.isOpened():
        success, image = cap.read()
        if not success:
            print("Failed to capture frame")
            continue

        image = cv2.flip(image, 1)  # The front camera is inverted
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert to RGB image
        results = hands.process(rgb_image)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_draw.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                palm = hand_landmarks.landmark[0]
                hand_closed = calculate_hand_closure(hand_landmarks)

                robot_y, robot_z = map_to_robot_coordinates(palm.x, palm.y)

                robot_y = prev_pos["y"] * smoothing_factor + robot_y * (
                    1 - smoothing_factor
                )
                robot_z = prev_pos["z"] * smoothing_factor + robot_z * (
                    1 - smoothing_factor
                )

                prev_pos = {"y": robot_y, "z": robot_z}

                call_to_api(
                    "absolute",
                    {
                        "x": -5,
                        "y": robot_y,
                        "z": robot_z,
                        "rx": 0,
                        "ry": 0,
                        "rz": 0,
                        "open": 1 - hand_closed,
                    },
                )

                cv2.putText(
                    image,
                    f"Position: (y:{robot_y:.3f}, z:{robot_z:.3f})",
                    (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1,
                    (0, 255, 0),
                    2,
                )
                cv2.putText(
                    image,
                    f"Grip: {'Closed' if hand_closed > 0.5 else 'Open'}",
                    (10, 70),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1,
                    (0, 255, 0),
                    2,
                )

        cv2.imshow("Hand Tracking", image)
        cv2.waitKey(1)

except KeyboardInterrupt:
    print("\nExiting gracefully...")
finally:
    cap.release()
    cv2.destroyAllWindows()
    hands.close()

Rock paper scissors

Create fun interactions with your robot using this simple implementation of a Rock Paper Scissors game.

rock_paper_scissors.py
import cv2
import time
import random
import requests
import numpy as np
import mediapipe as mp  # type: ignore

# Robot API Configuration
PI_IP = "127.0.0.1"
PI_PORT = 8080


class RockPaperScissorsGame:
    def __init__(self):
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=1,
            min_detection_confidence=0.7,
            min_tracking_confidence=0.7,
        )
        self.cap = cv2.VideoCapture(0)
        self.gestures = {
            "rock": self.make_rock_gesture,
            "paper": self.make_paper_gesture,
            "scissors": self.make_scissors_gesture,
        }

    def call_to_api(self, endpoint: str, data: dict | None = None):
        response = requests.post(f"http://{PI_IP}:{PI_PORT}/move/{endpoint}", json=data or {})
        return response.json()

    def detect_gesture(self, hand_landmarks):
        # Get relevant finger landmarks
        thumb_tip = hand_landmarks.landmark[4]
        index_tip = hand_landmarks.landmark[8]
        middle_tip = hand_landmarks.landmark[12]
        ring_tip = hand_landmarks.landmark[16]
        pinky_tip = hand_landmarks.landmark[20]

        # Get wrist position for reference
        wrist = hand_landmarks.landmark[0]

        # Calculate distances from wrist
        fingers_extended = []
        for tip in [thumb_tip, index_tip, middle_tip, ring_tip, pinky_tip]:
            distance = np.sqrt((tip.x - wrist.x) ** 2 + (tip.y - wrist.y) ** 2)
            fingers_extended.append(distance > 0.2)  # Threshold for extended fingers

        # Determine gesture
        if not any(fingers_extended[1:]):  # All fingers except the thumb closed
            return "rock"
        elif all(fingers_extended):  # All fingers open
            return "paper"
        elif (
            fingers_extended[1]
            and fingers_extended[2]
            and not fingers_extended[3]
            and not fingers_extended[4]
        ):  # Only index and middle extended
            return "scissors"
        return None

    def make_rock_gesture(self):
        # Move to closed fist position
        self.call_to_api(
            "absolute",
            {"x": 0, "y": 0, "z": 5, "rx": 0, "ry": 0, "rz": 0, "open": 0},
        )

    def make_paper_gesture(self):
        # Move to open hand position
        self.call_to_api(
            "absolute",
            {"x": 0, "y": 0, "z": 5, "rx": 0, "ry": 0, "rz": 0, "open": 1},
        )

    def make_scissors_gesture(self):
        # Move to scissors position
        self.call_to_api(
            "absolute",
            {"x": 0, "y": 0, "z": 5, "rx": 0, "ry": -45, "rz": 0, "open": 0.5},
        )

    def move_up_down(self, times=3):
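        # Bob the arm up and down while printing a countdown,
        # so the player knows when to reveal their gesture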
        for step in range(times + 1):
            self.call_to_api(
                "absolute",
                {"x": 0, "y": 0, "z": 4, "rx": 0, "ry": 0, "rz": 0, "open": 0},
            )
            time.sleep(0.25)
            self.call_to_api(
                "absolute",
                {"x": 0, "y": 0, "z": -4, "rx": 0, "ry": 0, "rz": 0, "open": 0},
            )
            time.sleep(0.25)
            print(times - step)

    def determine_winner(self, player_gesture, robot_gesture):
        if player_gesture == robot_gesture:
            return "Tie!"
        winners = {"rock": "scissors", "paper": "rock", "scissors": "paper"}
        return (
            "Player wins!"
            if winners[player_gesture] == robot_gesture
            else "Robot wins!"
        )

    def play_game(self):
        print("Initializing robot...")
        self.call_to_api("init")
        time.sleep(1)

        print("Robot performing countdown...")
        self.move_up_down(times=3)

        ret, frame = self.cap.read()
        if not ret:
            print("Failed to capture image.")
            return

        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = self.hands.process(rgb_frame)

        if results.multi_hand_landmarks:
            player_gesture = self.detect_gesture(results.multi_hand_landmarks[0])

            if player_gesture:
                robot_gesture = random.choice(["rock", "paper", "scissors"])
                print(f"\nPlayer chose: {player_gesture}")
                print(f"Robot chose: {robot_gesture}")

                self.gestures[robot_gesture]()  # Robot makes its gesture
                result = self.determine_winner(player_gesture, robot_gesture)
                print(result)
                time.sleep(2)
            else:
                print("Gesture not detected. Please try again.")
        else:
            print("No hand detected. Please try again.")

        self.cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    game = RockPaperScissorsGame()
    game.play_game()