RockPaperScissor/main.py

import os
import random
import time

import torch
from model import ClassifyModel
from datasets import RPSDataset
import cv2
import mediapipe as mp


def process_data(landmarks):
    """Flatten detected hand landmarks into a single-row feature tensor.

    Args:
        landmarks: Sequence of hand objects; each exposes a ``landmark``
            iterable whose items carry ``x``, ``y`` and ``z`` attributes.

    Returns:
        A ``torch.Tensor`` with one leading batch dimension holding the
        coordinates in ``x, y, z`` order, hand after hand.
    """
    coords = [
        value
        for hand in landmarks
        for point in hand.landmark
        for value in (point.x, point.y, point.z)
    ]

    # With only one hand, zero-fill the slot of the missing second hand
    # (21 points, 3 coordinates each).
    if len(landmarks) == 1:
        coords.extend([0] * (21 * 3))

    return torch.Tensor(coords).unsqueeze(0)


def predict(_model, x, label_dict):
    """Classify one input row and return its label.

    Args:
        _model: Callable that maps ``x`` to a 2-D score tensor
            (rows of per-class scores).
        x: Input passed unchanged to ``_model``.
        label_dict: Mapping from class index to label.

    Returns:
        The ``label_dict`` entry for the highest-scoring class index.
    """
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        scores = _model(x)
    best_index = torch.argmax(scores, dim=1).item()
    return label_dict[best_index]

def computer_predict(label_dict, model=None):
    """Pick the computer's move.

    Args:
        label_dict: Mapping from class index to move label; indices 0, 1
            and 2 are looked up when no model is given.
        model: Optional opponent model. It is not consulted yet: when one
            is supplied, the label at index 0 is returned.

    Returns:
        The ``label_dict`` entry for the chosen index.
    """
    # Compare against None explicitly so a falsy-but-present model (for
    # example an empty container module) is not mistaken for "no model".
    if model is None:
        return label_dict[random.randint(0, 2)]
    return label_dict[0]

def show_result(u_output, c_output):
    """Judge a round from the user's point of view.

    Args:
        u_output: The user's move label.
        c_output: The computer's move label.

    Returns:
        ``"Draw"`` when both moves are equal, ``"Lose"`` when the
        computer's move beats the user's, otherwise ``"Win"``.
    """
    # (user move, computer move) pairs in which the user is beaten.
    losing_pairs = (
        ("rock", "paper"),
        ("paper", "scissor"),
        ("scissor", "rock"),
    )

    if u_output == c_output:
        verdict = "Draw"
    elif (u_output, c_output) in losing_pairs:
        verdict = "Lose"
    else:
        verdict = "Win"
    return verdict

if __name__ == '__main__':
    # Interactive loop: read webcam frames, classify the detected hand
    # pose, and play one round against the computer when "o" is pressed.
    # Press "q" to quit.
    mpHands = mp.solutions.hands
    Hands = mpHands.Hands()
    mpDraws = mp.solutions.drawing_utils  # not used below
    label_dict = RPSDataset(os.getcwd()).label_dict
    model = ClassifyModel.load_from_checkpoint("./lightning_logs/version_6/checkpoints/epoch=12-step=247.ckpt")
    model.eval()
    cap = cv2.VideoCapture(0)

    current_choose = None  # move locked in with "o"; None while still choosing
    output = None  # most recent prediction; kept across frames without a hand

    if not cap.isOpened():
        # SystemExit instead of the interactive-only ``exit`` helper.
        raise SystemExit(1)

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            if current_choose:
                # A move was locked in: draw both moves and the outcome,
                # hold the frame on screen, then start a new round.
                cv2.putText(frame, f"You choose: {output}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX,
                            2, (0, 0, 255), 2, 2)
                computer_choose = computer_predict(label_dict)
                outcome = show_result(current_choose, computer_choose)
                cv2.putText(frame, f"Computer choose: {computer_choose}, You {outcome}", (50, 100),
                            cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 2, 2)
                cv2.imshow("frame", frame)
                cv2.waitKey(1)  # let the window paint before pausing
                time.sleep(3)
                current_choose = None
            else:
                detection = Hands.process(frame_rgb)

                if detection.multi_hand_landmarks:
                    x = process_data(detection.multi_hand_landmarks)
                    output = predict(model, x, label_dict)
                    cv2.putText(frame, f"CURRENT: {output}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX,
                                2, (0, 0, 255), 2, 2)

                # Show first, then poll keys, so the frame just drawn is
                # the one painted by this waitKey call.
                cv2.imshow("frame", frame)
                key = cv2.waitKey(1)
                if key == ord("o"):
                    current_choose = output
                if key == ord("q"):
                    break
    finally:
        # Release the camera, windows and hand tracker even when the loop
        # exits through an exception (for example Ctrl-C).
        cap.release()
        cv2.destroyAllWindows()
        Hands.close()