CameraChessWeb icon indicating copy to clipboard operation
CameraChessWeb copied to clipboard

Open source the API?

Open zvezdolet opened this issue 1 year ago • 3 comments

Hey, that's just fantastic! Do you plan to open source the server part?

zvezdolet avatar May 14 '24 16:05 zvezdolet

There is no API, all the models run locally.

I.e. https://github.com/Pbatch/CameraChessWeb/blob/main/src/utils/loadModels.tsx loads the pieces and corners models to your device.

If you want the PyTorch/ONNX models I am happy to provide them :)

Pbatch avatar May 14 '24 18:05 Pbatch

I see, that's nice. Yes, would like to play with it a bit

zvezdolet avatar May 14 '24 19:05 zvezdolet

I'll post something more formal another time, but for now this script (+ the model links in the README) should be enough to get you started with the pieces detector.

You have to pass two arguments to the run method: your image as a numpy array, and the chessboard corners as a dictionary, e.g. {"h1": [0, 0], "a1": [0, 100], "a8": [100, 100], "h8": [100, 0]}. The easiest thing to do is just to pass the four corners of the image.

import os
from collections import namedtuple

import cv2
import numpy as np
import onnxruntime as ort

# Board-corner names, in the order their coordinates are stacked into the
# keypoint array by Detector.run.
CORNERS = ['h1', 'a1', 'a8', 'h8']
# Directory that is joined with the model basename to locate the ONNX file.
MODEL_DIR = "models"

def get_roi(keypoints, width, height, model_width, model_height, padding_ratio=12):
    """Return a padded crop rectangle around the keypoints.

    The bounding box of ``keypoints`` is grown by ``1 / padding_ratio`` of
    its size on every side, then extended further so that its aspect ratio
    approaches ``model_height / model_width``: extra width is split between
    left and right, extra height is added to the top only. The result is
    finally clamped to the image bounds.

    Args:
        keypoints: Array indexed as ``[:, 0]`` for x and ``[:, 1]`` for y.
        width: Image width, used as the upper clamp for x.
        height: Image height, used as the upper clamp for y.
        model_width: Model input width.
        model_height: Model input height.
        padding_ratio: Divisor of the box size giving the base padding.

    Returns:
        ``[x_min, y_min, x_max, y_max]`` as a list of Python ints.
    """
    xs = keypoints[:, 0]
    ys = keypoints[:, 1]
    left, right = np.min(xs), np.max(xs)
    top, bottom = np.min(ys), np.max(ys)

    span_x = right - left
    span_y = bottom - top

    # Base margin: the same on opposite sides of each axis.
    margin_x = span_x // padding_ratio
    margin_y = span_y // padding_ratio
    pad_l, pad_r = margin_x, margin_x
    pad_t, pad_b = margin_y, margin_y

    padded_w = span_x + margin_x + margin_x
    padded_h = span_y + margin_y + margin_y
    current_aspect = padded_h / padded_w
    wanted_aspect = model_height / model_width

    if current_aspect > wanted_aspect:
        # Too tall for the model: widen, splitting the extra between sides.
        extra_w = padded_h / wanted_aspect - padded_w
        half = extra_w // 2
        pad_l = margin_x + half
        pad_r = margin_x + (extra_w - half)
    else:
        # Too wide for the model: all of the extra height goes on top.
        pad_t = margin_y + (padded_w * wanted_aspect - padded_h)

    x0 = int(max(left - pad_l, 0))
    y0 = int(max(top - pad_t, 0))
    x1 = int(min(right + pad_r, width))
    y1 = int(min(bottom + pad_b, height))
    return [x0, y0, x1, y1]

class Detector:
    """Run an ONNX detection model on a crop around the chessboard.

    ``run`` crops the image to a padded region of interest around the board
    corners, letterboxes the crop to the model's input size, executes the
    ONNX Runtime session and maps the predicted boxes back to the coordinate
    frame of the original image.
    """

    def __init__(self, model_basename, fill_colour=114, device='cuda:0'):
        """Create the inference session for ``MODEL_DIR/model_basename``.

        Args:
            model_basename: File name of the ONNX model inside ``MODEL_DIR``.
            fill_colour: Constant pixel value used for letterbox padding.
            device: Stored on the instance only. The execution provider
                below is fixed to CUDA regardless of this value.
        """
        self.model_basename = model_basename
        self.fill_colour = fill_colour
        self.device = device

        providers = [('CUDAExecutionProvider', {"cudnn_conv_algo_search": "DEFAULT"})]
        self.sess = ort.InferenceSession(os.path.join(MODEL_DIR, self.model_basename),
                                         providers=providers)
        self.inputs = self.sess.get_inputs()[0]
        self.outputs = self.sess.get_outputs()[0]
        self.io_binding = self.sess.io_binding()
        self.io_binding.bind_output(self.outputs.name)

        # shape should be B, C, H, W
        self.model_height = self.inputs.shape[2]
        self.model_width = self.inputs.shape[3]
        self.desired_ratio = self.model_height / self.model_width

    @staticmethod
    def _keypoints_to_array(keypoints, order):
        """Stack ``keypoints[name]`` for each name in ``order`` into one array.

        Values may be lists, tuples or numpy arrays, so the plain-list
        dictionaries shown in the usage notes work as well as arrays.

        Raises:
            KeyError: If a name in ``order`` is missing from ``keypoints``.
        """
        return np.array([np.asarray(keypoints[name]) for name in order])

    def _resize(self, x):
        """Resize ``x`` to fit inside the model input, keeping aspect ratio."""
        height, width = x.shape[:2]
        ratio = height / width
        if ratio > self.desired_ratio:
            # Relatively taller than the model input: height is the limit.
            height = self.model_height
            # Clamp so an extreme aspect ratio cannot produce a 0-pixel side.
            width = max(1, int(self.model_height / ratio))
        else:
            width = self.model_width
            height = max(1, int(self.model_width * ratio))

        x = cv2.resize(x, (width, height))

        return x

    def _pad(self, x):
        """Pad ``x`` with ``fill_colour`` up to the model input size.

        Returns:
            The padded image and ``[pad_left, pad_right, pad_top,
            pad_bottom]``. When the total padding on an axis is odd, the
            extra pixel goes to the left / top.
        """
        height, width = x.shape[:2]
        dx = self.model_width - width
        dy = self.model_height - height
        pad_right = dx // 2
        pad_left = dx - pad_right
        pad_bottom = dy // 2
        pad_top = dy - pad_bottom
        padding = [pad_left, pad_right, pad_top, pad_bottom]
        x = np.pad(x, ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)),
                   mode="constant",
                   constant_values=self.fill_colour)

        return x, padding

    def _fix_bboxes(self, y, roi, padding):
        """Convert boxes to xyxy and map them to original image coordinates.

        ``y`` is modified in place and also returned. The first four entries
        of the LAST axis are treated as ``x_center, y_center, w, h``.

        NOTE(review): a user reported the raw model output has shape
        (16, 2835). If that layout is channels-first, the output would need
        transposing before this step. Confirm against the exported model.

        Args:
            y: Prediction array, boxes in model-input pixels.
            roi: ``[x_min, y_min, x_max, y_max]`` of the crop in the image.
            padding: ``[pad_left, pad_right, pad_top, pad_bottom]`` from
                ``_pad``.
        """
        # xywh -> xyxy
        y[..., 0] -= y[..., 2] / 2
        y[..., 1] -= y[..., 3] / 2
        y[..., 2] += y[..., 0]
        y[..., 3] += y[..., 1]

        # Rescale predictions to original image: drop the letterbox offset,
        # scale the unpadded area up to the crop size, then add the crop
        # origin.
        roi_width = roi[2] - roi[0]
        roi_height = roi[3] - roi[1]
        y[..., [0, 2]] -= padding[0]
        y[..., [1, 3]] -= padding[2]
        y[..., [0, 2]] *= roi_width / (self.model_width - padding[0] - padding[1])
        y[..., [1, 3]] *= roi_height / (self.model_height - padding[2] - padding[3])

        y[..., [0, 2]] += roi[0]
        y[..., [1, 3]] += roi[1]

        return y

    def run(self, image, keypoints):
        """Detect on ``image`` and return predictions in image coordinates.

        Args:
            image: Image as a numpy array indexed ``[row, column, channel]``.
            keypoints: Mapping from each name in ``CORNERS`` to an ``[x, y]``
                point (list, tuple or numpy array).

        Returns:
            The first output of the model for the first batch element, as
            float32, after ``_fix_bboxes`` has rescaled the box coordinates.
        """
        # Crop out region of interest
        height, width = image.shape[:2]
        keypoints_arr = self._keypoints_to_array(keypoints, CORNERS)
        roi = get_roi(keypoints_arr, width, height, self.model_width, self.model_height)
        image = image[roi[1]:roi[3], roi[0]:roi[2]]

        # Resize, pad and scale
        image = self._resize(image)
        image, padding = self._pad(image)
        image = image.astype(np.float16) / 255
        # HWC -> CHW, then add the batch axis.
        image = np.transpose(image, (2, 0, 1))
        image = np.expand_dims(image, axis=0)

        # Inference
        self.io_binding.bind_cpu_input(self.inputs.name, image)
        self.sess.run_with_iobinding(self.io_binding)
        y = self.io_binding.copy_outputs_to_cpu()[0][0].astype(np.float32)

        # Rescale bboxes to match original image size
        y = self._fix_bboxes(y, roi, padding)

        return y

Pbatch avatar May 14 '24 21:05 Pbatch

When I run this script, the output is a NumPy array of shape (16, 2835). What does this array represent?

duyddwcs avatar Oct 14 '24 05:10 duyddwcs