Open source the API?
Hey, that's just fantastic! Do you plan to open source the server part?
There is no API, all the models run locally.
For example, https://github.com/Pbatch/CameraChessWeb/blob/main/src/utils/loadModels.tsx loads the pieces and corners models onto your device.
If you want the PyTorch/ONNX models I am happy to provide them :)
I see, that's nice. Yes, I would like to play with it a bit.
I'll post something more formal another time, but for now this script (+ the model links in the README) should be enough to get you started with the pieces detector.
You have to pass two arguments to the run method: your image as a numpy array, and the chessboard corners as a dictionary, e.g. {"h1": [0, 0], "a1": [0, 100], "a8": [100, 100], "h8": [100, 0]}. The easiest thing to do is just to pass the four corners of the image.
import os
from collections import namedtuple
import cv2
import numpy as np
import onnxruntime as ort
# Corner squares, in the order their coordinates are stacked into the
# keypoint array that is used to locate the board region.
CORNERS = ['h1', 'a1', 'a8', 'h8']
# Directory that is joined with a model's basename to find its ONNX file.
MODEL_DIR = "models"
def get_roi(keypoints, width, height, model_width, model_height, padding_ratio=12):
    """Return a padded crop box around ``keypoints`` shaped like the model input.

    The bounding box of the keypoints is first grown on every side by
    ``1 / padding_ratio`` of its width (left/right) or height (top/bottom).
    It is then widened (extra split between left and right) or made taller
    (extra added to the top only) until its height/width ratio equals
    ``model_height / model_width``. Finally the box is clipped to the image,
    so the returned box may be smaller than the ideal one near the borders.

    Args:
        keypoints: Array-like of shape (N, 2+); column 0 holds x and column 1
            holds y pixel coordinates. Plain nested lists are accepted.
        width: Image width in pixels (upper clip bound for x).
        height: Image height in pixels (upper clip bound for y).
        model_width: Width of the model input.
        model_height: Height of the model input.
        padding_ratio: Divisor that sets the initial margin; larger values
            give a thinner margin.

    Returns:
        ``[x_min, y_min, x_max, y_max]`` as Python ints.

    Raises:
        ValueError: If ``keypoints`` is not a non-empty 2-D array with at
            least two columns.
    """
    points = np.asarray(keypoints)
    if points.ndim != 2 or points.shape[0] == 0 or points.shape[1] < 2:
        raise ValueError(
            "keypoints must be a non-empty array-like of shape (N, 2), "
            f"got shape {points.shape}"
        )

    x_min = np.min(points[:, 0])
    x_max = np.max(points[:, 0])
    y_min = np.min(points[:, 1])
    y_max = np.max(points[:, 1])

    roi_width = x_max - x_min
    roi_height = y_max - y_min

    # Initial margin proportional to the box size.
    padding_left = roi_width // padding_ratio
    padding_right = roi_width // padding_ratio
    padding_top = roi_height // padding_ratio
    padding_bottom = roi_height // padding_ratio

    padded_roi_width = roi_width + padding_left + padding_right
    padded_roi_height = roi_height + padding_top + padding_bottom

    ratio = padded_roi_height / padded_roi_width
    desired_ratio = model_height / model_width

    if ratio > desired_ratio:
        # Box is too tall for the model: widen it, splitting the extra
        # width between both sides (the odd remainder goes to the right).
        target_width = padded_roi_height / desired_ratio
        dx = target_width - padded_roi_width
        half_dx = dx // 2
        padding_left = padding_left + half_dx
        padding_right = padding_right + (dx - half_dx)
    else:
        # Box is too wide for the model: all extra height is added above it.
        # NOTE(review): presumably so that tall pieces at the far side of the
        # board stay inside the crop - confirm.
        target_height = padded_roi_width * desired_ratio
        padding_top = padding_top + (target_height - padded_roi_height)

    # Clip to the image and truncate to whole pixels.
    return [int(max(x_min - padding_left, 0)),
            int(max(y_min - padding_top, 0)),
            int(min(x_max + padding_right, width)),
            int(min(y_max + padding_bottom, height))]
class Detector:
    """Run an ONNX detection model on the chessboard region of an image.

    The model file is loaded from ``MODEL_DIR`` with ONNX Runtime. ``run``
    crops the board region, letterboxes it to the model's input size, runs
    inference and maps the predicted boxes back to original image pixels.
    """

    # ONNX Runtime input type string -> NumPy dtype fed to the model.
    # Unknown types fall back to float16.
    _INPUT_DTYPES = {
        "tensor(float16)": np.float16,
        "tensor(float)": np.float32,
        "tensor(double)": np.float64,
    }

    def __init__(self, model_basename, fill_colour=114, device='cuda:0'):
        """Load the model and read its input geometry.

        Args:
            model_basename: File name of the ONNX model inside ``MODEL_DIR``.
            fill_colour: Constant value used for the letterbox padding.
            device: ``'cuda'`` / ``'cuda:<index>'`` to request the CUDA
                execution provider (with CPU as fallback); any other value
                selects the CPU execution provider only.
        """
        self.model_basename = model_basename
        self.fill_colour = fill_colour
        self.device = device

        self.sess = ort.InferenceSession(os.path.join(MODEL_DIR, self.model_basename),
                                         providers=self._providers_for(device))
        self.inputs = self.sess.get_inputs()[0]
        self.outputs = self.sess.get_outputs()[0]
        # Feed the model the dtype its first input declares.
        self.input_dtype = self._INPUT_DTYPES.get(self.inputs.type, np.float16)
        self.io_binding = self.sess.io_binding()
        self.io_binding.bind_output(self.outputs.name)

        # shape should be B, C, H, W
        self.model_height = self.inputs.shape[2]
        self.model_width = self.inputs.shape[3]
        self.desired_ratio = self.model_height / self.model_width

    @staticmethod
    def _providers_for(device):
        """Translate a ``'cuda:<index>'`` / ``'cpu'`` string into ORT providers."""
        kind, _, index = str(device).lower().partition(':')
        if kind != 'cuda':
            return ['CPUExecutionProvider']
        cuda_options = {"device_id": int(index) if index else 0,
                        "cudnn_conv_algo_search": "DEFAULT"}
        # CPU is listed last so the session can still be created without CUDA.
        return [('CUDAExecutionProvider', cuda_options), 'CPUExecutionProvider']

    def _resize(self, x):
        """Scale ``x`` to fit inside the model input, keeping its aspect ratio."""
        height, width = x.shape[:2]
        ratio = height / width
        if ratio > self.desired_ratio:
            # Relatively taller than the model input: height is the limit.
            height = self.model_height
            width = max(1, int(self.model_height / ratio))
        else:
            width = self.model_width
            height = max(1, int(self.model_width * ratio))
        # cv2.resize takes the target size as (width, height).
        x = cv2.resize(x, (width, height))
        return x

    def _pad(self, x):
        """Centre an H x W x C image in the model input using ``fill_colour``.

        Returns:
            The padded image and ``[pad_left, pad_right, pad_top, pad_bottom]``.
        """
        height, width = x.shape[:2]
        dx = self.model_width - width
        dy = self.model_height - height
        # When the slack is odd, the extra pixel goes to the left / top.
        pad_right = dx // 2
        pad_left = dx - pad_right
        pad_bottom = dy // 2
        pad_top = dy - pad_bottom
        padding = [pad_left, pad_right, pad_top, pad_bottom]
        x = np.pad(x, ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)),
                   mode="constant",
                   constant_values=self.fill_colour)
        return x, padding

    def _fix_bboxes(self, y, roi, padding, channels_first=None):
        """Convert xywh boxes to xyxy in original image pixels, in place.

        Only the first four channels (centre x, centre y, width, height) are
        rewritten; any further channels are left untouched.

        Args:
            y: Float prediction array, either ``(..., N, C)`` or
                ``(..., C, N)`` with the box in the first four channels.
            roi: ``[x_min, y_min, x_max, y_max]`` crop box in image pixels.
            padding: ``[pad_left, pad_right, pad_top, pad_bottom]`` applied
                by ``_pad``.
            channels_first: ``True`` if the channel axis is second-to-last,
                ``False`` if it is last. ``None`` guesses from the shape: the
                shorter of the last two axes is taken as the channel axis.
                NOTE(review): this is a heuristic for raw detector heads,
                where predictions far outnumber channels - pass the flag
                explicitly if your model differs.

        Returns:
            ``y`` itself, with its original layout.
        """
        if channels_first is None:
            channels_first = y.ndim >= 2 and y.shape[-2] < y.shape[-1]
        # ``boxes`` is a view with channels on the last axis, so the in-place
        # updates below write straight through to ``y``.
        boxes = np.swapaxes(y, -1, -2) if channels_first else y

        # xywh -> xyxy
        boxes[..., 0] -= boxes[..., 2] / 2
        boxes[..., 1] -= boxes[..., 3] / 2
        boxes[..., 2] += boxes[..., 0]
        boxes[..., 3] += boxes[..., 1]

        # Rescale predictions to original image
        roi_width = roi[2] - roi[0]
        roi_height = roi[3] - roi[1]
        # Remove the letterbox offset, undo the resize, then undo the crop.
        boxes[..., [0, 2]] -= padding[0]
        boxes[..., [1, 3]] -= padding[2]
        boxes[..., [0, 2]] *= roi_width / (self.model_width - padding[0] - padding[1])
        boxes[..., [1, 3]] *= roi_height / (self.model_height - padding[2] - padding[3])
        boxes[..., [0, 2]] += roi[0]
        boxes[..., [1, 3]] += roi[1]
        return y

    def run(self, image, keypoints):
        """Detect objects inside the board region of ``image``.

        Args:
            image: H x W x C image array with values on a 0-255 scale.
                NOTE(review): the channel order the model expects is not
                visible here - confirm before passing OpenCV (BGR) frames.
            keypoints: Mapping from each name in ``CORNERS`` to its ``[x, y]``
                pixel position; values may be lists, tuples or arrays.

        Returns:
            float32 array holding the first batch element of the model's
            first output, with its boxes rewritten by ``_fix_bboxes``.
        """
        # Crop out region of interest
        height, width = image.shape[:2]
        keypoints_arr = np.array([np.asarray(keypoints[s]).tolist() for s in CORNERS])
        roi = get_roi(keypoints_arr, width, height, self.model_width, self.model_height)
        image = image[roi[1]:roi[3], roi[0]:roi[2]]

        # Resize, pad and scale
        image = self._resize(image)
        image, padding = self._pad(image)
        image = image.astype(self.input_dtype) / 255
        image = np.transpose(image, (2, 0, 1))
        # The transpose above yields a strided view; pack it before binding.
        image = np.ascontiguousarray(np.expand_dims(image, axis=0))

        # Inference
        self.io_binding.bind_cpu_input(self.inputs.name, image)
        self.sess.run_with_iobinding(self.io_binding)
        y = self.io_binding.copy_outputs_to_cpu()[0][0].astype(np.float32)

        # Rescale bboxes to match original image size
        y = self._fix_bboxes(y, roi, padding)
        return y
When I run this script, the output is a NumPy array of shape (16, 2835). What does this array represent?