How to obtain the Speed metric in the TF2 model zoo

Open gcunhase opened this issue 4 years ago • 5 comments

Dear TF experts,

How can we obtain the inference speed metrics in the TF2 model zoo?

This is my script to measure speed. I'm testing it on EfficientDet D0, but it's giving me a latency of 205 ms instead of the reported 39 ms. Please advise.

import argparse
import numpy as np
import tensorflow as tf

import time
from object_detection.utils import config_util
from object_detection.builders import model_builder


"""
Example flags:
--checkpoint ./weights/efficientdet_d0_coco17_tpu-32/checkpoint --input_shape="1,3,512,512"
"""


def process_performance_stats(timestamps, batch_size, mode):
    """ Get confidence intervals
        Source: https://github.com/NVIDIA/DeepLearningExamples/blob/master/TensorFlow2/Segmentation/UNet_Medical/runtime/parse_results.py

    :param timestamps: Collection of timestamps
    :param batch_size: Number of samples per batch
    :param mode: Estimator's execution mode
    :return: Stats
    """
    timestamps_ms = 1000 * timestamps
    throughput_imgps = (1000.0 * batch_size / timestamps_ms).mean()
    stats = {f"throughput_{mode}": throughput_imgps,
             f"latency_{mode}_mean": timestamps_ms.mean()}
    for level in [90, 95, 99]:
        stats.update({f"latency_{mode}_{level}": np.percentile(timestamps_ms, level)})

    return stats


def load_model_from_ckpt(pipeline_config_path, pre_trained_model_dir):
    """ Loads pipeline config and builds a detection model from checkpoint.

    :param pipeline_config_path: Path to the pipeline .config file.
    :param pre_trained_model_dir: Path to model checkpoint.
    :return: loaded model and detection function.
    """
    # Load pipeline config and build a detection model
    configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
    model_config = configs['model']
    detection_model = model_builder.build(
        model_config=model_config, is_training=False)

    # Restore checkpoint
    ckpt = tf.compat.v2.train.Checkpoint(
        model=detection_model)
    latest_checkpoint = tf.train.latest_checkpoint(pre_trained_model_dir)
    # Only the model is restored, so expect_partial() silences warnings about
    # checkpoint values that are not matched (e.g. optimizer slots).
    ckpt.restore(latest_checkpoint).expect_partial()

    def get_model_detection_function(model):
        """Get a tf.function for detection."""

        @tf.function
        def detect_fn(image):
            """Detect objects in image."""

            image, shapes = model.preprocess(image)
            prediction_dict = model.predict(image, shapes)
            detections = model.postprocess(prediction_dict, shapes)

            return detections, prediction_dict, tf.reshape(shapes, [-1])

        return detect_fn

    detect_fn = get_model_detection_function(detection_model)
    return detection_model, detect_fn, configs, ckpt


def main(args):
    # Convert input_shape from string to list of ints: "1,3,512,512" -> [1,3,512,512]
    input_shape = args.input_shape.split(",")
    assert len(input_shape) == 4
    for i in range(len(input_shape)):
        input_shape[i] = int(input_shape[i])
        assert input_shape[i] >= 1

    # Build a constant input batch for latency evaluation
    # (np.ones is constant, not random; input_shape already includes a batch dimension)
    input_batch = np.ones(input_shape)
    for _ in range(args.batch_size - 1):
        input_batch = np.concatenate((input_batch, np.ones(input_shape)), axis=0)

    ########### With Checkpoint ##############
    if args.checkpoint:
        model, detect_fn, configs, ckpt = load_model_from_ckpt(
            pipeline_config_path='./weights/efficientdet_d0_coco17_tpu-32_pipeline_load.config',
            pre_trained_model_dir=args.checkpoint)

        input_ckpt = tf.convert_to_tensor(input_batch, dtype=tf.float32)

        print("Warm up step")
        for i in range(args.warmup_steps):
            output = detect_fn(input_ckpt)

        print("Latency eval")
        timings = []
        for i in range(100):
            start_time = time.time()
            detect_fn(input_ckpt)
            timings.append(time.time() - start_time)
        timings = np.array(timings)
        stats = process_performance_stats(timings, batch_size=args.batch_size, mode="eval")
        print("Checkpoint stats: {}".format(stats))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-ckpt", "--checkpoint", help="The TensorFlow checkpoint path to validate against")
    parser.add_argument("-i", "--input_shape", default="1,512,512,3",
                        help="Set the input shape of the graph, as comma-separated dimensions in NCHW or NHWC format, "
                             "default: 1,512,512,3")
    parser.add_argument("-bs", "--batch_size", default=8, type=int,
                        help="Number of images to perform latency evaluation on.")
    parser.add_argument("--warmup_steps", type=int, default=10, help="Number of warmup steps.")
    args = parser.parse_args()
    main(args)

gcunhase avatar Sep 10 '21 09:09 gcunhase

Any updates on this issue?

gcunhase avatar Sep 14 '21 23:09 gcunhase

Is the reported time of 39ms for batch_size=1?

gcunhase avatar Sep 17 '21 02:09 gcunhase
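A minimal sketch relating the script's per-batch latency to a per-image figure, assuming the zoo's 39 ms is for a single image and that the 205 ms above was measured with the script's default batch_size of 8 (neither is confirmed in the thread):

# Rough batch-size conversion under the assumptions stated above.
batch_size = 8                # the script's default --batch_size
latency_eval_mean_ms = 205.0  # hypothetical per-batch mean latency from the stats dict
per_image_ms = latency_eval_mean_ms / batch_size
print("~{:.1f} ms per image".format(per_image_ms))  # ~25.6 ms under these assumptions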

Hello, I think this is highly related to your hardware setup. I'm guessing that the results presented in the TF2 Model Zoo are directly the ones reported in the research papers, and those teams are using the latest GPUs, like the Nvidia Titan series.

For example, if you check the EfficientDet paper, you can see that they are using a Titan V GPU.

thmsgntz avatar Oct 13 '21 09:10 thmsgntz

Hi, thank you for your reply. I'm currently also using one of the latest GPUs (RTX 3090). Could there be any other reason why the latency results are so different?

gcunhase avatar Oct 13 '21 23:10 gcunhase

Hi, I tried using the script you provided and had this output:

Latency eval
Checkpoint stats: {'throughput_eval': 31.124873192969257, 'latency_eval_mean': 129.15063619613647, 'latency_eval_90': 137.2121572494507, 'latency_eval_95': 152.260422706604, 'latency_eval_99': 166.06466293334964}

May I ask where the model inference time is in the dictionary output? Thank you.

JerickoDG avatar Oct 25 '23 14:10 JerickoDG
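
A minimal sketch of how to read the dictionary produced by process_performance_stats in the script above, using the (rounded) values from the output posted: the inference time is under the latency_eval_* keys, in milliseconds, with latency_eval_mean being the mean wall-clock time of one detect_fn call and throughput_eval given in images per second. Since each call processes --batch_size images, a per-image time would be latency_eval_mean divided by the batch size.

stats = {'throughput_eval': 31.12,     # images per second
         'latency_eval_mean': 129.15,  # mean time of one detect_fn call, in ms
         'latency_eval_90': 137.21,    # 90th-percentile latency, in ms
         'latency_eval_95': 152.26,    # 95th-percentile latency, in ms
         'latency_eval_99': 166.06}    # 99th-percentile latency, in ms
print("mean inference time per call: {:.1f} ms".format(stats['latency_eval_mean']))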