Tengine 模型输出结果不一致

看到了 #371，想添加 MobileFaceNet。折腾了一天，demo 写出来了，但是结果不对：模型转换之后，同样输入全 0，Tengine 的输出与 MXNet 原始模型的输出不一致。

仓库：https://github.com/deepinsight/insightface/ 模型：https://github.com/deepinsight/insightface/wiki/Model-Zoo#34-mobilefacenetarcfacems1m-refine-v1 python代码：

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import numpy as np
import mxnet as mx

def do_flip(data):
  """Mirror every 2-D slice `data[i]` left-to-right, modifying `data` in place.

  Args:
    data: array indexed as `data[i, :, :]`; the leading axis is iterated and
      each slice is replaced by `np.fliplr` of itself.

  Returns:
    None. The result is written back into `data`.
  """
  plane_count = data.shape[0]
  for plane_no in range(plane_count):
    mirrored = np.fliplr(data[plane_no, :, :])
    data[plane_no, :, :] = mirrored

def get_model(ctx, image_size, model_str, layer):
  """Load an MXNet checkpoint and return a bound Module cut at `layer`.

  Args:
    ctx: MXNet context handed to `mx.mod.Module`.
    image_size: indexable pair; items 0 and 1 become the last two dimensions
      of the bound `(1, 3, ., .)` input shape.
    model_str: `'<prefix>,<epoch>'`; must split into exactly two parts on ','.
    layer: internal layer name; the symbol `<layer>_output` becomes the
      output of the returned module.

  Returns:
    The module, bound for a batch of one image and carrying the parameters
    loaded from the checkpoint.
  """
  parts = model_str.split(',')
  assert len(parts)==2
  prefix, epoch = parts[0], int(parts[1])
  print('loading',prefix, epoch)

  full_sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
  # Truncate the network at the requested internal output.
  sym = full_sym.get_internals()[layer+'_output']

  model = mx.mod.Module(symbol=sym, context=ctx, label_names = None)
  input_shape = (1, 3, image_size[0], image_size[1])
  model.bind(data_shapes=[('data', input_shape)])
  model.set_params(arg_params, aux_params)
  return model

# Reproduction: push one all-zero 3x112x112 image through the 'fc1' output
# of the checkpoint and print the resulting embedding.
model = get_model(
  mx.cpu(),
  (112,112),
  'F:\\cmf\\models\\model-y1-test2\\model,0',
  'fc1',
)
img = np.zeros((3,112,112), dtype=np.uint)
# Prepend the batch axis -> shape (1, 3, 112, 112).
input_blob = img[np.newaxis, ...]
data = mx.nd.array(input_blob)
db = mx.io.DataBatch(data=(data,))
model.forward(db, is_train=False)
outputs = model.get_outputs()
embedding = outputs[0].asnumpy()
print(embedding)

结果：

[[ 0.02154729 -0.4162834  -0.09101282  0.7223283  -0.11585438  0.0611931
  -0.15061818 -0.16417424  0.03914393 -0.22145584  0.03533301  0.13380608
  -0.24567278 -0.35991496 -0.17583792 -0.14100203  0.17048325  0.2689074
  -0.32794398 -0.11333947 -0.02633217 -0.47033542 -0.10491757  0.13647714
  -0.14120585  0.2601153  -0.2262695   0.15353546 -0.24051094  0.19708422
   0.11372234 -0.106502   -0.04841888  0.24713843  0.18096168  0.17739116
  -0.26957372  0.52657896  0.51086056 -0.34206912 -0.5117914   0.45784622
   0.31155518  0.01418302 -0.2696568  -0.14567082 -0.03723009  0.39327598
  -0.45988983 -0.1328338  -0.01042032 -0.7263899  -0.60506123  0.04480996
   0.00260787 -0.22903123  0.06622976  0.06381423 -0.07263767 -0.02013894
  -0.05507097  0.05374932  0.4380721  -0.264139   -0.23919213  0.41576117
  -0.05746923 -0.22019155 -0.1482154  -0.04492995  0.6597815  -0.14962853
  -0.18185823 -0.24876197  0.05852314  0.36671737  0.12722713 -0.17162843
   0.01947528 -0.3874434  -0.42982838  0.21893841  0.07214668 -0.18667988
  -0.07072115 -0.26045492  0.14012316  0.21946907 -0.28807697  0.12465948
   0.0042928   0.07700975  0.15083246  0.13297793  0.11155006  0.4361202
   0.19571272 -0.48228177 -0.71047276 -0.5125456  -0.4758979  -0.2889716
  -0.01963822  0.48873454  0.2459967   0.18166065  0.30512288  0.14568155
   0.20536348 -0.23129442  0.10100278  0.22046189  0.42669702 -0.03751966
   0.61379117  0.34831098  0.51180166 -0.22458424  0.1418204  -0.22925594
  -0.53545964 -0.12080469 -0.2029418   0.12714365 -0.26134396  0.08022993
  -0.33410394  0.42091098]]

使用的转换工具：Tengine 官网提供的模型转换工具。cpp 代码：

//
// Created by cmf on 20-8-24.
//

#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>

#include "common.h"
#include "tengine_c_api.h"
#include "tengine_operations.h"


/* Fallback input height/width, used by main() when -g leaves them at 0. */
#define DEFAULT_IMG_H 112
#define DEFAULT_IMG_W 112
/* Fallback for the three scale[] entries, used when -s is absent or leaves
 * any entry at 0. In the visible code scale[] is only printed, not applied. */
#define DEFAULT_SCALE1 1.f
#define DEFAULT_SCALE2 1.f
#define DEFAULT_SCALE3 1.f
/* Fallback for the three mean[] entries, used when -w is absent or leaves
 * any entry at -1. In the visible code mean[] is only printed, not applied. */
#define DEFAULT_MEAN1 0
#define DEFAULT_MEAN2 0
#define DEFAULT_MEAN3 0
/* Number of run_graph() iterations when -r is not given. */
#define DEFAULT_LOOP_COUNT 1
/* Thread count passed to prerun_graph_multithread() when -t is not given. */
#define DEFAULT_THREAD_COUNT 1
/* Not referenced anywhere in the visible code.
 * NOTE(review): presumably the embedding length (the run prints 128 output
 * values) -- confirm before relying on it. */
#define DEFAULT_FEATURE_SIZE 128

/* Write the option synopsis followed by a sample invocation to stderr. */
void show_usage()
{
    static const char* const synopsis =
        "[Usage]:  [-h]\n"
        "    [-m model_file] [-i image_file1,image_file2]\n"
        " [-g img_h,img_w] [-s scale[0],scale[1],scale[2]] [-w mean[0],mean[1],mean[2]]"
        " [-r loop_count] [-t thread_count]\n";
    static const char* const example =
        "\nmobilenet example: \n"
        "    ./classification -m /path/to/mobilenet.tmfile -i /path/to/img1.jpg,/path/to/img2.jpg -g 224,224"
        " -s 0.017,0.017,0.017 -w 104.007,116.669,122.679\n";

    /* Neither text contains a '%', so fputs emits exactly what fprintf did. */
    fputs(synopsis, stderr);
    fputs(example, stderr);
}

/*
 * Entry point of the zero-input output-comparison demo.
 *
 * Parses the command line, loads the Tengine model given with -m, binds an
 * all-zero 1x3xHxW float buffer as the graph input, runs the graph
 * loop_count times, then prints timing statistics and every value of the
 * first output tensor to stderr.
 *
 * The two image paths given with -i are only checked for existence; their
 * pixels are never loaded, and scale[]/mean[] are only echoed back.
 *
 * Returns 0 on success (or after -h) and -1 on any failure. Error paths
 * return immediately without releasing resources, as the process is about
 * to exit anyway.
 */
int main(int argc, char* argv[])
{
    int loop_count = DEFAULT_LOOP_COUNT;
    int num_thread = DEFAULT_THREAD_COUNT;
    char* model_file = NULL;
    float img_hw[2] = {0.f};
    int img_h = 0;
    int img_w = 0;
    float mean[3] = {-1.f, -1.f, -1.f};
    float scale[3] = {0.f, 0.f, 0.f};

    char** image_files = (char**)malloc(2 * sizeof(char*));
    if (image_files == NULL)
    {
        fprintf(stderr, "Allocate image file list failed\n");
        return -1;
    }
    /* Start from NULL so a missing or incomplete -i is detectable below
     * instead of reading uninitialised pointers. */
    image_files[0] = NULL;
    image_files[1] = NULL;

    int res;
    /* 'l:' is accepted for backward compatibility but has no handler. */
    while ((res = getopt(argc, argv, "m:i:l:g:s:w:r:t:h")) != -1)
    {
        switch (res)
        {
            case 'm':
                model_file = optarg;
                break;
            case 'i':
                split(image_files, optarg, ",");
                /* Passing NULL to %s is undefined behaviour, so substitute a
                 * placeholder when fewer than two names were supplied. */
                fprintf(stderr, "image1 %s, image2 %s\n", image_files[0] ? image_files[0] : "(null)",
                        image_files[1] ? image_files[1] : "(null)");
                break;
            case 'g':
                split(img_hw, optarg, ",");
                img_h = ( int )img_hw[0];
                img_w = ( int )img_hw[1];
                break;
            case 's':
                split(scale, optarg, ",");
                break;
            case 'w':
                split(mean, optarg, ",");
                break;
            case 'r':
                loop_count = atoi(optarg);
                break;
            case 't':
                num_thread = atoi(optarg);
                break;
            case 'h':
                show_usage();
                return 0;
            default:
                break;
        }
    }

    /* check files */
    if (model_file == NULL)
    {
        fprintf(stderr, "Error: Tengine model file not specified!\n");
        show_usage();
        return -1;
    }

    if (image_files[0] == NULL || image_files[1] == NULL)
    {
        fprintf(stderr, "Error: two image files must be given with -i image_file1,image_file2!\n");
        show_usage();
        return -1;
    }

    if (!check_file_exist(model_file) || !check_file_exist(image_files[0]) || !check_file_exist(image_files[1]))
        return -1;

    if (img_h == 0)
    {
        img_h = DEFAULT_IMG_H;
        fprintf(stderr, "Image height not specified, use default %d\n", img_h);
    }

    if (img_w == 0)
    {
        img_w = DEFAULT_IMG_W;
        fprintf(stderr, "Image width not specified, use default  %d\n", img_w);
    }

    if (scale[0] == 0.f || scale[1] == 0.f || scale[2] == 0.f)
    {
        scale[0] = DEFAULT_SCALE1;
        scale[1] = DEFAULT_SCALE2;
        scale[2] = DEFAULT_SCALE3;
        fprintf(stderr, "Scale value not specified, use default  %.1f, %.1f, %.1f\n", scale[0], scale[1], scale[2]);
    }

    if (mean[0] == -1.0 || mean[1] == -1.0 || mean[2] == -1.0)
    {
        mean[0] = DEFAULT_MEAN1;
        mean[1] = DEFAULT_MEAN2;
        mean[2] = DEFAULT_MEAN3;
        fprintf(stderr, "Mean value not specified, use default   %.1f, %.1f, %.1f\n", mean[0], mean[1], mean[2]);
    }

    /* A non-positive count would skip inference entirely and make the
     * average below a division by zero. */
    if (loop_count < 1)
    {
        loop_count = DEFAULT_LOOP_COUNT;
        fprintf(stderr, "Loop count must be positive, use default %d\n", loop_count);
    }

    /* initialize tengine */
    if (init_tengine() != 0)
    {
        fprintf(stderr, "Initial tengine failed.\n");
        return -1;
    }
    fprintf(stderr, "tengine-lite library version: %s\n", get_tengine_version());

    /* create graph, load tengine model xxx.tmfile */
    graph_t graph = create_graph(nullptr, "tengine", model_file);
    if (NULL == graph)
    {
        fprintf(stderr, "Create graph failed.\n");
        fprintf(stderr, "errno: %d \n", get_tengine_errno());
        return -1;
    }

    /* set the input shape to initialize the graph, and prerun graph to infer shape */
    int img_size = img_h * img_w * 3;
    int dims[] = {1, 3, img_h, img_w};    // nchw
    float* input_data = ( float* )malloc(img_size * sizeof(float));
    if (input_data == NULL)
    {
        fprintf(stderr, "Allocate input buffer failed\n");
        return -1;
    }

    /* the test input is all zeros; no image is decoded */
    for (int i = 0; i < img_size; i++)
    {
        input_data[i] = 0;
    }

    tensor_t input_tensor = get_graph_input_tensor(graph, 0, 0);
    if (input_tensor == NULL)
    {
        fprintf(stderr, "Get input tensor failed\n");
        return -1;
    }

    if (set_tensor_shape(input_tensor, dims, 4) < 0)
    {
        fprintf(stderr, "Set input tensor shape failed\n");
        return -1;
    }

    if (prerun_graph_multithread(graph, TENGINE_CLUSTER_ALL, num_thread) < 0)
    {
        fprintf(stderr, "Prerun multithread graph failed.\n");
        return -1;
    }

    /* set the data mem to input tensor */
    if (set_tensor_buffer(input_tensor, input_data, ( int )(img_size * sizeof(float))) < 0)
    {
        fprintf(stderr, "Set input tensor buffer failed\n");
        return -1;
    }

    /* run graph */
    double min_time = __DBL_MAX__;
    double max_time = -__DBL_MAX__;
    double total_time = 0.;
    for (int i = 0; i < loop_count; i++)
    {
        double start = get_current_time();
        if (run_graph(graph, 1) < 0)
        {
            fprintf(stderr, "Run graph failed\n");
            return -1;
        }
        double end = get_current_time();
        double cur = end - start;
        total_time += cur;
        if (min_time > cur)
            min_time = cur;
        if (max_time < cur)
            max_time = cur;
    }
    /* Both paths were verified non-NULL above. */
    fprintf(stderr, "image files : %s, %s\n", image_files[0], image_files[1]);
    fprintf(stderr, "img_h, img_w, scale[3], mean[3] : %d %d , %.3f %.3f %.3f, %.1f %.1f %.1f\n", img_h, img_w,
            scale[0], scale[1], scale[2], mean[0], mean[1], mean[2]);
    fprintf(stderr, "Repeat %d times, thread %d, avg time %.2f ms, max_time %.2f ms, min_time %.2f ms\n", loop_count,
            num_thread, total_time / loop_count, max_time, min_time);
    fprintf(stderr, "--------------------------------------\n");

    /* dump every value of the first output tensor */
    tensor_t output_tensor = get_graph_output_tensor(graph, 0, 0);
    if (output_tensor == NULL)
    {
        fprintf(stderr, "Get output tensor failed\n");
        return -1;
    }
    float* output_data = ( float* )get_tensor_buffer(output_tensor);
    if (output_data == NULL)
    {
        fprintf(stderr, "Get output tensor buffer failed\n");
        return -1;
    }
    int output_size = get_tensor_buffer_size(output_tensor) / sizeof(float);
    for (int i = 0; i < output_size; i++)
    {
        fprintf(stderr, "%f, %d\n", output_data[i], i);
    }

    fprintf(stderr, "output_size %d\n", output_size);
    fprintf(stderr, "--------------------------------------\n");

    /* NOTE(review): the graph, its tensors and the Tengine runtime are not
     * torn down here; consider adding the teardown calls from the Tengine C
     * API before these frees -- confirm the exact names against
     * tengine_c_api.h for this library version. */
    free(input_data);
    free(image_files);
    return 0;
}

结果：

image1 /home/cmf/Pictures/align/Anthony_Hopkins_0002.bmp, image2 /home/cmf/Pictures/align/Anthony_Hopkins_0001.bmp
Image height not specified, use default 112
Image width not specified, use default  112
Scale value not specified, use default  1.0, 1.0, 1.0
Mean value not specified, use default   0.0, 0.0, 0.0
tengine-lite library version: 0.2-dev
image file : (null)
img_h, img_w, scale[3], mean[3] : 112 112 , 1.000 1.000 1.000, 0.0 0.0 0.0
Repeat 1 times, thread 1, avg time 5902.37 ms, max_time 5902.37 ms, min_time 5902.37 ms
--------------------------------------
0.362114, 0
-2.963342, 1
3.301326, 2
2.838768, 3
-1.922418, 4
0.529027, 5
6.178867, 6
-1.005800, 7
-1.109636, 8
4.414474, 9
3.119429, 10
3.637292, 11
1.773481, 12
2.699330, 13
-0.010375, 14
-2.787954, 15
-1.055549, 16
0.130140, 17
-1.534408, 18
0.706951, 19
2.148369, 20
-1.312149, 21
2.624404, 22
-1.797011, 23
-0.939566, 24
1.641575, 25
-3.084844, 26
-2.920630, 27
1.967027, 28
2.191836, 29
3.107494, 30
2.182926, 31
-1.316845, 32
1.068025, 33
2.261882, 34
-0.130286, 35
-1.141233, 36
2.106531, 37
2.500280, 38
-3.654729, 39
1.834997, 40
-1.201777, 41
2.618726, 42
1.001131, 43
-1.039707, 44
1.739381, 45
0.548914, 46
-0.571207, 47
-2.208367, 48
0.342063, 49
-0.677966, 50
-0.688250, 51
-3.986355, 52
-0.728307, 53
-0.146743, 54
0.597801, 55
-2.542715, 56
2.394146, 57
1.320346, 58
-2.441096, 59
2.343701, 60
1.993334, 61
-0.281862, 62
0.814715, 63
-1.255474, 64
1.792185, 65
-1.430589, 66
1.048479, 67
2.565519, 68
-0.109898, 69
0.225530, 70
-2.756450, 71
-1.366037, 72
-1.569032, 73
-4.139597, 74
-0.362517, 75
1.315166, 76
1.794949, 77
2.273474, 78
-0.770949, 79
-1.419995, 80
1.774996, 81
-0.088472, 82
-0.385829, 83
-0.983600, 84
0.257186, 85
-0.198454, 86
2.252909, 87
1.478664, 88
0.017091, 89
1.752245, 90
-1.496271, 91
-3.115757, 92
0.865825, 93
-0.244161, 94
-2.079467, 95
-1.181141, 96
-4.464627, 97
1.239551, 98
-0.270379, 99
-3.965930, 100
0.823042, 101
0.670162, 102
-0.817640, 103
-1.470533, 104
-1.728705, 105
0.392263, 106
-0.102041, 107
1.387308, 108
2.787490, 109
-0.749577, 110
0.710049, 111
0.157990, 112
-1.053664, 113
4.362529, 114
-0.256790, 115
-0.434527, 116
1.626453, 117
-1.711483, 118
-1.843128, 119
-0.476325, 120
2.609643, 121
-0.704538, 122
-5.628683, 123
1.847601, 124
-3.102808, 125
-3.541050, 126
-1.755906, 127
output_size 128
--------------------------------------

Process finished with exit code 0

Aug 25 '20 07:08 fanqie03

请问，与原始框架相比，哪个结果更接近正确呢？

Aug 25 '20 08:08 BUG1989

哪个结果？？是哪一层结果吗？

Aug 25 '20 08:08 fanqie03

现在的效果是：即使输入的是不同的人，模型算出的相似度也都很高，都会被判定为同一个人。

Aug 25 '20 08:08 fanqie03

我写了个和Tengine debug类似的脚本，可以输出mxnet每一层的结果 https://blog.csdn.net/night_mfc/article/details/108219908 希望对你有所帮助

Aug 25 '20 09:08 fanqie03

谢谢您的分享，我会安排工程师，尽快解决这个问题

Aug 25 '20 09:08 BUG1989

我这用的是相同的mobilefacenet-mxnet模型，转到onnx后再转tengine，输入都是0，输出结果是一致的。

Oct 18 '22 11:10 wei8171023