ncnn extract nullptr?

error log | 日志或报错信息 | ログ

想把RobustVideoMatting（稳健视频抠图模型）用ncnn跑起来，经过一番折腾后成功转成静态维度的onnx，并且使用了onnxsim去除了胶水op，用onnxruntime可以顺利跑通转换后的模型；然后将其转换成ncnn，在转换时报错：

onnx2ncnn rvm_mobilenetv3_fp32-480-480-sim.onnx rvm_mobilenetv3_fp32-480-480.param rvm_mobilenetv3_fp32-480-480.bin
Unsupported split axis !
Unsupported split axis !

在使用ncnn的c++和python接口进行推理时，出现错误，c++提示空指针，python的则是直接崩溃。
所有ex.input返回的错误码都是0，应该是正常的，但是在ex.extract("fgr",fgr)开始，返回的错误码是-100，data是一个空指针，导致cv::Mat初始化出错，程序崩溃。折腾了2天也没解决，目前依然没有头绪🤔，于是决定来这里占个坑~ 🙄

model | 模型 | モデル

original model onnx-model.zip ncnn-model.zip

这是其中一个被我转换成480x480静态输入的onnx模型，以及对应的ncnn模型。其他不同输入维度模型，问题应该相同。

how to reproduce | 复现步骤 | 再現方法

python测试代码

import ncnn
import numpy as np
import torch

def infer_ncnn_rvm():
    param_path = "./checkpoint/rvm_mobilenetv3_fp32-480-480-opt.param"
    bin_path = "./checkpoint/rvm_mobilenetv3_fp32-480-480-opt.bin"

    net = ncnn.Net()

    net.load_param(param_path)

    net.load_model(bin_path)

    input_names = net.input_names()
    output_names = net.output_names()

    print(input_names)
    print(output_names)

    src = torch.randn((480, 480, 3)).cpu().numpy()
    r1i = np.zeros([240, 240, 16])
    r2i = np.zeros([120, 120, 20])
    r3i = np.zeros([60, 60, 40])
    r4i = np.zeros([30, 30, 64])

    print(src.shape)
    print(r1i.shape)
    print(r2i.shape)
    print(r3i.shape)
    print(r4i.shape)

    src_in = ncnn.Mat(src)
    r1i_in = ncnn.Mat(r1i)
    r2i_in = ncnn.Mat(r2i)
    r3i_in = ncnn.Mat(r3i)
    r4i_in = ncnn.Mat(r4i)

    ex = net.create_extractor()

    ex.input("src", src_in)
    ex.input("r1i", r1i_in)
    ex.input("r2i", r2i_in)
    ex.input("r3i", r3i_in)
    ex.input("r4i", r4i_in)

    print("ex.input done")
    fgr = ex.extract("fgr")  # 运行到这个直接崩溃
    print("ex.extract fgr done")

    pha = ex.extract("pha")
    r1o = ex.extract("r1o")
    r2o = ex.extract("r2o")
    r3o = ex.extract("r3o")
    r4o = ex.extract("r4o")

    print("fgr".center(100, "="))
    print(fgr.shape)
    print(fgr)
if __name__ == "__main__":
    infer_ncnn_rvm()

C++代码

// 一些自定义的类
typedef struct MattingContentType
    {
      cv::Mat fgr_mat; // fore ground mat 3 channel (R,G,B) 0.~1. or 0~255
      cv::Mat pha_mat; // alpha(matte) 0.~1.
      cv::Mat merge_mat; // merge bg and fg according pha
      bool flag;

      MattingContentType(): flag(false)
      {};
    } MattingContent;

class BasicNCNNHandler
  {
  protected:
    ncnn::Net *net = nullptr;
    const char *log_id = nullptr;
    const char *param_path = nullptr;
    const char *bin_path = nullptr;

  protected:
    const unsigned int num_threads; // initialize at runtime.

  protected:
    explicit BasicNCNNHandler(const std::string &_param_path,
                              const std::string &_bin_path,
                              unsigned int _num_threads = 1);

    virtual ~BasicNCNNHandler();

    // un-copyable
  protected:
    BasicNCNNHandler(const BasicNCNNHandler &) = delete; //
    BasicNCNNHandler(BasicNCNNHandler &&) = delete; //
    BasicNCNNHandler &operator=(const BasicNCNNHandler &) = delete; //
    BasicNCNNHandler &operator=(BasicNCNNHandler &&) = delete; //

  private:
    virtual void transform(const cv::Mat &mat, ncnn::Mat &in) = 0;

  private:
    void initialize_handler();
  };

BasicNCNNHandler::BasicNCNNHandler(
    const std::string &_param_path, const std::string &_bin_path, unsigned int _num_threads) :
    log_id(_param_path.data()), param_path(_param_path.data()),
    bin_path(_bin_path.data()), num_threads(_num_threads)
{
  initialize_handler();
}

void BasicNCNNHandler::initialize_handler()
{
  // init net, change this setting for better performance.
  net = new ncnn::Net();
  net->opt.use_vulkan_compute = false; // default
  net->opt.use_fp16_arithmetic = false;
  net->load_param(param_path);
  net->load_model(bin_path);
}

BasicNCNNHandler::~BasicNCNNHandler()
{
  if (net) delete net;
  net = nullptr;
}

// 主要的代码  
NCNNRobustVideoMatting::NCNNRobustVideoMatting(
    const std::string &_param_path, const std::string &_bin_path,
    unsigned int _num_threads, int _input_height,
    int _input_width, unsigned int _variant_type
) :
    BasicNCNNHandler(_param_path, _bin_path, _num_threads),
    input_height(_input_height), input_width(_input_width),
    variant_type(_variant_type)
{
  initialize_context();
}

void NCNNRobustVideoMatting::initialize_context()
{
  if (variant_type == VARIANT::MOBILENETV3)
  {
    if (input_width == 1920 && input_height == 1080)
    {
      r1i = ncnn::Mat(240, 135, 16); // w,h,c in NCNN
      r2i = ncnn::Mat(120, 68, 20);
      r3i = ncnn::Mat(60, 34, 40);
      r4i = ncnn::Mat(30, 17, 64);
    } // hxw 480x640 480x480 640x480
    else
    {
      r1i = ncnn::Mat(input_width / 2, input_height / 2, 16);
      r2i = ncnn::Mat(input_width / 4, input_height / 4, 20);
      r3i = ncnn::Mat(input_width / 8, input_height / 8, 40);
      r4i = ncnn::Mat(input_width / 16, input_height / 16, 64);
    }
  } // RESNET50
  else
  {
    if (input_width == 1920 && input_height == 1080)
    {
      r1i = ncnn::Mat(240, 135, 16);
      r2i = ncnn::Mat(120, 68, 32);
      r3i = ncnn::Mat(60, 34, 64);
      r4i = ncnn::Mat(30, 17, 128);
    } // hxw 480x640 480x480 640x480
    else
    {
      r1i = ncnn::Mat(input_width / 2, input_height / 2, 16);
      r2i = ncnn::Mat(input_width / 4, input_height / 4, 20);
      r3i = ncnn::Mat(input_width / 8, input_height / 8, 40);
      r4i = ncnn::Mat(input_width / 16, input_height / 16, 64);
    }
  }
  // init 0.
  r1i.fill(0.f);
  r2i.fill(0.f);
  r3i.fill(0.f);
  r4i.fill(0.f);

  context_is_initialized = true;
}

void NCNNRobustVideoMatting::transform(const cv::Mat &mat, ncnn::Mat &in)
{
  // BGR NHWC -> RGB NCHW & resize
  int h = mat.rows;
  int w = mat.cols;
  in = ncnn::Mat::from_pixels_resize(
      mat.data, ncnn::Mat::PIXEL_BGR2RGB,
      w, h, input_width, input_height
  );
  in.substract_mean_normalize(mean_vals, norm_vals);
}

void NCNNRobustVideoMatting::detect_video(const std::string &video_path,
                                          const std::string &output_path,
                                          std::vector<MattingContent> &contents,
                                          bool save_contents, unsigned int writer_fps)
{
  // 0. init video capture
  cv::VideoCapture video_capture(video_path);
  const unsigned int width = video_capture.get(cv::CAP_PROP_FRAME_WIDTH);
  const unsigned int height = video_capture.get(cv::CAP_PROP_FRAME_HEIGHT);
  const unsigned int frame_count = video_capture.get(cv::CAP_PROP_FRAME_COUNT);
  if (!video_capture.isOpened())
  {
    std::cout << "Can not open video: " << video_path << "\n";
    return;
  }
  // 1. init video writer
  cv::VideoWriter video_writer(output_path, cv::VideoWriter::fourcc('m', 'p', '4', 'v'),
                               writer_fps, cv::Size(width, height));
  if (!video_writer.isOpened())
  {
    std::cout << "Can not open writer: " << output_path << "\n";
    return;
  }

  // 2. matting loop
  cv::Mat mat;
  unsigned int i = 0;
  while (video_capture.read(mat))
  {
    i += 1;
    types::MattingContent content;
    this->detect(mat, content);
    // 3. save contents and writing out.
    if (content.flag)
    {
      if (save_contents) contents.push_back(content);
      if (!content.merge_mat.empty()) video_writer.write(content.merge_mat);
    }
    // 4. check context states.
    if (!context_is_update) break;
  }

  // 5. release
  video_capture.release();
  video_writer.release();
}

void NCNNRobustVideoMatting::detect(const cv::Mat &mat, MattingContent &content)
{
  if (mat.empty()) return;
  int img_h = mat.rows;
  int img_w = mat.cols;
  if (!context_is_initialized) return;

  // 1. make input tensor
  ncnn::Mat src;
  this->transform(mat, src);

  // 2. inference & extract
  auto extractor = net->create_extractor();
  extractor.set_light_mode(false);  // default
  extractor.set_num_threads(num_threads);
  extractor.input("src", src);
  extractor.input("r1i", r1i);
  extractor.input("r2i", r2i);
  extractor.input("r3i", r3i);
  extractor.input("r4i", r4i); // 运行到这里都没问题

  // 3. generate matting
  this->generate_matting(extractor, content, img_h, img_w);

  // 4. update context (needed for video detection.)
  context_is_update = false; // init state.
  this->update_context(extractor);
}


void NCNNRobustVideoMatting::generate_matting(ncnn::Extractor &extractor,
                                              MattingContent &content,
                                              int img_h, int img_w)
{
  ncnn::Mat fgr, pha;
  extractor.extract("fgr", fgr);
  extractor.extract("pha", pha);
  float *fgr_ptr = (float *) fgr.data;  // 此处data是空指针
  float *pha_ptr = (float *) pha.data;

  const unsigned int channel_step = input_height * input_width;

  // fast assign & channel transpose(CHW->HWC).
  cv::Mat rmat(input_height, input_width, CV_32FC1, fgr_ptr);  // nullptr导致cv::Mat初始化错误
  cv::Mat gmat(input_height, input_width, CV_32FC1, fgr_ptr + channel_step);
  cv::Mat bmat(input_height, input_width, CV_32FC1, fgr_ptr + 2 * channel_step);
  cv::Mat pmat(input_height, input_width, CV_32FC1, pha_ptr); // ref only, zero-copy.
  rmat *= 255.f;
  bmat *= 255.f;
  gmat *= 255.f;
  cv::Mat rest = 1.f - pmat;
  cv::Mat mbmat = bmat.mul(pmat) + rest * 153.f;
  cv::Mat mgmat = gmat.mul(pmat) + rest * 255.f;
  cv::Mat mrmat = rmat.mul(pmat) + rest * 120.f;
  std::vector<cv::Mat> fgr_channel_mats, merge_channel_mats;
  fgr_channel_mats.push_back(bmat);
  fgr_channel_mats.push_back(gmat);
  fgr_channel_mats.push_back(rmat);
  merge_channel_mats.push_back(mbmat);
  merge_channel_mats.push_back(mgmat);
  merge_channel_mats.push_back(mrmat);

  content.pha_mat = pmat;
  cv::merge(fgr_channel_mats, content.fgr_mat);
  cv::merge(merge_channel_mats, content.merge_mat);
  content.fgr_mat.convertTo(content.fgr_mat, CV_8UC3);
  content.merge_mat.convertTo(content.merge_mat, CV_8UC3);

  if (img_w != input_width || img_h != input_height)
  {
    cv::resize(content.pha_mat, content.pha_mat, cv::Size(img_w, img_h));
    cv::resize(content.fgr_mat, content.fgr_mat, cv::Size(img_w, img_h));
    cv::resize(content.merge_mat, content.merge_mat, cv::Size(img_w, img_h));
  }

  content.flag = true;
}

void NCNNRobustVideoMatting::update_context(ncnn::Extractor &extractor)
{
  ncnn::Mat r1o, r2o, r3o, r4o;
  extractor.extract("r1o", r1o);
  extractor.extract("r2o", r2o);
  extractor.extract("r3o", r3o);
  extractor.extract("r4o", r4o);

  r1i.clone_from(r1o); // deepcopy
  r2i.clone_from(r2o); // deepcopy
  r3i.clone_from(r3o); // deepcopy
  r4i.clone_from(r4o); // deepcopy

  context_is_update = true;
}

// 头文件 
class NCNNRobustVideoMatting : public BasicNCNNHandler
  {
  public:
    explicit NCNNRobustVideoMatting(const std::string &_param_path,
                                    const std::string &_bin_path,
                                    unsigned int _num_threads = 1,
                                    int _input_height = 480,
                                    int _input_width = 640,
                                    unsigned int _variant_type = VARIANT::MOBILENETV3); //
    ~NCNNRobustVideoMatting() override = default;

  private:
    const float mean_vals[3] = {0.f, 0.f, 0.f}; // RGB
    const float norm_vals[3] = {1.f / 255.f, 1.f / 255.f, 1.f / 255.f};
    // hardcode input node names, hint only.
    // downsample_ratio has been freeze while onnx exported
    // and, the input size of each input has been freeze, also.
    std::vector<const char *> input_node_names = {
        "src",
        "r1i",
        "r2i",
        "r3i",
        "r4i"
    };
    // hardcode output node names, hint only.
    std::vector<const char *> output_node_names = {
        "fgr",
        "pha",
        "r1o",
        "r2o",
        "r3o",
        "r4o"
    };
    bool context_is_update = false;
    bool context_is_initialized = false;

  private:
    enum VARIANT
    {
      MOBILENETV3 = 0,
      RESNET50 = 1
    };
    // will be update inner video matting process.
    ncnn::Mat r1i, r2i, r3i, r4i;
    // input size & variant_type, initialize at runtime.
    const int input_height;
    const int input_width;
    const unsigned int variant_type;

  private:

    void transform(const cv::Mat &mat, ncnn::Mat &in) override;

    void initialize_context();

    void generate_matting(ncnn::Extractor &extractor,
                          MattingContent &content,
                          int img_h, int img_w);

    void update_context(ncnn::Extractor &extractor);

  public:
    /**
     * Image Matting Using RVM(https://github.com/PeterL1n/RobustVideoMatting)
     * @param mat: cv::Mat BGR HWC
     * @param content: MattingContent to catch the detected results.
     * See https://github.com/PeterL1n/RobustVideoMatting/blob/master/documentation/inference_zh_Hans.md
     */
    void detect(const cv::Mat &mat, MattingContent &content);
   /**
     * Video Matting Using RVM(https://github.com/PeterL1n/RobustVideoMatting)
     * @param video_path: eg. xxx/xxx/input.mp4
     * @param output_path: eg. xxx/xxx/output.mp4
     * @param contents: vector of MattingContent to catch the detected results.
     * @param save_contents: false by default, whether to save MattingContent.
     * See https://github.com/PeterL1n/RobustVideoMatting/blob/master/documentation/inference_zh_Hans.md
     * @param writer_fps: FPS for VideoWriter, 20 by default.
     */
    void detect_video(const std::string &video_path,
                      const std::string &output_path,
                      std::vector<MattingContent> &contents,
                      bool save_contents = false,
                      unsigned int writer_fps = 20);

C++测试demo

static void test_ncnn_rvm()
{

  //  UNLUCKY: Test Failed!
  std::string param_path = "rvm_mobilenetv3_fp32-480-480-opt.param";
  std::string bin_path = "rvm_mobilenetv3_fp32-480-480-opt.bin";
  std::string video_path = "test_rvm_1.mp4";
  std::string output_path = "test_rvm_1_ncnn.mp4";

  auto *rvm = new NCNNRobustVideoMatting(param_path, bin_path, 1, 480, 480, 0);  // 1 threads
  td::vector<MattingContent> contents;

  // 1. video matting.
  rvm->detect_video(video_path, output_path, contents, false);

  delete rvm;
}

int main(__unused int argc, __unused char *argv[])
{
  test_ncnn_rvm();
  return 0;
}

非常感谢~

Oct 13 '21 15:10 DefTruth

我也遇到了相似的问题，也卡了两三天没有解决了呜呜呜呜呜，但是好像直接extract返回值都得不到，ex.input是正常的0，安卓的log里面显示 2021-10-12 22:21:43.834 19393-19393/? A/DEBUG: signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0x0 2021-10-12 22:21:43.834 19393-19393/? A/DEBUG: Cause: null pointer dereference 希望来蹭一蹭能够得到解决！

Oct 14 '21 01:10 Zhaohuii-Wang

同样遇到这个问题，有解决方案吗？或者解决的方向？

Mar 26 '23 12:03 kekxv

遇到了同样的问题，extract出来的值是nan，而且我的模型有2个输出值，net.output_names()只能输出一个name

Apr 30 '24 02:04 feature-space-move

针对onnx模型转换的各种问题，推荐使用最新的pnnx工具转换到ncnn In view of various problems in onnx model conversion, it is recommended to use the latest pnnx tool to convert your model to ncnn

pip install pnnx
pnnx model.onnx inputshape=[1,3,224,224]

详细参考文档 Detailed reference documentation https://github.com/pnnx/pnnx https://github.com/Tencent/ncnn/wiki/use-ncnn-with-pytorch-or-onnx#how-to-use-pnnx

Aug 05 '24 07:08 nihui