diff --git a/3rdparty/libtengine/tengine.cmake b/3rdparty/libtengine/tengine.cmake
index 10da845a86..ee8f0cb86f 100644
--- a/3rdparty/libtengine/tengine.cmake
+++ b/3rdparty/libtengine/tengine.cmake
@@ -20,9 +20,8 @@
 # Author: qtang@openailab.com or https://github.com/BUG1989
 #         qli@openailab.com
 #         sqfu@openailab.com
-#
-SET(TENGINE_COMMIT_VERSION "8a4c58e0e05cd850f4bb0936a330edc86dc0e28c")
+SET(TENGINE_COMMIT_VERSION "e89cf8870de2ff0a80cfe626c0b52b2a16fb302e")
 
 SET(OCV_TENGINE_DIR "${OpenCV_BINARY_DIR}/3rdparty/libtengine")
 SET(OCV_TENGINE_SOURCE_PATH "${OCV_TENGINE_DIR}/Tengine-${TENGINE_COMMIT_VERSION}")
@@ -32,11 +31,10 @@ IF(EXISTS "${OCV_TENGINE_SOURCE_PATH}")
     SET(Tengine_FOUND ON)
     SET(BUILD_TENGINE ON)
 ELSE()
-    SET(OCV_TENGINE_FILENAME "${TENGINE_COMMIT_VERSION}.zip")#name2
-    SET(OCV_TENGINE_URL "https://github.com/OAID/Tengine/archive/") #url2
-    SET(tengine_md5sum f51ca8f3963faeeff3f019a6f6edc206) #md5sum2
+    SET(OCV_TENGINE_FILENAME "${TENGINE_COMMIT_VERSION}.zip")#name
+    SET(OCV_TENGINE_URL "https://github.com/OAID/Tengine/archive/") #url
+    SET(tengine_md5sum 23f61ebb1dd419f1207d8876496289c5) #md5sum
 
-    #MESSAGE(STATUS "**** TENGINE DOWNLOAD BEGIN ****")
     ocv_download(FILENAME ${OCV_TENGINE_FILENAME}
                  HASH ${tengine_md5sum}
                  URL
@@ -62,24 +60,17 @@ ENDIF()
 if(BUILD_TENGINE)
     SET(HAVE_TENGINE 1)
 
-    # android system
-    if(ANDROID)
-        if(${ANDROID_ABI} STREQUAL "armeabi-v7a")
-            SET(CONFIG_ARCH_ARM32 ON)
-        elseif(${ANDROID_ABI} STREQUAL "arm64-v8a")
-            SET(CONFIG_ARCH_ARM64 ON)
-        endif()
-    else()
+    if(NOT ANDROID)
         # linux system
         if(CMAKE_SYSTEM_PROCESSOR STREQUAL arm)
-            SET(CONFIG_ARCH_ARM32 ON)
+            SET(TENGINE_TOOLCHAIN_FLAG "-march=armv7-a")
         elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64) ## AARCH64
-            SET(CONFIG_ARCH_ARM64 ON)
+            SET(TENGINE_TOOLCHAIN_FLAG "-march=armv8-a")
         endif()
     endif()
 
     SET(BUILT_IN_OPENCV ON) ## set for tengine compile discern .
-    SET(Tengine_INCLUDE_DIR "${OCV_TENGINE_SOURCE_PATH}/core/include" CACHE INTERNAL "")
+    SET(Tengine_INCLUDE_DIR "${OCV_TENGINE_SOURCE_PATH}/include" CACHE INTERNAL "")
     if(EXISTS "${OCV_TENGINE_SOURCE_PATH}/CMakeLists.txt")
         add_subdirectory("${OCV_TENGINE_SOURCE_PATH}" "${OCV_TENGINE_DIR}/build")
     else()
diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt
index a492b6ad45..b0811fb223 100644
--- a/modules/dnn/CMakeLists.txt
+++ b/modules/dnn/CMakeLists.txt
@@ -128,18 +128,11 @@ else()
   set(sources_options ${sources_options} EXCLUDE_CUDA)
 endif()
 
-
 if(HAVE_TENGINE)
     list(APPEND include_dirs ${TENGINE_INCLUDE_DIRS})
-    if(EXISTS ${TENGINE_LIBRARIES})
-        list(APPEND libs ${TENGINE_LIBRARIES})
-    else()
-        ocv_add_dependencies(opencv_dnn tengine)
-        list(APPEND libs ${TENGINE_LIBRARIES})
-    endif()
+    list(APPEND libs -Wl,--whole-archive ${TENGINE_LIBRARIES} -Wl,--no-whole-archive)
 endif()
 
-
 ocv_module_include_directories(${include_dirs})
 if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
   ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-suggest-override")  # GCC
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
index 5bf7694320..1419c05517 100644
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -1585,7 +1585,9 @@ struct Net::Impl : public detail::NetImplBase
     {
         CV_TRACE_FUNCTION();
         if (preferableBackend == DNN_BACKEND_OPENCV)
+        {
             CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget));
+        }
         else if (preferableBackend == DNN_BACKEND_HALIDE)
             initHalideBackend();
         else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index d758457f86..023c8b40d8 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -248,6 +248,10 @@ public:
     float power;
 #endif
 
+#ifdef HAVE_TENGINE
+    teng_graph_t tengine_graph;
+#endif
+
 #ifdef HAVE_CUDA
     cuda4dnn::ConvolutionConfiguration::FusionMode cudaFusionMode;
     cuda4dnn::ConvolutionConfiguration::ActivationType cudaActType;
@@ -266,8 +270,20 @@ public:
 #ifdef HAVE_CUDA
         cudaFusionMode = cuda4dnn::ConvolutionConfiguration::FusionMode::NONE;
         cudaActType = cuda4dnn::ConvolutionConfiguration::ActivationType::IDENTITY;
+#endif
+#ifdef HAVE_TENGINE
+        tengine_graph=NULL;
 #endif
     }
+#ifdef HAVE_TENGINE
+    ~ConvolutionLayerImpl()
+    {
+        if(NULL != tengine_graph )
+        {
+            tengine_release(tengine_graph);
+        }
+    }
+#endif
 
     MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const CV_OVERRIDE
     {
@@ -391,6 +407,13 @@ public:
             for(int i = 0; i < numOutput; i++ )
                 biasvec[i] = biasMat.at<float>(i);
         }
+#ifdef HAVE_TENGINE
+        if(NULL != tengine_graph )
+        {
+            tengine_release(tengine_graph);
+            tengine_graph = NULL ;
+        }
+#endif
 #ifdef HAVE_OPENCL
         convolutionOp.release();
 #endif
@@ -1765,26 +1788,50 @@ public:
         }
 
 #ifdef HAVE_TENGINE
-        int inch = inputs[0].size[1];   // inch
-        int in_h = inputs[0].size[2];   // in_h
-        int in_w = inputs[0].size[3];   // in_w
+        bool tengine_ret = false; ;
 
-        int out_b = outputs[0].size[0]; // out batch size
-        int outch = outputs[0].size[1]; // outch
-        int out_h = outputs[0].size[2]; // out_h
-        int out_w = outputs[0].size[3]; // out_w
+        std::vector<Mat> teng_in, teng_out;
+        inputs_arr.getMatVector(teng_in);
+        outputs_arr.getMatVector(teng_out);
 
-        float *input_  = inputs[0].ptr<float>();
-        float *output_ = outputs[0].ptr<float>();
+        int inch = teng_in[0].size[1];   // inch
+        int in_h = teng_in[0].size[2];   // in_h
+        int in_w = teng_in[0].size[3];   // in_w
+
+        int out_b = teng_out[0].size[0]; // out batch size
+        int outch = teng_out[0].size[1]; // outch
+        int out_h = teng_out[0].size[2]; // out_h
+        int out_w = teng_out[0].size[3]; // out_w
+
+        float *input_  = teng_in[0].ptr<float>();
+        float *output_ = teng_out[0].ptr<float>();
         float *kernel_ = weightsMat.ptr<float>();
         float *teg_bias = &biasvec[0];
 
-        bool tengine_ret = tengine_forward(input_, inch, ngroups, in_h, in_w,
-                                           output_, out_b, outch, out_h, out_w,
-                                           kernel_, kernel_size.size(), kernel.height, kernel.width,
-                                           teg_bias, stride.height, stride.width,
-                                           pad.height, pad.width, dilation.height, dilation.width,
-                                           weightsMat.step1(), padMode);
+        int nstripes = std::max(getNumThreads(), 1);
+
+        /* tengine_init will run when first time. */
+        if(NULL == tengine_graph)
+        {
+            tengine_graph = tengine_init(name.c_str(), input_, inch, ngroups, in_h, in_w,
+                                         output_, out_b, outch, out_h, out_w,
+                                         kernel_, kernel_size.size(), kernel.height, kernel.width,
+                                         teg_bias, stride.height, stride.width,
+                                         pad.height, pad.width, dilation.height, dilation.width,
+                                         weightsMat.step1(), padMode, tengine_graph, nstripes);
+            /*printf("Init(%s): input=%p(%d %d %d %d ),output=%p(%d %d %d %d ),kernel=%p(%ld %d %d ), bias=%p ,"
+                   "stride(%d %d), pad(%d %d), dilation(%d %d) ,weightsMat=%ld, padMode=%s ,tengine_graph = %p \n",
+                   name.c_str(),input_, inch, ngroups, in_h, in_w,
+                   output_, out_b, outch, out_h, out_w,
+                   kernel_, kernel_size.size(), kernel.height, kernel.width,
+                   teg_bias, stride.height, stride.width,
+                   pad.height, pad.width, dilation.height, dilation.width,
+                   weightsMat.step1(), padMode.c_str() ,tengine_graph);*/
+        }
+        if(NULL != tengine_graph)
+        {
+            tengine_ret = tengine_forward(tengine_graph);
+        }
         /* activation */
         if((true == tengine_ret) && activ )
         {
diff --git a/modules/dnn/src/tengine4dnn/include/tengine_graph_convolution.hpp b/modules/dnn/src/tengine4dnn/include/tengine_graph_convolution.hpp
index 09f6e66da2..c6b0495ab5 100644
--- a/modules/dnn/src/tengine4dnn/include/tengine_graph_convolution.hpp
+++ b/modules/dnn/src/tengine4dnn/include/tengine_graph_convolution.hpp
@@ -26,17 +26,24 @@
 #define TENGINE_GRAPH_CONVOLUTION_HPP
 
 #define FLOAT_TO_REALSIZE (4)
+#ifdef HAVE_TENGINE
+
+#include "tengine_c_api.h"
 
 namespace cv
 {
 namespace dnn
 {
-bool tengine_forward(float *input_, int inch, int group, int in_h, int in_w,
+teng_graph_t tengine_init(const char* name , float* input_, int inch, int group, int in_h, int in_w,
                      float *output_, int out_b, int outch, int out_h, int out_w,
                      float *kernel_,int kernel_s , int kernel_h, int kernel_w,
                      float *teg_bias, int stride_h,int stride_w,
                      int pad_h, int pad_w, int dilation_h, int dilation_w,
-                     size_t wstep, const std::string padMode) ;
+                     size_t wstep, const std::string padMode , teng_graph_t& graph, int nstripes) ;
+
+bool tengine_forward(teng_graph_t& graph) ;
+bool tengine_release(teng_graph_t& graph) ;
 }
 }
-#endif   /* TENGINE_GRAPH_CONVOLUTION_HPP */
+#endif
+#endif   /* TENGINE_GRAPH_CONVOLUTION_HPP */
\ No newline at end of file
diff --git a/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp b/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp
index 503a4eeb22..ecb5c62f56 100644
--- a/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp
+++ b/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp
@@ -34,80 +34,78 @@
 #ifdef HAVE_TENGINE
 
 #include "tengine_c_api.h"
-#include "tengine_c_compat.h"
-#include "tengine_operations.h"
+
 namespace cv
 {
 namespace dnn
 {
-
-int create_input_node(graph_t graph, const char* node_name, int inch, int in_h, int in_w)
+static int create_input_node(teng_graph_t graph, const char* node_name, int inch, int in_h, int in_w)
 {
-    node_t node = create_graph_node(graph, node_name, "InputOp");
-    tensor_t tensor = create_graph_tensor(graph, node_name, TENGINE_DT_FP32);
-    set_node_output_tensor(node, 0, tensor, TENSOR_TYPE_INPUT);
+    node_t node = teng_create_graph_node(graph, node_name, "InputOp");
+    tensor_t tensor = teng_create_graph_tensor(graph, node_name, TENGINE_DT_FP32);
+    teng_set_node_output_tensor(node, 0, tensor, TENSOR_TYPE_INPUT);
 
     int dims[4] = {1, inch, in_h, in_w};
-    set_tensor_shape(tensor, dims, 4);
+    teng_set_tensor_shape(tensor, dims, 4);
 
-    release_graph_tensor(tensor);
-    release_graph_node(node);
+    teng_release_graph_tensor(tensor);
+    teng_release_graph_node(node);
 
     return 0;
 }
 
-int create_conv_node(graph_t graph, const char* node_name, const char* input_name, int in_h, int in_w, int out_h, int out_w,
+static int create_conv_node(teng_graph_t graph, const char* node_name, const char* input_name, int in_h, int in_w, int out_h, int out_w,
                     int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, int pad_w, int inch, int outch, int group,
                     int dilation_h, int dilation_w, int activation, std::string padMode)
 {
-    node_t conv_node = create_graph_node(graph, node_name, "Convolution");
-    tensor_t input_tensor = get_graph_tensor(graph, input_name);
+    node_t conv_node = teng_create_graph_node(graph, node_name, "Convolution");
+    tensor_t input_tensor = teng_get_graph_tensor(graph, input_name);
 
     if (input_tensor == NULL)
     {
-        CV_LOG_WARNING(NULL,"Tengine :input_tensor is NULL . " );
+        CV_LOG_WARNING(NULL,"Tengine: input_tensor is NULL." );
         return -1;
     }
 
-    set_node_input_tensor(conv_node, 0, input_tensor);
-    release_graph_tensor(input_tensor);
+    teng_set_node_input_tensor(conv_node, 0, input_tensor);
+    teng_release_graph_tensor(input_tensor);
 
     /* output */
-    tensor_t output_tensor = create_graph_tensor(graph, node_name, TENGINE_DT_FP32);
+    tensor_t output_tensor = teng_create_graph_tensor(graph, node_name, TENGINE_DT_FP32);
 
-    set_node_output_tensor(conv_node, 0, output_tensor, TENSOR_TYPE_VAR);
-    release_graph_tensor(output_tensor);
+    teng_set_node_output_tensor(conv_node, 0, output_tensor, TENSOR_TYPE_VAR);
+    teng_release_graph_tensor(output_tensor);
 
     /* weight */
     std::string weight_name(node_name);
     weight_name += "/weight";
 
-    node_t w_node = create_graph_node(graph, weight_name.c_str(), "Const");
-    tensor_t w_tensor = create_graph_tensor(graph, weight_name.c_str(), TENGINE_DT_FP32);
-    set_node_output_tensor(w_node, 0, w_tensor, TENSOR_TYPE_CONST);
-    set_node_input_tensor(conv_node, 1, w_tensor);
+    node_t w_node = teng_create_graph_node(graph, weight_name.c_str(), "Const");
+    tensor_t w_tensor = teng_create_graph_tensor(graph, weight_name.c_str(), TENGINE_DT_FP32);
+    teng_set_node_output_tensor(w_node, 0, w_tensor, TENSOR_TYPE_CONST);
+    teng_set_node_input_tensor(conv_node, 1, w_tensor);
     int w_dims[] = {outch, inch / group, kernel_h, kernel_w};
 
-    set_tensor_shape(w_tensor, w_dims, 4);
+    teng_set_tensor_shape(w_tensor, w_dims, 4);
 
-    release_graph_node(w_node);
-    release_graph_tensor(w_tensor);
+    teng_release_graph_node(w_node);
+    teng_release_graph_tensor(w_tensor);
 
     /* bias */
     std::string bias_name(node_name);
     bias_name += "/bias";
 
-    node_t b_node = create_graph_node(graph, bias_name.c_str(), "Const");
-    tensor_t b_tensor = create_graph_tensor(graph, bias_name.c_str(), TENGINE_DT_FP32);
-    set_node_output_tensor(b_node, 0, b_tensor, TENSOR_TYPE_CONST);
+    node_t b_node = teng_create_graph_node(graph, bias_name.c_str(), "Const");
+    tensor_t b_tensor = teng_create_graph_tensor(graph, bias_name.c_str(), TENGINE_DT_FP32);
+    teng_set_node_output_tensor(b_node, 0, b_tensor, TENSOR_TYPE_CONST);
     int b_dims[] = {outch};
 
-    set_tensor_shape(b_tensor, b_dims, 1);
+    teng_set_tensor_shape(b_tensor, b_dims, 1);
 
-    set_node_input_tensor(conv_node, 2, b_tensor);
-    release_graph_node(b_node);
-    release_graph_tensor(b_tensor);
+    teng_set_node_input_tensor(conv_node, 2, b_tensor);
+    teng_release_graph_node(b_node);
+    teng_release_graph_tensor(b_tensor);
 
     int pad_h1 = pad_h;
     int pad_w1 = pad_w;
@@ -127,31 +125,32 @@ int create_conv_node(graph_t graph, const char* node_name, const char* input_nam
     }
 
     /* attr */
-    set_node_attr_int(conv_node, "kernel_h", &kernel_h);
-    set_node_attr_int(conv_node, "kernel_w", &kernel_w);
-    set_node_attr_int(conv_node, "stride_h", &stride_h);
-    set_node_attr_int(conv_node, "stride_w", &stride_w);
-    set_node_attr_int(conv_node, "pad_h0", &pad_h);
-    set_node_attr_int(conv_node, "pad_w0", &pad_w);
-    set_node_attr_int(conv_node, "pad_h1", &pad_h1);
-    set_node_attr_int(conv_node, "pad_w1", &pad_w1);
-    set_node_attr_int(conv_node, "output_channel", &outch);
-    set_node_attr_int(conv_node, "group", &group);
-    set_node_attr_int(conv_node, "dilation_h", &dilation_h);
-    set_node_attr_int(conv_node, "dilation_w", &dilation_w);
-    set_node_attr_int(conv_node, "activation", &activation);
+    teng_set_node_attr_int(conv_node, "kernel_h", &kernel_h);
+    teng_set_node_attr_int(conv_node, "kernel_w", &kernel_w);
+    teng_set_node_attr_int(conv_node, "stride_h", &stride_h);
+    teng_set_node_attr_int(conv_node, "stride_w", &stride_w);
+    teng_set_node_attr_int(conv_node, "pad_h0", &pad_h);
+    teng_set_node_attr_int(conv_node, "pad_w0", &pad_w);
+    teng_set_node_attr_int(conv_node, "pad_h1", &pad_h1);
+    teng_set_node_attr_int(conv_node, "pad_w1", &pad_w1);
+    teng_set_node_attr_int(conv_node, "output_channel", &outch);
+    teng_set_node_attr_int(conv_node, "input_channel", &inch);
+    teng_set_node_attr_int(conv_node, "group", &group);
+    teng_set_node_attr_int(conv_node, "dilation_h", &dilation_h);
+    teng_set_node_attr_int(conv_node, "dilation_w", &dilation_w);
+    // set_node_attr_int(conv_node, "activation", &activation);
 
-    release_graph_node(conv_node);
+    teng_release_graph_node(conv_node);
 
     return 0;
 }
 
-graph_t create_conv_graph(float *input_data, int inch, int group, int in_h, int in_w,
-                        float *output_data, int outch, int out_h, int out_w,
+static teng_graph_t create_conv_graph(const char* layer_name, float* input_data, int inch, int group, int in_h, int in_w,
+                        float* output_data, int outch, int out_h, int out_w,
                         int kernel_h, int kernel_w,
                         int stride_h,int stride_w,
                         int pad_h, int pad_w, int dilation_h, int dilation_w, int activation,
-                        float * teg_weight , float * teg_bias , std::string padMode)
+                        float* teg_weight, float* teg_bias, std::string padMode, int nstripes)
 {
     node_t conv_node = NULL;
@@ -170,28 +169,28 @@ graph_t create_conv_graph(float *input_data, int inch, int group, int in_h, int
     int input_num = 0;
 
     /* create graph */
-    graph_t graph = create_graph(NULL, NULL, NULL);
+    teng_graph_t graph = teng_create_graph(NULL, NULL, NULL);
     bool ok = true;
 
     if(graph == NULL)
    {
-        CV_LOG_WARNING(NULL,"Tengine :create_graph failed . " );
+        CV_LOG_WARNING(NULL,"Tengine: create_graph failed." );
         ok = false;
     }
 
     const char* input_name = "data";
-    const char* conv_name = "conv";
+    const char* conv_name = layer_name;
 
     if (ok && create_input_node(graph, input_name, inch, in_h, in_w) < 0)
     {
-        CV_LOG_WARNING(NULL,"Tengine :create_input_node failed. " );
+        CV_LOG_WARNING(NULL,"Tengine: create_input_node failed." );
         ok = false;
     }
 
     if (ok && create_conv_node(graph, conv_name, input_name, in_h, in_w, out_h, out_w, kernel_h, kernel_w,
                               stride_h, stride_w, pad_h, pad_w, inch, outch, group, dilation_h, dilation_w, activation, padMode) < 0)
     {
-        CV_LOG_WARNING(NULL,"Tengine :create conv node failed. " );
+        CV_LOG_WARNING(NULL,"Tengine: create conv node failed." );
         ok = false;
     }
@@ -199,94 +198,101 @@ graph_t create_conv_graph(float *input_data, int inch, int group, int in_h, int
     const char* inputs_name[] = {input_name};
     const char* outputs_name[] = {conv_name};
 
-    if (ok && set_graph_input_node(graph, inputs_name, sizeof(inputs_name) / sizeof(char*)) < 0)
+    if (ok && teng_set_graph_input_node(graph, inputs_name, sizeof(inputs_name) / sizeof(char*)) < 0)
     {
-        CV_LOG_WARNING(NULL,"Tengine :set inputs failed . " );
+        CV_LOG_WARNING(NULL,"Tengine: set inputs failed." );
         ok = false;
     }
 
-    if (ok && set_graph_output_node(graph, outputs_name, sizeof(outputs_name) / sizeof(char*)) < 0)
+    if (ok && teng_set_graph_output_node(graph, outputs_name, sizeof(outputs_name) / sizeof(char*)) < 0)
     {
-        CV_LOG_WARNING(NULL,"Tengine :set outputs failed . " );
+        CV_LOG_WARNING(NULL,"Tengine: set outputs failed." );
         ok = false;
     }
 
     /* set input data */
     if (ok)
     {
-        input_tensor = get_graph_input_tensor(graph, 0, 0);
-        buf_size = get_tensor_buffer_size(input_tensor);
+        input_tensor = teng_get_graph_input_tensor(graph, 0, 0);
+        buf_size = teng_get_tensor_buffer_size(input_tensor);
         if (buf_size != in_size * FLOAT_TO_REALSIZE)
         {
-            CV_LOG_WARNING(NULL,"Tengine :Input data size check failed . ");
+            CV_LOG_WARNING(NULL,"Tengine: Input data size check failed.");
            ok = false;
        }
    }

    if (ok)
    {
-        set_tensor_buffer(input_tensor, (float *)input_data, buf_size);
-        release_graph_tensor(input_tensor);
+        teng_set_tensor_buffer(input_tensor, (float *)input_data, buf_size);
+        teng_release_graph_tensor(input_tensor);

        /* create convolution node */
        /* set weight node */
-        conv_node = get_graph_node(graph, "conv");
-        weight_tensor = get_node_input_tensor(conv_node, 1);
-        buf_size = get_tensor_buffer_size(weight_tensor);
+        conv_node = teng_get_graph_node(graph, conv_name);
+        weight_tensor = teng_get_node_input_tensor(conv_node, 1);
+        buf_size = teng_get_tensor_buffer_size(weight_tensor);

        if (buf_size != weight_size * FLOAT_TO_REALSIZE)
        {
-            CV_LOG_WARNING(NULL,"Input weight size check failed . ");
+            CV_LOG_WARNING(NULL,"Tengine: Input weight size check failed.");
            ok = false;
        }
    }

    if (ok)
    {
-        set_tensor_buffer(weight_tensor, teg_weight, buf_size);
+        teng_set_tensor_buffer(weight_tensor, teg_weight, buf_size);

        /* set bias node */
-        input_num = get_node_input_number(conv_node);
+        input_num = teng_get_node_input_number(conv_node);
        if (input_num > 2)
        {
-            bias_tensor = get_node_input_tensor(conv_node, 2);
-            buf_size = get_tensor_buffer_size(bias_tensor);
+            bias_tensor = teng_get_node_input_tensor(conv_node, 2);
+            buf_size = teng_get_tensor_buffer_size(bias_tensor);
            if (buf_size != bias_size * FLOAT_TO_REALSIZE)
            {
-                CV_LOG_WARNING(NULL,"Tengine :Input bias size check failed . ");
+                CV_LOG_WARNING(NULL,"Tengine: Input bias size check failed.");
                ok = false;
            }
-            else set_tensor_buffer(bias_tensor, teg_bias, buf_size);
+            else teng_set_tensor_buffer(bias_tensor, teg_bias, buf_size);
        }
    }

+    /* prerun */
+    if (ok && teng_prerun_graph_multithread(graph, TENGINE_CLUSTER_BIG, nstripes) < 0)
+    {
+        CV_LOG_WARNING(NULL, "Tengine: prerun_graph failed.");
+        ok = false;
+    }
+
    if (ok)
    {
        /* set output data */
-        output_tensor = get_node_output_tensor(conv_node, 0);
-        int ret = set_tensor_buffer(output_tensor, output_data, out_size * FLOAT_TO_REALSIZE);
+        output_tensor = teng_get_node_output_tensor(conv_node, 0);
+        int ret = teng_set_tensor_buffer(output_tensor, output_data, out_size * FLOAT_TO_REALSIZE);
        if(ret)
        {
-            CV_LOG_WARNING(NULL,"Tengine :Set output tensor buffer failed . " );
+            CV_LOG_WARNING(NULL,"Tengine: Set output tensor buffer failed." );
+            ok = false;
        }
    }

-    if (!ok)
+    if (false == ok)
    {
-        destroy_graph(graph);
-        return NULL;
+        teng_destroy_graph(graph) ;
+        return NULL ;
    }

    return graph;
}
-
-bool tengine_forward(float *input_, int inch, int group, int in_h, int in_w,
+static bool tengine_init_flag = false;
+teng_graph_t tengine_init(const char* layer_name, float* input_, int inch, int group, int in_h, int in_w,
                float *output_, int out_b, int outch, int out_h, int out_w,
                float *kernel_, int kernel_s ,int kernel_h, int kernel_w,
                float *teg_bias, int stride_h,int stride_w,
                int pad_h, int pad_w, int dilation_h, int dilation_w,
-                size_t wstep,const std::string padMode)
+                size_t wstep, const std::string padMode, teng_graph_t &graph, int nstripes)
{
-    graph_t graph = NULL;
    std::vector<float> teg_weight_vec;
    float *teg_weight = NULL;
    int kernel_inwh = (inch / group) * kernel_w * kernel_h;
@@ -296,17 +302,20 @@ bool tengine_forward(float *input_, int inch, int group, int in_h, int in_w,
    if (!(kernel_s == 2 && kernel_h == kernel_w && pad_h == pad_w && dilation_h == dilation_w && stride_h == stride_w && out_b == 1 && pad_h < 10)) // just for Conv2D
-        return false;
+    {
+        // printf("return : just for Conv2D\n");
+        return NULL;
+    }

    {
-        /*printf("Tengine: input (1 x %d x %d x %d),output (%d x %d x %d x %d), kernel (%d x %d), stride (%d x %d), dilation (%d x %d), pad (%d x %d).\n",
-               inch, in_h, in_w,
-               out_b,outch,out_h,out_w,
+        /* printf("Tengine(%s): input (1 x %d x %d x %d),output (%d x %d x %d x %d), kernel (%d x %d), stride (%d x %d), dilation (%d x %d), pad (%d x %d).\n",
+               layer_name, inch, in_h, in_w,
+               out_b, outch, out_h, out_w,
               kernel_w, kernel_h,
               stride_w, stride_h,
               dilation_w, dilation_h,
-               pad_w,pad_h);*/
-
+               pad_w, pad_h);
+        */
        // weight
        if (kernel_inwh != wstep)
        {
@@ -323,35 +332,42 @@ bool tengine_forward(float *input_, int inch, int group, int in_h, int in_w,
        }

        /* initial the resoruce of tengine */
-        init_tengine();
+        if(false == tengine_init_flag)
+        {
+            init_tengine();
+            tengine_init_flag = true;
+        }

        /* create the convolution graph */
-        graph = create_conv_graph( input_, inch, group, in_h, in_w,
+        graph = create_conv_graph(layer_name, input_, inch, group, in_h, in_w,
                                  output_, outch, out_h, out_w,
                                  kernel_h, kernel_w,
                                  stride_h,stride_w,
                                  pad_h, pad_w, dilation_h, dilation_w, activation,
-                                  teg_weight , teg_bias , padMode);
-
-        /* prerun */
-        if(prerun_graph(graph) < 0)
+                                  teg_weight, teg_bias, padMode, nstripes);
+        if(NULL == graph )
        {
-            CV_LOG_WARNING(NULL, "Tengine :prerun_graph failed .");
-            return false ;
+            return NULL;
        }
-
-        /* run */
-        if(run_graph(graph, 1) < 0)
-        {
-            CV_LOG_WARNING(NULL,"Tengine :run_graph failed .");
-            return false ;
-        }
-
-        postrun_graph(graph);
-        destroy_graph(graph);
    }
-    return true ;
+    return graph ;
}

+bool tengine_forward(teng_graph_t &graph)
+{
+    /* run */
+    if(teng_run_graph(graph, 1) < 0)
+    {
+        CV_LOG_WARNING(NULL,"Tengine: run_graph failed.");
+        return false ;
+    }
+    return true;
+}
+bool tengine_release(teng_graph_t &graph)
+{
+    teng_postrun_graph(graph);
+    teng_destroy_graph(graph);
+    return true;
+}
}
}
#endif
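The patch replaces the old one-shot tengine_forward() (build, prerun, run, destroy on every call) with an init / forward / release lifecycle, so the Tengine graph is built and prerun once and only re-run on subsequent inferences. Below is a minimal usage sketch of the new entry points declared in tengine_graph_convolution.hpp; the wrapper function, its parameter values and the "conv" layer name are illustrative placeholders and not part of the patch, only the three tengine_* calls and their parameter order come from the header above.

    #ifdef HAVE_TENGINE
    #include <string>
    #include "tengine_graph_convolution.hpp"  // declares tengine_init / tengine_forward / tengine_release

    // Hypothetical caller: build the graph on the first call, reuse it afterwards.
    static void conv_with_tengine(float* input, float* output, float* kernel, float* bias,
                                  int inch, int group, int in_h, int in_w,
                                  int out_b, int outch, int out_h, int out_w,
                                  int kernel_h, int kernel_w, int stride_h, int stride_w,
                                  int pad_h, int pad_w, int dilation_h, int dilation_w,
                                  size_t wstep, const std::string& padMode, int nstripes)
    {
        static teng_graph_t graph = NULL;

        // First call: creates the graph, binds the input/weight/bias/output buffers and
        // preruns it; returns NULL for unsupported shapes, in which case the caller
        // should fall back to the default CPU convolution path.
        if (graph == NULL)
            graph = cv::dnn::tengine_init("conv", input, inch, group, in_h, in_w,
                                          output, out_b, outch, out_h, out_w,
                                          kernel, 2 /* kernel_s: 2-D kernel */, kernel_h, kernel_w,
                                          bias, stride_h, stride_w,
                                          pad_h, pad_w, dilation_h, dilation_w,
                                          wstep, padMode, graph, nstripes);

        // Later calls: run the prebuilt graph; the result is written into the bound output buffer.
        if (graph != NULL)
            cv::dnn::tengine_forward(graph);

        // When the layer is reshaped or destroyed (as in ~ConvolutionLayerImpl above):
        // cv::dnn::tengine_release(graph); graph = NULL;
    }
    #endif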