Merge pull request #24122 from fengyuentau:remove_tengine

dnn: cleanup of tengine backend #24122

🚀 Cleanup for OpenCV 5.0. Tengine backend is added for convolution layer speedup on ARM CPUs, but it is not maintained and the convolution layer on our default backend has reached similar performance to that of Tengine.

Tengine backend related PRs:
- https://github.com/opencv/opencv/pull/16724
- https://github.com/opencv/opencv/pull/18323

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
Yuantao Feng 2023-08-09 02:26:02 -04:00 committed by GitHub
parent 7d59db4ec4
commit ba70ec99b3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 3 additions and 696 deletions

View File

@ -1,80 +0,0 @@
# COPYRIGHT
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# License); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2020, OPEN AI LAB
# Author: qtang@openailab.com or https://github.com/BUG1989
# qli@openailab.com
# sqfu@openailab.com
SET(TENGINE_COMMIT_VERSION "e89cf8870de2ff0a80cfe626c0b52b2a16fb302e")
SET(OCV_TENGINE_DIR "${OpenCV_BINARY_DIR}/3rdparty/libtengine")
SET(OCV_TENGINE_SOURCE_PATH "${OCV_TENGINE_DIR}/Tengine-${TENGINE_COMMIT_VERSION}")
IF(EXISTS "${OCV_TENGINE_SOURCE_PATH}")
MESSAGE(STATUS "Tengine is exist already at: ${OCV_TENGINE_SOURCE_PATH}")
SET(Tengine_FOUND ON)
SET(BUILD_TENGINE ON)
ELSE()
SET(OCV_TENGINE_FILENAME "${TENGINE_COMMIT_VERSION}.zip")#name
SET(OCV_TENGINE_URL "https://github.com/OAID/Tengine/archive/") #url
SET(tengine_md5sum 23f61ebb1dd419f1207d8876496289c5) #md5sum
ocv_download(FILENAME ${OCV_TENGINE_FILENAME}
HASH ${tengine_md5sum}
URL
"${OPENCV_TENGINE_URL}"
"$ENV{OPENCV_TENGINE_URL}"
"${OCV_TENGINE_URL}"
DESTINATION_DIR "${OCV_TENGINE_DIR}"
ID TENGINE
STATUS res
UNPACK RELATIVE_URL)
if (NOT res)
MESSAGE(STATUS "TENGINE DOWNLOAD FAILED. Turning Tengine_FOUND off.")
SET(Tengine_FOUND OFF)
else ()
MESSAGE(STATUS "TENGINE DOWNLOAD success . ")
SET(Tengine_FOUND ON)
SET(BUILD_TENGINE ON)
endif()
ENDIF()
if(BUILD_TENGINE)
SET(HAVE_TENGINE 1)
if(NOT ANDROID)
# linux system
if(CMAKE_SYSTEM_PROCESSOR STREQUAL arm)
SET(TENGINE_TOOLCHAIN_FLAG "-march=armv7-a")
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64) ## AARCH64
SET(TENGINE_TOOLCHAIN_FLAG "-march=armv8-a")
endif()
endif()
SET(BUILT_IN_OPENCV ON) ## set for tengine compile discern .
SET(Tengine_INCLUDE_DIR "${OCV_TENGINE_SOURCE_PATH}/include" CACHE INTERNAL "")
if(EXISTS "${OCV_TENGINE_SOURCE_PATH}/CMakeLists.txt")
add_subdirectory("${OCV_TENGINE_SOURCE_PATH}" "${OCV_TENGINE_DIR}/build")
else()
message(WARNING "TENGINE: Missing 'CMakeLists.txt' in source code package: ${OCV_TENGINE_SOURCE_PATH}")
endif()
SET(Tengine_LIB "tengine" CACHE INTERNAL "")
endif()

View File

@ -462,9 +462,6 @@ OCV_OPTION(WITH_ANDROID_MEDIANDK "Use Android Media NDK for Video I/O (Android)"
OCV_OPTION(WITH_ANDROID_NATIVE_CAMERA "Use Android NDK for Camera I/O (Android)" (ANDROID_NATIVE_API_LEVEL GREATER 23)
VISIBLE_IF ANDROID
VERIFY HAVE_ANDROID_NATIVE_CAMERA)
OCV_OPTION(WITH_TENGINE "Include Arm Inference Tengine support" OFF
VISIBLE_IF (ARM OR AARCH64) AND (UNIX OR ANDROID) AND NOT IOS
VERIFY HAVE_TENGINE)
OCV_OPTION(WITH_ONNX "Include Microsoft ONNX Runtime support" OFF
VISIBLE_IF TRUE
VERIFY HAVE_ONNX)
@ -761,9 +758,6 @@ include(cmake/OpenCVFindLibsPerf.cmake)
include(cmake/OpenCVFindLAPACK.cmake)
include(cmake/OpenCVFindProtobuf.cmake)
include(cmake/OpenCVDetectFlatbuffers.cmake)
if(WITH_TENGINE)
include(cmake/OpenCVFindTengine.cmake)
endif()
if(WITH_TIMVX)
include(cmake/OpenCVFindTIMVX.cmake)
endif()
@ -1612,10 +1606,6 @@ if(WITH_VA OR HAVE_VA)
status(" VA:" HAVE_VA THEN "YES" ELSE NO)
endif()
if(WITH_TENGINE OR HAVE_TENGINE)
status(" Tengine:" HAVE_TENGINE THEN "YES (${TENGINE_LIBRARIES})" ELSE NO)
endif()
if(WITH_LAPACK OR HAVE_LAPACK)
status(" Lapack:" HAVE_LAPACK THEN "YES (${LAPACK_LIBRARIES})" ELSE NO)
endif()

View File

@ -1,78 +0,0 @@
# COPYRIGHT
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# License); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2020, OPEN AI LAB
# Author: qtang@openailab.com or https://github.com/BUG1989
#
# ----------------------------------------------------------------------------
# Path for Tengine binaries
# ----------------------------------------------------------------------------
set(OPENCV_LIBTENGINE_ROOT_DIR "" CACHE PATH "Path to TENGINE binaries installation")
IF(OPENCV_LIBTENGINE_ROOT_DIR AND NOT BUILD_TENGINE)
MESSAGE(STATUS "TENGINE:-- Use binaries at ${OPENCV_LIBTENGINE_ROOT_DIR}")
SET(Tengine_FOUND ON)
set(BUILD_TENGINE OFF)
SET(Tengine_INCLUDE_DIR "${OPENCV_LIBTENGINE_ROOT_DIR}/include" CACHE PATH "TENGINE include dir")
SET(Tengine_LIB "${OPENCV_LIBTENGINE_ROOT_DIR}/lib/libtengine.a" CACHE PATH "TENGINE library dir")
ELSE()
IF(ANDROID)
IF(OPENCV_TENGINE_FORCE_ANDROID)
# nothing, use Android
ELSEIF(OPENCV_TENGINE_SKIP_ANDROID)
set(Tengine_FOUND OFF)
set(HAVE_TENGINE FALSE)
return()
ELSEIF(NOT DEFINED ANDROID_NDK_REVISION)
MESSAGE(STATUS "Android NDK version Tengine not support: ANDROID_NDK_REVISION is not defined")
set(Tengine_FOUND OFF)
set(HAVE_TENGINE FALSE)
return()
ELSEIF(ANDROID_NDK_REVISION VERSION_LESS 14)
MESSAGE(STATUS "Android NDK version Tengine not support: ANDROID_NDK_REVISION=${ANDROID_NDK_REVISION}")
set(Tengine_FOUND OFF)
set(HAVE_TENGINE FALSE)
return()
ENDIF()
ENDIF()
MESSAGE(STATUS "TENGINE:-- Build Tengine from source code. ")
include("${OpenCV_SOURCE_DIR}/3rdparty/libtengine/tengine.cmake")
ENDIF()
IF(NOT Tengine_LIB)
SET(Tengine_FOUND OFF)
MESSAGE(STATUS "#### Could not find Tengine lib. Turning Tengine_FOUND off")
ENDIF()
IF (Tengine_FOUND)
MESSAGE(STATUS "Found Tengine include: ${Tengine_INCLUDE_DIR}")
MESSAGE(STATUS "Found Tengine libraries: ${Tengine_LIB}")
set(HAVE_TENGINE 1)
set(TENGINE_LIBRARIES ${Tengine_LIB})
set(TENGINE_INCLUDE_DIRS ${Tengine_INCLUDE_DIR})
ENDIF (Tengine_FOUND)
MARK_AS_ADVANCED(
Tengine_INCLUDE_DIR
Tengine_LIB
)

View File

@ -1,15 +1,12 @@
# Gitlab-style mirror
# CMake scripts look for opencv/opencv_3rdparty,
# OAID/Tengine, 01org/tbb(oneAPI/oneTBB), opencv/ade
# 01org/tbb(oneAPI/oneTBB), opencv/ade
# from OPENCV_DOWNLOAD_MIRROR
ocv_update(OPENCV_DOWNLOAD_MIRROR_URL "")
######
# Download via commit id
######
# Tengine
ocv_update(TENGINE_PKG_MD5_CUSTOM "")
ocv_update(TENGINE_PKG_MD5_ORIGINAL 23f61ebb1dd419f1207d8876496289c5) # same as tengine_md5sum for TENGINE commit of e89cf8870de2ff0a80cfe626c0b52b2a16fb302e
# NVIDIA_OPTICAL_FLOW
ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_GITCODE "")
ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_ORIGINAL a73cd48b18dcc0cc8933b30796074191)
@ -77,7 +74,7 @@ else()
ocv_download_url_custom_usercontent(opencv)
elseif(DL_ID STREQUAL "wechat_qrcode")
ocv_download_url_gitcode_usercontent(WeChatCV)
elseif((DL_ID STREQUAL "TENGINE") OR (DL_ID STREQUAL "NVIDIA_OPTICAL_FLOW") OR (DL_ID STREQUAL "TIM-VX"))
elseif((DL_ID STREQUAL "NVIDIA_OPTICAL_FLOW") OR (DL_ID STREQUAL "TIM-VX"))
ocv_download_url_custom_archive_commit_id()
elseif(DL_ID STREQUAL "TBB")
ocv_download_url_custom_archive_release()

View File

@ -1,9 +1,6 @@
######
# Download via commit id
######
# Tengine
ocv_update(TENGINE_PKG_MD5_GITCODE 1b5908632b557275cd6e85b0c03f9690)
ocv_update(TENGINE_PKG_MD5_ORIGINAL 23f61ebb1dd419f1207d8876496289c5) # same as tengine_md5sum for TENGINE commit of e89cf8870de2ff0a80cfe626c0b52b2a16fb302e
# NVIDIA_OPTICAL_FLOW
ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_GITCODE 8d5b7eeb24d6ca9c6bcfdff4196d5b47)
ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_ORIGINAL a73cd48b18dcc0cc8933b30796074191)
@ -74,7 +71,7 @@ if((DL_ID STREQUAL "FFMPEG") OR (DL_ID STREQUAL "IPPICV") OR (DL_ID STREQUAL "da
ocv_download_url_gitcode_usercontent(opencv)
elseif(DL_ID STREQUAL "wechat_qrcode")
ocv_download_url_gitcode_usercontent(mirrors/WeChatCV)
elseif((DL_ID STREQUAL "TENGINE") OR (DL_ID STREQUAL "NVIDIA_OPTICAL_FLOW") OR (DL_ID STREQUAL "TIM-VX"))
elseif((DL_ID STREQUAL "NVIDIA_OPTICAL_FLOW") OR (DL_ID STREQUAL "TIM-VX"))
ocv_download_url_gitcode_archive_commit_id()
elseif(DL_ID STREQUAL "TBB")
ocv_download_url_gitcode_archive_release(OPENCV_TBB_SUBDIR)

View File

@ -484,7 +484,6 @@ OpenCV have own DNN inference module which have own build-in engine, but can als
| `OPENCV_DNN_CUDA` | _OFF_ | Enable CUDA backend. [CUDA](https://en.wikipedia.org/wiki/CUDA), CUBLAS and [CUDNN](https://developer.nvidia.com/cudnn) must be installed. |
| `WITH_HALIDE` | _OFF_ | Use experimental [Halide](https://en.wikipedia.org/wiki/Halide_(programming_language)) backend which can generate optimized code for dnn-layers at runtime. Halide must be installed. |
| `WITH_VULKAN` | _OFF_ | Enable experimental [Vulkan](https://en.wikipedia.org/wiki/Vulkan_(API)) backend. Does not require additional dependencies, but can use external Vulkan headers (`VULKAN_INCLUDE_DIRS`). |
| `WITH_TENGINE` | _OFF_ | Enable experimental [Tengine](https://github.com/OAID/Tengine) backend for ARM CPUs. Tengine library must be installed. |
# Installation layout {#tutorial_config_reference_install}

View File

@ -58,11 +58,6 @@ endif()
ocv_cmake_hook_append(INIT_MODULE_SOURCES_opencv_dnn "${CMAKE_CURRENT_LIST_DIR}/cmake/hooks/INIT_MODULE_SOURCES_opencv_dnn.cmake")
if(HAVE_TENGINE)
ocv_target_compile_definitions(${the_module} PRIVATE "HAVE_TENGINE=1")
endif()
if(MSVC)
add_definitions( -D_CRT_SECURE_NO_WARNINGS=1 )
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4244 /wd4267 /wd4018 /wd4355 /wd4800 /wd4251 /wd4996 /wd4146
@ -172,11 +167,6 @@ else()
set(sources_options ${sources_options} EXCLUDE_CUDA)
endif()
if(HAVE_TENGINE)
list(APPEND include_dirs ${TENGINE_INCLUDE_DIRS})
list(APPEND libs -Wl,--whole-archive ${TENGINE_LIBRARIES} -Wl,--no-whole-archive)
endif()
if(HAVE_TIMVX)
list(APPEND include_dirs ${TIMVX_INCLUDE_DIR})
list(APPEND libs -Wl,--whole-archive ${TIMVX_LIBRARY} -Wl,--no-whole-archive)

View File

@ -62,9 +62,6 @@
#include "opencl_kernels_dnn.hpp"
using namespace cv::dnn::ocl4dnn;
#endif
#ifdef HAVE_TENGINE
#include "../tengine4dnn/include/tengine_graph_convolution.hpp"
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/convolution.hpp"
@ -267,10 +264,6 @@ public:
float power;
#endif
#ifdef HAVE_TENGINE
teng_graph_t tengine_graph;
#endif
#ifdef HAVE_CUDA
cuda4dnn::ConvolutionConfiguration::FusionMode cudaFusionMode;
cuda4dnn::ConvolutionConfiguration::ActivationType cudaActType;
@ -289,20 +282,8 @@ public:
#ifdef HAVE_CUDA
cudaFusionMode = cuda4dnn::ConvolutionConfiguration::FusionMode::NONE;
cudaActType = cuda4dnn::ConvolutionConfiguration::ActivationType::IDENTITY;
#endif
#ifdef HAVE_TENGINE
tengine_graph=NULL;
#endif
}
#ifdef HAVE_TENGINE
~ConvolutionLayerImpl()
{
if(NULL != tengine_graph )
{
tengine_release(tengine_graph);
}
}
#endif
MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const CV_OVERRIDE
{
@ -466,13 +447,6 @@ public:
for(int i = 0; i < numOutput; i++ )
biasvec[i] = biasMat.at<float>(i);
}
#ifdef HAVE_TENGINE
if(NULL != tengine_graph )
{
tengine_release(tengine_graph);
tengine_graph = NULL ;
}
#endif
#ifdef HAVE_OPENCL
convolutionOp.release();
#endif
@ -1305,65 +1279,6 @@ public:
}
}
#ifdef HAVE_TENGINE
bool tengine_ret = false;
std::vector<Mat> teng_in, teng_out;
inputs_arr.getMatVector(teng_in);
outputs_arr.getMatVector(teng_out);
int inch = teng_in[0].size[1]; // inch
int in_h = teng_in[0].size[2]; // in_h
int in_w = teng_in[0].size[3]; // in_w
int out_b = teng_out[0].size[0]; // out batch size
int outch = teng_out[0].size[1]; // outch
int out_h = teng_out[0].size[2]; // out_h
int out_w = teng_out[0].size[3]; // out_w
float *input_ = teng_in[0].ptr<float>();
float *output_ = teng_out[0].ptr<float>();
float *kernel_ = weightsMat.ptr<float>();
float *teg_bias = &biasvec[0];
int nstripes = std::max(getNumThreads(), 1);
/* tengine_init will run when first time. */
if(NULL == tengine_graph)
{
// pads_begin: 0 - pad_top, 1 - pad_left
// pads_end: 0 - pad_bottom, 1 - pad_right
// pad_h0: pad_top, pad_h1: pad_bottom
// pad_w0: pad_left, pad_w1: pad_right
tengine_graph = tengine_init(name.c_str(), input_, inch, ngroups, in_h, in_w,
output_, out_b, outch, out_h, out_w,
kernel_, kernel_size.size(), kernel.height, kernel.width,
teg_bias, stride.height, stride.width,
pads_begin[0], pads_end[0], pads_begin[1], pads_end[1], dilation.height, dilation.width,
weightsMat.step1(), padMode, tengine_graph, nstripes);
// printf("Init(%s): input=%p(%d %d %d %d ),output=%p(%d %d %d %d ),kernel=%p(%ld %d %d ), bias=%p ,"
// "stride(%d %d), pad(%d %d %d %d), dilation(%d %d) ,weightsMat=%ld, padMode=%s ,tengine_graph = %p \n",
// name.c_str(),input_, inch, ngroups, in_h, in_w,
// output_, out_b, outch, out_h, out_w,
// kernel_, kernel_size.size(), kernel.height, kernel.width,
// teg_bias, stride.height, stride.width,
// pads_begin[0], pads_end[0], pads_begin[1], pads_end[1], dilation.height, dilation.width,
// weightsMat.step1(), padMode.c_str() ,tengine_graph);
}
if(NULL != tengine_graph)
{
tengine_ret = tengine_forward(tengine_graph);
}
/* activation */
if((true == tengine_ret) && activ )
{
int out_cstep = out_h * out_w; // out_cstep
ActivationLayer* activ_ = activ.get();
activ_->forwardSlice(output_, output_, out_cstep, out_cstep, 0, outch);
}
if(false == tengine_ret)
#endif
{
int nstripes = std::max(getNumThreads(), 1);
int conv_dim = CONV_2D;

View File

@ -1,53 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2020, OPEN AI LAB
* Author: qtang@openailab.com
*/
#ifndef TENGINE_GRAPH_CONVOLUTION_HPP
#define TENGINE_GRAPH_CONVOLUTION_HPP
#define FLOAT_TO_REALSIZE (4)
#ifdef HAVE_TENGINE
#include "tengine_c_api.h"
namespace cv
{
namespace dnn
{
// pad_h0: pad_top
// pad_h1: pad_bottom
// pad_w0: pad_left
// pad_w1: pad_right
teng_graph_t tengine_init(const char* name , float* input_, int inch, int group, int in_h, int in_w,
float *output_, int out_b, int outch, int out_h, int out_w,
float *kernel_,int kernel_s , int kernel_h, int kernel_w,
float *teg_bias, int stride_h, int stride_w,
int pad_h0, int pad_h1, int pad_w0, int pad_w1, int dilation_h, int dilation_w,
size_t wstep, const std::string padMode , teng_graph_t& graph, int nstripes) ;
bool tengine_forward(teng_graph_t& graph) ;
bool tengine_release(teng_graph_t& graph) ;
}
}
#endif
#endif /* TENGINE_GRAPH_CONVOLUTION_HPP */

View File

@ -1,370 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2020, OPEN AI LAB
* Author: qtang@openailab.com
*/
#include "../../precomp.hpp"
#include <iostream>
#include <vector>
#include <opencv2/core/utils/configuration.private.hpp>
#include <opencv2/core/utils/logger.hpp>
#include "../include/tengine_graph_convolution.hpp"
#ifdef HAVE_TENGINE
#include "tengine_c_api.h"
namespace cv
{
namespace dnn
{
static int create_input_node(teng_graph_t graph, const char* node_name, int inch, int in_h, int in_w)
{
node_t node = teng_create_graph_node(graph, node_name, "InputOp");
tensor_t tensor = teng_create_graph_tensor(graph, node_name, TENGINE_DT_FP32);
teng_set_node_output_tensor(node, 0, tensor, TENSOR_TYPE_INPUT);
int dims[4] = {1, inch, in_h, in_w};
teng_set_tensor_shape(tensor, dims, 4);
teng_release_graph_tensor(tensor);
teng_release_graph_node(node);
return 0;
}
static int create_conv_node(teng_graph_t graph, const char* node_name, const char* input_name, int in_h, int in_w, int out_h, int out_w,
int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h0, int pad_h1, int pad_w0, int pad_w1, int inch, int outch, int group,
int dilation_h, int dilation_w, int activation, std::string padMode)
{
node_t conv_node = teng_create_graph_node(graph, node_name, "Convolution");
tensor_t input_tensor = teng_get_graph_tensor(graph, input_name);
if (input_tensor == NULL)
{
CV_LOG_WARNING(NULL,"Tengine: input_tensor is NULL." );
return -1;
}
teng_set_node_input_tensor(conv_node, 0, input_tensor);
teng_release_graph_tensor(input_tensor);
/* output */
tensor_t output_tensor = teng_create_graph_tensor(graph, node_name, TENGINE_DT_FP32);
teng_set_node_output_tensor(conv_node, 0, output_tensor, TENSOR_TYPE_VAR);
teng_release_graph_tensor(output_tensor);
/* weight */
std::string weight_name(node_name);
weight_name += "/weight";
node_t w_node = teng_create_graph_node(graph, weight_name.c_str(), "Const");
tensor_t w_tensor = teng_create_graph_tensor(graph, weight_name.c_str(), TENGINE_DT_FP32);
teng_set_node_output_tensor(w_node, 0, w_tensor, TENSOR_TYPE_CONST);
teng_set_node_input_tensor(conv_node, 1, w_tensor);
int w_dims[] = {outch, inch / group, kernel_h, kernel_w};
teng_set_tensor_shape(w_tensor, w_dims, 4);
teng_release_graph_node(w_node);
teng_release_graph_tensor(w_tensor);
/* bias */
std::string bias_name(node_name);
bias_name += "/bias";
node_t b_node = teng_create_graph_node(graph, bias_name.c_str(), "Const");
tensor_t b_tensor = teng_create_graph_tensor(graph, bias_name.c_str(), TENGINE_DT_FP32);
teng_set_node_output_tensor(b_node, 0, b_tensor, TENSOR_TYPE_CONST);
int b_dims[] = {outch};
teng_set_tensor_shape(b_tensor, b_dims, 1);
teng_set_node_input_tensor(conv_node, 2, b_tensor);
teng_release_graph_node(b_node);
teng_release_graph_tensor(b_tensor);
if (!padMode.empty())
{
if (padMode == "SAME")
{
int out_h_temp = (in_h-kernel_h + 2*pad_h0)/stride_h + 1;
int out_w_temp = (in_w-kernel_w + 2*pad_w0)/stride_w + 1;
if (out_h_temp < out_h)
pad_h1 += 1;
if (out_w_temp < out_w)
pad_w1 += 1;
}
}
/* attr */
teng_set_node_attr_int(conv_node, "kernel_h", &kernel_h);
teng_set_node_attr_int(conv_node, "kernel_w", &kernel_w);
teng_set_node_attr_int(conv_node, "stride_h", &stride_h);
teng_set_node_attr_int(conv_node, "stride_w", &stride_w);
teng_set_node_attr_int(conv_node, "pad_h0", &pad_h0);
teng_set_node_attr_int(conv_node, "pad_w0", &pad_w0);
teng_set_node_attr_int(conv_node, "pad_h1", &pad_h1);
teng_set_node_attr_int(conv_node, "pad_w1", &pad_w1);
teng_set_node_attr_int(conv_node, "output_channel", &outch);
teng_set_node_attr_int(conv_node, "input_channel", &inch);
teng_set_node_attr_int(conv_node, "group", &group);
teng_set_node_attr_int(conv_node, "dilation_h", &dilation_h);
teng_set_node_attr_int(conv_node, "dilation_w", &dilation_w);
// set_node_attr_int(conv_node, "activation", &activation);
teng_release_graph_node(conv_node);
return 0;
}
static teng_graph_t create_conv_graph(const char* layer_name, float* input_data, int inch, int group, int in_h, int in_w,
float* output_data, int outch, int out_h, int out_w,
int kernel_h, int kernel_w,
int stride_h,int stride_w,
int pad_h0, int pad_h1, int pad_w0, int pad_w1, int dilation_h, int dilation_w, int activation,
float* teg_weight, float* teg_bias, std::string padMode, int nstripes)
{
node_t conv_node = NULL;
tensor_t input_tensor = NULL;
tensor_t output_tensor = NULL;
tensor_t weight_tensor = NULL;
tensor_t bias_tensor = NULL;
/* create graph for convolution */
int in_size = in_h * in_w * inch;
int out_size = out_h * out_w * outch;
int weight_size = outch * (inch / group) * kernel_w * kernel_h;
int bias_size = outch;
int buf_size = 0;
int input_num = 0;
/* create graph */
teng_graph_t graph = teng_create_graph(NULL, NULL, NULL);
bool ok = true;
if(graph == NULL)
{
CV_LOG_WARNING(NULL,"Tengine: create_graph failed." );
ok = false;
}
const char* input_name = "data";
const char* conv_name = layer_name;
if (ok && create_input_node(graph, input_name, inch, in_h, in_w) < 0)
{
CV_LOG_WARNING(NULL,"Tengine: create_input_node failed." );
ok = false;
}
if (ok && create_conv_node(graph, conv_name, input_name, in_h, in_w, out_h, out_w, kernel_h, kernel_w,
stride_h, stride_w, pad_h0, pad_h1, pad_w0, pad_w1, inch, outch, group, dilation_h, dilation_w, activation, padMode) < 0)
{
CV_LOG_WARNING(NULL,"Tengine: create conv node failed." );
ok = false;
}
/* set input/output node */
const char* inputs_name[] = {input_name};
const char* outputs_name[] = {conv_name};
if (ok && teng_set_graph_input_node(graph, inputs_name, sizeof(inputs_name) / sizeof(char*)) < 0)
{
CV_LOG_WARNING(NULL,"Tengine: set inputs failed." );
ok = false;
}
if (ok && teng_set_graph_output_node(graph, outputs_name, sizeof(outputs_name) / sizeof(char*)) < 0)
{
CV_LOG_WARNING(NULL,"Tengine: set outputs failed." );
ok = false;
}
/* set input data */
if (ok)
{
input_tensor = teng_get_graph_input_tensor(graph, 0, 0);
buf_size = teng_get_tensor_buffer_size(input_tensor);
if (buf_size != in_size * FLOAT_TO_REALSIZE)
{
CV_LOG_WARNING(NULL,"Tengine: Input data size check failed.");
ok = false;
}
}
if (ok)
{
teng_set_tensor_buffer(input_tensor, (float *)input_data, buf_size);
teng_release_graph_tensor(input_tensor);
/* create convolution node */
/* set weight node */
conv_node = teng_get_graph_node(graph, conv_name);
weight_tensor = teng_get_node_input_tensor(conv_node, 1);
buf_size = teng_get_tensor_buffer_size(weight_tensor);
if (buf_size != weight_size * FLOAT_TO_REALSIZE)
{
CV_LOG_WARNING(NULL,"Tengine: Input weight size check failed.");
ok = false;
}
}
if (ok)
{
teng_set_tensor_buffer(weight_tensor, teg_weight, buf_size);
/* set bias node */
input_num = teng_get_node_input_number(conv_node);
if (input_num > 2)
{
bias_tensor = teng_get_node_input_tensor(conv_node, 2);
buf_size = teng_get_tensor_buffer_size(bias_tensor);
if (buf_size != bias_size * FLOAT_TO_REALSIZE)
{
CV_LOG_WARNING(NULL,"Tengine: Input bias size check failed.");
ok = false;
}
else teng_set_tensor_buffer(bias_tensor, teg_bias, buf_size);
}
}
/* prerun */
if (ok && teng_prerun_graph_multithread(graph, TENGINE_CLUSTER_BIG, nstripes) < 0)
{
CV_LOG_WARNING(NULL, "Tengine: prerun_graph failed.");
ok = false;
}
if (ok)
{
/* set output data */
output_tensor = teng_get_node_output_tensor(conv_node, 0);
int ret = teng_set_tensor_buffer(output_tensor, output_data, out_size * FLOAT_TO_REALSIZE);
if(ret)
{
CV_LOG_WARNING(NULL,"Tengine: Set output tensor buffer failed." );
ok = false;
}
}
if (false == ok)
{
teng_destroy_graph(graph) ;
return NULL ;
}
return graph;
}
static bool tengine_init_flag = false;
teng_graph_t tengine_init(const char* layer_name, float* input_, int inch, int group, int in_h, int in_w,
float *output_, int out_b, int outch, int out_h, int out_w,
float *kernel_, int kernel_s ,int kernel_h, int kernel_w,
float *teg_bias, int stride_h, int stride_w,
int pad_h0, int pad_h1, int pad_w0, int pad_w1, int dilation_h, int dilation_w,
size_t wstep, const std::string padMode, teng_graph_t &graph, int nstripes)
{
std::vector<float> teg_weight_vec;
float *teg_weight = NULL;
int kernel_inwh = (inch / group) * kernel_w * kernel_h;
// Do not using the activation fuse mode, just convolution only.
int activation = -1;
if (!(kernel_s == 2 && kernel_h == kernel_w
&& dilation_h == dilation_w && stride_h == stride_w
&& out_b == 1 && pad_h0 < 10 && pad_h1 < 10 && pad_w0 < 10 && pad_w1 < 10)) // just for Conv2D
{
// printf("return : just for Conv2D\n");
return NULL;
}
{
/* printf("Tengine(%s): input (1 x %d x %d x %d),output (%d x %d x %d x %d), kernel (%d x %d), stride (%d x %d), dilation (%d x %d), pad (%d x %d).\n",
layer_name, inch, in_h, in_w,
out_b, outch, out_h, out_w,
kernel_w, kernel_h,
stride_w, stride_h,
dilation_w, dilation_h,
pad_h0, pad_h1, pad_w0, pad_w1);
*/
// weight
if (kernel_inwh != wstep)
{
teg_weight_vec.resize(kernel_inwh * outch);
teg_weight = &teg_weight_vec[0];
for (int i=0; i<outch; i++)
{
memcpy(teg_weight+i*kernel_inwh, kernel_+i*wstep, kernel_inwh*FLOAT_TO_REALSIZE);
}
}
else
{
teg_weight = kernel_;
}
/* initial the resource of tengine */
if(false == tengine_init_flag)
{
init_tengine();
tengine_init_flag = true;
}
/* create the convolution graph */
graph = create_conv_graph(layer_name, input_, inch, group, in_h, in_w,
output_, outch, out_h, out_w,
kernel_h, kernel_w, stride_h,stride_w,
pad_h0, pad_h1, pad_w0, pad_w1, dilation_h, dilation_w, activation,
teg_weight, teg_bias, padMode, nstripes);
if(NULL == graph )
{
return NULL;
}
}
return graph ;
}
bool tengine_forward(teng_graph_t &graph)
{
/* run */
if(teng_run_graph(graph, 1) < 0)
{
CV_LOG_WARNING(NULL,"Tengine: run_graph failed.");
return false ;
}
return true;
}
bool tengine_release(teng_graph_t &graph)
{
teng_postrun_graph(graph);
teng_destroy_graph(graph);
return true;
}
}
}
#endif