From 0bcdf7d03e57b9aa8fbd38f4896ae3657f5fd273 Mon Sep 17 00:00:00 2001
From: NesQl <32612899+liqi-c@users.noreply.github.com>
Date: Mon, 9 Mar 2020 22:59:23 +0800
Subject: [PATCH] Merge pull request #16724 from liqi-c:3.4-tengine

* Add Tengine support.
* Modify printf to CV_LOG_WARNING
* A few minor fixes in the code
* Renew Tengine version
* Add header file for CV_LOG_WARNING
* Add #ifdef HAVE_TENGINE in tengine_graph_convolution.cpp
* Remove trailing whitespace
* Remove trailing whitespace
* Fix compile problem
* Fix some code style errors
* Remove whitespace
* Fix some code style problems
* Test
* Add iOS limit and fix build problem
* Modify as alalek suggested
* Add cmake 2.8 support
* Fix cmake 3.5.1 problem
* Test and set BUILD_ANDROID_PROJECTS OFF
* Remove some compile errors
* Remove some extra code in tengine
* Close test
* Test again
* Disable android
* Delete NDK version judgement
* Remove setenv() call and add license information
* Set Tengine default OFF; close test

Co-authored-by: Vadim Pisarevsky
---
 3rdparty/libtengine/tengine.cmake             |  90 +++++
 CMakeLists.txt                                |  10 +
 cmake/OpenCVFindTengine.cmake                 |  65 ++++
 modules/dnn/CMakeLists.txt                    |  14 +
 modules/dnn/src/layers/convolution_layer.cpp  |  42 ++-
 .../include/tengine_graph_convolution.hpp     |  42 +++
 .../src/tengine_graph_convolution.cpp         | 357 ++++++++++++++++++
 7 files changed, 617 insertions(+), 3 deletions(-)
 create mode 100644 3rdparty/libtengine/tengine.cmake
 create mode 100644 cmake/OpenCVFindTengine.cmake
 create mode 100644 modules/dnn/src/tengine4dnn/include/tengine_graph_convolution.hpp
 create mode 100644 modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp

diff --git a/3rdparty/libtengine/tengine.cmake b/3rdparty/libtengine/tengine.cmake
new file mode 100644
index 0000000000..7bd42d1ba2
--- /dev/null
+++ b/3rdparty/libtengine/tengine.cmake
@@ -0,0 +1,90 @@
+# COPYRIGHT
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# License); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Copyright (c) 2020, OPEN AI LAB
+# Author: qtang@openailab.com or https://github.com/BUG1989
+#         qli@openailab.com
+#         sqfu@openailab.com
+#
+
+SET(TENGINE_VERSION "tengine-opencv")
+SET(OCV_TENGINE_DSTDIRECTORY ${OpenCV_BINARY_DIR}/3rdparty/libtengine)
+SET(DEFAULT_OPENCV_TENGINE_SOURCE_PATH ${OCV_TENGINE_DSTDIRECTORY}/Tengine-${TENGINE_VERSION})
+
+IF(EXISTS ${DEFAULT_OPENCV_TENGINE_SOURCE_PATH})
+    MESSAGE(STATUS "Tengine already exists.")
+
+    SET(Tengine_FOUND ON)
+    set(BUILD_TENGINE ON)
+ELSE()
+    SET(OCV_TENGINE_FILENAME "${TENGINE_VERSION}.zip")              #name2
+    SET(OCV_TENGINE_URL "https://github.com/OAID/Tengine/archive/") #url2
+    SET(tengine_md5sum 9c80d91dc8413911522ec80cde013ae2)            #md5sum2
+
+    MESSAGE(STATUS "**** TENGINE DOWNLOAD BEGIN ****")
+    ocv_download(FILENAME ${OCV_TENGINE_FILENAME}
+                 HASH ${tengine_md5sum}
+                 URL
+                     "${OPENCV_TENGINE_URL}"
+                     "$ENV{OPENCV_TENGINE_URL}"
+                     "${OCV_TENGINE_URL}"
+                 DESTINATION_DIR ${OCV_TENGINE_DSTDIRECTORY}
+                 ID TENGINE
+                 STATUS res
+                 UNPACK RELATIVE_URL)
+
+    if (NOT res)
+        MESSAGE(STATUS "TENGINE DOWNLOAD FAILED. Turning Tengine_FOUND off.")
+        SET(Tengine_FOUND OFF)
+    else ()
+        MESSAGE(STATUS "TENGINE DOWNLOAD succeeded.")
+
+        SET(Tengine_FOUND ON)
+        set(BUILD_TENGINE ON)
+    endif()
+ENDIF()
+
+if (BUILD_TENGINE)
+    set(HAVE_TENGINE 1)
+
+    # Android system
+    if(ANDROID)
+        if(${ANDROID_ABI} STREQUAL "armeabi-v7a")
+            set(CONFIG_ARCH_ARM32 ON)
+        elseif(${ANDROID_ABI} STREQUAL "arm64-v8a")
+            set(CONFIG_ARCH_ARM64 ON)
+        endif()
+    endif()
+
+    # Linux system
+    if(CMAKE_SYSTEM_PROCESSOR STREQUAL arm)
+        set(CONFIG_ARCH_ARM32 ON)
+    elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64) ## AARCH64
+        set(CONFIG_ARCH_ARM64 ON)
+    endif()
+
+    SET(DEFAULT_OPENCV_TENGINE_SOURCE_PATH ${OCV_TENGINE_DSTDIRECTORY}/Tengine-${TENGINE_VERSION})
+    set(BUILT_IN_OPENCV ON) ## tell the Tengine build that it is compiled inside OpenCV
+    set(Tengine_INCLUDE_DIR ${DEFAULT_OPENCV_TENGINE_SOURCE_PATH}/core/include)
+    set(Tengine_LIB ${CMAKE_BINARY_DIR}/lib/${ANDROID_ABI}/libtengine.a)
+    if (IS_DIRECTORY ${DEFAULT_OPENCV_TENGINE_SOURCE_PATH})
+        add_subdirectory("${DEFAULT_OPENCV_TENGINE_SOURCE_PATH}" ${OCV_TENGINE_DSTDIRECTORY}/build)
+    endif()
+endif()
+
+
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3a7a6f1d7e..6d36719f10 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -432,6 +432,9 @@ OCV_OPTION(WITH_IMGCODEC_PXM "Include PNM (PBM,PGM,PPM) and PAM formats support"
 OCV_OPTION(WITH_QUIRC "Include library QR-code decoding" ON
   VISIBLE_IF TRUE
   VERIFY HAVE_QUIRC)
+OCV_OPTION(WITH_TENGINE "Include Arm Inference Tengine support" OFF
+  VISIBLE_IF (ARM OR AARCH64) AND UNIX AND NOT ANDROID AND NOT IOS
+  VERIFY HAVE_TENGINE)

 # OpenCV build components
 # ===================================================
@@ -687,6 +690,9 @@ include(cmake/OpenCVFindLibsVideo.cmake)
 include(cmake/OpenCVFindLibsPerf.cmake)
 include(cmake/OpenCVFindLAPACK.cmake)
 include(cmake/OpenCVFindProtobuf.cmake)
+if(WITH_TENGINE)
+  include(cmake/OpenCVFindTengine.cmake)
+endif()

 # ----------------------------------------------------------------------------
 #  Detect other 3rd-party libraries/tools
@@ -1472,6 +1478,10 @@ if(WITH_VA_INTEL OR HAVE_VA_INTEL)
   status(" Intel VA-API/OpenCL:" HAVE_VA_INTEL THEN "YES (OpenCL: ${VA_INTEL_IOCL_ROOT})" ELSE NO)
 endif()

+if(WITH_TENGINE OR HAVE_TENGINE)
+  status(" Tengine:" HAVE_TENGINE THEN "YES (${TENGINE_LIBRARIES})" ELSE NO)
+endif()
+
 if(WITH_LAPACK OR HAVE_LAPACK)
   status(" Lapack:" HAVE_LAPACK THEN "YES (${LAPACK_LIBRARIES})" ELSE NO)
 endif()
diff --git a/cmake/OpenCVFindTengine.cmake b/cmake/OpenCVFindTengine.cmake
new file mode 100644
index 0000000000..3f0bb7029c
--- /dev/null
+++ b/cmake/OpenCVFindTengine.cmake
@@ -0,0 +1,65 @@
+# COPYRIGHT
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# License); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Copyright (c) 2020, OPEN AI LAB
+# Author: qtang@openailab.com or https://github.com/BUG1989
+#
+
+# ----------------------------------------------------------------------------
+# Path for Tengine modules
+# ----------------------------------------------------------------------------
+set(OPENCV_LIBTENGINE_ROOT_DIR "" CACHE PATH "Path to a prebuilt Tengine installation (directory containing include/ and lib/libtengine.a)")
+
+IF(OPENCV_LIBTENGINE_ROOT_DIR)
+
+    MESSAGE(STATUS "TENGINE: Using the Tengine library directory set by the user.")
+
+    SET(Tengine_FOUND ON)
+    set(BUILD_TENGINE OFF)
+
+    SET(Tengine_INCLUDE_DIR ${OPENCV_LIBTENGINE_ROOT_DIR}/include)
+    SET(Tengine_LIB ${OPENCV_LIBTENGINE_ROOT_DIR}/lib/libtengine.a)
+
+ELSE()
+
+    MESSAGE(STATUS "TENGINE: Downloading Tengine source code automatically.")
") + include("${OpenCV_SOURCE_DIR}/3rdparty/libtengine/tengine.cmake") + +ENDIF() + +IF(NOT Tengine_LIB) + SET(Tengine_FOUND OFF) + MESSAGE(STATUS "#### Could not find Tengine lib. Turning Tengine_FOUND off") +ENDIF() + +IF (Tengine_FOUND) + MESSAGE(STATUS "Found Tengine include: ${Tengine_INCLUDE_DIR}") + MESSAGE(STATUS "Found Tengine libraries: ${Tengine_LIB}") + set(HAVE_TENGINE 1) + set(TENGINE_LIBRARIES ${Tengine_LIB}) + set(TENGINE_INCLUDE_DIRS ${Tengine_INCLUDE_DIR}) +ENDIF (Tengine_FOUND) + +MESSAGE(STATUS "Tengine include is:" ${Tengine_INCLUDE_DIR}) +MESSAGE(STATUS "Tengine library is:" ${Tengine_LIB}) + +MARK_AS_ADVANCED( + Tengine_INCLUDE_DIR + Tengine_LIB + Tengine +) diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index c7e07c5543..c9532ade77 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -13,6 +13,9 @@ ocv_add_dispatched_file_force_all("layers/layers_common" AVX AVX2 AVX512_SKX) ocv_add_module(dnn opencv_core opencv_imgproc WRAP python java js) ocv_option(OPENCV_DNN_OPENCL "Build with OpenCL support" HAVE_OPENCL AND NOT APPLE) +if(HAVE_TENGINE) + add_definitions(-DHAVE_TENGINE=1) +endif() if(OPENCV_DNN_OPENCL AND HAVE_OPENCL) add_definitions(-DCV_OCL4DNN=1) @@ -83,6 +86,17 @@ else() set(sources_options EXCLUDE_OPENCL) endif() +if(HAVE_TENGINE) + list(APPEND include_dirs ${TENGINE_INCLUDE_DIRS}) + if(EXISTS ${TENGINE_LIBRARIES}) + list(APPEND libs ${TENGINE_LIBRARIES}) + else() + ocv_add_dependencies(opencv_dnn tengine) + list(APPEND libs ${TENGINE_LIBRARIES}) + endif() +endif() + + ocv_module_include_directories(${include_dirs}) if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-suggest-override") # GCC diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 7b75a77fdc..6deebb194c 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -55,6 +55,9 @@ #include "opencl_kernels_dnn.hpp" using namespace cv::dnn::ocl4dnn; #endif +#ifdef HAVE_TENGINE +#include "../tengine4dnn/include/tengine_graph_convolution.hpp" +#endif namespace cv { @@ -1272,10 +1275,43 @@ public: } } - int nstripes = std::max(getNumThreads(), 1); +#ifdef HAVE_TENGINE + int inch = inputs[0].size[1]; // inch + int in_h = inputs[0].size[2]; // in_h + int in_w = inputs[0].size[3]; // in_w - ParallelConv::run(inputs[0], outputs[0], weightsMat, biasvec, reluslope, - kernel_size, strides, pads_begin, pads_end, dilations, activ.get(), ngroups, nstripes); + int out_b = outputs[0].size[0]; // out batch size + int outch = outputs[0].size[1]; // outch + int out_h = outputs[0].size[2]; // out_h + int out_w = outputs[0].size[3]; // out_w + + float *input_ = inputs[0].ptr(); + float *output_ = outputs[0].ptr(); + float *kernel_ = weightsMat.ptr(); + float *teg_bias = &biasvec[0]; + + bool tengine_ret = tengine_forward(input_, inch, ngroups, in_h, in_w, + output_, out_b, outch, out_h, out_w, + kernel_, kernel_size.size(), kernel.height, kernel.width, + teg_bias, stride.height, stride.width, + pad.height, pad.width, dilation.height, dilation.width, + weightsMat.step1(), padMode); + /* activation */ + if((true == tengine_ret) && activ ) + { + int out_cstep = out_h * out_w; // out_cstep + + ActivationLayer* activ_ = activ.get(); + activ_->forwardSlice(output_, output_, out_cstep, out_cstep, 0, outch); + } + if(false == tengine_ret) +#endif + { + int nstripes = std::max(getNumThreads(), 1); + + ParallelConv::run(inputs[0], 
+        }
     }

     virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
diff --git a/modules/dnn/src/tengine4dnn/include/tengine_graph_convolution.hpp b/modules/dnn/src/tengine4dnn/include/tengine_graph_convolution.hpp
new file mode 100644
index 0000000000..09f6e66da2
--- /dev/null
+++ b/modules/dnn/src/tengine4dnn/include/tengine_graph_convolution.hpp
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2020, OPEN AI LAB
+ * Author: qtang@openailab.com
+ */
+
+#ifndef TENGINE_GRAPH_CONVOLUTION_HPP
+#define TENGINE_GRAPH_CONVOLUTION_HPP
+
+#define FLOAT_TO_REALSIZE (4)
+
+namespace cv
+{
+namespace dnn
+{
+bool tengine_forward(float *input_, int inch, int group, int in_h, int in_w,
+                     float *output_, int out_b, int outch, int out_h, int out_w,
+                     float *kernel_, int kernel_s, int kernel_h, int kernel_w,
+                     float *teg_bias, int stride_h, int stride_w,
+                     int pad_h, int pad_w, int dilation_h, int dilation_w,
+                     size_t wstep, const std::string padMode);
+}
+}
+#endif /* TENGINE_GRAPH_CONVOLUTION_HPP */
diff --git a/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp b/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp
new file mode 100644
index 0000000000..503a4eeb22
--- /dev/null
+++ b/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp
@@ -0,0 +1,357 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2020, OPEN AI LAB
+ * Author: qtang@openailab.com
+ */
+
+#include "../../precomp.hpp"
+#include
+#include
+
+#include
+#include
+
+#include "../include/tengine_graph_convolution.hpp"
+
+#ifdef HAVE_TENGINE
+
+#include "tengine_c_api.h"
+#include "tengine_c_compat.h"
+#include "tengine_operations.h"
+
+namespace cv
+{
+namespace dnn
+{
+
+int create_input_node(graph_t graph, const char* node_name, int inch, int in_h, int in_w)
+{
+    node_t node = create_graph_node(graph, node_name, "InputOp");
+    tensor_t tensor = create_graph_tensor(graph, node_name, TENGINE_DT_FP32);
+    set_node_output_tensor(node, 0, tensor, TENSOR_TYPE_INPUT);
+
+    int dims[4] = {1, inch, in_h, in_w};
+    set_tensor_shape(tensor, dims, 4);
+
+    release_graph_tensor(tensor);
+    release_graph_node(node);
+
+    return 0;
+}
+
+int create_conv_node(graph_t graph, const char* node_name, const char* input_name, int in_h, int in_w, int out_h, int out_w,
+                     int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, int pad_w, int inch, int outch, int group,
+                     int dilation_h, int dilation_w, int activation, std::string padMode)
+{
+    node_t conv_node = create_graph_node(graph, node_name, "Convolution");
+    tensor_t input_tensor = get_graph_tensor(graph, input_name);
+
+    if (input_tensor == NULL)
+    {
+        CV_LOG_WARNING(NULL, "Tengine: input_tensor is NULL.");
+        return -1;
+    }
+
+    set_node_input_tensor(conv_node, 0, input_tensor);
+    release_graph_tensor(input_tensor);
+
+    /* output */
+    tensor_t output_tensor = create_graph_tensor(graph, node_name, TENGINE_DT_FP32);
+
+    set_node_output_tensor(conv_node, 0, output_tensor, TENSOR_TYPE_VAR);
+    release_graph_tensor(output_tensor);
+
+    /* weight */
+    std::string weight_name(node_name);
+    weight_name += "/weight";
+
+    node_t w_node = create_graph_node(graph, weight_name.c_str(), "Const");
+    tensor_t w_tensor = create_graph_tensor(graph, weight_name.c_str(), TENGINE_DT_FP32);
+    set_node_output_tensor(w_node, 0, w_tensor, TENSOR_TYPE_CONST);
+    set_node_input_tensor(conv_node, 1, w_tensor);
+    int w_dims[] = {outch, inch / group, kernel_h, kernel_w};
+
+    set_tensor_shape(w_tensor, w_dims, 4);
+
+    release_graph_node(w_node);
+    release_graph_tensor(w_tensor);
+
+    /* bias */
+    std::string bias_name(node_name);
+    bias_name += "/bias";
+
+    node_t b_node = create_graph_node(graph, bias_name.c_str(), "Const");
+    tensor_t b_tensor = create_graph_tensor(graph, bias_name.c_str(), TENGINE_DT_FP32);
+    set_node_output_tensor(b_node, 0, b_tensor, TENSOR_TYPE_CONST);
+    int b_dims[] = {outch};
+
+    set_tensor_shape(b_tensor, b_dims, 1);
+
+    set_node_input_tensor(conv_node, 2, b_tensor);
+    release_graph_node(b_node);
+    release_graph_tensor(b_tensor);
+
+    int pad_h1 = pad_h;
+    int pad_w1 = pad_w;
+
+    if (!padMode.empty())
+    {
+        if (padMode == "SAME")
+        {
+            int out_h_temp = (in_h - kernel_h + 2*pad_h) / stride_h + 1;
+            int out_w_temp = (in_w - kernel_w + 2*pad_w) / stride_w + 1;
+
+            if (out_h_temp < out_h)
+                pad_h1 += 1;
+            if (out_w_temp < out_w)
+                pad_w1 += 1;
+        }
+    }
+
+    /* attr */
+    set_node_attr_int(conv_node, "kernel_h", &kernel_h);
+    set_node_attr_int(conv_node, "kernel_w", &kernel_w);
+    set_node_attr_int(conv_node, "stride_h", &stride_h);
+    set_node_attr_int(conv_node, "stride_w", &stride_w);
+    set_node_attr_int(conv_node, "pad_h0", &pad_h);
+    set_node_attr_int(conv_node, "pad_w0", &pad_w);
+    set_node_attr_int(conv_node, "pad_h1", &pad_h1);
+    set_node_attr_int(conv_node, "pad_w1", &pad_w1);
+    set_node_attr_int(conv_node, "output_channel", &outch);
+    set_node_attr_int(conv_node, "group", &group);
+    set_node_attr_int(conv_node, "dilation_h", &dilation_h);
+    set_node_attr_int(conv_node, "dilation_w", &dilation_w);
+    set_node_attr_int(conv_node, "activation", &activation);
+
+    release_graph_node(conv_node);
+
+    return 0;
+}
+
+graph_t create_conv_graph(float *input_data, int inch, int group, int in_h, int in_w,
+                          float *output_data, int outch, int out_h, int out_w,
+                          int kernel_h, int kernel_w,
+                          int stride_h, int stride_w,
+                          int pad_h, int pad_w, int dilation_h, int dilation_w, int activation,
+                          float *teg_weight, float *teg_bias, std::string padMode)
+{
+    node_t conv_node = NULL;
+
+    tensor_t input_tensor  = NULL;
+    tensor_t output_tensor = NULL;
+    tensor_t weight_tensor = NULL;
+    tensor_t bias_tensor   = NULL;
+
+    /* create graph for convolution */
+    int in_size     = in_h * in_w * inch;
+    int out_size    = out_h * out_w * outch;
+    int weight_size = outch * (inch / group) * kernel_w * kernel_h;
+    int bias_size   = outch;
+
+    int buf_size  = 0;
+    int input_num = 0;
+
+    /* create graph */
+    graph_t graph = create_graph(NULL, NULL, NULL);
+    bool ok = true;
+
+    if (graph == NULL)
+    {
+        CV_LOG_WARNING(NULL, "Tengine: create_graph failed.");
+        ok = false;
+    }
+
+    const char* input_name = "data";
+    const char* conv_name  = "conv";
+
+    if (ok && create_input_node(graph, input_name, inch, in_h, in_w) < 0)
+    {
+        CV_LOG_WARNING(NULL, "Tengine: create_input_node failed.");
+        ok = false;
+    }
+
+    if (ok && create_conv_node(graph, conv_name, input_name, in_h, in_w, out_h, out_w, kernel_h, kernel_w,
+                               stride_h, stride_w, pad_h, pad_w, inch, outch, group, dilation_h, dilation_w, activation, padMode) < 0)
+    {
+        CV_LOG_WARNING(NULL, "Tengine: create_conv_node failed.");
+        ok = false;
+    }
+
+    /* set input/output node */
+    const char* inputs_name[]  = {input_name};
+    const char* outputs_name[] = {conv_name};
+
+    if (ok && set_graph_input_node(graph, inputs_name, sizeof(inputs_name) / sizeof(char*)) < 0)
+    {
+        CV_LOG_WARNING(NULL, "Tengine: set inputs failed.");
+        ok = false;
+    }
+
+    if (ok && set_graph_output_node(graph, outputs_name, sizeof(outputs_name) / sizeof(char*)) < 0)
+    {
+        CV_LOG_WARNING(NULL, "Tengine: set outputs failed.");
+        ok = false;
+    }
+
+    /* set input data */
+    if (ok)
+    {
+        input_tensor = get_graph_input_tensor(graph, 0, 0);
+        buf_size     = get_tensor_buffer_size(input_tensor);
+        if (buf_size != in_size * FLOAT_TO_REALSIZE)
+        {
+            CV_LOG_WARNING(NULL, "Tengine: input data size check failed.");
+            ok = false;
+        }
+    }
+
+    if (ok)
+    {
+        set_tensor_buffer(input_tensor, (float *)input_data, buf_size);
+        release_graph_tensor(input_tensor);
+
+        /* set weight node */
+        conv_node     = get_graph_node(graph, "conv");
+        weight_tensor = get_node_input_tensor(conv_node, 1);
+        buf_size      = get_tensor_buffer_size(weight_tensor);
+
+        if (buf_size != weight_size * FLOAT_TO_REALSIZE)
+        {
+            CV_LOG_WARNING(NULL, "Tengine: input weight size check failed.");
+            ok = false;
+        }
+    }
+
+    if (ok)
+    {
+        set_tensor_buffer(weight_tensor, teg_weight, buf_size);
+
+        /* set bias node */
+        input_num = get_node_input_number(conv_node);
+        if (input_num > 2)
+        {
+            bias_tensor = get_node_input_tensor(conv_node, 2);
+            buf_size    = get_tensor_buffer_size(bias_tensor);
+            if (buf_size != bias_size * FLOAT_TO_REALSIZE)
+            {
+                CV_LOG_WARNING(NULL, "Tengine: input bias size check failed.");
"); + ok = false; + } + else set_tensor_buffer(bias_tensor, teg_bias, buf_size); + } + } + + if (ok) + { + /* set output data */ + output_tensor = get_node_output_tensor(conv_node, 0); + int ret = set_tensor_buffer(output_tensor, output_data, out_size * FLOAT_TO_REALSIZE); + if(ret) + { + CV_LOG_WARNING(NULL,"Tengine :Set output tensor buffer failed . " ); + } + } + + if (!ok) + { + destroy_graph(graph); + return NULL; + } + return graph; +} + +bool tengine_forward(float *input_, int inch, int group, int in_h, int in_w, + float *output_, int out_b, int outch, int out_h, int out_w, + float *kernel_, int kernel_s ,int kernel_h, int kernel_w, + float *teg_bias, int stride_h,int stride_w, + int pad_h, int pad_w, int dilation_h, int dilation_w, + size_t wstep,const std::string padMode) +{ + graph_t graph = NULL; + std::vector teg_weight_vec; + float *teg_weight = NULL; + int kernel_inwh = (inch / group) * kernel_w * kernel_h; + // Do not using the activation fuse mode, just convolution only. + int activation = -1; + + if (!(kernel_s == 2 && kernel_h == kernel_w && pad_h == pad_w + && dilation_h == dilation_w && stride_h == stride_w + && out_b == 1 && pad_h < 10)) // just for Conv2D + return false; + + { + /*printf("Tengine: input (1 x %d x %d x %d),output (%d x %d x %d x %d), kernel (%d x %d), stride (%d x %d), dilation (%d x %d), pad (%d x %d).\n", + inch, in_h, in_w, + out_b,outch,out_h,out_w, + kernel_w, kernel_h, + stride_w, stride_h, + dilation_w, dilation_h, + pad_w,pad_h);*/ + + // weight + if (kernel_inwh != wstep) + { + teg_weight_vec.resize(kernel_inwh * outch); + teg_weight = &teg_weight_vec[0]; + for (int i=0; i