mirror of
https://github.com/opencv/opencv.git
synced 2025-08-06 06:26:29 +08:00
Merge pull request #10228 from pengli:dnn_new
This commit is contained in:
commit
6074f92d48
@ -185,12 +185,13 @@ public:
|
||||
outs.getUMatVector(outputs);
|
||||
|
||||
int cAxis = clamp(axis, inputs[0].dims);
|
||||
if (!(cAxis == 1 && outputs[0].dims == 4 && !padding))
|
||||
if (padding)
|
||||
return false;
|
||||
|
||||
int bottom_concat_axis;
|
||||
int concat_size = inputs[0].size[2] * inputs[0].size[3];
|
||||
int top_concat_axis = outputs[0].size[1];
|
||||
int concat_size = total(shape(inputs[0]), cAxis + 1);
|
||||
int top_concat_axis = outputs[0].size[cAxis];
|
||||
int num_concats = total(shape(inputs[0]), 0, cAxis);
|
||||
int offset_concat_axis = 0;
|
||||
UMat& outMat = outputs[0];
|
||||
String buildopt = String("-DDtype=") + ocl::typeToStr(inputs[0].type()) + String(" ");
|
||||
@ -202,12 +203,12 @@ public:
|
||||
return false;
|
||||
|
||||
UMat& inpMat = inputs[i];
|
||||
bottom_concat_axis = inputs[i].size[1];
|
||||
bottom_concat_axis = inputs[i].size[cAxis];
|
||||
size_t nthreads = inputs[i].total();
|
||||
|
||||
kernel.set(0, (int)nthreads);
|
||||
kernel.set(1, ocl::KernelArg::PtrReadOnly(inpMat));
|
||||
kernel.set(2, (int)inputs[i].size[0]);
|
||||
kernel.set(2, (int)num_concats);
|
||||
kernel.set(3, (int)concat_size);
|
||||
kernel.set(4, (int)top_concat_axis);
|
||||
kernel.set(5, (int)bottom_concat_axis);
|
||||
|
@ -45,6 +45,7 @@
|
||||
#include <float.h>
|
||||
#include <string>
|
||||
#include "../nms.inl.hpp"
|
||||
#include "opencl_kernels_dnn.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@ -211,11 +212,160 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
// Decode all bboxes in a batch
|
||||
bool ocl_DecodeBBoxesAll(UMat& loc_mat, UMat& prior_mat,
|
||||
const int num, const int numPriors, const bool share_location,
|
||||
const int num_loc_classes, const int background_label_id,
|
||||
const cv::String& code_type, const bool variance_encoded_in_target,
|
||||
const bool clip, std::vector<LabelBBox>& all_decode_bboxes)
|
||||
{
|
||||
UMat outmat = UMat(loc_mat.dims, loc_mat.size, CV_32F);
|
||||
size_t nthreads = loc_mat.total();
|
||||
String kernel_name;
|
||||
|
||||
if (code_type == "CORNER")
|
||||
kernel_name = "DecodeBBoxesCORNER";
|
||||
else if (code_type == "CENTER_SIZE")
|
||||
kernel_name = "DecodeBBoxesCENTER_SIZE";
|
||||
else
|
||||
return false;
|
||||
|
||||
for (int i = 0; i < num; ++i)
|
||||
{
|
||||
ocl::Kernel kernel(kernel_name.c_str(), ocl::dnn::detection_output_oclsrc);
|
||||
kernel.set(0, (int)nthreads);
|
||||
kernel.set(1, ocl::KernelArg::PtrReadOnly(loc_mat));
|
||||
kernel.set(2, ocl::KernelArg::PtrReadOnly(prior_mat));
|
||||
kernel.set(3, (int)variance_encoded_in_target);
|
||||
kernel.set(4, (int)numPriors);
|
||||
kernel.set(5, (int)share_location);
|
||||
kernel.set(6, (int)num_loc_classes);
|
||||
kernel.set(7, (int)background_label_id);
|
||||
kernel.set(8, (int)clip);
|
||||
kernel.set(9, ocl::KernelArg::PtrWriteOnly(outmat));
|
||||
|
||||
if (!kernel.run(1, &nthreads, NULL, false))
|
||||
return false;
|
||||
}
|
||||
|
||||
all_decode_bboxes.clear();
|
||||
all_decode_bboxes.resize(num);
|
||||
{
|
||||
Mat mat = outmat.getMat(ACCESS_READ);
|
||||
const float* decode_data = mat.ptr<float>();
|
||||
for (int i = 0; i < num; ++i)
|
||||
{
|
||||
LabelBBox& decode_bboxes = all_decode_bboxes[i];
|
||||
for (int c = 0; c < num_loc_classes; ++c)
|
||||
{
|
||||
int label = share_location ? -1 : c;
|
||||
decode_bboxes[label].resize(numPriors);
|
||||
for (int p = 0; p < numPriors; ++p)
|
||||
{
|
||||
int startIdx = p * num_loc_classes * 4;
|
||||
util::NormalizedBBox& bbox = decode_bboxes[label][p];
|
||||
bbox.xmin = decode_data[startIdx + c * 4];
|
||||
bbox.ymin = decode_data[startIdx + c * 4 + 1];
|
||||
bbox.xmax = decode_data[startIdx + c * 4 + 2];
|
||||
bbox.ymax = decode_data[startIdx + c * 4 + 3];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void ocl_GetConfidenceScores(const UMat& inp1, const int num,
|
||||
const int numPredsPerClass, const int numClasses,
|
||||
std::vector<Mat>& confPreds)
|
||||
{
|
||||
int shape[] = { numClasses, numPredsPerClass };
|
||||
for (int i = 0; i < num; i++)
|
||||
confPreds.push_back(Mat(2, shape, CV_32F));
|
||||
|
||||
UMat umat = inp1.reshape(1, num * numPredsPerClass);
|
||||
for (int i = 0; i < num; ++i)
|
||||
{
|
||||
Range ranges[] = { Range(i * numPredsPerClass, (i + 1) * numPredsPerClass), Range::all() };
|
||||
transpose(umat(ranges), confPreds[i]);
|
||||
}
|
||||
}
|
||||
|
||||
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||
{
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
inps.getUMatVector(inputs);
|
||||
outs.getUMatVector(outputs);
|
||||
|
||||
std::vector<LabelBBox> allDecodedBBoxes;
|
||||
std::vector<Mat> allConfidenceScores;
|
||||
|
||||
int num = inputs[0].size[0];
|
||||
|
||||
// extract predictions from input layers
|
||||
{
|
||||
int numPriors = inputs[2].size[2] / 4;
|
||||
|
||||
// Retrieve all confidences
|
||||
ocl_GetConfidenceScores(inputs[1], num, numPriors, _numClasses, allConfidenceScores);
|
||||
|
||||
// Decode all loc predictions to bboxes
|
||||
bool ret = ocl_DecodeBBoxesAll(inputs[0], inputs[2], num, numPriors,
|
||||
_shareLocation, _numLocClasses, _backgroundLabelId,
|
||||
_codeType, _varianceEncodedInTarget, false,
|
||||
allDecodedBBoxes);
|
||||
if (!ret)
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t numKept = 0;
|
||||
std::vector<std::map<int, std::vector<int> > > allIndices;
|
||||
for (int i = 0; i < num; ++i)
|
||||
{
|
||||
numKept += processDetections_(allDecodedBBoxes[i], allConfidenceScores[i], allIndices);
|
||||
}
|
||||
|
||||
if (numKept == 0)
|
||||
{
|
||||
// Set confidences to zeros.
|
||||
Range ranges[] = {Range::all(), Range::all(), Range::all(), Range(2, 3)};
|
||||
outputs[0](ranges).setTo(0);
|
||||
return true;
|
||||
}
|
||||
int outputShape[] = {1, 1, (int)numKept, 7};
|
||||
UMat umat = UMat(4, outputShape, CV_32F);
|
||||
{
|
||||
Mat mat = umat.getMat(ACCESS_WRITE);
|
||||
float* outputsData = mat.ptr<float>();
|
||||
|
||||
size_t count = 0;
|
||||
for (int i = 0; i < num; ++i)
|
||||
{
|
||||
count += outputDetections_(i, &outputsData[count * 7],
|
||||
allDecodedBBoxes[i], allConfidenceScores[i],
|
||||
allIndices[i]);
|
||||
}
|
||||
CV_Assert(count == numKept);
|
||||
}
|
||||
outputs.clear();
|
||||
outputs.push_back(umat);
|
||||
outs.assign(outputs);
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
}
|
||||
|
||||
@ -225,7 +375,7 @@ public:
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
std::vector<LabelBBox> allDecodedBBoxes;
|
||||
std::vector<std::vector<std::vector<float> > > allConfidenceScores;
|
||||
std::vector<Mat> allConfidenceScores;
|
||||
|
||||
int num = inputs[0]->size[0];
|
||||
|
||||
@ -286,7 +436,7 @@ public:
|
||||
|
||||
size_t outputDetections_(
|
||||
const int i, float* outputsData,
|
||||
const LabelBBox& decodeBBoxes, const std::vector<std::vector<float> >& confidenceScores,
|
||||
const LabelBBox& decodeBBoxes, Mat& confidenceScores,
|
||||
const std::map<int, std::vector<int> >& indicesMap
|
||||
)
|
||||
{
|
||||
@ -294,9 +444,9 @@ public:
|
||||
for (std::map<int, std::vector<int> >::const_iterator it = indicesMap.begin(); it != indicesMap.end(); ++it)
|
||||
{
|
||||
int label = it->first;
|
||||
if (confidenceScores.size() <= label)
|
||||
if (confidenceScores.rows <= label)
|
||||
CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find confidence predictions for label %d", label));
|
||||
const std::vector<float>& scores = confidenceScores[label];
|
||||
const std::vector<float>& scores = confidenceScores.row(label);
|
||||
int locLabel = _shareLocation ? -1 : label;
|
||||
LabelBBox::const_iterator label_bboxes = decodeBBoxes.find(locLabel);
|
||||
if (label_bboxes == decodeBBoxes.end())
|
||||
@ -320,7 +470,7 @@ public:
|
||||
}
|
||||
|
||||
size_t processDetections_(
|
||||
const LabelBBox& decodeBBoxes, const std::vector<std::vector<float> >& confidenceScores,
|
||||
const LabelBBox& decodeBBoxes, Mat& confidenceScores,
|
||||
std::vector<std::map<int, std::vector<int> > >& allIndices
|
||||
)
|
||||
{
|
||||
@ -330,10 +480,10 @@ public:
|
||||
{
|
||||
if (c == _backgroundLabelId)
|
||||
continue; // Ignore background class.
|
||||
if (c >= confidenceScores.size())
|
||||
if (c >= confidenceScores.rows)
|
||||
CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find confidence predictions for label %d", c));
|
||||
|
||||
const std::vector<float>& scores = confidenceScores[c];
|
||||
const std::vector<float> scores = confidenceScores.row(c);
|
||||
int label = _shareLocation ? -1 : c;
|
||||
|
||||
LabelBBox::const_iterator label_bboxes = decodeBBoxes.find(label);
|
||||
@ -351,9 +501,9 @@ public:
|
||||
{
|
||||
int label = it->first;
|
||||
const std::vector<int>& labelIndices = it->second;
|
||||
if (label >= confidenceScores.size())
|
||||
if (label >= confidenceScores.rows)
|
||||
CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find location predictions for label %d", label));
|
||||
const std::vector<float>& scores = confidenceScores[label];
|
||||
const std::vector<float>& scores = confidenceScores.row(label);
|
||||
for (size_t j = 0; j < labelIndices.size(); ++j)
|
||||
{
|
||||
size_t idx = labelIndices[j];
|
||||
@ -630,20 +780,20 @@ public:
|
||||
// confidence prediction for an image.
|
||||
static void GetConfidenceScores(const float* confData, const int num,
|
||||
const int numPredsPerClass, const int numClasses,
|
||||
std::vector<std::vector<std::vector<float> > >& confPreds)
|
||||
std::vector<Mat>& confPreds)
|
||||
{
|
||||
confPreds.clear(); confPreds.resize(num);
|
||||
int shape[] = { numClasses, numPredsPerClass };
|
||||
for (int i = 0; i < num; i++)
|
||||
confPreds.push_back(Mat(2, shape, CV_32F));
|
||||
|
||||
for (int i = 0; i < num; ++i, confData += numPredsPerClass * numClasses)
|
||||
{
|
||||
std::vector<std::vector<float> >& labelScores = confPreds[i];
|
||||
labelScores.resize(numClasses);
|
||||
Mat labelScores = confPreds[i];
|
||||
for (int c = 0; c < numClasses; ++c)
|
||||
{
|
||||
std::vector<float>& classLabelScores = labelScores[c];
|
||||
classLabelScores.resize(numPredsPerClass);
|
||||
for (int p = 0; p < numPredsPerClass; ++p)
|
||||
{
|
||||
classLabelScores[p] = confData[p * numClasses + c];
|
||||
labelScores.at<float>(c, p) = confData[p * numClasses + c];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -44,6 +44,7 @@
|
||||
#include "layers_common.hpp"
|
||||
#include <float.h>
|
||||
#include <algorithm>
|
||||
#include "opencl_kernels_dnn.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@ -173,6 +174,24 @@ public:
|
||||
CV_Assert((int)_numAxes == inp0.dims);
|
||||
|
||||
computeStrides(shape(*inputs[0]), shape(outputs[0]));
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
if (uorder.empty())
|
||||
{
|
||||
std::vector<int> orderVec(_order.begin(), _order.end());;
|
||||
Mat morder(1, orderVec.size(), CV_32SC1, &orderVec[0]);
|
||||
|
||||
std::vector<int> oldStrideVec(_oldStride.begin(), _oldStride.end());
|
||||
Mat mold_stride(1, _oldStride.size(), CV_32SC1, &oldStrideVec[0]);
|
||||
|
||||
std::vector<int> newStrideVec(_newStride.begin(), _newStride.end());
|
||||
Mat mnew_stride(1, newStrideVec.size(), CV_32SC1, &newStrideVec[0]);
|
||||
|
||||
morder.copyTo(uorder);
|
||||
mold_stride.copyTo(uold_stride);
|
||||
mnew_stride.copyTo(unew_stride);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
class PermuteInvoker : public ParallelLoopBody
|
||||
@ -247,11 +266,47 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||
{
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
inps.getUMatVector(inputs);
|
||||
outs.getUMatVector(outputs);
|
||||
|
||||
if (!_needsPermute)
|
||||
return false;
|
||||
|
||||
for (size_t i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
ocl::Kernel kernel("permute", ocl::dnn::permute_oclsrc);
|
||||
|
||||
kernel.set(0, (int)_count);
|
||||
kernel.set(1, ocl::KernelArg::PtrReadOnly(inputs[i]));
|
||||
kernel.set(2, ocl::KernelArg::PtrReadOnly(uorder));
|
||||
kernel.set(3, ocl::KernelArg::PtrReadOnly(uold_stride));
|
||||
kernel.set(4, ocl::KernelArg::PtrReadOnly(unew_stride));
|
||||
kernel.set(5, (int)_numAxes);
|
||||
kernel.set(6, ocl::KernelArg::PtrWriteOnly(outputs[i]));
|
||||
|
||||
if (!kernel.run(1, &_count, NULL, false))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
}
|
||||
|
||||
@ -325,6 +380,10 @@ public:
|
||||
std::vector<size_t> _newStride;
|
||||
bool _needsPermute;
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
UMat uorder, uold_stride, unew_stride;
|
||||
#endif
|
||||
|
||||
size_t _numAxes;
|
||||
};
|
||||
|
||||
|
@ -44,6 +44,7 @@
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
#include <opencv2/dnn/all_layers.hpp>
|
||||
#include <iostream>
|
||||
#include "opencl_kernels_dnn.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@ -114,11 +115,83 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||
{
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
inps.getUMatVector(inputs);
|
||||
outs.getUMatVector(outputs);
|
||||
|
||||
if (useSoftmaxTree) { // Yolo 9000
|
||||
CV_Error(cv::Error::StsNotImplemented, "Yolo9000 is not implemented");
|
||||
return false;
|
||||
}
|
||||
|
||||
CV_Assert(inputs.size() >= 1);
|
||||
int const cell_size = classes + coords + 1;
|
||||
UMat blob_umat = blobs[0].getUMat(ACCESS_READ);
|
||||
|
||||
for (size_t ii = 0; ii < outputs.size(); ii++)
|
||||
{
|
||||
UMat& inpBlob = inputs[ii];
|
||||
UMat& outBlob = outputs[ii];
|
||||
|
||||
int rows = inpBlob.size[1];
|
||||
int cols = inpBlob.size[2];
|
||||
|
||||
ocl::Kernel logistic_kernel("logistic_activ", ocl::dnn::region_oclsrc);
|
||||
size_t global = rows*cols*anchors;
|
||||
logistic_kernel.set(0, (int)global);
|
||||
logistic_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob));
|
||||
logistic_kernel.set(2, (int)cell_size);
|
||||
logistic_kernel.set(3, ocl::KernelArg::PtrWriteOnly(outBlob));
|
||||
logistic_kernel.run(1, &global, NULL, false);
|
||||
|
||||
if (useSoftmax)
|
||||
{
|
||||
// Yolo v2
|
||||
// softmax activation for Probability, for each grid cell (X x Y x Anchor-index)
|
||||
ocl::Kernel softmax_kernel("softmax_activ", ocl::dnn::region_oclsrc);
|
||||
size_t nthreads = rows*cols*anchors;
|
||||
softmax_kernel.set(0, (int)nthreads);
|
||||
softmax_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob));
|
||||
softmax_kernel.set(2, ocl::KernelArg::PtrReadOnly(blob_umat));
|
||||
softmax_kernel.set(3, (int)cell_size);
|
||||
softmax_kernel.set(4, (int)classes);
|
||||
softmax_kernel.set(5, (int)classfix);
|
||||
softmax_kernel.set(6, (int)rows);
|
||||
softmax_kernel.set(7, (int)cols);
|
||||
softmax_kernel.set(8, (int)anchors);
|
||||
softmax_kernel.set(9, (float)thresh);
|
||||
softmax_kernel.set(10, ocl::KernelArg::PtrWriteOnly(outBlob));
|
||||
if (!softmax_kernel.run(1, &nthreads, NULL, false))
|
||||
return false;
|
||||
}
|
||||
|
||||
if (nmsThreshold > 0) {
|
||||
Mat mat = outBlob.getMat(ACCESS_WRITE);
|
||||
float *dstData = mat.ptr<float>();
|
||||
do_nms_sort(dstData, rows*cols*anchors, nmsThreshold);
|
||||
//do_nms(dstData, rows*cols*anchors, nmsThreshold);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
}
|
||||
|
||||
|
@ -44,6 +44,7 @@
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
#include <opencv2/dnn/all_layers.hpp>
|
||||
#include <iostream>
|
||||
#include "opencl_kernels_dnn.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@ -86,11 +87,54 @@ public:
|
||||
return backendId == DNN_BACKEND_DEFAULT;
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||
{
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
inps.getUMatVector(inputs);
|
||||
outs.getUMatVector(outputs);
|
||||
String buildopt = String("-DDtype=") + ocl::typeToStr(inputs[0].type()) + String(" ");
|
||||
|
||||
for (size_t i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
ocl::Kernel kernel("reorg", ocl::dnn::reorg_oclsrc, buildopt);
|
||||
if (kernel.empty())
|
||||
return false;
|
||||
|
||||
UMat& srcBlob = inputs[i];
|
||||
UMat& dstBlob = outputs[0];
|
||||
int channels = srcBlob.size[1];
|
||||
int height = srcBlob.size[2];
|
||||
int width = srcBlob.size[3];
|
||||
size_t nthreads = channels * height * width;
|
||||
|
||||
kernel.set(0, (int)nthreads);
|
||||
kernel.set(1, ocl::KernelArg::PtrReadOnly(srcBlob));
|
||||
kernel.set(2, (int)channels);
|
||||
kernel.set(3, (int)height);
|
||||
kernel.set(4, (int)width);
|
||||
kernel.set(5, (int)reorgStride);
|
||||
kernel.set(6, ocl::KernelArg::PtrWriteOnly(dstBlob));
|
||||
|
||||
if (!kernel.run(1, &nthreads, NULL, false))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
}
|
||||
|
||||
|
@ -182,11 +182,40 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||
{
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
inps.getUMatVector(inputs);
|
||||
outs.getUMatVector(outputs);
|
||||
|
||||
for (size_t i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
UMat srcBlob = inputs[i];
|
||||
void *src_handle = inputs[i].handle(ACCESS_READ);
|
||||
void *dst_handle = outputs[i].handle(ACCESS_WRITE);
|
||||
if (src_handle != dst_handle)
|
||||
{
|
||||
MatShape outShape = shape(outputs[i]);
|
||||
UMat umat = srcBlob.reshape(1, (int)outShape.size(), &outShape[0]);
|
||||
umat.copyTo(outputs[i]);
|
||||
}
|
||||
}
|
||||
outs.assign(outputs);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
}
|
||||
|
||||
|
181
modules/dnn/src/opencl/detection_output.cl
Normal file
181
modules/dnn/src/opencl/detection_output.cl
Normal file
@ -0,0 +1,181 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#define Dtype float
|
||||
#define Dtype4 float4
|
||||
|
||||
__kernel void DecodeBBoxesCORNER(const int nthreads,
|
||||
__global const Dtype* loc_data,
|
||||
__global const Dtype* prior_data,
|
||||
const int variance_encoded_in_target,
|
||||
const int num_priors,
|
||||
const int share_location,
|
||||
const int num_loc_classes,
|
||||
const int background_label_id,
|
||||
const int clip_bbox,
|
||||
__global Dtype* bbox_data)
|
||||
{
|
||||
for (int index = get_global_id(0); index < nthreads; index += get_global_size(0))
|
||||
{
|
||||
Dtype bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax;
|
||||
const int i = index % 4;
|
||||
const int p = ((index / 4 / num_loc_classes) % num_priors) * 4;
|
||||
|
||||
const int c = (index / 4) % num_loc_classes;
|
||||
int label = share_location ? -1 : c;
|
||||
if (label == background_label_id)
|
||||
return; // Ignore background class.
|
||||
|
||||
Dtype4 loc_vec = vload4(0, loc_data + index - i);
|
||||
Dtype4 bbox_vec, prior_variance;
|
||||
if (variance_encoded_in_target)
|
||||
{
|
||||
bbox_vec = loc_vec;
|
||||
} else {
|
||||
const int start_index = num_priors * 4 + p;
|
||||
prior_variance = vload4(0, prior_data + start_index);
|
||||
bbox_vec = loc_vec * prior_variance;
|
||||
}
|
||||
|
||||
bbox_xmin = bbox_vec.x;
|
||||
bbox_ymin = bbox_vec.y;
|
||||
bbox_xmax = bbox_vec.z;
|
||||
bbox_ymax = bbox_vec.w;
|
||||
|
||||
Dtype4 prior_vec = vload4(0, prior_data + p);
|
||||
Dtype val;
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
val = prior_vec.x + bbox_xmin;
|
||||
break;
|
||||
case 1:
|
||||
val = prior_vec.y + bbox_ymin;
|
||||
break;
|
||||
case 2:
|
||||
val = prior_vec.z + bbox_xmax;
|
||||
break;
|
||||
case 3:
|
||||
val = prior_vec.w + bbox_ymax;
|
||||
break;
|
||||
}
|
||||
|
||||
if (clip_bbox)
|
||||
val = max(min(val, (Dtype)1.), (Dtype)0.);
|
||||
|
||||
bbox_data[index] = val;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void DecodeBBoxesCENTER_SIZE(const int nthreads,
|
||||
__global const Dtype* loc_data,
|
||||
__global const Dtype* prior_data,
|
||||
const int variance_encoded_in_target,
|
||||
const int num_priors,
|
||||
const int share_location,
|
||||
const int num_loc_classes,
|
||||
const int background_label_id,
|
||||
const int clip_bbox,
|
||||
__global Dtype* bbox_data)
|
||||
{
|
||||
for (int index = get_global_id(0); index < nthreads; index += get_global_size(0))
|
||||
{
|
||||
Dtype bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax;
|
||||
const int i = index % 4;
|
||||
const int p = ((index / 4 / num_loc_classes) % num_priors) * 4;
|
||||
|
||||
const int c = (index / 4) % num_loc_classes;
|
||||
int label = share_location ? -1 : c;
|
||||
if (label == background_label_id)
|
||||
return; // Ignore background class.
|
||||
|
||||
Dtype4 loc_vec = vload4(0, loc_data + index - i);
|
||||
Dtype4 bbox_vec, prior_variance;
|
||||
if (variance_encoded_in_target)
|
||||
{
|
||||
bbox_vec = loc_vec;
|
||||
} else {
|
||||
const int start_index = num_priors * 4 + p;
|
||||
prior_variance = vload4(0, prior_data + start_index);
|
||||
bbox_vec = loc_vec * prior_variance;
|
||||
}
|
||||
|
||||
bbox_xmin = bbox_vec.x;
|
||||
bbox_ymin = bbox_vec.y;
|
||||
bbox_xmax = bbox_vec.z;
|
||||
bbox_ymax = bbox_vec.w;
|
||||
|
||||
Dtype4 prior_vec = vload4(0, prior_data + p);
|
||||
Dtype prior_width = prior_vec.z - prior_vec.x;
|
||||
Dtype prior_height = prior_vec.w - prior_vec.y;
|
||||
Dtype prior_center_x = (prior_vec.x + prior_vec.z) * .5;
|
||||
Dtype prior_center_y = (prior_vec.y + prior_vec.w) * .5;
|
||||
|
||||
Dtype decode_bbox_center_x, decode_bbox_center_y;
|
||||
Dtype decode_bbox_width, decode_bbox_height;
|
||||
decode_bbox_center_x = bbox_xmin * prior_width + prior_center_x;
|
||||
decode_bbox_center_y = bbox_ymin * prior_height + prior_center_y;
|
||||
decode_bbox_width = exp(bbox_xmax) * prior_width;
|
||||
decode_bbox_height = exp(bbox_ymax) * prior_height;
|
||||
|
||||
Dtype val;
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
val = decode_bbox_center_x - decode_bbox_width * .5;
|
||||
break;
|
||||
case 1:
|
||||
val = decode_bbox_center_y - decode_bbox_height * .5;
|
||||
break;
|
||||
case 2:
|
||||
val = decode_bbox_center_x + decode_bbox_width * .5;
|
||||
break;
|
||||
case 3:
|
||||
val = decode_bbox_center_y + decode_bbox_height * .5;
|
||||
break;
|
||||
}
|
||||
|
||||
if (clip_bbox)
|
||||
val = max(min(val, (Dtype)1.), (Dtype)0.);
|
||||
|
||||
bbox_data[index] = val;
|
||||
}
|
||||
}
|
67
modules/dnn/src/opencl/permute.cl
Normal file
67
modules/dnn/src/opencl/permute.cl
Normal file
@ -0,0 +1,67 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#define Dtype float
|
||||
|
||||
__kernel void permute(const int nthreads,
|
||||
__global Dtype* bottom_data,
|
||||
global int* permute_order,
|
||||
global int* oldStride,
|
||||
global int* newStride,
|
||||
const int num_axes,
|
||||
__global Dtype* top_data)
|
||||
{
|
||||
for (int i = get_global_id(0); i < nthreads; i += get_global_size(0))
|
||||
{
|
||||
int oldPosition = 0;
|
||||
int newPosition = i;
|
||||
|
||||
for (int j = 0; j < num_axes; ++j)
|
||||
{
|
||||
int order = permute_order[j];
|
||||
oldPosition += (newPosition / newStride[j]) * oldStride[order];
|
||||
newPosition %= newStride[j];
|
||||
}
|
||||
|
||||
top_data[i] = bottom_data[oldPosition];
|
||||
}
|
||||
}
|
109
modules/dnn/src/opencl/region.cl
Normal file
109
modules/dnn/src/opencl/region.cl
Normal file
@ -0,0 +1,109 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#define Dtype float
|
||||
|
||||
__kernel void logistic_activ(const int count,
|
||||
__global const Dtype* src,
|
||||
const int cell_size,
|
||||
__global Dtype* dst)
|
||||
{
|
||||
for (int i = get_global_id(0); i < count; i += get_global_size(0))
|
||||
{
|
||||
int index = cell_size * i;
|
||||
Dtype x = src[index + 4];
|
||||
dst[index + 4] = 1.f / (1.f + exp(-x));
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void softmax_activ(const int count,
|
||||
__global const Dtype* src,
|
||||
__global const Dtype* biasData,
|
||||
const int cell_size,
|
||||
const int classes,
|
||||
const int classfix,
|
||||
const int rows,
|
||||
const int cols,
|
||||
const int anchors,
|
||||
const float thresh,
|
||||
__global Dtype* dst)
|
||||
{
|
||||
for (int index = get_global_id(0); index < count; index += get_global_size(0))
|
||||
{
|
||||
int box_index = index * cell_size;
|
||||
float largest = -FLT_MAX;
|
||||
__global const Dtype *input = src + box_index + 5;
|
||||
__global Dtype *output = dst + box_index + 5;
|
||||
|
||||
for (int i = 0; i < classes; ++i)
|
||||
largest = fmax(largest, input[i]);
|
||||
|
||||
float sum = 0;
|
||||
for (int i = 0; i < classes; ++i)
|
||||
{
|
||||
float e = exp((input[i] - largest));
|
||||
sum += e;
|
||||
output[i] = e;
|
||||
}
|
||||
|
||||
int y = index / anchors / cols;
|
||||
int x = index / anchors % cols;
|
||||
int a = index - anchors * (x + y * cols);
|
||||
float scale = dst[box_index + 4];
|
||||
if (classfix == -1 && scale < .5) scale = 0;
|
||||
|
||||
float v1 = src[box_index + 0];
|
||||
float v2 = src[box_index + 1];
|
||||
float l1 = 1.f / (1.f + exp(-v1));
|
||||
float l2 = 1.f / (1.f + exp(-v2));
|
||||
|
||||
dst[box_index + 0] = (x + l1) / cols;
|
||||
dst[box_index + 1] = (y + l2) / rows;
|
||||
dst[box_index + 2] = exp(src[box_index + 2]) * biasData[2 * a] / cols;
|
||||
dst[box_index + 3] = exp(src[box_index + 3]) * biasData[2 * a + 1] / rows;
|
||||
|
||||
for (int i = 0; i < classes; ++i)
|
||||
{
|
||||
float prob = scale * output[i] / sum;
|
||||
output[i] = (prob > thresh) ? prob : 0;
|
||||
}
|
||||
}
|
||||
}
|
63
modules/dnn/src/opencl/reorg.cl
Normal file
63
modules/dnn/src/opencl/reorg.cl
Normal file
@ -0,0 +1,63 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
__kernel void reorg(const int count,
|
||||
__global const Dtype* src,
|
||||
const int channels,
|
||||
const int height,
|
||||
const int width,
|
||||
const int reorgStride,
|
||||
__global Dtype* dst)
|
||||
{
|
||||
for (int index = get_global_id(0); index < count; index += get_global_size(0))
|
||||
{
|
||||
int k = index / (height * width);
|
||||
int j = (index - (k * height * width)) / width;
|
||||
int i = (index - (k * height * width)) % width;
|
||||
int out_c = channels / (reorgStride*reorgStride);
|
||||
int c2 = k % out_c;
|
||||
int offset = k / out_c;
|
||||
int w2 = i*reorgStride + offset % reorgStride;
|
||||
int h2 = j*reorgStride + offset / reorgStride;
|
||||
int in_index = w2 + width*reorgStride*(h2 + height*reorgStride*c2);
|
||||
dst[index] = src[in_index];
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user