mirror of
https://github.com/opencv/opencv.git
synced 2025-07-21 03:26:46 +08:00
Merge pull request #9882 from pengli:ocl4dnn
This commit is contained in:
commit
8a3a75cc16
@ -187,16 +187,26 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
|
|||||||
*/
|
*/
|
||||||
virtual void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals) = 0;
|
virtual void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals) = 0;
|
||||||
|
|
||||||
|
/** @brief Given the @p inputs blobs, computes the @p outputs blobs.
|
||||||
|
* @param[in] inputs the input blobs.
|
||||||
|
* @param[out] outputs allocated output blobs, which will store results of the computation.
|
||||||
|
* @param[out] internals allocated internal blobs
|
||||||
|
*/
|
||||||
|
virtual void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) = 0;
|
||||||
|
|
||||||
|
/** @brief Given the @p inputs blobs, computes the @p outputs blobs by delegating to the std::vector<Mat*>-based forward() overload.
|
||||||
|
* @param[in] inputs the input blobs.
|
||||||
|
* @param[out] outputs allocated output blobs, which will store results of the computation.
|
||||||
|
* @param[out] internals allocated internal blobs
|
||||||
|
*/
|
||||||
|
void forward_fallback(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals);
|
||||||
|
|
||||||
/** @brief @overload */
|
/** @brief @overload */
|
||||||
CV_WRAP void finalize(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs);
|
CV_WRAP void finalize(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs);
|
||||||
|
|
||||||
/** @brief @overload */
|
/** @brief @overload */
|
||||||
CV_WRAP std::vector<Mat> finalize(const std::vector<Mat> &inputs);
|
CV_WRAP std::vector<Mat> finalize(const std::vector<Mat> &inputs);
|
||||||
|
|
||||||
/** @brief @overload */
|
|
||||||
CV_WRAP void forward(const std::vector<Mat> &inputs, CV_IN_OUT std::vector<Mat> &outputs,
|
|
||||||
CV_IN_OUT std::vector<Mat> &internals);
|
|
||||||
|
|
||||||
/** @brief Allocates layer and computes output. */
|
/** @brief Allocates layer and computes output. */
|
||||||
CV_WRAP void run(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs,
|
CV_WRAP void run(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs,
|
||||||
CV_IN_OUT std::vector<Mat> &internals);
|
CV_IN_OUT std::vector<Mat> &internals);
|
||||||
|
@ -132,6 +132,11 @@ static inline MatShape shape(const Mat& mat)
|
|||||||
return shape(mat.size.p, mat.dims);
|
return shape(mat.size.p, mat.dims);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @brief Returns the shape of @p mat, built from its per-dimension size array and its
 *  number of dimensions (UMat counterpart of the Mat overload).
 */
static inline MatShape shape(const UMat& mat)
{
    const int dimCount = mat.dims;
    return shape(mat.size.p, dimCount);
}
|
||||||
|
|
||||||
namespace {inline bool is_neg(int i) { return i < 0; }}
|
namespace {inline bool is_neg(int i) { return i < 0; }}
|
||||||
|
|
||||||
static inline MatShape shape(int a0, int a1=-1, int a2=-1, int a3=-1)
|
static inline MatShape shape(int a0, int a1=-1, int a2=-1, int a3=-1)
|
||||||
@ -151,7 +156,7 @@ static inline int total(const MatShape& shape, int start = -1, int end = -1)
|
|||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
int elems = 1;
|
int elems = 1;
|
||||||
CV_Assert(start < (int)shape.size() && end <= (int)shape.size() &&
|
CV_Assert(start <= (int)shape.size() && end <= (int)shape.size() &&
|
||||||
start <= end);
|
start <= end);
|
||||||
for(int i = start; i < end; i++)
|
for(int i = start; i < end; i++)
|
||||||
{
|
{
|
||||||
|
@ -233,6 +233,9 @@ struct LayerData
|
|||||||
std::vector<Mat> outputBlobs;
|
std::vector<Mat> outputBlobs;
|
||||||
std::vector<Mat*> inputBlobs;
|
std::vector<Mat*> inputBlobs;
|
||||||
std::vector<Mat> internals;
|
std::vector<Mat> internals;
|
||||||
|
std::vector<UMat> umat_outputBlobs;
|
||||||
|
std::vector<UMat> umat_inputBlobs;
|
||||||
|
std::vector<UMat> umat_internals;
|
||||||
// Computation nodes of implemented backends (except DEFAULT).
|
// Computation nodes of implemented backends (except DEFAULT).
|
||||||
std::map<int, Ptr<BackendNode> > backendNodes;
|
std::map<int, Ptr<BackendNode> > backendNodes;
|
||||||
// Flag for skip layer computation for specific backend.
|
// Flag for skip layer computation for specific backend.
|
||||||
@ -263,6 +266,7 @@ struct DataLayer : public Layer
|
|||||||
{
|
{
|
||||||
void finalize(const std::vector<Mat*>&, std::vector<Mat>&) {}
|
void finalize(const std::vector<Mat*>&, std::vector<Mat>&) {}
|
||||||
void forward(std::vector<Mat*>&, std::vector<Mat>&, std::vector<Mat> &) {}
|
void forward(std::vector<Mat*>&, std::vector<Mat>&, std::vector<Mat> &) {}
|
||||||
|
void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) {}
|
||||||
|
|
||||||
int outputNameToIndex(String tgtName)
|
int outputNameToIndex(String tgtName)
|
||||||
{
|
{
|
||||||
@ -398,22 +402,77 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst, bool force)
|
||||||
|
{
|
||||||
|
UMat bestBlob;
|
||||||
|
LayerPin bestBlobPin;
|
||||||
|
|
||||||
|
if( !force )
|
||||||
|
{
|
||||||
|
std::map<LayerPin, UMat>::iterator hostIt;
|
||||||
|
std::map<LayerPin, int>::iterator refIt;
|
||||||
|
|
||||||
|
const int targetTotal = total(shape);
|
||||||
|
int bestBlobTotal = INT_MAX;
|
||||||
|
|
||||||
|
for (hostIt = umat_memHosts.begin(); hostIt != umat_memHosts.end(); ++hostIt)
|
||||||
|
{
|
||||||
|
refIt = refCounter.find(hostIt->first);
|
||||||
|
// Use only blobs that had references before because if not,
|
||||||
|
// it might be used as output.
|
||||||
|
if (refIt != refCounter.end() && refIt->second == 0)
|
||||||
|
{
|
||||||
|
UMat& unusedBlob = hostIt->second;
|
||||||
|
if (unusedBlob.total() >= targetTotal &&
|
||||||
|
unusedBlob.total() < bestBlobTotal)
|
||||||
|
{
|
||||||
|
bestBlobPin = hostIt->first;
|
||||||
|
bestBlob = unusedBlob;
|
||||||
|
bestBlobTotal = unusedBlob.total();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!bestBlob.empty())
|
||||||
|
{
|
||||||
|
reuse(bestBlobPin, lp);
|
||||||
|
umat_dst.create(shape, CV_32F);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// if dst already has been allocated with total(shape) elements,
|
||||||
|
// it won't be recrreated and pointer of dst.data remains the same.
|
||||||
|
umat_dst.create(shape, CV_32F);
|
||||||
|
addHost(lp, umat_dst);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
|
void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
|
||||||
std::vector<LayerPin>& pinsForInternalBlobs,
|
std::vector<LayerPin>& pinsForInternalBlobs,
|
||||||
bool maximizeReuse)
|
bool maximizeReuse)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
|
||||||
|
preferableTarget == DNN_TARGET_OPENCL);
|
||||||
|
|
||||||
pinsForInternalBlobs.clear();
|
pinsForInternalBlobs.clear();
|
||||||
|
|
||||||
std::vector<Mat>& outputBlobs = ld.outputBlobs,
|
std::vector<Mat>& outputBlobs = ld.outputBlobs,
|
||||||
&internalBlobs = ld.internals;
|
&internalBlobs = ld.internals;
|
||||||
|
|
||||||
|
std::vector<UMat>& umat_outputBlobs = ld.umat_outputBlobs,
|
||||||
|
&umat_internalBlobs = ld.umat_internals;
|
||||||
|
|
||||||
const ShapesVec& outShapes = layerShapes.out,
|
const ShapesVec& outShapes = layerShapes.out,
|
||||||
internalShapes = layerShapes.internal;
|
internalShapes = layerShapes.internal;
|
||||||
|
|
||||||
outputBlobs.resize(std::max((size_t)1, outShapes.size())); //layer produce at least one output blob
|
outputBlobs.resize(std::max((size_t)1, outShapes.size())); //layer produce at least one output blob
|
||||||
internalBlobs.resize(internalShapes.size());
|
internalBlobs.resize(internalShapes.size());
|
||||||
|
if (use_umat)
|
||||||
|
{
|
||||||
|
umat_outputBlobs.resize(std::max((size_t)1, outShapes.size()));
|
||||||
|
umat_internalBlobs.resize(internalShapes.size());
|
||||||
|
}
|
||||||
|
|
||||||
CV_Assert(ld.requiredOutputs.size() <= outShapes.size());
|
CV_Assert(ld.requiredOutputs.size() <= outShapes.size());
|
||||||
|
|
||||||
@ -433,14 +492,19 @@ public:
|
|||||||
ShapesVec shapes(outShapes);
|
ShapesVec shapes(outShapes);
|
||||||
shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end());
|
shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end());
|
||||||
std::vector<Mat*> blobs;
|
std::vector<Mat*> blobs;
|
||||||
|
std::vector<UMat*> umat_blobs;
|
||||||
for(int i = 0; i < outputBlobs.size(); i++)
|
for(int i = 0; i < outputBlobs.size(); i++)
|
||||||
{
|
{
|
||||||
blobs.push_back(&outputBlobs[i]);
|
blobs.push_back(&outputBlobs[i]);
|
||||||
|
if (use_umat)
|
||||||
|
umat_blobs.push_back(&umat_outputBlobs[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int i = 0; i < internalBlobs.size(); i++)
|
for(int i = 0; i < internalBlobs.size(); i++)
|
||||||
{
|
{
|
||||||
blobs.push_back(&internalBlobs[i]);
|
blobs.push_back(&internalBlobs[i]);
|
||||||
|
if (use_umat)
|
||||||
|
umat_blobs.push_back(&umat_internalBlobs[i]);
|
||||||
if (total(internalShapes[i]))
|
if (total(internalShapes[i]))
|
||||||
{
|
{
|
||||||
pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i));
|
pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i));
|
||||||
@ -467,13 +531,26 @@ public:
|
|||||||
LayerPin blobPin(ld.id, index);
|
LayerPin blobPin(ld.id, index);
|
||||||
if (index < outShapes.size() && inPlace && !force)
|
if (index < outShapes.size() && inPlace && !force)
|
||||||
{
|
{
|
||||||
CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
|
if (use_umat)
|
||||||
ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
|
{
|
||||||
|
CV_Assert(ld.umat_inputBlobs[0].total() == total(shapes[index]));
|
||||||
|
ld.umat_outputBlobs[index] =
|
||||||
|
ld.umat_inputBlobs[0].reshape(1, shapes[index].size(),
|
||||||
|
&shapes[index][0]);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
|
||||||
|
ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
|
||||||
|
}
|
||||||
reuse(ld.inputBlobsId[0], blobPin);
|
reuse(ld.inputBlobsId[0], blobPin);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
reuseOrCreate(shapes[index], blobPin, *blobs[index], force);
|
if (use_umat)
|
||||||
|
reuseOrCreate(shapes[index], blobPin, *umat_blobs[index], force);
|
||||||
|
else
|
||||||
|
reuseOrCreate(shapes[index], blobPin, *blobs[index], force);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -488,6 +565,19 @@ public:
|
|||||||
refCounter.clear();
|
refCounter.clear();
|
||||||
reuseMap.clear();
|
reuseMap.clear();
|
||||||
memHosts.clear();
|
memHosts.clear();
|
||||||
|
umat_memHosts.clear();
|
||||||
|
preferableTarget = DNN_TARGET_CPU;
|
||||||
|
preferableBackend = DNN_BACKEND_DEFAULT;
|
||||||
|
}
|
||||||
|
|
||||||
|
void setPreferableTarget(int targetId)
|
||||||
|
{
|
||||||
|
preferableTarget = targetId;
|
||||||
|
}
|
||||||
|
|
||||||
|
void setPreferableBackend(int backendId)
|
||||||
|
{
|
||||||
|
preferableBackend = backendId;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -499,11 +589,21 @@ private:
|
|||||||
memHosts[lp] = mat;
|
memHosts[lp] = mat;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void addHost(const LayerPin& lp, const UMat& umat)
|
||||||
|
{
|
||||||
|
CV_Assert(umat_memHosts.find(lp) == umat_memHosts.end());
|
||||||
|
reuseMap[lp] = lp;
|
||||||
|
umat_memHosts[lp] = umat;
|
||||||
|
}
|
||||||
|
|
||||||
std::map<LayerPin, int> refCounter;
|
std::map<LayerPin, int> refCounter;
|
||||||
// Maps pin to origin blob (for whom memory was allocated firstly).
|
// Maps pin to origin blob (for whom memory was allocated firstly).
|
||||||
// For origin blobs key == value.
|
// For origin blobs key == value.
|
||||||
std::map<LayerPin, LayerPin> reuseMap;
|
std::map<LayerPin, LayerPin> reuseMap;
|
||||||
std::map<LayerPin, Mat> memHosts;
|
std::map<LayerPin, Mat> memHosts;
|
||||||
|
std::map<LayerPin, UMat> umat_memHosts;
|
||||||
|
int preferableTarget;
|
||||||
|
int preferableBackend;
|
||||||
};
|
};
|
||||||
|
|
||||||
static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, const cv::Mat& m)
|
static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, const cv::Mat& m)
|
||||||
@ -654,6 +754,9 @@ struct Net::Impl
|
|||||||
it->second.inputBlobs.clear();
|
it->second.inputBlobs.clear();
|
||||||
it->second.outputBlobs.clear();
|
it->second.outputBlobs.clear();
|
||||||
it->second.internals.clear();
|
it->second.internals.clear();
|
||||||
|
it->second.umat_inputBlobs.clear();
|
||||||
|
it->second.umat_outputBlobs.clear();
|
||||||
|
it->second.umat_internals.clear();
|
||||||
}
|
}
|
||||||
it->second.skipFlags.clear();
|
it->second.skipFlags.clear();
|
||||||
//it->second.consumers.clear();
|
//it->second.consumers.clear();
|
||||||
@ -974,7 +1077,11 @@ struct Net::Impl
|
|||||||
allocateLayer(*i, layersShapes);
|
allocateLayer(*i, layersShapes);
|
||||||
|
|
||||||
//bind inputs
|
//bind inputs
|
||||||
|
bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
|
||||||
|
preferableTarget == DNN_TARGET_OPENCL);
|
||||||
ld.inputBlobs.resize(ninputs);
|
ld.inputBlobs.resize(ninputs);
|
||||||
|
if (use_umat)
|
||||||
|
ld.umat_inputBlobs.resize(ninputs);
|
||||||
ld.inputBlobsWrappers.resize(ninputs);
|
ld.inputBlobsWrappers.resize(ninputs);
|
||||||
for (size_t i = 0; i < ninputs; i++)
|
for (size_t i = 0; i < ninputs; i++)
|
||||||
{
|
{
|
||||||
@ -982,6 +1089,8 @@ struct Net::Impl
|
|||||||
CV_Assert(from.valid());
|
CV_Assert(from.valid());
|
||||||
CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
|
CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid);
|
||||||
ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
|
ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid];
|
||||||
|
if (use_umat)
|
||||||
|
ld.umat_inputBlobs[i] = layers[from.lid].umat_outputBlobs[from.oid];
|
||||||
ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
|
ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1000,7 +1109,26 @@ struct Net::Impl
|
|||||||
|
|
||||||
Ptr<Layer> layerPtr = ld.getLayerInstance();
|
Ptr<Layer> layerPtr = ld.getLayerInstance();
|
||||||
{
|
{
|
||||||
layerPtr->finalize(ld.inputBlobs, ld.outputBlobs);
|
if (use_umat)
|
||||||
|
{
|
||||||
|
std::vector<Mat*> inputs(ld.umat_inputBlobs.size());;
|
||||||
|
std::vector<Mat> outputs(ld.umat_outputBlobs.size());
|
||||||
|
Mat mat;
|
||||||
|
for (int i = 0; i < inputs.size(); i++)
|
||||||
|
{
|
||||||
|
mat = ld.umat_inputBlobs[i].getMat(ACCESS_READ);
|
||||||
|
inputs[i] = &mat;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < outputs.size(); i++)
|
||||||
|
{
|
||||||
|
outputs[i] = ld.umat_outputBlobs[i].getMat(ACCESS_READ);
|
||||||
|
}
|
||||||
|
layerPtr->finalize(inputs, outputs);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
layerPtr->finalize(ld.inputBlobs, ld.outputBlobs);
|
||||||
|
}
|
||||||
layerPtr->preferableTarget = preferableTarget;
|
layerPtr->preferableTarget = preferableTarget;
|
||||||
#if 0
|
#if 0
|
||||||
std::cout << "\toutputs:";
|
std::cout << "\toutputs:";
|
||||||
@ -1234,6 +1362,8 @@ struct Net::Impl
|
|||||||
getLayersShapes(inputShapes, layersShapes);
|
getLayersShapes(inputShapes, layersShapes);
|
||||||
|
|
||||||
blobManager.reset();
|
blobManager.reset();
|
||||||
|
blobManager.setPreferableTarget(preferableTarget);
|
||||||
|
blobManager.setPreferableBackend(preferableBackend);
|
||||||
backendWrappers.clear();
|
backendWrappers.clear();
|
||||||
blobManager.addReference(LayerPin(0, 0));
|
blobManager.addReference(LayerPin(0, 0));
|
||||||
for (it = layers.begin(); it != layers.end(); ++it)
|
for (it = layers.begin(); it != layers.end(); ++it)
|
||||||
@ -1276,7 +1406,10 @@ struct Net::Impl
|
|||||||
if (!ld.inputBlobsWrappers[i].empty())
|
if (!ld.inputBlobsWrappers[i].empty())
|
||||||
ld.inputBlobsWrappers[i]->copyToHost();
|
ld.inputBlobsWrappers[i]->copyToHost();
|
||||||
}
|
}
|
||||||
layer->forward(ld.inputBlobs, ld.outputBlobs, ld.internals);
|
if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL)
|
||||||
|
layer->forward(ld.umat_inputBlobs, ld.umat_outputBlobs, ld.umat_internals);
|
||||||
|
else
|
||||||
|
layer->forward(ld.inputBlobs, ld.outputBlobs, ld.internals);
|
||||||
for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
|
for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
|
||||||
{
|
{
|
||||||
if (!ld.outputBlobsWrappers[i].empty())
|
if (!ld.outputBlobsWrappers[i].empty())
|
||||||
@ -1421,6 +1554,10 @@ struct Net::Impl
|
|||||||
{
|
{
|
||||||
CV_Assert(preferableTarget == DNN_TARGET_CPU || preferableTarget == DNN_TARGET_OPENCL);
|
CV_Assert(preferableTarget == DNN_TARGET_CPU || preferableTarget == DNN_TARGET_OPENCL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ld.umat_outputBlobs.size() > 0 && !ld.umat_outputBlobs[pin.oid].empty())
|
||||||
|
ld.umat_outputBlobs[pin.oid].copyTo(ld.outputBlobs[pin.oid]);
|
||||||
|
|
||||||
return ld.outputBlobs[pin.oid];
|
return ld.outputBlobs[pin.oid];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1520,6 +1657,13 @@ void Net::forward(std::vector<Mat>& outputBlobs, const String& outputName)
|
|||||||
|
|
||||||
LayerPin pin = impl->getPinByAlias(layerName);
|
LayerPin pin = impl->getPinByAlias(layerName);
|
||||||
LayerData &ld = impl->layers[pin.lid];
|
LayerData &ld = impl->layers[pin.lid];
|
||||||
|
|
||||||
|
if (ld.umat_outputBlobs.size() > 0)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < ld.umat_outputBlobs.size(); i++)
|
||||||
|
ld.umat_outputBlobs[i].copyTo(ld.outputBlobs[i]);
|
||||||
|
}
|
||||||
|
|
||||||
outputBlobs = ld.outputBlobs;
|
outputBlobs = ld.outputBlobs;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1584,6 +1728,7 @@ void Net::setPreferableBackend(int backendId)
|
|||||||
if( impl->preferableBackend != backendId )
|
if( impl->preferableBackend != backendId )
|
||||||
{
|
{
|
||||||
impl->preferableBackend = backendId;
|
impl->preferableBackend = backendId;
|
||||||
|
impl->blobManager.setPreferableBackend(backendId);
|
||||||
impl->netWasAllocated = false;
|
impl->netWasAllocated = false;
|
||||||
impl->clear();
|
impl->clear();
|
||||||
}
|
}
|
||||||
@ -1597,6 +1742,7 @@ void Net::setPreferableTarget(int targetId)
|
|||||||
if( impl->preferableTarget != targetId )
|
if( impl->preferableTarget != targetId )
|
||||||
{
|
{
|
||||||
impl->preferableTarget = targetId;
|
impl->preferableTarget = targetId;
|
||||||
|
impl->blobManager.setPreferableTarget(targetId);
|
||||||
impl->netWasAllocated = false;
|
impl->netWasAllocated = false;
|
||||||
impl->clear();
|
impl->clear();
|
||||||
}
|
}
|
||||||
@ -1623,13 +1769,25 @@ void Net::setInput(const Mat &blob_, const String& name)
|
|||||||
|
|
||||||
LayerData &ld = impl->layers[pin.lid];
|
LayerData &ld = impl->layers[pin.lid];
|
||||||
ld.outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
|
ld.outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
|
||||||
|
bool use_umat = (impl->preferableBackend == DNN_BACKEND_DEFAULT &&
|
||||||
|
impl->preferableTarget == DNN_TARGET_OPENCL);
|
||||||
|
if (use_umat)
|
||||||
|
ld.umat_outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
|
||||||
ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
|
ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
|
||||||
MatShape prevShape = shape(ld.outputBlobs[pin.oid]);
|
MatShape prevShape = shape(ld.outputBlobs[pin.oid]);
|
||||||
bool oldShape = prevShape == shape(blob_);
|
bool oldShape = prevShape == shape(blob_);
|
||||||
if (oldShape)
|
if (oldShape)
|
||||||
|
{
|
||||||
blob_.copyTo(ld.outputBlobs[pin.oid]);
|
blob_.copyTo(ld.outputBlobs[pin.oid]);
|
||||||
|
if (use_umat)
|
||||||
|
blob_.copyTo(ld.umat_outputBlobs[pin.oid]);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
ld.outputBlobs[pin.oid] = blob_.clone();
|
ld.outputBlobs[pin.oid] = blob_.clone();
|
||||||
|
if (use_umat)
|
||||||
|
blob_.copyTo(ld.umat_outputBlobs[pin.oid]);
|
||||||
|
}
|
||||||
|
|
||||||
if (!ld.outputBlobsWrappers[pin.oid].empty())
|
if (!ld.outputBlobsWrappers[pin.oid].empty())
|
||||||
{
|
{
|
||||||
@ -2132,13 +2290,24 @@ std::vector<Mat> Layer::finalize(const std::vector<Mat> &inputs)
|
|||||||
return outputs;
|
return outputs;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Layer::forward(const std::vector<Mat> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
std::vector<Mat*> inputsp;
|
std::vector<Mat> inpvec;
|
||||||
vecToPVec(inputs, inputsp);
|
std::vector<Mat> outputs;
|
||||||
this->forward(inputsp, outputs, internals);
|
std::vector<Mat> internals;
|
||||||
|
|
||||||
|
inputs_arr.getMatVector(inpvec);
|
||||||
|
outputs_arr.getMatVector(outputs);
|
||||||
|
internals_arr.getMatVector(internals);
|
||||||
|
|
||||||
|
std::vector<Mat*> inputs(inpvec.size());
|
||||||
|
for (int i = 0; i < inpvec.size(); i++)
|
||||||
|
inputs[i] = &inpvec[i];
|
||||||
|
|
||||||
|
this->forward(inputs, outputs, internals);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Layer::run(const std::vector<Mat> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void Layer::run(const std::vector<Mat> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
|
@ -102,6 +102,14 @@ public:
|
|||||||
backendId == DNN_BACKEND_HALIDE && haveHalide();
|
backendId == DNN_BACKEND_HALIDE && haveHalide();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -62,6 +62,25 @@ public:
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
|
bool forward_ocl(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
||||||
|
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||||
|
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||||
|
|
||||||
|
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -176,36 +176,38 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
std::vector<UMat> inputs;
|
||||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
std::vector<UMat> outputs;
|
||||||
|
|
||||||
int cAxis = clamp(axis, inputs[0]->dims);
|
inps.getUMatVector(inputs);
|
||||||
|
outs.getUMatVector(outputs);
|
||||||
|
|
||||||
|
int cAxis = clamp(axis, inputs[0].dims);
|
||||||
if (!(cAxis == 1 && outputs[0].dims == 4 && !padding))
|
if (!(cAxis == 1 && outputs[0].dims == 4 && !padding))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
int bottom_concat_axis;
|
int bottom_concat_axis;
|
||||||
int concat_size = inputs[0]->size[2] * inputs[0]->size[3];
|
int concat_size = inputs[0].size[2] * inputs[0].size[3];
|
||||||
int top_concat_axis = outputs[0].size[1];
|
int top_concat_axis = outputs[0].size[1];
|
||||||
int offset_concat_axis = 0;
|
int offset_concat_axis = 0;
|
||||||
UMat inpMat, outMat;
|
UMat& outMat = outputs[0];
|
||||||
outMat = outputs[0].getUMat(ACCESS_WRITE);
|
String buildopt = String("-DDtype=") + ocl::typeToStr(inputs[0].type()) + String(" ");
|
||||||
|
|
||||||
ocl::Kernel kernel;
|
|
||||||
String buildopt = String("-DDtype=") + ocl::typeToStr(inputs[0]->type()) + String(" ");
|
|
||||||
if (!kernel.create("concat", ocl::dnn::concat_oclsrc, buildopt))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
for (size_t i = 0; i < inputs.size(); i++)
|
for (size_t i = 0; i < inputs.size(); i++)
|
||||||
{
|
{
|
||||||
inpMat = inputs[i]->getUMat(ACCESS_READ);
|
ocl::Kernel kernel("concat", ocl::dnn::concat_oclsrc, buildopt);
|
||||||
bottom_concat_axis = inputs[i]->size[1];
|
if (kernel.empty())
|
||||||
size_t nthreads = inputs[i]->total();
|
return false;
|
||||||
|
|
||||||
|
UMat& inpMat = inputs[i];
|
||||||
|
bottom_concat_axis = inputs[i].size[1];
|
||||||
|
size_t nthreads = inputs[i].total();
|
||||||
|
|
||||||
kernel.set(0, (int)nthreads);
|
kernel.set(0, (int)nthreads);
|
||||||
kernel.set(1, ocl::KernelArg::PtrReadOnly(inpMat));
|
kernel.set(1, ocl::KernelArg::PtrReadOnly(inpMat));
|
||||||
kernel.set(2, (int)inputs[i]->size[0]);
|
kernel.set(2, (int)inputs[i].size[0]);
|
||||||
kernel.set(3, (int)concat_size);
|
kernel.set(3, (int)concat_size);
|
||||||
kernel.set(4, (int)top_concat_axis);
|
kernel.set(4, (int)top_concat_axis);
|
||||||
kernel.set(5, (int)bottom_concat_axis);
|
kernel.set(5, (int)bottom_concat_axis);
|
||||||
@ -222,14 +224,22 @@ public:
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
||||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||||
forward_ocl(inputs, outputs, internals))
|
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||||
|
|
||||||
|
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
int cAxis = clamp(axis, inputs[0]->dims);
|
int cAxis = clamp(axis, inputs[0]->dims);
|
||||||
Mat& outMat = outputs[0];
|
Mat& outMat = outputs[0];
|
||||||
|
@ -671,14 +671,20 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||||
{
|
{
|
||||||
int group = inputs[0]->size[1] / umat_blobs[0].size[1];
|
std::vector<UMat> inputs;
|
||||||
|
std::vector<UMat> outputs;
|
||||||
|
|
||||||
|
inps.getUMatVector(inputs);
|
||||||
|
outs.getUMatVector(outputs);
|
||||||
|
|
||||||
|
int group = inputs[0].size[1] / umat_blobs[0].size[1];
|
||||||
|
|
||||||
if (convolutionOp.empty())
|
if (convolutionOp.empty())
|
||||||
{
|
{
|
||||||
OCL4DNNConvConfig config;
|
OCL4DNNConvConfig config;
|
||||||
config.in_shape = shape(*inputs[0]);
|
config.in_shape = shape(inputs[0]);
|
||||||
config.out_shape = shape(outputs[0]);
|
config.out_shape = shape(outputs[0]);
|
||||||
config.kernel = kernel;
|
config.kernel = kernel;
|
||||||
config.pad = pad;
|
config.pad = pad;
|
||||||
@ -690,6 +696,112 @@ public:
|
|||||||
convolutionOp = Ptr<OCL4DNNConvSpatial<float> >(new OCL4DNNConvSpatial<float>(config));
|
convolutionOp = Ptr<OCL4DNNConvSpatial<float> >(new OCL4DNNConvSpatial<float>(config));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int k, outCn = umat_blobs[0].size[0];
|
||||||
|
if( weightsMat.empty() )
|
||||||
|
{
|
||||||
|
// prepare weightsMat where each row is aligned and has enough zero padding on the right to
|
||||||
|
// use vectorized (i.e. with intrinsics) loops without tail processing
|
||||||
|
Mat wm = blobs[0].reshape(1, outCn).clone();
|
||||||
|
if( wm.step1() % VEC_ALIGN != 0 )
|
||||||
|
{
|
||||||
|
int newcols = (int)alignSize(wm.step1(), VEC_ALIGN);
|
||||||
|
Mat wm_buffer = Mat(outCn, newcols, wm.type());
|
||||||
|
Mat wm_padding = wm_buffer.colRange(wm.cols, newcols);
|
||||||
|
wm_padding.setTo(Scalar::all(0.));
|
||||||
|
Mat wm_aligned = wm_buffer.colRange(0, wm.cols);
|
||||||
|
wm.copyTo(wm_aligned);
|
||||||
|
wm = wm_aligned;
|
||||||
|
}
|
||||||
|
weightsMat = wm;
|
||||||
|
|
||||||
|
Mat biasMat = hasBias() ? blobs[1].reshape(1, outCn) : Mat();
|
||||||
|
biasvec.resize(outCn+2);
|
||||||
|
if( biasMat.empty() )
|
||||||
|
{
|
||||||
|
for( k = 0; k < outCn; k++ )
|
||||||
|
biasvec[k] = 0.f;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for( k = 0; k < outCn; k++ )
|
||||||
|
biasvec[k] = biasMat.at<float>(k);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( !bnorm.empty() || !scaleLayer.empty() )
|
||||||
|
{
|
||||||
|
Mat scale, shift, scale2, shift2;
|
||||||
|
const float *scaleptr = 0, *shiftptr = 0;
|
||||||
|
const float *scaleptr2 = 0, *shiftptr2 = 0;
|
||||||
|
|
||||||
|
if( !bnorm.empty() )
|
||||||
|
{
|
||||||
|
bnorm->getScaleShift(scale, shift);
|
||||||
|
CV_Assert( scale.isContinuous() && shift.isContinuous() &&
|
||||||
|
scale.type() == CV_32F && shift.type() == CV_32F &&
|
||||||
|
scale.total() == (size_t)outCn &&
|
||||||
|
shift.total() == (size_t)outCn );
|
||||||
|
scaleptr = scale.ptr<float>();
|
||||||
|
shiftptr = shift.ptr<float>();
|
||||||
|
}
|
||||||
|
if( !scaleLayer.empty() )
|
||||||
|
{
|
||||||
|
scale2 = scaleLayer->blobs[0];
|
||||||
|
CV_Assert( scale2.isContinuous() && scale2.type() == CV_32F &&
|
||||||
|
scale2.total() == (size_t)outCn );
|
||||||
|
scaleptr2 = scale2.ptr<float>();
|
||||||
|
if( scaleLayer->hasBias )
|
||||||
|
{
|
||||||
|
shift2 = scaleLayer->blobs[1];
|
||||||
|
CV_Assert( shift2.isContinuous() && shift2.type() == CV_32F &&
|
||||||
|
shift2.total() == (size_t)outCn );
|
||||||
|
shiftptr2 = shift2.ptr<float>();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (shiftptr || shiftptr2)
|
||||||
|
fusedBias = true;
|
||||||
|
|
||||||
|
for( int i = 0; i < outCn; i++ )
|
||||||
|
{
|
||||||
|
float s1 = scaleptr ? scaleptr[i] : 1.f;
|
||||||
|
float delta1 = shiftptr ? shiftptr[i] : 0.f;
|
||||||
|
float s2 = scaleptr2 ? scaleptr2[i] : 1.f;
|
||||||
|
float delta2 = shiftptr2 ? shiftptr2[i] : 0.f;
|
||||||
|
float* w_i = weightsMat.ptr<float>(i);
|
||||||
|
int j, wcols = weightsMat.cols;
|
||||||
|
|
||||||
|
for( j = 0; j < wcols; j++ )
|
||||||
|
w_i[j] *= (s1*s2);
|
||||||
|
|
||||||
|
biasvec[i] = biasvec[i]*(s1*s2) + (delta1*s2 + delta2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
biasvec[outCn] = biasvec[outCn+1] = biasvec[outCn-1];
|
||||||
|
}
|
||||||
|
|
||||||
|
reluslope.clear();
|
||||||
|
if( activ )
|
||||||
|
{
|
||||||
|
Ptr<ReLULayer> activ_relu = activ.dynamicCast<ReLULayer>();
|
||||||
|
if( !activ_relu.empty() )
|
||||||
|
{
|
||||||
|
reluslope.assign(outCn+2, activ_relu->negativeSlope);
|
||||||
|
activType = OCL4DNN_CONV_FUSED_ACTIV_RELU;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ptr<ChannelsPReLULayer> activ_chprelu = activ.dynamicCast<ChannelsPReLULayer>();
|
||||||
|
if( !activ_chprelu.empty() )
|
||||||
|
{
|
||||||
|
const Mat& m = activ_chprelu->blobs[0];
|
||||||
|
CV_Assert(m.isContinuous() && m.type() == CV_32F && (int)m.total() == outCn);
|
||||||
|
const float* mdata = m.ptr<float>();
|
||||||
|
reluslope.resize(outCn+2);
|
||||||
|
std::copy(mdata, mdata + outCn, reluslope.begin());
|
||||||
|
reluslope[outCn] = reluslope[outCn+1] = reluslope[outCn-1];
|
||||||
|
activType = OCL4DNN_CONV_FUSED_ACTIV_PRELU;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if ( newWeightAndBias )
|
if ( newWeightAndBias )
|
||||||
{
|
{
|
||||||
weightsMat.copyTo(umat_blobs[0]);
|
weightsMat.copyTo(umat_blobs[0]);
|
||||||
@ -723,9 +835,8 @@ public:
|
|||||||
newActiv = false;
|
newActiv = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
UMat inpMat, outMat;
|
UMat& inpMat = inputs[0];
|
||||||
inpMat = inputs[0]->getUMat(ACCESS_READ);
|
UMat& outMat = outputs[0];
|
||||||
outMat = outputs[0].getUMat(ACCESS_WRITE);
|
|
||||||
int batch_size = inpMat.size[0];
|
int batch_size = inpMat.size[0];
|
||||||
|
|
||||||
return convolutionOp->Forward(inpMat,
|
return convolutionOp->Forward(inpMat,
|
||||||
@ -736,6 +847,18 @@ public:
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/** @brief Forward pass entry point taking generic array-of-arrays arguments.
 *
 * Traces the call and the layer name, offers the work to forward_ocl() through
 * CV_OCL_RUN, and otherwise ends in Layer::forward_fallback() with the same arguments.
 * @param[in]  inputs_arr    the input blobs.
 * @param[out] outputs_arr   allocated output blobs, which will store results of the computation.
 * @param[out] internals_arr allocated internal blobs.
 */
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
// OpenCL attempt: only when the preferable target is DNN_TARGET_OPENCL and the
// performance check on the default device being Intel passes.
// NOTE(review): CV_OCL_RUN presumably returns from this function when forward_ocl()
// yields true and compiles to nothing without HAVE_OPENCL -- confirm in the macro definition.
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
||||||
|
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||||
|
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||||
|
|
||||||
|
// Reached when the OpenCL attempt above did not take over.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
@ -811,11 +934,6 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
|
||||||
if (shiftptr || shiftptr2)
|
|
||||||
fusedBias = true;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
for( int i = 0; i < outCn; i++ )
|
for( int i = 0; i < outCn; i++ )
|
||||||
{
|
{
|
||||||
float s1 = scaleptr ? scaleptr[i] : 1.f;
|
float s1 = scaleptr ? scaleptr[i] : 1.f;
|
||||||
@ -841,9 +959,6 @@ public:
|
|||||||
if( !activ_relu.empty() )
|
if( !activ_relu.empty() )
|
||||||
{
|
{
|
||||||
reluslope.assign(outCn+2, activ_relu->negativeSlope);
|
reluslope.assign(outCn+2, activ_relu->negativeSlope);
|
||||||
#ifdef HAVE_OPENCL
|
|
||||||
activType = OCL4DNN_CONV_FUSED_ACTIV_RELU;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ptr<ChannelsPReLULayer> activ_chprelu = activ.dynamicCast<ChannelsPReLULayer>();
|
Ptr<ChannelsPReLULayer> activ_chprelu = activ.dynamicCast<ChannelsPReLULayer>();
|
||||||
@ -855,16 +970,9 @@ public:
|
|||||||
reluslope.resize(outCn+2);
|
reluslope.resize(outCn+2);
|
||||||
std::copy(mdata, mdata + outCn, reluslope.begin());
|
std::copy(mdata, mdata + outCn, reluslope.begin());
|
||||||
reluslope[outCn] = reluslope[outCn+1] = reluslope[outCn-1];
|
reluslope[outCn] = reluslope[outCn+1] = reluslope[outCn-1];
|
||||||
#ifdef HAVE_OPENCL
|
|
||||||
activType = OCL4DNN_CONV_FUSED_ACTIV_PRELU;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
|
||||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
|
||||||
forward_ocl(inputs, outputs, internals))
|
|
||||||
|
|
||||||
int nstripes = std::max(getNumThreads(), 1);
|
int nstripes = std::max(getNumThreads(), 1);
|
||||||
|
|
||||||
ParallelConv::run(*inputs[0], outputs[0], weightsMat, biasvec, reluslope,
|
ParallelConv::run(*inputs[0], outputs[0], weightsMat, biasvec, reluslope,
|
||||||
@ -1173,6 +1281,14 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** @brief Forward pass entry point taking generic array-of-arrays arguments.
 *
 * Traces the call and the layer name, then passes all three arguments unchanged to
 * Layer::forward_fallback(). No OpenCL path is attempted in this overload.
 * @param[in]  inputs_arr    the input blobs.
 * @param[out] outputs_arr   allocated output blobs, which will store results of the computation.
 * @param[out] internals_arr allocated internal blobs.
 */
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
// NOTE(review): forward_fallback() presumably routes to the std::vector<Mat*> overload -- confirm in Layer.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -133,6 +133,14 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @brief Forward pass entry point taking generic array-of-arrays arguments.
 *
 * Traces the call and the layer name, then passes all three arguments unchanged to
 * Layer::forward_fallback(). No OpenCL path is attempted in this overload.
 * @param[in]  inputs_arr    the input blobs.
 * @param[out] outputs_arr   allocated output blobs, which will store results of the computation.
 * @param[out] internals_arr allocated internal blobs.
 */
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
// NOTE(review): forward_fallback() presumably routes to the std::vector<Mat*> overload -- confirm in Layer.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -194,6 +194,95 @@ public:
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
|
/** @brief Detection-output forward pass used when the layer runs on the OpenCL target.
 *
 * Inputs are fetched as Mat headers and the decoding / filtering is done by the class's
 * host-side helpers; only the final detections blob is handed back as a UMat.
 * Input order, as read below: [0] location predictions, [1] confidence scores, [2] prior boxes.
 *
 * @param[in]  inputs_arr    the input blobs; at least three are required.
 * @param[out] outputs_arr   output blobs. When detections are kept, element 0 is replaced by a
 *                           1x1xNx7 CV_32F blob (N = number of kept detections). When nothing is
 *                           kept, the existing element 0 is left in place and only its
 *                           Range(2, 3) slice along the last axis is zeroed.
 * @param[out] internals_arr not used by this implementation.
 * @return true on both exit paths; violated preconditions are reported through CV_Assert.
 */
bool forward_ocl(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    std::vector<Mat> inputs;
    std::vector<Mat> outputs;

    inputs_arr.getMatVector(inputs);
    outputs_arr.getMatVector(outputs);

    // inputs[0..2] and outputs[0] are indexed unconditionally below.
    CV_Assert(inputs.size() >= 3 && !outputs.empty());

    std::vector<LabelBBox> allDecodedBBoxes;
    std::vector<std::vector<std::vector<float> > > allConfidenceScores;

    const int num = inputs[0].size[0];

    // extract predictions from input layers
    {
        const int numPriors = inputs[2].size[2] / 4;

        const float* locationData = inputs[0].ptr<float>();
        const float* confidenceData = inputs[1].ptr<float>();
        const float* priorData = inputs[2].ptr<float>();

        // Retrieve all location predictions
        std::vector<LabelBBox> allLocationPredictions;
        GetLocPredictions(locationData, num, numPriors, _numLocClasses,
                          _shareLocation, _locPredTransposed, allLocationPredictions);

        // Retrieve all confidences
        GetConfidenceScores(confidenceData, num, numPriors, _numClasses, allConfidenceScores);

        // Retrieve all prior bboxes
        std::vector<caffe::NormalizedBBox> priorBBoxes;
        std::vector<std::vector<float> > priorVariances;
        GetPriorBBoxes(priorData, numPriors, priorBBoxes, priorVariances);

        // Decode all loc predictions to bboxes
        DecodeBBoxesAll(allLocationPredictions, priorBBoxes, priorVariances, num,
                        _shareLocation, _numLocClasses, _backgroundLabelId,
                        _codeType, _varianceEncodedInTarget, false, allDecodedBBoxes);
    }

    size_t numKept = 0;
    std::vector<std::map<int, std::vector<int> > > allIndices;
    for (int i = 0; i < num; ++i)
    {
        numKept += processDetections_(allDecodedBBoxes[i], allConfidenceScores[i], allIndices);
    }

    if (numKept == 0)
    {
        // Set confidences to zeros.
        Range ranges[] = {Range::all(), Range::all(), Range::all(), Range(2, 3)};
        outputs[0](ranges).setTo(0);
        return true;
    }

    int outputShape[] = {1, 1, (int)numKept, 7};
    Mat mat(4, outputShape, CV_32F);
    float* outputsData = mat.ptr<float>();

    // Each kept detection occupies 7 consecutive floats.
    size_t count = 0;
    for (int i = 0; i < num; ++i)
    {
        count += outputDetections_(i, &outputsData[count * 7],
                                   allDecodedBBoxes[i], allConfidenceScores[i],
                                   allIndices[i]);
    }

    // Check the invariant before publishing, so a mismatch never leaves a
    // half-consistent blob in the caller's output.
    CV_Assert(count == numKept);

    UMat& output = outputs_arr.getUMatRef(0);
    output = mat.getUMat(ACCESS_READ);
    return true;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/** @brief Forward pass entry point taking generic array-of-arrays arguments.
 *
 * Traces the call and the layer name, offers the work to forward_ocl() through
 * CV_OCL_RUN, and otherwise ends in Layer::forward_fallback() with the same arguments.
 * @param[in]  inputs_arr    the input blobs.
 * @param[out] outputs_arr   allocated output blobs, which will store results of the computation.
 * @param[out] internals_arr allocated internal blobs.
 */
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
// OpenCL attempt: only when the preferable target is DNN_TARGET_OPENCL and the
// performance check on the default device being Intel passes.
// NOTE(review): CV_OCL_RUN presumably returns from this function when forward_ocl()
// yields true and compiles to nothing without HAVE_OPENCL -- confirm in the macro definition.
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
||||||
|
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||||
|
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||||
|
|
||||||
|
// Reached when the OpenCL attempt above did not take over.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -156,13 +156,20 @@ public:
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
|
||||||
CV_OCL_RUN((this->preferableTarget == DNN_TARGET_OPENCL) &&
|
CV_OCL_RUN((this->preferableTarget == DNN_TARGET_OPENCL) &&
|
||||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||||
func.applyOCL(inputs, outputs, internals))
|
func.applyOCL(inputs_arr, outputs_arr, internals_arr))
|
||||||
|
|
||||||
|
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
|
||||||
for (size_t i = 0; i < inputs.size(); i++)
|
for (size_t i = 0; i < inputs.size(); i++)
|
||||||
{
|
{
|
||||||
@ -258,25 +265,29 @@ struct ReLUFunctor
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||||
{
|
{
|
||||||
size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
|
size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
|
||||||
|
std::vector<UMat> inputs;
|
||||||
|
std::vector<UMat> outputs;
|
||||||
|
|
||||||
|
inps.getUMatVector(inputs);
|
||||||
|
outs.getUMatVector(outputs);
|
||||||
|
|
||||||
for (size_t i = 0; i < inputs.size(); i++)
|
for (size_t i = 0; i < inputs.size(); i++)
|
||||||
{
|
{
|
||||||
UMat src, dst;
|
UMat& src = inputs[i];
|
||||||
inputs[i]->copyTo(src);
|
UMat& dst = outputs[i];
|
||||||
dst = outputs[i].getUMat(ACCESS_WRITE);
|
|
||||||
CV_Assert(src.isContinuous() && dst.isContinuous() && !src.offset && !dst.offset);
|
CV_Assert(src.isContinuous() && dst.isContinuous() && !src.offset && !dst.offset);
|
||||||
|
|
||||||
ocl::Kernel ker;
|
ocl::Kernel kernel;
|
||||||
CV_Assert(initKernel(ker, src));
|
CV_Assert(initKernel(kernel, src));
|
||||||
ker.set(0, (int)src.total());
|
kernel.set(0, (int)src.total());
|
||||||
ker.set(1, ocl::KernelArg::PtrReadOnly(src));
|
kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
|
||||||
ker.set(2, ocl::KernelArg::PtrWriteOnly(dst));
|
kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
|
||||||
|
|
||||||
size_t gSize = src.total();
|
size_t gSize = src.total();
|
||||||
CV_Assert(ker.run(1, &gSize, &wgSize, false));
|
CV_Assert(kernel.run(1, &gSize, &wgSize, false));
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@ -347,7 +358,7 @@ struct ReLU6Functor
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||||
{
|
{
|
||||||
// TODO: implement OCL version
|
// TODO: implement OCL version
|
||||||
return false;
|
return false;
|
||||||
@ -382,7 +393,7 @@ struct TanHFunctor
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||||
{
|
{
|
||||||
// TODO: implement OCL version
|
// TODO: implement OCL version
|
||||||
return false;
|
return false;
|
||||||
@ -417,7 +428,7 @@ struct SigmoidFunctor
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||||
{
|
{
|
||||||
// TODO: implement OCL version
|
// TODO: implement OCL version
|
||||||
return false;
|
return false;
|
||||||
@ -454,7 +465,7 @@ struct ELUFunctor
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||||
{
|
{
|
||||||
// TODO: implement OCL version
|
// TODO: implement OCL version
|
||||||
return false;
|
return false;
|
||||||
@ -489,7 +500,7 @@ struct AbsValFunctor
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||||
{
|
{
|
||||||
// TODO: implement OCL version
|
// TODO: implement OCL version
|
||||||
return false;
|
return false;
|
||||||
@ -524,7 +535,7 @@ struct BNLLFunctor
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||||
{
|
{
|
||||||
// TODO: implement OCL version
|
// TODO: implement OCL version
|
||||||
return false;
|
return false;
|
||||||
@ -581,7 +592,7 @@ struct PowerFunctor
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||||
{
|
{
|
||||||
// TODO: implement OCL version
|
// TODO: implement OCL version
|
||||||
return false;
|
return false;
|
||||||
@ -656,7 +667,7 @@ struct ChannelsPReLUFunctor
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
bool applyOCL(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||||
{
|
{
|
||||||
// TODO: implement OCL version
|
// TODO: implement OCL version
|
||||||
return false;
|
return false;
|
||||||
|
@ -254,6 +254,14 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** @brief Forward pass entry point taking generic array-of-arrays arguments.
 *
 * Traces the call and the layer name, then passes all three arguments unchanged to
 * Layer::forward_fallback(). No OpenCL path is attempted in this overload.
 * @param[in]  inputs_arr    the input blobs.
 * @param[out] outputs_arr   allocated output blobs, which will store results of the computation.
 * @param[out] internals_arr allocated internal blobs.
 */
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
// NOTE(review): forward_fallback() presumably routes to the std::vector<Mat*> overload -- confirm in Layer.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -104,6 +104,43 @@ public:
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
|
/** @brief OpenCL-target forward pass: republishes every input under its output's shape.
 *
 * For each input i, the shape of the currently allocated output i is read and output i is
 * replaced by the input UMat reshaped (single channel) to that shape.
 *
 * @param[in]  inputs_arr    the input blobs.
 * @param[out] outputs_arr   output blobs; must hold at least as many elements as inputs_arr.
 *                           Each element is overwritten with a reshaped header of the input.
 * @param[out] internals_arr not used by this implementation.
 * @return always true; a size mismatch is reported through CV_Assert.
 */
bool forward_ocl(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    std::vector<UMat> inputs;
    std::vector<UMat> outputs;

    inputs_arr.getUMatVector(inputs);
    outputs_arr.getUMatVector(outputs);

    // outputs[i] is read for every input index, so the outputs must cover the inputs.
    CV_Assert(inputs.size() <= outputs.size());

    for (size_t i = 0; i < inputs.size(); i++)
    {
        MatShape outShape = shape(outputs[i]);
        UMat& output = outputs_arr.getUMatRef((int)i);
        output = inputs[i].reshape(1, (int)outShape.size(), &outShape[0]);
    }

    return true;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/** @brief Forward pass entry point taking generic array-of-arrays arguments.
 *
 * Traces the call and the layer name, offers the work to forward_ocl() through
 * CV_OCL_RUN, and otherwise ends in Layer::forward_fallback() with the same arguments.
 * @param[in]  inputs_arr    the input blobs.
 * @param[out] outputs_arr   allocated output blobs, which will store results of the computation.
 * @param[out] internals_arr allocated internal blobs.
 */
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
// OpenCL attempt: requires the DNN_TARGET_OPENCL target, an outputs_arr that is a
// vector of UMat, and a passing performance check on the default device being Intel.
// NOTE(review): CV_OCL_RUN presumably returns from this function when forward_ocl()
// yields true and compiles to nothing without HAVE_OPENCL -- confirm in the macro definition.
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
||||||
|
outputs_arr.isUMatVector() &&
|
||||||
|
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||||
|
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||||
|
|
||||||
|
// Reached when the OpenCL attempt above did not take over.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -258,12 +258,18 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
bool forward_ocl(std::vector<Mat*> &input, std::vector<Mat> &output)
|
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, InputArrayOfArrays internals)
|
||||||
{
|
{
|
||||||
int axisCan = clamp(axis, input[0]->dims);
|
std::vector<UMat> inputs;
|
||||||
int numOutput = blobs[0].size[0];
|
std::vector<UMat> outputs;
|
||||||
int innerSize = blobs[0].size[1];
|
|
||||||
int outerSize = input[0]->total(0, axisCan);
|
inps.getUMatVector(inputs);
|
||||||
|
outs.getUMatVector(outputs);
|
||||||
|
|
||||||
|
int axisCan = clamp(axis, inputs[0].dims);
|
||||||
|
int numOutput = umat_blobs[0].size[0];
|
||||||
|
int innerSize = umat_blobs[0].size[1];
|
||||||
|
int outerSize = total(shape(inputs[0]), 0, axisCan);
|
||||||
bool ret = true;
|
bool ret = true;
|
||||||
|
|
||||||
if (innerProductOp.empty())
|
if (innerProductOp.empty())
|
||||||
@ -278,11 +284,10 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
|
UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
|
||||||
for (size_t i = 0; i < input.size(); i++)
|
for (size_t i = 0; i < inputs.size(); i++)
|
||||||
{
|
{
|
||||||
UMat srcMat, dstMat;
|
UMat& srcMat = inputs[i];
|
||||||
srcMat = input[i]->reshape(1, outerSize).getUMat(ACCESS_READ);
|
UMat& dstMat = outputs[i];
|
||||||
dstMat = output[i].reshape(1, outerSize).getUMat(ACCESS_WRITE);
|
|
||||||
dstMat.setTo(0.0f);
|
dstMat.setTo(0.0f);
|
||||||
|
|
||||||
if (!innerProductOp->Forward(srcMat, umat_blobs[0], (bias) ? umat_blobs[1] : UMat(), dstMat))
|
if (!innerProductOp->Forward(srcMat, umat_blobs[0], (bias) ? umat_blobs[1] : UMat(), dstMat))
|
||||||
@ -301,11 +306,15 @@ public:
|
|||||||
if (ret) return true;
|
if (ret) return true;
|
||||||
|
|
||||||
UMat& weights = umat_blobs[0];
|
UMat& weights = umat_blobs[0];
|
||||||
for (size_t i = 0; i < input.size(); i++)
|
for (size_t i = 0; i < inputs.size(); i++)
|
||||||
{
|
{
|
||||||
|
MatShape inshape, outshape;
|
||||||
|
inshape = shape(outerSize, innerSize);
|
||||||
|
outshape = shape(outerSize, numOutput);
|
||||||
|
|
||||||
UMat srcMat, dstMat;
|
UMat srcMat, dstMat;
|
||||||
srcMat = input[i]->reshape(1, outerSize).getUMat(ACCESS_READ);
|
srcMat = inputs[i].reshape(1, inshape.size(), &inshape[0]);
|
||||||
dstMat = output[i].reshape(1, outerSize).getUMat(ACCESS_WRITE);
|
dstMat = outputs[i].reshape(1, outshape.size(), &outshape[0]);
|
||||||
|
|
||||||
cv::gemm(srcMat, weights, 1, noArray(), 0, dstMat, GEMM_2_T);
|
cv::gemm(srcMat, weights, 1, noArray(), 0, dstMat, GEMM_2_T);
|
||||||
|
|
||||||
@ -320,14 +329,22 @@ public:
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &)
|
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
||||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||||
forward_ocl(input, output))
|
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||||
|
|
||||||
|
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
int axisCan = clamp(axis, input[0]->dims);
|
int axisCan = clamp(axis, input[0]->dims);
|
||||||
int outerSize = input[0]->total(0, axisCan);
|
int outerSize = input[0]->total(0, axisCan);
|
||||||
|
@ -94,8 +94,14 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||||
{
|
{
|
||||||
|
std::vector<UMat> inputs;
|
||||||
|
std::vector<UMat> outputs;
|
||||||
|
|
||||||
|
inps.getUMatVector(inputs);
|
||||||
|
outs.getUMatVector(outputs);
|
||||||
|
|
||||||
if (lrnOp.empty())
|
if (lrnOp.empty())
|
||||||
{
|
{
|
||||||
OCL4DNNLRNConfig config;
|
OCL4DNNLRNConfig config;
|
||||||
@ -108,28 +114,38 @@ public:
|
|||||||
config.alpha = alpha;
|
config.alpha = alpha;
|
||||||
config.beta = beta;
|
config.beta = beta;
|
||||||
config.k = bias;
|
config.k = bias;
|
||||||
CHECK_EQ(4, inputs[0]->dims) << "Input must have 4 axes, "
|
CHECK_EQ(4, inputs[0].dims) << "Input must have 4 axes, "
|
||||||
<< "corresponding to (num, channels, height, width)";
|
<< "corresponding to (num, channels, height, width)";
|
||||||
config.batch_size = inputs[0]->size[0];
|
config.batch_size = inputs[0].size[0];
|
||||||
config.channels = inputs[0]->size[1];
|
config.channels = inputs[0].size[1];
|
||||||
config.height = inputs[0]->size[2];
|
config.height = inputs[0].size[2];
|
||||||
config.width = inputs[0]->size[3];
|
config.width = inputs[0].size[3];
|
||||||
config.norm_by_size = normBySize;
|
config.norm_by_size = normBySize;
|
||||||
|
|
||||||
lrnOp = Ptr<OCL4DNNLRN<float> >(new OCL4DNNLRN<float>(config));
|
lrnOp = Ptr<OCL4DNNLRN<float> >(new OCL4DNNLRN<float>(config));
|
||||||
}
|
}
|
||||||
|
|
||||||
UMat inpMat, outMat;
|
if (!lrnOp->Forward(inputs[0], outputs[0]))
|
||||||
inpMat = inputs[0]->getUMat(ACCESS_READ);
|
|
||||||
outMat = outputs[0].getUMat(ACCESS_WRITE);
|
|
||||||
|
|
||||||
if (!lrnOp->Forward(inpMat, outMat))
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/** @brief Forward pass entry point taking generic array-of-arrays arguments.
 *
 * Traces the call and the layer name, checks that inputs and outputs have the same
 * element count, offers the work to forward_ocl() through CV_OCL_RUN, and otherwise
 * ends in Layer::forward_fallback() with the same arguments.
 * @param[in]  inputs_arr    the input blobs.
 * @param[out] outputs_arr   allocated output blobs; must have as many elements as inputs_arr.
 * @param[out] internals_arr allocated internal blobs.
 */
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
// Checked before either path runs, so it guards the OpenCL attempt as well.
CV_Assert(inputs_arr.total() == outputs_arr.total());
|
||||||
|
|
||||||
|
// OpenCL attempt: only when the preferable target is DNN_TARGET_OPENCL and the
// performance check on the default device being Intel passes.
// NOTE(review): CV_OCL_RUN presumably returns from this function when forward_ocl()
// yields true and compiles to nothing without HAVE_OPENCL -- confirm in the macro definition.
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
||||||
|
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||||
|
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||||
|
|
||||||
|
// Reached when the OpenCL attempt above did not take over.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
@ -137,10 +153,6 @@ public:
|
|||||||
|
|
||||||
CV_Assert(inputs.size() == outputs.size());
|
CV_Assert(inputs.size() == outputs.size());
|
||||||
|
|
||||||
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
|
||||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
|
||||||
forward_ocl(inputs, outputs, internals))
|
|
||||||
|
|
||||||
for (int i = 0; i < inputs.size(); i++)
|
for (int i = 0; i < inputs.size(); i++)
|
||||||
{
|
{
|
||||||
CV_Assert(inputs[i]->dims == 4);
|
CV_Assert(inputs[i]->dims == 4);
|
||||||
|
@ -55,6 +55,14 @@ public:
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @brief Forward pass entry point taking generic array-of-arrays arguments.
 *
 * Traces the call and the layer name, then passes all three arguments unchanged to
 * Layer::forward_fallback(). No OpenCL path is attempted in this overload.
 * @param[in]  inputs_arr    the input blobs.
 * @param[out] outputs_arr   allocated output blobs, which will store results of the computation.
 * @param[out] internals_arr allocated internal blobs.
 */
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
// NOTE(review): forward_fallback() presumably routes to the std::vector<Mat*> overload -- confirm in Layer.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -60,6 +60,14 @@ public:
|
|||||||
eps = params.get<double>("eps", 1e-9);
|
eps = params.get<double>("eps", 1e-9);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @brief Forward pass entry point taking generic array-of-arrays arguments.
 *
 * Traces the call and the layer name, then passes all three arguments unchanged to
 * Layer::forward_fallback(). No OpenCL path is attempted in this overload.
 * @param[in]  inputs_arr    the input blobs.
 * @param[out] outputs_arr   allocated output blobs, which will store results of the computation.
 * @param[out] internals_arr allocated internal blobs.
 */
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
// NOTE(review): forward_fallback() presumably routes to the std::vector<Mat*> overload -- confirm in Layer.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -69,6 +69,14 @@ public:
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @brief Forward pass entry point taking generic array-of-arrays arguments.
 *
 * Traces the call and the layer name, then passes all three arguments unchanged to
 * Layer::forward_fallback(). No OpenCL path is attempted in this overload.
 * @param[in]  inputs_arr    the input blobs.
 * @param[out] outputs_arr   allocated output blobs, which will store results of the computation.
 * @param[out] internals_arr allocated internal blobs.
 */
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
// NOTE(review): forward_fallback() presumably routes to the std::vector<Mat*> overload -- confirm in Layer.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -91,6 +91,14 @@ public:
|
|||||||
backendId == DNN_BACKEND_HALIDE && haveHalide() && dstRanges.size() == 4;
|
backendId == DNN_BACKEND_HALIDE && haveHalide() && dstRanges.size() == 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @brief Forward pass entry point taking generic array-of-arrays arguments.
 *
 * Traces the call and the layer name, then passes all three arguments unchanged to
 * Layer::forward_fallback(). No OpenCL path is attempted in this overload.
 * @param[in]  inputs_arr    the input blobs.
 * @param[out] outputs_arr   allocated output blobs, which will store results of the computation.
 * @param[out] internals_arr allocated internal blobs.
 */
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
// NOTE(review): forward_fallback() presumably routes to the std::vector<Mat*> overload -- confirm in Layer.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -247,6 +247,14 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** @brief Forward pass entry point taking generic array-of-arrays arguments.
 *
 * Traces the call and the layer name, then passes all three arguments unchanged to
 * Layer::forward_fallback(). No OpenCL path is attempted in this overload.
 * @param[in]  inputs_arr    the input blobs.
 * @param[out] outputs_arr   allocated output blobs, which will store results of the computation.
 * @param[out] internals_arr allocated internal blobs.
 */
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
// NOTE(review): forward_fallback() presumably routes to the std::vector<Mat*> overload -- confirm in Layer.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -113,18 +113,24 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, InputArrayOfArrays internals)
|
||||||
{
|
{
|
||||||
|
std::vector<UMat> inputs;
|
||||||
|
std::vector<UMat> outputs;
|
||||||
|
|
||||||
|
inps.getUMatVector(inputs);
|
||||||
|
outs.getUMatVector(outputs);
|
||||||
|
|
||||||
if (poolOp.empty())
|
if (poolOp.empty())
|
||||||
{
|
{
|
||||||
OCL4DNNPoolConfig config;
|
OCL4DNNPoolConfig config;
|
||||||
|
|
||||||
config.in_shape = shape(*inputs[0]);
|
config.in_shape = shape(inputs[0]);
|
||||||
config.out_shape = shape(outputs[0]);
|
config.out_shape = shape(outputs[0]);
|
||||||
config.kernel = kernel;
|
config.kernel = kernel;
|
||||||
config.pad = pad;
|
config.pad = pad;
|
||||||
config.stride = stride;
|
config.stride = stride;
|
||||||
config.channels = inputs[0]->size[1];
|
config.channels = inputs[0].size[1];
|
||||||
config.pool_method = type == MAX ? LIBDNN_POOLING_METHOD_MAX :
|
config.pool_method = type == MAX ? LIBDNN_POOLING_METHOD_MAX :
|
||||||
(type == AVE ? LIBDNN_POOLING_METHOD_AVE :
|
(type == AVE ? LIBDNN_POOLING_METHOD_AVE :
|
||||||
LIBDNN_POOLING_METHOD_STO);
|
LIBDNN_POOLING_METHOD_STO);
|
||||||
@ -133,18 +139,10 @@ public:
|
|||||||
|
|
||||||
for (size_t ii = 0; ii < inputs.size(); ii++)
|
for (size_t ii = 0; ii < inputs.size(); ii++)
|
||||||
{
|
{
|
||||||
UMat inpMat, outMat, maskMat;
|
UMat& inpMat = inputs[ii];
|
||||||
|
int out_index = (type == MAX) ? 2 : 1;
|
||||||
inpMat = inputs[ii]->getUMat(ACCESS_READ);
|
UMat& outMat = outputs[out_index * ii];
|
||||||
|
UMat maskMat = (type == MAX) ? outputs[2 * ii + 1] : UMat();
|
||||||
if (type == MAX)
|
|
||||||
{
|
|
||||||
outMat = outputs[2 * ii].getUMat(ACCESS_WRITE);
|
|
||||||
maskMat = outputs[2 * ii + 1].getUMat(ACCESS_WRITE);
|
|
||||||
} else {
|
|
||||||
outMat = outputs[ii].getUMat(ACCESS_WRITE);
|
|
||||||
maskMat = UMat();
|
|
||||||
}
|
|
||||||
|
|
||||||
CV_Assert(inpMat.offset == 0 && outMat.offset == 0);
|
CV_Assert(inpMat.offset == 0 && outMat.offset == 0);
|
||||||
|
|
||||||
@ -156,14 +154,22 @@ public:
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
||||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||||
forward_ocl(inputs, outputs, internals))
|
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||||
|
|
||||||
|
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
for (size_t ii = 0; ii < inputs.size(); ii++)
|
for (size_t ii = 0; ii < inputs.size(); ii++)
|
||||||
{
|
{
|
||||||
|
@ -249,6 +249,14 @@ public:
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -252,6 +252,14 @@ public:
|
|||||||
allocated = true;
|
allocated = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals)
|
void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
@ -465,6 +473,14 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals)
|
void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -114,6 +114,14 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -85,6 +85,15 @@ public:
|
|||||||
{
|
{
|
||||||
return backendId == DNN_BACKEND_DEFAULT;
|
return backendId == DNN_BACKEND_DEFAULT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -182,6 +182,14 @@ public:
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -37,6 +37,14 @@ public:
|
|||||||
return (outputs[0][2] == inputs[0][2]) && (outputs[0][3] == inputs[0][3]);
|
return (outputs[0][2] == inputs[0][2]) && (outputs[0][3] == inputs[0][3]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -44,6 +44,14 @@ public:
|
|||||||
backendId == DNN_BACKEND_HALIDE && haveHalide();
|
backendId == DNN_BACKEND_HALIDE && haveHalide();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -36,6 +36,14 @@ public:
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -171,6 +171,14 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -91,35 +91,42 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
bool forward_ocl(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays itns)
|
||||||
{
|
{
|
||||||
|
std::vector<UMat> inputs;
|
||||||
|
std::vector<UMat> outputs;
|
||||||
|
std::vector<UMat> internals;
|
||||||
|
|
||||||
|
inps.getUMatVector(inputs);
|
||||||
|
outs.getUMatVector(outputs);
|
||||||
|
itns.getUMatVector(internals);
|
||||||
|
|
||||||
if (softmaxOp.empty())
|
if (softmaxOp.empty())
|
||||||
{
|
{
|
||||||
OCL4DNNSoftmaxConfig config;
|
OCL4DNNSoftmaxConfig config;
|
||||||
|
|
||||||
config.in_shape = shape(*inputs[0]);
|
config.in_shape = shape(inputs[0]);
|
||||||
config.axis = axisRaw;
|
config.axis = axisRaw;
|
||||||
config.channels = inputs[0]->size[axisRaw];
|
config.channels = inputs[0].size[axisRaw];
|
||||||
config.logsoftmax = logSoftMax;
|
config.logsoftmax = logSoftMax;
|
||||||
|
|
||||||
softmaxOp = Ptr<OCL4DNNSoftmax<float> >(new OCL4DNNSoftmax<float>(config));
|
softmaxOp = Ptr<OCL4DNNSoftmax<float> >(new OCL4DNNSoftmax<float>(config));
|
||||||
}
|
}
|
||||||
|
|
||||||
UMat srcMat, dstMat;
|
UMat& src = inputs[0];
|
||||||
srcMat = inputs[0]->getUMat(ACCESS_READ);
|
UMat& dstMat = outputs[0];
|
||||||
dstMat = outputs[0].getUMat(ACCESS_WRITE);
|
|
||||||
|
|
||||||
if (softmaxOp->Forward(srcMat, dstMat))
|
if (softmaxOp->Forward(src, dstMat))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
const Mat &src = *inputs[0];
|
UMat& bufMat = internals[0];
|
||||||
UMat bufMat = internals[0].getUMat(ACCESS_WRITE);
|
src.copyTo(dstMat);
|
||||||
srcMat.copyTo(dstMat);
|
|
||||||
|
|
||||||
int axis = clamp(axisRaw, src.dims);
|
int axis = clamp(axisRaw, src.dims);
|
||||||
size_t outerSize = src.total(0, axis);
|
MatShape s = shape(src);
|
||||||
|
size_t outerSize = total(s, 0, axis);
|
||||||
size_t channels = src.size[axis];
|
size_t channels = src.size[axis];
|
||||||
size_t innerSize = src.total(axis + 1);
|
size_t innerSize = total(s, axis + 1);
|
||||||
|
|
||||||
String buildOpts = String("-DT=") + ocl::typeToStr(src.type());
|
String buildOpts = String("-DT=") + ocl::typeToStr(src.type());
|
||||||
ocl::Kernel kmax, ksub, ksum, kdiv;
|
ocl::Kernel kmax, ksub, ksum, kdiv;
|
||||||
@ -175,14 +182,22 @@ public:
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
||||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||||
forward_ocl(inputs, outputs, internals))
|
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||||
|
|
||||||
|
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
const Mat &src = *inputs[0];
|
const Mat &src = *inputs[0];
|
||||||
Mat &dst = outputs[0];
|
Mat &dst = outputs[0];
|
||||||
|
@ -78,6 +78,14 @@ public:
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
|
}
|
||||||
|
|
||||||
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
Loading…
Reference in New Issue
Block a user