dnn fp16 support
Signed-off-by: Li Peng <peng.li@intel.com>
commit 329abb5b64 (parent bb8ff2c463)
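
For context (not part of the patch): with this change a user enables half-precision inference by selecting the new DNN_TARGET_OPENCL_FP16 target on the default backend. A minimal usage sketch; the model files, input image, and preprocessing values are placeholders:

#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>

int main()
{
    // Placeholder model files; any Caffe model is loaded the same way.
    cv::dnn::Net net = cv::dnn::readNetFromCaffe("model.prototxt", "model.caffemodel");

    net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL_FP16);  // new fp16 target

    cv::Mat img = cv::imread("input.jpg");
    cv::Mat blob = cv::dnn::blobFromImage(img, 1.0, cv::Size(224, 224),
                                          cv::Scalar(104, 117, 123));
    net.setInput(blob);            // fp32 blob; converted to fp16 internally
    cv::Mat out = net.forward();   // result is converted back to CV_32F
    return 0;
}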
@@ -499,7 +499,7 @@ public:
         }
     }

-    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool forceCreate)
+    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool forceCreate, bool use_half)
     {
         if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS && !forceCreate)
         {
@@ -540,14 +540,14 @@ public:
         {
             // if dst already has been allocated with total(shape) elements,
             // it won't be recreated and pointer of dst.data remains the same.
-            dst.create(shape, CV_32F);
+            dst.create(shape, use_half ? CV_16S : CV_32F);
             addHost(lp, dst);
         }
     }

     void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
                                std::vector<LayerPin>& pinsForInternalBlobs,
-                               bool forceCreate = false)
+                               bool forceCreate = false, bool use_half = false)
     {
         CV_TRACE_FUNCTION();

@@ -618,7 +618,7 @@ public:
                 reuse(ld.inputBlobsId[0], blobPin);
             }
             else
-                reuseOrCreate(shapes[index], blobPin, *blobs[index], forceCreate);
+                reuseOrCreate(shapes[index], blobPin, *blobs[index], forceCreate, use_half);
         }
     }
 }
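
Note on storage: OpenCV has no dedicated 16-bit float Mat type at this point, so fp16 blobs are held in CV_16S Mats, and cv::convertFp16 moves data between CV_32F and that packed fp16 representation. A small stand-alone round-trip sketch (illustrative, not from the patch):

#include <opencv2/core.hpp>

int main()
{
    cv::Mat f32(2, 3, CV_32F, cv::Scalar(0.5f));

    cv::Mat f16, back;
    cv::convertFp16(f32, f16);   // CV_32F -> fp16 payload stored in a CV_16S Mat
    CV_Assert(f16.depth() == CV_16S && f16.elemSize() == 2);

    cv::convertFp16(f16, back);  // CV_16S (fp16) -> CV_32F
    CV_Assert(back.depth() == CV_32F && cv::norm(f32, back, cv::NORM_INF) == 0);
    return 0;
}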
@@ -656,7 +656,7 @@ static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
 {
     if (targetId == DNN_TARGET_CPU)
         return Ptr<BackendWrapper>();
-    else if (targetId == DNN_TARGET_OPENCL)
+    else if (IS_DNN_OPENCL_TARGET(targetId))
         return OpenCLBackendWrapper::create(m);
     else
         CV_Error(Error::StsNotImplemented, "Unknown target identifier");
@@ -721,6 +721,7 @@ struct Net::Impl
     bool netWasAllocated;
     bool fusion;
     std::vector<int64> layersTimings;
+    Mat output_blob;

     Ptr<BackendWrapper> wrap(Mat& host)
     {
@@ -737,7 +738,7 @@ struct Net::Impl
             Ptr<BackendWrapper> baseBuffer = backendWrappers[data];
             if (preferableBackend == DNN_BACKEND_DEFAULT)
             {
-                CV_Assert(preferableTarget == DNN_TARGET_OPENCL);
+                CV_Assert(IS_DNN_OPENCL_TARGET(preferableTarget));
                 return OpenCLBackendWrapper::create(baseBuffer, host);
             }
             else if (preferableBackend == DNN_BACKEND_HALIDE)
@@ -849,7 +850,7 @@ struct Net::Impl

         if (!netWasAllocated || this->blobsToKeep != blobsToKeep_)
         {
-            if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL)
+            if (preferableBackend == DNN_BACKEND_DEFAULT && IS_DNN_OPENCL_TARGET(preferableTarget))
 #ifndef HAVE_OPENCL
             {
                 CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU.");
@@ -1034,7 +1035,7 @@ struct Net::Impl
     {
         CV_TRACE_FUNCTION();
         if (preferableBackend == DNN_BACKEND_DEFAULT)
-            CV_Assert(preferableTarget == DNN_TARGET_CPU || preferableTarget == DNN_TARGET_OPENCL);
+            CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget));
         else if (preferableBackend == DNN_BACKEND_HALIDE)
             initHalideBackend();
         else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE)
@@ -1369,7 +1370,9 @@ struct Net::Impl

         std::vector<LayerPin> pinsForInternalBlobs;
         blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs,
-                                          preferableBackend == DNN_BACKEND_INFERENCE_ENGINE);
+                                          preferableBackend == DNN_BACKEND_INFERENCE_ENGINE,
+                                          preferableBackend == DNN_BACKEND_DEFAULT &&
+                                          preferableTarget == DNN_TARGET_OPENCL_FP16);
         ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
         for (int i = 0; i < ld.outputBlobs.size(); ++i)
         {
@@ -1439,7 +1442,7 @@ struct Net::Impl
         // some other layers.

         // TODO: OpenCL target support more fusion styles.
-        if ( preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL &&
+        if ( preferableBackend == DNN_BACKEND_DEFAULT && IS_DNN_OPENCL_TARGET(preferableTarget) &&
             (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
                                        ld.layerInstance->type != "MVN")) )
             continue;
@@ -1478,8 +1481,8 @@ struct Net::Impl
                     continue;  // Go to the next layer.

                 // For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh
-                if ( preferableTarget != DNN_TARGET_OPENCL ||
-                     (preferableTarget == DNN_TARGET_OPENCL &&
+                if ( !IS_DNN_OPENCL_TARGET(preferableTarget) ||
+                     (IS_DNN_OPENCL_TARGET(preferableTarget) &&
                       nextData &&
                       ((nextData->type == "ReLU") ||
                        (nextData->type == "ChannelsPReLU") ||
@@ -1502,7 +1505,7 @@ struct Net::Impl
                         ld.outputBlobs = layers[lpNext.lid].outputBlobs;
                         ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;

-                        if ( preferableTarget == DNN_TARGET_OPENCL )
+                        if ( IS_DNN_OPENCL_TARGET(preferableTarget) )
                         {
                             if ( !activData->consumers.empty() )
                             {
@@ -1514,7 +1517,7 @@ struct Net::Impl
                 }

                 // fuse convolution layer followed by eltwise + relu
-                if ( preferableTarget == DNN_TARGET_OPENCL )
+                if ( IS_DNN_OPENCL_TARGET(preferableTarget) )
                 {
                     Ptr<EltwiseLayer> nextEltwiseLayer;
                     if( nextData )
@@ -1727,6 +1730,13 @@ struct Net::Impl
         for(int i = 0; i < layers[0].outputBlobs.size(); i++)
         {
             CV_Assert(layers[0].outputBlobs[i].total());
+            if (layers[0].outputBlobs[i].depth() == CV_32F &&
+                preferableBackend == DNN_BACKEND_DEFAULT &&
+                preferableTarget == DNN_TARGET_OPENCL_FP16)
+            {
+                Mat mat = layers[0].outputBlobs[i].clone();
+                convertFp16(mat, layers[0].outputBlobs[i]);
+            }
             inputShapes.push_back(shape(layers[0].outputBlobs[i]));
         }
         LayersShapesMap layersShapes;
@@ -1772,7 +1782,7 @@ struct Net::Impl
         {
             if( !ld.skip )
             {
-                if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL)
+                if (preferableBackend == DNN_BACKEND_DEFAULT && IS_DNN_OPENCL_TARGET(preferableTarget))
                 {
                     std::vector<UMat> umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
                     layer->forward(OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers),
@@ -1937,7 +1947,14 @@ struct Net::Impl
             // Transfer data to CPU if it's required.
             ld.outputBlobsWrappers[pin.oid]->copyToHost();
         }
-        return ld.outputBlobs[pin.oid];
+
+        if (ld.outputBlobs[pin.oid].depth() == CV_16S)
+        {
+            convertFp16(ld.outputBlobs[pin.oid], output_blob);
+            return output_blob;
+        }
+        else
+            return ld.outputBlobs[pin.oid];
     }

     Mat getBlob(String outputName)
@@ -2080,7 +2097,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)

     if (outputBlobs.isUMat())
     {
-        outputBlobs.assign(ld.outputBlobs[pin.oid].getUMat(ACCESS_RW));
+        outputBlobs.assign(impl->getBlob(layerName).getUMat(ACCESS_RW));
     }
     else if (outputBlobs.isMat())
     {
@@ -2096,17 +2113,33 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
                 ld.outputBlobsWrappers[i]->copyToHost();
             }
         }
-        std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
-        outputvec = ld.outputBlobs;
+        if (ld.outputBlobs[0].depth() == CV_32F)
+        {
+            std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
+            outputvec = ld.outputBlobs;
+        } else {
+            std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
+            outputvec.resize(ld.outputBlobs.size());
+            for (int i = 0; i < outputvec.size(); i++)
+                convertFp16(ld.outputBlobs[i], outputvec[i]);
+        }
     }
     else if (outputBlobs.isUMatVector())
     {
         std::vector<UMat> & outputvec = *(std::vector<UMat> *)outputBlobs.getObj();

         if (impl->preferableBackend == DNN_BACKEND_DEFAULT &&
-            impl->preferableTarget == DNN_TARGET_OPENCL)
+            IS_DNN_OPENCL_TARGET(impl->preferableTarget))
         {
-            outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
+            if (impl->preferableTarget == DNN_TARGET_OPENCL)
+                outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
+            else if (impl->preferableTarget == DNN_TARGET_OPENCL_FP16)
+            {
+                std::vector<UMat> out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
+                outputvec.resize(out_vec.size());
+                for (int i = 0; i < out_vec.size(); i++)
+                    convertFp16(out_vec[i], outputvec[i]);
+            }
         }
         else
         {
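
The vector overload of Net::forward follows the same rule: on the FP16 target the internal CV_16S blobs are converted before being handed back, so callers always receive CV_32F Mats. A short sketch reusing the `net` from the first example; "prob" is a placeholder output name:

// "prob" is a placeholder; use the real output layer name of your model.
std::vector<cv::Mat> outs;
net.forward(outs, "prob");
for (size_t i = 0; i < outs.size(); i++)
    CV_Assert(outs[i].depth() == CV_32F);   // fp16 blobs were converted back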
@@ -2194,6 +2227,16 @@ void Net::setPreferableTarget(int targetId)
     if( impl->preferableTarget != targetId )
     {
         impl->preferableTarget = targetId;
+        if (IS_DNN_OPENCL_TARGET(targetId))
+        {
+#ifndef HAVE_OPENCL
+            impl->preferableTarget = DNN_TARGET_CPU;
+#else
+            bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
+            if (!fp16 && targetId == DNN_TARGET_OPENCL_FP16)
+                impl->preferableTarget = DNN_TARGET_OPENCL;
+#endif
+        }
         impl->netWasAllocated = false;
         impl->clear();
     }
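
setPreferableTarget now downgrades the request itself: to CPU when OpenCL is unavailable, and to DNN_TARGET_OPENCL when the device lacks cl_khr_fp16. An application can make the same decision up front; a sketch with a hypothetical helper pickDnnTarget(), assuming an OpenCL-enabled build:

#include <opencv2/core/ocl.hpp>
#include <opencv2/dnn.hpp>

// Hypothetical helper: pick the best available DNN target.
static int pickDnnTarget()
{
    if (!cv::ocl::haveOpenCL())
        return cv::dnn::DNN_TARGET_CPU;
    // Same extension check the patch performs in setPreferableTarget().
    bool fp16 = cv::ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
    return fp16 ? cv::dnn::DNN_TARGET_OPENCL_FP16 : cv::dnn::DNN_TARGET_OPENCL;
}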
@@ -2222,7 +2265,17 @@ void Net::setInput(InputArray blob, const String& name)
     ld.outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
     ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
     MatShape prevShape = shape(ld.outputBlobs[pin.oid]);
-    Mat blob_ = blob.getMat();
+    Mat blob_;
+    if (impl->preferableBackend == DNN_BACKEND_DEFAULT &&
+        impl->preferableTarget == DNN_TARGET_OPENCL_FP16)
+    {
+        Mat blob_mat = blob.getMat();
+        convertFp16(blob_mat, blob_);
+    }
+    else
+    {
+        blob_ = blob.getMat();
+    }
     bool oldShape = prevShape == shape(blob_);
     if (oldShape)
     {
@@ -2747,6 +2800,43 @@ void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays
     CV_TRACE_FUNCTION();
     CV_TRACE_ARG_VALUE(name, "name", name.c_str());

+    if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16S)
+    {
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+        std::vector<UMat> internals;
+
+        std::vector<UMat> orig_inputs;
+        std::vector<UMat> orig_outputs;
+        std::vector<UMat> orig_internals;
+
+        inputs_arr.getUMatVector(orig_inputs);
+        outputs_arr.getUMatVector(orig_outputs);
+        internals_arr.getUMatVector(orig_internals);
+
+        inputs.resize(orig_inputs.size());
+        for (size_t i = 0; i < orig_inputs.size(); i++)
+            convertFp16(orig_inputs[i], inputs[i]);
+
+        outputs.resize(orig_outputs.size());
+        for (size_t i = 0; i < orig_outputs.size(); i++)
+            outputs[i].create(shape(orig_outputs[i]), CV_32F);
+
+        internals.resize(orig_internals.size());
+        for (size_t i = 0; i < orig_internals.size(); i++)
+            internals[i].create(shape(orig_internals[i]), CV_32F);
+
+        forward(inputs, outputs, internals);
+
+        for (size_t i = 0; i < outputs.size(); i++)
+            convertFp16(outputs[i], orig_outputs[i]);
+
+        // sync results back
+        outputs_arr.assign(orig_outputs);
+        internals_arr.assign(orig_internals);
+        return;
+    }
+
     std::vector<Mat> inpvec;
     std::vector<Mat> outputs;
     std::vector<Mat> internals;
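
forward_fallback gives layers without an fp16 kernel a transparent fp32 path: fp16 inputs are converted up, the ordinary forward() runs, and results are converted back down. The same pattern in isolation, with cv::add standing in for the layer's fp32-only code and a hypothetical helper name:

#include <opencv2/core.hpp>

// Hypothetical helper showing the forward_fallback pattern: convert fp16
// (CV_16S) inputs to fp32, run an fp32-only computation, convert back.
static void addHalf(const cv::UMat& a16, const cv::UMat& b16, cv::UMat& dst16)
{
    cv::UMat a32, b32, dst32;
    cv::convertFp16(a16, a32);       // fp16 -> fp32
    cv::convertFp16(b16, b32);
    cv::add(a32, b32, dst32);        // stand-in for the layer's fp32 forward()
    cv::convertFp16(dst32, dst16);   // fp32 -> fp16
}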
@@ -64,6 +64,7 @@

 namespace cv { namespace dnn {
 CV__DNN_EXPERIMENTAL_NS_BEGIN
+#define IS_DNN_OPENCL_TARGET(id) (id == DNN_TARGET_OPENCL || id == DNN_TARGET_OPENCL_FP16)
 Mutex& getInitializationMutex();
 void initializeLayerFactory();
 CV__DNN_EXPERIMENTAL_NS_END
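
A quick way to sanity-check the new target is to compare it against the fp32 OpenCL target on the same input. A sketch reusing `net` and `blob` from the first example; the tolerance is an assumption, since fp16 rounding makes bit-exact equality unrealistic:

net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL);
net.setInput(blob);
cv::Mat ref = net.forward().clone();

net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL_FP16);
net.setInput(blob);
cv::Mat half = net.forward().clone();

// Assumed tolerance; adjust per model.
CV_Assert(cv::norm(ref, half, cv::NORM_INF) < 1e-2);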