Merge pull request #9058 from alalek:dnn_minor_fixes

This commit is contained in:
Alexander Alekhin 2017-07-14 09:53:09 +00:00
commit 4238add35b
12 changed files with 476 additions and 660 deletions

View File

@ -9,6 +9,8 @@ endif()
set(the_description "Deep neural network module. It allows to load models from different frameworks and to make forward pass") set(the_description "Deep neural network module. It allows to load models from different frameworks and to make forward pass")
ocv_add_dispatched_file("layers/layers_common" AVX AVX2)
ocv_add_module(dnn opencv_core opencv_imgproc WRAP python matlab java) ocv_add_module(dnn opencv_core opencv_imgproc WRAP python matlab java)
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wno-shadow -Wno-parentheses -Wmaybe-uninitialized -Wsign-promo ocv_warnings_disable(CMAKE_CXX_FLAGS -Wno-shadow -Wno-parentheses -Wmaybe-uninitialized -Wsign-promo
-Wmissing-declarations -Wmissing-prototypes -Wmissing-declarations -Wmissing-prototypes

View File

@ -44,7 +44,7 @@
// This is an umbrella header to include into your project. // This is an umbrella header to include into your project.
// We are free to change headers layout in dnn subfolder, so please include // We are free to change headers layout in dnn subfolder, so please include
// this header for future compartibility // this header for future compatibility
/** @defgroup dnn Deep Neural Network module /** @defgroup dnn Deep Neural Network module

View File

@ -152,7 +152,19 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
int outputNameToIndex(String outputName); int outputNameToIndex(String outputName);
}; };
//! Classical recurrent layer /** @brief Classical recurrent layer
Accepts two inputs @f$x_t@f$ and @f$h_{t-1}@f$ and computes two outputs @f$o_t@f$ and @f$h_t@f$.
- input: should contain packed input @f$x_t@f$.
- output: should contain output @f$o_t@f$ (and @f$h_t@f$ if setProduceHiddenOutput() is set to true).
input[0] should have shape [`T`, `N`, `data_dims`] where `T` and `N` are the number of timestamps and the number of independent samples of @f$x_t@f$ respectively.
output[0] will have shape [`T`, `N`, @f$N_o@f$], where @f$N_o@f$ is the number of rows in the @f$ W_{xo} @f$ matrix.
If setProduceHiddenOutput() is set to true then @p output[1] will contain a Mat with shape [`T`, `N`, @f$N_h@f$], where @f$N_h@f$ is the number of rows in the @f$ W_{hh} @f$ matrix.
*/
class CV_EXPORTS RNNLayer : public Layer class CV_EXPORTS RNNLayer : public Layer
{ {
public: public:
@ -180,17 +192,6 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
*/ */
virtual void setProduceHiddenOutput(bool produce = false) = 0; virtual void setProduceHiddenOutput(bool produce = false) = 0;
/** Accepts two inputs @f$x_t@f$ and @f$h_{t-1}@f$ and compute two outputs @f$o_t@f$ and @f$h_t@f$.
@param input should contain packed input @f$x_t@f$.
@param output should contain output @f$o_t@f$ (and @f$h_t@f$ if setProduceHiddenOutput() is set to true).
@p input[0] should have shape [`T`, `N`, `data_dims`] where `T` and `N` is number of timestamps and number of independent samples of @f$x_t@f$ respectively.
@p output[0] will have shape [`T`, `N`, @f$N_o@f$], where @f$N_o@f$ is number of rows in @f$ W_{xo} @f$ matrix.
If setProduceHiddenOutput() is set to true then @p output[1] will contain a Mat with shape [`T`, `N`, @f$N_h@f$], where @f$N_h@f$ is number of rows in @f$ W_{hh} @f$ matrix.
*/
}; };
class CV_EXPORTS BaseConvolutionLayer : public Layer class CV_EXPORTS BaseConvolutionLayer : public Layer

View File

@ -969,9 +969,6 @@ struct Net::Impl
} }
} }
#define CV_RETHROW_ERROR(err, newmsg)\
cv::error(err.code, newmsg, err.func.c_str(), err.file.c_str(), err.line)
void allocateLayer(int lid, const LayersShapesMap& layersShapes) void allocateLayer(int lid, const LayersShapesMap& layersShapes)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();

View File

@ -506,13 +506,13 @@ public:
int bsz = ofs1 - ofs0; int bsz = ofs1 - ofs0;
#if CV_TRY_AVX2 #if CV_TRY_AVX2
if(useAVX2) if(useAVX2)
fastConv_avx2(wptr, wstep, biasptr, rowbuf0, data_out0 + ofs0, opt_AVX2::fastConv(wptr, wstep, biasptr, rowbuf0, data_out0 + ofs0,
outShape, bsz, vsz, vsz_a, relu, cn0 == 0); outShape, bsz, vsz, vsz_a, relu, cn0 == 0);
else else
#endif #endif
#if CV_TRY_AVX #if CV_TRY_AVX
if(useAVX) if(useAVX)
fastConv_avx(wptr, wstep, biasptr, rowbuf0, data_out0 + ofs0, opt_AVX::fastConv(wptr, wstep, biasptr, rowbuf0, data_out0 + ofs0,
outShape, bsz, vsz, vsz_a, relu, cn0 == 0); outShape, bsz, vsz, vsz_a, relu, cn0 == 0);
else else
#endif #endif
@ -824,12 +824,12 @@ public:
#if CV_TRY_AVX2 #if CV_TRY_AVX2
if( useAVX2 ) if( useAVX2 )
fastGEMM_avx2( aptr, astep, bptr, bstep, cptr, cstep, mmax, kmax, nmax ); opt_AVX2::fastGEMM( aptr, astep, bptr, bstep, cptr, cstep, mmax, kmax, nmax );
else else
#endif #endif
#if CV_TRY_AVX #if CV_TRY_AVX
if( useAVX ) if( useAVX )
fastGEMM_avx( aptr, astep, bptr, bstep, cptr, cstep, mmax, kmax, nmax ); opt_AVX::fastGEMM( aptr, astep, bptr, bstep, cptr, cstep, mmax, kmax, nmax );
else else
#endif #endif
for( m = 0; m < mmax; m += 2 ) for( m = 0; m < mmax; m += 2 )

View File

@ -55,29 +55,13 @@ namespace util
{ {
template <typename T> template <typename T>
std::string to_string(T value) static inline bool SortScorePairDescend(const std::pair<float, T>& pair1,
{
std::ostringstream stream;
stream << value;
return stream.str();
}
template <typename T>
void make_error(const std::string& message1, const T& message2)
{
std::string error(message1);
error += std::string(util::to_string<int>(message2));
CV_Error(Error::StsBadArg, error.c_str());
}
template <typename T>
bool SortScorePairDescend(const std::pair<float, T>& pair1,
const std::pair<float, T>& pair2) const std::pair<float, T>& pair2)
{ {
return pair1.first > pair2.first; return pair1.first > pair2.first;
} }
} } // namespace
class DetectionOutputLayerImpl : public DetectionOutputLayer class DetectionOutputLayerImpl : public DetectionOutputLayer
{ {
@ -133,7 +117,7 @@ public:
message += " layer parameter does not contain "; message += " layer parameter does not contain ";
message += parameterName; message += parameterName;
message += " parameter."; message += " parameter.";
CV_Error(Error::StsBadArg, message); CV_ErrorNoReturn(Error::StsBadArg, message);
} }
else else
{ {
@ -209,73 +193,122 @@ public:
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str()); CV_TRACE_ARG_VALUE(name, "name", name.c_str());
std::vector<LabelBBox> allDecodedBBoxes;
std::vector<std::vector<std::vector<float> > > allConfidenceScores;
int num = inputs[0]->size[0];
// extract predictions from input layers
{
int numPriors = inputs[2]->size[2] / 4;
const float* locationData = inputs[0]->ptr<float>(); const float* locationData = inputs[0]->ptr<float>();
const float* confidenceData = inputs[1]->ptr<float>(); const float* confidenceData = inputs[1]->ptr<float>();
const float* priorData = inputs[2]->ptr<float>(); const float* priorData = inputs[2]->ptr<float>();
int num = inputs[0]->size[0]; // Retrieve all location predictions
int numPriors = inputs[2]->size[2] / 4;
// Retrieve all location predictions.
std::vector<LabelBBox> allLocationPredictions; std::vector<LabelBBox> allLocationPredictions;
GetLocPredictions(locationData, num, numPriors, _numLocClasses, GetLocPredictions(locationData, num, numPriors, _numLocClasses,
_shareLocation, &allLocationPredictions); _shareLocation, allLocationPredictions);
// Retrieve all confidences. // Retrieve all confidences
std::vector<std::vector<std::vector<float> > > allConfidenceScores; GetConfidenceScores(confidenceData, num, numPriors, _numClasses, allConfidenceScores);
GetConfidenceScores(confidenceData, num, numPriors, _numClasses,
&allConfidenceScores);
// Retrieve all prior bboxes. It is same within a batch since we assume all // Retrieve all prior bboxes
// images in a batch are of same dimension.
std::vector<caffe::NormalizedBBox> priorBBoxes; std::vector<caffe::NormalizedBBox> priorBBoxes;
std::vector<std::vector<float> > priorVariances; std::vector<std::vector<float> > priorVariances;
GetPriorBBoxes(priorData, numPriors, &priorBBoxes, &priorVariances); GetPriorBBoxes(priorData, numPriors, priorBBoxes, priorVariances);
const bool clip_bbox = false; // Decode all loc predictions to bboxes
// Decode all loc predictions to bboxes.
std::vector<LabelBBox> allDecodedBBoxes;
DecodeBBoxesAll(allLocationPredictions, priorBBoxes, priorVariances, num, DecodeBBoxesAll(allLocationPredictions, priorBBoxes, priorVariances, num,
_shareLocation, _numLocClasses, _backgroundLabelId, _shareLocation, _numLocClasses, _backgroundLabelId,
_codeType, _varianceEncodedInTarget, clip_bbox, &allDecodedBBoxes); _codeType, _varianceEncodedInTarget, false, allDecodedBBoxes);
}
int numKept = 0; size_t numKept = 0;
std::vector<std::map<int, std::vector<int> > > allIndices; std::vector<std::map<int, std::vector<int> > > allIndices;
for (int i = 0; i < num; ++i) for (int i = 0; i < num; ++i)
{ {
const LabelBBox& decodeBBoxes = allDecodedBBoxes[i]; numKept += processDetections_(allDecodedBBoxes[i], allConfidenceScores[i], allIndices);
const std::vector<std::vector<float> >& confidenceScores = }
allConfidenceScores[i];
if (numKept == 0)
{
CV_ErrorNoReturn(Error::StsError, "Couldn't find any detections");
}
int outputShape[] = {1, 1, (int)numKept, 7};
outputs[0].create(4, outputShape, CV_32F);
float* outputsData = outputs[0].ptr<float>();
size_t count = 0;
for (int i = 0; i < num; ++i)
{
count += outputDetections_(i, &outputsData[count * 7],
allDecodedBBoxes[i], allConfidenceScores[i],
allIndices[i]);
}
CV_Assert(count == numKept);
}
size_t outputDetections_(
const int i, float* outputsData,
const LabelBBox& decodeBBoxes, const std::vector<std::vector<float> >& confidenceScores,
const std::map<int, std::vector<int> >& indicesMap
)
{
size_t count = 0;
for (std::map<int, std::vector<int> >::const_iterator it = indicesMap.begin(); it != indicesMap.end(); ++it)
{
int label = it->first;
if (confidenceScores.size() <= label)
CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find confidence predictions for label %d", label));
const std::vector<float>& scores = confidenceScores[label];
int locLabel = _shareLocation ? -1 : label;
LabelBBox::const_iterator label_bboxes = decodeBBoxes.find(locLabel);
if (label_bboxes == decodeBBoxes.end())
CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find location predictions for label %d", locLabel));
const std::vector<int>& indices = it->second;
for (size_t j = 0; j < indices.size(); ++j, ++count)
{
int idx = indices[j];
const caffe::NormalizedBBox& decode_bbox = label_bboxes->second[idx];
outputsData[count * 7] = i;
outputsData[count * 7 + 1] = label;
outputsData[count * 7 + 2] = scores[idx];
outputsData[count * 7 + 3] = decode_bbox.xmin();
outputsData[count * 7 + 4] = decode_bbox.ymin();
outputsData[count * 7 + 5] = decode_bbox.xmax();
outputsData[count * 7 + 6] = decode_bbox.ymax();
}
}
return count;
}
size_t processDetections_(
const LabelBBox& decodeBBoxes, const std::vector<std::vector<float> >& confidenceScores,
std::vector<std::map<int, std::vector<int> > >& allIndices
)
{
std::map<int, std::vector<int> > indices; std::map<int, std::vector<int> > indices;
int numDetections = 0; size_t numDetections = 0;
for (int c = 0; c < (int)_numClasses; ++c) for (int c = 0; c < (int)_numClasses; ++c)
{ {
if (c == _backgroundLabelId) if (c == _backgroundLabelId)
{ continue; // Ignore background class.
// Ignore background class. if (c >= confidenceScores.size())
continue; CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find confidence predictions for label %d", c));
}
if (confidenceScores.size() <= c)
{
// Something bad happened if there are no predictions for current label.
util::make_error<int>("Could not find confidence predictions for label ", c);
}
const std::vector<float>& scores = confidenceScores[c]; const std::vector<float>& scores = confidenceScores[c];
int label = _shareLocation ? -1 : c; int label = _shareLocation ? -1 : c;
if (decodeBBoxes.find(label) == decodeBBoxes.end())
{ LabelBBox::const_iterator label_bboxes = decodeBBoxes.find(label);
// Something bad happened if there are no predictions for current label. if (label_bboxes == decodeBBoxes.end())
util::make_error<int>("Could not find location predictions for label ", label); CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find location predictions for label %d", label));
continue; ApplyNMSFast(label_bboxes->second, scores, _confidenceThreshold, _nmsThreshold, 1.0, _topK, indices[c]);
}
const std::vector<caffe::NormalizedBBox>& bboxes =
decodeBBoxes.find(label)->second;
ApplyNMSFast(bboxes, scores, _confidenceThreshold, _nmsThreshold, 1.0,
_topK, &(indices[c]));
numDetections += indices[c].size(); numDetections += indices[c].size();
} }
if (_keepTopK > -1 && numDetections > _keepTopK) if (_keepTopK > -1 && numDetections > (size_t)_keepTopK)
{ {
std::vector<std::pair<float, std::pair<int, int> > > scoreIndexPairs; std::vector<std::pair<float, std::pair<int, int> > > scoreIndexPairs;
for (std::map<int, std::vector<int> >::iterator it = indices.begin(); for (std::map<int, std::vector<int> >::iterator it = indices.begin();
@ -283,26 +316,21 @@ public:
{ {
int label = it->first; int label = it->first;
const std::vector<int>& labelIndices = it->second; const std::vector<int>& labelIndices = it->second;
if (confidenceScores.size() <= label) if (label >= confidenceScores.size())
{ CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find location predictions for label %d", label));
// Something bad happened for current label.
util::make_error<int>("Could not find location predictions for label ", label);
continue;
}
const std::vector<float>& scores = confidenceScores[label]; const std::vector<float>& scores = confidenceScores[label];
for (size_t j = 0; j < labelIndices.size(); ++j) for (size_t j = 0; j < labelIndices.size(); ++j)
{ {
size_t idx = labelIndices[j]; size_t idx = labelIndices[j];
CV_Assert(idx < scores.size()); CV_Assert(idx < scores.size());
scoreIndexPairs.push_back( scoreIndexPairs.push_back(std::make_pair(scores[idx], std::make_pair(label, idx)));
std::make_pair(scores[idx], std::make_pair(label, idx)));
} }
} }
// Keep outputs k results per image. // Keep outputs k results per image.
std::sort(scoreIndexPairs.begin(), scoreIndexPairs.end(), std::sort(scoreIndexPairs.begin(), scoreIndexPairs.end(),
util::SortScorePairDescend<std::pair<int, int> >); util::SortScorePairDescend<std::pair<int, int> >);
scoreIndexPairs.resize(_keepTopK); scoreIndexPairs.resize(_keepTopK);
// Store the new indices.
std::map<int, std::vector<int> > newIndices; std::map<int, std::vector<int> > newIndices;
for (size_t j = 0; j < scoreIndexPairs.size(); ++j) for (size_t j = 0; j < scoreIndexPairs.size(); ++j)
{ {
@ -311,78 +339,27 @@ public:
newIndices[label].push_back(idx); newIndices[label].push_back(idx);
} }
allIndices.push_back(newIndices); allIndices.push_back(newIndices);
numKept += _keepTopK; return (size_t)_keepTopK;
} }
else else
{ {
allIndices.push_back(indices); allIndices.push_back(indices);
numKept += numDetections; return numDetections;
} }
} }
if (numKept == 0)
{
CV_ErrorNoReturn(Error::StsError, "Couldn't find any detections");
return;
}
int outputShape[] = {1, 1, numKept, 7};
outputs[0].create(4, outputShape, CV_32F);
float* outputsData = outputs[0].ptr<float>();
int count = 0; // **************************************************************
for (int i = 0; i < num; ++i) // Utility functions
{ // **************************************************************
const std::vector<std::vector<float> >& confidenceScores =
allConfidenceScores[i];
const LabelBBox& decodeBBoxes = allDecodedBBoxes[i];
for (std::map<int, std::vector<int> >::iterator it = allIndices[i].begin();
it != allIndices[i].end(); ++it)
{
int label = it->first;
if (confidenceScores.size() <= label)
{
// Something bad happened if there are no predictions for current label.
util::make_error<int>("Could not find confidence predictions for label ", label);
continue;
}
const std::vector<float>& scores = confidenceScores[label];
int locLabel = _shareLocation ? -1 : label;
if (decodeBBoxes.find(locLabel) == decodeBBoxes.end())
{
// Something bad happened if there are no predictions for current label.
util::make_error<int>("Could not find location predictions for label ", locLabel);
continue;
}
const std::vector<caffe::NormalizedBBox>& bboxes =
decodeBBoxes.find(locLabel)->second;
std::vector<int>& indices = it->second;
for (size_t j = 0; j < indices.size(); ++j) // Compute bbox size
{ template<bool normalized>
int idx = indices[j]; static float BBoxSize(const caffe::NormalizedBBox& bbox)
outputsData[count * 7] = i;
outputsData[count * 7 + 1] = label;
outputsData[count * 7 + 2] = scores[idx];
caffe::NormalizedBBox clipBBox = bboxes[idx];
outputsData[count * 7 + 3] = clipBBox.xmin();
outputsData[count * 7 + 4] = clipBBox.ymin();
outputsData[count * 7 + 5] = clipBBox.xmax();
outputsData[count * 7 + 6] = clipBBox.ymax();
++count;
}
}
}
}
// Compute bbox size.
float BBoxSize(const caffe::NormalizedBBox& bbox,
const bool normalized=true)
{ {
if (bbox.xmax() < bbox.xmin() || bbox.ymax() < bbox.ymin()) if (bbox.xmax() < bbox.xmin() || bbox.ymax() < bbox.ymin())
{ {
// If bbox is invalid (e.g. xmax < xmin or ymax < ymin), return 0. return 0; // If bbox is invalid (e.g. xmax < xmin or ymax < ymin), return 0.
return 0;
} }
else else
{ {
@ -407,191 +384,153 @@ public:
} }
} }
// Clip the caffe::NormalizedBBox such that the range for each corner is [0, 1].
void ClipBBox(const caffe::NormalizedBBox& bbox,
caffe::NormalizedBBox* clipBBox)
{
clipBBox->set_xmin(std::max(std::min(bbox.xmin(), 1.f), 0.f));
clipBBox->set_ymin(std::max(std::min(bbox.ymin(), 1.f), 0.f));
clipBBox->set_xmax(std::max(std::min(bbox.xmax(), 1.f), 0.f));
clipBBox->set_ymax(std::max(std::min(bbox.ymax(), 1.f), 0.f));
clipBBox->clear_size();
clipBBox->set_size(BBoxSize(*clipBBox));
clipBBox->set_difficult(bbox.difficult());
}
// Decode a bbox according to a prior bbox. // Decode a bbox according to a prior bbox
void DecodeBBox( template<bool variance_encoded_in_target>
static void DecodeBBox(
const caffe::NormalizedBBox& prior_bbox, const std::vector<float>& prior_variance, const caffe::NormalizedBBox& prior_bbox, const std::vector<float>& prior_variance,
const CodeType code_type, const bool variance_encoded_in_target, const CodeType code_type,
const bool clip_bbox, const caffe::NormalizedBBox& bbox, const bool clip_bbox, const caffe::NormalizedBBox& bbox,
caffe::NormalizedBBox* decode_bbox) { caffe::NormalizedBBox& decode_bbox)
if (code_type == caffe::PriorBoxParameter_CodeType_CORNER) { {
if (variance_encoded_in_target) { float bbox_xmin = variance_encoded_in_target ? bbox.xmin() : prior_variance[0] * bbox.xmin();
// variance is encoded in target, we simply need to add the offset float bbox_ymin = variance_encoded_in_target ? bbox.ymin() : prior_variance[1] * bbox.ymin();
// predictions. float bbox_xmax = variance_encoded_in_target ? bbox.xmax() : prior_variance[2] * bbox.xmax();
decode_bbox->set_xmin(prior_bbox.xmin() + bbox.xmin()); float bbox_ymax = variance_encoded_in_target ? bbox.ymax() : prior_variance[3] * bbox.ymax();
decode_bbox->set_ymin(prior_bbox.ymin() + bbox.ymin()); switch(code_type)
decode_bbox->set_xmax(prior_bbox.xmax() + bbox.xmax()); {
decode_bbox->set_ymax(prior_bbox.ymax() + bbox.ymax()); case caffe::PriorBoxParameter_CodeType_CORNER:
} else { decode_bbox.set_xmin(prior_bbox.xmin() + bbox_xmin);
// variance is encoded in bbox, we need to scale the offset accordingly. decode_bbox.set_ymin(prior_bbox.ymin() + bbox_ymin);
decode_bbox->set_xmin( decode_bbox.set_xmax(prior_bbox.xmax() + bbox_xmax);
prior_bbox.xmin() + prior_variance[0] * bbox.xmin()); decode_bbox.set_ymax(prior_bbox.ymax() + bbox_ymax);
decode_bbox->set_ymin( break;
prior_bbox.ymin() + prior_variance[1] * bbox.ymin()); case caffe::PriorBoxParameter_CodeType_CENTER_SIZE:
decode_bbox->set_xmax( {
prior_bbox.xmax() + prior_variance[2] * bbox.xmax());
decode_bbox->set_ymax(
prior_bbox.ymax() + prior_variance[3] * bbox.ymax());
}
} else if (code_type == caffe::PriorBoxParameter_CodeType_CENTER_SIZE) {
float prior_width = prior_bbox.xmax() - prior_bbox.xmin(); float prior_width = prior_bbox.xmax() - prior_bbox.xmin();
CV_Assert(prior_width > 0); CV_Assert(prior_width > 0);
float prior_height = prior_bbox.ymax() - prior_bbox.ymin(); float prior_height = prior_bbox.ymax() - prior_bbox.ymin();
CV_Assert(prior_height > 0); CV_Assert(prior_height > 0);
float prior_center_x = (prior_bbox.xmin() + prior_bbox.xmax()) / 2.; float prior_center_x = (prior_bbox.xmin() + prior_bbox.xmax()) * .5;
float prior_center_y = (prior_bbox.ymin() + prior_bbox.ymax()) / 2.; float prior_center_y = (prior_bbox.ymin() + prior_bbox.ymax()) * .5;
float decode_bbox_center_x, decode_bbox_center_y; float decode_bbox_center_x, decode_bbox_center_y;
float decode_bbox_width, decode_bbox_height; float decode_bbox_width, decode_bbox_height;
if (variance_encoded_in_target) { decode_bbox_center_x = bbox_xmin * prior_width + prior_center_x;
// variance is encoded in target, we simply need to retore the offset decode_bbox_center_y = bbox_ymin * prior_height + prior_center_y;
// predictions. decode_bbox_width = exp(bbox_xmax) * prior_width;
decode_bbox_center_x = bbox.xmin() * prior_width + prior_center_x; decode_bbox_height = exp(bbox_ymax) * prior_height;
decode_bbox_center_y = bbox.ymin() * prior_height + prior_center_y; decode_bbox.set_xmin(decode_bbox_center_x - decode_bbox_width * .5);
decode_bbox_width = exp(bbox.xmax()) * prior_width; decode_bbox.set_ymin(decode_bbox_center_y - decode_bbox_height * .5);
decode_bbox_height = exp(bbox.ymax()) * prior_height; decode_bbox.set_xmax(decode_bbox_center_x + decode_bbox_width * .5);
} else { decode_bbox.set_ymax(decode_bbox_center_y + decode_bbox_height * .5);
// variance is encoded in bbox, we need to scale the offset accordingly. break;
decode_bbox_center_x = }
prior_variance[0] * bbox.xmin() * prior_width + prior_center_x; default:
decode_bbox_center_y = CV_ErrorNoReturn(Error::StsBadArg, "Unknown type.");
prior_variance[1] * bbox.ymin() * prior_height + prior_center_y; };
decode_bbox_width = if (clip_bbox)
exp(prior_variance[2] * bbox.xmax()) * prior_width; {
decode_bbox_height = // Clip the caffe::NormalizedBBox such that the range for each corner is [0, 1]
exp(prior_variance[3] * bbox.ymax()) * prior_height; decode_bbox.set_xmin(std::max(std::min(decode_bbox.xmin(), 1.f), 0.f));
decode_bbox.set_ymin(std::max(std::min(decode_bbox.ymin(), 1.f), 0.f));
decode_bbox.set_xmax(std::max(std::min(decode_bbox.xmax(), 1.f), 0.f));
decode_bbox.set_ymax(std::max(std::min(decode_bbox.ymax(), 1.f), 0.f));
}
decode_bbox.clear_size();
decode_bbox.set_size(BBoxSize<true>(decode_bbox));
} }
decode_bbox->set_xmin(decode_bbox_center_x - decode_bbox_width / 2.); // Decode a set of bboxes according to a set of prior bboxes
decode_bbox->set_ymin(decode_bbox_center_y - decode_bbox_height / 2.); static void DecodeBBoxes(
decode_bbox->set_xmax(decode_bbox_center_x + decode_bbox_width / 2.);
decode_bbox->set_ymax(decode_bbox_center_y + decode_bbox_height / 2.);
} else {
CV_Error(Error::StsBadArg, "Unknown LocLossType.");
}
float bbox_size = BBoxSize(*decode_bbox);
decode_bbox->set_size(bbox_size);
if (clip_bbox) {
ClipBBox(*decode_bbox, decode_bbox);
}
}
// Decode a set of bboxes according to a set of prior bboxes.
void DecodeBBoxes(
const std::vector<caffe::NormalizedBBox>& prior_bboxes, const std::vector<caffe::NormalizedBBox>& prior_bboxes,
const std::vector<std::vector<float> >& prior_variances, const std::vector<std::vector<float> >& prior_variances,
const CodeType code_type, const bool variance_encoded_in_target, const CodeType code_type, const bool variance_encoded_in_target,
const bool clip_bbox, const std::vector<caffe::NormalizedBBox>& bboxes, const bool clip_bbox, const std::vector<caffe::NormalizedBBox>& bboxes,
std::vector<caffe::NormalizedBBox>* decode_bboxes) { std::vector<caffe::NormalizedBBox>& decode_bboxes)
{
CV_Assert(prior_bboxes.size() == prior_variances.size()); CV_Assert(prior_bboxes.size() == prior_variances.size());
CV_Assert(prior_bboxes.size() == bboxes.size()); CV_Assert(prior_bboxes.size() == bboxes.size());
int num_bboxes = prior_bboxes.size(); size_t num_bboxes = prior_bboxes.size();
if (num_bboxes >= 1) { CV_Assert(num_bboxes == 0 || prior_variances[0].size() == 4);
CV_Assert(prior_variances[0].size() == 4); decode_bboxes.clear(); decode_bboxes.resize(num_bboxes);
if(variance_encoded_in_target)
{
for (int i = 0; i < num_bboxes; ++i)
DecodeBBox<true>(prior_bboxes[i], prior_variances[i], code_type,
clip_bbox, bboxes[i], decode_bboxes[i]);
} }
decode_bboxes->clear(); else
for (int i = 0; i < num_bboxes; ++i) { {
caffe::NormalizedBBox decode_bbox; for (int i = 0; i < num_bboxes; ++i)
DecodeBBox(prior_bboxes[i], prior_variances[i], code_type, DecodeBBox<false>(prior_bboxes[i], prior_variances[i], code_type,
variance_encoded_in_target, clip_bbox, bboxes[i], &decode_bbox); clip_bbox, bboxes[i], decode_bboxes[i]);
decode_bboxes->push_back(decode_bbox);
} }
} }
// Decode all bboxes in a batch. // Decode all bboxes in a batch
void DecodeBBoxesAll(const std::vector<LabelBBox>& all_loc_preds, static void DecodeBBoxesAll(const std::vector<LabelBBox>& all_loc_preds,
const std::vector<caffe::NormalizedBBox>& prior_bboxes, const std::vector<caffe::NormalizedBBox>& prior_bboxes,
const std::vector<std::vector<float> >& prior_variances, const std::vector<std::vector<float> >& prior_variances,
const int num, const bool share_location, const int num, const bool share_location,
const int num_loc_classes, const int background_label_id, const int num_loc_classes, const int background_label_id,
const CodeType code_type, const bool variance_encoded_in_target, const CodeType code_type, const bool variance_encoded_in_target,
const bool clip, std::vector<LabelBBox>* all_decode_bboxes) { const bool clip, std::vector<LabelBBox>& all_decode_bboxes)
{
CV_Assert(all_loc_preds.size() == num); CV_Assert(all_loc_preds.size() == num);
all_decode_bboxes->clear(); all_decode_bboxes.clear();
all_decode_bboxes->resize(num); all_decode_bboxes.resize(num);
for (int i = 0; i < num; ++i) { for (int i = 0; i < num; ++i)
{
// Decode predictions into bboxes. // Decode predictions into bboxes.
LabelBBox& decode_bboxes = (*all_decode_bboxes)[i]; const LabelBBox& loc_preds = all_loc_preds[i];
for (int c = 0; c < num_loc_classes; ++c) { LabelBBox& decode_bboxes = all_decode_bboxes[i];
for (int c = 0; c < num_loc_classes; ++c)
{
int label = share_location ? -1 : c; int label = share_location ? -1 : c;
if (label == background_label_id) { if (label == background_label_id)
// Ignore background class. continue; // Ignore background class.
continue; LabelBBox::const_iterator label_loc_preds = loc_preds.find(label);
} if (label_loc_preds == loc_preds.end())
if (all_loc_preds[i].find(label) == all_loc_preds[i].end()) { CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find location predictions for label %d", label));
// Something bad happened if there are no predictions for current label.
util::make_error<int>("Could not find location predictions for label ", label);
}
const std::vector<caffe::NormalizedBBox>& label_loc_preds =
all_loc_preds[i].find(label)->second;
DecodeBBoxes(prior_bboxes, prior_variances, DecodeBBoxes(prior_bboxes, prior_variances,
code_type, variance_encoded_in_target, clip, code_type, variance_encoded_in_target, clip,
label_loc_preds, &(decode_bboxes[label])); label_loc_preds->second, decode_bboxes[label]);
} }
} }
} }
// Get prior bounding boxes from prior_data. // Get prior bounding boxes from prior_data
// prior_data: 1 x 2 x num_priors * 4 x 1 blob. // prior_data: 1 x 2 x num_priors * 4 x 1 blob.
// num_priors: number of priors. // num_priors: number of priors.
// prior_bboxes: stores all the prior bboxes in the format of caffe::NormalizedBBox. // prior_bboxes: stores all the prior bboxes in the format of caffe::NormalizedBBox.
// prior_variances: stores all the variances needed by prior bboxes. // prior_variances: stores all the variances needed by prior bboxes.
void GetPriorBBoxes(const float* priorData, const int& numPriors, static void GetPriorBBoxes(const float* priorData, const int& numPriors,
std::vector<caffe::NormalizedBBox>* priorBBoxes, std::vector<caffe::NormalizedBBox>& priorBBoxes,
std::vector<std::vector<float> >* priorVariances) std::vector<std::vector<float> >& priorVariances)
{ {
priorBBoxes->clear(); priorBBoxes.clear(); priorBBoxes.resize(numPriors);
priorVariances->clear(); priorVariances.clear(); priorVariances.resize(numPriors);
for (int i = 0; i < numPriors; ++i) for (int i = 0; i < numPriors; ++i)
{ {
int startIdx = i * 4; int startIdx = i * 4;
caffe::NormalizedBBox bbox; caffe::NormalizedBBox& bbox = priorBBoxes[i];
bbox.set_xmin(priorData[startIdx]); bbox.set_xmin(priorData[startIdx]);
bbox.set_ymin(priorData[startIdx + 1]); bbox.set_ymin(priorData[startIdx + 1]);
bbox.set_xmax(priorData[startIdx + 2]); bbox.set_xmax(priorData[startIdx + 2]);
bbox.set_ymax(priorData[startIdx + 3]); bbox.set_ymax(priorData[startIdx + 3]);
float bboxSize = BBoxSize(bbox); bbox.set_size(BBoxSize<true>(bbox));
bbox.set_size(bboxSize);
priorBBoxes->push_back(bbox);
} }
for (int i = 0; i < numPriors; ++i) for (int i = 0; i < numPriors; ++i)
{ {
int startIdx = (numPriors + i) * 4; int startIdx = (numPriors + i) * 4;
std::vector<float> var; // not needed here: priorVariances[i].clear();
for (int j = 0; j < 4; ++j) for (int j = 0; j < 4; ++j)
{ {
var.push_back(priorData[startIdx + j]); priorVariances[i].push_back(priorData[startIdx + j]);
}
priorVariances->push_back(var);
} }
} }
// Scale the caffe::NormalizedBBox w.r.t. height and width.
void ScaleBBox(const caffe::NormalizedBBox& bbox,
const int height, const int width,
caffe::NormalizedBBox* scaleBBox)
{
scaleBBox->set_xmin(bbox.xmin() * width);
scaleBBox->set_ymin(bbox.ymin() * height);
scaleBBox->set_xmax(bbox.xmax() * width);
scaleBBox->set_ymax(bbox.ymax() * height);
scaleBBox->clear_size();
bool normalized = !(width > 1 || height > 1);
scaleBBox->set_size(BBoxSize(*scaleBBox, normalized));
scaleBBox->set_difficult(bbox.difficult());
} }
// Get location predictions from loc_data. // Get location predictions from loc_data.
@ -603,19 +542,19 @@ public:
// share_location: if true, all classes share the same location prediction. // share_location: if true, all classes share the same location prediction.
// loc_preds: stores the location prediction, where each item contains // loc_preds: stores the location prediction, where each item contains
// location prediction for an image. // location prediction for an image.
void GetLocPredictions(const float* locData, const int num, static void GetLocPredictions(const float* locData, const int num,
const int numPredsPerClass, const int numLocClasses, const int numPredsPerClass, const int numLocClasses,
const bool shareLocation, std::vector<LabelBBox>* locPreds) const bool shareLocation, std::vector<LabelBBox>& locPreds)
{ {
locPreds->clear(); locPreds.clear();
if (shareLocation) if (shareLocation)
{ {
CV_Assert(numLocClasses == 1); CV_Assert(numLocClasses == 1);
} }
locPreds->resize(num); locPreds.resize(num);
for (int i = 0; i < num; ++i) for (int i = 0; i < num; ++i, locData += numPredsPerClass * numLocClasses * 4)
{ {
LabelBBox& labelBBox = (*locPreds)[i]; LabelBBox& labelBBox = locPreds[i];
for (int p = 0; p < numPredsPerClass; ++p) for (int p = 0; p < numPredsPerClass; ++p)
{ {
int startIdx = p * numLocClasses * 4; int startIdx = p * numLocClasses * 4;
@ -626,13 +565,13 @@ public:
{ {
labelBBox[label].resize(numPredsPerClass); labelBBox[label].resize(numPredsPerClass);
} }
labelBBox[label][p].set_xmin(locData[startIdx + c * 4]); caffe::NormalizedBBox& bbox = labelBBox[label][p];
labelBBox[label][p].set_ymin(locData[startIdx + c * 4 + 1]); bbox.set_xmin(locData[startIdx + c * 4]);
labelBBox[label][p].set_xmax(locData[startIdx + c * 4 + 2]); bbox.set_ymin(locData[startIdx + c * 4 + 1]);
labelBBox[label][p].set_ymax(locData[startIdx + c * 4 + 3]); bbox.set_xmax(locData[startIdx + c * 4 + 2]);
bbox.set_ymax(locData[startIdx + c * 4 + 3]);
} }
} }
locData += numPredsPerClass * numLocClasses * 4;
} }
} }
@ -643,25 +582,24 @@ public:
// num_classes: number of classes. // num_classes: number of classes.
// conf_preds: stores the confidence prediction, where each item contains // conf_preds: stores the confidence prediction, where each item contains
// confidence prediction for an image. // confidence prediction for an image.
void GetConfidenceScores(const float* confData, const int num, static void GetConfidenceScores(const float* confData, const int num,
const int numPredsPerClass, const int numClasses, const int numPredsPerClass, const int numClasses,
std::vector<std::vector<std::vector<float> > >* confPreds) std::vector<std::vector<std::vector<float> > >& confPreds)
{ {
confPreds->clear(); confPreds.clear(); confPreds.resize(num);
confPreds->resize(num); for (int i = 0; i < num; ++i, confData += numPredsPerClass * numClasses)
for (int i = 0; i < num; ++i)
{ {
std::vector<std::vector<float> >& labelScores = (*confPreds)[i]; std::vector<std::vector<float> >& labelScores = confPreds[i];
labelScores.resize(numClasses); labelScores.resize(numClasses);
for (int p = 0; p < numPredsPerClass; ++p)
{
int startIdx = p * numClasses;
for (int c = 0; c < numClasses; ++c) for (int c = 0; c < numClasses; ++c)
{ {
labelScores[c].push_back(confData[startIdx + c]); std::vector<float>& classLabelScores = labelScores[c];
classLabelScores.resize(numPredsPerClass);
for (int p = 0; p < numPredsPerClass; ++p)
{
classLabelScores[p] = confData[p * numClasses + c];
} }
} }
confData += numPredsPerClass * numClasses;
} }
} }
@ -674,35 +612,30 @@ public:
// nms_threshold: a threshold used in non maximum suppression. // nms_threshold: a threshold used in non maximum suppression.
// top_k: if not -1, keep at most top_k picked indices. // top_k: if not -1, keep at most top_k picked indices.
// indices: the kept indices of bboxes after nms. // indices: the kept indices of bboxes after nms.
void ApplyNMSFast(const std::vector<caffe::NormalizedBBox>& bboxes, static void ApplyNMSFast(const std::vector<caffe::NormalizedBBox>& bboxes,
const std::vector<float>& scores, const float score_threshold, const std::vector<float>& scores, const float score_threshold,
const float nms_threshold, const float eta, const int top_k, const float nms_threshold, const float eta, const int top_k,
std::vector<int>* indices) { std::vector<int>& indices)
// Sanity check. {
CV_Assert(bboxes.size() == scores.size()); CV_Assert(bboxes.size() == scores.size());
// Get top_k scores (with corresponding indices). // Get top_k scores (with corresponding indices).
std::vector<std::pair<float, int> > score_index_vec; std::vector<std::pair<float, int> > score_index_vec;
GetMaxScoreIndex(scores, score_threshold, top_k, &score_index_vec); GetMaxScoreIndex(scores, score_threshold, top_k, score_index_vec);
// Do nms. // Do nms.
float adaptive_threshold = nms_threshold; float adaptive_threshold = nms_threshold;
indices->clear(); indices.clear();
while (score_index_vec.size() != 0) { while (score_index_vec.size() != 0) {
const int idx = score_index_vec.front().second; const int idx = score_index_vec.front().second;
bool keep = true; bool keep = true;
for (int k = 0; k < indices->size(); ++k) { for (int k = 0; k < (int)indices.size() && keep; ++k) {
if (keep) { const int kept_idx = indices[k];
const int kept_idx = (*indices)[k]; float overlap = JaccardOverlap<true>(bboxes[idx], bboxes[kept_idx]);
float overlap = JaccardOverlap(bboxes[idx], bboxes[kept_idx]);
keep = overlap <= adaptive_threshold; keep = overlap <= adaptive_threshold;
} else {
break;
}
}
if (keep) {
indices->push_back(idx);
} }
if (keep)
indices.push_back(idx);
score_index_vec.erase(score_index_vec.begin()); score_index_vec.erase(score_index_vec.begin());
if (keep && eta < 1 && adaptive_threshold > 0.5) { if (keep && eta < 1 && adaptive_threshold > 0.5) {
adaptive_threshold *= eta; adaptive_threshold *= eta;
@ -715,74 +648,66 @@ public:
// threshold: only consider scores higher than the threshold. // threshold: only consider scores higher than the threshold.
// top_k: if -1, keep all; otherwise, keep at most top_k. // top_k: if -1, keep all; otherwise, keep at most top_k.
// score_index_vec: store the sorted (score, index) pair. // score_index_vec: store the sorted (score, index) pair.
void GetMaxScoreIndex(const std::vector<float>& scores, const float threshold,const int top_k, static void GetMaxScoreIndex(const std::vector<float>& scores, const float threshold, const int top_k,
std::vector<std::pair<float, int> >* score_index_vec) std::vector<std::pair<float, int> >& score_index_vec)
{ {
CV_DbgAssert(score_index_vec.empty());
// Generate index score pairs. // Generate index score pairs.
for (size_t i = 0; i < scores.size(); ++i) for (size_t i = 0; i < scores.size(); ++i)
{ {
if (scores[i] > threshold) if (scores[i] > threshold)
{ {
score_index_vec->push_back(std::make_pair(scores[i], i)); score_index_vec.push_back(std::make_pair(scores[i], i));
} }
} }
// Sort the score pair according to the scores in descending order // Sort the score pair according to the scores in descending order
std::stable_sort(score_index_vec->begin(), score_index_vec->end(), std::stable_sort(score_index_vec.begin(), score_index_vec.end(),
util::SortScorePairDescend<int>); util::SortScorePairDescend<int>);
// Keep top_k scores if needed. // Keep top_k scores if needed.
if (top_k > -1 && top_k < (int)score_index_vec->size()) if (top_k > -1 && top_k < (int)score_index_vec.size())
{ {
score_index_vec->resize(top_k); score_index_vec.resize(top_k);
}
}
// Compute the intersection between two bboxes.
void IntersectBBox(const caffe::NormalizedBBox& bbox1,
const caffe::NormalizedBBox& bbox2,
caffe::NormalizedBBox* intersect_bbox) {
if (bbox2.xmin() > bbox1.xmax() || bbox2.xmax() < bbox1.xmin() ||
bbox2.ymin() > bbox1.ymax() || bbox2.ymax() < bbox1.ymin())
{
// Return [0, 0, 0, 0] if there is no intersection.
intersect_bbox->set_xmin(0);
intersect_bbox->set_ymin(0);
intersect_bbox->set_xmax(0);
intersect_bbox->set_ymax(0);
}
else
{
intersect_bbox->set_xmin(std::max(bbox1.xmin(), bbox2.xmin()));
intersect_bbox->set_ymin(std::max(bbox1.ymin(), bbox2.ymin()));
intersect_bbox->set_xmax(std::min(bbox1.xmax(), bbox2.xmax()));
intersect_bbox->set_ymax(std::min(bbox1.ymax(), bbox2.ymax()));
} }
} }
// Compute the jaccard (intersection over union IoU) overlap between two bboxes. // Compute the jaccard (intersection over union IoU) overlap between two bboxes.
float JaccardOverlap(const caffe::NormalizedBBox& bbox1, template<bool normalized>
const caffe::NormalizedBBox& bbox2, static float JaccardOverlap(const caffe::NormalizedBBox& bbox1,
const bool normalized=true) const caffe::NormalizedBBox& bbox2)
{ {
caffe::NormalizedBBox intersect_bbox; caffe::NormalizedBBox intersect_bbox;
IntersectBBox(bbox1, bbox2, &intersect_bbox); if (bbox2.xmin() > bbox1.xmax() || bbox2.xmax() < bbox1.xmin() ||
float intersect_width, intersect_height; bbox2.ymin() > bbox1.ymax() || bbox2.ymax() < bbox1.ymin())
if (normalized)
{ {
intersect_width = intersect_bbox.xmax() - intersect_bbox.xmin(); // Return [0, 0, 0, 0] if there is no intersection.
intersect_height = intersect_bbox.ymax() - intersect_bbox.ymin(); intersect_bbox.set_xmin(0);
intersect_bbox.set_ymin(0);
intersect_bbox.set_xmax(0);
intersect_bbox.set_ymax(0);
} }
else else
{ {
intersect_width = intersect_bbox.xmax() - intersect_bbox.xmin() + 1; intersect_bbox.set_xmin(std::max(bbox1.xmin(), bbox2.xmin()));
intersect_height = intersect_bbox.ymax() - intersect_bbox.ymin() + 1; intersect_bbox.set_ymin(std::max(bbox1.ymin(), bbox2.ymin()));
intersect_bbox.set_xmax(std::min(bbox1.xmax(), bbox2.xmax()));
intersect_bbox.set_ymax(std::min(bbox1.ymax(), bbox2.ymax()));
} }
float intersect_width, intersect_height;
intersect_width = intersect_bbox.xmax() - intersect_bbox.xmin();
intersect_height = intersect_bbox.ymax() - intersect_bbox.ymin();
if (intersect_width > 0 && intersect_height > 0) if (intersect_width > 0 && intersect_height > 0)
{ {
if (!normalized)
{
intersect_width++;
intersect_height++;
}
float intersect_size = intersect_width * intersect_height; float intersect_size = intersect_width * intersect_height;
float bbox1_size = BBoxSize(bbox1); float bbox1_size = BBoxSize<true>(bbox1);
float bbox2_size = BBoxSize(bbox2); float bbox2_size = BBoxSize<true>(bbox2);
return intersect_size / (bbox1_size + bbox2_size - intersect_size); return intersect_size / (bbox1_size + bbox2_size - intersect_size);
} }
else else

View File

@ -177,12 +177,12 @@ public:
#if CV_TRY_AVX2 #if CV_TRY_AVX2
if( useAVX2 ) if( useAVX2 )
fastGEMM1T_avx2( sptr, wptr, wstep, biasptr, dptr, nw, vecsize); opt_AVX2::fastGEMM1T( sptr, wptr, wstep, biasptr, dptr, nw, vecsize);
else else
#endif #endif
#if CV_TRY_AVX #if CV_TRY_AVX
if( useAVX ) if( useAVX )
fastGEMM1T_avx( sptr, wptr, wstep, biasptr, dptr, nw, vecsize); opt_AVX::fastGEMM1T( sptr, wptr, wstep, biasptr, dptr, nw, vecsize);
else else
#endif #endif
{ {
@ -191,19 +191,19 @@ public:
#if CV_SIMD128 #if CV_SIMD128
for( ; i <= nw - 4; i += 4, wptr += 4*wstep ) for( ; i <= nw - 4; i += 4, wptr += 4*wstep )
{ {
vfloat32x4 vs0 = v_setall_f32(0.f), vs1 = v_setall_f32(0.f); v_float32x4 vs0 = v_setall_f32(0.f), vs1 = v_setall_f32(0.f);
vfloat32x4 vs2 = v_setall_f32(0.f), vs3 = v_setall_f32(0.f); v_float32x4 vs2 = v_setall_f32(0.f), vs3 = v_setall_f32(0.f);
for( k = 0; k < vecsize; k += 4 ) for( k = 0; k < vecsize; k += 4 )
{ {
vfloat32x4 v = v_load_aligned(sptr + k); v_float32x4 v = v_load_aligned(sptr + k);
vs0 += v*v_load_aligned(wptr + k); vs0 += v*v_load_aligned(wptr + k);
vs1 += v*v_load_aligned(wptr + wstep + k); vs1 += v*v_load_aligned(wptr + wstep + k);
vs2 += v*v_load_aligned(wptr + wstep*2 + k); vs2 += v*v_load_aligned(wptr + wstep*2 + k);
vs3 += v*v_load_aligned(wptr + wstep*3 + k); vs3 += v*v_load_aligned(wptr + wstep*3 + k);
} }
vfloat32x4 s = v_reduce_sum4(vs0, vs1, vs2, vs3); v_float32x4 s = v_reduce_sum4(vs0, vs1, vs2, vs3);
s += v_load(biasptr + i); s += v_load(biasptr + i);
v_store(dptr + i, s); v_store(dptr + i, s);
} }

View File

@ -1,54 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include "layers_common.hpp"
#include "opencv2/core/hal/intrin.hpp"
#define fastConv_some_avx fastConv_avx
#define fastGEMM1T_some_avx fastGEMM1T_avx
#define fastGEMM_some_avx fastGEMM_avx
#undef _mm256_fmadd_ps
#define _mm256_fmadd_ps(a, b, c) _mm256_add_ps(c, _mm256_mul_ps(a, b))
#include "layers_common.simd.hpp"

View File

@ -1,51 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include "layers_common.hpp"
#include "opencv2/core/hal/intrin.hpp"
#define fastConv_some_avx fastConv_avx2
#define fastGEMM1T_some_avx fastGEMM1T_avx2
#define fastGEMM_some_avx fastGEMM_avx2
#include "layers_common.simd.hpp"

View File

@ -45,6 +45,10 @@
#include <opencv2/dnn.hpp> #include <opencv2/dnn.hpp>
#include <opencv2/dnn/shape_utils.hpp> #include <opencv2/dnn/shape_utils.hpp>
// dispatched AVX/AVX2 optimizations
#include "layers/layers_common.simd.hpp"
#include "layers/layers_common.simd_declarations.hpp"
namespace cv namespace cv
{ {
namespace dnn namespace dnn
@ -64,32 +68,6 @@ void getConvPoolPaddings(const Size& inp, const Size& out,
const Size &kernel, const Size &stride, const Size &kernel, const Size &stride,
const String &padMode, Size &pad); const String &padMode, Size &pad);
#if CV_TRY_AVX
void fastConv_avx(const float* weights, size_t wstep, const float* bias,
const float* rowbuf, float* output, const int* outShape,
int blockSize, int vecsize, int vecsize_aligned,
const float* relu, bool initOutput);
void fastGEMM1T_avx( const float* vec, const float* weights,
size_t wstep, const float* bias,
float* dst, int nvecs, int vecsize );
void fastGEMM_avx( const float* aptr, size_t astep, const float* bptr0,
size_t bstep, float* cptr, size_t cstep,
int ma, int na, int nb );
#endif
#if CV_TRY_AVX2
void fastConv_avx2(const float* weights, size_t wstep, const float* bias,
const float* rowbuf, float* output, const int* outShape,
int blockSize, int vecsize, int vecsize_aligned,
const float* relu, bool initOutput);
void fastGEMM1T_avx2( const float* vec, const float* weights,
size_t wstep, const float* bias,
float* dst, int nvecs, int vecsize );
void fastGEMM_avx2( const float* aptr, size_t astep, const float* bptr0,
size_t bstep, float* cptr, size_t cstep,
int ma, int na, int nb );
#endif
} }
} }

View File

@ -40,13 +40,31 @@
// //
//M*/ //M*/
#ifndef __DNN_LAYERS_COMMON_SIMD_HPP__ #include "opencv2/core/hal/intrin.hpp"
#define __DNN_LAYERS_COMMON_SIMD_HPP__
namespace cv { namespace cv {
namespace dnn { namespace dnn {
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
void fastConv_some_avx( const float* weights, size_t wstep, const float* bias, void fastConv( const float* weights, size_t wstep, const float* bias,
const float* rowbuf, float* output, const int* outShape,
int blockSize, int vecsize, int vecsize_aligned,
const float* relu, bool initOutput );
void fastGEMM1T( const float* vec, const float* weights,
size_t wstep, const float* bias,
float* dst, int nvecs, int vecsize );
void fastGEMM( const float* aptr, size_t astep, const float* bptr,
size_t bstep, float* cptr, size_t cstep,
int ma, int na, int nb );
#if !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY) && CV_AVX
#if !CV_FMA // AVX workaround
#undef _mm256_fmadd_ps
#define _mm256_fmadd_ps(a, b, c) _mm256_add_ps(c, _mm256_mul_ps(a, b))
#endif
void fastConv( const float* weights, size_t wstep, const float* bias,
const float* rowbuf, float* output, const int* outShape, const float* rowbuf, float* output, const int* outShape,
int blockSize, int vecsize, int vecsize_aligned, int blockSize, int vecsize, int vecsize_aligned,
const float* relu, bool initOutput ) const float* relu, bool initOutput )
@ -214,7 +232,7 @@ void fastConv_some_avx( const float* weights, size_t wstep, const float* bias,
} }
// dst = vec * weights^t + bias // dst = vec * weights^t + bias
void fastGEMM1T_some_avx( const float* vec, const float* weights, void fastGEMM1T( const float* vec, const float* weights,
size_t wstep, const float* bias, size_t wstep, const float* bias,
float* dst, int nvecs, int vecsize ) float* dst, int nvecs, int vecsize )
{ {
@ -276,7 +294,7 @@ void fastGEMM1T_some_avx( const float* vec, const float* weights,
_mm256_zeroupper(); _mm256_zeroupper();
} }
void fastGEMM_some_avx( const float* aptr, size_t astep, const float* bptr, void fastGEMM( const float* aptr, size_t astep, const float* bptr,
size_t bstep, float* cptr, size_t cstep, size_t bstep, float* cptr, size_t cstep,
int ma, int na, int nb ) int ma, int na, int nb )
{ {
@ -346,7 +364,7 @@ void fastGEMM_some_avx( const float* aptr, size_t astep, const float* bptr,
_mm256_zeroupper(); _mm256_zeroupper();
} }
} #endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
}
#endif CV_CPU_OPTIMIZATION_NAMESPACE_END
}} // namespace