mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 17:44:04 +08:00
JavaScript bindings for dnn module
This commit is contained in:
parent
6185f7209e
commit
f503515082
@ -224,8 +224,7 @@ static const uint64 kuint64max = GOOGLE_ULONGLONG(0xFFFFFFFFFFFFFFFF);
|
|||||||
|
|
||||||
#if defined(__clang__) && defined(__has_cpp_attribute) \
|
#if defined(__clang__) && defined(__has_cpp_attribute) \
|
||||||
&& !defined(GOOGLE_PROTOBUF_OS_APPLE)
|
&& !defined(GOOGLE_PROTOBUF_OS_APPLE)
|
||||||
# if defined(GOOGLE_PROTOBUF_OS_NACL) || defined(EMSCRIPTEN) || \
|
# if defined(GOOGLE_PROTOBUF_OS_NACL) || __has_cpp_attribute(clang::fallthrough)
|
||||||
__has_cpp_attribute(clang::fallthrough)
|
|
||||||
# define GOOGLE_FALLTHROUGH_INTENDED [[clang::fallthrough]]
|
# define GOOGLE_FALLTHROUGH_INTENDED [[clang::fallthrough]]
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
44
doc/tutorials/dnn/dnn_javascript/dnn_javascript.markdown
Normal file
44
doc/tutorials/dnn/dnn_javascript/dnn_javascript.markdown
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
# How to run deep networks in browser {#tutorial_dnn_javascript}
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
This tutorial will show us how to run deep learning models using OpenCV.js right
|
||||||
|
in a browser. The tutorial refers to a sample pipeline of face detection and face recognition
|
||||||
|
models pipeline.
|
||||||
|
|
||||||
|
## Face detection
|
||||||
|
The face detection network gets a BGR image as input and produces a set of bounding boxes
|
||||||
|
that might contain faces. All we need to do is select the boxes with a strong
|
||||||
|
confidence.
|
||||||
|
|
||||||
|
## Face recognition
|
||||||
|
The network is called OpenFace (project https://github.com/cmusatyalab/openface).
|
||||||
|
The face recognition model receives an RGB face image of size `96x96`. Then it returns
|
||||||
|
a `128`-dimensional unit vector that represents the input face as a point on the unit
|
||||||
|
multidimensional sphere. So the difference between two faces is the angle between the two
|
||||||
|
output vectors.
|
||||||
|
|
||||||
|
## Sample
|
||||||
|
The whole sample is an HTML page with JavaScript code that uses OpenCV.js functionality.
|
||||||
|
You can see this page embedded below. Press the `Start` button to begin the demo.
|
||||||
|
Press `Add a person` to name a person that is recognized as an unknown one.
|
||||||
|
Next we'll discuss main parts of the code.
|
||||||
|
|
||||||
|
@htmlinclude js_face_recognition.html
|
||||||
|
|
||||||
|
-# Run face detection network to detect faces on input image.
|
||||||
|
@snippet dnn/js_face_recognition.html Run face detection model
|
||||||
|
You may play with input blob sizes to balance detection quality and efficiency.
|
||||||
|
The bigger the input blob, the smaller the faces that may be detected.
|
||||||
|
|
||||||
|
-# Run face recognition network to get a `128`-dimensional unit feature vector from an input face image.
|
||||||
|
@snippet dnn/js_face_recognition.html Get 128 floating points feature vector
|
||||||
|
|
||||||
|
-# Perform a recognition.
|
||||||
|
@snippet dnn/js_face_recognition.html Recognize
|
||||||
|
Match a new feature vector with registered ones. Return a name of the best matched person.
|
||||||
|
|
||||||
|
-# The main loop.
|
||||||
|
@snippet dnn/js_face_recognition.html Define frames processing
|
||||||
|
The main loop of our application receives frames from a camera and performs recognition
|
||||||
|
of every detected face in the frame. We start this function once OpenCV.js was
|
||||||
|
initialized and deep learning models were downloaded.
|
@ -25,6 +25,14 @@ Deep Neural Networks (dnn module) {#tutorial_table_of_content_dnn}
|
|||||||
|
|
||||||
In this tutorial we describe the ways to schedule your networks using Halide backend in OpenCV deep learning module.
|
In this tutorial we describe the ways to schedule your networks using Halide backend in OpenCV deep learning module.
|
||||||
|
|
||||||
|
- @subpage tutorial_dnn_android
|
||||||
|
|
||||||
|
*Compatibility:* \> OpenCV 3.3
|
||||||
|
|
||||||
|
*Author:* Dmitry Kurtaev
|
||||||
|
|
||||||
|
This tutorial will show you how to run a deep learning model using OpenCV on an Android device.
|
||||||
|
|
||||||
- @subpage tutorial_dnn_yolo
|
- @subpage tutorial_dnn_yolo
|
||||||
|
|
||||||
*Compatibility:* \> OpenCV 3.3.1
|
*Compatibility:* \> OpenCV 3.3.1
|
||||||
@ -32,3 +40,11 @@ Deep Neural Networks (dnn module) {#tutorial_table_of_content_dnn}
|
|||||||
*Author:* Alessandro de Oliveira Faria
|
*Author:* Alessandro de Oliveira Faria
|
||||||
|
|
||||||
In this tutorial you will learn how to use opencv_dnn module using yolo_object_detection with device capture, video file or image.
|
In this tutorial you will learn how to use opencv_dnn module using yolo_object_detection with device capture, video file or image.
|
||||||
|
|
||||||
|
- @subpage tutorial_dnn_javascript
|
||||||
|
|
||||||
|
*Compatibility:* \> OpenCV 3.3.1
|
||||||
|
|
||||||
|
*Author:* Dmitry Kurtaev
|
||||||
|
|
||||||
|
In this tutorial we'll run deep learning models in a browser using OpenCV.js.
|
||||||
|
@ -15,7 +15,7 @@ set(the_description "Deep neural network module. It allows to load models from d
|
|||||||
|
|
||||||
ocv_add_dispatched_file("layers/layers_common" AVX AVX2)
|
ocv_add_dispatched_file("layers/layers_common" AVX AVX2)
|
||||||
|
|
||||||
ocv_add_module(dnn opencv_core opencv_imgproc WRAP python matlab java)
|
ocv_add_module(dnn opencv_core opencv_imgproc WRAP python matlab java js)
|
||||||
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wno-shadow -Wno-parentheses -Wmaybe-uninitialized -Wsign-promo
|
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wno-shadow -Wno-parentheses -Wmaybe-uninitialized -Wsign-promo
|
||||||
-Wmissing-declarations -Wmissing-prototypes
|
-Wmissing-declarations -Wmissing-prototypes
|
||||||
)
|
)
|
||||||
|
@ -221,11 +221,6 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
|
|||||||
class CV_EXPORTS LRNLayer : public Layer
|
class CV_EXPORTS LRNLayer : public Layer
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
enum Type
|
|
||||||
{
|
|
||||||
CHANNEL_NRM,
|
|
||||||
SPATIAL_NRM
|
|
||||||
};
|
|
||||||
int type;
|
int type;
|
||||||
|
|
||||||
int size;
|
int size;
|
||||||
@ -238,14 +233,6 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
|
|||||||
class CV_EXPORTS PoolingLayer : public Layer
|
class CV_EXPORTS PoolingLayer : public Layer
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
enum Type
|
|
||||||
{
|
|
||||||
MAX,
|
|
||||||
AVE,
|
|
||||||
STOCHASTIC,
|
|
||||||
ROI
|
|
||||||
};
|
|
||||||
|
|
||||||
int type;
|
int type;
|
||||||
Size kernel, stride, pad;
|
Size kernel, stride, pad;
|
||||||
bool globalPooling;
|
bool globalPooling;
|
||||||
@ -474,13 +461,6 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
|
|||||||
class CV_EXPORTS EltwiseLayer : public Layer
|
class CV_EXPORTS EltwiseLayer : public Layer
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
enum EltwiseOp
|
|
||||||
{
|
|
||||||
PROD = 0,
|
|
||||||
SUM = 1,
|
|
||||||
MAX = 2,
|
|
||||||
};
|
|
||||||
|
|
||||||
static Ptr<EltwiseLayer> create(const LayerParams ¶ms);
|
static Ptr<EltwiseLayer> create(const LayerParams ¶ms);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -423,8 +423,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
|
|||||||
* @param outputBlobs contains all output blobs for each layer specified in @p outBlobNames.
|
* @param outputBlobs contains all output blobs for each layer specified in @p outBlobNames.
|
||||||
* @param outBlobNames names for layers which outputs are needed to get
|
* @param outBlobNames names for layers which outputs are needed to get
|
||||||
*/
|
*/
|
||||||
CV_WRAP void forward(std::vector<std::vector<Mat> >& outputBlobs,
|
void forward(std::vector<std::vector<Mat> >& outputBlobs,
|
||||||
const std::vector<String>& outBlobNames);
|
const std::vector<String>& outBlobNames);
|
||||||
|
|
||||||
//TODO:
|
//TODO:
|
||||||
/** @brief Optimized forward.
|
/** @brief Optimized forward.
|
||||||
@ -467,7 +467,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
|
|||||||
* @note If updating blob is not empty then @p blob must have the same shape,
|
* @note If updating blob is not empty then @p blob must have the same shape,
|
||||||
* because network reshaping is not implemented yet.
|
* because network reshaping is not implemented yet.
|
||||||
*/
|
*/
|
||||||
CV_WRAP void setInput(const Mat &blob, const String& name = "");
|
CV_WRAP void setInput(InputArray blob, const String& name = "");
|
||||||
|
|
||||||
/** @brief Sets the new value for the learned param of the layer.
|
/** @brief Sets the new value for the learned param of the layer.
|
||||||
* @param layer name or id of the layer.
|
* @param layer name or id of the layer.
|
||||||
@ -733,7 +733,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
|
|||||||
* If @p crop is false, direct resize without cropping and preserving aspect ratio is performed.
|
* If @p crop is false, direct resize without cropping and preserving aspect ratio is performed.
|
||||||
* @returns 4-dimensional Mat with NCHW dimensions order.
|
* @returns 4-dimensional Mat with NCHW dimensions order.
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS_W Mat blobFromImage(const Mat& image, double scalefactor=1.0, const Size& size = Size(),
|
CV_EXPORTS_W Mat blobFromImage(InputArray image, double scalefactor=1.0, const Size& size = Size(),
|
||||||
const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true);
|
const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true);
|
||||||
/** @brief Creates 4-dimensional blob from series of images. Optionally resizes and
|
/** @brief Creates 4-dimensional blob from series of images. Optionally resizes and
|
||||||
* crops @p images from center, subtract @p mean values, scales values by @p scalefactor,
|
* crops @p images from center, subtract @p mean values, scales values by @p scalefactor,
|
||||||
|
@ -84,11 +84,11 @@ static String toString(const T &v)
|
|||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat blobFromImage(const Mat& image, double scalefactor, const Size& size,
|
Mat blobFromImage(InputArray image, double scalefactor, const Size& size,
|
||||||
const Scalar& mean, bool swapRB, bool crop)
|
const Scalar& mean, bool swapRB, bool crop)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
std::vector<Mat> images(1, image);
|
std::vector<Mat> images(1, image.getMat());
|
||||||
return blobFromImages(images, scalefactor, size, mean, swapRB, crop);
|
return blobFromImages(images, scalefactor, size, mean, swapRB, crop);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1910,7 +1910,7 @@ void Net::setInputsNames(const std::vector<String> &inputBlobNames)
|
|||||||
impl->netInputLayer->setNames(inputBlobNames);
|
impl->netInputLayer->setNames(inputBlobNames);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Net::setInput(const Mat &blob_, const String& name)
|
void Net::setInput(InputArray blob, const String& name)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
@ -1930,6 +1930,7 @@ void Net::setInput(const Mat &blob_, const String& name)
|
|||||||
ld.umat_outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
|
ld.umat_outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
|
||||||
ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
|
ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
|
||||||
MatShape prevShape = shape(ld.outputBlobs[pin.oid]);
|
MatShape prevShape = shape(ld.outputBlobs[pin.oid]);
|
||||||
|
Mat blob_ = blob.getMat();
|
||||||
bool oldShape = prevShape == shape(blob_);
|
bool oldShape = prevShape == shape(blob_);
|
||||||
if (oldShape)
|
if (oldShape)
|
||||||
{
|
{
|
||||||
|
@ -52,22 +52,27 @@ namespace dnn
|
|||||||
class EltwiseLayerImpl : public EltwiseLayer
|
class EltwiseLayerImpl : public EltwiseLayer
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
EltwiseOp op;
|
enum EltwiseOp
|
||||||
|
{
|
||||||
|
PROD = 0,
|
||||||
|
SUM = 1,
|
||||||
|
MAX = 2,
|
||||||
|
} op;
|
||||||
std::vector<float> coeffs;
|
std::vector<float> coeffs;
|
||||||
|
|
||||||
EltwiseLayerImpl(const LayerParams& params)
|
EltwiseLayerImpl(const LayerParams& params)
|
||||||
{
|
{
|
||||||
setParamsFrom(params);
|
setParamsFrom(params);
|
||||||
op = EltwiseLayer::SUM;
|
op = SUM;
|
||||||
if (params.has("operation"))
|
if (params.has("operation"))
|
||||||
{
|
{
|
||||||
String operation = params.get<String>("operation").toLowerCase();
|
String operation = params.get<String>("operation").toLowerCase();
|
||||||
if (operation == "prod")
|
if (operation == "prod")
|
||||||
op = EltwiseLayer::PROD;
|
op = PROD;
|
||||||
else if (operation == "sum")
|
else if (operation == "sum")
|
||||||
op = EltwiseLayer::SUM;
|
op = SUM;
|
||||||
else if (operation == "max")
|
else if (operation == "max")
|
||||||
op = EltwiseLayer::MAX;
|
op = MAX;
|
||||||
else
|
else
|
||||||
CV_Error(cv::Error::StsBadArg, "Unknown operaticon type \"" + operation + "\"");
|
CV_Error(cv::Error::StsBadArg, "Unknown operaticon type \"" + operation + "\"");
|
||||||
}
|
}
|
||||||
@ -122,7 +127,7 @@ public:
|
|||||||
int channels;
|
int channels;
|
||||||
size_t planeSize;
|
size_t planeSize;
|
||||||
|
|
||||||
EltwiseInvoker() : srcs(0), nsrcs(0), dst(0), coeffs(0), op(EltwiseLayer::PROD), nstripes(0), activ(0), channels(0), planeSize(0) {}
|
EltwiseInvoker() : srcs(0), nsrcs(0), dst(0), coeffs(0), op(PROD), nstripes(0), activ(0), channels(0), planeSize(0) {}
|
||||||
|
|
||||||
static void run(const Mat** srcs, int nsrcs, Mat& dst,
|
static void run(const Mat** srcs, int nsrcs, Mat& dst,
|
||||||
const std::vector<float>& coeffs, EltwiseOp op,
|
const std::vector<float>& coeffs, EltwiseOp op,
|
||||||
@ -150,7 +155,7 @@ public:
|
|||||||
CV_Assert(dst.total() == dst.size[0] * p.channels * p.planeSize);
|
CV_Assert(dst.total() == dst.size[0] * p.channels * p.planeSize);
|
||||||
|
|
||||||
bool simpleCoeffs = true;
|
bool simpleCoeffs = true;
|
||||||
if( op == EltwiseLayer::SUM && !coeffs.empty() )
|
if( op == SUM && !coeffs.empty() )
|
||||||
{
|
{
|
||||||
CV_Assert( coeffs.size() == (size_t)nsrcs );
|
CV_Assert( coeffs.size() == (size_t)nsrcs );
|
||||||
|
|
||||||
@ -192,7 +197,7 @@ public:
|
|||||||
const float* srcptr0 = srcs[0]->ptr<float>() + globalDelta;
|
const float* srcptr0 = srcs[0]->ptr<float>() + globalDelta;
|
||||||
float* dstptr = dstptr0 + globalDelta;
|
float* dstptr = dstptr0 + globalDelta;
|
||||||
|
|
||||||
if( op == EltwiseLayer::PROD )
|
if( op == PROD )
|
||||||
{
|
{
|
||||||
for( k = 1; k < n; k++ )
|
for( k = 1; k < n; k++ )
|
||||||
{
|
{
|
||||||
@ -204,7 +209,7 @@ public:
|
|||||||
srcptr0 = (const float*)dstptr;
|
srcptr0 = (const float*)dstptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if( op == EltwiseLayer::MAX )
|
else if( op == MAX )
|
||||||
{
|
{
|
||||||
for( k = 1; k < n; k++ )
|
for( k = 1; k < n; k++ )
|
||||||
{
|
{
|
||||||
|
@ -67,9 +67,9 @@ public:
|
|||||||
type = -1;
|
type = -1;
|
||||||
String nrmType = params.get<String>("norm_region", "ACROSS_CHANNELS");
|
String nrmType = params.get<String>("norm_region", "ACROSS_CHANNELS");
|
||||||
if (nrmType == "ACROSS_CHANNELS")
|
if (nrmType == "ACROSS_CHANNELS")
|
||||||
type = LRNLayer::CHANNEL_NRM;
|
type = CHANNEL_NRM;
|
||||||
else if (nrmType == "WITHIN_CHANNEL")
|
else if (nrmType == "WITHIN_CHANNEL")
|
||||||
type = LRNLayer::SPATIAL_NRM;
|
type = SPATIAL_NRM;
|
||||||
else
|
else
|
||||||
CV_Error(Error::StsBadArg, "Unknown region type \"" + nrmType + "\"");
|
CV_Error(Error::StsBadArg, "Unknown region type \"" + nrmType + "\"");
|
||||||
|
|
||||||
@ -397,6 +397,13 @@ public:
|
|||||||
}
|
}
|
||||||
return flops;
|
return flops;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
enum Type
|
||||||
|
{
|
||||||
|
CHANNEL_NRM,
|
||||||
|
SPATIAL_NRM
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
Ptr<LRNLayer> LRNLayer::create(const LayerParams& params)
|
Ptr<LRNLayer> LRNLayer::create(const LayerParams& params)
|
||||||
|
@ -63,7 +63,7 @@ class PoolingLayerImpl : public PoolingLayer
|
|||||||
public:
|
public:
|
||||||
PoolingLayerImpl(const LayerParams& params)
|
PoolingLayerImpl(const LayerParams& params)
|
||||||
{
|
{
|
||||||
type = PoolingLayer::MAX;
|
type = MAX;
|
||||||
computeMaxIdx = true;
|
computeMaxIdx = true;
|
||||||
globalPooling = false;
|
globalPooling = false;
|
||||||
|
|
||||||
@ -71,11 +71,11 @@ public:
|
|||||||
{
|
{
|
||||||
String pool = params.get<String>("pool").toLowerCase();
|
String pool = params.get<String>("pool").toLowerCase();
|
||||||
if (pool == "max")
|
if (pool == "max")
|
||||||
type = PoolingLayer::MAX;
|
type = MAX;
|
||||||
else if (pool == "ave")
|
else if (pool == "ave")
|
||||||
type = PoolingLayer::AVE;
|
type = AVE;
|
||||||
else if (pool == "stochastic")
|
else if (pool == "stochastic")
|
||||||
type = PoolingLayer::STOCHASTIC;
|
type = STOCHASTIC;
|
||||||
else
|
else
|
||||||
CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");
|
CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");
|
||||||
getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling,
|
getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling,
|
||||||
@ -83,7 +83,7 @@ public:
|
|||||||
}
|
}
|
||||||
else if (params.has("pooled_w") || params.has("pooled_h") || params.has("spatial_scale"))
|
else if (params.has("pooled_w") || params.has("pooled_h") || params.has("spatial_scale"))
|
||||||
{
|
{
|
||||||
type = PoolingLayer::ROI;
|
type = ROI;
|
||||||
}
|
}
|
||||||
setParamsFrom(params);
|
setParamsFrom(params);
|
||||||
ceilMode = params.get<bool>("ceil_mode", true);
|
ceilMode = params.get<bool>("ceil_mode", true);
|
||||||
@ -115,8 +115,7 @@ public:
|
|||||||
{
|
{
|
||||||
return backendId == DNN_BACKEND_DEFAULT ||
|
return backendId == DNN_BACKEND_DEFAULT ||
|
||||||
backendId == DNN_BACKEND_HALIDE && haveHalide() &&
|
backendId == DNN_BACKEND_HALIDE && haveHalide() &&
|
||||||
(type == PoolingLayer::MAX ||
|
(type == MAX || type == AVE && !pad.width && !pad.height);
|
||||||
type == PoolingLayer::AVE && !pad.width && !pad.height);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
@ -200,9 +199,9 @@ public:
|
|||||||
|
|
||||||
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
|
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
|
||||||
{
|
{
|
||||||
if (type == PoolingLayer::MAX)
|
if (type == MAX)
|
||||||
return initMaxPoolingHalide(inputs);
|
return initMaxPoolingHalide(inputs);
|
||||||
else if (type == PoolingLayer::AVE)
|
else if (type == AVE)
|
||||||
return initAvePoolingHalide(inputs);
|
return initAvePoolingHalide(inputs);
|
||||||
else
|
else
|
||||||
return Ptr<BackendNode>();
|
return Ptr<BackendNode>();
|
||||||
@ -221,7 +220,7 @@ public:
|
|||||||
float spatialScale;
|
float spatialScale;
|
||||||
|
|
||||||
PoolingInvoker() : src(0), rois(0), dst(0), mask(0), nstripes(0),
|
PoolingInvoker() : src(0), rois(0), dst(0), mask(0), nstripes(0),
|
||||||
computeMaxIdx(0), poolingType(PoolingLayer::MAX), spatialScale(0) {}
|
computeMaxIdx(0), poolingType(MAX), spatialScale(0) {}
|
||||||
|
|
||||||
static void run(const Mat& src, const Mat& rois, Mat& dst, Mat& mask, Size kernel,
|
static void run(const Mat& src, const Mat& rois, Mat& dst, Mat& mask, Size kernel,
|
||||||
Size stride, Size pad, int poolingType, float spatialScale,
|
Size stride, Size pad, int poolingType, float spatialScale,
|
||||||
@ -698,7 +697,7 @@ public:
|
|||||||
out.height = 1;
|
out.height = 1;
|
||||||
out.width = 1;
|
out.width = 1;
|
||||||
}
|
}
|
||||||
else if (type == PoolingLayer::ROI)
|
else if (type == ROI)
|
||||||
{
|
{
|
||||||
out.height = pooledSize.height;
|
out.height = pooledSize.height;
|
||||||
out.width = pooledSize.width;
|
out.width = pooledSize.width;
|
||||||
@ -757,6 +756,14 @@ public:
|
|||||||
}
|
}
|
||||||
return flops;
|
return flops;
|
||||||
}
|
}
|
||||||
|
private:
|
||||||
|
enum Type
|
||||||
|
{
|
||||||
|
MAX,
|
||||||
|
AVE,
|
||||||
|
STOCHASTIC,
|
||||||
|
ROI
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
Ptr<PoolingLayer> PoolingLayer::create(const LayerParams& params)
|
Ptr<PoolingLayer> PoolingLayer::create(const LayerParams& params)
|
||||||
|
@ -73,11 +73,13 @@
|
|||||||
#include "opencv2/video/tracking.hpp"
|
#include "opencv2/video/tracking.hpp"
|
||||||
#include "opencv2/video/background_segm.hpp"
|
#include "opencv2/video/background_segm.hpp"
|
||||||
#include "opencv2/objdetect.hpp"
|
#include "opencv2/objdetect.hpp"
|
||||||
|
#include "opencv2/dnn.hpp"
|
||||||
|
|
||||||
#include <emscripten/bind.h>
|
#include <emscripten/bind.h>
|
||||||
|
|
||||||
using namespace emscripten;
|
using namespace emscripten;
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
|
using namespace dnn;
|
||||||
|
|
||||||
namespace binding_utils
|
namespace binding_utils
|
||||||
{
|
{
|
||||||
@ -339,12 +341,12 @@ EMSCRIPTEN_BINDINGS(binding_utils)
|
|||||||
.constructor<int, int, int, const Scalar&>()
|
.constructor<int, int, int, const Scalar&>()
|
||||||
.constructor(&binding_utils::createMat, allow_raw_pointers())
|
.constructor(&binding_utils::createMat, allow_raw_pointers())
|
||||||
|
|
||||||
.class_function("eye", select_overload<Mat(int, int, int)>(&binding_utils::matEye))
|
|
||||||
.class_function("eye", select_overload<Mat(Size, int)>(&binding_utils::matEye))
|
.class_function("eye", select_overload<Mat(Size, int)>(&binding_utils::matEye))
|
||||||
.class_function("ones", select_overload<Mat(int, int, int)>(&binding_utils::matOnes))
|
.class_function("eye", select_overload<Mat(int, int, int)>(&binding_utils::matEye))
|
||||||
.class_function("ones", select_overload<Mat(Size, int)>(&binding_utils::matOnes))
|
.class_function("ones", select_overload<Mat(Size, int)>(&binding_utils::matOnes))
|
||||||
.class_function("zeros", select_overload<Mat(int, int, int)>(&binding_utils::matZeros))
|
.class_function("ones", select_overload<Mat(int, int, int)>(&binding_utils::matOnes))
|
||||||
.class_function("zeros", select_overload<Mat(Size, int)>(&binding_utils::matZeros))
|
.class_function("zeros", select_overload<Mat(Size, int)>(&binding_utils::matZeros))
|
||||||
|
.class_function("zeros", select_overload<Mat(int, int, int)>(&binding_utils::matZeros))
|
||||||
|
|
||||||
.property("rows", &cv::Mat::rows)
|
.property("rows", &cv::Mat::rows)
|
||||||
.property("cols", &cv::Mat::cols)
|
.property("cols", &cv::Mat::cols)
|
||||||
|
@ -125,6 +125,9 @@ video = {'': ['CamShift', 'calcOpticalFlowFarneback', 'calcOpticalFlowPyrLK', 'c
|
|||||||
'BackgroundSubtractorMOG2': ['BackgroundSubtractorMOG2', 'apply'],
|
'BackgroundSubtractorMOG2': ['BackgroundSubtractorMOG2', 'apply'],
|
||||||
'BackgroundSubtractor': ['apply', 'getBackgroundImage']}
|
'BackgroundSubtractor': ['apply', 'getBackgroundImage']}
|
||||||
|
|
||||||
|
dnn = {'dnn_Net': ['setInput', 'forward'],
|
||||||
|
'': ['readNetFromCaffe', 'readNetFromTensorflow', 'readNetFromTorch', 'readNetFromDarknet', 'blobFromImage']}
|
||||||
|
|
||||||
def makeWhiteList(module_list):
|
def makeWhiteList(module_list):
|
||||||
wl = {}
|
wl = {}
|
||||||
for m in module_list:
|
for m in module_list:
|
||||||
@ -135,7 +138,7 @@ def makeWhiteList(module_list):
|
|||||||
wl[k] = m[k]
|
wl[k] = m[k]
|
||||||
return wl
|
return wl
|
||||||
|
|
||||||
white_list = makeWhiteList([core, imgproc, objdetect, video])
|
white_list = makeWhiteList([core, imgproc, objdetect, video, dnn])
|
||||||
|
|
||||||
# Features to be exported
|
# Features to be exported
|
||||||
export_enums = False
|
export_enums = False
|
||||||
|
@ -134,7 +134,7 @@ class Builder:
|
|||||||
"-DBUILD_ZLIB=ON",
|
"-DBUILD_ZLIB=ON",
|
||||||
"-DBUILD_opencv_apps=OFF",
|
"-DBUILD_opencv_apps=OFF",
|
||||||
"-DBUILD_opencv_calib3d=OFF",
|
"-DBUILD_opencv_calib3d=OFF",
|
||||||
"-DBUILD_opencv_dnn=OFF",
|
"-DBUILD_opencv_dnn=ON",
|
||||||
"-DBUILD_opencv_features2d=OFF",
|
"-DBUILD_opencv_features2d=OFF",
|
||||||
"-DBUILD_opencv_flann=OFF",
|
"-DBUILD_opencv_flann=OFF",
|
||||||
"-DBUILD_opencv_ml=OFF",
|
"-DBUILD_opencv_ml=OFF",
|
||||||
|
205
samples/dnn/js_face_recognition.html
Normal file
205
samples/dnn/js_face_recognition.html
Normal file
@ -0,0 +1,205 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
|
||||||
|
<html>
|
||||||
|
|
||||||
|
<head>
|
||||||
|
<script async src="../../opencv.js" type="text/javascript"></script>
|
||||||
|
<script src="../../utils.js" type="text/javascript"></script>
|
||||||
|
|
||||||
|
<script type='text/javascript'>
|
||||||
|
var netDet = undefined, netRecogn = undefined;
|
||||||
|
var persons = {};
|
||||||
|
|
||||||
|
//! [Run face detection model]
|
||||||
|
// Runs the face detection network (netDet) on `img` and returns an array of
// {x, y, width, height} rectangles, in pixels of `img`, for every detection
// whose confidence is above 0.5.
//
// The network output is read as consecutive groups of 7 floats: element 2 is
// the confidence and elements 3..6 are left, top, right and bottom, which are
// scaled by the image width/height and clamped to the image bounds.
function detectFaces(img) {
    // Restrict a coordinate to the range [0, upper].
    var clamp = function(value, upper) {
        return Math.min(Math.max(0, value), upper);
    };

    var inputBlob = cv.blobFromImage(img, 1, {width: 128, height: 96}, [104, 177, 123, 0], false, false);
    netDet.setInput(inputBlob);
    var detections = netDet.forward();

    var values = detections.data32F;
    var maxX = img.cols - 1;
    var maxY = img.rows - 1;

    var faces = [];
    for (var offset = 0; offset < values.length; offset += 7) {
        // Weak detections are dropped before any geometry is computed.
        if (!(values[offset + 2] > 0.5)) {
            continue;
        }

        var x0 = clamp(values[offset + 3] * img.cols, maxX);
        var y0 = clamp(values[offset + 4] * img.rows, maxY);
        var x1 = clamp(values[offset + 5] * img.cols, maxX);
        var y1 = clamp(values[offset + 6] * img.rows, maxY);

        // Boxes that collapse to zero or negative size after clamping are skipped.
        if (x0 < x1 && y0 < y1) {
            faces.push({x: x0, y: y0, width: x1 - x0, height: y1 - y0});
        }
    }

    // Both Mats were created in this function, so they are released here.
    inputBlob.delete();
    detections.delete();
    return faces;
};
|
||||||
|
//! [Run face detection model]
|
||||||
|
|
||||||
|
//! [Get 128 floating points feature vector]
|
||||||
|
// Feeds a face image to the recognition network (netRecogn) and returns the
// network output as a cv.Mat.
//
// The input blob is built with a 1/255 scale factor, a 96x96 size, a zero
// mean, swapRB enabled and cropping disabled. The returned Mat is not
// released here; the caller is responsible for calling delete() on it.
function face2vec(face) {
    var inputSize = {width: 96, height: 96};
    var zeroMean = [0, 0, 0, 0];

    var inputBlob = cv.blobFromImage(face, 1.0 / 255, inputSize, zeroMean, true, false);
    netRecogn.setInput(inputBlob);
    var features = netRecogn.forward();

    // The blob is only needed for the forward pass.
    inputBlob.delete();
    return features;
};
|
||||||
|
//! [Get 128 floating points feature vector]
|
||||||
|
|
||||||
|
//! [Recognize]
|
||||||
|
// Compares the feature vector of `face` with every registered person and
// returns the name of the best match, or 'unknown' when no registered vector
// scores above the 0.5 threshold.
//
// The score is the dot product between the new vector and a stored one, so a
// larger score means a closer match.
function recognize(face) {
    var vec = face2vec(face);

    var bestMatchName = 'unknown';
    var bestMatchScore = 0.5;  // Actually, the minimum is -1 but we use it as a threshold.
    // `name` is declared locally: without `var` the loop would write to an
    // implicit global, which in a browser is window.name.
    for (var name in persons) {
        var personVec = persons[name];
        var score = vec.dot(personVec);
        if (score > bestMatchScore) {
            bestMatchScore = score;
            bestMatchName = name;
        }
    }
    // The feature vector was created by face2vec() for this call only.
    vec.delete();
    return bestMatchName;
};
|
||||||
|
//! [Recognize]
|
||||||
|
|
||||||
|
// Downloads the face detector (prototxt + caffemodel) and the face
// recognition model one after another, creates both networks (netDet and
// netRecogn) and finally invokes `callback`.
//
// Progress is reported through the element with id 'status'.
function loadModels(callback) {
    var utils = new Utils('');
    var proto = 'https://raw.githubusercontent.com/opencv/opencv/master/samples/dnn/face_detector/deploy.prototxt';
    var weights = 'https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20170830/res10_300x300_ssd_iter_140000.caffemodel';
    var recognModel = 'https://raw.githubusercontent.com/pyannote/pyannote-data/master/openface.nn4.small2.v1.t7';

    function setStatus(text) {
        document.getElementById('status').innerHTML = text;
    }

    // Step 3: every file is available, so the networks can be created.
    function onRecognitionModelReady() {
        setStatus('');
        netDet = cv.readNetFromCaffe('face_detector.prototxt', 'face_detector.caffemodel');
        netRecogn = cv.readNetFromTorch('face_recognition.t7');
        callback();
    }

    // Step 2: detector weights are available, fetch the recognition model.
    function onDetectorWeightsReady() {
        setStatus('Downloading OpenFace model');
        utils.createFileFromUrl('face_recognition.t7', recognModel, onRecognitionModelReady);
    }

    // Step 1: detector description is available, fetch its weights.
    function onDetectorProtoReady() {
        setStatus('Downloading face_detector.caffemodel');
        utils.createFileFromUrl('face_detector.caffemodel', weights, onDetectorWeightsReady);
    }

    utils.createFileFromUrl('face_detector.prototxt', proto, onDetectorProtoReady);
};
|
||||||
|
|
||||||
|
// Page entry point (called from <body onload>).
//
// Sets up a hidden <video> element fed by the user's camera, allocates the
// frame buffers, and wires the two buttons:
//   - 'startStopButton' starts/stops the capture loop (models are downloaded
//     on the first start);
//   - 'addPersonButton' registers the first face detected in the current
//     frame under a name typed by the user.
//
// Every cv.Mat created here (ROIs, temporary feature vectors, resized faces)
// is released explicitly with delete(); this code does not rely on them
// being freed automatically.
function main() {
    // Create a camera object.
    var output = document.getElementById('output');
    var camera = document.createElement("video");
    camera.setAttribute("width", output.width);
    camera.setAttribute("height", output.height);

    // Get a permission from user to use a camera.
    navigator.mediaDevices.getUserMedia({video: true, audio: false})
        .then(function(stream) {
            camera.srcObject = stream;
            camera.onloadedmetadata = function(e) {
                camera.play();
            };
        })
        .catch(function(err) {
            // Report a denied permission or a missing camera instead of
            // leaving an unhandled promise rejection.
            document.getElementById('status').textContent = 'Cannot access the camera: ' + err;
        });

    //! [Open a camera stream]
    var cap = new cv.VideoCapture(camera);
    var frame = new cv.Mat(camera.height, camera.width, cv.CV_8UC4);
    var frameBGR = new cv.Mat(camera.height, camera.width, cv.CV_8UC3);
    //! [Open a camera stream]

    //! [Add a person]
    document.getElementById('addPersonButton').onclick = function() {
        var rects = detectFaces(frameBGR);
        if (rects.length > 0) {
            var face = frameBGR.roi(rects[0]);
            try {
                var name = prompt('Say your name:');
                // prompt() returns null when cancelled; do not register a
                // person called "null" or "".
                if (!name) {
                    return;
                }

                var nameCell = document.getElementById("targetNames").insertCell(0);
                // textContent keeps the typed name from being parsed as HTML.
                nameCell.textContent = name;

                // Release the vector of a previously registered person with
                // the same name before replacing it.
                if (Object.prototype.hasOwnProperty.call(persons, name)) {
                    persons[name].delete();
                }
                var vec = face2vec(face);
                persons[name] = vec.clone();
                vec.delete();  // Only the clone is kept.

                var canvas = document.createElement("canvas");
                canvas.setAttribute("width", 96);
                canvas.setAttribute("height", 96);
                var imgCell = document.getElementById("targetImgs").insertCell(0);
                imgCell.appendChild(canvas);

                var faceResized = new cv.Mat(canvas.height, canvas.width, cv.CV_8UC3);
                cv.resize(face, faceResized, {width: canvas.width, height: canvas.height});
                cv.cvtColor(faceResized, faceResized, cv.COLOR_BGR2RGB);
                cv.imshow(canvas, faceResized);
                faceResized.delete();
            } finally {
                // The ROI is a separate Mat object and must be released too.
                face.delete();
            }
        }
    };
    //! [Add a person]

    //! [Define frames processing]
    var isRunning = false;
    const FPS = 30;  // Target number of frames processed per second.
    function captureFrame() {
        var begin = Date.now();
        cap.read(frame);  // Read a frame from camera
        cv.cvtColor(frame, frameBGR, cv.COLOR_RGBA2BGR);

        var faces = detectFaces(frameBGR);
        faces.forEach(function(rect) {
            cv.rectangle(frame, {x: rect.x, y: rect.y}, {x: rect.x + rect.width, y: rect.y + rect.height}, [0, 255, 0, 255]);

            var face = frameBGR.roi(rect);
            var name = recognize(face);
            face.delete();  // One ROI per face per frame: release it right away.
            cv.putText(frame, name, {x: rect.x, y: rect.y}, cv.FONT_HERSHEY_SIMPLEX, 1.0, [0, 255, 0, 255]);
        });

        cv.imshow(output, frame);

        // Loop this function.
        if (isRunning) {
            // Subtract the processing time; never schedule a negative delay.
            var delay = Math.max(0, 1000 / FPS - (Date.now() - begin));
            setTimeout(captureFrame, delay);
        }
    };
    //! [Define frames processing]

    document.getElementById('startStopButton').onclick = function toggle() {
        if (isRunning) {
            isRunning = false;
            document.getElementById('startStopButton').innerHTML = 'Start';
            document.getElementById('addPersonButton').disabled = true;
        } else {
            function run() {
                isRunning = true;
                captureFrame();
                document.getElementById('startStopButton').innerHTML = 'Stop';
                document.getElementById('startStopButton').disabled = false;
                document.getElementById('addPersonButton').disabled = false;
            }
            if (netDet == undefined || netRecogn == undefined) {
                // Keep the button disabled while the models are downloading.
                document.getElementById('startStopButton').disabled = true;
                loadModels(run);  // Load models and run a pipeline;
            } else {
                run();
            }
        }
    };

    document.getElementById('startStopButton').disabled = false;
};
|
||||||
|
</script>
|
||||||
|
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body onload="main()">
|
||||||
|
<button id="startStopButton" type="button" disabled="true">Start</button>
|
||||||
|
<div id="status"></div>
|
||||||
|
<canvas id="output" width=640 height=480 style="max-width: 100%"></canvas>
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<tr id="targetImgs"></tr>
|
||||||
|
<tr id="targetNames"></tr>
|
||||||
|
</table>
|
||||||
|
<button id="addPersonButton" type="button" disabled="true">Add a person</button>
|
||||||
|
</body>
|
||||||
|
|
||||||
|
</html>
|
Loading…
Reference in New Issue
Block a user