Mirror of https://github.com/opencv/opencv.git

Commit ea5499fa51: Merge remote-tracking branch 'upstream/3.4' into merge-3.4
@@ -857,8 +857,8 @@ double cv::fisheye::stereoCalibrate(InputArrayOfArrays objectPoints, InputArrayO

     CV_Assert(K1.empty() || (K1.size() == Size(3,3)));
     CV_Assert(D1.empty() || (D1.total() == 4));
-    CV_Assert(K2.empty() || (K1.size() == Size(3,3)));
-    CV_Assert(D2.empty() || (D1.total() == 4));
+    CV_Assert(K2.empty() || (K2.size() == Size(3,3)));
+    CV_Assert(D2.empty() || (D2.total() == 4));

     CV_Assert((!K1.empty() && !K2.empty() && !D1.empty() && !D2.empty()) || !(flags & CALIB_FIX_INTRINSIC));

@@ -53,6 +53,10 @@

 #include <opencv2/core/utils/trace.hpp>

+#ifdef ENABLE_INSTRUMENTATION
+#include "opencv2/core/utils/instrumentation.hpp"
+#endif
+
 #ifdef HAVE_EIGEN
 #  if defined __GNUC__ && defined __APPLE__
 #    pragma GCC diagnostic ignored "-Wshadow"
@ -65,30 +65,6 @@
|
||||
namespace cv
|
||||
{
|
||||
|
||||
#ifdef CV_COLLECT_IMPL_DATA
|
||||
CV_EXPORTS void setImpl(int flags); // set implementation flags and reset storage arrays
|
||||
CV_EXPORTS void addImpl(int flag, const char* func = 0); // add implementation and function name to storage arrays
|
||||
// Get stored implementation flags and functions names arrays
|
||||
// Each implementation entry correspond to function name entry, so you can find which implementation was executed in which function
|
||||
CV_EXPORTS int getImpl(std::vector<int> &impl, std::vector<String> &funName);
|
||||
|
||||
CV_EXPORTS bool useCollection(); // return implementation collection state
|
||||
CV_EXPORTS void setUseCollection(bool flag); // set implementation collection state
|
||||
|
||||
#define CV_IMPL_PLAIN 0x01 // native CPU OpenCV implementation
|
||||
#define CV_IMPL_OCL 0x02 // OpenCL implementation
|
||||
#define CV_IMPL_IPP 0x04 // IPP implementation
|
||||
#define CV_IMPL_MT 0x10 // multithreaded implementation
|
||||
|
||||
#define CV_IMPL_ADD(impl) \
|
||||
if(cv::useCollection()) \
|
||||
{ \
|
||||
cv::addImpl(impl, CV_Func); \
|
||||
}
|
||||
#else
|
||||
#define CV_IMPL_ADD(impl)
|
||||
#endif
|
||||
|
||||
//! @addtogroup core_utils
|
||||
//! @{
|
||||
|
||||
@ -702,54 +678,6 @@ typedef std::recursive_mutex Mutex;
|
||||
typedef std::lock_guard<cv::Mutex> AutoLock;
|
||||
#endif
|
||||
|
||||
// TLS interface
|
||||
class CV_EXPORTS TLSDataContainer
|
||||
{
|
||||
protected:
|
||||
TLSDataContainer();
|
||||
virtual ~TLSDataContainer();
|
||||
|
||||
void gatherData(std::vector<void*> &data) const;
|
||||
void* getData() const;
|
||||
void release();
|
||||
|
||||
private:
|
||||
virtual void* createDataInstance() const = 0;
|
||||
virtual void deleteDataInstance(void* pData) const = 0;
|
||||
|
||||
int key_;
|
||||
|
||||
public:
|
||||
void cleanup(); //! Release created TLS data container objects. It is similar to release() call, but it keeps TLS container valid.
|
||||
};
|
||||
|
||||
// Main TLS data class
|
||||
template <typename T>
|
||||
class TLSData : protected TLSDataContainer
|
||||
{
|
||||
public:
|
||||
inline TLSData() {}
|
||||
inline ~TLSData() { release(); } // Release key and delete associated data
|
||||
inline T* get() const { return (T*)getData(); } // Get data associated with key
|
||||
inline T& getRef() const { T* ptr = (T*)getData(); CV_Assert(ptr); return *ptr; } // Get data associated with key
|
||||
|
||||
// Get data from all threads
|
||||
inline void gather(std::vector<T*> &data) const
|
||||
{
|
||||
std::vector<void*> &dataVoid = reinterpret_cast<std::vector<void*>&>(data);
|
||||
gatherData(dataVoid);
|
||||
}
|
||||
|
||||
inline void cleanup() { TLSDataContainer::cleanup(); }
|
||||
|
||||
private:
|
||||
virtual void* createDataInstance() const CV_OVERRIDE {return new T;} // Wrapper to allocate data by template
|
||||
virtual void deleteDataInstance(void* pData) const CV_OVERRIDE {delete (T*)pData;} // Wrapper to release data by template
|
||||
|
||||
// Disable TLS copy operations
|
||||
TLSData(TLSData &) {}
|
||||
TLSData& operator =(const TLSData &) {return *this;}
|
||||
};
|
||||
|
||||
/** @brief Designed for command line parsing
|
||||
|
||||
@ -1159,88 +1087,6 @@ public:
|
||||
std::vector<Node<OBJECT>*> m_childs;
|
||||
};
|
||||
|
||||
// Instrumentation external interface
|
||||
namespace instr
|
||||
{
|
||||
|
||||
#if !defined OPENCV_ABI_CHECK
|
||||
|
||||
enum TYPE
|
||||
{
|
||||
TYPE_GENERAL = 0, // OpenCV API function, e.g. exported function
|
||||
TYPE_MARKER, // Information marker
|
||||
TYPE_WRAPPER, // Wrapper function for implementation
|
||||
TYPE_FUN, // Simple function call
|
||||
};
|
||||
|
||||
enum IMPL
|
||||
{
|
||||
IMPL_PLAIN = 0,
|
||||
IMPL_IPP,
|
||||
IMPL_OPENCL,
|
||||
};
|
||||
|
||||
struct NodeDataTls
|
||||
{
|
||||
NodeDataTls()
|
||||
{
|
||||
m_ticksTotal = 0;
|
||||
}
|
||||
uint64 m_ticksTotal;
|
||||
};
|
||||
|
||||
class CV_EXPORTS NodeData
|
||||
{
|
||||
public:
|
||||
NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, void* retAddress = NULL, bool alwaysExpand = false, cv::instr::TYPE instrType = TYPE_GENERAL, cv::instr::IMPL implType = IMPL_PLAIN);
|
||||
NodeData(NodeData &ref);
|
||||
~NodeData();
|
||||
NodeData& operator=(const NodeData&);
|
||||
|
||||
cv::String m_funName;
|
||||
cv::instr::TYPE m_instrType;
|
||||
cv::instr::IMPL m_implType;
|
||||
const char* m_fileName;
|
||||
int m_lineNum;
|
||||
void* m_retAddress;
|
||||
bool m_alwaysExpand;
|
||||
bool m_funError;
|
||||
|
||||
volatile int m_counter;
|
||||
volatile uint64 m_ticksTotal;
|
||||
TLSData<NodeDataTls> m_tls;
|
||||
int m_threads;
|
||||
|
||||
// No synchronization
|
||||
double getTotalMs() const { return ((double)m_ticksTotal / cv::getTickFrequency()) * 1000; }
|
||||
double getMeanMs() const { return (((double)m_ticksTotal/m_counter) / cv::getTickFrequency()) * 1000; }
|
||||
};
|
||||
bool operator==(const NodeData& lhs, const NodeData& rhs);
|
||||
|
||||
typedef Node<NodeData> InstrNode;
|
||||
|
||||
CV_EXPORTS InstrNode* getTrace();
|
||||
|
||||
#endif // !defined OPENCV_ABI_CHECK
|
||||
|
||||
|
||||
CV_EXPORTS bool useInstrumentation();
|
||||
CV_EXPORTS void setUseInstrumentation(bool flag);
|
||||
CV_EXPORTS void resetTrace();
|
||||
|
||||
enum FLAGS
|
||||
{
|
||||
FLAGS_NONE = 0,
|
||||
FLAGS_MAPPING = 0x01,
|
||||
FLAGS_EXPAND_SAME_NAMES = 0x02,
|
||||
};
|
||||
|
||||
CV_EXPORTS void setFlags(FLAGS modeFlags);
|
||||
static inline void setFlags(int modeFlags) { setFlags((FLAGS)modeFlags); }
|
||||
CV_EXPORTS FLAGS getFlags();
|
||||
|
||||
} // namespace instr
|
||||
|
||||
|
||||
namespace samples {
|
||||
|
||||
@@ -1315,4 +1161,11 @@ CV_EXPORTS int getThreadID();

 } //namespace cv

+#ifdef CV_COLLECT_IMPL_DATA
+#include "opencv2/core/utils/instrumentation.hpp"
+#else
+/// Collect implementation data on OpenCV function call. Requires ENABLE_IMPL_COLLECTION build option.
+#define CV_IMPL_ADD(impl)
+#endif
+
 #endif //OPENCV_CORE_UTILITY_H
modules/core/include/opencv2/core/utils/instrumentation.hpp (new file, 125 lines)
@@ -0,0 +1,125 @@
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_UTILS_INSTR_HPP
|
||||
#define OPENCV_UTILS_INSTR_HPP
|
||||
|
||||
#include <opencv2/core/utility.hpp>
|
||||
#include <opencv2/core/utils/tls.hpp>
|
||||
|
||||
namespace cv {
|
||||
|
||||
//! @addtogroup core_utils
|
||||
//! @{
|
||||
|
||||
#ifdef CV_COLLECT_IMPL_DATA
|
||||
CV_EXPORTS void setImpl(int flags); // set implementation flags and reset storage arrays
|
||||
CV_EXPORTS void addImpl(int flag, const char* func = 0); // add implementation and function name to storage arrays
|
||||
// Get stored implementation flags and functions names arrays
|
||||
// Each implementation entry correspond to function name entry, so you can find which implementation was executed in which function
|
||||
CV_EXPORTS int getImpl(std::vector<int> &impl, std::vector<String> &funName);
|
||||
|
||||
CV_EXPORTS bool useCollection(); // return implementation collection state
|
||||
CV_EXPORTS void setUseCollection(bool flag); // set implementation collection state
|
||||
|
||||
#define CV_IMPL_PLAIN 0x01 // native CPU OpenCV implementation
|
||||
#define CV_IMPL_OCL 0x02 // OpenCL implementation
|
||||
#define CV_IMPL_IPP 0x04 // IPP implementation
|
||||
#define CV_IMPL_MT 0x10 // multithreaded implementation
|
||||
|
||||
#undef CV_IMPL_ADD
|
||||
#define CV_IMPL_ADD(impl) \
|
||||
if(cv::useCollection()) \
|
||||
{ \
|
||||
cv::addImpl(impl, CV_Func); \
|
||||
}
|
||||
#endif
|
||||
|
||||
// Instrumentation external interface
|
||||
namespace instr
|
||||
{
|
||||
|
||||
#if !defined OPENCV_ABI_CHECK
|
||||
|
||||
enum TYPE
|
||||
{
|
||||
TYPE_GENERAL = 0, // OpenCV API function, e.g. exported function
|
||||
TYPE_MARKER, // Information marker
|
||||
TYPE_WRAPPER, // Wrapper function for implementation
|
||||
TYPE_FUN, // Simple function call
|
||||
};
|
||||
|
||||
enum IMPL
|
||||
{
|
||||
IMPL_PLAIN = 0,
|
||||
IMPL_IPP,
|
||||
IMPL_OPENCL,
|
||||
};
|
||||
|
||||
struct NodeDataTls
|
||||
{
|
||||
NodeDataTls()
|
||||
{
|
||||
m_ticksTotal = 0;
|
||||
}
|
||||
uint64 m_ticksTotal;
|
||||
};
|
||||
|
||||
class CV_EXPORTS NodeData
|
||||
{
|
||||
public:
|
||||
NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, void* retAddress = NULL, bool alwaysExpand = false, cv::instr::TYPE instrType = TYPE_GENERAL, cv::instr::IMPL implType = IMPL_PLAIN);
|
||||
NodeData(NodeData &ref);
|
||||
~NodeData();
|
||||
NodeData& operator=(const NodeData&);
|
||||
|
||||
cv::String m_funName;
|
||||
cv::instr::TYPE m_instrType;
|
||||
cv::instr::IMPL m_implType;
|
||||
const char* m_fileName;
|
||||
int m_lineNum;
|
||||
void* m_retAddress;
|
||||
bool m_alwaysExpand;
|
||||
bool m_funError;
|
||||
|
||||
volatile int m_counter;
|
||||
volatile uint64 m_ticksTotal;
|
||||
TLSDataAccumulator<NodeDataTls> m_tls;
|
||||
int m_threads;
|
||||
|
||||
// No synchronization
|
||||
double getTotalMs() const { return ((double)m_ticksTotal / cv::getTickFrequency()) * 1000; }
|
||||
double getMeanMs() const { return (((double)m_ticksTotal/m_counter) / cv::getTickFrequency()) * 1000; }
|
||||
};
|
||||
bool operator==(const NodeData& lhs, const NodeData& rhs);
|
||||
|
||||
typedef Node<NodeData> InstrNode;
|
||||
|
||||
CV_EXPORTS InstrNode* getTrace();
|
||||
|
||||
#endif // !defined OPENCV_ABI_CHECK
|
||||
|
||||
|
||||
CV_EXPORTS bool useInstrumentation();
|
||||
CV_EXPORTS void setUseInstrumentation(bool flag);
|
||||
CV_EXPORTS void resetTrace();
|
||||
|
||||
enum FLAGS
|
||||
{
|
||||
FLAGS_NONE = 0,
|
||||
FLAGS_MAPPING = 0x01,
|
||||
FLAGS_EXPAND_SAME_NAMES = 0x02,
|
||||
};
|
||||
|
||||
CV_EXPORTS void setFlags(FLAGS modeFlags);
|
||||
static inline void setFlags(int modeFlags) { setFlags((FLAGS)modeFlags); }
|
||||
CV_EXPORTS FLAGS getFlags();
|
||||
|
||||
} // namespace instr
|
||||
|
||||
//! @}
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // OPENCV_UTILS_TLS_HPP
|
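The implementation-collection API declared above (setImpl, addImpl, getImpl, useCollection) is only compiled in when CV_COLLECT_IMPL_DATA is defined, i.e. in builds made with the ENABLE_IMPL_COLLECTION option. A minimal, hedged sketch of how the paired arrays returned by getImpl() could be inspected in such a build; the build flag, the choice of cv::blur as the traced call, and the output format are illustrative assumptions, not part of this commit:

// Sketch only: requires an OpenCV build with CV_COLLECT_IMPL_DATA defined.
#include <opencv2/core.hpp>
#include <opencv2/core/utils/instrumentation.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>

int main()
{
    cv::Mat src(480, 640, CV_8UC1), dst;
    cv::randu(src, 0, 255);

    cv::setUseCollection(true);   // start collecting implementation info
    cv::setImpl(0);               // reset the storage arrays

    cv::blur(src, dst, cv::Size(3, 3));

    std::vector<int> impls;
    std::vector<cv::String> funcs;
    cv::getImpl(impls, funcs);    // impls[i] corresponds to funcs[i]

    for (size_t i = 0; i < funcs.size(); i++)
        std::cout << funcs[i] << " -> impl flags 0x" << std::hex << impls[i] << std::endl;
    return 0;
}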
modules/core/include/opencv2/core/utils/tls.hpp (new file, 233 lines)
@@ -0,0 +1,233 @@
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_UTILS_TLS_HPP
|
||||
#define OPENCV_UTILS_TLS_HPP
|
||||
|
||||
#include <opencv2/core/utility.hpp>
|
||||
|
||||
namespace cv {
|
||||
|
||||
//! @addtogroup core_utils
|
||||
//! @{
|
||||
|
||||
namespace details { class TlsStorage; }
|
||||
|
||||
/** TLS container base implementation
|
||||
*
|
||||
* Don't use directly.
|
||||
*
|
||||
* @sa TLSData, TLSDataAccumulator templates
|
||||
*/
|
||||
class CV_EXPORTS TLSDataContainer
|
||||
{
|
||||
protected:
|
||||
TLSDataContainer();
|
||||
virtual ~TLSDataContainer();
|
||||
|
||||
/// @deprecated use detachData() instead
|
||||
void gatherData(std::vector<void*> &data) const;
|
||||
/// get TLS data and detach all data from threads (similar to cleanup() call)
|
||||
void detachData(std::vector<void*>& data);
|
||||
|
||||
void* getData() const;
|
||||
void release();
|
||||
|
||||
protected:
|
||||
virtual void* createDataInstance() const = 0;
|
||||
virtual void deleteDataInstance(void* pData) const = 0;
|
||||
|
||||
private:
|
||||
int key_;
|
||||
|
||||
friend class cv::details::TlsStorage; // core/src/system.cpp
|
||||
|
||||
public:
|
||||
void cleanup(); //!< Release created TLS data container objects. It is similar to release() call, but it keeps TLS container valid.
|
||||
|
||||
private:
|
||||
// Disable copy/assign (noncopyable pattern)
|
||||
TLSDataContainer(TLSDataContainer &) = delete;
|
||||
TLSDataContainer& operator =(const TLSDataContainer &) = delete;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Simple TLS data class
|
||||
*
|
||||
* @sa TLSDataAccumulator
|
||||
*/
|
||||
template <typename T>
|
||||
class TLSData : protected TLSDataContainer
|
||||
{
|
||||
public:
|
||||
inline TLSData() {}
|
||||
inline ~TLSData() { release(); }
|
||||
|
||||
inline T* get() const { return (T*)getData(); } //!< Get data associated with key
|
||||
inline T& getRef() const { T* ptr = (T*)getData(); CV_DbgAssert(ptr); return *ptr; } //!< Get data associated with key
|
||||
|
||||
/// Release associated thread data
|
||||
inline void cleanup()
|
||||
{
|
||||
TLSDataContainer::cleanup();
|
||||
}
|
||||
|
||||
protected:
|
||||
/// Wrapper to allocate data by template
|
||||
virtual void* createDataInstance() const CV_OVERRIDE { return new T; }
|
||||
/// Wrapper to release data by template
|
||||
virtual void deleteDataInstance(void* pData) const CV_OVERRIDE { delete (T*)pData; }
|
||||
};
|
||||
|
||||
|
||||
/// TLS data accumulator with gathering methods
|
||||
template <typename T>
|
||||
class TLSDataAccumulator : public TLSData<T>
|
||||
{
|
||||
mutable cv::Mutex mutex;
|
||||
mutable std::vector<T*> dataFromTerminatedThreads;
|
||||
std::vector<T*> detachedData;
|
||||
bool cleanupMode;
|
||||
public:
|
||||
TLSDataAccumulator() : cleanupMode(false) {}
|
||||
~TLSDataAccumulator()
|
||||
{
|
||||
release();
|
||||
}
|
||||
|
||||
/** @brief Get data from all threads
|
||||
* @deprecated replaced by detachData()
|
||||
*
|
||||
* Lifetime of vector data is valid until next detachData()/cleanup()/release() calls
|
||||
*
|
||||
* @param[out] data result buffer (should be empty)
|
||||
*/
|
||||
void gather(std::vector<T*> &data) const
|
||||
{
|
||||
CV_Assert(cleanupMode == false); // state is not valid
|
||||
CV_Assert(data.empty());
|
||||
{
|
||||
std::vector<void*> &dataVoid = reinterpret_cast<std::vector<void*>&>(data);
|
||||
TLSDataContainer::gatherData(dataVoid);
|
||||
}
|
||||
{
|
||||
AutoLock lock(mutex);
|
||||
data.reserve(data.size() + dataFromTerminatedThreads.size());
|
||||
for (typename std::vector<T*>::const_iterator i = dataFromTerminatedThreads.begin(); i != dataFromTerminatedThreads.end(); ++i)
|
||||
{
|
||||
data.push_back((T*)*i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** @brief Get and detach data from all threads
|
||||
*
|
||||
* Call cleanupDetachedData() when returned vector is not needed anymore.
|
||||
*
|
||||
* @return Vector with associated data. Content is preserved (including lifetime of attached data pointers) until next detachData()/cleanupDetachedData()/cleanup()/release() calls
|
||||
*/
|
||||
std::vector<T*>& detachData()
|
||||
{
|
||||
CV_Assert(cleanupMode == false); // state is not valid
|
||||
std::vector<void*> dataVoid;
|
||||
{
|
||||
TLSDataContainer::detachData(dataVoid);
|
||||
}
|
||||
{
|
||||
AutoLock lock(mutex);
|
||||
detachedData.reserve(dataVoid.size() + dataFromTerminatedThreads.size());
|
||||
for (typename std::vector<T*>::const_iterator i = dataFromTerminatedThreads.begin(); i != dataFromTerminatedThreads.end(); ++i)
|
||||
{
|
||||
detachedData.push_back((T*)*i);
|
||||
}
|
||||
dataFromTerminatedThreads.clear();
|
||||
for (typename std::vector<void*>::const_iterator i = dataVoid.begin(); i != dataVoid.end(); ++i)
|
||||
{
|
||||
detachedData.push_back((T*)(void*)*i);
|
||||
}
|
||||
}
|
||||
dataVoid.clear();
|
||||
return detachedData;
|
||||
}
|
||||
|
||||
/// Release associated thread data returned by detachData() call
|
||||
void cleanupDetachedData()
|
||||
{
|
||||
AutoLock lock(mutex);
|
||||
cleanupMode = true;
|
||||
_cleanupDetachedData();
|
||||
cleanupMode = false;
|
||||
}
|
||||
|
||||
/// Release associated thread data
|
||||
void cleanup()
|
||||
{
|
||||
cleanupMode = true;
|
||||
TLSDataContainer::cleanup();
|
||||
|
||||
AutoLock lock(mutex);
|
||||
_cleanupDetachedData();
|
||||
_cleanupTerminatedData();
|
||||
cleanupMode = false;
|
||||
}
|
||||
|
||||
/// Release associated thread data and free TLS key
|
||||
void release()
|
||||
{
|
||||
cleanupMode = true;
|
||||
TLSDataContainer::release();
|
||||
{
|
||||
AutoLock lock(mutex);
|
||||
_cleanupDetachedData();
|
||||
_cleanupTerminatedData();
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
// synchronized
|
||||
void _cleanupDetachedData()
|
||||
{
|
||||
for (typename std::vector<T*>::iterator i = detachedData.begin(); i != detachedData.end(); ++i)
|
||||
{
|
||||
deleteDataInstance((T*)*i);
|
||||
}
|
||||
detachedData.clear();
|
||||
}
|
||||
|
||||
// synchronized
|
||||
void _cleanupTerminatedData()
|
||||
{
|
||||
for (typename std::vector<T*>::iterator i = dataFromTerminatedThreads.begin(); i != dataFromTerminatedThreads.end(); ++i)
|
||||
{
|
||||
deleteDataInstance((T*)*i);
|
||||
}
|
||||
dataFromTerminatedThreads.clear();
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual void* createDataInstance() const CV_OVERRIDE
|
||||
{
|
||||
// Note: we can collect all allocated data here, but this would require raced mutex locks
|
||||
return new T;
|
||||
}
|
||||
virtual void deleteDataInstance(void* pData) const CV_OVERRIDE
|
||||
{
|
||||
if (cleanupMode)
|
||||
{
|
||||
delete (T*)pData;
|
||||
}
|
||||
else
|
||||
{
|
||||
AutoLock lock(mutex);
|
||||
dataFromTerminatedThreads.push_back((T*)pData);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
//! @}
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // OPENCV_UTILS_TLS_HPP
|
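A minimal usage sketch of the TLSData/TLSDataAccumulator templates declared in this new header, based only on the interface shown above. The worker threads use C++11 std::thread and the Counter payload type is purely illustrative:

#include <opencv2/core/utils/tls.hpp>
#include <iostream>
#include <thread>
#include <vector>

struct Counter { int hits = 0; };

int main()
{
    cv::TLSDataAccumulator<Counter> tls;     // one Counter per thread, gatherable afterwards

    std::vector<std::thread> workers;
    for (int t = 0; t < 4; t++)
        workers.emplace_back([&tls]() {
            Counter& c = tls.getRef();       // lazily creates this thread's instance
            c.hits++;
        });
    for (auto& w : workers) w.join();

    std::vector<Counter*>& data = tls.detachData();  // take ownership of all per-thread objects
    int total = 0;
    for (Counter* c : data) total += c->hits;
    std::cout << "total hits: " << total << std::endl;

    tls.cleanupDetachedData();               // release the objects returned by detachData()
    return 0;
}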
@@ -9,6 +9,8 @@

 #include <opencv2/core/utils/logger.hpp>

+#include <opencv2/core/utils/tls.hpp>
+
 #include "trace.hpp"

 //! @cond IGNORED
@@ -332,7 +334,7 @@ public:
     Mutex mutexCreate;
     Mutex mutexCount;

-    TLSData<TraceManagerThreadLocal> tls;
+    TLSDataAccumulator<TraceManagerThreadLocal> tls;

     cv::Ptr<TraceStorage> trace_storage;
 private:
@@ -41,6 +41,15 @@ public class Mat {
         nativeObj = n_Mat(rows, cols, type, data);
     }

+    //
+    // C++: Mat::Mat(int rows, int cols, int type, void* data, size_t step)
+    //
+
+    // javadoc: Mat::Mat(rows, cols, type, data, step)
+    public Mat(int rows, int cols, int type, ByteBuffer data, long step) {
+        nativeObj = n_Mat(rows, cols, type, data, step);
+    }
+
     //
     // C++: Mat::Mat(Size size, int type)
     //
@@ -1136,6 +1145,9 @@ public class Mat {
     // C++: Mat::Mat(int rows, int cols, int type, void* data)
     private static native long n_Mat(int rows, int cols, int type, ByteBuffer data);

+    // C++: Mat::Mat(int rows, int cols, int type, void* data, size_t step)
+    private static native long n_Mat(int rows, int cols, int type, ByteBuffer data, long step);
+
     // C++: Mat::Mat(Size size, int type)
     private static native long n_Mat(double size_width, double size_height, int type);

@@ -1246,4 +1246,22 @@ public class MatTest extends OpenCVTestCase {
         assertEquals(1, bbuf.get(4095));
     }

+    public void testMatFromByteBufferWithStep() {
+        ByteBuffer bbuf = ByteBuffer.allocateDirect(80*64);
+        bbuf.putInt(0x01010101);
+        bbuf.putInt(64, 0x02020202);
+        bbuf.putInt(80, 0x03030303);
+        Mat m = new Mat(64, 64, CvType.CV_8UC1, bbuf, 80);
+        assertEquals(8, Core.countNonZero(m));
+        Core.add(m, new Scalar(5), m);
+        assertEquals(4096, Core.countNonZero(m));
+        m.release();
+        assertEquals(6, bbuf.get(0));
+        assertEquals(5, bbuf.get(63));
+        assertEquals(2, bbuf.get(64));
+        assertEquals(0, bbuf.get(79));
+        assertEquals(8, bbuf.get(80));
+        assertEquals(5, bbuf.get(63*80 + 63));
+    }
+
 }
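For context, the Java constructor added above wraps the existing C++ Mat constructor that takes external data plus an explicit row step in bytes. A minimal C++ sketch of the same idea; the buffer size and step value are illustrative:

#include <opencv2/core.hpp>
#include <vector>

int main()
{
    const int rows = 64, cols = 64;
    const size_t step = 80;                      // row stride in bytes, >= cols * elemSize
    std::vector<unsigned char> buffer(step * rows, 0);

    // Wrap the external buffer without copying; the Mat does not own the memory.
    cv::Mat m(rows, cols, CV_8UC1, buffer.data(), step);
    m.setTo(cv::Scalar(5));                      // writes through to 'buffer', honoring the step

    return 0;
}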
@@ -909,19 +909,19 @@ bool haveOpenCL()

 bool useOpenCL()
 {
-    CoreTLSData* data = getCoreTlsData().get();
-    if( data->useOpenCL < 0 )
+    CoreTLSData& data = getCoreTlsData();
+    if (data.useOpenCL < 0)
     {
         try
         {
-            data->useOpenCL = (int)(haveOpenCL() && Device::getDefault().ptr() && Device::getDefault().available()) ? 1 : 0;
+            data.useOpenCL = (int)(haveOpenCL() && Device::getDefault().ptr() && Device::getDefault().available()) ? 1 : 0;
         }
         catch (...)
         {
-            data->useOpenCL = 0;
+            data.useOpenCL = 0;
         }
     }
-    return data->useOpenCL > 0;
+    return data.useOpenCL > 0;
 }

 #ifdef HAVE_OPENCL
@@ -937,14 +937,14 @@ void setUseOpenCL(bool flag)
 {
     CV_TRACE_FUNCTION();

-    CoreTLSData* data = getCoreTlsData().get();
+    CoreTLSData& data = getCoreTlsData();
     if (!flag)
     {
-        data->useOpenCL = 0;
+        data.useOpenCL = 0;
     }
     else if( haveOpenCL() )
     {
-        data->useOpenCL = (Device::getDefault().ptr() != NULL) ? 1 : 0;
+        data.useOpenCL = (Device::getDefault().ptr() != NULL) ? 1 : 0;
     }
 }

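The refactor above only changes how the per-thread flag is stored; the public toggles remain cv::ocl::useOpenCL() and cv::ocl::setUseOpenCL(), which affect the calling thread's state. A short usage sketch:

#include <opencv2/core/ocl.hpp>
#include <iostream>

int main()
{
    std::cout << "OpenCL available: " << cv::ocl::haveOpenCL() << std::endl;
    std::cout << "OpenCL in use:    " << cv::ocl::useOpenCL() << std::endl;

    cv::ocl::setUseOpenCL(false);   // disable OpenCL code paths for this thread
    std::cout << "after disable:    " << cv::ocl::useOpenCL() << std::endl;
    return 0;
}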
@@ -1655,7 +1655,7 @@ size_t Device::profilingTimerResolution() const
 const Device& Device::getDefault()
 {
     const Context& ctx = Context::getDefault();
-    int idx = getCoreTlsData().get()->device;
+    int idx = getCoreTlsData().device;
     const Device& device = ctx.device(idx);
     return device;
 }
@@ -2557,9 +2557,10 @@ void attachContext(const String& platformName, void* platformID, void* context,
     CV_OCL_CHECK(clRetainContext((cl_context)context));

     // clear command queue, if any
-    getCoreTlsData().get()->oclQueue.finish();
+    CoreTLSData& data = getCoreTlsData();
+    data.oclQueue.finish();
     Queue q;
-    getCoreTlsData().get()->oclQueue = q;
+    data.oclQueue = q;

     return;
 } // attachContext()
@@ -2747,7 +2748,7 @@ void* Queue::ptr() const

 Queue& Queue::getDefault()
 {
-    Queue& q = getCoreTlsData().get()->oclQueue;
+    Queue& q = getCoreTlsData().oclQueue;
     if( !q.p && haveOpenCL() )
         q.create(Context::getDefault());
     return q;
@@ -76,13 +76,13 @@ bool haveOpenVX()
 bool useOpenVX()
 {
 #ifdef HAVE_OPENVX
-    CoreTLSData* data = getCoreTlsData().get();
-    if( data->useOpenVX < 0 )
+    CoreTLSData& data = getCoreTlsData();
+    if (data.useOpenVX < 0)
     {
         // enabled (if available) by default
-        data->useOpenVX = haveOpenVX() ? 1 : 0;
+        data.useOpenVX = haveOpenVX() ? 1 : 0;
     }
-    return data->useOpenVX > 0;
+    return data.useOpenVX > 0;
 #else
     return false;
 #endif
@@ -93,8 +93,8 @@ void setUseOpenVX(bool flag)
 #ifdef HAVE_OPENVX
     if( haveOpenVX() )
     {
-        CoreTLSData* data = getCoreTlsData().get();
-        data->useOpenVX = flag ? 1 : 0;
+        CoreTLSData& data = getCoreTlsData();
+        data.useOpenVX = flag ? 1 : 0;
     }
 #else
     CV_Assert(!flag && "OpenVX support isn't enabled at compile time");
@@ -344,7 +344,7 @@ struct CoreTLSData
 #endif
 };

-TLSData<CoreTLSData>& getCoreTlsData();
+CoreTLSData& getCoreTlsData();

 #if defined(BUILD_SHARED_LIBS)
 #if defined _WIN32 || defined WINCE
@@ -653,7 +653,7 @@ void RNG::fill( InputOutputArray _mat, int disttype,

 cv::RNG& cv::theRNG()
 {
-    return getCoreTlsData().get()->rng;
+    return getCoreTlsData().rng;
 }

 void cv::setRNGSeed(int seed)
@@ -50,6 +50,9 @@

 #include <opencv2/core/utils/logger.hpp>

+#include <opencv2/core/utils/tls.hpp>
+#include <opencv2/core/utils/instrumentation.hpp>
+
 namespace cv {

 static Mutex* __initialization_mutex = NULL;
@@ -1312,6 +1315,8 @@ bool __termination = false;

 //////////////////////////////// thread-local storage ////////////////////////////////

+namespace details {
+
 #ifdef _WIN32
 #ifdef _MSC_VER
 #pragma warning(disable:4505) // unreferenced local function has been removed
@@ -1373,9 +1378,10 @@ void TlsAbstraction::SetData(void *pData)
 }
 #endif
 #else // _WIN32
+static void opencv_tls_destructor(void* pData);
 TlsAbstraction::TlsAbstraction()
 {
-    CV_Assert(pthread_key_create(&tlsKey, NULL) == 0);
+    CV_Assert(pthread_key_create(&tlsKey, opencv_tls_destructor) == 0);
 }
 TlsAbstraction::~TlsAbstraction()
 {
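The change above registers opencv_tls_destructor with the TLS key so that per-thread data is reclaimed when a thread exits. A standalone sketch of the underlying POSIX pattern; the names and the int payload are illustrative and not OpenCV code:

// POSIX-only sketch of pthread_key_create with a destructor callback.
#include <pthread.h>
#include <cstdio>

static pthread_key_t key;

static void destroy_payload(void* p)        // runs at thread exit for non-NULL values
{
    std::printf("releasing payload %p\n", p);
    delete static_cast<int*>(p);
}

static void* worker(void*)
{
    pthread_setspecific(key, new int(42));  // attach per-thread data
    return nullptr;                         // destructor fires after the thread returns
}

int main()
{
    pthread_key_create(&key, destroy_payload);
    pthread_t t;
    pthread_create(&t, nullptr, worker, nullptr);
    pthread_join(t, nullptr);
    pthread_key_delete(key);
    return 0;
}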
@ -1416,42 +1422,46 @@ public:
|
||||
}
|
||||
~TlsStorage()
|
||||
{
|
||||
for(size_t i = 0; i < threads.size(); i++)
|
||||
{
|
||||
if(threads[i])
|
||||
{
|
||||
/* Current architecture doesn't allow proper global objects release, so this check can cause crashes
|
||||
|
||||
// Check if all slots were properly cleared
|
||||
for(size_t j = 0; j < threads[i]->slots.size(); j++)
|
||||
{
|
||||
CV_Assert(threads[i]->slots[j] == 0);
|
||||
}
|
||||
*/
|
||||
delete threads[i];
|
||||
}
|
||||
}
|
||||
threads.clear();
|
||||
// TlsStorage object should not be released
|
||||
// There is no reliable way to avoid problems caused by static initialization order fiasco
|
||||
CV_LOG_FATAL(NULL, "TlsStorage::~TlsStorage() call is not expected");
|
||||
}
|
||||
|
||||
void releaseThread()
|
||||
void releaseThread(void* tlsValue = NULL)
|
||||
{
|
||||
ThreadData *pTD = tlsValue == NULL ? (ThreadData*)tls.GetData() : (ThreadData*)tlsValue;
|
||||
if (pTD == NULL)
|
||||
return; // no OpenCV TLS data for this thread
|
||||
AutoLock guard(mtxGlobalAccess);
|
||||
ThreadData *pTD = (ThreadData*)tls.GetData();
|
||||
for(size_t i = 0; i < threads.size(); i++)
|
||||
for (size_t i = 0; i < threads.size(); i++)
|
||||
{
|
||||
if(pTD == threads[i])
|
||||
if (pTD == threads[i])
|
||||
{
|
||||
threads[i] = 0;
|
||||
break;
|
||||
threads[i] = NULL;
|
||||
if (tlsValue == NULL)
|
||||
tls.SetData(0);
|
||||
std::vector<void*>& thread_slots = pTD->slots;
|
||||
for (size_t slotIdx = 0; slotIdx < thread_slots.size(); slotIdx++)
|
||||
{
|
||||
void* pData = thread_slots[slotIdx];
|
||||
thread_slots[slotIdx] = NULL;
|
||||
if (!pData)
|
||||
continue;
|
||||
TLSDataContainer* container = tlsSlots[slotIdx].container;
|
||||
if (container)
|
||||
container->deleteDataInstance(pData);
|
||||
else
|
||||
CV_LOG_ERROR(NULL, "TLS: container for slotIdx=" << slotIdx << " is NULL. Can't release thread data");
|
||||
}
|
||||
delete pTD;
|
||||
return;
|
||||
}
|
||||
}
|
||||
tls.SetData(0);
|
||||
delete pTD;
|
||||
CV_LOG_WARNING(NULL, "TLS: Can't release thread TLS data (unknown pointer or data race): " << (void*)pTD);
|
||||
}
|
||||
|
||||
// Reserve TLS storage index
|
||||
size_t reserveSlot()
|
||||
size_t reserveSlot(TLSDataContainer* container)
|
||||
{
|
||||
AutoLock guard(mtxGlobalAccess);
|
||||
CV_Assert(tlsSlotsSize == tlsSlots.size());
|
||||
@ -1459,15 +1469,15 @@ public:
|
||||
// Find unused slots
|
||||
for(size_t slot = 0; slot < tlsSlotsSize; slot++)
|
||||
{
|
||||
if(!tlsSlots[slot])
|
||||
if (tlsSlots[slot].container == NULL)
|
||||
{
|
||||
tlsSlots[slot] = 1;
|
||||
tlsSlots[slot].container = container;
|
||||
return slot;
|
||||
}
|
||||
}
|
||||
|
||||
// Create new slot
|
||||
tlsSlots.push_back(1); tlsSlotsSize++;
|
||||
tlsSlots.push_back(TlsSlotInfo(container)); tlsSlotsSize++;
|
||||
return tlsSlotsSize - 1;
|
||||
}
|
||||
|
||||
@ -1492,7 +1502,9 @@ public:
|
||||
}
|
||||
|
||||
if (!keepSlot)
|
||||
tlsSlots[slotIdx] = 0;
|
||||
{
|
||||
tlsSlots[slotIdx].container = NULL; // mark slot as free (see reserveSlot() implementation)
|
||||
}
|
||||
}
|
||||
|
||||
// Get data by TLS storage index
|
||||
@ -1541,8 +1553,26 @@ public:
|
||||
tls.SetData((void*)threadData);
|
||||
{
|
||||
AutoLock guard(mtxGlobalAccess);
|
||||
threadData->idx = threads.size();
|
||||
threads.push_back(threadData);
|
||||
|
||||
bool found = false;
|
||||
// Find unused slots
|
||||
for(size_t slot = 0; slot < threads.size(); slot++)
|
||||
{
|
||||
if (threads[slot] == NULL)
|
||||
{
|
||||
threadData->idx = (int)slot;
|
||||
threads[slot] = threadData;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!found)
|
||||
{
|
||||
// Create new slot
|
||||
threadData->idx = threads.size();
|
||||
threads.push_back(threadData);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1559,8 +1589,14 @@ private:
|
||||
|
||||
Mutex mtxGlobalAccess; // Shared objects operation guard
|
||||
size_t tlsSlotsSize; // equal to tlsSlots.size() in synchronized sections
|
||||
// without synchronization this counter doesn't desrease - it is used for slotIdx sanity checks
|
||||
std::vector<int> tlsSlots; // TLS keys state
|
||||
// without synchronization this counter doesn't decrease - it is used for slotIdx sanity checks
|
||||
|
||||
struct TlsSlotInfo
|
||||
{
|
||||
TlsSlotInfo(TLSDataContainer* _container) : container(_container) {}
|
||||
TLSDataContainer* container; // attached container (to dispose data of terminated threads)
|
||||
};
|
||||
std::vector<struct TlsSlotInfo> tlsSlots; // TLS keys state
|
||||
std::vector<ThreadData*> threads; // Array for all allocated data. Thread data pointers are placed here to allow data cleanup
|
||||
};
|
||||
|
||||
@ -1570,9 +1606,19 @@ static TlsStorage &getTlsStorage()
|
||||
CV_SINGLETON_LAZY_INIT_REF(TlsStorage, new TlsStorage())
|
||||
}
|
||||
|
||||
#ifndef _WIN32 // pthread key destructor
|
||||
static void opencv_tls_destructor(void* pData)
|
||||
{
|
||||
getTlsStorage().releaseThread(pData);
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace details
|
||||
using namespace details;
|
||||
|
||||
TLSDataContainer::TLSDataContainer()
|
||||
{
|
||||
key_ = (int)getTlsStorage().reserveSlot(); // Reserve key from TLS storage
|
||||
key_ = (int)getTlsStorage().reserveSlot(this); // Reserve key from TLS storage
|
||||
}
|
||||
|
||||
TLSDataContainer::~TLSDataContainer()
|
||||
@ -1585,11 +1631,17 @@ void TLSDataContainer::gatherData(std::vector<void*> &data) const
|
||||
getTlsStorage().gather(key_, data);
|
||||
}
|
||||
|
||||
void TLSDataContainer::detachData(std::vector<void*> &data)
|
||||
{
|
||||
getTlsStorage().releaseSlot(key_, data, true);
|
||||
}
|
||||
|
||||
void TLSDataContainer::release()
|
||||
{
|
||||
std::vector<void*> data;
|
||||
data.reserve(32);
|
||||
getTlsStorage().releaseSlot(key_, data); // Release key and get stored data for proper destruction
|
||||
if (key_ == -1)
|
||||
return; // already released
|
||||
std::vector<void*> data; data.reserve(32);
|
||||
getTlsStorage().releaseSlot(key_, data, false); // Release key and get stored data for proper destruction
|
||||
key_ = -1;
|
||||
for(size_t i = 0; i < data.size(); i++) // Delete all associated data
|
||||
deleteDataInstance(data[i]);
|
||||
@ -1597,8 +1649,7 @@ void TLSDataContainer::release()
|
||||
|
||||
void TLSDataContainer::cleanup()
|
||||
{
|
||||
std::vector<void*> data;
|
||||
data.reserve(32);
|
||||
std::vector<void*> data; data.reserve(32);
|
||||
getTlsStorage().releaseSlot(key_, data, true); // Extract stored data with removal from TLS tables
|
||||
for(size_t i = 0; i < data.size(); i++) // Delete all associated data
|
||||
deleteDataInstance(data[i]);
|
||||
@ -1617,11 +1668,16 @@ void* TLSDataContainer::getData() const
|
||||
return pData;
|
||||
}
|
||||
|
||||
TLSData<CoreTLSData>& getCoreTlsData()
|
||||
static TLSData<CoreTLSData>& getCoreTlsDataTLS()
|
||||
{
|
||||
CV_SINGLETON_LAZY_INIT_REF(TLSData<CoreTLSData>, new TLSData<CoreTLSData>())
|
||||
}
|
||||
|
||||
CoreTLSData& getCoreTlsData()
|
||||
{
|
||||
return getCoreTlsDataTLS().getRef();
|
||||
}
|
||||
|
||||
#if defined CVAPI_EXPORTS && defined _WIN32 && !defined WINCE
|
||||
#ifdef WINRT
|
||||
#pragma warning(disable:4447) // Disable warning 'main' signature found without threading model
|
||||
@ -2269,12 +2325,12 @@ String getIppVersion()
|
||||
bool useIPP()
|
||||
{
|
||||
#ifdef HAVE_IPP
|
||||
CoreTLSData* data = getCoreTlsData().get();
|
||||
if(data->useIPP < 0)
|
||||
CoreTLSData& data = getCoreTlsData();
|
||||
if (data.useIPP < 0)
|
||||
{
|
||||
data->useIPP = getIPPSingleton().useIPP;
|
||||
data.useIPP = getIPPSingleton().useIPP;
|
||||
}
|
||||
return (data->useIPP > 0);
|
||||
return (data.useIPP > 0);
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
@ -2282,24 +2338,24 @@ bool useIPP()
|
||||
|
||||
void setUseIPP(bool flag)
|
||||
{
|
||||
CoreTLSData* data = getCoreTlsData().get();
|
||||
CoreTLSData& data = getCoreTlsData();
|
||||
#ifdef HAVE_IPP
|
||||
data->useIPP = (getIPPSingleton().useIPP)?flag:false;
|
||||
data.useIPP = (getIPPSingleton().useIPP)?flag:false;
|
||||
#else
|
||||
CV_UNUSED(flag);
|
||||
data->useIPP = false;
|
||||
data.useIPP = false;
|
||||
#endif
|
||||
}
|
||||
|
||||
bool useIPP_NotExact()
|
||||
{
|
||||
#ifdef HAVE_IPP
|
||||
CoreTLSData* data = getCoreTlsData().get();
|
||||
if(data->useIPP_NE < 0)
|
||||
CoreTLSData& data = getCoreTlsData();
|
||||
if (data.useIPP_NE < 0)
|
||||
{
|
||||
data->useIPP_NE = getIPPSingleton().useIPP_NE;
|
||||
data.useIPP_NE = getIPPSingleton().useIPP_NE;
|
||||
}
|
||||
return (data->useIPP_NE > 0);
|
||||
return (data.useIPP_NE > 0);
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
@ -2307,12 +2363,12 @@ bool useIPP_NotExact()
|
||||
|
||||
void setUseIPP_NotExact(bool flag)
|
||||
{
|
||||
CoreTLSData* data = getCoreTlsData().get();
|
||||
CoreTLSData& data = getCoreTlsData();
|
||||
#ifdef HAVE_IPP
|
||||
data->useIPP_NE = flag;
|
||||
data.useIPP_NE = flag;
|
||||
#else
|
||||
CV_UNUSED(flag);
|
||||
data->useIPP_NE = false;
|
||||
data.useIPP_NE = false;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -196,14 +196,27 @@ static __itt_domain* domain = NULL;
|
||||
|
||||
static bool isITTEnabled()
|
||||
{
|
||||
static bool isInitialized = false;
|
||||
static volatile bool isInitialized = false;
|
||||
static bool isEnabled = false;
|
||||
if (!isInitialized)
|
||||
{
|
||||
isEnabled = !!(__itt_api_version());
|
||||
CV_LOG_ITT("ITT is " << (isEnabled ? "enabled" : "disabled"));
|
||||
domain = __itt_domain_create("OpenCVTrace");
|
||||
isInitialized = true;
|
||||
cv::AutoLock lock(cv::getInitializationMutex());
|
||||
if (!isInitialized)
|
||||
{
|
||||
bool param_traceITTEnable = utils::getConfigurationParameterBool("OPENCV_TRACE_ITT_ENABLE", true);
|
||||
if (param_traceITTEnable)
|
||||
{
|
||||
isEnabled = !!(__itt_api_version());
|
||||
CV_LOG_ITT("ITT is " << (isEnabled ? "enabled" : "disabled"));
|
||||
domain = __itt_domain_create("OpenCVTrace");
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_LOG_ITT("ITT is disabled through OpenCV parameter");
|
||||
isEnabled = false;
|
||||
}
|
||||
isInitialized = true;
|
||||
}
|
||||
}
|
||||
return isEnabled;
|
||||
}
|
||||
|
@ -43,6 +43,8 @@
|
||||
#include "opencl_kernels_core.hpp"
|
||||
#include "umatrix.hpp"
|
||||
|
||||
#include <opencv2/core/utils/tls.hpp>
|
||||
|
||||
///////////////////////////////// UMat implementation ///////////////////////////////
|
||||
|
||||
namespace cv {
|
||||
|
@ -4,6 +4,8 @@
|
||||
#include "test_precomp.hpp"
|
||||
#include "opencv2/core/utils/logger.hpp"
|
||||
|
||||
#include "test_utils_tls.impl.hpp"
|
||||
|
||||
namespace opencv_test { namespace {
|
||||
|
||||
static const char * const keys =
|
||||
|
134
modules/core/test/test_utils_tls.impl.hpp
Normal file
134
modules/core/test/test_utils_tls.impl.hpp
Normal file
@ -0,0 +1,134 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
// This is .hpp file included from test_utils.cpp
|
||||
|
||||
#ifdef CV_CXX11
|
||||
#include <thread> // std::thread
|
||||
#endif
|
||||
|
||||
#include "opencv2/core/utils/tls.hpp"
|
||||
|
||||
namespace opencv_test { namespace {
|
||||
|
||||
class TLSReporter
|
||||
{
|
||||
public:
|
||||
static int g_last_id;
|
||||
static int g_allocated;
|
||||
|
||||
int id;
|
||||
|
||||
TLSReporter()
|
||||
{
|
||||
id = CV_XADD(&g_last_id, 1);
|
||||
CV_XADD(&g_allocated, 1);
|
||||
}
|
||||
~TLSReporter()
|
||||
{
|
||||
CV_XADD(&g_allocated, -1);
|
||||
}
|
||||
};
|
||||
|
||||
int TLSReporter::g_last_id = 0;
|
||||
int TLSReporter::g_allocated = 0;
|
||||
|
||||
#ifdef CV_CXX11
|
||||
|
||||
template<typename T>
|
||||
static void callNThreadsWithTLS(int N, TLSData<T>& tls)
|
||||
{
|
||||
std::vector<std::thread> threads(N);
|
||||
for (int i = 0; i < N; i++)
|
||||
{
|
||||
threads[i] = std::thread([&]() {
|
||||
TLSReporter* pData = tls.get();
|
||||
(void)pData;
|
||||
});
|
||||
}
|
||||
for (int i = 0; i < N; i++)
|
||||
{
|
||||
threads[i].join();
|
||||
}
|
||||
threads.clear();
|
||||
}
|
||||
|
||||
TEST(Core_TLS, HandleThreadTermination)
|
||||
{
|
||||
const int init_id = TLSReporter::g_last_id;
|
||||
const int init_allocated = TLSReporter::g_allocated;
|
||||
|
||||
const int N = 4;
|
||||
TLSData<TLSReporter> tls;
|
||||
|
||||
// use TLS
|
||||
ASSERT_NO_THROW(callNThreadsWithTLS(N, tls));
|
||||
|
||||
EXPECT_EQ(init_id + N, TLSReporter::g_last_id);
|
||||
EXPECT_EQ(init_allocated + 0, TLSReporter::g_allocated);
|
||||
}
|
||||
|
||||
|
||||
static void testTLSAccumulator(bool detachFirst)
|
||||
{
|
||||
const int init_id = TLSReporter::g_last_id;
|
||||
const int init_allocated = TLSReporter::g_allocated;
|
||||
|
||||
const int N = 4;
|
||||
TLSDataAccumulator<TLSReporter> tls;
|
||||
|
||||
{ // empty TLS checks
|
||||
std::vector<TLSReporter*>& data0 = tls.detachData();
|
||||
EXPECT_EQ((size_t)0, data0.size());
|
||||
tls.cleanupDetachedData();
|
||||
}
|
||||
|
||||
// use TLS
|
||||
ASSERT_NO_THROW(callNThreadsWithTLS(N, tls));
|
||||
|
||||
EXPECT_EQ(init_id + N, TLSReporter::g_last_id);
|
||||
EXPECT_EQ(init_allocated + N, TLSReporter::g_allocated);
|
||||
|
||||
if (detachFirst)
|
||||
{
|
||||
std::vector<TLSReporter*>& data1 = tls.detachData();
|
||||
EXPECT_EQ((size_t)N, data1.size());
|
||||
|
||||
// no data through gather after detachData()
|
||||
std::vector<TLSReporter*> data2;
|
||||
tls.gather(data2);
|
||||
EXPECT_EQ((size_t)0, data2.size());
|
||||
|
||||
tls.cleanupDetachedData();
|
||||
|
||||
EXPECT_EQ(init_id + N, TLSReporter::g_last_id);
|
||||
EXPECT_EQ(init_allocated + 0, TLSReporter::g_allocated);
|
||||
EXPECT_EQ((size_t)0, data1.size());
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<TLSReporter*> data2;
|
||||
tls.gather(data2);
|
||||
EXPECT_EQ((size_t)N, data2.size());
|
||||
|
||||
std::vector<TLSReporter*>& data1 = tls.detachData();
|
||||
EXPECT_EQ((size_t)N, data1.size());
|
||||
|
||||
tls.cleanupDetachedData();
|
||||
|
||||
EXPECT_EQ((size_t)0, data1.size());
|
||||
// data2 is not empty, but it has invalid contents
|
||||
EXPECT_EQ((size_t)N, data2.size());
|
||||
}
|
||||
|
||||
EXPECT_EQ(init_id + N, TLSReporter::g_last_id);
|
||||
EXPECT_EQ(init_allocated + 0, TLSReporter::g_allocated);
|
||||
}
|
||||
|
||||
TEST(Core_TLS, AccumulatorHoldData_detachData) { testTLSAccumulator(true); }
|
||||
TEST(Core_TLS, AccumulatorHoldData_gather) { testTLSAccumulator(false); }
|
||||
|
||||
#endif
|
||||
|
||||
}} // namespace
|
@ -86,6 +86,7 @@
|
||||
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencv2/core/hal/intrin.hpp"
|
||||
|
||||
#include <limits>
|
||||
|
||||
@ -111,7 +112,7 @@ public:
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bayer2RGBA(const T*, int, T*, int, int) const
|
||||
int bayer2RGBA(const T*, int, T*, int, int, const T) const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@ -122,279 +123,14 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
#if CV_SSE2
|
||||
#if CV_SIMD128
|
||||
class SIMDBayerInterpolator_8u
|
||||
{
|
||||
public:
|
||||
SIMDBayerInterpolator_8u()
|
||||
{
|
||||
use_simd = checkHardwareSupport(CV_CPU_SSE2);
|
||||
}
|
||||
|
||||
int bayer2Gray(const uchar* bayer, int bayer_step, uchar* dst,
|
||||
int width, int bcoeff, int gcoeff, int rcoeff) const
|
||||
{
|
||||
if( !use_simd )
|
||||
return 0;
|
||||
|
||||
__m128i _b2y = _mm_set1_epi16((short)(rcoeff*2));
|
||||
__m128i _g2y = _mm_set1_epi16((short)(gcoeff*2));
|
||||
__m128i _r2y = _mm_set1_epi16((short)(bcoeff*2));
|
||||
const uchar* bayer_end = bayer + width;
|
||||
|
||||
for( ; bayer <= bayer_end - 18; bayer += 14, dst += 14 )
|
||||
{
|
||||
__m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
|
||||
__m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));
|
||||
__m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));
|
||||
|
||||
__m128i b1 = _mm_add_epi16(_mm_srli_epi16(_mm_slli_epi16(r0, 8), 7),
|
||||
_mm_srli_epi16(_mm_slli_epi16(r2, 8), 7));
|
||||
__m128i b0 = _mm_add_epi16(b1, _mm_srli_si128(b1, 2));
|
||||
b1 = _mm_slli_epi16(_mm_srli_si128(b1, 2), 1);
|
||||
|
||||
__m128i g0 = _mm_add_epi16(_mm_srli_epi16(r0, 7), _mm_srli_epi16(r2, 7));
|
||||
__m128i g1 = _mm_srli_epi16(_mm_slli_epi16(r1, 8), 7);
|
||||
g0 = _mm_add_epi16(g0, _mm_add_epi16(g1, _mm_srli_si128(g1, 2)));
|
||||
g1 = _mm_slli_epi16(_mm_srli_si128(g1, 2), 2);
|
||||
|
||||
r0 = _mm_srli_epi16(r1, 8);
|
||||
r1 = _mm_slli_epi16(_mm_add_epi16(r0, _mm_srli_si128(r0, 2)), 2);
|
||||
r0 = _mm_slli_epi16(r0, 3);
|
||||
|
||||
g0 = _mm_add_epi16(_mm_mulhi_epi16(b0, _b2y), _mm_mulhi_epi16(g0, _g2y));
|
||||
g1 = _mm_add_epi16(_mm_mulhi_epi16(b1, _b2y), _mm_mulhi_epi16(g1, _g2y));
|
||||
g0 = _mm_add_epi16(g0, _mm_mulhi_epi16(r0, _r2y));
|
||||
g1 = _mm_add_epi16(g1, _mm_mulhi_epi16(r1, _r2y));
|
||||
g0 = _mm_srli_epi16(g0, 2);
|
||||
g1 = _mm_srli_epi16(g1, 2);
|
||||
g0 = _mm_packus_epi16(g0, g0);
|
||||
g1 = _mm_packus_epi16(g1, g1);
|
||||
g0 = _mm_unpacklo_epi8(g0, g1);
|
||||
_mm_storeu_si128((__m128i*)dst, g0);
|
||||
}
|
||||
|
||||
return (int)(bayer - (bayer_end - width));
|
||||
}
|
||||
|
||||
int bayer2RGB(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
|
||||
{
|
||||
if( !use_simd )
|
||||
return 0;
|
||||
/*
|
||||
B G B G | B G B G | B G B G | B G B G
|
||||
G R G R | G R G R | G R G R | G R G R
|
||||
B G B G | B G B G | B G B G | B G B G
|
||||
*/
|
||||
|
||||
__m128i delta1 = _mm_set1_epi16(1), delta2 = _mm_set1_epi16(2);
|
||||
__m128i mask = _mm_set1_epi16(blue < 0 ? -1 : 0), z = _mm_setzero_si128();
|
||||
__m128i masklo = _mm_set1_epi16(0x00ff);
|
||||
const uchar* bayer_end = bayer + width;
|
||||
|
||||
for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 )
|
||||
{
|
||||
__m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
|
||||
__m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));
|
||||
__m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));
|
||||
|
||||
__m128i b1 = _mm_add_epi16(_mm_and_si128(r0, masklo), _mm_and_si128(r2, masklo));
|
||||
__m128i nextb1 = _mm_srli_si128(b1, 2);
|
||||
__m128i b0 = _mm_add_epi16(b1, nextb1);
|
||||
b1 = _mm_srli_epi16(_mm_add_epi16(nextb1, delta1), 1);
|
||||
b0 = _mm_srli_epi16(_mm_add_epi16(b0, delta2), 2);
|
||||
// b0 b2 ... b14 b1 b3 ... b15
|
||||
b0 = _mm_packus_epi16(b0, b1);
|
||||
|
||||
__m128i g0 = _mm_add_epi16(_mm_srli_epi16(r0, 8), _mm_srli_epi16(r2, 8));
|
||||
__m128i g1 = _mm_and_si128(r1, masklo);
|
||||
g0 = _mm_add_epi16(g0, _mm_add_epi16(g1, _mm_srli_si128(g1, 2)));
|
||||
g1 = _mm_srli_si128(g1, 2);
|
||||
g0 = _mm_srli_epi16(_mm_add_epi16(g0, delta2), 2);
|
||||
// g0 g2 ... g14 g1 g3 ... g15
|
||||
g0 = _mm_packus_epi16(g0, g1);
|
||||
|
||||
r0 = _mm_srli_epi16(r1, 8);
|
||||
r1 = _mm_add_epi16(r0, _mm_srli_si128(r0, 2));
|
||||
r1 = _mm_srli_epi16(_mm_add_epi16(r1, delta1), 1);
|
||||
// r0 r2 ... r14 r1 r3 ... r15
|
||||
r0 = _mm_packus_epi16(r0, r1);
|
||||
|
||||
b1 = _mm_and_si128(_mm_xor_si128(b0, r0), mask);
|
||||
b0 = _mm_xor_si128(b0, b1);
|
||||
r0 = _mm_xor_si128(r0, b1);
|
||||
|
||||
// b1 g1 b3 g3 b5 g5...
|
||||
b1 = _mm_unpackhi_epi8(b0, g0);
|
||||
// b0 g0 b2 g2 b4 g4 ....
|
||||
b0 = _mm_unpacklo_epi8(b0, g0);
|
||||
|
||||
// r1 0 r3 0 r5 0 ...
|
||||
r1 = _mm_unpackhi_epi8(r0, z);
|
||||
// r0 0 r2 0 r4 0 ...
|
||||
r0 = _mm_unpacklo_epi8(r0, z);
|
||||
|
||||
// 0 b0 g0 r0 0 b2 g2 r2 ...
|
||||
g0 = _mm_slli_si128(_mm_unpacklo_epi16(b0, r0), 1);
|
||||
// 0 b8 g8 r8 0 b10 g10 r10 ...
|
||||
g1 = _mm_slli_si128(_mm_unpackhi_epi16(b0, r0), 1);
|
||||
|
||||
// b1 g1 r1 0 b3 g3 r3 0 ...
|
||||
r0 = _mm_unpacklo_epi16(b1, r1);
|
||||
// b9 g9 r9 0 b11 g11 r11 0 ...
|
||||
r1 = _mm_unpackhi_epi16(b1, r1);
|
||||
|
||||
// 0 b0 g0 r0 b1 g1 r1 0 ...
|
||||
b0 = _mm_srli_si128(_mm_unpacklo_epi32(g0, r0), 1);
|
||||
// 0 b4 g4 r4 b5 g5 r5 0 ...
|
||||
b1 = _mm_srli_si128(_mm_unpackhi_epi32(g0, r0), 1);
|
||||
|
||||
_mm_storel_epi64((__m128i*)(dst-1+0), b0);
|
||||
_mm_storel_epi64((__m128i*)(dst-1+6*1), _mm_srli_si128(b0, 8));
|
||||
_mm_storel_epi64((__m128i*)(dst-1+6*2), b1);
|
||||
_mm_storel_epi64((__m128i*)(dst-1+6*3), _mm_srli_si128(b1, 8));
|
||||
|
||||
// 0 b8 g8 r8 b9 g9 r9 0 ...
|
||||
g0 = _mm_srli_si128(_mm_unpacklo_epi32(g1, r1), 1);
|
||||
// 0 b12 g12 r12 b13 g13 r13 0 ...
|
||||
g1 = _mm_srli_si128(_mm_unpackhi_epi32(g1, r1), 1);
|
||||
|
||||
_mm_storel_epi64((__m128i*)(dst-1+6*4), g0);
|
||||
_mm_storel_epi64((__m128i*)(dst-1+6*5), _mm_srli_si128(g0, 8));
|
||||
|
||||
_mm_storel_epi64((__m128i*)(dst-1+6*6), g1);
|
||||
}
|
||||
|
||||
return (int)(bayer - (bayer_end - width));
|
||||
}
|
||||
|
||||
int bayer2RGBA(const uchar*, int, uchar*, int, int) const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bayer2RGB_EA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
|
||||
{
|
||||
if (!use_simd)
|
||||
return 0;
|
||||
|
||||
const uchar* bayer_end = bayer + width;
|
||||
__m128i masklow = _mm_set1_epi16(0x00ff);
|
||||
__m128i delta1 = _mm_set1_epi16(1), delta2 = _mm_set1_epi16(2);
|
||||
__m128i full = _mm_set1_epi16(-1), z = _mm_setzero_si128();
|
||||
__m128i mask = _mm_set1_epi16(blue > 0 ? -1 : 0);
|
||||
|
||||
for ( ; bayer <= bayer_end - 18; bayer += 14, dst += 42)
|
||||
{
|
||||
/*
|
||||
B G B G | B G B G | B G B G | B G B G
|
||||
G R G R | G R G R | G R G R | G R G R
|
||||
B G B G | B G B G | B G B G | B G B G
|
||||
*/
|
||||
|
||||
__m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
|
||||
__m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));
|
||||
__m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));
|
||||
|
||||
__m128i b1 = _mm_add_epi16(_mm_and_si128(r0, masklow), _mm_and_si128(r2, masklow));
|
||||
__m128i nextb1 = _mm_srli_si128(b1, 2);
|
||||
__m128i b0 = _mm_add_epi16(b1, nextb1);
|
||||
b1 = _mm_srli_epi16(_mm_add_epi16(nextb1, delta1), 1);
|
||||
b0 = _mm_srli_epi16(_mm_add_epi16(b0, delta2), 2);
|
||||
// b0 b2 ... b14 b1 b3 ... b15
|
||||
b0 = _mm_packus_epi16(b0, b1);
|
||||
|
||||
// vertical sum
|
||||
__m128i r0g = _mm_srli_epi16(r0, 8);
|
||||
__m128i r2g = _mm_srli_epi16(r2, 8);
|
||||
__m128i sumv = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(r0g, r2g), delta1), 1);
|
||||
// gorizontal sum
|
||||
__m128i g1 = _mm_and_si128(masklow, r1);
|
||||
__m128i nextg1 = _mm_srli_si128(g1, 2);
|
||||
__m128i sumg = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(g1, nextg1), delta1), 1);
|
||||
|
||||
// gradients
|
||||
__m128i gradv = _mm_adds_epi16(_mm_subs_epu16(r0g, r2g), _mm_subs_epu16(r2g, r0g));
|
||||
__m128i gradg = _mm_adds_epi16(_mm_subs_epu16(nextg1, g1), _mm_subs_epu16(g1, nextg1));
|
||||
__m128i gmask = _mm_cmpgt_epi16(gradg, gradv);
|
||||
|
||||
__m128i g0 = _mm_add_epi16(_mm_and_si128(gmask, sumv), _mm_and_si128(sumg, _mm_xor_si128(gmask, full)));
|
||||
// g0 g2 ... g14 g1 g3 ...
|
||||
g0 = _mm_packus_epi16(g0, nextg1);
|
||||
|
||||
r0 = _mm_srli_epi16(r1, 8);
|
||||
r1 = _mm_add_epi16(r0, _mm_srli_si128(r0, 2));
|
||||
r1 = _mm_srli_epi16(_mm_add_epi16(r1, delta1), 1);
|
||||
// r0 r2 ... r14 r1 r3 ... r15
|
||||
r0 = _mm_packus_epi16(r0, r1);
|
||||
|
||||
b1 = _mm_and_si128(_mm_xor_si128(b0, r0), mask);
|
||||
b0 = _mm_xor_si128(b0, b1);
|
||||
r0 = _mm_xor_si128(r0, b1);
|
||||
|
||||
// b1 g1 b3 g3 b5 g5...
|
||||
b1 = _mm_unpackhi_epi8(b0, g0);
|
||||
// b0 g0 b2 g2 b4 g4 ....
|
||||
b0 = _mm_unpacklo_epi8(b0, g0);
|
||||
|
||||
// r1 0 r3 0 r5 0 ...
|
||||
r1 = _mm_unpackhi_epi8(r0, z);
|
||||
// r0 0 r2 0 r4 0 ...
|
||||
r0 = _mm_unpacklo_epi8(r0, z);
|
||||
|
||||
// 0 b0 g0 r0 0 b2 g2 r2 ...
|
||||
g0 = _mm_slli_si128(_mm_unpacklo_epi16(b0, r0), 1);
|
||||
// 0 b8 g8 r8 0 b10 g10 r10 ...
|
||||
g1 = _mm_slli_si128(_mm_unpackhi_epi16(b0, r0), 1);
|
||||
|
||||
// b1 g1 r1 0 b3 g3 r3 0 ...
|
||||
r0 = _mm_unpacklo_epi16(b1, r1);
|
||||
// b9 g9 r9 0 b11 g11 r11 0 ...
|
||||
r1 = _mm_unpackhi_epi16(b1, r1);
|
||||
|
||||
// 0 b0 g0 r0 b1 g1 r1 0 ...
|
||||
b0 = _mm_srli_si128(_mm_unpacklo_epi32(g0, r0), 1);
|
||||
// 0 b4 g4 r4 b5 g5 r5 0 ...
|
||||
b1 = _mm_srli_si128(_mm_unpackhi_epi32(g0, r0), 1);
|
||||
|
||||
_mm_storel_epi64((__m128i*)(dst+0), b0);
|
||||
_mm_storel_epi64((__m128i*)(dst+6*1), _mm_srli_si128(b0, 8));
|
||||
_mm_storel_epi64((__m128i*)(dst+6*2), b1);
|
||||
_mm_storel_epi64((__m128i*)(dst+6*3), _mm_srli_si128(b1, 8));
|
||||
|
||||
// 0 b8 g8 r8 b9 g9 r9 0 ...
|
||||
g0 = _mm_srli_si128(_mm_unpacklo_epi32(g1, r1), 1);
|
||||
// 0 b12 g12 r12 b13 g13 r13 0 ...
|
||||
g1 = _mm_srli_si128(_mm_unpackhi_epi32(g1, r1), 1);
|
||||
|
||||
_mm_storel_epi64((__m128i*)(dst+6*4), g0);
|
||||
_mm_storel_epi64((__m128i*)(dst+6*5), _mm_srli_si128(g0, 8));
|
||||
|
||||
_mm_storel_epi64((__m128i*)(dst+6*6), g1);
|
||||
}
|
||||
|
||||
return int(bayer - (bayer_end - width));
|
||||
}
|
||||
|
||||
bool use_simd;
|
||||
};
|
||||
#elif CV_NEON
|
||||
class SIMDBayerInterpolator_8u
|
||||
{
|
||||
public:
|
||||
SIMDBayerInterpolator_8u()
|
||||
{
|
||||
}
|
||||
|
||||
int bayer2Gray(const uchar* bayer, int bayer_step, uchar* dst,
|
||||
int width, int bcoeff, int gcoeff, int rcoeff) const
|
||||
{
|
||||
/*
|
||||
B G B G | B G B G | B G B G | B G B G
|
||||
G R G R | G R G R | G R G R | G R G R
|
||||
B G B G | B G B G | B G B G | B G B G
|
||||
*/
|
||||
|
||||
#if CV_NEON
|
||||
uint16x8_t masklo = vdupq_n_u16(255);
|
||||
const uchar* bayer_end = bayer + width;
|
||||
|
||||
@ -440,6 +176,40 @@ public:
|
||||
vst1_u8(dst, p.val[0]);
|
||||
vst1_u8(dst + 8, p.val[1]);
|
||||
}
|
||||
#else
|
||||
v_uint16x8 _b2y = v_setall_u16((ushort)(rcoeff*2));
|
||||
v_uint16x8 _g2y = v_setall_u16((ushort)(gcoeff*2));
|
||||
v_uint16x8 _r2y = v_setall_u16((ushort)(bcoeff*2));
|
||||
const uchar* bayer_end = bayer + width;
|
||||
|
||||
for( ; bayer <= bayer_end - 18; bayer += 14, dst += 14 )
|
||||
{
|
||||
v_uint16x8 r0 = v_load((ushort*)bayer);
|
||||
v_uint16x8 r1 = v_load((ushort*)(bayer+bayer_step));
|
||||
v_uint16x8 r2 = v_load((ushort*)(bayer+bayer_step*2));
|
||||
|
||||
v_uint16x8 b1 = ((r0 << 8) >> 7) + ((r2 << 8) >> 7);
|
||||
v_uint16x8 b0 = v_rotate_right<1>(b1) + b1;
|
||||
b1 = v_rotate_right<1>(b1) << 1;
|
||||
|
||||
v_uint16x8 g0 = (r0 >> 7) + (r2 >> 7);
|
||||
v_uint16x8 g1 = (r1 << 8) >> 7;
|
||||
g0 += v_rotate_right<1>(g1) + g1;
|
||||
g1 = v_rotate_right<1>(g1) << 2;
|
||||
|
||||
r0 = r1 >> 8;
|
||||
r1 = (v_rotate_right<1>(r0) + r0) << 2;
|
||||
r0 = r0 << 3;
|
||||
|
||||
g0 = (v_mul_hi(b0, _b2y) + v_mul_hi(g0, _g2y) + v_mul_hi(r0, _r2y)) >> 2;
|
||||
g1 = (v_mul_hi(b1, _b2y) + v_mul_hi(g1, _g2y) + v_mul_hi(r1, _r2y)) >> 2;
|
||||
v_uint8x16 pack_lo, pack_hi;
|
||||
v_zip(v_pack_u(v_reinterpret_as_s16(g0), v_reinterpret_as_s16(g0)),
|
||||
v_pack_u(v_reinterpret_as_s16(g1), v_reinterpret_as_s16(g1)),
|
||||
pack_lo, pack_hi);
|
||||
v_store(dst, pack_lo);
|
||||
}
|
||||
#endif
|
||||
|
||||
return (int)(bayer - (bayer_end - width));
|
||||
}
|
||||
@ -451,6 +221,8 @@ public:
|
||||
G R G R | G R G R | G R G R | G R G R
|
||||
B G B G | B G B G | B G B G | B G B G
|
||||
*/
|
||||
|
||||
#if CV_NEON
|
||||
uint16x8_t masklo = vdupq_n_u16(255);
|
||||
uint8x16x3_t pix;
|
||||
const uchar* bayer_end = bayer + width;
|
||||
@ -484,21 +256,109 @@ public:
|
||||
|
||||
vst3q_u8(dst-1, pix);
}
#else
v_uint16x8 delta1 = v_setall_u16(1), delta2 = v_setall_u16(2);
v_uint16x8 mask = v_setall_u16(blue < 0 ? (ushort)(-1) : 0);
v_uint16x8 masklo = v_setall_u16(0x00ff);
v_uint8x16 z = v_setzero_u8();
const uchar* bayer_end = bayer + width;

for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 )
{
v_uint16x8 r0 = v_load((ushort*)bayer);
v_uint16x8 r1 = v_load((ushort*)(bayer+bayer_step));
v_uint16x8 r2 = v_load((ushort*)(bayer+bayer_step*2));

v_uint16x8 b1 = (r0 & masklo) + (r2 & masklo);
v_uint16x8 nextb1 = v_rotate_right<1>(b1);
v_uint16x8 b0 = b1 + nextb1;
b1 = (nextb1 + delta1) >> 1;
b0 = (b0 + delta2) >> 2;
// b0 b2 ... b14 b1 b3 ... b15
b0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(b0), v_reinterpret_as_s16(b1)));

v_uint16x8 g0 = (r0 >> 8) + (r2 >> 8);
v_uint16x8 g1 = r1 & masklo;
g0 += v_rotate_right<1>(g1) + g1;
g1 = v_rotate_right<1>(g1);
g0 = (g0 + delta2) >> 2;
// g0 g2 ... g14 g1 g3 ... g15
g0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(g0), v_reinterpret_as_s16(g1)));

r0 = r1 >> 8;
r1 = v_rotate_right<1>(r0) + r0;
r1 = (r1 + delta1) >> 1;
// r0 r2 ... r14 r1 r3 ... r15
r0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(r0), v_reinterpret_as_s16(r1)));

b1 = (b0 ^ r0) & mask;
b0 = b0 ^ b1;
r0 = r0 ^ b1;

// b1 g1 b3 g3 b5 g5...
v_uint8x16 pack_lo, pack_hi;
v_zip(v_reinterpret_as_u8(b0), v_reinterpret_as_u8(g0), pack_lo, pack_hi);
b1 = v_reinterpret_as_u16(pack_hi);
// b0 g0 b2 g2 b4 g4 ....
b0 = v_reinterpret_as_u16(pack_lo);

// r1 0 r3 0 r5 0 ...
v_zip(v_reinterpret_as_u8(r0), z, pack_lo, pack_hi);
r1 = v_reinterpret_as_u16(pack_hi);
// r0 0 r2 0 r4 0 ...
r0 = v_reinterpret_as_u16(pack_lo);

// 0 b0 g0 r0 0 b2 g2 r2 ...
v_zip(b0, r0, g0, g1);
g0 = v_reinterpret_as_u16(v_rotate_left<1>(v_reinterpret_as_u8(g0)));
// 0 b8 g8 r8 0 b10 g10 r10 ...
g1 = v_reinterpret_as_u16(v_rotate_left<1>(v_reinterpret_as_u8(g1)));

// b1 g1 r1 0 b3 g3 r3 0 ...
v_zip(b1, r1, r0, r1);
// b9 g9 r9 0 b11 g11 r11 0 ...

// 0 b0 g0 r0 b1 g1 r1 0 ...
v_uint32x4 pack32_lo, pack32_hi;
v_zip(v_reinterpret_as_u32(g0), v_reinterpret_as_u32(r0), pack32_lo, pack32_hi);
b0 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_lo)));
// 0 b4 g4 r4 b5 g5 r5 0 ...
b1 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_hi)));

v_store_low(dst-1+0, v_reinterpret_as_u8(b0));
v_store_high(dst-1+6*1, v_reinterpret_as_u8(b0));
v_store_low(dst-1+6*2, v_reinterpret_as_u8(b1));
v_store_high(dst-1+6*3, v_reinterpret_as_u8(b1));

// 0 b8 g8 r8 b9 g9 r9 0 ...
v_zip(v_reinterpret_as_u32(g1), v_reinterpret_as_u32(r1), pack32_lo, pack32_hi);
g0 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_lo)));
// 0 b12 g12 r12 b13 g13 r13 0 ...
g1 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_hi)));

v_store_low(dst-1+6*4, v_reinterpret_as_u8(g0));
v_store_high(dst-1+6*5, v_reinterpret_as_u8(g0));

v_store_low(dst-1+6*6, v_reinterpret_as_u8(g1));
}
#endif

return (int)(bayer - (bayer_end - width));
}

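// Editor's note (not part of the patch): a scalar sketch of the plain bilinear
// interpolation that the vectorized loop above performs for one G/R pixel pair of the
// middle Bayer row, ignoring the blue-flag channel swap, the dst-1 offset, and border
// handling. The helper name is illustrative; 'up', 'cur' and 'down' point at the same
// interior column x (x >= 1) of the three rows shown in the comment diagram.
static void bilinearPairSketch(const unsigned char* up, const unsigned char* cur,
                               const unsigned char* down, unsigned char* dstBGR)
{
    // column x holds a green sample: B from the two vertical blue neighbours,
    // R from the two horizontal red neighbours (rounded averages, as in (v + 1) >> 1)
    dstBGR[0] = (unsigned char)((up[0] + down[0] + 1) >> 1);
    dstBGR[1] = cur[0];
    dstBGR[2] = (unsigned char)((cur[-1] + cur[1] + 1) >> 1);
    // column x+1 holds a red sample: B from the four diagonal blue neighbours,
    // G from the four cross green neighbours (rounded averages, as in (v + 2) >> 2)
    dstBGR[3] = (unsigned char)((up[0] + up[2] + down[0] + down[2] + 2) >> 2);
    dstBGR[4] = (unsigned char)((up[1] + down[1] + cur[0] + cur[2] + 2) >> 2);
    dstBGR[5] = cur[1];
}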
int bayer2RGBA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
int bayer2RGBA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue, const uchar alpha) const
{
/*
B G B G | B G B G | B G B G | B G B G
G R G R | G R G R | G R G R | G R G R
B G B G | B G B G | B G B G | B G B G
*/

#if CV_NEON
uint16x8_t masklo = vdupq_n_u16(255);
uint8x16x4_t pix;
const uchar* bayer_end = bayer + width;
pix.val[3] = vdupq_n_u8(255);
pix.val[3] = vdupq_n_u8(alpha);

for( ; bayer <= bayer_end - 18; bayer += 14, dst += 56 )
{

@@ -529,13 +389,198 @@ public:

vst4q_u8(dst-1, pix);
}
#else
v_uint16x8 delta1 = v_setall_u16(1), delta2 = v_setall_u16(2);
v_uint16x8 mask = v_setall_u16(blue < 0 ? (ushort)(-1) : 0);
v_uint16x8 masklo = v_setall_u16(0x00ff);
v_uint8x16 a = v_setall_u8(alpha);
const uchar* bayer_end = bayer + width;

for( ; bayer <= bayer_end - 18; bayer += 14, dst += 56 )
{
v_uint16x8 r0 = v_load((ushort*)bayer);
v_uint16x8 r1 = v_load((ushort*)(bayer+bayer_step));
v_uint16x8 r2 = v_load((ushort*)(bayer+bayer_step*2));

v_uint16x8 b1 = (r0 & masklo) + (r2 & masklo);
v_uint16x8 nextb1 = v_rotate_right<1>(b1);
v_uint16x8 b0 = b1 + nextb1;
b1 = (nextb1 + delta1) >> 1;
b0 = (b0 + delta2) >> 2;
// b0 b2 ... b14 b1 b3 ... b15
b0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(b0), v_reinterpret_as_s16(b1)));

v_uint16x8 g0 = (r0 >> 8) + (r2 >> 8);
v_uint16x8 g1 = r1 & masklo;
g0 += v_rotate_right<1>(g1) + g1;
g1 = v_rotate_right<1>(g1);
g0 = (g0 + delta2) >> 2;
// g0 g2 ... g14 g1 g3 ... g15
g0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(g0), v_reinterpret_as_s16(g1)));

r0 = r1 >> 8;
r1 = v_rotate_right<1>(r0) + r0;
r1 = (r1 + delta1) >> 1;
// r0 r2 ... r14 r1 r3 ... r15
r0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(r0), v_reinterpret_as_s16(r1)));

b1 = (b0 ^ r0) & mask;
b0 = b0 ^ b1;
r0 = r0 ^ b1;

// b1 g1 b3 g3 b5 g5...
v_uint8x16 pack_lo, pack_hi;
v_zip(v_reinterpret_as_u8(b0), v_reinterpret_as_u8(g0), pack_lo, pack_hi);
b1 = v_reinterpret_as_u16(pack_hi);
// b0 g0 b2 g2 b4 g4 ....
b0 = v_reinterpret_as_u16(pack_lo);

// r1 a r3 a r5 a ...
v_zip(v_reinterpret_as_u8(r0), a, pack_lo, pack_hi);
r1 = v_reinterpret_as_u16(pack_hi);
// r0 a r2 a r4 a ...
r0 = v_reinterpret_as_u16(pack_lo);

// a b0 g0 r0 a b2 g2 r2 ...
v_zip(b0, r0, g0, g1);
// a b8 g8 r8 a b10 g10 r10 ...

// b1 g1 r1 a b3 g3 r3 a ...
v_zip(b1, r1, r0, r1);
// b9 g9 r9 a b11 g11 r11 a ...

// a b0 g0 r0 b1 g1 r1 a ...
v_uint32x4 pack32_lo, pack32_hi;
v_zip(v_reinterpret_as_u32(g0), v_reinterpret_as_u32(r0), pack32_lo, pack32_hi);
b0 = v_reinterpret_as_u16(pack32_lo);
// a b4 g4 r4 b5 g5 r5 a ...
b1 = v_reinterpret_as_u16(pack32_hi);

v_store_low(dst-1+0, v_reinterpret_as_u8(b0));
v_store_high(dst-1+8*1, v_reinterpret_as_u8(b0));
v_store_low(dst-1+8*2, v_reinterpret_as_u8(b1));
v_store_high(dst-1+8*3, v_reinterpret_as_u8(b1));

// a b8 g8 r8 b9 g9 r9 a ...
v_zip(v_reinterpret_as_u32(g1), v_reinterpret_as_u32(r1), pack32_lo, pack32_hi);
g0 = v_reinterpret_as_u16(pack32_lo);
// a b12 g12 r12 b13 g13 r13 a ...
g1 = v_reinterpret_as_u16(pack32_hi);

v_store_low(dst-1+8*4, v_reinterpret_as_u8(g0));
v_store_high(dst-1+8*5, v_reinterpret_as_u8(g0));

v_store_low(dst-1+8*6, v_reinterpret_as_u8(g1));
}
#endif

return (int)(bayer - (bayer_end - width));
}

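// Editor's note (not part of the patch): the key change in bayer2RGBA is that the
// fourth output channel is now filled from the caller-supplied 'alpha'
// (vdupq_n_u8(alpha) / v_setall_u8(alpha) above) instead of a hard-coded 255.
// A trivial scalar view, with an illustrative helper name:
static inline void storeBGRASketch(unsigned char* dst, unsigned char b, unsigned char g,
                                   unsigned char r, unsigned char alpha)
{
    dst[0] = b;
    dst[1] = g;
    dst[2] = r;
    dst[3] = alpha; // previously always 255
}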
int bayer2RGB_EA(const uchar*, int, uchar*, int, int) const
int bayer2RGB_EA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
{
return 0;
const uchar* bayer_end = bayer + width;
v_uint16x8 masklow = v_setall_u16(0x00ff);
v_uint16x8 delta1 = v_setall_u16(1), delta2 = v_setall_u16(2);
v_uint16x8 full = v_setall_u16((ushort)(-1));
v_uint8x16 z = v_setzero_u8();
v_uint16x8 mask = v_setall_u16(blue > 0 ? (ushort)(-1) : 0);

for ( ; bayer <= bayer_end - 18; bayer += 14, dst += 42)
{
/*
B G B G | B G B G | B G B G | B G B G
G R G R | G R G R | G R G R | G R G R
B G B G | B G B G | B G B G | B G B G
*/

v_uint16x8 r0 = v_load((ushort*)bayer);
v_uint16x8 r1 = v_load((ushort*)(bayer+bayer_step));
v_uint16x8 r2 = v_load((ushort*)(bayer+bayer_step*2));

v_uint16x8 b1 = (r0 & masklow) + (r2 & masklow);
v_uint16x8 nextb1 = v_rotate_right<1>(b1);
v_uint16x8 b0 = b1 + nextb1;
b1 = (nextb1 + delta1) >> 1;
b0 = (b0 + delta2) >> 2;
// b0 b2 ... b14 b1 b3 ... b15
b0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(b0), v_reinterpret_as_s16(b1)));

// vertical sum
v_uint16x8 r0g = r0 >> 8;
v_uint16x8 r2g = r2 >> 8;
v_uint16x8 sumv = ((r0g + r2g) + delta1) >> 1;
// horizontal sum
v_uint16x8 g1 = r1 & masklow;
v_uint16x8 nextg1 = v_rotate_right<1>(g1);
v_uint16x8 sumg = (g1 + nextg1 + delta1) >> 1;

// gradients
v_uint16x8 gradv = (r0g - r2g) + (r2g - r0g);
v_uint16x8 gradg = (nextg1 - g1) + (g1 - nextg1);
v_uint16x8 gmask = gradg > gradv;
v_uint16x8 g0 = (gmask & sumv) + (sumg & (gmask ^ full));
// g0 g2 ... g14 g1 g3 ...
g0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(g0), v_reinterpret_as_s16(nextg1)));

r0 = r1 >> 8;
r1 = v_rotate_right<1>(r0) + r0;
r1 = (r1 + delta1) >> 1;
// r0 r2 ... r14 r1 r3 ... r15
r0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(r0), v_reinterpret_as_s16(r1)));

b1 = (b0 ^ r0) & mask;
b0 = b0 ^ b1;
r0 = r0 ^ b1;

// b1 g1 b3 g3 b5 g5...
v_uint8x16 pack_lo, pack_hi;
v_zip(v_reinterpret_as_u8(b0), v_reinterpret_as_u8(g0), pack_lo, pack_hi);
b1 = v_reinterpret_as_u16(pack_hi);
// b0 g0 b2 g2 b4 g4 ....
b0 = v_reinterpret_as_u16(pack_lo);

// r1 0 r3 0 r5 0 ...
v_zip(v_reinterpret_as_u8(r0), z, pack_lo, pack_hi);
r1 = v_reinterpret_as_u16(pack_hi);
// r0 0 r2 0 r4 0 ...
r0 = v_reinterpret_as_u16(pack_lo);

// 0 b0 g0 r0 0 b2 g2 r2 ...
v_zip(b0, r0, g0, g1);
g0 = v_reinterpret_as_u16(v_rotate_left<1>(v_reinterpret_as_u8(g0)));
// 0 b8 g8 r8 0 b10 g10 r10 ...
g1 = v_reinterpret_as_u16(v_rotate_left<1>(v_reinterpret_as_u8(g1)));

// b1 g1 r1 0 b3 g3 r3 0 ...
v_zip(b1, r1, r0, r1);
// b9 g9 r9 0 b11 g11 r11 0 ...

// 0 b0 g0 r0 b1 g1 r1 0 ...
v_uint32x4 pack32_lo, pack32_hi;
v_zip(v_reinterpret_as_u32(g0), v_reinterpret_as_u32(r0), pack32_lo, pack32_hi);
b0 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_lo)));
// 0 b4 g4 r4 b5 g5 r5 0 ...
b1 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_hi)));

v_store_low(dst+0, v_reinterpret_as_u8(b0));
v_store_high(dst+6*1, v_reinterpret_as_u8(b0));
v_store_low(dst+6*2, v_reinterpret_as_u8(b1));
v_store_high(dst+6*3, v_reinterpret_as_u8(b1));

// 0 b8 g8 r8 b9 g9 r9 0 ...
v_zip(v_reinterpret_as_u32(g1), v_reinterpret_as_u32(r1), pack32_lo, pack32_hi);
g0 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_lo)));
// 0 b12 g12 r12 b13 g13 r13 0 ...
g1 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_hi)));

v_store_low(dst+6*4, v_reinterpret_as_u8(g0));
v_store_high(dst+6*5, v_reinterpret_as_u8(g0));

v_store_low(dst+6*6, v_reinterpret_as_u8(g1));
}

return int(bayer - (bayer_end - width));
}

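// Editor's note (not part of the patch): bayer2RGB_EA picks the green estimate from the
// direction with the smaller gradient; the saturating-subtract pairs above compute the
// absolute differences without branching. A scalar sketch of that selection, with an
// illustrative name:
static inline int greenAtRedSketch(int up, int down, int left, int right)
{
    int gradV = up > down ? up - down : down - up;           // |vertical green gradient|
    int gradH = left > right ? left - right : right - left;  // |horizontal green gradient|
    return gradH > gradV ? (up + down + 1) >> 1       // edge runs vertically: average up/down
                         : (left + right + 1) >> 1;   // otherwise: average left/right
}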
};
#else

@@ -775,7 +820,7 @@ public:

// simd optimization only for dcn == 3
int delta = dcn == 4 ?
vecOp.bayer2RGBA(bayer, bayer_step, dst, size.width, blue) :
vecOp.bayer2RGBA(bayer, bayer_step, dst, size.width, blue, alpha) :
vecOp.bayer2RGB(bayer, bayer_step, dst, size.width, blue);
bayer += delta;
dst += delta*dcn;

@@ -45,6 +45,8 @@

#include "opencv2/core/openvx/ovx_defs.hpp"

#include "opencv2/core/utils/tls.hpp"

namespace cv
{

@@ -332,8 +332,10 @@ public class JavaCamera2View extends CameraBridgeViewBase {
Image.Plane[] planes = mImage.getPlanes();
int w = mImage.getWidth();
int h = mImage.getHeight();
assert(planes[0].getPixelStride() == 1);
ByteBuffer y_plane = planes[0].getBuffer();
mGray = new Mat(h, w, CvType.CV_8UC1, y_plane);
int y_plane_step = planes[0].getRowStride();
mGray = new Mat(h, w, CvType.CV_8UC1, y_plane, y_plane_step);
return mGray;
}

@@ -349,11 +351,14 @@ public class JavaCamera2View extends CameraBridgeViewBase {
assert(planes[0].getPixelStride() == 1);
assert(planes[2].getPixelStride() == 2);
ByteBuffer y_plane = planes[0].getBuffer();
int y_plane_step = planes[0].getRowStride();
ByteBuffer uv_plane1 = planes[1].getBuffer();
int uv_plane1_step = planes[1].getRowStride();
ByteBuffer uv_plane2 = planes[2].getBuffer();
Mat y_mat = new Mat(h, w, CvType.CV_8UC1, y_plane);
Mat uv_mat1 = new Mat(h / 2, w / 2, CvType.CV_8UC2, uv_plane1);
Mat uv_mat2 = new Mat(h / 2, w / 2, CvType.CV_8UC2, uv_plane2);
int uv_plane2_step = planes[2].getRowStride();
Mat y_mat = new Mat(h, w, CvType.CV_8UC1, y_plane, y_plane_step);
Mat uv_mat1 = new Mat(h / 2, w / 2, CvType.CV_8UC2, uv_plane1, uv_plane1_step);
Mat uv_mat2 = new Mat(h / 2, w / 2, CvType.CV_8UC2, uv_plane2, uv_plane2_step);
long addr_diff = uv_mat2.dataAddr() - uv_mat1.dataAddr();
if (addr_diff > 0) {
assert(addr_diff == 1);

@@ -369,30 +374,45 @@ public class JavaCamera2View extends CameraBridgeViewBase {
ByteBuffer u_plane = planes[1].getBuffer();
ByteBuffer v_plane = planes[2].getBuffer();

y_plane.get(yuv_bytes, 0, w*h);
int yuv_bytes_offset = 0;

int y_plane_step = planes[0].getRowStride();
if (y_plane_step == w) {
y_plane.get(yuv_bytes, 0, w*h);
yuv_bytes_offset = w*h;
} else {
int padding = y_plane_step - w;
for (int i = 0; i < h; i++){
y_plane.get(yuv_bytes, yuv_bytes_offset, w);
yuv_bytes_offset += w;
if (i < h - 1) {
y_plane.position(y_plane.position() + padding);
}
}
assert(yuv_bytes_offset == w * h);
}

int chromaRowStride = planes[1].getRowStride();
int chromaRowPadding = chromaRowStride - w/2;

int offset = w*h;
if (chromaRowPadding == 0){
// When the row stride of the chroma channels equals their width, we can copy
// the entire channels in one go
u_plane.get(yuv_bytes, offset, w*h/4);
offset += w*h/4;
v_plane.get(yuv_bytes, offset, w*h/4);
u_plane.get(yuv_bytes, yuv_bytes_offset, w*h/4);
yuv_bytes_offset += w*h/4;
v_plane.get(yuv_bytes, yuv_bytes_offset, w*h/4);
} else {
// When not equal, we need to copy the channels row by row
for (int i = 0; i < h/2; i++){
u_plane.get(yuv_bytes, offset, w/2);
offset += w/2;
u_plane.get(yuv_bytes, yuv_bytes_offset, w/2);
yuv_bytes_offset += w/2;
if (i < h/2-1){
u_plane.position(u_plane.position() + chromaRowPadding);
}
}
for (int i = 0; i < h/2; i++){
v_plane.get(yuv_bytes, offset, w/2);
offset += w/2;
v_plane.get(yuv_bytes, yuv_bytes_offset, w/2);
yuv_bytes_offset += w/2;
if (i < h/2-1){
v_plane.position(v_plane.position() + chromaRowPadding);
}

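// Editor's note (not part of the patch): the Java changes above stop assuming that a
// camera plane's row stride equals the image width; padded rows are copied one at a
// time and the padding is skipped. The same idea in C++ (names and signature are
// illustrative only):
#include <cstddef>
#include <cstring>
static void copyPlaneSketch(const unsigned char* src, std::size_t rowStride,
                            unsigned char* dst, std::size_t width, std::size_t height)
{
    if (rowStride == width)                 // tightly packed: one bulk copy
    {
        std::memcpy(dst, src, width * height);
    }
    else                                    // padded rows: copy row by row
    {
        for (std::size_t i = 0; i < height; i++)
            std::memcpy(dst + i * width, src + i * rowStride, width);
    }
}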
@@ -74,7 +74,7 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__IIILjava_nio_ByteBuffer
JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__IIILjava_nio_ByteBuffer_2
(JNIEnv* env, jclass, jint rows, jint cols, jint type, jobject data)
{
static const char method_name[] = "Mat::n_1Mat__IIILByteBuffer()";
static const char method_name[] = "Mat::n_1Mat__IIILjava_nio_ByteBuffer_2()";
try {
LOGD("%s", method_name);
return (jlong) new Mat( rows, cols, type, (void*)env->GetDirectBufferAddress(data) );

@@ -88,6 +88,32 @@ JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__IIILjava_nio_ByteBuffer
}

/*
* Class: org_opencv_core_Mat
* Method: n_Mat
* Signature: (IIILjava/nio/ByteBuffer;J)J
*
* Mat::Mat(int rows, int cols, int type, void* data, size_t step)
*/
JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__IIILjava_nio_ByteBuffer_2J
(JNIEnv* env, jclass, jint rows, jint cols, jint type, jobject data, jlong step);

JNIEXPORT jlong JNICALL Java_org_opencv_core_Mat_n_1Mat__IIILjava_nio_ByteBuffer_2J
(JNIEnv* env, jclass, jint rows, jint cols, jint type, jobject data, jlong step)
{
static const char method_name[] = "Mat::n_1Mat__IIILjava_nio_ByteBuffer_2J()";
try {
LOGD("%s", method_name);
return (jlong) new Mat(rows, cols, type, (void*)env->GetDirectBufferAddress(data), (size_t)step);
} catch(const std::exception &e) {
throwJavaException(env, &e, method_name);
} catch (...) {
throwJavaException(env, 0, method_name);
}

return 0;
}

//
// Mat::Mat(int rows, int cols, int type)

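// Editor's note (not part of the patch): the new JNI entry above forwards to the
// existing cv::Mat(int rows, int cols, int type, void* data, size_t step) constructor,
// which wraps an external, possibly row-padded buffer without copying. Illustrative
// C++ usage, with a hypothetical helper name:
#include <cstddef>
#include <opencv2/core.hpp>
static cv::Mat wrapPaddedBufferSketch(unsigned char* data, int rows, int cols,
                                      std::size_t rowStrideBytes)
{
    // 'step' is the byte distance between rows of the external buffer; it may exceed
    // 'cols' when the producer (e.g. a camera plane) pads each row. No data is copied,
    // and the caller keeps ownership of 'data'.
    return cv::Mat(rows, cols, CV_8UC1, data, rowStrideBytes);
}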
@@ -301,6 +301,11 @@ void HOGDescriptor::computeGradient(InputArray _img, InputOutputArray _grad, Inp
Mat Dy(1, width, CV_32F, dbuf + width);
Mat Mag(1, width, CV_32F, dbuf + width*2);
Mat Angle(1, width, CV_32F, dbuf + width*3);
#if CV_SIMD128
int widthP2 = width+2;
AutoBuffer<float> _lutBuf(9*widthP2);
float* const lutBuf = _lutBuf.data();
#endif

if (cn == 3)
{

@@ -319,6 +324,63 @@ void HOGDescriptor::computeGradient(InputArray _img, InputOutputArray _grad, Inp
xmap += 1;
}

#if CV_SIMD128
typedef const uchar* const T;
float *lutPrev, *lutCurr, *lutNext;
{
y = 0;
const uchar* imgPtr = img.ptr(ymap[y]);
const uchar* prevPtr = img.data + img.step*ymap[y-1];

lutPrev = lutBuf+widthP2*0;
lutCurr = lutBuf+widthP2*3;

{
int x0 = xmap[-1], x1 = xmap[0];
T p02 = imgPtr + x0, p12 = imgPtr + x1;

lutPrev[0+widthP2*0] = lut[prevPtr[x0+0]];
lutPrev[0+widthP2*1] = lut[prevPtr[x0+1]];
lutPrev[0+widthP2*2] = lut[prevPtr[x0+2]];
lutCurr[0+widthP2*0] = lut[p02[0]]; lutCurr[1+widthP2*0] = lut[p12[0]];
lutCurr[0+widthP2*1] = lut[p02[1]]; lutCurr[1+widthP2*1] = lut[p12[1]];
lutCurr[0+widthP2*2] = lut[p02[2]]; lutCurr[1+widthP2*2] = lut[p12[2]];
}

for( x = 0; x <= width - 4; x += 4 )
{
int x0 = xmap[x], x1 = xmap[x+1], x2 = xmap[x+2], x3 = xmap[x+3];
T p02 = imgPtr + xmap[x+1];
T p12 = imgPtr + xmap[x+2];
T p22 = imgPtr + xmap[x+3];
T p32 = imgPtr + xmap[x+4];

v_float32x4 _dx00 = v_float32x4(lut[p02[0]], lut[p12[0]], lut[p22[0]], lut[p32[0]]);
v_float32x4 _dx10 = v_float32x4(lut[p02[1]], lut[p12[1]], lut[p22[1]], lut[p32[1]]);
v_float32x4 _dx20 = v_float32x4(lut[p02[2]], lut[p12[2]], lut[p22[2]], lut[p32[2]]);

v_store(lutCurr+x+widthP2*0+2, _dx00);
v_store(lutCurr+x+widthP2*1+2, _dx10);
v_store(lutCurr+x+widthP2*2+2, _dx20);

v_float32x4 _dy00 = v_float32x4(lut[prevPtr[x0+0]], lut[prevPtr[x1+0]], lut[prevPtr[x2+0]], lut[prevPtr[x3+0]]);
v_float32x4 _dy10 = v_float32x4(lut[prevPtr[x0+1]], lut[prevPtr[x1+1]], lut[prevPtr[x2+1]], lut[prevPtr[x3+1]]);
v_float32x4 _dy20 = v_float32x4(lut[prevPtr[x0+2]], lut[prevPtr[x1+2]], lut[prevPtr[x2+2]], lut[prevPtr[x3+2]]);

v_store(lutPrev+x+widthP2*0+1, _dy00);
v_store(lutPrev+x+widthP2*1+1, _dy10);
v_store(lutPrev+x+widthP2*2+1, _dy20);
}
{
int x0 = xmap[x];

lutPrev[x+widthP2*0+1] = lut[prevPtr[x0+0]];
lutPrev[x+widthP2*1+1] = lut[prevPtr[x0+1]];
lutPrev[x+widthP2*2+1] = lut[prevPtr[x0+2]];
}
}
#endif

float angleScale = signedGradient ? (float)(nbins/(2.0*CV_PI)) : (float)(nbins/CV_PI);
for( y = 0; y < gradsize.height; y++ )
{

@@ -344,28 +406,57 @@ void HOGDescriptor::computeGradient(InputArray _img, InputOutputArray _grad, Inp
{
x = 0;
#if CV_SIMD128
int yMod = y%3;

// Circular lut history buffer
if (yMod == 0)
{
lutPrev = lutBuf+widthP2*0;
lutCurr = lutBuf+widthP2*3;
lutNext = lutBuf+widthP2*6;
}
else if (yMod == 1)
{
lutPrev = lutBuf+widthP2*3;
lutCurr = lutBuf+widthP2*6;
lutNext = lutBuf+widthP2*0;
}
else
{
lutPrev = lutBuf+widthP2*6;
lutCurr = lutBuf+widthP2*0;
lutNext = lutBuf+widthP2*3;
}

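// Editor's note (not part of the patch): the if/else chain above only rotates the roles
// of the three LUT row slices, so each image row's lut values are computed once and then
// reused as "curr" and "prev" on the next two iterations. The same rolling-buffer idea
// in isolation (names and the fill expression are illustrative):
#include <vector>
static void rollingRowsSketch(int width, int height)
{
    std::vector<float> buf(3 * width, 0.f);
    for (int y = 0; y < height; y++)
    {
        float* prev = buf.data() + (y % 3) * width;        // filled two iterations ago
        float* curr = buf.data() + ((y + 1) % 3) * width;  // filled on the previous iteration
        float* next = buf.data() + ((y + 2) % 3) * width;  // filled now, for the row below
        for (int x = 0; x < width; x++)
            next[x] = float(x + y);                        // stand-in for lut[pixel]
        (void)prev; (void)curr;                            // consumers would read these
    }
}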
{
int x0 = xmap[-1];

lutNext[0+widthP2*0] = lut[nextPtr[x0+0]];
lutNext[0+widthP2*1] = lut[nextPtr[x0+1]];
lutNext[0+widthP2*2] = lut[nextPtr[x0+2]];
}
for( ; x <= width - 4; x += 4 )
{
int x0 = xmap[x], x1 = xmap[x+1], x2 = xmap[x+2], x3 = xmap[x+3];
typedef const uchar* const T;
T p02 = imgPtr + xmap[x+1], p00 = imgPtr + xmap[x-1];
T p12 = imgPtr + xmap[x+2], p10 = imgPtr + xmap[x];
T p22 = imgPtr + xmap[x+3], p20 = p02;
T p32 = imgPtr + xmap[x+4], p30 = p12;

v_float32x4 _dx0 = v_float32x4(lut[p02[0]], lut[p12[0]], lut[p22[0]], lut[p32[0]]) -
v_float32x4(lut[p00[0]], lut[p10[0]], lut[p20[0]], lut[p30[0]]);
v_float32x4 _dx1 = v_float32x4(lut[p02[1]], lut[p12[1]], lut[p22[1]], lut[p32[1]]) -
v_float32x4(lut[p00[1]], lut[p10[1]], lut[p20[1]], lut[p30[1]]);
v_float32x4 _dx2 = v_float32x4(lut[p02[2]], lut[p12[2]], lut[p22[2]], lut[p32[2]]) -
v_float32x4(lut[p00[2]], lut[p10[2]], lut[p20[2]], lut[p30[2]]);
v_float32x4 _dx0 = v_load(lutCurr+x+widthP2*0+2) - v_load(lutCurr+x+widthP2*0);
v_float32x4 _dx1 = v_load(lutCurr+x+widthP2*1+2) - v_load(lutCurr+x+widthP2*1);
v_float32x4 _dx2 = v_load(lutCurr+x+widthP2*2+2) - v_load(lutCurr+x+widthP2*2);

v_float32x4 _dy0 = v_float32x4(lut[nextPtr[x0]], lut[nextPtr[x1]], lut[nextPtr[x2]], lut[nextPtr[x3]]) -
v_float32x4(lut[prevPtr[x0]], lut[prevPtr[x1]], lut[prevPtr[x2]], lut[prevPtr[x3]]);
v_float32x4 _dy1 = v_float32x4(lut[nextPtr[x0+1]], lut[nextPtr[x1+1]], lut[nextPtr[x2+1]], lut[nextPtr[x3+1]]) -
v_float32x4(lut[prevPtr[x0+1]], lut[prevPtr[x1+1]], lut[prevPtr[x2+1]], lut[prevPtr[x3+1]]);
v_float32x4 _dy2 = v_float32x4(lut[nextPtr[x0+2]], lut[nextPtr[x1+2]], lut[nextPtr[x2+2]], lut[nextPtr[x3+2]]) -
v_float32x4(lut[prevPtr[x0+2]], lut[prevPtr[x1+2]], lut[prevPtr[x2+2]], lut[prevPtr[x3+2]]);
v_float32x4 _dy00 = v_float32x4(lut[nextPtr[x0+0]], lut[nextPtr[x1+0]], lut[nextPtr[x2+0]], lut[nextPtr[x3+0]]);
v_float32x4 _dy0 = _dy00 - v_load(lutPrev+x+widthP2*0+1);

v_store(lutNext+x+widthP2*0+1, _dy00);

v_float32x4 _dy10 = v_float32x4(lut[nextPtr[x0+1]], lut[nextPtr[x1+1]], lut[nextPtr[x2+1]], lut[nextPtr[x3+1]]);
v_float32x4 _dy1 = _dy10 - v_load(lutPrev+x+widthP2*1+1);

v_store(lutNext+x+widthP2*1+1, _dy10);

v_float32x4 _dy20 = v_float32x4(lut[nextPtr[x0+2]], lut[nextPtr[x1+2]], lut[nextPtr[x2+2]], lut[nextPtr[x3+2]]);
v_float32x4 _dy2 = _dy20 - v_load(lutPrev+x+widthP2*2+1);

v_store(lutNext+x+widthP2*2+1, _dy20);

v_float32x4 _mag0 = (_dx0 * _dx0) + (_dy0 * _dy0);
v_float32x4 _mag1 = (_dx1 * _dx1) + (_dy1 * _dy1);

@@ -382,6 +473,13 @@ void HOGDescriptor::computeGradient(InputArray _img, InputOutputArray _grad, Inp
v_store(dbuf + x, _dx2);
v_store(dbuf + x + width, _dy2);
}
{
int x0 = xmap[x];

lutNext[x+widthP2*0+1] = lut[nextPtr[x0+0]];
lutNext[x+widthP2*1+1] = lut[nextPtr[x0+1]];
lutNext[x+widthP2*2+1] = lut[nextPtr[x0+2]];
}
#endif
for( ; x < width; x++ )
{