mirror of
https://github.com/opencv/opencv.git
synced 2025-06-10 19:24:07 +08:00
Merge pull request #11773 from alalek:dnn_ocl_update_force_tuning_flag
This commit is contained in:
commit
1894f1a37f
@ -306,6 +306,7 @@ class OCL4DNNConvSpatial
|
|||||||
std::string kernel_name_;
|
std::string kernel_name_;
|
||||||
std::string cache_path_;
|
std::string cache_path_;
|
||||||
bool use_cache_path_; // true if cache_path_ directory exists
|
bool use_cache_path_; // true if cache_path_ directory exists
|
||||||
|
bool run_auto_tuning_;
|
||||||
bool force_auto_tuning_;
|
bool force_auto_tuning_;
|
||||||
int32_t kernel_index_;
|
int32_t kernel_index_;
|
||||||
std::vector< cv::Ptr<kernelConfig> > kernelQueue;
|
std::vector< cv::Ptr<kernelConfig> > kernelQueue;
|
||||||
|
@ -67,6 +67,46 @@ typedef std::map<std::string, std::string> kernel_hash_t;
|
|||||||
static kernel_hash_t kernelConfigMap;
|
static kernel_hash_t kernelConfigMap;
|
||||||
static bool defaultConfigLoaded = false;
|
static bool defaultConfigLoaded = false;
|
||||||
|
|
||||||
|
static std::string sanitize(const std::string& s)
|
||||||
|
{
|
||||||
|
std::string s_ = s;
|
||||||
|
for (size_t i = 0; i < s_.size(); i++)
|
||||||
|
{
|
||||||
|
char c = s_[i];
|
||||||
|
if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'))
|
||||||
|
{
|
||||||
|
s_[i] = '_';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// TODO add hash?
|
||||||
|
// s_ = s_ + cv::format("_%08llx", crc64((uchar*)s.c_str(), s.size()));
|
||||||
|
return s_;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void initializeGlobalBuiltinConfigurations(const std::string& cache_path)
|
||||||
|
{
|
||||||
|
CV_Assert(defaultConfigLoaded == false);
|
||||||
|
CV_Assert(kernelConfigMap.empty());
|
||||||
|
const size_t numConfigs = sizeof(default_kernel_config_intel)/sizeof(default_kernel_config_intel[0])/2;
|
||||||
|
for (size_t i = 0; i < numConfigs; i++)
|
||||||
|
{
|
||||||
|
std::string key = std::string("Intel(R) Corporation_") + default_kernel_config_intel[2 * i];
|
||||||
|
if (!cache_path.empty())
|
||||||
|
{
|
||||||
|
std::string cacheFile = cache_path + sanitize(key);
|
||||||
|
std::ifstream cachedKernel(cacheFile.c_str());
|
||||||
|
if (cachedKernel)
|
||||||
|
continue; // external configuration found, skip builtin
|
||||||
|
}
|
||||||
|
std::pair<std::string, std::string> entry(
|
||||||
|
key,
|
||||||
|
default_kernel_config_intel[2 * i + 1]);
|
||||||
|
kernelConfigMap.insert(entry);
|
||||||
|
}
|
||||||
|
defaultConfigLoaded = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
template<typename Dtype>
|
template<typename Dtype>
|
||||||
OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
|
OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
|
||||||
{
|
{
|
||||||
@ -139,9 +179,8 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
force_auto_tuning_ =
|
run_auto_tuning_ = use_cache_path_ && !utils::getConfigurationParameterBool("OPENCV_OCL4DNN_DISABLE_AUTO_TUNING", false);
|
||||||
(use_cache_path_ && !utils::getConfigurationParameterBool("OPENCV_OCL4DNN_DISABLE_AUTO_TUNING", false))
|
force_auto_tuning_ = utils::getConfigurationParameterBool("OPENCV_OCL4DNN_FORCE_AUTO_TUNING", false);
|
||||||
|| utils::getConfigurationParameterBool("OPENCV_OCL4DNN_FORCE_AUTO_TUNING", false);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Dtype>
|
template<typename Dtype>
|
||||||
@ -562,17 +601,7 @@ void OCL4DNNConvSpatial<Dtype>::generateKey()
|
|||||||
|
|
||||||
|
|
||||||
key_ = ocl::Device::getDefault().vendorName() + "_EU" + cv::format("%d", ocl::Device::getDefault().maxComputeUnits()) + "_" + keyBuilder.str();
|
key_ = ocl::Device::getDefault().vendorName() + "_EU" + cv::format("%d", ocl::Device::getDefault().maxComputeUnits()) + "_" + keyBuilder.str();
|
||||||
key_sanitized_ = key_;
|
key_sanitized_ = sanitize(key_);
|
||||||
for (size_t i = 0; i < key_sanitized_.size(); i++)
|
|
||||||
{
|
|
||||||
char c = key_sanitized_[i];
|
|
||||||
if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'))
|
|
||||||
{
|
|
||||||
key_sanitized_[i] = '_';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// TODO add hash?
|
|
||||||
// key_sanitized_ = key_sanitized_ + cv::format("_%08llx", crc64((uchar*)key_.c_str(), key_.size()));
|
|
||||||
short_key_ = keyBuilder.str();
|
short_key_ = keyBuilder.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1805,7 +1834,7 @@ void OCL4DNNConvSpatial<Dtype>::prepareKernel(const UMat &bottom, UMat &top,
|
|||||||
|
|
||||||
calculateBenchmark(bottom, benchData, (use_half_) ? weights_half : weight, bias, numImages);
|
calculateBenchmark(bottom, benchData, (use_half_) ? weights_half : weight, bias, numImages);
|
||||||
|
|
||||||
if (force_auto_tuning_)
|
if (run_auto_tuning_ || force_auto_tuning_)
|
||||||
{
|
{
|
||||||
setupConvolution(bottom, top, weight, bias, numImages, benchData);
|
setupConvolution(bottom, top, weight, bias, numImages, benchData);
|
||||||
}
|
}
|
||||||
@ -1820,18 +1849,8 @@ template<typename Dtype>
|
|||||||
bool OCL4DNNConvSpatial<Dtype>::loadCachedConfig()
|
bool OCL4DNNConvSpatial<Dtype>::loadCachedConfig()
|
||||||
{
|
{
|
||||||
cv::AutoLock lock(kernelConfigMutex);
|
cv::AutoLock lock(kernelConfigMutex);
|
||||||
if (!defaultConfigLoaded)
|
if (!defaultConfigLoaded && !force_auto_tuning_)
|
||||||
{
|
initializeGlobalBuiltinConfigurations((use_cache_path_ && !cache_path_.empty()) ? (cache_path_ + '/') : std::string());
|
||||||
const size_t numConfigs = sizeof(default_kernel_config_intel)/sizeof(default_kernel_config_intel[0])/2;
|
|
||||||
for (size_t i = 0; i < numConfigs; i++)
|
|
||||||
{
|
|
||||||
std::pair<std::string, std::string> entry(
|
|
||||||
std::string("Intel(R) Corporation_") + default_kernel_config_intel[2 * i],
|
|
||||||
default_kernel_config_intel[2 * i + 1]);
|
|
||||||
kernelConfigMap.insert(entry);
|
|
||||||
}
|
|
||||||
defaultConfigLoaded = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
kernel_hash_t::iterator it = kernelConfigMap.find(key_);
|
kernel_hash_t::iterator it = kernelConfigMap.find(key_);
|
||||||
if (it != kernelConfigMap.end())
|
if (it != kernelConfigMap.end())
|
||||||
@ -1904,9 +1923,12 @@ bool OCL4DNNConvSpatial<Dtype>::setupKernelByConfig(int x, int y, int z, int typ
|
|||||||
template<typename Dtype>
|
template<typename Dtype>
|
||||||
bool OCL4DNNConvSpatial<Dtype>::loadTunedConfig()
|
bool OCL4DNNConvSpatial<Dtype>::loadTunedConfig()
|
||||||
{
|
{
|
||||||
|
if (force_auto_tuning_)
|
||||||
|
return false; // don't load results from external storage
|
||||||
|
|
||||||
if (!use_cache_path_)
|
if (!use_cache_path_)
|
||||||
{
|
{
|
||||||
if (cache_path_.empty() && !force_auto_tuning_)
|
if (cache_path_.empty())
|
||||||
{
|
{
|
||||||
static int warn_ = 0;
|
static int warn_ = 0;
|
||||||
if (!warn_)
|
if (!warn_)
|
||||||
|
Loading…
Reference in New Issue
Block a user