mirror of
https://github.com/opencv/opencv.git
synced 2025-01-19 06:53:50 +08:00
Merge pull request #11773 from alalek:dnn_ocl_update_force_tuning_flag
This commit is contained in:
commit
1894f1a37f
@ -306,6 +306,7 @@ class OCL4DNNConvSpatial
|
||||
std::string kernel_name_;
|
||||
std::string cache_path_;
|
||||
bool use_cache_path_; // true if cache_path_ directory exists
|
||||
bool run_auto_tuning_;
|
||||
bool force_auto_tuning_;
|
||||
int32_t kernel_index_;
|
||||
std::vector< cv::Ptr<kernelConfig> > kernelQueue;
|
||||
|
@ -67,6 +67,46 @@ typedef std::map<std::string, std::string> kernel_hash_t;
|
||||
static kernel_hash_t kernelConfigMap;
|
||||
static bool defaultConfigLoaded = false;
|
||||
|
||||
static std::string sanitize(const std::string& s)
|
||||
{
|
||||
std::string s_ = s;
|
||||
for (size_t i = 0; i < s_.size(); i++)
|
||||
{
|
||||
char c = s_[i];
|
||||
if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'))
|
||||
{
|
||||
s_[i] = '_';
|
||||
}
|
||||
}
|
||||
// TODO add hash?
|
||||
// s_ = s_ + cv::format("_%08llx", crc64((uchar*)s.c_str(), s.size()));
|
||||
return s_;
|
||||
}
|
||||
|
||||
static void initializeGlobalBuiltinConfigurations(const std::string& cache_path)
|
||||
{
|
||||
CV_Assert(defaultConfigLoaded == false);
|
||||
CV_Assert(kernelConfigMap.empty());
|
||||
const size_t numConfigs = sizeof(default_kernel_config_intel)/sizeof(default_kernel_config_intel[0])/2;
|
||||
for (size_t i = 0; i < numConfigs; i++)
|
||||
{
|
||||
std::string key = std::string("Intel(R) Corporation_") + default_kernel_config_intel[2 * i];
|
||||
if (!cache_path.empty())
|
||||
{
|
||||
std::string cacheFile = cache_path + sanitize(key);
|
||||
std::ifstream cachedKernel(cacheFile.c_str());
|
||||
if (cachedKernel)
|
||||
continue; // external configuration found, skip builtin
|
||||
}
|
||||
std::pair<std::string, std::string> entry(
|
||||
key,
|
||||
default_kernel_config_intel[2 * i + 1]);
|
||||
kernelConfigMap.insert(entry);
|
||||
}
|
||||
defaultConfigLoaded = true;
|
||||
}
|
||||
|
||||
|
||||
template<typename Dtype>
|
||||
OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
|
||||
{
|
||||
@ -139,9 +179,8 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
|
||||
}
|
||||
}
|
||||
|
||||
force_auto_tuning_ =
|
||||
(use_cache_path_ && !utils::getConfigurationParameterBool("OPENCV_OCL4DNN_DISABLE_AUTO_TUNING", false))
|
||||
|| utils::getConfigurationParameterBool("OPENCV_OCL4DNN_FORCE_AUTO_TUNING", false);
|
||||
run_auto_tuning_ = use_cache_path_ && !utils::getConfigurationParameterBool("OPENCV_OCL4DNN_DISABLE_AUTO_TUNING", false);
|
||||
force_auto_tuning_ = utils::getConfigurationParameterBool("OPENCV_OCL4DNN_FORCE_AUTO_TUNING", false);
|
||||
}
|
||||
|
||||
template<typename Dtype>
|
||||
@ -562,17 +601,7 @@ void OCL4DNNConvSpatial<Dtype>::generateKey()
|
||||
|
||||
|
||||
key_ = ocl::Device::getDefault().vendorName() + "_EU" + cv::format("%d", ocl::Device::getDefault().maxComputeUnits()) + "_" + keyBuilder.str();
|
||||
key_sanitized_ = key_;
|
||||
for (size_t i = 0; i < key_sanitized_.size(); i++)
|
||||
{
|
||||
char c = key_sanitized_[i];
|
||||
if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'))
|
||||
{
|
||||
key_sanitized_[i] = '_';
|
||||
}
|
||||
}
|
||||
// TODO add hash?
|
||||
// key_sanitized_ = key_sanitized_ + cv::format("_%08llx", crc64((uchar*)key_.c_str(), key_.size()));
|
||||
key_sanitized_ = sanitize(key_);
|
||||
short_key_ = keyBuilder.str();
|
||||
}
|
||||
|
||||
@ -1805,7 +1834,7 @@ void OCL4DNNConvSpatial<Dtype>::prepareKernel(const UMat &bottom, UMat &top,
|
||||
|
||||
calculateBenchmark(bottom, benchData, (use_half_) ? weights_half : weight, bias, numImages);
|
||||
|
||||
if (force_auto_tuning_)
|
||||
if (run_auto_tuning_ || force_auto_tuning_)
|
||||
{
|
||||
setupConvolution(bottom, top, weight, bias, numImages, benchData);
|
||||
}
|
||||
@ -1820,18 +1849,8 @@ template<typename Dtype>
|
||||
bool OCL4DNNConvSpatial<Dtype>::loadCachedConfig()
|
||||
{
|
||||
cv::AutoLock lock(kernelConfigMutex);
|
||||
if (!defaultConfigLoaded)
|
||||
{
|
||||
const size_t numConfigs = sizeof(default_kernel_config_intel)/sizeof(default_kernel_config_intel[0])/2;
|
||||
for (size_t i = 0; i < numConfigs; i++)
|
||||
{
|
||||
std::pair<std::string, std::string> entry(
|
||||
std::string("Intel(R) Corporation_") + default_kernel_config_intel[2 * i],
|
||||
default_kernel_config_intel[2 * i + 1]);
|
||||
kernelConfigMap.insert(entry);
|
||||
}
|
||||
defaultConfigLoaded = true;
|
||||
}
|
||||
if (!defaultConfigLoaded && !force_auto_tuning_)
|
||||
initializeGlobalBuiltinConfigurations((use_cache_path_ && !cache_path_.empty()) ? (cache_path_ + '/') : std::string());
|
||||
|
||||
kernel_hash_t::iterator it = kernelConfigMap.find(key_);
|
||||
if (it != kernelConfigMap.end())
|
||||
@ -1904,9 +1923,12 @@ bool OCL4DNNConvSpatial<Dtype>::setupKernelByConfig(int x, int y, int z, int typ
|
||||
template<typename Dtype>
|
||||
bool OCL4DNNConvSpatial<Dtype>::loadTunedConfig()
|
||||
{
|
||||
if (force_auto_tuning_)
|
||||
return false; // don't load results from external storage
|
||||
|
||||
if (!use_cache_path_)
|
||||
{
|
||||
if (cache_path_.empty() && !force_auto_tuning_)
|
||||
if (cache_path_.empty())
|
||||
{
|
||||
static int warn_ = 0;
|
||||
if (!warn_)
|
||||
|
Loading…
Reference in New Issue
Block a user