Merge pull request #11773 from alalek:dnn_ocl_update_force_tuning_flag

This commit is contained in:
Alexander Alekhin 2018-06-22 05:23:55 +00:00
commit 1894f1a37f
2 changed files with 51 additions and 28 deletions

View File

@ -306,6 +306,7 @@ class OCL4DNNConvSpatial
std::string kernel_name_;
std::string cache_path_;
bool use_cache_path_; // true if cache_path_ directory exists
bool run_auto_tuning_;
bool force_auto_tuning_;
int32_t kernel_index_;
std::vector< cv::Ptr<kernelConfig> > kernelQueue;

View File

@ -67,6 +67,46 @@ typedef std::map<std::string, std::string> kernel_hash_t;
static kernel_hash_t kernelConfigMap;
static bool defaultConfigLoaded = false;
static std::string sanitize(const std::string& s)
{
std::string s_ = s;
for (size_t i = 0; i < s_.size(); i++)
{
char c = s_[i];
if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'))
{
s_[i] = '_';
}
}
// TODO add hash?
// s_ = s_ + cv::format("_%08llx", crc64((uchar*)s.c_str(), s.size()));
return s_;
}
static void initializeGlobalBuiltinConfigurations(const std::string& cache_path)
{
CV_Assert(defaultConfigLoaded == false);
CV_Assert(kernelConfigMap.empty());
const size_t numConfigs = sizeof(default_kernel_config_intel)/sizeof(default_kernel_config_intel[0])/2;
for (size_t i = 0; i < numConfigs; i++)
{
std::string key = std::string("Intel(R) Corporation_") + default_kernel_config_intel[2 * i];
if (!cache_path.empty())
{
std::string cacheFile = cache_path + sanitize(key);
std::ifstream cachedKernel(cacheFile.c_str());
if (cachedKernel)
continue; // external configuration found, skip builtin
}
std::pair<std::string, std::string> entry(
key,
default_kernel_config_intel[2 * i + 1]);
kernelConfigMap.insert(entry);
}
defaultConfigLoaded = true;
}
template<typename Dtype>
OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
{
@ -139,9 +179,8 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
}
}
force_auto_tuning_ =
(use_cache_path_ && !utils::getConfigurationParameterBool("OPENCV_OCL4DNN_DISABLE_AUTO_TUNING", false))
|| utils::getConfigurationParameterBool("OPENCV_OCL4DNN_FORCE_AUTO_TUNING", false);
run_auto_tuning_ = use_cache_path_ && !utils::getConfigurationParameterBool("OPENCV_OCL4DNN_DISABLE_AUTO_TUNING", false);
force_auto_tuning_ = utils::getConfigurationParameterBool("OPENCV_OCL4DNN_FORCE_AUTO_TUNING", false);
}
template<typename Dtype>
@ -562,17 +601,7 @@ void OCL4DNNConvSpatial<Dtype>::generateKey()
key_ = ocl::Device::getDefault().vendorName() + "_EU" + cv::format("%d", ocl::Device::getDefault().maxComputeUnits()) + "_" + keyBuilder.str();
key_sanitized_ = key_;
for (size_t i = 0; i < key_sanitized_.size(); i++)
{
char c = key_sanitized_[i];
if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'))
{
key_sanitized_[i] = '_';
}
}
// TODO add hash?
// key_sanitized_ = key_sanitized_ + cv::format("_%08llx", crc64((uchar*)key_.c_str(), key_.size()));
key_sanitized_ = sanitize(key_);
short_key_ = keyBuilder.str();
}
@ -1805,7 +1834,7 @@ void OCL4DNNConvSpatial<Dtype>::prepareKernel(const UMat &bottom, UMat &top,
calculateBenchmark(bottom, benchData, (use_half_) ? weights_half : weight, bias, numImages);
if (force_auto_tuning_)
if (run_auto_tuning_ || force_auto_tuning_)
{
setupConvolution(bottom, top, weight, bias, numImages, benchData);
}
@ -1820,18 +1849,8 @@ template<typename Dtype>
bool OCL4DNNConvSpatial<Dtype>::loadCachedConfig()
{
cv::AutoLock lock(kernelConfigMutex);
if (!defaultConfigLoaded)
{
const size_t numConfigs = sizeof(default_kernel_config_intel)/sizeof(default_kernel_config_intel[0])/2;
for (size_t i = 0; i < numConfigs; i++)
{
std::pair<std::string, std::string> entry(
std::string("Intel(R) Corporation_") + default_kernel_config_intel[2 * i],
default_kernel_config_intel[2 * i + 1]);
kernelConfigMap.insert(entry);
}
defaultConfigLoaded = true;
}
if (!defaultConfigLoaded && !force_auto_tuning_)
initializeGlobalBuiltinConfigurations((use_cache_path_ && !cache_path_.empty()) ? (cache_path_ + '/') : std::string());
kernel_hash_t::iterator it = kernelConfigMap.find(key_);
if (it != kernelConfigMap.end())
@ -1904,9 +1923,12 @@ bool OCL4DNNConvSpatial<Dtype>::setupKernelByConfig(int x, int y, int z, int typ
template<typename Dtype>
bool OCL4DNNConvSpatial<Dtype>::loadTunedConfig()
{
if (force_auto_tuning_)
return false; // don't load results from external storage
if (!use_cache_path_)
{
if (cache_path_.empty() && !force_auto_tuning_)
if (cache_path_.empty())
{
static int warn_ = 0;
if (!warn_)