Merge pull request #12650 from alalek:dnn_ocl4dnn_verification_test

* dnn(ocl4dnn): update kernel checks

* dnn: workaround for IDLF kernels on Intel iGPU

* dnn(test): remove "skip" check for unstable cases
Alexander Alekhin 2018-09-27 12:54:23 +03:00 committed by GitHub
parent fe56bdeeb9
commit fae329a0ca
2 changed files with 183 additions and 77 deletions
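
Note: this patch introduces four runtime switches, each read once into a function-local static via cv::utils::getConfigurationParameter*: OPENCV_OCL4DNN_WORKAROUND_IDLF (default: enabled), OPENCV_OCL4DNN_DUMP_FAILED_RESULT (default: disabled), OPENCV_OCL4DNN_TEST_ALL_KERNELS (default: 0, i.e. no extra test iterations) and OPENCV_OCL4DNN_TUNING_RAISE_CHECK_ERROR (default: disabled). The standalone C++ sketch below only illustrates that read-once pattern; it uses plain std::getenv with deliberately simplified parsing and is not OpenCV's implementation.

    #include <cstddef>
    #include <cstdio>
    #include <cstdlib>
    #include <cstring>

    // Simplified stand-in for cv::utils::getConfigurationParameterBool():
    // unset/empty -> default; "0", "false", "FALSE", "OFF" -> false; anything else -> true.
    static bool getBoolEnv(const char* name, bool defaultValue)
    {
        const char* v = std::getenv(name);
        if (!v || !*v)
            return defaultValue;
        return std::strcmp(v, "0") != 0 && std::strcmp(v, "false") != 0 &&
               std::strcmp(v, "FALSE") != 0 && std::strcmp(v, "OFF") != 0;
    }

    // Read-once pattern: the function-local static is initialized on the first call only.
    static bool enableWorkaroundIDLF()
    {
        static bool param = getBoolEnv("OPENCV_OCL4DNN_WORKAROUND_IDLF", true);
        return param;
    }

    static size_t testAllKernels()
    {
        static size_t param = []{
            const char* v = std::getenv("OPENCV_OCL4DNN_TEST_ALL_KERNELS");
            return (v && *v) ? (size_t)std::strtoull(v, nullptr, 10) : (size_t)0;
        }();
        return param;
    }

    int main()
    {
        std::printf("IDLF workaround: %d, extra test iterations: %zu\n",
                    (int)enableWorkaroundIDLF(), testAllKernels());
        return 0;
    }

With OPENCV_OCL4DNN_TEST_ALL_KERNELS set to a positive value, both useFirstAvailable() and setupConvolution() below re-run verifyResult() that many times per candidate kernel, resetting the tested/verified flags before each iteration.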


@@ -60,6 +60,8 @@
#if defined WIN32 || defined _WIN32
#include <windows.h>
#include <direct.h>
#undef min
#undef max
#endif
namespace cv { namespace dnn { namespace ocl4dnn {
@@ -68,6 +70,30 @@ typedef std::map<std::string, std::string> kernel_hash_t;
static kernel_hash_t kernelConfigMap;
static bool defaultConfigLoaded = false;
static bool enableWorkaroundIDLF()
{
static bool param = utils::getConfigurationParameterSizeT("OPENCV_OCL4DNN_WORKAROUND_IDLF", true);
return param;
}
static bool dumpFailedResult()
{
static bool param = utils::getConfigurationParameterSizeT("OPENCV_OCL4DNN_DUMP_FAILED_RESULT", false);
return param;
}
static size_t testAllKernels()
{
static size_t param = utils::getConfigurationParameterSizeT("OPENCV_OCL4DNN_TEST_ALL_KERNELS", 0);
return param;
}
static bool raiseOnCheckError()
{
static bool param = utils::getConfigurationParameterBool("OPENCV_OCL4DNN_TUNING_RAISE_CHECK_ERROR", false);
return param;
}
static std::string sanitize(const std::string& s)
{
std::string s_ = s;
@@ -1221,9 +1247,6 @@ bool OCL4DNNConvSpatial<float>::verifyResult(const UMat &bottom,
kernelConfig* config,
UMat &verifyTop)
{
uint32_t verificationFail = 0;
if (config->verified)
return true;
else if (config->tested)
@@ -1236,6 +1259,8 @@ bool OCL4DNNConvSpatial<float>::verifyResult(const UMat &bottom,
convolve(bottom, top, weight, bias, numImages, config);
tuned_ = saved_tuned;
config->tested = true;
UMat new_top, new_verify_top;
Mat mat_top, mat_verify_top;
if (use_half_)
@@ -1254,41 +1279,88 @@ bool OCL4DNNConvSpatial<float>::verifyResult(const UMat &bottom,
const float* data = mat_top.ptr<float>();
const float* verify_data = mat_verify_top.ptr<float>();
for (int32_t n = 0; n < num_; ++n) {
for (int32_t g = 0; g < group_; ++g) {
int32_t output_image_offset = n * top_dim_ + output_w_ * output_h_ * M_ * g;
for (int out_ch = 0; out_ch < M_ && !verificationFail; out_ch++)
for (int h = 0; h < output_h_ && !verificationFail; h++)
for (int w = 0; w < output_w_; w++) {
size_t offset = output_image_offset + out_ch * output_w_ * output_h_ + h * output_w_ + w;
int error_slice_offset = 0;
int error_slice = 0;
float relative_eps = use_half_ ? 0.1f : 0.01f;
float error_factor = fabs(data[offset] - verify_data[offset]);
if (use_half_ && error_factor > 0.1 * fabs(verify_data[offset]) &&
error_factor > 0.04 && !(fabs(verify_data[offset]) < 1.e-3 && error_factor < 1.e-4))
{
CV_LOG_ERROR(NULL, "test verification failed @ image " << n << " group " << g
<< " out_ch " << out_ch << " h " << h << " w " << w
<< " got " << data[offset] << " expected " << verify_data[offset]);
verificationFail = 1;
goto out;
size_t errors = 0;
double rel_err = norm(mat_top.reshape(1, 1), mat_verify_top.reshape(1, 1), NORM_L1 | NORM_RELATIVE);
if (rel_err >= relative_eps)
{
for (int32_t n = 0; n < num_; ++n) {
for (int32_t g = 0; g < group_; ++g) {
int32_t output_image_offset = n * top_dim_ + output_w_ * output_h_ * M_ * g;
for (int out_ch = 0; out_ch < M_; out_ch++)
for (int h = 0; h < output_h_; h++)
for (int w = 0; w < output_w_; w++) {
size_t offset = output_image_offset + out_ch * output_w_ * output_h_ + h * output_w_ + w;
bool has_error = !(data[offset] == data[offset]); // is NaN
if (!has_error)
{
float error_factor = std::fabs(data[offset] - verify_data[offset]);
float base_value_abs = std::max(1e-3f, std::fabs(verify_data[offset]));
has_error = error_factor > relative_eps * base_value_abs;
}
if (has_error)
{
if (errors == 0)
{
error_slice = (int)(offset / (output_w_ * output_h_));
error_slice_offset = (int)(offset % (output_w_ * output_h_));
CV_LOG_ERROR(NULL, "Kernel: " << config->kernelName);
}
if (errors < 10)
CV_LOG_ERROR(NULL, "test verification failed @ image " << n << " group " << g
<< " out_ch " << out_ch << " h " << h << " w " << w
<< " (offset: " << offset << ")"
<< " got " << data[offset] << " expected " << verify_data[offset]);
errors++;
}
}
else if (!use_half_ && error_factor > 0.1 * fabs(verify_data[offset]) &&
!(fabs(verify_data[offset]) < 1.e-3 && error_factor < 1.e-4))
{
CV_LOG_ERROR(NULL, "test verification failed @ image " << n << " group " << g
<< " out_ch " << out_ch << " h " << h << " w " << w
<< " got " << data[offset] << " expected " << verify_data[offset]);
verificationFail = 1;
goto out;
}
}
}
}
}
out:
if (verificationFail == 1)
if (errors)
{
if (dumpFailedResult())
{
try
{
int n_outputs = (int)(mat_top.size[0]*mat_top.size[1]);
int slice_size = (int)(mat_top.total() / n_outputs);
Rect roi(0, 0, slice_size, n_outputs);
roi.width = std::min(roi.width, 32);
roi.height = std::min(roi.height, 16);
roi.x = std::max(0, std::min(slice_size - roi.width, error_slice_offset - roi.width/2));
roi.y = std::max(0, std::min(n_outputs - roi.height, error_slice - roi.height/2));
std::cout << "roi = " << roi << " errors=" << errors << std::endl;
std::cout << "mat_top = " << shape(mat_top) << std::endl
<< mat_top.reshape(1, 1).reshape(1, n_outputs)(roi) << std::endl;
std::cout << "verify_top = " << shape(mat_verify_top) << std::endl
<< mat_verify_top.reshape(1, 1).reshape(1, n_outputs)(roi) << std::endl;
}
catch (const std::exception& e)
{
CV_LOG_ERROR(NULL, "Results dump failed: " << e.what());
}
catch (...)
{
CV_LOG_ERROR(NULL, "Results dump failed")
}
}
if (raiseOnCheckError())
CV_Error_(Error::StsError, ("ocl4dnn tuning verification failed: %s (errors %lld)", config->kernelName.c_str(), (long long int)errors));
return false;
}
else
{
config->verified = true;
return true;
}
}
template<typename Dtype>
@@ -1408,6 +1480,17 @@ bool OCL4DNNConvSpatial<float>::createIDLFKernel(int32_t blockWidth,
setupKernel();
if (enableWorkaroundIDLF() && ocl::Device::getDefault().intelSubgroupsSupport())
{
// Issues are observed with these kernels: 3x1 (covered by tests), 2x1, 4x1, 5x1, 3x2
// kernels 1x3, 3x3, 2x3 are good
if (pad_h_ != 0 && kernel_w_ <= simd_size && kernel_h_ <= 2)
{
CV_LOG_INFO(NULL, "DNN(workaround): skip IDLF kernel: " << kernel_name_);
return false;
}
}
ocl::Program program = compileKernel();
if (program.ptr())
{
@@ -1623,13 +1706,38 @@ void OCL4DNNConvSpatial<float>::useFirstAvailable(const UMat &bottom,
generateTunerItems(tunerItems);
tunerItems.push_back(makePtr<tunerParam>(KERNEL_TYPE_BASIC, 1, 1, 1));
for (int i = 0; i < tunerItems.size(); i++) {
for (int i = 0; i < tunerItems.size(); i++)
{
if (createConvolutionKernel(tunerItems[i]->kernelType,
tunerItems[i]->blockWidth,
tunerItems[i]->blockHeight,
tunerItems[i]->blockDepth)) {
tunerItems[i]->blockDepth))
{
int kernelIdx = kernelQueue.size() - 1;
if (verifyResult(bottom, top, weight, bias, numImages, kernelQueue[kernelIdx], verifyTop)) {
kernelConfig* config = kernelQueue[kernelIdx].get();
bool failed = false;
const size_t testCount = testAllKernels();
for(int t = 0; t < testCount; t++)
{
try
{
config->tested = false;
config->verified = false;
if (!verifyResult(bottom, top, weight, bias, numImages, config, verifyTop))
{
CV_LOG_ERROR(NULL, "Failed on test iteration: " << t);
failed = true;
break;
}
}
catch (...)
{
CV_LOG_ERROR(NULL, "Failed on test iteration: " << t);
throw;
}
}
if (!failed && verifyResult(bottom, top, weight, bias, numImages, config, verifyTop))
{
bestKernelConfig = kernelQueue[kernelIdx];
if (bestKernelConfig->kernelType != KERNEL_TYPE_INTEL_IDLF &&
bestKernelConfig->kernelType != KERNEL_TYPE_GEMM_LIKE)
@@ -1685,42 +1793,50 @@ void OCL4DNNConvSpatial<float>::setupConvolution(const UMat &bottom,
tunerItems[i]->blockHeight,
tunerItems[i]->blockDepth);
for (int32_t x = 0; x < kernelQueue.size(); x++) {
kernelQueue[x]->executionTime = timedConvolve(bottom, top, weight, bias, numImages,
kernelQueue[x]);
#ifdef TEST_ALL_KERNELS
if (kernelQueue[x]->tested == false) {
bool verified = verifyResult(bottom, top, weight, bias, numImages, kernelQueue[x], verifyTop);
if (verified == false) {
CV_LOG_ERROR(NULL, "Kernel " << kernelQueue[x]->kernelName << " failed verification");
CV_LOG_ERROR(NULL, "kernelQueue[x]->workItem_output[0]: "
<< kernelQueue[x]->workItem_output[0] << " "
<< "kernelQueue[x]->workItem_output[1]: "
<< kernelQueue[x]->workItem_output[1] << " "
<< "kernelQueue[x]->workItem_output[2]: "
<< kernelQueue[x]->workItem_output[2] << " "
<< "kernelQueue[x]->kernelType: "
<< kernelQueue[x]->kernelType << " "
<< "kernelQueue[x]->global_work_size[0]: "
<< kernelQueue[x]->global_work_size[0] << " "
<< "kernelQueue[x]->global_work_size[1]: "
<< kernelQueue[x]->global_work_size[1] << " "
<< "kernelQueue[x]->global_work_size[2]: "
<< kernelQueue[x]->global_work_size[2] << " "
<< "kernelQueue[x]->local_work_size[0]: "
<< kernelQueue[x]->local_work_size[0] << " "
<< "kernelQueue[x]->local_work_size[1]: "
<< kernelQueue[x]->local_work_size[1] << " "
<< "kernelQueue[x]->local_work_size[2]: "
<< kernelQueue[x]->local_work_size[2] << " "
<< kernelQueue[x]->swizzle_weights << " "
<< kernelQueue[x]->use_null_local);
} else {
CV_LOG_INFO(NULL, "Kernel " << kernelQueue[x]->kernelName << " pass verification");
const size_t testCount = testAllKernels();
for (int32_t x = 0; x < kernelQueue.size(); x++)
{
kernelConfig* config = kernelQueue[x].get();
config->executionTime = timedConvolve(bottom, top, weight, bias, numImages, config);
for(int t = 0; t < testCount; t++)
{
try
{
config->tested = false;
config->verified = false;
bool verified = verifyResult(bottom, top, weight, bias, numImages, config, verifyTop);
if (verified == false)
{
CV_LOG_ERROR(NULL, "Kernel " << config->kernelName << " failed verification");
CV_LOG_ERROR(NULL, "workItem="
<< config->workItem_output[0] << ","
<< config->workItem_output[1] << ","
<< config->workItem_output[2] << " "
<< "kernelType: " << config->kernelType << " "
<< "global_work_size="
<< config->global_work_size[0] << ","
<< config->global_work_size[1] << ","
<< config->global_work_size[2] << " "
<< "local_work_size="
<< config->local_work_size[0] << ","
<< config->local_work_size[1] << ","
<< config->local_work_size[2] << " "
<< config->swizzle_weights << " "
<< config->use_null_local);
}
else
{
CV_LOG_VERBOSE(NULL, "Kernel " << config->kernelName << " pass verification");
}
}
catch (...)
{
CV_LOG_ERROR(NULL, "Failed on test iteration: " << t);
throw;
}
}
#endif
}
int32_t failures = 0;
bool verification = false;
if (kernelQueue.size()) {
@@ -1739,12 +1855,10 @@ void OCL4DNNConvSpatial<float>::setupConvolution(const UMat &bottom,
// Test fastest kernel
bool verified = verifyResult(bottom, top, weight, bias, numImages, kernelQueue[fastestKernel], verifyTop);
if (verified == true) {
kernelQueue[fastestKernel]->verified = true;
kernel_index_ = fastestKernel;
verification = true;
break;
} else {
kernelQueue[fastestKernel]->tested = true;
CV_LOG_ERROR(NULL, "Kernel " << kernelQueue[fastestKernel]->kernelName <<
" failed verification");
failures++;
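
For reference, the reworked verifyResult() above first gates on the overall relative L1 error (cv::norm with NORM_L1 | NORM_RELATIVE) and only then walks the elements: an output is counted as wrong when it is NaN, or when |got - expected| exceeds relative_eps * max(1e-3, |expected|), with relative_eps = 0.1 for FP16 and 0.01 for FP32. The standalone sketch below restates just that per-element rule; it omits the UMat/group/offset bookkeeping of the real code and is illustrative only.

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Per-element tolerance rule mirrored from the patched verifyResult():
    // a value fails if it is NaN, or if the absolute difference exceeds
    // relative_eps times max(1e-3, |expected|). Returns the number of failures.
    static size_t countVerificationErrors(const std::vector<float>& got,
                                          const std::vector<float>& expected,
                                          bool use_half)
    {
        const float relative_eps = use_half ? 0.1f : 0.01f;
        size_t errors = 0;
        const size_t n = std::min(got.size(), expected.size());
        for (size_t i = 0; i < n; ++i)
        {
            bool has_error = got[i] != got[i];  // NaN never compares equal to itself
            if (!has_error)
            {
                float error_factor = std::fabs(got[i] - expected[i]);
                float base_value_abs = std::max(1e-3f, std::fabs(expected[i]));
                has_error = error_factor > relative_eps * base_value_abs;
            }
            if (has_error)
                ++errors;
        }
        return errors;
    }

When errors are found, the patched code additionally dumps a small ROI around the first failing slice if OPENCV_OCL4DNN_DUMP_FAILED_RESULT is set, and raises a cv::Error instead of silently rejecting the kernel if OPENCV_OCL4DNN_TUNING_RAISE_CHECK_ERROR is set.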


@@ -99,14 +99,6 @@ TEST_P(Convolution, Accuracy)
#endif
bool skipCheck = false;
if (cvtest::skipUnstableTests && backendId == DNN_BACKEND_OPENCV &&
(targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16) &&
(
(kernel == Size(3, 1) && stride == Size(1, 1) && pad == Size(0, 1)) ||
(stride.area() > 1 && !(pad.width == 0 && pad.height == 0))
)
)
skipCheck = true;
int sz[] = {outChannels, inChannels / group, kernel.height, kernel.width};
Mat weights(4, &sz[0], CV_32F);