mirror of
https://github.com/opencv/opencv.git
synced 2024-11-29 05:29:54 +08:00
improved cv::ocl::predictOptimalVectorWidth
This commit is contained in:
parent
a350b76738
commit
2c6b7a52e9
@ -598,9 +598,24 @@ CV_EXPORTS const char* typeToStr(int t);
|
||||
CV_EXPORTS const char* memopTypeToStr(int t);
|
||||
CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1, const char * name = NULL);
|
||||
CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo>& platform_info);
|
||||
|
||||
|
||||
// Strategy selector accepted by predictOptimalVectorWidth() through its `strat` parameter.
enum OclVectorStrategy
|
||||
{
|
||||
// each matrix uses its own vector width
|
||||
OCL_VECTOR_OWN = 0,
|
||||
// every matrix uses the maximal vector width found among all matrices
|
||||
// (useful when the matrices have different data types)
|
||||
OCL_VECTOR_MAX = 1,
|
||||
|
||||
// default strategy (alias of OCL_VECTOR_OWN)
|
||||
OCL_VECTOR_DEFAULT = OCL_VECTOR_OWN
|
||||
};
|
||||
|
||||
CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
|
||||
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
|
||||
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray());
|
||||
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
|
||||
OclVectorStrategy strat = OCL_VECTOR_DEFAULT);
|
||||
|
||||
CV_EXPORTS void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m);
|
||||
|
||||
|
@ -4451,42 +4451,45 @@ String kernelToStr(InputArray _kernel, int ddepth, const char * name)
|
||||
if (!src.empty()) \
|
||||
{ \
|
||||
CV_Assert(src.isMat() || src.isUMat()); \
|
||||
int ctype = src.type(), ccn = CV_MAT_CN(ctype); \
|
||||
Size csize = src.size(); \
|
||||
cols.push_back(ccn * csize.width); \
|
||||
if (ctype != type) \
|
||||
int ctype = src.type(), ccn = CV_MAT_CN(ctype), cdepth = CV_MAT_DEPTH(ctype), \
|
||||
ckercn = vectorWidths[cdepth], cwidth = ccn * csize.width; \
|
||||
if (cwidth < ckercn || ckercn <= 0) \
|
||||
return 1; \
|
||||
cols.push_back(cwidth); \
|
||||
if (strat == OCL_VECTOR_OWN && ctype != ref_type) \
|
||||
return 1; \
|
||||
offsets.push_back(src.offset()); \
|
||||
steps.push_back(src.step()); \
|
||||
dividers.push_back(ckercn * CV_ELEM_SIZE1(ctype)); \
|
||||
} \
|
||||
} \
|
||||
while ((void)0, 0)
|
||||
|
||||
int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
|
||||
InputArray src4, InputArray src5, InputArray src6,
|
||||
InputArray src7, InputArray src8, InputArray src9)
|
||||
InputArray src7, InputArray src8, InputArray src9,
|
||||
OclVectorStrategy strat)
|
||||
{
|
||||
int type = src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), esz1 = CV_ELEM_SIZE1(depth);
|
||||
Size ssize = src1.size();
|
||||
const ocl::Device & d = ocl::Device::getDefault();
|
||||
int ref_type = src1.type();
|
||||
|
||||
int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(),
|
||||
d.preferredVectorWidthShort(), d.preferredVectorWidthShort(),
|
||||
d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(),
|
||||
d.preferredVectorWidthDouble(), -1 }, kercn = vectorWidths[depth];
|
||||
d.preferredVectorWidthDouble(), -1 };
|
||||
|
||||
// if the device reports a preferred char vector width of 1 (i.e. it asks not to use vectors)
|
||||
if (vectorWidths[0] == 1)
|
||||
{
|
||||
// heuristic: fall back to fixed per-depth vector widths
|
||||
int vectorWidthsOthers[] = { 16, 16, 8, 8, 1, 1, 1, -1 };
|
||||
kercn = vectorWidthsOthers[depth];
|
||||
vectorWidths[0] = vectorWidths[1] = 4;
|
||||
vectorWidths[2] = vectorWidths[3] = 2;
|
||||
vectorWidths[4] = vectorWidths[5] = vectorWidths[6] = 4;
|
||||
}
|
||||
|
||||
if (ssize.width * cn < kercn || kercn <= 0)
|
||||
return 1;
|
||||
|
||||
std::vector<size_t> offsets, steps, cols;
|
||||
std::vector<int> dividers;
|
||||
PROCESS_SRC(src1);
|
||||
PROCESS_SRC(src2);
|
||||
PROCESS_SRC(src3);
|
||||
@ -4498,23 +4501,21 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
|
||||
PROCESS_SRC(src9);
|
||||
|
||||
size_t size = offsets.size();
|
||||
int wsz = kercn * esz1;
|
||||
std::vector<int> dividers(size, wsz);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % dividers[i] != 0)
|
||||
dividers[i] >>= 1;
|
||||
|
||||
// default strategy
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
if (dividers[i] != wsz)
|
||||
{
|
||||
kercn = 1;
|
||||
break;
|
||||
}
|
||||
int kercn = *std::min_element(dividers.begin(), dividers.end());
|
||||
|
||||
// another strategy
|
||||
// width = *std::min_element(dividers.begin(), dividers.end());
|
||||
// for (size_t i = 0; i < size; ++i)
|
||||
// if (dividers[i] != wsz)
|
||||
// {
|
||||
// kercn = 1;
|
||||
// break;
|
||||
// }
|
||||
|
||||
return kercn;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user