mirror of
https://github.com/opencv/opencv.git
synced 2024-11-29 22:00:25 +08:00
move Level into shared memory
This commit is contained in:
parent
30bce16ad6
commit
1b9bccb856
@ -94,11 +94,6 @@ namespace icf {
|
|||||||
float relScale = level.relScale;
|
float relScale = level.relScale;
|
||||||
float farea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y);
|
float farea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y);
|
||||||
|
|
||||||
dprintf("%d: feature %d box %d %d %d %d\n",threadIdx.x, (node.threshold >> 28), scaledRect.x, scaledRect.y,
|
|
||||||
scaledRect.z, scaledRect.w);
|
|
||||||
dprintf("%d: rescale: %f [%f %f] selected %f\n",threadIdx.x, level.relScale, level.scaling[0], level.scaling[1],
|
|
||||||
level.scaling[(node.threshold >> 28) > 6]);
|
|
||||||
|
|
||||||
// rescale
|
// rescale
|
||||||
scaledRect.x = __float2int_rn(relScale * scaledRect.x);
|
scaledRect.x = __float2int_rn(relScale * scaledRect.x);
|
||||||
scaledRect.y = __float2int_rn(relScale * scaledRect.y);
|
scaledRect.y = __float2int_rn(relScale * scaledRect.y);
|
||||||
@ -110,14 +105,7 @@ namespace icf {
|
|||||||
const float expected_new_area = farea * relScale * relScale;
|
const float expected_new_area = farea * relScale * relScale;
|
||||||
float approx = __fdividef(sarea, expected_new_area);
|
float approx = __fdividef(sarea, expected_new_area);
|
||||||
|
|
||||||
dprintf("%d: new rect: %d box %d %d %d %d rel areas %f %f\n",threadIdx.x, (node.threshold >> 28),
|
float rootThreshold = (node.threshold & 0x0FFFFFFFU) * approx * level.scaling[(node.threshold >> 28) > 6];
|
||||||
scaledRect.x, scaledRect.y, scaledRect.z, scaledRect.w, farea * relScale * relScale, sarea);
|
|
||||||
|
|
||||||
float rootThreshold = (node.threshold & 0x0FFFFFFFU) * approx;
|
|
||||||
rootThreshold *= level.scaling[(node.threshold >> 28) > 6];
|
|
||||||
|
|
||||||
dprintf("%d: approximation %f %d -> %f %f\n",threadIdx.x, approx, (node.threshold & 0x0FFFFFFFU), rootThreshold,
|
|
||||||
level.scaling[(node.threshold >> 28) > 6]);
|
|
||||||
|
|
||||||
return rootThreshold;
|
return rootThreshold;
|
||||||
}
|
}
|
||||||
@ -179,18 +167,20 @@ namespace icf {
|
|||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
const int x = blockIdx.x;
|
const int x = blockIdx.x;
|
||||||
|
|
||||||
__shared__ volatile char roiCache[8];
|
// load Level
|
||||||
|
__shared__ Level level;
|
||||||
|
|
||||||
|
// check ROI
|
||||||
|
__shared__ volatile char roiCache[8];
|
||||||
if (!threadIdx.y && !threadIdx.x)
|
if (!threadIdx.y && !threadIdx.x)
|
||||||
{
|
|
||||||
((float2*)roiCache)[threadIdx.x] = tex2D(troi, blockIdx.y, x);
|
((float2*)roiCache)[threadIdx.x] = tex2D(troi, blockIdx.y, x);
|
||||||
}
|
|
||||||
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
if (!roiCache[threadIdx.y]) return;
|
if (!roiCache[threadIdx.y]) return;
|
||||||
|
|
||||||
Level level = levels[downscales + blockIdx.z];
|
if (!threadIdx.x)
|
||||||
|
level = levels[downscales + blockIdx.z];
|
||||||
|
|
||||||
if(x >= level.workRect.x || y >= level.workRect.y) return;
|
if(x >= level.workRect.x || y >= level.workRect.y) return;
|
||||||
|
|
||||||
|
@ -90,6 +90,8 @@ struct __align__(8) Level //is actually 24 bytes
|
|||||||
objSize.x = round(oct.size.x * relScale);
|
objSize.x = round(oct.size.x * relScale);
|
||||||
objSize.y = round(oct.size.y * relScale);
|
objSize.y = round(oct.size.y * relScale);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__device Level(){}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct __align__(8) Node
|
struct __align__(8) Node
|
||||||
|
Loading…
Reference in New Issue
Block a user