mirror of
https://github.com/opencv/opencv.git
synced 2025-06-12 20:42:53 +08:00
Merge pull request #19534 from tomoaki0705:fixCudaFiltersRace
This commit is contained in:
commit
0421c3991d
@ -50,9 +50,6 @@
|
|||||||
|
|
||||||
namespace cv { namespace cuda { namespace device
|
namespace cv { namespace cuda { namespace device
|
||||||
{
|
{
|
||||||
// // namespace imgproc
|
|
||||||
// {
|
|
||||||
|
|
||||||
__device__ void histogramAddAndSub8(int* H, const int * hist_colAdd,const int * hist_colSub){
|
__device__ void histogramAddAndSub8(int* H, const int * hist_colAdd,const int * hist_colSub){
|
||||||
int tx = threadIdx.x;
|
int tx = threadIdx.x;
|
||||||
if (tx<8){
|
if (tx<8){
|
||||||
@ -120,6 +117,25 @@ namespace cv { namespace cuda { namespace device
|
|||||||
luc[tx]=0;
|
luc[tx]=0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define scanNeighbor(array, range, index, threadIndex) \
|
||||||
|
{ \
|
||||||
|
int v = 0; \
|
||||||
|
if (index <= threadIndex && threadIndex < range) \
|
||||||
|
v = array[threadIndex] + array[threadIndex-index]; \
|
||||||
|
__syncthreads(); \
|
||||||
|
if (index <= threadIndex && threadIndex < range) \
|
||||||
|
array[threadIndex] = v; \
|
||||||
|
}
|
||||||
|
#define findMedian(array, range, threadIndex, result, count, position) \
|
||||||
|
if (threadIndex < range) \
|
||||||
|
{ \
|
||||||
|
if (array[threadIndex+1] > position && array[threadIndex] <= position) \
|
||||||
|
{ \
|
||||||
|
*result = threadIndex+1; \
|
||||||
|
*count = array[threadIndex]; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
__device__ void histogramMedianPar8LookupOnly(int* H,int* Hscan, const int medPos,int* retval, int* countAtMed){
|
__device__ void histogramMedianPar8LookupOnly(int* H,int* Hscan, const int medPos,int* retval, int* countAtMed){
|
||||||
int tx=threadIdx.x;
|
int tx=threadIdx.x;
|
||||||
*retval=*countAtMed=0;
|
*retval=*countAtMed=0;
|
||||||
@ -127,28 +143,14 @@ namespace cv { namespace cuda { namespace device
|
|||||||
Hscan[tx]=H[tx];
|
Hscan[tx]=H[tx];
|
||||||
}
|
}
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
if (1 <= tx && tx < 8 )
|
scanNeighbor(Hscan, 8, 1, tx);
|
||||||
Hscan[tx]+=Hscan[tx-1];
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
if (2 <= tx && tx < 8 )
|
scanNeighbor(Hscan, 8, 2, tx);
|
||||||
Hscan[tx]+=Hscan[tx-2];
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
if (4 <= tx && tx < 8 )
|
scanNeighbor(Hscan, 8, 4, tx);
|
||||||
Hscan[tx]+=Hscan[tx-4];
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
if(tx<7){
|
findMedian(Hscan, 7, tx, retval, countAtMed, medPos);
|
||||||
if(Hscan[tx+1] > medPos && Hscan[tx] < medPos){
|
|
||||||
*retval=tx+1;
|
|
||||||
*countAtMed=Hscan[tx];
|
|
||||||
}
|
|
||||||
else if(Hscan[tx]==medPos){
|
|
||||||
if(Hscan[tx+1]>medPos){
|
|
||||||
*retval=tx+1;
|
|
||||||
*countAtMed=Hscan[tx];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ void histogramMedianPar32LookupOnly(int* H,int* Hscan, const int medPos,int* retval, int* countAtMed){
|
__device__ void histogramMedianPar32LookupOnly(int* H,int* Hscan, const int medPos,int* retval, int* countAtMed){
|
||||||
@ -158,33 +160,18 @@ namespace cv { namespace cuda { namespace device
|
|||||||
Hscan[tx]=H[tx];
|
Hscan[tx]=H[tx];
|
||||||
}
|
}
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
if ( 1 <= tx && tx < 32 )
|
scanNeighbor(Hscan, 32, 1, tx);
|
||||||
Hscan[tx]+=Hscan[tx-1];
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
if ( 2 <= tx && tx < 32 )
|
scanNeighbor(Hscan, 32, 2, tx);
|
||||||
Hscan[tx]+=Hscan[tx-2];
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
if ( 4 <= tx && tx < 32 )
|
scanNeighbor(Hscan, 32, 4, tx);
|
||||||
Hscan[tx]+=Hscan[tx-4];
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
if ( 8 <= tx && tx < 32 )
|
scanNeighbor(Hscan, 32, 8, tx);
|
||||||
Hscan[tx]+=Hscan[tx-8];
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
if ( 16 <= tx && tx < 32 )
|
scanNeighbor(Hscan, 32, 16, tx);
|
||||||
Hscan[tx]+=Hscan[tx-16];
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
if(tx<31){
|
|
||||||
if(Hscan[tx+1] > medPos && Hscan[tx] < medPos){
|
findMedian(Hscan, 31, tx, retval, countAtMed, medPos);
|
||||||
*retval=tx+1;
|
|
||||||
*countAtMed=Hscan[tx];
|
|
||||||
}
|
|
||||||
else if(Hscan[tx]==medPos){
|
|
||||||
if(Hscan[tx+1]>medPos){
|
|
||||||
*retval=tx+1;
|
|
||||||
*countAtMed=Hscan[tx];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void cuMedianFilterMultiBlock(PtrStepSzb src, PtrStepSzb dest, PtrStepSzi histPar, PtrStepSzi coarseHistGrid,int r, int medPos_)
|
__global__ void cuMedianFilterMultiBlock(PtrStepSzb src, PtrStepSzb dest, PtrStepSzi histPar, PtrStepSzi coarseHistGrid,int r, int medPos_)
|
||||||
@ -283,7 +270,6 @@ namespace cv { namespace cuda { namespace device
|
|||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
histogramMultipleAdd8(HCoarse,histCoarse, 2*r+1);
|
histogramMultipleAdd8(HCoarse,histCoarse, 2*r+1);
|
||||||
// __syncthreads();
|
|
||||||
int cols_m_1=cols-1;
|
int cols_m_1=cols-1;
|
||||||
|
|
||||||
for(int j=r;j<cols-r;j++){
|
for(int j=r;j<cols-r;j++){
|
||||||
@ -295,23 +281,24 @@ namespace cv { namespace cuda { namespace device
|
|||||||
histogramMedianPar8LookupOnly(HCoarse,HCoarseScan,medPos, &firstBin,&countAtMed);
|
histogramMedianPar8LookupOnly(HCoarse,HCoarseScan,medPos, &firstBin,&countAtMed);
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
if ( luc[firstBin] <= (j-r))
|
int loopIndex = luc[firstBin];
|
||||||
|
if (loopIndex <= (j-r))
|
||||||
{
|
{
|
||||||
histogramClear32(HFine[firstBin]);
|
histogramClear32(HFine[firstBin]);
|
||||||
for ( luc[firstBin] = j-r; luc[firstBin] < ::min(j+r+1,cols); luc[firstBin]++ ){
|
for ( loopIndex = j-r; loopIndex < ::min(j+r+1,cols); loopIndex++ ){
|
||||||
histogramAdd32(HFine[firstBin], hist+(luc[firstBin]*256+(firstBin<<5) ) );
|
histogramAdd32(HFine[firstBin], hist+(loopIndex*256+(firstBin<<5) ) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
for ( ; luc[firstBin] < (j+r+1);luc[firstBin]++ ) {
|
for ( ; loopIndex < (j+r+1);loopIndex++ ) {
|
||||||
histogramAddAndSub32(HFine[firstBin],
|
histogramAddAndSub32(HFine[firstBin],
|
||||||
hist+(::min(luc[firstBin],cols_m_1)*256+(firstBin<<5) ),
|
hist+(::min(loopIndex,cols_m_1)*256+(firstBin<<5) ),
|
||||||
hist+(::max(luc[firstBin]-2*r-1,0)*256+(firstBin<<5) ) );
|
hist+(::max(loopIndex-2*r-1,0)*256+(firstBin<<5) ) );
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
luc[firstBin] = loopIndex;
|
||||||
|
|
||||||
int leftOver=medPos-countAtMed;
|
int leftOver=medPos-countAtMed;
|
||||||
if(leftOver>=0){
|
if(leftOver>=0){
|
||||||
|
Loading…
Reference in New Issue
Block a user