*applied patch from NVidia (nppstTranspose bug)

*fixed some warnings
*finished gpu test port to gtest framework
This commit is contained in:
Anatoly Baksheev 2011-02-18 12:23:18 +00:00
parent 916690a674
commit 047c7e0fd6
12 changed files with 69 additions and 47 deletions

View File

@ -100,6 +100,9 @@ macro(define_opencv_module name)
file(GLOB test_srcs "test/*.cpp")
file(GLOB test_hdrs "test/*.h*")
source_group("Src" FILES ${test_srcs})
source_group("Include" FILES ${test_hdrs})
set(the_target "opencv_test_${name}")

View File

@ -55,5 +55,6 @@
#include "opencv2/imgproc/imgproc_c.h"
#include "opencv2/core/internal.hpp"
#include "opencv2/features2d/features2d.hpp"
#include <vector>
#endif

View File

@ -41,6 +41,8 @@
#include "precomp.hpp"
using namespace std;
#undef INFINITY
#define INFINITY 10000
#define OCCLUSION_PENALTY 10000

View File

@ -38,10 +38,9 @@ source_group("Device" FILES ${lib_device_hdrs})
if (HAVE_CUDA)
file(GLOB_RECURSE ncv_srcs "src/nvidia/*.cpp")
file(GLOB_RECURSE ncv_cuda "src/nvidia/*.cu")
file(GLOB_RECURSE ncv_hdr1 "src/nvidia/*.hpp")
file(GLOB_RECURSE ncv_hdr2 "src/nvidia/*.h")
source_group("Src\\NVidia" FILES ${ncv_srcs} ${ncv_hdr1} ${ncv_hdr2} ${ncv_cuda})
file(GLOB_RECURSE ncv_hdrs "src/nvidia/*.hpp" "src/nvidia/*.h")
source_group("Src\\NVidia" FILES ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda})
include_directories("src/nvidia/core" "src/nvidia/NPP_staging")
endif()
@ -83,7 +82,7 @@ if (HAVE_CUDA)
#CUDA_BUILD_CLEAN_TARGET()
endif()
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${ncv_srcs} ${ncv_hdr1} ${ncv_hdr2} ${ncv_cuda} ${cuda_objs})
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda} ${cuda_objs})
if(PCHSupport_FOUND)
set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/src/precomp.hpp)
@ -147,10 +146,15 @@ install(FILES ${lib_hdrs}
# DESTINATION include/opencv2/${name}/device
# COMPONENT main)
################################################################################################################
################################ GPU Module Tests #####################################################
################################################################################################################
# Test file processing is kept in a separate directory to avoid creating
# a 'Src' source filter in Visual Studio
if(BUILD_TESTS AND NOT ANDROID AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/test)
set(the_target "opencv_test_${name}")
set(the_test_target "opencv_test_${name}")
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include"
"${CMAKE_CURRENT_SOURCE_DIR}/test"
@ -169,44 +173,46 @@ if(BUILD_TESTS AND NOT ANDROID AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/test)
file(GLOB test_srcs "test/*.cpp")
file(GLOB test_hdrs "test/*.h*")
source_group("Src" FILES ${test_hdrs} ${test_srcs})
if(HAVE_CUDA)
include_directories(${CUDA_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/NPP_staging)
file(GLOB nvidia "test/nvidia/*.cpp" "test/nvidia/*.h*")
source_group("nvidia" FILES ${nvidia})
include_directories(${CUDA_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/NPP_staging)
file(GLOB nvidia "test/nvidia/*.cpp" "test/nvidia/*.h*")
source_group("Src\\NVidia" FILES ${nvidia})
endif()
add_executable(${the_target} ${test_srcs} ${test_hdrs} ${nvidia})
add_executable(${the_test_target} ${test_srcs} ${test_hdrs} ${nvidia})
if(PCHSupport_FOUND)
set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/test/test_precomp.hpp)
if(${CMAKE_GENERATOR} MATCHES "Visual*" OR ${CMAKE_GENERATOR} MATCHES "Xcode*")
if(${CMAKE_GENERATOR} MATCHES "Visual*")
set(${the_target}_pch "test/test_precomp.cpp")
set(${the_test_target}_pch "test/test_precomp.cpp")
endif()
add_native_precompiled_header(${the_target} ${pch_header})
add_native_precompiled_header(${the_test_target} ${pch_header})
elseif(CMAKE_COMPILER_IS_GNUCXX AND ${CMAKE_GENERATOR} MATCHES ".*Makefiles")
add_precompiled_header(${the_target} ${pch_header})
add_precompiled_header(${the_test_target} ${pch_header})
endif()
endif()
# Additional target properties
set_target_properties(${the_target} PROPERTIES
set_target_properties(${the_test_target} PROPERTIES
DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/"
)
add_dependencies(${the_target} ${test_deps})
add_dependencies(${the_test_target} ${test_deps})
# Add the required libraries for linking:
target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${test_deps})
target_link_libraries(${the_test_target} ${OPENCV_LINKER_LIBS} ${test_deps})
enable_testing()
get_target_property(LOC ${the_target} LOCATION)
add_test(${the_target} "${LOC}")
get_target_property(LOC ${the_test_target} LOCATION)
add_test(${the_test_target} "${LOC}")
if(WIN32)
install(TARGETS ${the_target} RUNTIME DESTINATION bin COMPONENT main)
install(TARGETS ${the_test_target} RUNTIME DESTINATION bin COMPONENT main)
endif()
endif()

View File

@ -1198,26 +1198,32 @@ __global__ void transpose(T *d_src, Ncv32u srcStride,
Ncv32u xIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.x;
Ncv32u yIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.y;
Ncv32u index_in = xIndex + yIndex * srcStride;
Ncv32u index_gmem = xIndex + yIndex * srcStride;
xIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.x;
yIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.y;
Ncv32u index_out = xIndex + yIndex * dstStride;
for (Ncv32u i=0; i<TRANSPOSE_TILE_DIM; i+=TRANSPOSE_BLOCK_ROWS)
if (xIndex < srcRoi.width)
{
tile[threadIdx.y+i][threadIdx.x] = d_src[index_in+i*srcStride];
for (Ncv32u i=0; i<TRANSPOSE_TILE_DIM; i+=TRANSPOSE_BLOCK_ROWS)
{
if (yIndex + i < srcRoi.height)
{
tile[threadIdx.y+i][threadIdx.x] = d_src[index_gmem+i*srcStride];
}
}
}
__syncthreads();
xIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.x;
yIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.y;
index_gmem = xIndex + yIndex * dstStride;
if (xIndex < srcRoi.height)
{
for (Ncv32u i=0; i<TRANSPOSE_TILE_DIM; i+=TRANSPOSE_BLOCK_ROWS)
{
if (yIndex + i < srcRoi.width)
{
d_dst[index_out+i*dstStride] = tile[threadIdx.x][threadIdx.y+i];
d_dst[index_gmem+i*dstStride] = tile[threadIdx.x][threadIdx.y+i];
}
}
}

View File

@ -71,21 +71,21 @@ namespace cv { namespace gpu { namespace device
template <size_t src_elem_size, size_t dst_elem_size>
struct UnReadWriteTraits_
{
enum {shift=1};
enum { shift = 1 };
};
template <size_t src_elem_size>
struct UnReadWriteTraits_<src_elem_size, 1>
{
enum {shift=4};
enum { shift = 4 };
};
template <size_t src_elem_size>
struct UnReadWriteTraits_<src_elem_size, 2>
{
enum {shift=2};
enum { shift = 2 };
};
template <typename T, typename D> struct UnReadWriteTraits
{
enum {shift=UnReadWriteTraits_<sizeof(T), sizeof(D)>::shift};
enum { shift = UnReadWriteTraits_<sizeof(T), sizeof(D)>::shift };
typedef typename TypeVec<T, shift>::vec_t read_type;
typedef typename TypeVec<D, shift>::vec_t write_type;
@ -94,21 +94,21 @@ namespace cv { namespace gpu { namespace device
template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size>
struct BinReadWriteTraits_
{
enum {shift=1};
enum { shift = 1 };
};
template <size_t src_elem_size1, size_t src_elem_size2>
struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 1>
{
enum {shift=4};
enum { shift = 4 };
};
template <size_t src_elem_size1, size_t src_elem_size2>
struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 2>
{
enum {shift=2};
enum { shift = 2 };
};
template <typename T1, typename T2, typename D> struct BinReadWriteTraits
{
enum {shift=BinReadWriteTraits_<sizeof(T1), sizeof(T2), sizeof(D)>::shift};
enum {shift = BinReadWriteTraits_<sizeof(T1), sizeof(T2), sizeof(D)>::shift};
typedef typename TypeVec<T1, shift>::vec_t read_type1;
typedef typename TypeVec<T2, shift>::vec_t read_type2;

View File

@ -179,6 +179,9 @@ void generateTransposeTests(NCVAutoTestLister &testLister, NCVTestSourceProvider
testLister.add(new TestTranspose<T>("TestTranspose_VGA", src, 640, 480));
testLister.add(new TestTranspose<T>("TestTranspose_HD1080", src, 1920, 1080));
//regression tests
testLister.add(new TestTranspose<T>("TestTranspose_reg_0", src, 1072, 375));
}

View File

@ -69,4 +69,4 @@ protected:
}
};
TEST(NPP_Staging, DISABLED_multitest) { CV_NVidiaTestsCaller test; test.safe_run(); }
TEST(NVidia, DISABLED_multitest) { CV_NVidiaTestsCaller test; test.safe_run(); }

View File

@ -1,4 +1,8 @@
#define GTEST_CREATE_AS_SHARED_LIBRARY 1
#if _MSC_VER >= 1200
#pragma warning( disable: 4127 4251)
#endif
#include "opencv2/ts/ts.hpp"
#include "opencv2/core/core_c.h"

View File

@ -5,10 +5,5 @@
#add_subdirectory(ml)
#add_subdirectory(cxts)
#if(WITH_CUDA)
# set (BUILD_TESTS_GPU OFF CACHE BOOL "Build tests GPU")
# if(BUILD_TESTS_GPU AND WITH_CUDA)
# add_subdirectory(gpu)
# endif()
#endif()
#add_subdirectory(gpu)

View File

@ -38,12 +38,11 @@ include_directories(../cxts)
if(HAVE_CUDA)
include_directories(${CUDA_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/NPP_staging)
file(GLOB nvidia "src/nvidia/*.*")
SET(ncv_cpp ../../modules/gpu/src/nvidia/core/NCV.cpp)
file(GLOB nvidia "src/nvidia/*.*")
source_group("Src\\nvidia" FILES ${nvidia})
endif()
add_executable(${the_target} ${test_srcs} ${test_hdrs} ${nvidia} ${ncv_cpp})
add_executable(${the_target} ${test_srcs} ${test_hdrs} ${nvidia})
# Additional target properties
set_target_properties(${the_target} PROPERTIES

View File

@ -179,6 +179,9 @@ void generateTransposeTests(NCVAutoTestLister &testLister, NCVTestSourceProvider
testLister.add(new TestTranspose<T>("TestTranspose_VGA", src, 640, 480));
testLister.add(new TestTranspose<T>("TestTranspose_HD1080", src, 1920, 1080));
//regression tests
testLister.add(new TestTranspose<T>("TestTranspose_reg_0", src, 1072, 375));
}