diff --git a/hal/riscv-rvv/src/features2d/fast.cpp b/hal/riscv-rvv/src/features2d/fast.cpp index bb6c61f4df..afc96c04a5 100644 --- a/hal/riscv-rvv/src/features2d/fast.cpp +++ b/hal/riscv-rvv/src/features2d/fast.cpp @@ -14,7 +14,7 @@ #include -// #define CV_HAL_RVV_FAST_DEBUG +#define CV_HAL_RVV_FAST_DEBUG #ifdef CV_HAL_RVV_FAST_DEBUG #include #include @@ -52,6 +52,17 @@ void printVectorUint16(T vec, int vl, std::string name) { free(data); } template +void printVectorInt16(T vec, int vl, std::string name) { + int16_t* data = (int16_t*)malloc(vl * sizeof(int16_t)); + __riscv_vse16(data, vec, vl); + std::cout << name << ": "; + for (int i = 0; i < vl; i++) { + std::cout << data[i] << " "; + } + std::cout << std::endl; + free(data); +} +template void printVectorUint32(T vec, int vl, std::string name) { uint32_t* data = (uint32_t*)malloc(vl * sizeof(uint32_t)); __riscv_vse32(data, vec, vl); @@ -108,6 +119,14 @@ void printVector(T vec, int vl, std::string name) { printVectorUint16(vec, vl, name); } else if constexpr (std::is_same::value) { printVectorUint16(vec, vl, name); + } else if constexpr (std::is_same::value) { + printVectorInt16(vec, vl, name); + } else if constexpr (std::is_same::value) { + printVectorInt16(vec, vl, name); + } else if constexpr (std::is_same::value) { + printVectorInt16(vec, vl, name); + } else if constexpr (std::is_same::value) { + printVectorInt16(vec, vl, name); } else if constexpr (std::is_same::value) { printVectorUint32(vec, vl, name); } else if constexpr (std::is_same::value) { @@ -129,24 +148,44 @@ using RVV_VECTOR_TYPE = vuint8m4_t; // Since uint16_t range is 0 to 65535, row stride should be less than 65535/6 = 10922 inline void makeOffsets(int16_t pixel[], vuint16m2_t& v_offset, int64_t row_stride, int patternSize) { - // set min element (pixel[8] = 0 + row_stride * -3) as the base addr uint16_t pixel_u[25]; - pixel_u[0] = row_stride * 6; - pixel_u[1] = 1 + row_stride * 6; - pixel_u[2] = 2 + row_stride * 5; - pixel_u[3] = 3 + row_stride * 4; - pixel_u[4] = 3 + row_stride * 3; - pixel_u[5] = 3 + row_stride * 2; - pixel_u[6] = 2 + row_stride * 1; - pixel_u[7] = 1 + row_stride * 0; - pixel_u[8] = 0 + row_stride * 0; - pixel_u[9] = -1 + row_stride * 0; - pixel_u[10] = -2 + row_stride * 1; - pixel_u[11] = -3 + row_stride * 2; - pixel_u[12] = -3 + row_stride * 3; - pixel_u[13] = -3 + row_stride * 4; - pixel_u[14] = -2 + row_stride * 5; - pixel_u[15] = -1 + row_stride * 6; + + // set min element (pixel[8] = 0 + row_stride * -3) as the base addr + // pixel_u[0] = row_stride * 6; + // pixel_u[1] = 1 + row_stride * 6; + // pixel_u[2] = 2 + row_stride * 5; + // pixel_u[3] = 3 + row_stride * 4; + // pixel_u[4] = 3 + row_stride * 3; + // pixel_u[5] = 3 + row_stride * 2; + // pixel_u[6] = 2 + row_stride * 1; + // pixel_u[7] = 1 + row_stride * 0; + // pixel_u[8] = 0 + row_stride * 0; + // pixel_u[9] = -1 + row_stride * 0; + // pixel_u[10] = -2 + row_stride * 1; + // pixel_u[11] = -3 + row_stride * 2; + // pixel_u[12] = -3 + row_stride * 3; + // pixel_u[13] = -3 + row_stride * 4; + // pixel_u[14] = -2 + row_stride * 5; + // pixel_u[15] = -1 + row_stride * 6; + + // set min element (pixel[9] = -1 + row_stride * -3) as the base addr + pixel_u[0] = 1 + row_stride * 6; + pixel_u[1] = 2 + row_stride * 6; + pixel_u[2] = 3 + row_stride * 5; + pixel_u[3] = 4 + row_stride * 4; + pixel_u[4] = 4 + row_stride * 3; + pixel_u[5] = 4 + row_stride * 2; + pixel_u[6] = 3 + row_stride * 1; + pixel_u[7] = 2 + row_stride * 0; + pixel_u[8] = 1 + row_stride * 0; + pixel_u[9] = 0 + row_stride * 0; + pixel_u[10] = -1 + row_stride * 1; + pixel_u[11] = -2 + row_stride * 2; + pixel_u[12] = -2 + row_stride * 3; + pixel_u[13] = -2 + row_stride * 4; + pixel_u[14] = -1 + row_stride * 5; + pixel_u[15] = 0 + row_stride * 6; + for (int i = 16; i < 25; i++) { pixel_u[i] = pixel_u[i - 16]; @@ -154,7 +193,7 @@ inline void makeOffsets(int16_t pixel[], vuint16m2_t& v_offset, int64_t row_stri v_offset = __riscv_vle16_v_u16m2(pixel_u, 25); for (int i = 0; i < 25; i++) { - pixel[i] = pixel_u[i] - 3 * row_stride; + pixel[i] = pixel_u[i] - 3 * row_stride - 1; } } @@ -163,22 +202,52 @@ template inline T* alignPtr(T* ptr, size_t n=sizeof(T)) return (T*)(((size_t)ptr + n-1) & -n); } -inline uint8_t cornerScore(const uint8_t* ptr, const vuint16m2_t& v_offset, int64_t row_stride) +inline uint8_t cornerScore(const uint8_t* ptr, const vuint16m2_t& v_offset, int64_t row_stride, bool debug = false) { const uint32_t K = 8, N = 16 + K + 1; uint32_t k, v = ptr[0]; int vl = __riscv_vsetvl_e16m2(N); - + std::string msg; + if (debug) + { + msg = cv::format("riscv fast_16: vl=%d, N=%d", vl, N); + CV_LOG_INFO(NULL, msg); + std::cout<<"vanilla offset loading" << std::endl; + // 3073 3074 2563 2052 1540 1028 515 2 1 0 511 1022 1534 2046 2559 3072 3073 3074 2563 2052 1540 1028 515 2 1 + uint16_t pixel[25] = { + 3073, 3074, 2563, 2052, 1540, 1028, 515, 2, + 1, 0, 511, 1022, 1534, 2046, 2559, 3072, + 3073, 3074, 2563, 2052, 1540, 1028, 515, 2, + 1 + }; + uint8_t* shift_ptr; + shift_ptr = ((uint8_t*)ptr) - 3 * row_stride - 1; + for (int i = 0; i < 25; i++) + { + std::cout << (int)(shift_ptr[pixel[i]]) << " "; + } + std::cout << std::endl; + } // use vloxei16_v to indexed ordered load vint16m2_t v_c_pixel = __riscv_vmv_v_x_i16m2((int16_t)v, vl); // vloxei only support positive offset - vuint8m1_t v_d_u8 = __riscv_vloxei16(ptr - 3 * row_stride, v_offset, vl); + vuint8m1_t v_d_u8 = __riscv_vloxei16(ptr - 3 * row_stride - 1, v_offset, vl); vuint16m2_t v_d_u16 = __riscv_vzext_vf2(v_d_u8, vl); vint16m2_t d = __riscv_vreinterpret_i16m2(v_d_u16); // for( k = 0; k < N; k++ ) // d[k] = (uint16_t)(v - ptr[pixel[k]]); + if (debug) + { + printVector(v_offset, vl, "v_offset"); + printVector(d, vl, "d before sub"); + } + d = __riscv_vsub_vv_i16m2(v_c_pixel, d, vl); + if (debug) { + std::cout << "row_stride: " << row_stride << std::endl; + printVector(d, vl, "d"); + } vint16m2_t d_slide = __riscv_vmv_v(d, vl); @@ -191,25 +260,42 @@ inline uint8_t cornerScore(const uint8_t* ptr, const vuint16m2_t& v_offset, int6 for (int i = 0; i < 8; i++) { - d_slide = __riscv_vslide1down(d, (int16_t)0, vl); - ak0 = __riscv_vmin(ak0, d, vl); - bk0 = __riscv_vmax(bk0, d, vl); + d_slide = __riscv_vslide1down(d_slide, (int16_t)0, vl); + ak0 = __riscv_vmin(ak0, d_slide, vl); + bk0 = __riscv_vmax(bk0, d_slide, vl); + } + if(debug) { + printVector(ak0, vl, "ak0"); + printVector(bk0, vl, "bk0"); } q0 = __riscv_vmax(q0, __riscv_vmin(ak0, d, vl), vl); q1 = __riscv_vmin(q1, __riscv_vmax(bk0, d, vl), vl); - d_slide = __riscv_vslide1down(d, (int16_t)0, vl); + if (debug) { + printVector(q0, vl, "q0"); + printVector(q1, vl, "q1"); + } + + d_slide = __riscv_vslide1down(d_slide, (int16_t)0, vl); q0 = __riscv_vmax(q0, __riscv_vmin(ak0, d_slide, vl), vl); q1 = __riscv_vmin(q1, __riscv_vmax(bk0, d_slide, vl), vl); + if (debug) { + printVector(q0, vl, "q0 after slide"); + printVector(q1, vl, "q1 after slide"); + } + q1 = __riscv_vrsub(q1, (int16_t)0, vl); q0 = __riscv_vmax(q0, q1, vl); vint16m1_t res = __riscv_vredmax(q0, __riscv_vmv_s_x_i16m1((int16_t)0, vl), vl); - uint8_t result = (uint8_t)__riscv_vmv_x(res); + if (debug) { + printVector(res, vl, "res"); + } + uint8_t result = (uint8_t)__riscv_vmv_x(res); return result; } @@ -342,8 +428,15 @@ inline int fast_16(const uchar* src_data, size_t src_step, int width, int height if( (m[k / 8] >> (k % 8)) & 1 ) { cornerpos[ncorners++] = j + k; - if(nonmax_suppression) - curr[j + k] = (uchar)cornerScore(ptr + k, v_offset, (int64_t)src_step); + if(nonmax_suppression) { + bool debug = false; + int debug_x = 15; + int debug_y = 357; + debug = (debug_x == i && debug_y == j + k); + curr[j + k] = (uchar)cornerScore(ptr + k, v_offset, (int64_t)src_step, debug); + // msg = cv::format("keypoint = (%d, %d, %f, %f, %d), debug = %d", j + k, i, 7.f, -1.f, curr[j + k], debug); + // CV_LOG_INFO(NULL, msg); + } } } } @@ -370,8 +463,8 @@ inline int fast_16(const uchar* src_data, size_t src_step, int width, int height score > curr[j-1] && score > curr[j] && score > curr[j+1]) ) { KeyPoint kp((float)j, (float)(i-1), 7.f, -1, (float)score); - msg = cv::format("keypoint = (%f, %f, %f, %f, %f)", kp.pt.x, kp.pt.y, kp.size, kp.angle, kp.response); - CV_LOG_INFO(NULL, msg); + // msg = cv::format("keypoint = (%f, %f, %f, %f, %f)", kp.pt.x, kp.pt.y, kp.size, kp.angle, kp.response); + // CV_LOG_INFO(NULL, msg); keypoints.push_back(kp); } }