mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 09:25:45 +08:00
weird memory read
This commit is contained in:
parent
2147287cf5
commit
8d116fa7f1
@ -14,7 +14,7 @@
|
||||
#include <cfloat>
|
||||
|
||||
|
||||
// #define CV_HAL_RVV_FAST_DEBUG
|
||||
#define CV_HAL_RVV_FAST_DEBUG
|
||||
#ifdef CV_HAL_RVV_FAST_DEBUG
|
||||
#include <iostream>
|
||||
#include <cstdio>
|
||||
@ -52,6 +52,17 @@ void printVectorUint16(T vec, int vl, std::string name) {
|
||||
free(data);
|
||||
}
|
||||
template <typename T>
|
||||
void printVectorInt16(T vec, int vl, std::string name) {
|
||||
int16_t* data = (int16_t*)malloc(vl * sizeof(int16_t));
|
||||
__riscv_vse16(data, vec, vl);
|
||||
std::cout << name << ": ";
|
||||
for (int i = 0; i < vl; i++) {
|
||||
std::cout << data[i] << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
free(data);
|
||||
}
|
||||
template <typename T>
|
||||
void printVectorUint32(T vec, int vl, std::string name) {
|
||||
uint32_t* data = (uint32_t*)malloc(vl * sizeof(uint32_t));
|
||||
__riscv_vse32(data, vec, vl);
|
||||
@ -108,6 +119,14 @@ void printVector(T vec, int vl, std::string name) {
|
||||
printVectorUint16(vec, vl, name);
|
||||
} else if constexpr (std::is_same<T, vuint16m8_t>::value) {
|
||||
printVectorUint16(vec, vl, name);
|
||||
} else if constexpr (std::is_same<T, vint16m1_t>::value) {
|
||||
printVectorInt16(vec, vl, name);
|
||||
} else if constexpr (std::is_same<T, vint16m2_t>::value) {
|
||||
printVectorInt16(vec, vl, name);
|
||||
} else if constexpr (std::is_same<T, vint16m4_t>::value) {
|
||||
printVectorInt16(vec, vl, name);
|
||||
} else if constexpr (std::is_same<T, vint16m8_t>::value) {
|
||||
printVectorInt16(vec, vl, name);
|
||||
} else if constexpr (std::is_same<T, vuint32m1_t>::value) {
|
||||
printVectorUint32(vec, vl, name);
|
||||
} else if constexpr (std::is_same<T, vuint32m2_t>::value) {
|
||||
@ -129,24 +148,44 @@ using RVV_VECTOR_TYPE = vuint8m4_t;
|
||||
// Since uint16_t range is 0 to 65535, row stride should be less than 65535/6 = 10922
|
||||
inline void makeOffsets(int16_t pixel[], vuint16m2_t& v_offset, int64_t row_stride, int patternSize)
|
||||
{
|
||||
// set min element (pixel[8] = 0 + row_stride * -3) as the base addr
|
||||
uint16_t pixel_u[25];
|
||||
pixel_u[0] = row_stride * 6;
|
||||
pixel_u[1] = 1 + row_stride * 6;
|
||||
pixel_u[2] = 2 + row_stride * 5;
|
||||
pixel_u[3] = 3 + row_stride * 4;
|
||||
pixel_u[4] = 3 + row_stride * 3;
|
||||
pixel_u[5] = 3 + row_stride * 2;
|
||||
pixel_u[6] = 2 + row_stride * 1;
|
||||
pixel_u[7] = 1 + row_stride * 0;
|
||||
pixel_u[8] = 0 + row_stride * 0;
|
||||
pixel_u[9] = -1 + row_stride * 0;
|
||||
pixel_u[10] = -2 + row_stride * 1;
|
||||
pixel_u[11] = -3 + row_stride * 2;
|
||||
pixel_u[12] = -3 + row_stride * 3;
|
||||
pixel_u[13] = -3 + row_stride * 4;
|
||||
pixel_u[14] = -2 + row_stride * 5;
|
||||
pixel_u[15] = -1 + row_stride * 6;
|
||||
|
||||
// set min element (pixel[8] = 0 + row_stride * -3) as the base addr
|
||||
// pixel_u[0] = row_stride * 6;
|
||||
// pixel_u[1] = 1 + row_stride * 6;
|
||||
// pixel_u[2] = 2 + row_stride * 5;
|
||||
// pixel_u[3] = 3 + row_stride * 4;
|
||||
// pixel_u[4] = 3 + row_stride * 3;
|
||||
// pixel_u[5] = 3 + row_stride * 2;
|
||||
// pixel_u[6] = 2 + row_stride * 1;
|
||||
// pixel_u[7] = 1 + row_stride * 0;
|
||||
// pixel_u[8] = 0 + row_stride * 0;
|
||||
// pixel_u[9] = -1 + row_stride * 0;
|
||||
// pixel_u[10] = -2 + row_stride * 1;
|
||||
// pixel_u[11] = -3 + row_stride * 2;
|
||||
// pixel_u[12] = -3 + row_stride * 3;
|
||||
// pixel_u[13] = -3 + row_stride * 4;
|
||||
// pixel_u[14] = -2 + row_stride * 5;
|
||||
// pixel_u[15] = -1 + row_stride * 6;
|
||||
|
||||
// set min element (pixel[9] = -1 + row_stride * -3) as the base addr
|
||||
pixel_u[0] = 1 + row_stride * 6;
|
||||
pixel_u[1] = 2 + row_stride * 6;
|
||||
pixel_u[2] = 3 + row_stride * 5;
|
||||
pixel_u[3] = 4 + row_stride * 4;
|
||||
pixel_u[4] = 4 + row_stride * 3;
|
||||
pixel_u[5] = 4 + row_stride * 2;
|
||||
pixel_u[6] = 3 + row_stride * 1;
|
||||
pixel_u[7] = 2 + row_stride * 0;
|
||||
pixel_u[8] = 1 + row_stride * 0;
|
||||
pixel_u[9] = 0 + row_stride * 0;
|
||||
pixel_u[10] = -1 + row_stride * 1;
|
||||
pixel_u[11] = -2 + row_stride * 2;
|
||||
pixel_u[12] = -2 + row_stride * 3;
|
||||
pixel_u[13] = -2 + row_stride * 4;
|
||||
pixel_u[14] = -1 + row_stride * 5;
|
||||
pixel_u[15] = 0 + row_stride * 6;
|
||||
|
||||
for (int i = 16; i < 25; i++)
|
||||
{
|
||||
pixel_u[i] = pixel_u[i - 16];
|
||||
@ -154,7 +193,7 @@ inline void makeOffsets(int16_t pixel[], vuint16m2_t& v_offset, int64_t row_stri
|
||||
v_offset = __riscv_vle16_v_u16m2(pixel_u, 25);
|
||||
for (int i = 0; i < 25; i++)
|
||||
{
|
||||
pixel[i] = pixel_u[i] - 3 * row_stride;
|
||||
pixel[i] = pixel_u[i] - 3 * row_stride - 1;
|
||||
}
|
||||
}
|
||||
|
||||
@ -163,22 +202,52 @@ template<typename T> inline T* alignPtr(T* ptr, size_t n=sizeof(T))
|
||||
return (T*)(((size_t)ptr + n-1) & -n);
|
||||
}
|
||||
|
||||
inline uint8_t cornerScore(const uint8_t* ptr, const vuint16m2_t& v_offset, int64_t row_stride)
|
||||
inline uint8_t cornerScore(const uint8_t* ptr, const vuint16m2_t& v_offset, int64_t row_stride, bool debug = false)
|
||||
{
|
||||
const uint32_t K = 8, N = 16 + K + 1;
|
||||
uint32_t k, v = ptr[0];
|
||||
|
||||
int vl = __riscv_vsetvl_e16m2(N);
|
||||
|
||||
std::string msg;
|
||||
if (debug)
|
||||
{
|
||||
msg = cv::format("riscv fast_16: vl=%d, N=%d", vl, N);
|
||||
CV_LOG_INFO(NULL, msg);
|
||||
std::cout<<"vanilla offset loading" << std::endl;
|
||||
// 3073 3074 2563 2052 1540 1028 515 2 1 0 511 1022 1534 2046 2559 3072 3073 3074 2563 2052 1540 1028 515 2 1
|
||||
uint16_t pixel[25] = {
|
||||
3073, 3074, 2563, 2052, 1540, 1028, 515, 2,
|
||||
1, 0, 511, 1022, 1534, 2046, 2559, 3072,
|
||||
3073, 3074, 2563, 2052, 1540, 1028, 515, 2,
|
||||
1
|
||||
};
|
||||
uint8_t* shift_ptr;
|
||||
shift_ptr = ((uint8_t*)ptr) - 3 * row_stride - 1;
|
||||
for (int i = 0; i < 25; i++)
|
||||
{
|
||||
std::cout << (int)(shift_ptr[pixel[i]]) << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
// use vloxei16_v to indexed ordered load
|
||||
vint16m2_t v_c_pixel = __riscv_vmv_v_x_i16m2((int16_t)v, vl);
|
||||
// vloxei only support positive offset
|
||||
vuint8m1_t v_d_u8 = __riscv_vloxei16(ptr - 3 * row_stride, v_offset, vl);
|
||||
vuint8m1_t v_d_u8 = __riscv_vloxei16(ptr - 3 * row_stride - 1, v_offset, vl);
|
||||
vuint16m2_t v_d_u16 = __riscv_vzext_vf2(v_d_u8, vl);
|
||||
vint16m2_t d = __riscv_vreinterpret_i16m2(v_d_u16);
|
||||
// for( k = 0; k < N; k++ )
|
||||
// d[k] = (uint16_t)(v - ptr[pixel[k]]);
|
||||
if (debug)
|
||||
{
|
||||
printVector(v_offset, vl, "v_offset");
|
||||
printVector(d, vl, "d before sub");
|
||||
}
|
||||
|
||||
d = __riscv_vsub_vv_i16m2(v_c_pixel, d, vl);
|
||||
if (debug) {
|
||||
std::cout << "row_stride: " << row_stride << std::endl;
|
||||
printVector(d, vl, "d");
|
||||
}
|
||||
|
||||
vint16m2_t d_slide = __riscv_vmv_v(d, vl);
|
||||
|
||||
@ -191,25 +260,42 @@ inline uint8_t cornerScore(const uint8_t* ptr, const vuint16m2_t& v_offset, int6
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
{
|
||||
d_slide = __riscv_vslide1down(d, (int16_t)0, vl);
|
||||
ak0 = __riscv_vmin(ak0, d, vl);
|
||||
bk0 = __riscv_vmax(bk0, d, vl);
|
||||
d_slide = __riscv_vslide1down(d_slide, (int16_t)0, vl);
|
||||
ak0 = __riscv_vmin(ak0, d_slide, vl);
|
||||
bk0 = __riscv_vmax(bk0, d_slide, vl);
|
||||
}
|
||||
if(debug) {
|
||||
printVector(ak0, vl, "ak0");
|
||||
printVector(bk0, vl, "bk0");
|
||||
}
|
||||
|
||||
q0 = __riscv_vmax(q0, __riscv_vmin(ak0, d, vl), vl);
|
||||
q1 = __riscv_vmin(q1, __riscv_vmax(bk0, d, vl), vl);
|
||||
|
||||
d_slide = __riscv_vslide1down(d, (int16_t)0, vl);
|
||||
if (debug) {
|
||||
printVector(q0, vl, "q0");
|
||||
printVector(q1, vl, "q1");
|
||||
}
|
||||
|
||||
d_slide = __riscv_vslide1down(d_slide, (int16_t)0, vl);
|
||||
q0 = __riscv_vmax(q0, __riscv_vmin(ak0, d_slide, vl), vl);
|
||||
q1 = __riscv_vmin(q1, __riscv_vmax(bk0, d_slide, vl), vl);
|
||||
|
||||
if (debug) {
|
||||
printVector(q0, vl, "q0 after slide");
|
||||
printVector(q1, vl, "q1 after slide");
|
||||
}
|
||||
|
||||
q1 = __riscv_vrsub(q1, (int16_t)0, vl);
|
||||
q0 = __riscv_vmax(q0, q1, vl);
|
||||
|
||||
vint16m1_t res = __riscv_vredmax(q0, __riscv_vmv_s_x_i16m1((int16_t)0, vl), vl);
|
||||
|
||||
uint8_t result = (uint8_t)__riscv_vmv_x(res);
|
||||
if (debug) {
|
||||
printVector(res, vl, "res");
|
||||
}
|
||||
|
||||
uint8_t result = (uint8_t)__riscv_vmv_x(res);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -342,8 +428,15 @@ inline int fast_16(const uchar* src_data, size_t src_step, int width, int height
|
||||
if( (m[k / 8] >> (k % 8)) & 1 )
|
||||
{
|
||||
cornerpos[ncorners++] = j + k;
|
||||
if(nonmax_suppression)
|
||||
curr[j + k] = (uchar)cornerScore(ptr + k, v_offset, (int64_t)src_step);
|
||||
if(nonmax_suppression) {
|
||||
bool debug = false;
|
||||
int debug_x = 15;
|
||||
int debug_y = 357;
|
||||
debug = (debug_x == i && debug_y == j + k);
|
||||
curr[j + k] = (uchar)cornerScore(ptr + k, v_offset, (int64_t)src_step, debug);
|
||||
// msg = cv::format("keypoint = (%d, %d, %f, %f, %d), debug = %d", j + k, i, 7.f, -1.f, curr[j + k], debug);
|
||||
// CV_LOG_INFO(NULL, msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -370,8 +463,8 @@ inline int fast_16(const uchar* src_data, size_t src_step, int width, int height
|
||||
score > curr[j-1] && score > curr[j] && score > curr[j+1]) )
|
||||
{
|
||||
KeyPoint kp((float)j, (float)(i-1), 7.f, -1, (float)score);
|
||||
msg = cv::format("keypoint = (%f, %f, %f, %f, %f)", kp.pt.x, kp.pt.y, kp.size, kp.angle, kp.response);
|
||||
CV_LOG_INFO(NULL, msg);
|
||||
// msg = cv::format("keypoint = (%f, %f, %f, %f, %f)", kp.pt.x, kp.pt.y, kp.size, kp.angle, kp.response);
|
||||
// CV_LOG_INFO(NULL, msg);
|
||||
keypoints.push_back(kp);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user