mirror of
https://github.com/opencv/opencv.git
synced 2024-11-27 12:40:05 +08:00
Merge pull request #17253 from YashasSamaga:cuda4dnn-region-yolov4
This commit is contained in:
commit
583f4b9633
@ -29,7 +29,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
__global__ void region_box(
|
||||
Span<T> output, View<T> input, View<T> bias,
|
||||
size_type boxes_per_cell, size_type box_size,
|
||||
size_type rows, size_type cols,
|
||||
size_type rows, size_type cols, T scale_x_y,
|
||||
size_type height_norm, size_type width_norm,
|
||||
T object_prob_cutoff)
|
||||
{
|
||||
@ -48,8 +48,10 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
const auto x = (box_index % row_inner_size) / col_inner_size;
|
||||
|
||||
using device::fast_sigmoid;
|
||||
output[box_offset + 0] = (T(x) + fast_sigmoid(input[box_offset + 0])) / T(cols);
|
||||
output[box_offset + 1] = (T(y) + fast_sigmoid(input[box_offset + 1])) / T(rows);
|
||||
const auto tmp_x = (fast_sigmoid(input[box_offset + 0]) - static_cast<T>(0.5)) * scale_x_y + static_cast<T>(0.5);
|
||||
const auto tmp_y = (fast_sigmoid(input[box_offset + 1]) - static_cast<T>(0.5)) * scale_x_y + static_cast<T>(0.5);
|
||||
output[box_offset + 0] = (T(x) + tmp_x) / T(cols);
|
||||
output[box_offset + 1] = (T(y) + tmp_y) / T(rows);
|
||||
|
||||
vector2_type bias_xy;
|
||||
v_load(bias_xy, bias_vPtr[box_of_the_cell]);
|
||||
@ -143,7 +145,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
void region(const Stream& stream, Span<T> output, View<T> input, View<T> bias,
|
||||
T object_prob_cutoff, T class_prob_cutoff,
|
||||
std::size_t boxes_per_cell, std::size_t box_size,
|
||||
std::size_t rows, std::size_t cols,
|
||||
std::size_t rows, std::size_t cols, T scale_x_y,
|
||||
std::size_t height_norm, std::size_t width_norm,
|
||||
bool if_true_sigmoid_else_softmax /* true = sigmoid, false = softmax */)
|
||||
{
|
||||
@ -155,7 +157,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
auto box_policy = make_policy(box_kernel, output.size() / box_size, 0, stream);
|
||||
launch_kernel(box_kernel, box_policy,
|
||||
output, input, bias, boxes_per_cell, box_size,
|
||||
rows, cols, height_norm, width_norm,
|
||||
rows, cols, scale_x_y, height_norm, width_norm,
|
||||
object_prob_cutoff);
|
||||
|
||||
if (if_true_sigmoid_else_softmax) {
|
||||
@ -171,10 +173,10 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void region(const Stream&, Span<__half>, View<__half>, View<__half>,
|
||||
__half, __half, std::size_t, std::size_t, std::size_t, std::size_t, std::size_t, std::size_t, bool);
|
||||
__half, __half, std::size_t, std::size_t, std::size_t, std::size_t, __half, std::size_t, std::size_t, bool);
|
||||
#endif
|
||||
|
||||
template void region(const Stream&, Span<float>, View<float>, View<float>,
|
||||
float, float, std::size_t, std::size_t, std::size_t, std::size_t, std::size_t, std::size_t, bool);
|
||||
float, float, std::size_t, std::size_t, std::size_t, std::size_t, float, std::size_t, std::size_t, bool);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
|
@ -16,7 +16,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
void region(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, csl::View<T> bias,
|
||||
T object_prob_cutoff, T class_prob_cutoff,
|
||||
std::size_t boxes_per_cell, std::size_t box_size,
|
||||
std::size_t rows, std::size_t cols,
|
||||
std::size_t rows, std::size_t cols, T scale_x_y,
|
||||
std::size_t height_norm, std::size_t width_norm,
|
||||
bool if_true_sigmoid_else_softmax);
|
||||
|
||||
|
@ -48,14 +48,13 @@ namespace cv { namespace dnn { namespace cuda4dnn {
|
||||
*
|
||||
* actual class probability = conditional_class_prob * object_prob
|
||||
*/
|
||||
std::size_t classes, boxes_per_cell;
|
||||
std::size_t width_norm, height_norm;
|
||||
T scale_x_y;
|
||||
|
||||
/* method for reducing class scores to probabilities */
|
||||
SquashMethod squash_method;
|
||||
|
||||
std::size_t classes, boxes_per_cell;
|
||||
|
||||
std::size_t width_norm, height_norm;
|
||||
|
||||
/* prob cutoffs below which the prediction is nulled */
|
||||
T object_prob_cutoff;
|
||||
T class_prob_cutoff;
|
||||
@ -81,8 +80,9 @@ namespace cv { namespace dnn { namespace cuda4dnn {
|
||||
width_norm = config.width_norm;
|
||||
height_norm = config.height_norm;
|
||||
|
||||
squash_type = config.squash_method;
|
||||
scale_x_y = config.scale_x_y;
|
||||
|
||||
squash_type = config.squash_method;
|
||||
object_prob_cutoff = config.object_prob_cutoff;
|
||||
class_prob_cutoff = config.class_prob_cutoff;
|
||||
|
||||
@ -113,7 +113,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
|
||||
kernels::region<T>(stream, output, input, biasTensor,
|
||||
object_prob_cutoff, class_prob_cutoff,
|
||||
boxes_per_cell, cell_box_size,
|
||||
rows, cols,
|
||||
rows, cols, scale_x_y,
|
||||
height_norm, width_norm,
|
||||
if_true_sigmoid_else_softmax
|
||||
);
|
||||
@ -170,9 +170,11 @@ namespace cv { namespace dnn { namespace cuda4dnn {
|
||||
csl::Tensor<T> biasTensor;
|
||||
std::size_t classes, boxes_per_cell;
|
||||
std::size_t width_norm, height_norm;
|
||||
SquashMethod squash_type;
|
||||
T scale_x_y;
|
||||
|
||||
SquashMethod squash_type;
|
||||
T object_prob_cutoff, class_prob_cutoff;
|
||||
|
||||
T nms_iou_threshold;
|
||||
};
|
||||
|
||||
|
@ -405,6 +405,8 @@ public:
|
||||
config.height_norm = height_norm;
|
||||
config.width_norm = width_norm;
|
||||
|
||||
config.scale_x_y = scale_x_y;
|
||||
|
||||
config.object_prob_cutoff = (classfix == -1) ? 0.5 : 0.0;
|
||||
config.class_prob_cutoff = thresh;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user