Merge pull request #17253 from YashasSamaga:cuda4dnn-region-yolov4

This commit is contained in:
Alexander Alekhin 2020-05-11 19:23:55 +00:00
commit 583f4b9633
4 changed files with 21 additions and 15 deletions

View File

@ -29,7 +29,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
__global__ void region_box(
Span<T> output, View<T> input, View<T> bias,
size_type boxes_per_cell, size_type box_size,
size_type rows, size_type cols,
size_type rows, size_type cols, T scale_x_y,
size_type height_norm, size_type width_norm,
T object_prob_cutoff)
{
@ -48,8 +48,10 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
const auto x = (box_index % row_inner_size) / col_inner_size;
using device::fast_sigmoid;
output[box_offset + 0] = (T(x) + fast_sigmoid(input[box_offset + 0])) / T(cols);
output[box_offset + 1] = (T(y) + fast_sigmoid(input[box_offset + 1])) / T(rows);
const auto tmp_x = (fast_sigmoid(input[box_offset + 0]) - static_cast<T>(0.5)) * scale_x_y + static_cast<T>(0.5);
const auto tmp_y = (fast_sigmoid(input[box_offset + 1]) - static_cast<T>(0.5)) * scale_x_y + static_cast<T>(0.5);
output[box_offset + 0] = (T(x) + tmp_x) / T(cols);
output[box_offset + 1] = (T(y) + tmp_y) / T(rows);
vector2_type bias_xy;
v_load(bias_xy, bias_vPtr[box_of_the_cell]);
@ -143,7 +145,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
void region(const Stream& stream, Span<T> output, View<T> input, View<T> bias,
T object_prob_cutoff, T class_prob_cutoff,
std::size_t boxes_per_cell, std::size_t box_size,
std::size_t rows, std::size_t cols,
std::size_t rows, std::size_t cols, T scale_x_y,
std::size_t height_norm, std::size_t width_norm,
bool if_true_sigmoid_else_softmax /* true = sigmoid, false = softmax */)
{
@ -155,7 +157,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
auto box_policy = make_policy(box_kernel, output.size() / box_size, 0, stream);
launch_kernel(box_kernel, box_policy,
output, input, bias, boxes_per_cell, box_size,
rows, cols, height_norm, width_norm,
rows, cols, scale_x_y, height_norm, width_norm,
object_prob_cutoff);
if (if_true_sigmoid_else_softmax) {
@ -171,10 +173,10 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template void region(const Stream&, Span<__half>, View<__half>, View<__half>,
__half, __half, std::size_t, std::size_t, std::size_t, std::size_t, std::size_t, std::size_t, bool);
__half, __half, std::size_t, std::size_t, std::size_t, std::size_t, __half, std::size_t, std::size_t, bool);
#endif
template void region(const Stream&, Span<float>, View<float>, View<float>,
float, float, std::size_t, std::size_t, std::size_t, std::size_t, std::size_t, std::size_t, bool);
float, float, std::size_t, std::size_t, std::size_t, std::size_t, float, std::size_t, std::size_t, bool);
}}}} /* namespace cv::dnn::cuda4dnn::kernels */

View File

@ -16,7 +16,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
void region(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, csl::View<T> bias,
T object_prob_cutoff, T class_prob_cutoff,
std::size_t boxes_per_cell, std::size_t box_size,
std::size_t rows, std::size_t cols,
std::size_t rows, std::size_t cols, T scale_x_y,
std::size_t height_norm, std::size_t width_norm,
bool if_true_sigmoid_else_softmax);

View File

@ -48,14 +48,13 @@ namespace cv { namespace dnn { namespace cuda4dnn {
*
* actual class probability = conditional_class_prob * object_prob
*/
std::size_t classes, boxes_per_cell;
std::size_t width_norm, height_norm;
T scale_x_y;
/* method for reducing class scores to probabilities */
SquashMethod squash_method;
std::size_t classes, boxes_per_cell;
std::size_t width_norm, height_norm;
/* prob cutoffs below which the prediction is nulled */
T object_prob_cutoff;
T class_prob_cutoff;
@ -81,8 +80,9 @@ namespace cv { namespace dnn { namespace cuda4dnn {
width_norm = config.width_norm;
height_norm = config.height_norm;
squash_type = config.squash_method;
scale_x_y = config.scale_x_y;
squash_type = config.squash_method;
object_prob_cutoff = config.object_prob_cutoff;
class_prob_cutoff = config.class_prob_cutoff;
@ -113,7 +113,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
kernels::region<T>(stream, output, input, biasTensor,
object_prob_cutoff, class_prob_cutoff,
boxes_per_cell, cell_box_size,
rows, cols,
rows, cols, scale_x_y,
height_norm, width_norm,
if_true_sigmoid_else_softmax
);
@ -170,9 +170,11 @@ namespace cv { namespace dnn { namespace cuda4dnn {
csl::Tensor<T> biasTensor;
std::size_t classes, boxes_per_cell;
std::size_t width_norm, height_norm;
SquashMethod squash_type;
T scale_x_y;
SquashMethod squash_type;
T object_prob_cutoff, class_prob_cutoff;
T nms_iou_threshold;
};

View File

@ -405,6 +405,8 @@ public:
config.height_norm = height_norm;
config.width_norm = width_norm;
config.scale_x_y = scale_x_y;
config.object_prob_cutoff = (classfix == -1) ? 0.5 : 0.0;
config.class_prob_cutoff = thresh;