opencv/modules/dnn/src/opencl/detection_output.cl

/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

// Element types used by the kernels in this file: Dtype is the scalar type,
// Dtype4 the matching 4-component vector type loaded with vload4().
#define Dtype float
#define Dtype4 float4

// Decodes CORNER-encoded location predictions: every output coordinate is the
// matching prior-box coordinate plus the (optionally variance-scaled) offset.
//
// One loop iteration produces one element of bbox_data. For a flat index:
//   index % 4                                -> coordinate (xmin, ymin, xmax, ymax)
//   (index / 4) % num_loc_classes            -> class
//   (index / 4 / num_loc_classes) % num_priors -> prior box
//
// Parameters:
//   nthreads                   - number of bbox_data elements to produce
//   loc_data                   - predicted offsets, four consecutive values per box
//   prior_data                 - prior boxes (4 values per prior); the per-prior
//                                variances are read starting at offset num_priors * 4
//   variance_encoded_in_target - non-zero: use offsets as they are;
//                                zero: multiply offsets by the prior variances
//   num_priors                 - number of prior boxes
//   share_location             - non-zero: label is treated as -1 for every class
//   num_loc_classes            - number of location classes
//   background_label_id        - elements whose label equals this id are skipped
//   clip_bbox                  - non-zero: clamp the result to [0, 1]
//   locPredTransposed          - non-zero: offsets are stored as (y, x, y, x)
//                                instead of (x, y, x, y)
//   bbox_data                  - output, same indexing as loc_data
__kernel void DecodeBBoxesCORNER(const int nthreads,
                                 __global const Dtype* loc_data,
                                 __global const Dtype* prior_data,
                                 const int variance_encoded_in_target,
                                 const int num_priors,
                                 const int share_location,
                                 const int num_loc_classes,
                                 const int background_label_id,
                                 const int clip_bbox,
                                 const int locPredTransposed,
                                 __global Dtype* bbox_data)
{
    for (int index = get_global_id(0); index < nthreads; index += get_global_size(0))
    {
        Dtype bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax;
        const int i = index % 4;
        const int p = ((index / 4 / num_loc_classes) % num_priors) * 4;

        const int c = (index / 4) % num_loc_classes;
        const int label = share_location ? -1 : c;
        // Ignore background class. Use continue rather than return: when the
        // global size is smaller than nthreads this work-item still has further
        // indices to process in later iterations of the loop.
        if (label == background_label_id)
            continue;

        // Load all four offsets of the box that `index` belongs to.
        Dtype4 loc_vec = vload4(0, loc_data + index - i);
        Dtype4 bbox_vec, prior_variance;
        if (variance_encoded_in_target)
        {
            bbox_vec = loc_vec;
        } else {
            // Variances are stored after the num_priors * 4 prior coordinates.
            const int start_index = num_priors * 4 + p;
            prior_variance = vload4(0, prior_data + start_index);
            bbox_vec = loc_vec * prior_variance;
        }

        if (locPredTransposed)
        {
            bbox_ymin = bbox_vec.x;
            bbox_xmin = bbox_vec.y;
            bbox_ymax = bbox_vec.z;
            bbox_xmax = bbox_vec.w;
        } else {
            bbox_xmin = bbox_vec.x;
            bbox_ymin = bbox_vec.y;
            bbox_xmax = bbox_vec.z;
            bbox_ymax = bbox_vec.w;
        }

        Dtype4 prior_vec = vload4(0, prior_data + p);
        // Initialised so that val is never read unset; i is always 0..3 here.
        Dtype val = (Dtype)0;
        switch (i)
        {
            case 0:
                val = prior_vec.x + bbox_xmin;
                break;
            case 1:
                val = prior_vec.y + bbox_ymin;
                break;
            case 2:
                val = prior_vec.z + bbox_xmax;
                break;
            case 3:
                val = prior_vec.w + bbox_ymax;
                break;
        }

        if (clip_bbox)
            val = max(min(val, (Dtype)1.), (Dtype)0.);

        bbox_data[index] = val;
    }
}

// Decodes CENTER_SIZE-encoded location predictions: the first two offsets move
// the prior-box centre (scaled by prior width/height), the last two scale the
// prior width/height through exp(); the result is converted back to corners.
//
// One loop iteration produces one element of bbox_data. For a flat index:
//   index % 4                                -> coordinate (xmin, ymin, xmax, ymax)
//   (index / 4) % num_loc_classes            -> class
//   (index / 4 / num_loc_classes) % num_priors -> prior box
//
// Parameters:
//   nthreads                   - number of bbox_data elements to produce
//   loc_data                   - predicted offsets, four consecutive values per box
//   prior_data                 - prior boxes (4 values per prior); the per-prior
//                                variances are read starting at offset num_priors * 4
//   variance_encoded_in_target - non-zero: use offsets as they are;
//                                zero: multiply offsets by the prior variances
//   num_priors                 - number of prior boxes
//   share_location             - non-zero: label is treated as -1 for every class
//   num_loc_classes            - number of location classes
//   background_label_id        - elements whose label equals this id are skipped
//   clip_bbox                  - non-zero: clamp the result to [0, 1]
//   locPredTransposed          - non-zero: offsets are stored as (y, x, y, x)
//                                instead of (x, y, x, y)
//   bbox_data                  - output, same indexing as loc_data
__kernel void DecodeBBoxesCENTER_SIZE(const int nthreads,
                                      __global const Dtype* loc_data,
                                      __global const Dtype* prior_data,
                                      const int variance_encoded_in_target,
                                      const int num_priors,
                                      const int share_location,
                                      const int num_loc_classes,
                                      const int background_label_id,
                                      const int clip_bbox,
                                      const int locPredTransposed,
                                      __global Dtype* bbox_data)
{
    // Typed constant keeps the arithmetic in Dtype instead of promoting to
    // double through an unsuffixed literal.
    const Dtype half_scale = (Dtype)0.5f;

    for (int index = get_global_id(0); index < nthreads; index += get_global_size(0))
    {
        Dtype bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax;
        const int i = index % 4;
        const int p = ((index / 4 / num_loc_classes) % num_priors) * 4;

        const int c = (index / 4) % num_loc_classes;
        const int label = share_location ? -1 : c;
        // Ignore background class. Use continue rather than return: when the
        // global size is smaller than nthreads this work-item still has further
        // indices to process in later iterations of the loop.
        if (label == background_label_id)
            continue;

        // Load all four offsets of the box that `index` belongs to.
        Dtype4 loc_vec = vload4(0, loc_data + index - i);
        Dtype4 bbox_vec, prior_variance;
        if (variance_encoded_in_target)
        {
            bbox_vec = loc_vec;
        } else {
            // Variances are stored after the num_priors * 4 prior coordinates.
            const int start_index = num_priors * 4 + p;
            prior_variance = vload4(0, prior_data + start_index);
            bbox_vec = loc_vec * prior_variance;
        }

        if (locPredTransposed)
        {
            bbox_ymin = bbox_vec.x;
            bbox_xmin = bbox_vec.y;
            bbox_ymax = bbox_vec.z;
            bbox_xmax = bbox_vec.w;
        } else {
            bbox_xmin = bbox_vec.x;
            bbox_ymin = bbox_vec.y;
            bbox_xmax = bbox_vec.z;
            bbox_ymax = bbox_vec.w;
        }

        Dtype4 prior_vec = vload4(0, prior_data + p);
        Dtype prior_width = prior_vec.z - prior_vec.x;
        Dtype prior_height = prior_vec.w - prior_vec.y;
        Dtype prior_center_x = (prior_vec.x + prior_vec.z) * half_scale;
        Dtype prior_center_y = (prior_vec.y + prior_vec.w) * half_scale;

        // In this encoding the "min" slots carry the centre offsets and the
        // "max" slots carry the log-scale of width and height.
        Dtype decode_bbox_center_x, decode_bbox_center_y;
        Dtype decode_bbox_width, decode_bbox_height;
        decode_bbox_center_x = bbox_xmin * prior_width + prior_center_x;
        decode_bbox_center_y = bbox_ymin * prior_height + prior_center_y;
        decode_bbox_width = exp(bbox_xmax) * prior_width;
        decode_bbox_height = exp(bbox_ymax) * prior_height;

        // Initialised so that val is never read unset; i is always 0..3 here.
        Dtype val = (Dtype)0;
        switch (i)
        {
            case 0:
                val = decode_bbox_center_x - decode_bbox_width * half_scale;
                break;
            case 1:
                val = decode_bbox_center_y - decode_bbox_height * half_scale;
                break;
            case 2:
                val = decode_bbox_center_x + decode_bbox_width * half_scale;
                break;
            case 3:
                val = decode_bbox_center_y + decode_bbox_height * half_scale;
                break;
        }

        if (clip_bbox)
            val = max(min(val, (Dtype)1.), (Dtype)0.);

        bbox_data[index] = val;
    }
}
detection_output layer ocl implementation Signed-off-by: Li Peng <peng.li@intel.com> 2017-11-30 20:43:49 +08:00			`/*M///////////////////////////////////////////////////////////////////////////////////////`
			`//`
			`// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.`
			`//`
			`// By downloading, copying, installing or using the software you agree to this license.`
			`// If you do not agree to this license, do not download, install,`
			`// copy or use the software.`
			`//`
			`//`
			`// License Agreement`
			`// For Open Source Computer Vision Library`
			`//`
			`// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.`
			`// Third party copyrights are property of their respective owners.`
			`//`
			`// Redistribution and use in source and binary forms, with or without modification,`
			`// are permitted provided that the following conditions are met:`
			`//`
			`// * Redistribution's of source code must retain the above copyright notice,`
			`// this list of conditions and the following disclaimer.`
			`//`
			`// * Redistribution's in binary form must reproduce the above copyright notice,`
			`// this list of conditions and the following disclaimer in the documentation`
			`// and/or other materials provided with the distribution.`
			`//`
			`// * The name of the copyright holders may not be used to endorse or promote products`
			`// derived from this software without specific prior written permission.`
			`//`
			`// This software is provided by the copyright holders and contributors "as is" and`
			`// any express or implied warranties, including, but not limited to, the implied`
			`// warranties of merchantability and fitness for a particular purpose are disclaimed.`
			`// In no event shall the Intel Corporation or contributors be liable for any direct,`
			`// indirect, incidental, special, exemplary, or consequential damages`
			`// (including, but not limited to, procurement of substitute goods or services;`
			`// loss of use, data, or profits; or business interruption) however caused`
			`// and on any theory of liability, whether in contract, strict liability,`
			`// or tort (including negligence or otherwise) arising in any way out of`
			`// the use of this software, even if advised of the possibility of such damage.`
			`//`
			`//M*/`

			`#define Dtype float`
			`#define Dtype4 float4`

			`__kernel void DecodeBBoxesCORNER(const int nthreads,`
			`__global const Dtype* loc_data,`
			`__global const Dtype* prior_data,`
			`const int variance_encoded_in_target,`
			`const int num_priors,`
			`const int share_location,`
			`const int num_loc_classes,`
			`const int background_label_id,`
			`const int clip_bbox,`
ocl fix for detection_output and prior_box layer Signed-off-by: Li Peng <peng.li@intel.com> 2018-02-11 18:12:25 +08:00			`const int locPredTransposed,`
detection_output layer ocl implementation Signed-off-by: Li Peng <peng.li@intel.com> 2017-11-30 20:43:49 +08:00			`__global Dtype* bbox_data)`
			`{`
			`for (int index = get_global_id(0); index < nthreads; index += get_global_size(0))`
			`{`
			`Dtype bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax;`
			`const int i = index % 4;`
			`const int p = ((index / 4 / num_loc_classes) % num_priors) * 4;`

			`const int c = (index / 4) % num_loc_classes;`
			`int label = share_location ? -1 : c;`
			`if (label == background_label_id)`
			`return; // Ignore background class.`

			`Dtype4 loc_vec = vload4(0, loc_data + index - i);`
			`Dtype4 bbox_vec, prior_variance;`
			`if (variance_encoded_in_target)`
			`{`
			`bbox_vec = loc_vec;`
			`} else {`
			`const int start_index = num_priors * 4 + p;`
			`prior_variance = vload4(0, prior_data + start_index);`
			`bbox_vec = loc_vec * prior_variance;`
			`}`

ocl fix for detection_output and prior_box layer Signed-off-by: Li Peng <peng.li@intel.com> 2018-02-11 18:12:25 +08:00			`if (locPredTransposed)`
			`{`
			`bbox_ymin = bbox_vec.x;`
			`bbox_xmin = bbox_vec.y;`
			`bbox_ymax = bbox_vec.z;`
			`bbox_xmax = bbox_vec.w;`
			`} else {`
			`bbox_xmin = bbox_vec.x;`
			`bbox_ymin = bbox_vec.y;`
			`bbox_xmax = bbox_vec.z;`
			`bbox_ymax = bbox_vec.w;`
			`}`
detection_output layer ocl implementation Signed-off-by: Li Peng <peng.li@intel.com> 2017-11-30 20:43:49 +08:00
			`Dtype4 prior_vec = vload4(0, prior_data + p);`
			`Dtype val;`
			`switch (i)`
			`{`
			`case 0:`
			`val = prior_vec.x + bbox_xmin;`
			`break;`
			`case 1:`
			`val = prior_vec.y + bbox_ymin;`
			`break;`
			`case 2:`
			`val = prior_vec.z + bbox_xmax;`
			`break;`
			`case 3:`
			`val = prior_vec.w + bbox_ymax;`
			`break;`
			`}`

			`if (clip_bbox)`
			`val = max(min(val, (Dtype)1.), (Dtype)0.);`

			`bbox_data[index] = val;`
			`}`
			`}`

			`__kernel void DecodeBBoxesCENTER_SIZE(const int nthreads,`
			`__global const Dtype* loc_data,`
			`__global const Dtype* prior_data,`
			`const int variance_encoded_in_target,`
			`const int num_priors,`
			`const int share_location,`
			`const int num_loc_classes,`
			`const int background_label_id,`
			`const int clip_bbox,`
ocl fix for detection_output and prior_box layer Signed-off-by: Li Peng <peng.li@intel.com> 2018-02-11 18:12:25 +08:00			`const int locPredTransposed,`
detection_output layer ocl implementation Signed-off-by: Li Peng <peng.li@intel.com> 2017-11-30 20:43:49 +08:00			`__global Dtype* bbox_data)`
			`{`
			`for (int index = get_global_id(0); index < nthreads; index += get_global_size(0))`
			`{`
			`Dtype bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax;`
			`const int i = index % 4;`
			`const int p = ((index / 4 / num_loc_classes) % num_priors) * 4;`

			`const int c = (index / 4) % num_loc_classes;`
			`int label = share_location ? -1 : c;`
			`if (label == background_label_id)`
			`return; // Ignore background class.`

			`Dtype4 loc_vec = vload4(0, loc_data + index - i);`
			`Dtype4 bbox_vec, prior_variance;`
			`if (variance_encoded_in_target)`
			`{`
			`bbox_vec = loc_vec;`
			`} else {`
			`const int start_index = num_priors * 4 + p;`
			`prior_variance = vload4(0, prior_data + start_index);`
			`bbox_vec = loc_vec * prior_variance;`
			`}`

ocl fix for detection_output and prior_box layer Signed-off-by: Li Peng <peng.li@intel.com> 2018-02-11 18:12:25 +08:00			`if (locPredTransposed)`
			`{`
			`bbox_ymin = bbox_vec.x;`
			`bbox_xmin = bbox_vec.y;`
			`bbox_ymax = bbox_vec.z;`
			`bbox_xmax = bbox_vec.w;`
			`} else {`
			`bbox_xmin = bbox_vec.x;`
			`bbox_ymin = bbox_vec.y;`
			`bbox_xmax = bbox_vec.z;`
			`bbox_ymax = bbox_vec.w;`
			`}`
detection_output layer ocl implementation Signed-off-by: Li Peng <peng.li@intel.com> 2017-11-30 20:43:49 +08:00
			`Dtype4 prior_vec = vload4(0, prior_data + p);`
			`Dtype prior_width = prior_vec.z - prior_vec.x;`
			`Dtype prior_height = prior_vec.w - prior_vec.y;`
			`Dtype prior_center_x = (prior_vec.x + prior_vec.z) * .5;`
			`Dtype prior_center_y = (prior_vec.y + prior_vec.w) * .5;`

			`Dtype decode_bbox_center_x, decode_bbox_center_y;`
			`Dtype decode_bbox_width, decode_bbox_height;`
			`decode_bbox_center_x = bbox_xmin * prior_width + prior_center_x;`
			`decode_bbox_center_y = bbox_ymin * prior_height + prior_center_y;`
			`decode_bbox_width = exp(bbox_xmax) * prior_width;`
			`decode_bbox_height = exp(bbox_ymax) * prior_height;`

			`Dtype val;`
			`switch (i)`
			`{`
			`case 0:`
			`val = decode_bbox_center_x - decode_bbox_width * .5;`
			`break;`
			`case 1:`
			`val = decode_bbox_center_y - decode_bbox_height * .5;`
			`break;`
			`case 2:`
			`val = decode_bbox_center_x + decode_bbox_width * .5;`
			`break;`
			`case 3:`
			`val = decode_bbox_center_y + decode_bbox_height * .5;`
			`break;`
			`}`

			`if (clip_bbox)`
			`val = max(min(val, (Dtype)1.), (Dtype)0.);`

			`bbox_data[index] = val;`
			`}`
			`}`