mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 09:25:45 +08:00

dnn: add DepthToSpace and SpaceToDepth #25779 We are working on updating the WeChat QRCode module. One of the new models is fully convolutional and hence should be able to run with different input shapes. However, it has a `DepthToSpace` operator, which is parsed as a `Reshape -> Permute -> Reshape` subgraph with a fixed shape determined during parsing. The subgraph itself is not a problem; the true problem is that the subgraph has fixed input and output shapes regardless of input changes. This does not allow the model to run with different input shapes. The solution is to add dedicated layers for DepthToSpace and SpaceToDepth. Backend support: - [x] CPU - [x] CUDA - [x] OpenCL - [x] OpenVINO - [x] CANN - [x] TIMVX - ~Vulkan~ (missing fundamental tools, like permutation and reshape) ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There are accuracy tests, performance tests and test data in the opencv_extra repository, if applicable. The patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
1216 lines
47 KiB
C++
1216 lines
47 KiB
C++
// This file is part of OpenCV project.
|
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
|
// of this distribution and at http://opencv.org/license.html.
|
|
|
|
#ifndef OPENCV_DNN_SRC_CUDA4DNN_CSL_TENSOR_HPP
|
|
#define OPENCV_DNN_SRC_CUDA4DNN_CSL_TENSOR_HPP
|
|
|
|
#include "nvcc_defs.hpp"
|
|
#include "memory.hpp"
|
|
#include "cublas.hpp"
|
|
#include "cudnn.hpp"
|
|
#include "span.hpp"
|
|
|
|
#include "../cxx_utils/resizable_static_array.hpp"
|
|
#include "../cxx_utils/is_iterator.hpp"
|
|
|
|
#include <opencv2/core.hpp>
|
|
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
#include <type_traits>
|
|
#include <array>
|
|
#include <functional>
|
|
#include <algorithm>
|
|
#include <numeric>
|
|
#include <iterator>
|
|
#include <vector>
|
|
#include <utility>
|
|
|
|
#ifndef CSL_MAX_TENSOR_RANK
|
|
#define CSL_MAX_TENSOR_RANK 6
|
|
#endif
|
|
|
|
namespace cv { namespace dnn { namespace cuda4dnn { namespace csl {
|
|
|
|
/** \file tensor.hpp
|
|
*
|
|
* TYPE | OWNERSHIP | MUTABLE
|
|
* ------------ + --------- + --------
|
|
* Tensor | Yes | Yes
|
|
* TensorSpan | No | Yes
|
|
* TensorView | No | No
|
|
*
|
|
* Tensor is implicitly convertible to TensorSpan and TensorView
|
|
* TensorSpan is implicitly convertible to TensorView
|
|
*
|
|
* Concepts and template parameter naming convention:
|
|
* - "MutableTensorType" can refer to a Tensor or TensorSpan
|
|
* - "ImmutableTensorType" can refer to a Tensor, TensorSpan or TensorView
|
|
* - "TensorType" can refer to a Tensor, TensorSpan or TensorView
|
|
*
|
|
* "ImmutableTensorType" is used when the tensor data might be used.
|
|
* "TensorType" is used when only meta-information such as the size or shape is required, i.e. the data won't be touched
|
|
*/
|
|
|
|
/** if the \p axis is a negative index, the equivalent positive index is returned; otherwise, returns \p axis */
|
|
/** maps a negative axis index \p axis to the equivalent positive index; non-negative indices are returned unchanged */
CUDA4DNN_HOST_DEVICE constexpr std::size_t clamp_axis(int axis, std::size_t rank) {
    return axis >= 0 ? axis : axis + rank;
}
|
|
|
|
/** @brief multi-dimensional contiguous non-copyable GPU tensor
|
|
*
|
|
* \tparam T type of data stored
|
|
*
|
|
* @note scalars or zero rank tensors are not supported
|
|
* @note the maximum rank supported is controlled by the `CSL_MAX_TENSOR_RANK` preprocessor symbol
|
|
*/
|
|
template <class T>
|
|
class Tensor {
|
|
static_assert(std::is_standard_layout<T>::value, "T must satisfy StandardLayoutType");
|
|
|
|
public:
|
|
using value_type = typename ManagedPtr<T>::element_type;
|
|
using pointer = typename ManagedPtr<value_type>::pointer;
|
|
using const_pointer = typename ManagedPtr<value_type>::const_pointer;
|
|
using size_type = typename ManagedPtr<value_type>::size_type;
|
|
|
|
Tensor() noexcept { }
|
|
Tensor(const Tensor&) = delete;
|
|
Tensor(Tensor&& other) noexcept {
|
|
data = std::move(other.data);
|
|
shape = other.shape;
|
|
other.shape.clear();
|
|
}
|
|
|
|
/** @brief constructs a tensor of a specific shape
|
|
*
|
|
* Whatever arguments are accepted by the resize methods are accepted here.
|
|
*/
|
|
template <class ...Args>
|
|
Tensor(Args&&... sizes) { resize(std::forward<Args>(sizes)...); }
|
|
|
|
Tensor& operator=(const Tensor&) = delete;
|
|
Tensor& operator=(Tensor&& other) noexcept {
|
|
data = std::move(other.data);
|
|
shape = other.shape;
|
|
other.shape.clear();
|
|
return *this;
|
|
}
|
|
|
|
/** returns true if the tensor is empty (or uninitialized) */
|
|
bool empty() const noexcept { return shape.size() == 0; }
|
|
|
|
/** returns the total number of elements in the tensor
|
|
*
|
|
* Pre-conditions:
|
|
* - tensor must be non-empty
|
|
*/
|
|
size_type size() const noexcept {
|
|
CV_Assert(!empty());
|
|
return std::accumulate(std::begin(shape), std::end(shape), 1, std::multiplies<size_type>());
|
|
}
|
|
|
|
/** returns the rank of the tensor
|
|
*
|
|
* Pre-conditions:
|
|
* - tensor must be non-empty
|
|
*/
|
|
size_type rank() const noexcept {
|
|
CV_Assert(!empty());
|
|
return shape.size();
|
|
}
|
|
|
|
/** @brief returns the length of the axis
|
|
*
|
|
* Every axis is assigned a zero-based index which can be used to select an axis.
|
|
* Negative index can be used to select an axis from the end.
|
|
*
|
|
* Examples:
|
|
* > -1 represents the last axis
|
|
* > 0 represents the first axis
|
|
* > 1 represents the second axis
|
|
*
|
|
* Pre-conditions:
|
|
* - tensor must be non-empty
|
|
* - the axis must be in the range [-rank(), rank())
|
|
*/
|
|
size_type get_axis_size(int axis) const noexcept {
|
|
axis = clamp_axis(axis, rank());
|
|
CV_Assert(axis >= 0 && axis < rank());
|
|
return shape[axis];
|
|
}
|
|
|
|
/** @brief returns the combined size of the axes in an axis range
|
|
*
|
|
* if the shape is [3 x 5 x 7 x 11]
|
|
* - `size_range(0, 2)` will return 3 x 5 = 15
|
|
* - `size_range(1, 3)` will return 5 x 7 = 35
|
|
* - `size_range(0, 4)` will return 3 x 5 x 7 x 11 = 1155
|
|
*
|
|
* Pre-conditions:
|
|
* - tensor must be non-empty
|
|
* - `axis_start` must be less than or equal to `axis_end`
|
|
* - `axis_end` must be less than or equal to the rank
|
|
*
|
|
* returns one if the two `axis_start` and `axis_end` are equal
|
|
*/
|
|
size_type size_range(size_type axis_start, size_type axis_end) const noexcept {
|
|
CV_Assert(!empty());
|
|
CV_Assert(axis_start <= axis_end);
|
|
CV_Assert(axis_end <= rank());
|
|
auto start = std::begin(shape) + axis_start;
|
|
auto end = std::begin(shape) + axis_end;
|
|
return std::accumulate(start, end, 1, std::multiplies<size_type>());
|
|
}
|
|
|
|
/** returns an std::vector containing axis lengths starting from axis zero
|
|
*
|
|
* Pre-conditions:
|
|
* - tensor must be non-empty
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
std::vector<size_type> shape_as_vector() const {
|
|
CV_Assert(!empty());
|
|
return std::vector<size_type>(std::begin(shape), std::end(shape));
|
|
}
|
|
|
|
/** returns a pointer to mutable device memory owned by the tensor */
|
|
pointer get() noexcept { return data.get(); }
|
|
|
|
/** returns a pointer to immutable device memory owned by the tensor */
|
|
const_pointer get() const noexcept { return data.get(); }
|
|
|
|
/** @brief releases the memory owned by the tensor
|
|
*
|
|
* Pre-conditions:
|
|
* - tensor must be non-empty
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
void clear() {
|
|
CV_Assert(!empty());
|
|
data.reset();
|
|
shape.clear();
|
|
}
|
|
|
|
/** @brief resizes the tensor
|
|
*
|
|
* Pre-conditions:
|
|
* - [start, end) represents a forward range containing the length of the axes in order starting from axis zero
|
|
* - number of lengths provided must not exceed the maximum tensor rank (CSL_MAX_TENSOR_RANK)
|
|
* - the sizes must be positive integers
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
template <class ForwardItr>
|
|
typename std::enable_if<cxx_utils::is_forward_iterator<ForwardItr>::value, void>
|
|
::type resize(ForwardItr start, ForwardItr end) {
|
|
CV_Assert(start != end);
|
|
CV_Assert(std::distance(start, end) <= CSL_MAX_TENSOR_RANK);
|
|
|
|
using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
|
|
auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
|
|
data.reset(total);
|
|
|
|
shape.assign(start, end);
|
|
}
|
|
|
|
/** @brief resizes the tensor
|
|
* constructs a range out of the arguments and invokes the range-based resize method
|
|
*/
|
|
template <class ...Sizes>
|
|
void resize(Sizes... new_sizes_) {
|
|
static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "required rank exceeds maximum supported rank");
|
|
static_assert(sizeof...(Sizes) > 0, "no sizes provided");
|
|
std::array<size_type, sizeof...(Sizes)> new_sizes = { static_cast<size_type>(new_sizes_)... };
|
|
resize(std::begin(new_sizes), std::end(new_sizes));
|
|
}
|
|
|
|
/** @brief resizes the tensor
|
|
*
|
|
* Pre-conditions:
|
|
* - the reference tensor must be non-empty
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
template <class TensorType>
|
|
void resize_as(const TensorType& tensor) {
|
|
CV_Assert(!tensor.empty());
|
|
cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> new_sizes(tensor.rank());
|
|
for (int i = 0; i < new_sizes.size(); i++)
|
|
new_sizes[i] = tensor.get_axis_size(i);
|
|
resize(std::begin(new_sizes), std::end(new_sizes));
|
|
}
|
|
|
|
/** @brief reshapes the tensor
|
|
*
|
|
* Length deduction:
|
|
* The length of at most one axis can be deduced using the total size constraint. The axis can
|
|
* be marked for deduction by specifying the size as -1.
|
|
*
|
|
* The axes for which no size was provided (excluding -1) will be assumed to be one.
|
|
*
|
|
* Pre-conditions:
|
|
* - the tensor must be non-empty
|
|
* - [start, end) represents a forward range containing the length of the axes starting from axis zero
|
|
* - the number of lengths provided must be less than or equal to the tensor rank
|
|
* - at most one axis length is allowed for length deduction
|
|
* - the lengths provided must ensure that the total number of elements remains unchanged
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
template <class ForwardItr>
|
|
typename std::enable_if<cxx_utils::is_forward_iterator<ForwardItr>::value, void>
|
|
::type reshape(ForwardItr start, ForwardItr end) {
|
|
CV_Assert(start != end);
|
|
|
|
using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
|
|
|
|
/* the user may leave at most one axis size for deduction by specifying -1 */
|
|
auto sizes_to_deduce = std::count(start, end, -1);
|
|
if (sizes_to_deduce > 1) { CV_Error(Error::StsBadArg, "only one axis size can be deduced"); }
|
|
|
|
/* sizes must be positive numbers with the exception of -1 */
|
|
auto invalid_sizes = std::count_if(start, end, [](ItrValueType x) {
|
|
return !(x > 0 || x == -1);
|
|
});
|
|
if (invalid_sizes) { CV_Error(Error::StsBadArg, "invalid axis size"); }
|
|
|
|
/* compute the total number of elements in the new tensor */
|
|
size_type unknown_size = 0;
|
|
auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
|
|
if (total < 0) {
|
|
/* there is an unknown size */
|
|
CV_CheckEQ(size() % std::abs(total), static_cast<size_type>(0), "cannot be reshaped"); // must be divisible
|
|
if (std::abs(total) <= size()) {
|
|
unknown_size = size() / std::abs(total);
|
|
total = size();
|
|
}
|
|
/* Edge case: if `total` is already more than size(), skip the deduction as it's impossible
|
|
** Since `total` is negative, the size check which follows will fail and throw an error
|
|
*/
|
|
}
|
|
|
|
/* the number of elements before and after reshape must be exactly same */
|
|
if (total != size()) {
|
|
CV_Error(Error::StsBadArg, "new axes do not preserve the tensor element count");
|
|
}
|
|
|
|
/* copy shape from given iterator and reshape -1 with deduced value */
|
|
shape.resize(std::distance(start, end));
|
|
std::copy(start, end, shape.begin());
|
|
std::replace(std::begin(shape), std::end(shape), size_type(-1), unknown_size);
|
|
}
|
|
|
|
/** @brief reshapes the tensor
|
|
* constructs a range out of the arguments and invokes range-based reshape method
|
|
*/
|
|
template <class ...Sizes>
|
|
void reshape(Sizes... new_sizes_) {
|
|
static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "required rank exceeds maximum supported rank");
|
|
static_assert(sizeof...(Sizes) > 0, "no sizes provided");
|
|
std::array<std::int64_t, sizeof...(Sizes)> new_sizes = { static_cast<std::int64_t>(new_sizes_)... };
|
|
reshape(std::begin(new_sizes), std::end(new_sizes));
|
|
}
|
|
|
|
/** @brief reshapes the tensor
|
|
*
|
|
* Pre-conditions:
|
|
* - the reference tensor must be a non-empty tensor
|
|
* - the reference tensor's rank must be lesser than or equal to the rank of target tensor
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
template <class TensorType>
|
|
void reshape_as(const TensorType& tensor) {
|
|
CV_Assert(!tensor.empty());
|
|
cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> new_sizes(tensor.rank());
|
|
for (int i = 0; i < new_sizes.size(); i++)
|
|
new_sizes[i] = tensor.get_axis_size(i);
|
|
reshape(std::begin(new_sizes), std::end(new_sizes));
|
|
}
|
|
|
|
/** @brief squeezes the tensor
|
|
*
|
|
* removes all axes of unit size
|
|
*
|
|
* Pre-conditions:
|
|
* - the tensor must be non-empty
|
|
* - the tensor's rank must be at least two
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
void squeeze() {
|
|
CV_Assert(!empty());
|
|
CV_Assert(rank() >= 2);
|
|
auto itr = std::remove(std::begin(shape), std::end(shape), 1);
|
|
shape.resize(itr - std::begin(shape));
|
|
}
|
|
|
|
/** @brief squeezes the tensor
|
|
*
|
|
* removes the specified axis if the axis length is one; otherwise, ignores the request
|
|
*
|
|
* Pre-conditions:
|
|
* - the tensor must be non-empty
|
|
* - the tensor's rank must be at least two
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
void squeeze(int axis) {
|
|
CV_Assert(!empty());
|
|
CV_Assert(rank() >= 2);
|
|
axis = clamp_axis(axis, rank());
|
|
CV_Assert(axis >= 0 && axis < rank());
|
|
shape.erase(std::begin(shape) + axis);
|
|
}
|
|
|
|
/** @brief squeezes the tensor
|
|
*
|
|
* removes leading singleton axes until the tensor's rank is equal to the requested rank
|
|
*
|
|
* Pre-conditions:
|
|
* - the tensor must be non-empty
|
|
* - the tensor's rank must be at least two
|
|
* - the tensor's rank must be at least the requested rank
|
|
* - the tensor must be squeezable up to the requested rank
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
void squeeze_to(int r) {
|
|
CV_Assert(!empty());
|
|
CV_Assert(rank() >= r);
|
|
CV_Assert(std::all_of(std::begin(shape), std::end(shape) - r, [](size_type x){ return x == 1; }));
|
|
std::copy(std::end(shape) - r, std::end(shape), std::begin(shape));
|
|
shape.resize(r);
|
|
}
|
|
|
|
/** @brief unsqueezes the tensor
|
|
*
|
|
* adds a axis of unit size at the requested before the specified axis
|
|
*
|
|
* Pre-conditions:
|
|
* - the tensor must be non-empty
|
|
* - the tensor's rank must be less than the maximum supported rank (CSL_MAX_TENSOR_RANK)
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
void unsqueeze(int axis = 0) {
|
|
CV_Assert(!empty());
|
|
CV_Assert(rank() < CSL_MAX_TENSOR_RANK);
|
|
axis = clamp_axis(axis, rank());
|
|
CV_Assert(axis >= 0 && axis < rank());
|
|
shape.insert(std::begin(shape) + axis, 1);
|
|
}
|
|
|
|
operator Span<T>() noexcept { return Span<T>(data.get(), size()); }
|
|
operator View<T>() const noexcept { return View<T>(data.get(), size()); }
|
|
|
|
friend void swap(Tensor& lhs, Tensor& rhs) noexcept {
|
|
using std::swap;
|
|
swap(lhs.data, rhs.data);
|
|
swap(lhs.shape, rhs.shape);
|
|
}
|
|
|
|
private:
|
|
cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> shape;
|
|
ManagedPtr<value_type> data;
|
|
};
|
|
|
|
/** @brief provides a non-owning mutable span of a Tensor
|
|
*
|
|
* \tparam T type of data stored by the tensor
|
|
*
|
|
* A span is valid if and only if the following hold true:
|
|
* - span is non-empty
|
|
* - spanned memory is still allocated
|
|
*
|
|
* A span may be used if and only if it is valid.
|
|
*/
|
|
template <class T>
|
|
class TensorSpan {
|
|
public:
|
|
using value_type = typename Tensor<T>::value_type;
|
|
using pointer = typename Tensor<T>::pointer;
|
|
using const_pointer = typename Tensor<T>::const_pointer;
|
|
using size_type = typename Tensor<T>::size_type;
|
|
|
|
TensorSpan() noexcept : ptr{ nullptr } { }
|
|
TensorSpan(const TensorSpan&) noexcept = default;
|
|
TensorSpan(Tensor<T>& tensor) noexcept : ptr{ tensor.get() } {
|
|
const auto rank = tensor.rank();
|
|
shape.resize(rank);
|
|
for (int i = 0; i < rank; i++)
|
|
shape[i] = tensor.get_axis_size(i);
|
|
}
|
|
|
|
template <class ForwardItr>
|
|
TensorSpan(pointer ptr_, ForwardItr start, ForwardItr end) : ptr{ ptr_ } {
|
|
CV_Assert(start != end);
|
|
CV_Assert(std::distance(start, end) <= CSL_MAX_TENSOR_RANK);
|
|
|
|
using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
|
|
if (std::any_of(start, end, [](ItrValueType x) { return x <= 0; })) {
|
|
CV_Error(Error::StsBadArg, "the given shape contains negative or zero size");
|
|
}
|
|
|
|
shape.assign(start, end);
|
|
}
|
|
|
|
/** creates a subspan of a tensor (or span); refer to subspan method for more details */
|
|
template <class... Args>
|
|
TensorSpan(TensorSpan other, size_type offset, Args&&... args)
|
|
: TensorSpan(other.subspan(offset, std::forward<Args>(args)...)) { }
|
|
|
|
/** returns true if the span is empty */
|
|
bool empty() const noexcept { return shape.size() == 0; }
|
|
|
|
/** returns the total number of elements in the span
|
|
*
|
|
* Pre-conditions:
|
|
* - span must be non-empty
|
|
*/
|
|
size_type size() const noexcept {
|
|
CV_Assert(!empty());
|
|
return std::accumulate(std::begin(shape), std::end(shape), 1, std::multiplies<size_type>());
|
|
}
|
|
|
|
/** returns the rank of the span
|
|
*
|
|
* Pre-conditions:
|
|
* - span must be non-empty
|
|
*/
|
|
size_type rank() const noexcept {
|
|
CV_Assert(!empty());
|
|
return shape.size();
|
|
}
|
|
|
|
/** @brief returns the length of the axis
|
|
*
|
|
* Every axis is assigned a zero-based index which can be used to select an axis.
|
|
* Negative index can be used to select an axis from the end.
|
|
*
|
|
* Examples:
|
|
* > -1 represents the last axis
|
|
* > 0 represents the first axis
|
|
* > 1 represents the second axis
|
|
*
|
|
* Pre-conditions:
|
|
* - span must be non-empty
|
|
* - the axis must be in the range [-rank(), rank())
|
|
*/
|
|
size_type get_axis_size(int axis) const noexcept {
|
|
axis = clamp_axis(axis, rank());
|
|
CV_Assert(axis >= 0 && axis < rank());
|
|
return shape[axis];
|
|
}
|
|
|
|
/** @brief returns the combined size of the axes in an axis range
|
|
*
|
|
* if the shape is [3 x 5 x 7 x 11]
|
|
* - `size_range(0, 2)` will return 3 x 5 = 15
|
|
* - `size_range(1, 3)` will return 5 x 7 = 35
|
|
* - `size_range(0, 4)` will return 3 x 5 x 7 x 11 = 1155
|
|
*
|
|
* Pre-conditions:
|
|
* - span must be non-empty
|
|
* - `axis_start` must be less than or equal to `axis_end`
|
|
* - `axis_end` must be less than or equal to the rank
|
|
*
|
|
* returns one if the two `axis_start` and `axis_end` are equal
|
|
*/
|
|
size_type size_range(size_type axis_start, size_type axis_end) const noexcept {
|
|
CV_Assert(!empty());
|
|
CV_Assert(axis_start <= axis_end);
|
|
CV_Assert(axis_end <= rank());
|
|
auto start = std::begin(shape) + axis_start;
|
|
auto end = std::begin(shape) + axis_end;
|
|
return std::accumulate(start, end, 1, std::multiplies<size_type>());
|
|
}
|
|
|
|
/** returns an std::vector containing axis lengths starting from axis zero
|
|
*
|
|
* Pre-conditions:
|
|
* - span must be non-empty
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
std::vector<size_type> shape_as_vector() const {
|
|
CV_Assert(!empty());
|
|
return std::vector<size_type>(std::begin(shape), std::end(shape));
|
|
}
|
|
|
|
/** returns a pointer to mutable device memory */
|
|
pointer get() const noexcept { return ptr; }
|
|
|
|
/** @brief clears the span
|
|
*
|
|
* Pre-conditions:
|
|
* - span must be non-empty
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
void clear() noexcept {
|
|
CV_Assert(!empty());
|
|
ptr = nullptr;
|
|
shape.clear();
|
|
}
|
|
|
|
/** @brief reshapes the span
|
|
*
|
|
* Length deduction:
|
|
* The length of at most one axis can be deduced using the total size constraint. The axis can
|
|
* be marked for deduction by specifying the corresponding size as -1.
|
|
*
|
|
* The axes for which no size was provided (excluding -1) will be assumed to be one.
|
|
*
|
|
* Pre-conditions:
|
|
* - the span must be non-empty
|
|
* - [start, end) represents a forward range containing the length of the axes in order
|
|
* - the number of axis lengths must be less than or equal to the rank
|
|
* - at most one axis length is allowed for length deduction
|
|
* - the lengths provided must ensure that the total number of elements remains unchanged
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
template <class ForwardItr>
|
|
typename std::enable_if<cxx_utils::is_forward_iterator<ForwardItr>::value, void>
|
|
::type reshape(ForwardItr start, ForwardItr end) {
|
|
CV_Assert(start != end);
|
|
CV_Assert(std::distance(start, end) <= rank());
|
|
|
|
using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
|
|
|
|
/* the user may leave at most one axis size for deduction by specifying -1 */
|
|
auto sizes_to_deduce = std::count(start, end, -1);
|
|
if (sizes_to_deduce > 1) { CV_Error(Error::StsBadArg, "only one axis size can be deduced"); }
|
|
|
|
/* sizes must be positive numbers with the exception of -1 */
|
|
auto invalid_sizes = std::count_if(start, end, [](ItrValueType x) {
|
|
return !(x > 0 || x == -1);
|
|
});
|
|
if (invalid_sizes) { CV_Error(Error::StsBadArg, "invalid axis size"); }
|
|
|
|
/* compute the total number of elements in the new tensor */
|
|
size_type unknown_size = 0;
|
|
auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
|
|
if (total < 0) {
|
|
/* there is an unknown size */
|
|
CV_CheckEQ(size() % std::abs(total), static_cast<size_type>(0), "cannot be reshaped"); // must be divisible
|
|
if (std::abs(total) <= size()) {
|
|
unknown_size = size() / std::abs(total);
|
|
total = size();
|
|
}
|
|
/* Edge case: if `total` is already more than size(), skip the deduction as it's impossible
|
|
** Since `total` is negative, the size check which follows will fail and throw an error
|
|
*/
|
|
}
|
|
|
|
/* the number of elements before and after reshape must be exactly same */
|
|
if (total != size()) {
|
|
CV_Error(Error::StsBadArg, "new axes do not preserve the tensor element count");
|
|
}
|
|
|
|
/* copy shape from given iterator and reshape -1 with deduced value */
|
|
shape.resize(std::distance(start, end));
|
|
std::copy(start, end, shape.begin());
|
|
std::replace(std::begin(shape), std::end(shape), size_type(-1), unknown_size);
|
|
}
|
|
|
|
/** @brief reshapes the tensor
|
|
* constructs a range out of the arguments and invokes the range-based reshape method
|
|
*/
|
|
template <class ...Sizes>
|
|
void reshape(Sizes... new_sizes_) {
|
|
static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "unsupported tensor rank");
|
|
static_assert(sizeof...(Sizes) > 0, "no sizes provided");
|
|
std::array<std::int64_t, sizeof...(Sizes)> new_sizes = { static_cast<std::int64_t>(new_sizes_)... };
|
|
reshape(std::begin(new_sizes), std::end(new_sizes));
|
|
}
|
|
|
|
/** @brief reshapes the span
|
|
*
|
|
* Pre-conditions:
|
|
* - the reference tensor/span/view must be non-empty
|
|
* - the reference tensor/span/view's rank must be less than or equal to the rank of the span
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
template <class TensorType>
|
|
void reshape_as(const TensorType& tensor) {
|
|
CV_Assert(!tensor.empty());
|
|
cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> new_sizes(tensor.rank());
|
|
for (int i = 0; i < new_sizes.size(); i++)
|
|
new_sizes[i] = tensor.get_axis_size(i);
|
|
reshape(std::begin(new_sizes), std::end(new_sizes));
|
|
}
|
|
|
|
/** @brief squeezes the tensor
|
|
*
|
|
* removes all axes of unit size
|
|
*
|
|
* Pre-conditions:
|
|
* - the span must be non-empty
|
|
* - the span's rank must be at least two
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
void squeeze() {
|
|
CV_Assert(!empty());
|
|
CV_Assert(rank() >= 2);
|
|
auto itr = std::remove(std::begin(shape), std::end(shape), 1);
|
|
shape.resize(itr - std::begin(shape));
|
|
}
|
|
|
|
/** @brief squeezes the tensor
|
|
*
|
|
* removes the specified axis if the axis length is one; otherwise, ignores the request
|
|
*
|
|
* Pre-conditions:
|
|
* - the span must be non-empty
|
|
* - the span's rank must be at least two
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
void squeeze(int axis) {
|
|
CV_Assert(!empty());
|
|
CV_Assert(rank() >= 2);
|
|
axis = clamp_axis(axis, rank());
|
|
CV_Assert(axis >= 0 && axis < rank());
|
|
shape.erase(std::begin(shape) + axis);
|
|
}
|
|
|
|
/** @brief squeezes the tensor
|
|
*
|
|
* removes leading singleton axes until the tensor's rank is equal to the requested rank
|
|
*
|
|
* Pre-conditions:
|
|
* - the tensor must be non-empty
|
|
* - the tensor's rank must be at least two
|
|
* - the tensor's rank must be at least the requested rank
|
|
* - the tensor must be squeezable up to the requested rank
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
void squeeze_to(int r) {
|
|
CV_Assert(!empty());
|
|
CV_Assert(rank() >= r);
|
|
CV_Assert(std::all_of(std::begin(shape), std::end(shape) - r, [](size_type x){ return x == 1; }));
|
|
std::copy(std::end(shape) - r, std::end(shape), std::begin(shape));
|
|
shape.resize(r);
|
|
}
|
|
|
|
/** @brief unsqueezes the tensor
|
|
*
|
|
* adds a axis of unit size at the requested before the specified axis
|
|
*
|
|
* Pre-conditions:
|
|
* - the span must be non-empty
|
|
* - the span's rank must be less than the maximum supported rank (CSL_MAX_TENSOR_RANK)
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
void unsqueeze(int axis = 0) {
|
|
CV_Assert(!empty());
|
|
CV_Assert(rank() < CSL_MAX_TENSOR_RANK);
|
|
axis = clamp_axis(axis, rank());
|
|
CV_Assert(axis >= 0 && axis < rank());
|
|
shape.insert(std::begin(shape) + axis, 1);
|
|
}
|
|
|
|
/** @brief obtains a subspan of the span
|
|
*
|
|
* Pre-conditions:
|
|
* - the span must be non-empty
|
|
* - the `offset` must be less than the size of the span
|
|
* - [start, end) represents a forward range containing length of the subspan axes
|
|
* - the lengths provided must ensure that the number of elements does not exceed (old size - offset)
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
template <class ForwardItr>
|
|
typename std::enable_if<cxx_utils::is_forward_iterator<ForwardItr>::value, TensorSpan>
|
|
::type subspan(size_type offset, ForwardItr start, ForwardItr end) const {
|
|
CV_Assert(start != end);
|
|
CV_Assert(std::distance(start, end) <= rank());
|
|
|
|
auto cur_size = size();
|
|
CV_Assert(offset < cur_size);
|
|
|
|
using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
|
|
|
|
/* sizes must be positive numbers */
|
|
auto invalid_sizes = std::count_if(start, end, [](ItrValueType x) {
|
|
return !(x > 0);
|
|
});
|
|
if (invalid_sizes) { CV_Error(Error::StsBadArg, "invalid axis size"); }
|
|
|
|
/* the number of elements must be equal to the new size */
|
|
auto max_size = (cur_size - offset);
|
|
auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
|
|
if (total > max_size) {
|
|
CV_Error(Error::StsBadArg, "axis lengths lead to OOB accesses");
|
|
}
|
|
|
|
TensorSpan temp;
|
|
temp.shape.assign(start, end);
|
|
temp.ptr = ptr + offset;
|
|
return temp;
|
|
}
|
|
|
|
/** @brief obtains a subspan of the span
|
|
* constructs a range out of the size arguments and invokes the range-based subspan method
|
|
*/
|
|
template <class ...Sizes>
|
|
TensorSpan subspan(size_type offset, Sizes... new_sizes_) const {
|
|
static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "required rank exceeds maximum supported rank");
|
|
static_assert(sizeof...(Sizes) > 0, "no sizes provided");
|
|
std::array<std::int64_t, sizeof...(Sizes)> new_sizes = { static_cast<std::int64_t>(new_sizes_)... };
|
|
return subspan(offset, std::begin(new_sizes), std::end(new_sizes));
|
|
}
|
|
|
|
operator Span<T>() noexcept { return Span<T>(ptr, size()); }
|
|
operator View<T>() const noexcept { return View<T>(ptr, size()); }
|
|
|
|
friend void swap(TensorSpan& lhs, TensorSpan& rhs) noexcept {
|
|
using std::swap;
|
|
swap(lhs.ptr, rhs.ptr);
|
|
swap(lhs.shape, rhs.shape);
|
|
}
|
|
|
|
private:
|
|
cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> shape;
|
|
pointer ptr;
|
|
};
|
|
|
|
/** @brief view of a tensor
|
|
*
|
|
* \tparam T type of data stored by the tensor
|
|
*
|
|
* A view is valid if and only if the following hold true:
|
|
* - view is non-empty
|
|
* - viewed memory is still allocated
|
|
*/
|
|
template <class T>
|
|
class TensorView {
|
|
public:
|
|
using value_type = typename Tensor<T>::value_type;
|
|
using pointer = typename Tensor<T>::pointer;
|
|
using const_pointer = typename Tensor<T>::const_pointer;
|
|
using size_type = typename Tensor<T>::size_type;
|
|
|
|
TensorView() noexcept : ptr{ nullptr } { }
|
|
TensorView(const TensorView&) noexcept = default;
|
|
TensorView(TensorSpan<T> other) noexcept : ptr{ other.get() } {
|
|
const auto rank = other.rank();
|
|
shape.resize(rank);
|
|
for (int i = 0; i < rank; i++)
|
|
shape[i] = other.get_axis_size(i);
|
|
}
|
|
TensorView(const Tensor<T>& tensor) noexcept : ptr{ tensor.get() } {
|
|
const auto rank = tensor.rank();
|
|
shape.resize(rank);
|
|
for (int i = 0; i < rank; i++)
|
|
shape[i] = tensor.get_axis_size(i);
|
|
}
|
|
|
|
template <class ForwardItr>
|
|
TensorView(const_pointer ptr_, ForwardItr start, ForwardItr end) : ptr{ ptr_ } {
|
|
CV_Assert(start != end);
|
|
CV_Assert(std::distance(start, end) <= CSL_MAX_TENSOR_RANK);
|
|
|
|
using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
|
|
if (std::any_of(start, end, [](ItrValueType x) { return x <= 0; })) {
|
|
CV_Error(Error::StsBadArg, "the given shape contains negative or zero size");
|
|
}
|
|
|
|
shape.assign(start, end);
|
|
}
|
|
|
|
/** creates a subview of a tensor (or span or view); refer to subview method for more details */
|
|
template <class... Args>
|
|
TensorView(TensorView other, size_type offset, Args&&... args) noexcept
|
|
: TensorView(other.subview(offset, std::forward<Args>(args)...)) { }
|
|
|
|
TensorView& operator=(const TensorView&) = default;
|
|
TensorView& operator=(TensorSpan<T> other) noexcept {
|
|
TensorView tmp(other);
|
|
swap(*this, tmp);
|
|
return *this;
|
|
}
|
|
|
|
/** returns true if the view is empty */
|
|
bool empty() const noexcept { return shape.size() == 0; }
|
|
|
|
/** returns the total number of elements in the view
|
|
*
|
|
* Pre-conditions:
|
|
* - view must be non-empty
|
|
*/
|
|
size_type size() const noexcept {
|
|
CV_Assert(!empty());
|
|
return std::accumulate(std::begin(shape), std::end(shape), 1, std::multiplies<size_type>());
|
|
}
|
|
|
|
/** returns the rank of the view
|
|
*
|
|
* Pre-conditions:
|
|
* - view must be non-empty
|
|
*/
|
|
size_type rank() const noexcept {
|
|
CV_Assert(!empty());
|
|
return shape.size();
|
|
}
|
|
|
|
/** @brief returns the length of the axis
|
|
*
|
|
* Every axis is assigned a zero-based index which can be used to select an axis.
|
|
* Negative index can be used to select an axis from the end.
|
|
*
|
|
* Examples:
|
|
* > -1 represents the last axis
|
|
* > 0 represents the first axis
|
|
* > 1 represents the second axis
|
|
*
|
|
* Pre-conditions:
|
|
* - view must be non-empty
|
|
* - the axis must be in the range [-rank(), rank())
|
|
*/
|
|
size_type get_axis_size(int axis) const noexcept {
|
|
axis = clamp_axis(axis, rank());
|
|
CV_Assert(axis >= 0 && axis < rank());
|
|
return shape[axis];
|
|
}
|
|
|
|
/** @brief returns the combined size of the axes in an axis range
|
|
*
|
|
* if the shape is [3 x 5 x 7 x 11]
|
|
* - `size_range(0, 2)` will return 3 x 5 = 15
|
|
* - `size_range(1, 3)` will return 5 x 7 = 35
|
|
* - `size_range(0, 4)` will return 3 x 5 x 7 x 11 = 1155
|
|
*
|
|
* Pre-conditions:
|
|
* - view must be non-empty
|
|
* - `axis_start` must be less than or equal to `axis_end`
|
|
* - `axis_end` must be less than or equal to the rank
|
|
*
|
|
* returns one if the two `axis_start` and `axis_end` are equal
|
|
*/
|
|
size_type size_range(size_type axis_start, size_type axis_end) const noexcept {
|
|
CV_Assert(!empty());
|
|
CV_Assert(axis_start <= axis_end);
|
|
CV_Assert(axis_end <= rank());
|
|
auto start = std::begin(shape) + axis_start;
|
|
auto end = std::begin(shape) + axis_end;
|
|
return std::accumulate(start, end, 1, std::multiplies<size_type>());
|
|
}
|
|
|
|
/** returns an std::vector containing axis lengths starting from axis zero
|
|
*
|
|
* Pre-conditions:
|
|
* - view must be non-empty
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
std::vector<size_type> shape_as_vector() const {
|
|
CV_Assert(!empty());
|
|
return std::vector<size_type>(std::begin(shape), std::end(shape));
|
|
}
|
|
|
|
/** returns a device pointer to immutable device memory */
|
|
const_pointer get() const noexcept { return ptr; }
|
|
|
|
/** @brief reshapes the view
|
|
*
|
|
* Length deduction:
|
|
* The length of at most one axis can be deduced using the total size constraint. The axis can
|
|
* be marked for deduction by specifying the size as -1.
|
|
*
|
|
* The axes for which no size was provided (excluding -1) will be assumed to be one.
|
|
*
|
|
* Pre-conditions:
|
|
* - view must be non-empty
|
|
* - [start, end) represents a forward range containing length of the axes in order starting from axis zero
|
|
* - the number of axis lengths must be less than or equal to the tensor rank
|
|
* - at most one axis length is allowed for length deduction
|
|
* - the lengths provided must ensure that the total number of elements remains unchanged
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
template <class ForwardItr>
|
|
typename std::enable_if<!std::is_integral<ForwardItr>::value, void>
|
|
::type reshape(ForwardItr start, ForwardItr end) {
|
|
CV_Assert(start != end);
|
|
|
|
using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
|
|
|
|
/* the user may leave at most one axis size for deduction by specifying -1 */
|
|
auto sizes_to_deduce = std::count(start, end, -1);
|
|
if (sizes_to_deduce > 1) { CV_Error(Error::StsBadArg, "only one axis size can be deduced"); }
|
|
|
|
/* sizes must be positive numbers with the exception of -1 */
|
|
auto invalid_sizes = std::count_if(start, end, [](ItrValueType x) {
|
|
return !(x > 0 || x == -1);
|
|
});
|
|
if (invalid_sizes) { CV_Error(Error::StsBadArg, "invalid axis size"); }
|
|
|
|
/* compute the total number of elements in the new tensor */
|
|
size_type unknown_size = 0;
|
|
auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
|
|
if (total < 0) {
|
|
/* there is an unknown size */
|
|
CV_CheckEQ(size() % std::abs(total), static_cast<size_type>(0), "cannot be reshaped"); // must be divisible
|
|
if (std::abs(total) <= size()) {
|
|
unknown_size = size() / std::abs(total);
|
|
total = size();
|
|
}
|
|
/* Edge case: if `total` is already more than size(), skip the deduction as it's impossible
|
|
** Since `total` is negative, the size check which follows will fail and throw an error
|
|
*/
|
|
}
|
|
|
|
/* the number of elements before and after reshape must be exactly same */
|
|
if (total != size()) {
|
|
CV_Error(Error::StsBadArg, "new axes do not preserve the tensor element count");
|
|
}
|
|
|
|
/* copy shape from given iterator and reshape -1 with deduced value */
|
|
shape.resize(std::distance(start, end));
|
|
std::copy(start, end, shape.begin());
|
|
std::replace(std::begin(shape), std::end(shape), size_type(-1), unknown_size);
|
|
}
|
|
|
|
/** @brief reshapes the view
|
|
* constructs a range out of the arguments and invokes the range-based reshape method
|
|
*/
|
|
template <class ...Sizes>
|
|
void reshape(Sizes... new_sizes_) {
|
|
static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "required rank exceeds maximum supported rank");
|
|
static_assert(sizeof...(Sizes) > 0, "no sizes provided");
|
|
std::array<std::int64_t, sizeof...(Sizes)> new_sizes = { static_cast<std::int64_t>(new_sizes_)... };
|
|
reshape(std::begin(new_sizes), std::end(new_sizes));
|
|
}
|
|
|
|
/** @brief reshapes the view
|
|
*
|
|
* Pre-conditions:
|
|
* - the reference tensor/span/view must be non-empty
|
|
* - the reference tensor/span/view's rank must be less than or equal to the rank of the view
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
template <class TensorType>
|
|
void reshape_as(const TensorType& tensor) {
|
|
CV_Assert(!tensor.empty());
|
|
cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> new_sizes(tensor.rank());
|
|
for (int i = 0; i < new_sizes.size(); i++)
|
|
new_sizes[i] = tensor.get_axis_size(i);
|
|
reshape(std::begin(new_sizes), std::end(new_sizes));
|
|
}
|
|
|
|
/** @brief squeezes the tensor
|
|
*
|
|
* removes all axes of unit size
|
|
*
|
|
* Pre-conditions:
|
|
* - the view must be non-empty
|
|
* - the view's rank must be at least two
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
void squeeze() {
|
|
CV_Assert(!empty());
|
|
CV_Assert(rank() >= 2);
|
|
auto itr = std::remove(std::begin(shape), std::end(shape), 1);
|
|
shape.resize(itr - std::begin(shape));
|
|
}
|
|
|
|
/** @brief squeezes the tensor
|
|
*
|
|
* removes the specified axis if the axis length is one; otherwise, ignores the request
|
|
*
|
|
* Pre-conditions:
|
|
* - the view must be non-empty
|
|
* - the view's rank must be at least two
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
void squeeze(int axis) {
|
|
CV_Assert(!empty());
|
|
CV_Assert(rank() >= 2);
|
|
axis = clamp_axis(axis, rank());
|
|
CV_Assert(axis >= 0 && axis < rank());
|
|
shape.erase(std::begin(shape) + axis);
|
|
}
|
|
|
|
/** @brief squeezes the tensor
|
|
*
|
|
* removes leading singleton axes until the tensor's rank is equal to the requested rank
|
|
*
|
|
* Pre-conditions:
|
|
* - the tensor must be non-empty
|
|
* - the tensor's rank must be at least two
|
|
* - the tensor's rank must be at least the requested rank
|
|
* - the tensor must be squeezable up to the requested rank
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
void squeeze_to(int r) {
|
|
CV_Assert(!empty());
|
|
CV_Assert(rank() >= r);
|
|
CV_Assert(std::all_of(std::begin(shape), std::end(shape) - r, [](size_type x){ return x == 1; }));
|
|
std::copy(std::end(shape) - r, std::end(shape), std::begin(shape));
|
|
shape.resize(r);
|
|
}
|
|
|
|
/** @brief unsqueezes the tensor
|
|
*
|
|
* adds a axis of unit size at the requested before the specified axis
|
|
*
|
|
* Pre-conditions:
|
|
* - the view must be non-empty
|
|
* - the view's rank must be less than the maximum supported rank (CSL_MAX_TENSOR_RANK)
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
void unsqueeze(int axis = 0) {
|
|
CV_Assert(!empty());
|
|
CV_Assert(rank() < CSL_MAX_TENSOR_RANK);
|
|
axis = clamp_axis(axis, rank());
|
|
CV_Assert(axis >= 0 && axis < rank());
|
|
shape.insert(std::begin(shape) + axis, 1);
|
|
}
|
|
|
|
/** @brief obtains a subview of the view
|
|
*
|
|
* The axes for which no size was provided will be assumed to be one.
|
|
*
|
|
* Pre-conditions:
|
|
* - the view must be non-empty
|
|
* - the `offset` must be less than the size of the view
|
|
* - [start, end) represents a forward range containing length of the subview axes in order
|
|
* - the number of axis lengths provided must be less than or equal to the tensor rank
|
|
* - the lengths provided must ensure that the number of elements does not exceed (old size - offset)
|
|
*
|
|
* Exception Guarantee: Strong
|
|
*/
|
|
template <class ForwardItr>
|
|
typename std::enable_if<cxx_utils::is_forward_iterator<ForwardItr>::value, TensorView>
|
|
::type subview(size_type offset, ForwardItr start, ForwardItr end) const {
|
|
CV_Assert(start != end);
|
|
CV_Assert(std::distance(start, end) <= rank());
|
|
|
|
auto cur_size = size();
|
|
CV_Assert(offset < cur_size);
|
|
|
|
using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
|
|
|
|
/* sizes must be positive numbers */
|
|
auto invalid_sizes = std::count_if(start, end, [](ItrValueType x) {
|
|
return !(x > 0);
|
|
});
|
|
if (invalid_sizes) { CV_Error(Error::StsBadArg, "invalid axis size"); }
|
|
|
|
/* the number of elements must be equal to the new size */
|
|
auto max_size = (cur_size - offset);
|
|
auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
|
|
if (total > max_size) {
|
|
CV_Error(Error::StsBadArg, "axes lengths lead to OOB accesses");
|
|
}
|
|
|
|
TensorView temp;
|
|
temp.shape.assign(start, end);
|
|
temp.ptr = ptr + offset;
|
|
return temp;
|
|
}
|
|
|
|
/** @brief obtains a subview of the view
|
|
* constructs a range out of the size arguments and invokes the range-based subview method
|
|
*/
|
|
template <class ...Sizes>
|
|
TensorView subview(size_type offset, Sizes... new_sizes_) const {
|
|
static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "required rank exceeds maximum supported rank");
|
|
static_assert(sizeof...(Sizes) > 0, "no sizes provided");
|
|
std::array<std::int64_t, sizeof...(Sizes)> new_sizes = { static_cast<std::int64_t>(new_sizes_)... };
|
|
return subview(offset, std::begin(new_sizes), std::end(new_sizes));
|
|
}
|
|
|
|
operator View<T>() const noexcept { return View<T>(ptr, size()); }
|
|
|
|
friend void swap(TensorView& lhs, TensorView& rhs) noexcept {
|
|
using std::swap;
|
|
swap(lhs.ptr, rhs.ptr);
|
|
swap(lhs.shape, rhs.shape);
|
|
}
|
|
|
|
private:
|
|
cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> shape;
|
|
const_pointer ptr;
|
|
};
|
|
|
|
/** returns true if the two TensorType objects have the same shape */
template <class TensorType1, class TensorType2>
bool is_shape_same(const TensorType1& x, const TensorType2& y) noexcept {
    /* shapes of different rank can never match */
    const auto common_rank = x.rank();
    if (y.rank() != common_rank)
        return false;

    /* every axis must have an identical length */
    for (int axis = 0; axis < common_rank; ++axis) {
        if (x.get_axis_size(axis) != y.get_axis_size(axis))
            return false;
    }

    return true;
}
|
|
|
|
/** returns true if the two TensorType objects are compatible */
template <class TensorType1, class TensorType2>
bool is_shape_compatible(const TensorType1& x, const TensorType2& y) noexcept {
    /* mathematically not required but is a technically required */
    const auto common_rank = x.rank();
    if (y.rank() != common_rank)
        return false;

    /* axes are compatible when equal or when either one is a broadcastable unit axis */
    for (int axis = 0; axis < common_rank; ++axis) {
        const auto xs = x.get_axis_size(axis);
        const auto ys = y.get_axis_size(axis);
        if (xs != ys && xs != 1 && ys != 1)
            return false;
    }

    return true;
}
|
|
|
|
/** returns true if the two shape containers are broadcast-compatible
 *
 * `ShapeType` must provide `size()` and `operator[]` (e.g. std::vector of axis lengths).
 */
template <typename ShapeType>
bool is_shape_compatible1(const ShapeType &x_shape, const ShapeType &y_shape) noexcept {
    /* ranks must match exactly */
    if (x_shape.size() != y_shape.size()) {
        return false;
    }

    /* each axis pair must be equal or contain a broadcastable unit axis */
    for (std::size_t axis = 0; axis < x_shape.size(); ++axis) {
        const bool mismatched = x_shape[axis] != y_shape[axis];
        if (mismatched && x_shape[axis] != 1 && y_shape[axis] != 1) {
            return false;
        }
    }

    return true;
}
|
|
|
|
/** returns the rank to which the given tensor can be squeezed to */
template <class TensorType>
std::size_t get_effective_rank(const TensorType& x) noexcept {
    const auto rank = x.rank();

    /* leading unit axes carry no information; count how many there are */
    std::size_t num_leading_ones = 0;
    while (num_leading_ones < rank && x.get_axis_size(num_leading_ones) == 1)
        ++num_leading_ones;

    return rank - num_leading_ones;
}
|
|
|
|
}}}} /* namespace cv::dnn::cuda4dnn::csl */
|
|
|
|
#endif /* OPENCV_DNN_SRC_CUDA4DNN_CSL_TENSOR_HPP */
|