From 9c87d8bf9cd11756bb153ff36143653e40d7f79d Mon Sep 17 00:00:00 2001
From: Artem Saratovtsev <46279571+DumDereDum@users.noreply.github.com>
Date: Fri, 18 Feb 2022 17:50:26 +0300
Subject: [PATCH] Merge pull request #21189 from DumDereDum:new_volume

New Volume pipeline
---
 .../3d/include/opencv2/3d/detail/submap.hpp   |   39 +-
 .../3d/include/opencv2/3d/odometry_frame.hpp  |    1 +
 modules/3d/include/opencv2/3d/volume.hpp      |  225 ++-
 .../3d/include/opencv2/3d/volume_settings.hpp |  213 ++
 modules/3d/perf/perf_tsdf.cpp                 |  997 +++++++--
 modules/3d/src/rgbd/color_tsdf_functions.cpp  | 1259 ++++++++++++
 modules/3d/src/rgbd/color_tsdf_functions.hpp  |   43 +
 modules/3d/src/rgbd/colored_tsdf.cpp          | 1024 ----------
 modules/3d/src/rgbd/colored_tsdf.hpp          |   39 -
 modules/3d/src/rgbd/hash_tsdf.cpp             | 1795 -----------------
 modules/3d/src/rgbd/hash_tsdf.hpp             |   47 -
 modules/3d/src/rgbd/hash_tsdf_functions.cpp   | 1534 ++++++++++++++
 modules/3d/src/rgbd/hash_tsdf_functions.hpp   |  325 +++
 modules/3d/src/rgbd/odometry_frame_impl.cpp   |   26 +-
 modules/3d/src/rgbd/odometry_functions.cpp    |    9 +-
 modules/3d/src/rgbd/tsdf.cpp                  | 1228 -----------
 modules/3d/src/rgbd/tsdf.hpp                  |   41 -
 modules/3d/src/rgbd/tsdf_functions.cpp        | 1419 ++++++++++---
 modules/3d/src/rgbd/tsdf_functions.hpp        |  262 +--
 modules/3d/src/rgbd/volume.cpp                |  120 --
 modules/3d/src/rgbd/volume_impl.cpp           |  540 +++++
 modules/3d/src/rgbd/volume_impl.hpp           |  220 ++
 modules/3d/src/rgbd/volume_settings_impl.cpp  |  532 +++++
 modules/3d/test/ocl/test_tsdf.cpp             |  511 +++--
 modules/3d/test/test_tsdf.cpp                 |  961 +++++++--
 25 files changed, 8024 insertions(+), 5386 deletions(-)
 create mode 100644 modules/3d/include/opencv2/3d/volume_settings.hpp
 create mode 100644 modules/3d/src/rgbd/color_tsdf_functions.cpp
 create mode 100644 modules/3d/src/rgbd/color_tsdf_functions.hpp
 delete mode 100644 modules/3d/src/rgbd/colored_tsdf.cpp
 delete mode 100644 modules/3d/src/rgbd/colored_tsdf.hpp
 delete mode 100644 modules/3d/src/rgbd/hash_tsdf.cpp
 delete mode 100644 modules/3d/src/rgbd/hash_tsdf.hpp
 create mode 100644 modules/3d/src/rgbd/hash_tsdf_functions.cpp
 create mode 100644 modules/3d/src/rgbd/hash_tsdf_functions.hpp
 delete mode 100644 modules/3d/src/rgbd/tsdf.cpp
 delete mode 100644 modules/3d/src/rgbd/tsdf.hpp
 delete mode 100644 modules/3d/src/rgbd/volume.cpp
 create mode 100644 modules/3d/src/rgbd/volume_impl.cpp
 create mode 100644 modules/3d/src/rgbd/volume_impl.hpp
 create mode 100644 modules/3d/src/rgbd/volume_settings_impl.cpp
diff --git a/modules/3d/include/opencv2/3d/detail/submap.hpp b/modules/3d/include/opencv2/3d/detail/submap.hpp
index e8a77b5d1f..d42b9360bf 100644
--- a/modules/3d/include/opencv2/3d/detail/submap.hpp
+++ b/modules/3d/include/opencv2/3d/detail/submap.hpp
@@ -43,25 +43,25 @@ public:
     };
     typedef std::map<int, PoseConstraint> Constraints;
 
-    Submap(int _id, const VolumeParams& volumeParams, const cv::Affine3f& _pose = cv::Affine3f::Identity(),
+    Submap(int _id, const VolumeSettings& settings, const cv::Affine3f& _pose = cv::Affine3f::Identity(),
            int _startFrameId = 0)
         : id(_id), pose(_pose), cameraPose(Affine3f::Identity()), startFrameId(_startFrameId)
     {
-        VolumeParams vp = volumeParams;
-        vp.kind = VolumeParams::VolumeKind::HASHTSDF;
-        Ptr<VolumeParams> pvp = makePtr<VolumeParams>(vp);
-        volume = makeVolume(pvp);
+        volume = Volume(VolumeType::HashTSDF, settings);
     }
     virtual ~Submap() = default;
 
-    virtual void integrate(InputArray _depth, float depthFactor, const cv::Matx33f& intrinsics, const int currframeId);
-    virtual void raycast(const Odometry& icp, const cv::Affine3f& cameraPose, const cv::Matx33f& intrinsics, cv::Size frameSize,
+    virtual void integrate(InputArray _depth, const int currframeId);
+    virtual void raycast(const Odometry& icp, const cv::Affine3f& cameraPose, cv::Size frameSize,
                          OutputArray points = noArray(), OutputArray normals = noArray());
 
-    virtual int getTotalAllocatedBlocks() const { return int(volume->getTotalVolumeUnits()); };
+    virtual int getTotalAllocatedBlocks() const { return int(volume.getTotalVolumeUnits()); };
     virtual int getVisibleBlocks(int currFrameId) const
     {
-        return volume->getVisibleBlocks(currFrameId, FRAME_VISIBILITY_THRESHOLD);
+        CV_Assert(currFrameId >= startFrameId);
+        //return volume.getVisibleBlocks(currFrameId, FRAME_VISIBILITY_THRESHOLD);
+        return volume.getVisibleBlocks();
+
     }
 
     float calcVisibilityRatio(int currFrameId) const
@@ -100,20 +100,19 @@ public:
     OdometryFrame frame;
     OdometryFrame renderFrame;
 
-    std::shared_ptr<Volume> volume;
+    Volume volume;
 };
 
 template<typename MatType>
 
-void Submap<MatType>::integrate(InputArray _depth, float depthFactor, const cv::Matx33f& intrinsics,
-                                const int currFrameId)
+void Submap<MatType>::integrate(InputArray _depth, const int currFrameId)
 {
     CV_Assert(currFrameId >= startFrameId);
-    volume->integrate(_depth, depthFactor, cameraPose.matrix, intrinsics, currFrameId);
+    volume.integrate(_depth, cameraPose.matrix);
 }
 
 template<typename MatType>
-void Submap<MatType>::raycast(const Odometry& icp, const cv::Affine3f& _cameraPose, const cv::Matx33f& intrinsics, cv::Size frameSize,
+void Submap<MatType>::raycast(const Odometry& icp, const cv::Affine3f& _cameraPose, cv::Size frameSize,
                               OutputArray points, OutputArray normals)
 {
     if (!points.needed() && !normals.needed())
@@ -122,20 +121,20 @@ void Submap<MatType>::raycast(const Odometry& icp, const cv::Affine3f& _cameraPo
 
         frame.getPyramidAt(pts, OdometryFramePyramidType::PYR_CLOUD, 0);
         frame.getPyramidAt(nrm, OdometryFramePyramidType::PYR_NORM, 0);
-        volume->raycast(_cameraPose.matrix, intrinsics, frameSize, pts, nrm);
+        volume.raycast(_cameraPose.matrix, frameSize.height, frameSize.height, pts, nrm);
         frame.setPyramidAt(pts, OdometryFramePyramidType::PYR_CLOUD, 0);
         frame.setPyramidAt(nrm, OdometryFramePyramidType::PYR_NORM,  0);
 
         renderFrame = frame;
 
         Mat depth;
-        frame.getDepth(depth);
+        frame.getScaledDepth(depth);
         frame = icp.createOdometryFrame();
         frame.setDepth(depth);
     }
     else
     {
-        volume->raycast(_cameraPose.matrix, intrinsics, frameSize, points, normals);
+        volume.raycast(_cameraPose.matrix, frameSize.height, frameSize.height, points, normals);
     }
 }
 
@@ -188,7 +187,7 @@ public:
     typedef std::map<int, Ptr<SubmapT>> IdToSubmapPtr;
     typedef std::unordered_map<int, ActiveSubmapData> IdToActiveSubmaps;
 
-    SubmapManager(const VolumeParams& _volumeParams) : volumeParams(_volumeParams) {}
+    explicit SubmapManager(const VolumeSettings& _volumeSettings) : volumeSettings(_volumeSettings) {}
     virtual ~SubmapManager() = default;
 
     void reset() { submapList.clear(); };
@@ -214,7 +213,7 @@ public:
     Ptr<detail::PoseGraph> MapToPoseGraph();
     void PoseGraphToMap(const Ptr<detail::PoseGraph>& updatedPoseGraph);
 
-    VolumeParams volumeParams;
+    VolumeSettings volumeSettings;
 
     std::vector<Ptr<SubmapT>> submapList;
     IdToActiveSubmaps activeSubmaps;
@@ -227,7 +226,7 @@ int SubmapManager<MatType>::createNewSubmap(bool isCurrentMap, int currFrameId,
 {
     int newId = int(submapList.size());
 
-    Ptr<SubmapT> newSubmap = cv::makePtr<SubmapT>(newId, volumeParams, pose, currFrameId);
+    Ptr<SubmapT> newSubmap = cv::makePtr<SubmapT>(newId, volumeSettings, pose, currFrameId);
     submapList.push_back(newSubmap);
 
     ActiveSubmapData newSubmapData;
diff --git a/modules/3d/include/opencv2/3d/odometry_frame.hpp b/modules/3d/include/opencv2/3d/odometry_frame.hpp
index 509386b347..f5454dea83 100644
--- a/modules/3d/include/opencv2/3d/odometry_frame.hpp
+++ b/modules/3d/include/opencv2/3d/odometry_frame.hpp
@@ -52,6 +52,7 @@ public:
     void getGrayImage(OutputArray image) const;
     void setDepth(InputArray  depth);
     void getDepth(OutputArray depth) const;
+    void getScaledDepth(OutputArray depth) const;
     void setMask(InputArray  mask);
     void getMask(OutputArray mask) const;
     void setNormals(InputArray  normals);
diff --git a/modules/3d/include/opencv2/3d/volume.hpp b/modules/3d/include/opencv2/3d/volume.hpp
index 3faf66f7ea..18f96b6856 100644
--- a/modules/3d/include/opencv2/3d/volume.hpp
+++ b/modules/3d/include/opencv2/3d/volume.hpp
@@ -5,124 +5,135 @@
 #ifndef OPENCV_3D_VOLUME_HPP
 #define OPENCV_3D_VOLUME_HPP
 
+#include "volume_settings.hpp"
+
 #include "opencv2/core/affine.hpp"
 
 namespace cv
 {
+
 class CV_EXPORTS_W Volume
 {
-   public:
-    Volume(float _voxelSize, Matx44f _pose, float _raycastStepFactor) :
-        voxelSize(_voxelSize),
-        voxelSizeInv(1.0f / voxelSize),
-        pose(_pose),
-        raycastStepFactor(_raycastStepFactor)
-    { }
+public:
+    /** @brief Constructor of default volume - TSDF.
+    */
+    Volume();
+    /** @brief Constructor of custom volume.
+    * @param vtype the volume type [TSDF, HashTSDF, ColorTSDF].
+    * @param settings the custom settings for volume.
+    */
+    Volume(VolumeType vtype, const VolumeSettings& settings);
+    ~Volume();
 
-    virtual ~Volume(){};
+    /** @brief Integrates the input data to the volume.
 
-    CV_WRAP
-    virtual void integrate(InputArray _depth, float depthFactor, const Matx44f& cameraPose,
-                           const Matx33f& intrinsics, const int frameId = 0)                   = 0;
-    virtual void integrate(InputArray _depth, InputArray _rgb, float depthFactor,
-                           const Matx44f& cameraPose, const Matx33f& intrinsics,
-                           const Matx33f& rgb_intrinsics, const int frameId = 0)               = 0;
-    CV_WRAP
-    virtual void raycast(const Matx44f& cameraPose, const Matx33f& intrinsics,
-                         const Size& frameSize, OutputArray points, OutputArray normals) const = 0;
-    virtual void raycast(const Matx44f& cameraPose, const Matx33f& intrinsics, const Size& frameSize,
-                         OutputArray points, OutputArray normals, OutputArray colors) const    = 0;
-    CV_WRAP
-    virtual void fetchNormals(InputArray points, OutputArray _normals) const                   = 0;
-    CV_WRAP
-    virtual void fetchPointsNormals(OutputArray points, OutputArray normals) const             = 0;
-    CV_WRAP
-    virtual void reset()                                                                       = 0;
+    Camera intrinsics are taken from volume settings structure.
 
-    // Works for hash-based volumes only, otherwise returns 1
-    virtual int getVisibleBlocks(int /*currFrameId*/, int /*frameThreshold*/) const { return 1; }
-    virtual size_t getTotalVolumeUnits() const { return 1; }
+    * @param frame the object from which to take depth and image data.
+      For color TSDF a depth data should be registered with color data, i.e. have the same intrinsics & camera pose.
+      This can be done using function registerDepth() from 3d module.
+    * @param pose the pose of camera in global coordinates.
+    */
+    void integrate(const OdometryFrame& frame, InputArray pose);
 
-   public:
-    const float voxelSize;
-    const float voxelSizeInv;
-    const Affine3f pose;
-    const float raycastStepFactor;
+    /** @brief Integrates the input data to the volume.
+
+    Camera intrinsics are taken from volume settings structure.
+
+    * @param depth the depth image.
+    * @param pose the pose of camera in global coordinates.
+    */
+    void integrate(InputArray depth, InputArray pose);
+
+    /** @brief Integrates the input data to the volume.
+
+    Camera intrinsics are taken from volume settings structure.
+
+    * @param depth the depth image.
+    * @param image the color image (only for ColorTSDF).
+      For color TSDF a depth data should be registered with color data, i.e. have the same intrinsics & camera pose.
+      This can be done using function registerDepth() from 3d module.
+    * @param pose the pose of camera in global coordinates.
+    */
+    void integrate(InputArray depth, InputArray image, InputArray pose);
+
+    /** @brief Renders the volume contents into an image. The resulting points and normals are in camera's coordinate system.
+
+    Rendered image size and camera intrinsics are taken from volume settings structure.
+
+    * @param cameraPose the pose of camera in global coordinates.
+    * @param outFrame the object where to store rendered points and normals.
+    */
+    void raycast(InputArray cameraPose, OdometryFrame& outFrame) const;
+
+    /** @brief Renders the volume contents into an image. The resulting points and normals are in camera's coordinate system.
+
+    Rendered image size and camera intrinsics are taken from volume settings structure.
+
+    * @param cameraPose the pose of camera in global coordinates.
+    * @param points image to store rendered points.
+    * @param normals image to store rendered normals corresponding to points.
+    * @param colors image to store rendered colors corresponding to points (only for ColorTSDF).
+    */
+    void raycast(InputArray cameraPose, OutputArray points, OutputArray normals, OutputArray colors = noArray()) const;
+
+    /** @brief Renders the volume contents into an image. The resulting points and normals are in camera's coordinate system.
+
+    Rendered image size and camera intrinsics are taken from volume settings structure.
+
+    * @param cameraPose the pose of camera in global coordinates.
+    * @param height the height of result image.
+    * @param width the width of result image.
+    * @param outFrame the object where to store rendered points and normals.
+    */
+    void raycast(InputArray cameraPose, int height, int width, OdometryFrame& outFrame) const;
+
+    /** @brief Renders the volume contents into an image. The resulting points and normals are in camera's coordinate system.
+
+    Rendered image size and camera intrinsics are taken from volume settings structure.
+
+    * @param cameraPose the pose of camera in global coordinates.
+    * @param height the height of result image.
+    * @param width the width of result image.
+    * @param points image to store rendered points.
+    * @param normals image to store rendered normals corresponding to points.
+    * @param colors image to store rendered colors corresponding to points (only for ColorTSDF).
+    */
+    void raycast(InputArray cameraPose, int height, int width, OutputArray points, OutputArray normals, OutputArray colors = noArray()) const;
+
+    /** @brief Extract the all data from volume.
+    * @param points the input exist point.
+    * @param normals the storage of normals (corresponding to input points) in the image.
+    */
+    void fetchNormals(InputArray points, OutputArray normals) const;
+    /** @brief Extract the all data from volume.
+    * @param points the storage of all points.
+    * @param normals the storage of all normals, corresponding to points.
+    */
+    void fetchPointsNormals(OutputArray points, OutputArray normals) const;
+    /** @brief Extract the all data from volume.
+    * @param points the storage of all points.
+    * @param normals the storage of all normals, corresponding to points.
+    * @param colors the storage of all colors, corresponding to points (only for ColorTSDF).
+    */
+    void fetchPointsNormalsColors(OutputArray points, OutputArray normals, OutputArray colors) const;
+
+    /** @brief clear all data in volume.
+    */
+    void reset();
+
+    /** @brief return visible blocks in volume.
+    */
+    int getVisibleBlocks() const;
+
+    /** @brief return number of vulmeunits in volume.
+    */
+    size_t getTotalVolumeUnits() const;
+
+    class Impl;
+private:
+    Ptr<Impl> impl;
 };
 
-struct CV_EXPORTS_W VolumeParams
-{
-    enum VolumeKind
-    {
-        TSDF        = 0,
-        HASHTSDF    = 1,
-        COLOREDTSDF = 2
-    };
-
-    /** @brief Kind of Volume
-        Values can be TSDF (single volume) or HASHTSDF (hashtable of volume units)
-    */
-    CV_PROP_RW int kind;
-
-    /** @brief Resolution of voxel space
-        Number of voxels in each dimension.
-        Applicable only for TSDF Volume.
-        HashTSDF volume only supports equal resolution in all three dimensions
-    */
-    CV_PROP_RW int resolutionX;
-    CV_PROP_RW int resolutionY;
-    CV_PROP_RW int resolutionZ;
-
-    /** @brief Resolution of volumeUnit in voxel space
-        Number of voxels in each dimension for volumeUnit
-        Applicable only for hashTSDF.
-    */
-    CV_PROP_RW int unitResolution = {0};
-
-    /** @brief Initial pose of the volume in meters, should be 4x4 float or double matrix */
-    CV_PROP_RW Mat pose;
-
-    /** @brief Length of voxels in meters */
-    CV_PROP_RW float voxelSize;
-
-    /** @brief TSDF truncation distance
-        Distances greater than value from surface will be truncated to 1.0
-    */
-    CV_PROP_RW float tsdfTruncDist;
-
-    /** @brief Max number of frames to integrate per voxel
-        Represents the max number of frames over which a running average
-        of the TSDF is calculated for a voxel
-    */
-    CV_PROP_RW int maxWeight;
-
-    /** @brief Threshold for depth truncation in meters
-        Truncates the depth greater than threshold to 0
-    */
-    CV_PROP_RW float depthTruncThreshold;
-
-    /** @brief Length of single raycast step
-        Describes the percentage of voxel length that is skipped per march
-    */
-    CV_PROP_RW float raycastStepFactor;
-
-    /** @brief Default set of parameters that provide higher quality reconstruction
-        at the cost of slow performance.
-    */
-    CV_WRAP static Ptr<VolumeParams> defaultParams(int _volumeType);
-
-    /** @brief Coarse set of parameters that provides relatively higher performance
-        at the cost of reconstrution quality.
-    */
-    CV_WRAP static Ptr<VolumeParams> coarseParams(int _volumeType);
-};
-
-
-CV_EXPORTS_W Ptr<Volume> makeVolume(const Ptr<VolumeParams>& _volumeParams);
-CV_EXPORTS_W Ptr<Volume> makeVolume(int _volumeType, float _voxelSize, Matx44f _pose,
-                                    float _raycastStepFactor, float _truncDist, int _maxWeight, float _truncateThreshold,
-                                    int _resolutionX, int _resolutionY, int _resolutionZ);
-
 }  // namespace cv
-
 #endif // include guard
diff --git a/modules/3d/include/opencv2/3d/volume_settings.hpp b/modules/3d/include/opencv2/3d/volume_settings.hpp
new file mode 100644
index 0000000000..893fa60bfb
--- /dev/null
+++ b/modules/3d/include/opencv2/3d/volume_settings.hpp
@@ -0,0 +1,213 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+
+#ifndef OPENCV_3D_VOLUME_SETTINGS_HPP
+#define OPENCV_3D_VOLUME_SETTINGS_HPP
+
+#include <opencv2/core.hpp>
+#include <opencv2/3d/volume.hpp>
+
+namespace cv
+{
+
+enum class VolumeType
+{
+    TSDF = 0,
+    HashTSDF = 1,
+    ColorTSDF = 2
+};
+
+
+class CV_EXPORTS_W VolumeSettings
+{
+public:
+    /** @brief Constructor of settings for common TSDF volume type.
+    */
+    VolumeSettings();
+
+    /** @brief Constructor of settings for custom Volume type.
+    * @param volumeType volume type.
+    */
+    VolumeSettings(VolumeType volumeType);
+    ~VolumeSettings();
+
+    /** @brief Sets the width of the image for integration.
+    * @param val input value.
+    */
+    void  setIntegrateWidth(int val);
+
+    /** @brief Returns the width of the image for integration.
+    */
+    int   getIntegrateWidth() const;
+
+    /** @brief Sets the height of the image for integration.
+    * @param val input value.
+    */
+    void  setIntegrateHeight(int val);
+
+    /** @brief Returns the height of the image for integration.
+    */
+    int   getIntegrateHeight() const;
+
+
+    /** @brief Sets the width of the raycasted image.
+    * @param val input value.
+    */
+    void  setRaycastWidth(int val);
+
+    /** @brief Returns the width of the raycasted image.
+    */
+    int   getRaycastWidth() const;
+
+    /** @brief Sets the height of the raycasted image.
+    * @param val input value.
+    */
+    void  setRaycastHeight(int val);
+
+    /** @brief Returns the height of the raycasted image.
+    */
+    int   getRaycastHeight() const;
+
+
+    /** @brief Sets depth factor, witch is the number for depth scaling.
+    * @param val input value.
+    */
+    void  setDepthFactor(float val);
+
+    /** @brief Returns depth factor, witch is the number for depth scaling.
+    */
+    float getDepthFactor() const;
+
+    /** @brief Sets the size of voxel.
+    * @param val input value.
+    */
+    void  setVoxelSize(float val);
+
+    /** @brief Returns the size of voxel.
+    */
+    float getVoxelSize() const;
+
+
+    /** @brief Sets TSDF truncation distance. Distances greater than value from surface will be truncated to 1.0.
+    * @param val input value.
+    */
+    void  setTsdfTruncateDistance(float val);
+
+    /** @brief Returns TSDF truncation distance. Distances greater than value from surface will be truncated to 1.0.
+    */
+    float getTsdfTruncateDistance() const;
+
+    /** @brief Sets threshold for depth truncation in meters. Truncates the depth greater than threshold to 0.
+    * @param val input value.
+    */
+    void  setMaxDepth(float val);
+
+    /** @brief Returns threshold for depth truncation in meters. Truncates the depth greater than threshold to 0.
+    */
+    float getMaxDepth() const;
+
+    /** @brief Sets max number of frames to integrate per voxel.
+        Represents the max number of frames over which a running average of the TSDF is calculated for a voxel.
+    * @param val input value.
+    */
+    void  setMaxWeight(int val);
+
+    /** @brief Returns max number of frames to integrate per voxel.
+        Represents the max number of frames over which a running average of the TSDF is calculated for a voxel.
+    */
+    int   getMaxWeight() const;
+
+    /** @brief Sets length of single raycast step.
+        Describes the percentage of voxel length that is skipped per march.
+    * @param val input value.
+    */
+    void  setRaycastStepFactor(float val);
+
+    /** @brief Returns length of single raycast step.
+        Describes the percentage of voxel length that is skipped per march.
+    */
+    float getRaycastStepFactor() const;
+
+    /** @brief Sets volume pose.
+    * @param val input value.
+    */
+    void setVolumePose(InputArray val);
+
+    /** @brief Sets volume pose.
+    * @param val output value.
+    */
+    void getVolumePose(OutputArray val) const;
+
+    /** @brief Resolution of voxel space.
+        Number of voxels in each dimension.
+        Applicable only for TSDF Volume.
+        HashTSDF volume only supports equal resolution in all three dimensions.
+    * @param val input value.
+    */
+    void setVolumeResolution(InputArray val);
+
+    /** @brief Resolution of voxel space.
+        Number of voxels in each dimension.
+        Applicable only for TSDF Volume.
+        HashTSDF volume only supports equal resolution in all three dimensions.
+    * @param val output value.
+    */
+    void getVolumeResolution(OutputArray val) const;
+
+    /** @brief Returns 3 integers representing strides by x, y and z dimension.
+        Can be used to iterate over volume unit raw data.
+    * @param val output value.
+    */
+    void getVolumeDimensions(OutputArray val) const;
+
+    /** @brief Sets intrinsics of camera for integrations.
+    * Format of input:
+    * [ fx  0 cx ]
+    * [  0 fy cy ]
+    * [  0  0  1 ]
+    * where fx and fy are focus points of Ox and Oy axises, and cx and cy are central points of Ox and Oy axises.
+    * @param val input value.
+    */
+    void setCameraIntegrateIntrinsics(InputArray val);
+
+    /** @brief Returns intrinsics of camera for integrations.
+    * Format of output:
+    * [ fx  0 cx ]
+    * [  0 fy cy ]
+    * [  0  0  1 ]
+    * where fx and fy are focus points of Ox and Oy axises, and cx and cy are central points of Ox and Oy axises.
+    * @param val output value.
+    */
+    void getCameraIntegrateIntrinsics(OutputArray val) const;
+
+    /** @brief Sets intrinsics of camera for raycast image.
+    * Format of input:
+    * [ fx  0 cx ]
+    * [  0 fy cy ]
+    * [  0  0  1 ]
+    * where fx and fy are focus points of Ox and Oy axises, and cx and cy are central points of Ox and Oy axises.
+    * @param val input value.
+    */
+    void setCameraRaycastIntrinsics(InputArray val);
+
+    /** @brief Returns intrinsics of camera for raycast image.
+    * Format of output:
+    * [ fx  0 cx ]
+    * [  0 fy cy ]
+    * [  0  0  1 ]
+    * where fx and fy are focus points of Ox and Oy axises, and cx and cy are central points of Ox and Oy axises.
+    * @param val output value.
+    */
+    void getCameraRaycastIntrinsics(OutputArray val) const;
+
+
+    class Impl;
+private:
+    Ptr<Impl> impl;
+};
+
+}
+
+#endif // !OPENCV_3D_VOLUME_SETTINGS_HPP
diff --git a/modules/3d/perf/perf_tsdf.cpp b/modules/3d/perf/perf_tsdf.cpp
index e1cc5e07e4..f387bba043 100644
--- a/modules/3d/perf/perf_tsdf.cpp
+++ b/modules/3d/perf/perf_tsdf.cpp
@@ -87,11 +87,78 @@ struct RenderInvoker : ParallelLoopBody
     bool onlySemisphere;
 };
 
+template<class Scene>
+struct RenderColorInvoker : ParallelLoopBody
+{
+    RenderColorInvoker(Mat_<Vec3f>& _frame, Affine3f _pose,
+        Reprojector _reproj,
+        float _depthFactor, bool _onlySemisphere) : ParallelLoopBody(),
+        frame(_frame),
+        pose(_pose),
+        reproj(_reproj),
+        depthFactor(_depthFactor),
+        onlySemisphere(_onlySemisphere)
+    { }
+
+    virtual void operator ()(const cv::Range& r) const
+    {
+        for (int y = r.start; y < r.end; y++)
+        {
+            Vec3f* frameRow = frame[y];
+            for (int x = 0; x < frame.cols; x++)
+            {
+                Vec3f pix = 0;
+
+                Point3f orig = pose.translation();
+                // direction through pixel
+                Point3f screenVec = reproj(Point3f((float)x, (float)y, 1.f));
+                Point3f dir = normalize(Vec3f(pose.rotation() * screenVec));
+                // screen space axis
+                dir.y = -dir.y;
+
+                const float maxDepth = 20.f;
+                const float maxSteps = 256;
+                float t = 0.f;
+                for (int step = 0; step < maxSteps && t < maxDepth; step++)
+                {
+                    Point3f p = orig + dir * t;
+                    float d = Scene::map(p, onlySemisphere);
+                    if (d < 0.000001f)
+                    {
+                        float m = 0.25f;
+                        float p0 = float(abs(fmod(p.x, m)) > m / 2.f);
+                        float p1 = float(abs(fmod(p.y, m)) > m / 2.f);
+                        float p2 = float(abs(fmod(p.z, m)) > m / 2.f);
+
+                        pix[0] = p0 + p1;
+                        pix[1] = p1 + p2;
+                        pix[2] = p0 + p2;
+
+                        pix *= 128.f;
+                        break;
+                    }
+                    t += d;
+                }
+
+                frameRow[x] = pix;
+            }
+        }
+    }
+
+    Mat_<Vec3f>& frame;
+    Affine3f pose;
+    Reprojector reproj;
+    float depthFactor;
+    bool onlySemisphere;
+};
+
+
 struct Scene
 {
     virtual ~Scene() {}
     static Ptr<Scene> create(Size sz, Matx33f _intr, float _depthFactor, bool onlySemisphere);
     virtual Mat depth(Affine3f pose) = 0;
+    virtual Mat rgb(Affine3f pose) = 0;
     virtual std::vector<Affine3f> getPoses() = 0;
 };
 
@@ -142,6 +209,17 @@ struct SemisphereScene : Scene
         return std::move(frame);
     }
 
+    Mat rgb(Affine3f pose) override
+    {
+        Mat_<Vec3f> frame(frameSize);
+        Reprojector reproj(intr);
+
+        Range range(0, frame.rows);
+        parallel_for_(range, RenderColorInvoker<SemisphereScene>(frame, pose, reproj, depthFactor, onlySemisphere));
+
+        return std::move(frame);
+    }
+
     std::vector<Affine3f> getPoses() override
     {
         std::vector<Affine3f> poses;
@@ -172,6 +250,7 @@ Ptr<Scene> Scene::create(Size sz, Matx33f _intr, float _depthFactor, bool _onlyS
 
 typedef cv::Vec4f ptype;
 typedef cv::Mat_< ptype > Points;
+typedef cv::Mat_< ptype > Colors;
 typedef Points Normals;
 typedef Size2i Size;
 
@@ -270,168 +349,844 @@ void renderPointsNormals(InputArray _points, InputArray _normals, OutputArray im
             }
         }, nstripes);
 }
+void renderPointsNormalsColors(InputArray _points, InputArray, InputArray _colors, OutputArray image, Affine3f)
+{
+    Size sz = _points.size();
+    image.create(sz, CV_8UC4);
 
+    Points  points = _points.getMat();
+    Colors  colors = _colors.getMat();
+
+    Mat_<Vec4b> img = image.getMat();
+
+    Range range(0, sz.height);
+    const int nstripes = -1;
+    parallel_for_(range, [&](const Range&)
+        {
+            for (int y = range.start; y < range.end; y++)
+            {
+                Vec4b* imgRow = img[y];
+                const ptype* ptsRow = points[y];
+                const ptype* clrRow = colors[y];
+
+                for (int x = 0; x < sz.width; x++)
+                {
+                    Point3f p = fromPtype(ptsRow[x]);
+                    Point3f c = fromPtype(clrRow[x]);
+
+                    Vec4b color;
+
+                    if (cvIsNaN(p.x) || cvIsNaN(p.y) || cvIsNaN(p.z)
+                        || cvIsNaN(c.x) || cvIsNaN(c.y) || cvIsNaN(c.z))
+                    {
+                        color = Vec4b(0, 32, 0, 0);
+                    }
+                    else
+                    {
+                        color = Vec4b((uchar)c.x, (uchar)c.y, (uchar)c.z, (uchar)0);
+                    }
+
+                    imgRow[x] = color;
+                }
+            }
+        }, nstripes);
+}
 // ----------------------------
 
-class Settings
+void displayImage(Mat depth, Mat points, Mat normals, float depthFactor, Vec3f lightPose)
 {
-public:
-    float depthFactor;
-    Matx33f intr;
-    Size frameSize;
-    Vec3f lightPose;
-
-    Ptr<Volume> volume;
-    Ptr<Scene> scene;
-    std::vector<Affine3f> poses;
-
-    Settings(bool useHashTSDF)
-    {
-        frameSize = Size(640, 480);
-
-        float fx, fy, cx, cy;
-        fx = fy = 525.f;
-        cx = frameSize.width / 2 - 0.5f;
-        cy = frameSize.height / 2 - 0.5f;
-        intr = Matx33f(fx,  0, cx,
-                        0, fy, cy,
-                        0,  0,  1);
-
-        // 5000 for the 16-bit PNG files
-        // 1 for the 32-bit float images in the ROS bag files
-        depthFactor = 5000;
-
-        Vec3i volumeDims = Vec3i::all(512); //number of voxels
-
-        float volSize = 3.f;
-        float voxelSize = volSize / 512.f; //meters
-
-        // default pose of volume cube
-        Affine3f volumePose = Affine3f().translate(Vec3f(-volSize / 2.f, -volSize / 2.f, 0.5f));
-        float tsdf_trunc_dist = 7 * voxelSize; // about 0.04f in meters
-        int tsdf_max_weight = 64;   //frames
-
-        float raycast_step_factor = 0.25f;  //in voxel sizes
-        // gradient delta factor is fixed at 1.0f and is not used
-        //p.gradient_delta_factor = 0.5f; //in voxel sizes
-
-        //p.lightPose = p.volume_pose.translation()/4; //meters
-        lightPose = Vec3f::all(0.f); //meters
-
-        // depth truncation is not used by default but can be useful in some scenes
-        float truncateThreshold = 0.f; //meters
-
-        VolumeParams::VolumeKind volumeKind = VolumeParams::VolumeKind::TSDF;
-
-        if (useHashTSDF)
-        {
-            volumeKind = VolumeParams::VolumeKind::HASHTSDF;
-            truncateThreshold = 4.f;
-        }
-        else
-        {
-            volSize = 3.f;
-            volumeDims = Vec3i::all(128); //number of voxels
-            voxelSize = volSize / 128.f;
-            tsdf_trunc_dist = 2 * voxelSize; // 0.04f in meters
-
-            raycast_step_factor = 0.75f;  //in voxel sizes
-        }
-
-        volume = makeVolume(volumeKind, voxelSize, volumePose.matrix,
-                            raycast_step_factor, tsdf_trunc_dist, tsdf_max_weight,
-                            truncateThreshold, volumeDims[0], volumeDims[1], volumeDims[2]);
-
-        scene = Scene::create(frameSize, intr, depthFactor, true);
-        poses = scene->getPoses();
-    }
-};
-
-void displayImage(Mat depth, UMat _points, UMat _normals, float depthFactor, Vec3f lightPose)
-{
-    Mat  points, normals, image;
-    AccessFlag af = ACCESS_READ;
-    normals = _normals.getMat(af);
-    points = _points.getMat(af);
+    Mat image;
     patchNaNs(points);
-
     imshow("depth", depth * (1.f / depthFactor / 4.f));
     renderPointsNormals(points, normals, image, lightPose);
     imshow("render", image);
-    waitKey(2000);
+    waitKey(100);
+}
+
+void displayColorImage(Mat depth, Mat rgb, Mat points, Mat normals, Mat colors, float depthFactor, Vec3f lightPose)
+{
+    Mat image;
+    patchNaNs(points);
+    imshow("depth", depth * (1.f / depthFactor / 4.f));
+    imshow("rgb", rgb * (1.f / 255.f));
+    renderPointsNormalsColors(points, normals, colors, image, lightPose);
+    imshow("render", image);
+    waitKey(100);
 }
 
 static const bool display = false;
 
-PERF_TEST(Perf_TSDF, integrate)
+// Perf_TSDF_GPU.integrate_mat
+#ifdef HAVE_OPENCL
+PERF_TEST(Perf_TSDF_GPU, integrate_mat)
+#else
+PERF_TEST(Perf_TSDF, integrate_mat)
+#endif
 {
-    Settings settings(false);
-    for (size_t i = 0; i < settings.poses.size(); i++)
+    VolumeType volumeType = VolumeType::TSDF;
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = false;
+    float depthFactor = vs.getDepthFactor();
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    for (size_t i = 0; i < poses.size(); i++)
     {
-        Matx44f pose = settings.poses[i].matrix;
-        Mat depth = settings.scene->depth(pose);
+        Matx44f pose = poses[i].matrix;
+        Mat depth = scene->depth(pose);
+
         startTimer();
-        settings.volume->integrate(depth, settings.depthFactor, pose, settings.intr);
+        volume.integrate(depth, pose);
         stopTimer();
-        depth.release();
+
     }
     SANITY_CHECK_NOTHING();
 }
 
-PERF_TEST(Perf_TSDF, raycast)
+// Perf_TSDF_GPU.integrate_frame
+#ifdef HAVE_OPENCL
+PERF_TEST(Perf_TSDF_GPU, integrate_frame)
+#else
+PERF_TEST(Perf_TSDF, integrate_frame)
+#endif
 {
-    Settings settings(false);
-    for (size_t i = 0; i < settings.poses.size(); i++)
-    {
-        UMat _points, _normals;
-        Matx44f pose = settings.poses[i].matrix;
-        Mat depth = settings.scene->depth(pose);
+    VolumeType volumeType = VolumeType::TSDF;
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = false;
+    float depthFactor = vs.getDepthFactor();
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    for (size_t i = 0; i < poses.size(); i++)
+    {
+        Matx44f pose = poses[i].matrix;
+        Mat depth = scene->depth(pose);
+        OdometryFrame odf;
+        odf.setDepth(depth);
 
-        settings.volume->integrate(depth, settings.depthFactor, pose, settings.intr);
         startTimer();
-        settings.volume->raycast(pose, settings.intr, settings.frameSize, _points, _normals);
+        volume.integrate(odf, pose);
+        stopTimer();
+
+    }
+    SANITY_CHECK_NOTHING();
+}
+
+// Perf_TSDF_GPU.raycast_mat
+#ifdef HAVE_OPENCL
+PERF_TEST(Perf_TSDF_GPU, raycast_mat)
+#else
+PERF_TEST(Perf_TSDF, raycast_mat)
+#endif
+{
+    VolumeType volumeType = VolumeType::TSDF;
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = false;
+    float depthFactor = vs.getDepthFactor();
+    Vec3f lightPose = Vec3f::all(0.f);
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    for (size_t i = 0; i < poses.size(); i++)
+    {
+        Matx44f pose = poses[i].matrix;
+        Mat depth = scene->depth(pose);
+        Mat points, normals;
+
+        volume.integrate(depth, pose);
+
+        startTimer();
+        volume.raycast(pose, frameSize.height, frameSize.width, points, normals);
         stopTimer();
 
         if (display)
-            displayImage(depth, _points, _normals, settings.depthFactor, settings.lightPose);
+            displayImage(depth, points, normals, depthFactor, lightPose);
+
     }
     SANITY_CHECK_NOTHING();
 }
 
-PERF_TEST(Perf_HashTSDF, integrate)
+// Perf_TSDF_GPU.raycast_frame
+#ifdef HAVE_OPENCL
+PERF_TEST(Perf_TSDF_GPU, raycast_frame)
+#else
+PERF_TEST(Perf_TSDF, raycast_frame)
+#endif
 {
-    Settings settings(true);
+    VolumeType volumeType = VolumeType::TSDF;
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
 
-    for (size_t i = 0; i < settings.poses.size(); i++)
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = false;
+    float depthFactor = vs.getDepthFactor();
+    Vec3f lightPose = Vec3f::all(0.f);
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    for (size_t i = 0; i < poses.size(); i++)
     {
-        Matx44f pose = settings.poses[i].matrix;
-        Mat depth = settings.scene->depth(pose);
-        startTimer();
-        settings.volume->integrate(depth, settings.depthFactor, pose, settings.intr);
-        stopTimer();
-        depth.release();
-    }
-    SANITY_CHECK_NOTHING();
-}
+        Matx44f pose = poses[i].matrix;
+        Mat depth = scene->depth(pose);
 
-PERF_TEST(Perf_HashTSDF, raycast)
-{
-    Settings settings(true);
-    for (size_t i = 0; i < settings.poses.size(); i++)
-    {
-        UMat _points, _normals;
-        Matx44f pose = settings.poses[i].matrix;
-        Mat depth = settings.scene->depth(pose);
+#ifdef HAVE_OPENCL
+        OdometryFrame odf(OdometryFrameStoreType::UMAT);
+#else
+        OdometryFrame odf;
+#endif
+
+        odf.setDepth(depth);
+        volume.integrate(odf, pose);
 
-        settings.volume->integrate(depth, settings.depthFactor, pose, settings.intr);
         startTimer();
-        settings.volume->raycast(pose, settings.intr, settings.frameSize, _points, _normals);
+        volume.raycast(pose, frameSize.height, frameSize.width, odf);
         stopTimer();
 
         if (display)
-            displayImage(depth, _points, _normals, settings.depthFactor, settings.lightPose);
+        {
+            Mat points, normals;
+            odf.getPyramidAt(points, OdometryFramePyramidType::PYR_CLOUD, 0);
+            odf.getPyramidAt(normals, OdometryFramePyramidType::PYR_NORM, 0);
+            displayImage(depth, points, normals, depthFactor, lightPose);
+        }
     }
     SANITY_CHECK_NOTHING();
 }
 
+
+#ifdef HAVE_OPENCL
+// Perf_TSDF_CPU.integrate_mat
+PERF_TEST(Perf_TSDF_CPU, integrate_mat)
+{
+    cv::ocl::setUseOpenCL(false);
+
+    VolumeType volumeType = VolumeType::TSDF;
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = false;
+    float depthFactor = vs.getDepthFactor();
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+
+    for (size_t i = 0; i < poses.size(); i++)
+    {
+        Matx44f pose = poses[i].matrix;
+        Mat depth = scene->depth(pose);
+
+        startTimer();
+        volume.integrate(depth, pose);
+        stopTimer();
+
+    }
+    SANITY_CHECK_NOTHING();
+
+    cv::ocl::setUseOpenCL(true);
+}
+
+// Perf_TSDF_CPU.integrate_frame
+PERF_TEST(Perf_TSDF_CPU, integrate_frame)
+{
+    cv::ocl::setUseOpenCL(false);
+
+    VolumeType volumeType = VolumeType::TSDF;
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = false;
+    float depthFactor = vs.getDepthFactor();
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    for (size_t i = 0; i < poses.size(); i++)
+    {
+        Matx44f pose = poses[i].matrix;
+        Mat depth = scene->depth(pose);
+        OdometryFrame odf;
+        odf.setDepth(depth);
+
+        startTimer();
+        volume.integrate(odf, pose);
+        stopTimer();
+
+    }
+    SANITY_CHECK_NOTHING();
+
+    cv::ocl::setUseOpenCL(true);
+}
+
+// Perf_TSDF_CPU.raycast_mat
+PERF_TEST(Perf_TSDF_CPU, raycast_mat)
+{
+    cv::ocl::setUseOpenCL(false);
+
+    VolumeType volumeType = VolumeType::TSDF;
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = false;
+    float depthFactor = vs.getDepthFactor();
+    Vec3f lightPose = Vec3f::all(0.f);
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    for (size_t i = 0; i < poses.size(); i++)
+    {
+        Matx44f pose = poses[i].matrix;
+        Mat depth = scene->depth(pose);
+        Mat points, normals;
+
+        volume.integrate(depth, pose);
+
+        startTimer();
+        volume.raycast(pose, frameSize.height, frameSize.width, points, normals);
+        stopTimer();
+
+        if (display)
+            displayImage(depth, points, normals, depthFactor, lightPose);
+
+    }
+    SANITY_CHECK_NOTHING();
+
+    cv::ocl::setUseOpenCL(true);
+}
+
+// Perf_TSDF_CPU.raycast_frame
+PERF_TEST(Perf_TSDF_CPU, raycast_frame)
+{
+    cv::ocl::setUseOpenCL(false);
+
+    VolumeType volumeType = VolumeType::TSDF;
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = false;
+    float depthFactor = vs.getDepthFactor();
+    Vec3f lightPose = Vec3f::all(0.f);
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    for (size_t i = 0; i < poses.size(); i++)
+    {
+        Matx44f pose = poses[i].matrix;
+        Mat depth = scene->depth(pose);
+        OdometryFrame odf;
+        odf.setDepth(depth);
+        volume.integrate(odf, pose);
+
+        startTimer();
+        volume.raycast(pose, frameSize.height, frameSize.width, odf);
+        stopTimer();
+
+        if (display)
+        {
+            Mat points, normals;
+            odf.getPyramidAt(points, OdometryFramePyramidType::PYR_CLOUD, 0);
+            odf.getPyramidAt(normals, OdometryFramePyramidType::PYR_NORM, 0);
+            displayImage(depth, points, normals, depthFactor, lightPose);
+        }
+    }
+    SANITY_CHECK_NOTHING();
+
+    cv::ocl::setUseOpenCL(true);
+}
+#endif
+
+
+
+
+// Perf_HashTSDF_GPU.integrate_mat
+#ifdef HAVE_OPENCL
+PERF_TEST(Perf_HashTSDF_GPU, integrate_mat)
+#else
+PERF_TEST(Perf_HashTSDF, integrate_mat)
+#endif
+{
+    VolumeType volumeType = VolumeType::HashTSDF;
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = false;
+    float depthFactor = vs.getDepthFactor();
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    for (size_t i = 0; i < poses.size(); i++)
+    {
+        Matx44f pose = poses[i].matrix;
+        Mat depth = scene->depth(pose);
+
+        startTimer();
+        volume.integrate(depth, pose);
+        stopTimer();
+
+    }
+    SANITY_CHECK_NOTHING();
+}
+
+// Perf_HashTSDF_GPU.integrate_frame
+#ifdef HAVE_OPENCL
+PERF_TEST(Perf_HashTSDF_GPU, integrate_frame)
+#else
+PERF_TEST(Perf_HashTSDF, integrate_frame)
+#endif
+{
+    VolumeType volumeType = VolumeType::HashTSDF;
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = false;
+    float depthFactor = vs.getDepthFactor();
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    for (size_t i = 0; i < poses.size(); i++)
+    {
+        Matx44f pose = poses[i].matrix;
+        Mat depth = scene->depth(pose);
+        OdometryFrame odf;
+        odf.setDepth(depth);
+
+        startTimer();
+        volume.integrate(odf, pose);
+        stopTimer();
+
+    }
+    SANITY_CHECK_NOTHING();
+}
+
+// Perf_HashTSDF_GPU.raycast_mat
+#ifdef HAVE_OPENCL
+PERF_TEST(Perf_HashTSDF_GPU, raycast_mat)
+#else
+PERF_TEST(Perf_HashTSDF, raycast_mat)
+#endif
+{
+    VolumeType volumeType = VolumeType::HashTSDF;
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = false;
+    float depthFactor = vs.getDepthFactor();
+    Vec3f lightPose = Vec3f::all(0.f);
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    for (size_t i = 0; i < poses.size(); i++)
+    {
+        Matx44f pose = poses[i].matrix;
+        Mat depth = scene->depth(pose);
+        Mat points, normals;
+
+        volume.integrate(depth, pose);
+
+        startTimer();
+        volume.raycast(pose, frameSize.height, frameSize.width, points, normals);
+        stopTimer();
+
+        if (display)
+            displayImage(depth, points, normals, depthFactor, lightPose);
+
+    }
+    SANITY_CHECK_NOTHING();
+}
+
+// Perf_HashTSDF_GPU.raycast_frame
+#ifdef HAVE_OPENCL
+PERF_TEST(Perf_HashTSDF_GPU, raycast_frame)
+#else
+PERF_TEST(Perf_HashTSDF, raycast_frame)
+#endif
+{
+    VolumeType volumeType = VolumeType::HashTSDF;
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = false;
+    float depthFactor = vs.getDepthFactor();
+    Vec3f lightPose = Vec3f::all(0.f);
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    for (size_t i = 0; i < poses.size(); i++)
+    {
+        Matx44f pose = poses[i].matrix;
+        Mat depth = scene->depth(pose);
+
+#ifdef HAVE_OPENCL
+        OdometryFrame odf(OdometryFrameStoreType::UMAT);
+#else
+        OdometryFrame odf;
+#endif
+
+        odf.setDepth(depth);
+        volume.integrate(odf, pose);
+
+        startTimer();
+        volume.raycast(pose, frameSize.height, frameSize.width, odf);
+        stopTimer();
+
+        if (display)
+        {
+            Mat points, normals;
+            odf.getPyramidAt(points, OdometryFramePyramidType::PYR_CLOUD, 0);
+            odf.getPyramidAt(normals, OdometryFramePyramidType::PYR_NORM, 0);
+            displayImage(depth, points, normals, depthFactor, lightPose);
+        }
+    }
+    SANITY_CHECK_NOTHING();
+}
+
+
+#ifdef HAVE_OPENCL
+// Perf_HashTSDF_CPU.integrate_mat
+PERF_TEST(Perf_HashTSDF_CPU, integrate_mat)
+{
+    cv::ocl::setUseOpenCL(false);
+
+    VolumeType volumeType = VolumeType::HashTSDF;
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = false;
+    float depthFactor = vs.getDepthFactor();
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+
+    for (size_t i = 0; i < poses.size(); i++)
+    {
+        Matx44f pose = poses[i].matrix;
+        Mat depth = scene->depth(pose);
+
+        startTimer();
+        volume.integrate(depth, pose);
+        stopTimer();
+
+    }
+    SANITY_CHECK_NOTHING();
+
+    cv::ocl::setUseOpenCL(true);
+}
+
+// Perf_HashTSDF_CPU.integrate_frame
+PERF_TEST(Perf_HashTSDF_CPU, integrate_frame)
+{
+    cv::ocl::setUseOpenCL(false);
+
+    VolumeType volumeType = VolumeType::HashTSDF;
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = false;
+    float depthFactor = vs.getDepthFactor();
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    for (size_t i = 0; i < poses.size(); i++)
+    {
+        Matx44f pose = poses[i].matrix;
+        Mat depth = scene->depth(pose);
+        OdometryFrame odf;
+        odf.setDepth(depth);
+
+        startTimer();
+        volume.integrate(odf, pose);
+        stopTimer();
+
+    }
+    SANITY_CHECK_NOTHING();
+
+    cv::ocl::setUseOpenCL(true);
+}
+
+// Perf_HashTSDF_CPU.raycast_mat
+PERF_TEST(Perf_HashTSDF_CPU, raycast_mat)
+{
+    cv::ocl::setUseOpenCL(false);
+
+    VolumeType volumeType = VolumeType::HashTSDF;
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = false;
+    float depthFactor = vs.getDepthFactor();
+    Vec3f lightPose = Vec3f::all(0.f);
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    for (size_t i = 0; i < poses.size(); i++)
+    {
+        Matx44f pose = poses[i].matrix;
+        Mat depth = scene->depth(pose);
+        Mat points, normals;
+
+        volume.integrate(depth, pose);
+
+        startTimer();
+        volume.raycast(pose, frameSize.height, frameSize.width, points, normals);
+        stopTimer();
+
+        if (display)
+            displayImage(depth, points, normals, depthFactor, lightPose);
+    }
+    SANITY_CHECK_NOTHING();
+
+    cv::ocl::setUseOpenCL(true);
+}
+
+// Perf_HashTSDF_CPU.raycast_frame
+PERF_TEST(Perf_HashTSDF_CPU, raycast_frame)
+{
+    cv::ocl::setUseOpenCL(false);
+
+    VolumeType volumeType = VolumeType::HashTSDF;
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = false;
+    float depthFactor = vs.getDepthFactor();
+    Vec3f lightPose = Vec3f::all(0.f);
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    for (size_t i = 0; i < poses.size(); i++)
+    {
+        Matx44f pose = poses[i].matrix;
+        Mat depth = scene->depth(pose);
+        OdometryFrame odf;
+        odf.setDepth(depth);
+        volume.integrate(odf, pose);
+
+        startTimer();
+        volume.raycast(pose, frameSize.height, frameSize.width, odf);
+        stopTimer();
+
+        if (display)
+        {
+            Mat points, normals;
+            odf.getPyramidAt(points, OdometryFramePyramidType::PYR_CLOUD, 0);
+            odf.getPyramidAt(normals, OdometryFramePyramidType::PYR_NORM, 0);
+            displayImage(depth, points, normals, depthFactor, lightPose);
+        }
+    }
+    SANITY_CHECK_NOTHING();
+
+    cv::ocl::setUseOpenCL(true);
+}
+#endif
+
+
+
+// Perf_ColorTSDF_CPU.integrate_mat
+#ifdef HAVE_OPENCL
+PERF_TEST(Perf_ColorTSDF_CPU, integrate_mat)
+#else
+PERF_TEST(Perf_ColorTSDF, integrate_mat)
+#endif
+{
+    VolumeType volumeType = VolumeType::ColorTSDF;
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = false;
+    float depthFactor = vs.getDepthFactor();
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    for (size_t i = 0; i < poses.size(); i++)
+    {
+        Matx44f pose = poses[i].matrix;
+        Mat depth = scene->depth(pose);
+        Mat rgb = scene->rgb(pose);
+
+        startTimer();
+        volume.integrate(depth, rgb, pose);
+        stopTimer();
+
+    }
+    SANITY_CHECK_NOTHING();
+}
+
+// Perf_ColorTSDF_CPU.integrate_frame
+#ifdef HAVE_OPENCL
+PERF_TEST(Perf_ColorTSDF_CPU, integrate_frame)
+#else
+PERF_TEST(Perf_ColorTSDF, integrate_frame)
+#endif
+{
+    VolumeType volumeType = VolumeType::ColorTSDF;
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = false;
+    float depthFactor = vs.getDepthFactor();
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    for (size_t i = 0; i < poses.size(); i++)
+    {
+        Matx44f pose = poses[i].matrix;
+        Mat depth = scene->depth(pose);
+        Mat rgb = scene->rgb(pose);
+        OdometryFrame odf;
+        odf.setDepth(depth);
+        odf.setImage(rgb);
+
+        startTimer();
+        volume.integrate(odf, pose);
+        stopTimer();
+
+    }
+    SANITY_CHECK_NOTHING();
+}
+
+// Perf_ColorTSDF_CPU.raycast_mat
+#ifdef HAVE_OPENCL
+PERF_TEST(Perf_ColorTSDF_CPU, raycast_mat)
+#else
+PERF_TEST(Perf_ColorTSDF, raycast_mat)
+#endif
+{
+    VolumeType volumeType = VolumeType::ColorTSDF;
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = false;
+    float depthFactor = vs.getDepthFactor();
+    Vec3f lightPose = Vec3f::all(0.f);
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    for (size_t i = 0; i < poses.size(); i++)
+    {
+        Matx44f pose = poses[i].matrix;
+        Mat depth = scene->depth(pose);
+        Mat rgb = scene->rgb(pose);
+        Mat points, normals, colors;
+
+        startTimer();
+        volume.integrate(depth, rgb, pose);
+
+        startTimer();
+        volume.raycast(pose, frameSize.height, frameSize.width, points, normals, colors);
+        stopTimer();
+
+        if (display)
+            displayColorImage(depth, rgb, points, normals, colors, depthFactor, lightPose);
+    }
+    SANITY_CHECK_NOTHING();
+}
+
+// Perf_ColorTSDF_CPU.raycast_frame
+#ifdef HAVE_OPENCL
+PERF_TEST(Perf_ColorTSDF_CPU, raycast_frame)
+#else
+PERF_TEST(Perf_ColorTSDF, raycast_frame)
+#endif
+{
+    VolumeType volumeType = VolumeType::ColorTSDF;
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = false;
+    float depthFactor = vs.getDepthFactor();
+    Vec3f lightPose = Vec3f::all(0.f);
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    for (size_t i = 0; i < poses.size(); i++)
+    {
+        Matx44f pose = poses[i].matrix;
+        Mat depth = scene->depth(pose);
+        Mat rgb = scene->rgb(pose);
+
+        OdometryFrame odf;
+        odf.setDepth(depth);
+        odf.setImage(rgb);
+        volume.integrate(odf, pose);
+
+        startTimer();
+        volume.raycast(pose, frameSize.height, frameSize.width, odf);
+        stopTimer();
+
+        if (display)
+        {
+            Mat points, normals, colors;
+            odf.getPyramidAt(points, OdometryFramePyramidType::PYR_CLOUD, 0);
+            odf.getPyramidAt(normals, OdometryFramePyramidType::PYR_NORM, 0);
+            odf.getPyramidAt(colors, OdometryFramePyramidType::PYR_IMAGE, 0);
+            displayColorImage(depth, rgb, points, normals, colors, depthFactor, lightPose);
+        }
+    }
+    SANITY_CHECK_NOTHING();
+}
+
+
 }} // namespace
diff --git a/modules/3d/src/rgbd/color_tsdf_functions.cpp b/modules/3d/src/rgbd/color_tsdf_functions.cpp
new file mode 100644
index 0000000000..d6827d75c4
--- /dev/null
+++ b/modules/3d/src/rgbd/color_tsdf_functions.cpp
@@ -0,0 +1,1259 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// Partially rewritten from https://github.com/Nerei/kinfu_remake
+// Copyright(c) 2012, Anatoly Baksheev. All rights reserved.
+
+#include "../precomp.hpp"
+#include "color_tsdf_functions.hpp"
+#include "opencl_kernels_3d.hpp"
+
+namespace cv {
+
+
+void integrateColorTsdfVolumeUnit(const VolumeSettings& settings, const Matx44f& cameraPose,
+    InputArray _depth, InputArray _rgb, InputArray _pixNorms, InputArray _volume)
+{
+    Matx44f volumePose;
+    settings.getVolumePose(volumePose);
+    integrateColorTsdfVolumeUnit(settings, volumePose, cameraPose, _depth, _rgb, _pixNorms, _volume);
+}
+
+
+void integrateColorTsdfVolumeUnit(
+    const VolumeSettings& settings, const Matx44f& volumePose, const Matx44f& cameraPose,
+    InputArray _depth, InputArray _rgb, InputArray _pixNorms, InputArray _volume)
+{
+    //std::cout << "integrateColorTsdfVolumeUnit()" << std::endl;
+
+    Depth depth = _depth.getMat();
+    Colors color = _rgb.getMat();
+    Mat volume = _volume.getMat();
+    Mat pixNorms = _pixNorms.getMat();
+
+    RGBTsdfVoxel* volDataStart = volume.ptr<RGBTsdfVoxel>();
+
+    Vec4i volStrides;
+    settings.getVolumeDimensions(volStrides);
+
+    Vec3i resolution;
+    settings.getVolumeResolution(resolution);
+    const Point3i volResolution = Point3i(resolution);
+    float voxelSize = settings.getVoxelSize();
+
+    const Affine3f pose = Affine3f(volumePose);
+    const Affine3f vol2cam(Affine3f(cameraPose.inv()) * pose);
+
+    Matx33f intr;
+    settings.getCameraIntegrateIntrinsics(intr);
+    const Intr::Projector projDepth = Intr(intr).makeProjector();
+
+    Matx33f rgb_intr;
+    settings.getCameraIntegrateIntrinsics(rgb_intr);
+    const Intr::Projector projColor = Intr(rgb_intr);
+
+    const float dfac(1.f / settings.getDepthFactor());
+    const float truncDist = settings.getTsdfTruncateDistance();
+    const float truncDistInv = 1.f / truncDist;
+    const int maxWeight = settings.getMaxWeight();
+
+    Range integrateRange(0, volResolution.x);
+
+#if USE_INTRINSICS
+    auto IntegrateInvoker = [&](const Range& range)
+    {
+        // zStep == vol2cam*(Point3f(x, y, 1)*voxelSize) - basePt;
+        Point3f zStepPt = Point3f(vol2cam.matrix(0, 2),
+            vol2cam.matrix(1, 2),
+            vol2cam.matrix(2, 2)) * voxelSize;
+
+        v_float32x4 zStep(zStepPt.x, zStepPt.y, zStepPt.z, 0);
+        v_float32x4 vfxy(projDepth.fx, projDepth.fy, 0.f, 0.f), vcxy(projDepth.cx, projDepth.cy, 0.f, 0.f);
+        v_float32x4 rgb_vfxy(projColor.fx, projColor.fy, 0.f, 0.f), rgb_vcxy(projColor.cx, projColor.cy, 0.f, 0.f);
+        const v_float32x4 upLimits = v_cvt_f32(v_int32x4(depth.cols - 1, depth.rows - 1, 0, 0));
+
+        for (int x = range.start; x < range.end; x++)
+        {
+            RGBTsdfVoxel* volDataX = volDataStart + x * volStrides[0];
+            for (int y = 0; y < volResolution.y; y++)
+            {
+                RGBTsdfVoxel* volDataY = volDataX + y * volStrides[1];
+                // optimization of camSpace transformation (vector addition instead of matmul at each z)
+                Point3f basePt = vol2cam * (Point3f((float)x, (float)y, 0) * voxelSize);
+                v_float32x4 camSpacePt(basePt.x, basePt.y, basePt.z, 0);
+
+                int startZ, endZ;
+                if (abs(zStepPt.z) > 1e-5)
+                {
+                    int baseZ = (int)(-basePt.z / zStepPt.z);
+                    if (zStepPt.z > 0)
+                    {
+                        startZ = baseZ;
+                        endZ = volResolution.z;
+                    }
+                    else
+                    {
+                        startZ = 0;
+                        endZ = baseZ;
+                    }
+                }
+                else
+                {
+                    if (basePt.z > 0)
+                    {
+                        startZ = 0;
+                        endZ = volResolution.z;
+                    }
+                    else
+                    {
+                        // z loop shouldn't be performed
+                        startZ = endZ = 0;
+                    }
+                }
+                startZ = max(0, startZ);
+                endZ = min(int(volResolution.z), endZ);
+                for (int z = startZ; z < endZ; z++)
+                {
+                    // optimization of the following:
+                    //Point3f volPt = Point3f(x, y, z)*voxelSize;
+                    //Point3f camSpacePt = vol2cam * volPt;
+                    camSpacePt += zStep;
+
+                    float zCamSpace = v_reinterpret_as_f32(v_rotate_right<2>(v_reinterpret_as_u32(camSpacePt))).get0();
+                    if (zCamSpace <= 0.f)
+                        continue;
+
+                    v_float32x4 camPixVec = camSpacePt / v_setall_f32(zCamSpace);
+                    v_float32x4 projected = v_muladd(camPixVec, vfxy, vcxy);
+                    // leave only first 2 lanes
+                    projected = v_reinterpret_as_f32(v_reinterpret_as_u32(projected) &
+                        v_uint32x4(0xFFFFFFFF, 0xFFFFFFFF, 0, 0));
+
+                    depthType v;
+                    // bilinearly interpolate depth at projected
+                    {
+                        const v_float32x4& pt = projected;
+                        // check coords >= 0 and < imgSize
+                        v_uint32x4 limits = v_reinterpret_as_u32(pt < v_setzero_f32()) |
+                            v_reinterpret_as_u32(pt >= upLimits);
+                        limits = limits | v_rotate_right<1>(limits);
+                        if (limits.get0())
+                            continue;
+
+                        // xi, yi = floor(pt)
+                        v_int32x4 ip = v_floor(pt);
+                        v_int32x4 ipshift = ip;
+                        int xi = ipshift.get0();
+                        ipshift = v_rotate_right<1>(ipshift);
+                        int yi = ipshift.get0();
+
+                        const depthType* row0 = depth[yi + 0];
+                        const depthType* row1 = depth[yi + 1];
+
+                        // v001 = [v(xi + 0, yi + 0), v(xi + 1, yi + 0)]
+                        v_float32x4 v001 = v_load_low(row0 + xi);
+                        // v101 = [v(xi + 0, yi + 1), v(xi + 1, yi + 1)]
+                        v_float32x4 v101 = v_load_low(row1 + xi);
+
+                        v_float32x4 vall = v_combine_low(v001, v101);
+
+                        // assume correct depth is positive
+                        // don't fix missing data
+                        if (v_check_all(vall > v_setzero_f32()))
+                        {
+                            v_float32x4 t = pt - v_cvt_f32(ip);
+                            float tx = t.get0();
+                            t = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
+                            v_float32x4 ty = v_setall_f32(t.get0());
+                            // vx is y-interpolated between rows 0 and 1
+                            v_float32x4 vx = v001 + ty * (v101 - v001);
+                            float v0 = vx.get0();
+                            vx = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(vx)));
+                            float v1 = vx.get0();
+                            v = v0 + tx * (v1 - v0);
+                        }
+                        else
+                            continue;
+                    }
+
+                    v_float32x4 projectedRGB = v_muladd(camPixVec, rgb_vfxy, rgb_vcxy);
+                    // leave only first 2 lanes
+                    projectedRGB = v_reinterpret_as_f32(v_reinterpret_as_u32(projected) &
+                        v_uint32x4(0xFFFFFFFF, 0xFFFFFFFF, 0, 0));
+
+                    // norm(camPixVec) produces double which is too slow
+                    int _u = (int)projected.get0();
+                    int _v = (int)v_rotate_right<1>(projected).get0();
+                    int rgb_u = (int)projectedRGB.get0();
+                    int rgb_v = (int)v_rotate_right<1>(projectedRGB).get0();
+
+                    if (!(_u >= 0 && _u < depth.cols && _v >= 0 && _v < depth.rows &&
+                        rgb_v >= 0 && rgb_v < color.rows && rgb_u >= 0 && rgb_u < color.cols))
+                        continue;
+                    float pixNorm = pixNorms.at<float>(_v, _u);
+                    // TODO: Add support of 3point and 4 point representation
+                    Vec3f colorRGB = color.at<Vec3f>(rgb_v, rgb_u);
+                    //float pixNorm = sqrt(v_reduce_sum(camPixVec*camPixVec));
+                    // difference between distances of point and of surface to camera
+                    float sdf = pixNorm * (v * dfac - zCamSpace);
+                    // possible alternative is:
+                    // kftype sdf = norm(camSpacePt)*(v*dfac/camSpacePt.z - 1);
+                    if (sdf >= -truncDist)
+                    {
+                        TsdfType tsdf = floatToTsdf(fmin(1.f, sdf * truncDistInv));
+
+                        RGBTsdfVoxel& voxel = volDataY[z * volStrides[2]];
+                        WeightType& weight = voxel.weight;
+                        TsdfType& value = voxel.tsdf;
+                        ColorType& r = voxel.r;
+                        ColorType& g = voxel.g;
+                        ColorType& b = voxel.b;
+
+                        // update RGB
+                        r = (ColorType)((float)(r * weight) + (colorRGB[0])) / (weight + 1);
+                        g = (ColorType)((float)(g * weight) + (colorRGB[1])) / (weight + 1);
+                        b = (ColorType)((float)(b * weight) + (colorRGB[2])) / (weight + 1);
+                        colorFix(r, g, b);
+                        // update TSDF
+                        value = floatToTsdf((tsdfToFloat(value) * weight + tsdfToFloat(tsdf)) / (weight + 1));
+                        weight = WeightType(min(int(weight + 1), int(maxWeight)));
+                    }
+                }
+            }
+        }
+    };
+#else
+    auto IntegrateInvoker = [&](const Range& range)
+    {
+        for (int x = range.start; x < range.end; x++)
+        {
+            RGBTsdfVoxel* volDataX = volDataStart + x * volStrides[0];
+            for (int y = 0; y < volResolution.y; y++)
+            {
+                RGBTsdfVoxel* volDataY = volDataX + y * volStrides[1];
+                // optimization of camSpace transformation (vector addition instead of matmul at each z)
+                Point3f basePt = vol2cam * (Point3f(float(x), float(y), 0.0f) * voxelSize);
+                Point3f camSpacePt = basePt;
+                // zStep == vol2cam*(Point3f(x, y, 1)*voxelSize) - basePt;
+                // zStep == vol2cam*[Point3f(x, y, 1) - Point3f(x, y, 0)]*voxelSize
+                Point3f zStep = Point3f(vol2cam.matrix(0, 2),
+                    vol2cam.matrix(1, 2),
+                    vol2cam.matrix(2, 2)) * voxelSize;
+                int startZ, endZ;
+                if (abs(zStep.z) > 1e-5)
+                {
+                    int baseZ = int(-basePt.z / zStep.z);
+                    if (zStep.z > 0)
+                    {
+                        startZ = baseZ;
+                        endZ = volResolution.z;
+                    }
+                    else
+                    {
+                        startZ = 0;
+                        endZ = baseZ;
+                    }
+                }
+                else
+                {
+                    if (basePt.z > 0)
+                    {
+                        startZ = 0;
+                        endZ = volResolution.z;
+                    }
+                    else
+                    {
+                        // z loop shouldn't be performed
+                        startZ = endZ = 0;
+                    }
+                }
+                startZ = max(0, startZ);
+                endZ = min(int(volResolution.z), endZ);
+
+                for (int z = startZ; z < endZ; z++)
+                {
+                    // optimization of the following:
+                    //Point3f volPt = Point3f(x, y, z)*volume.voxelSize;
+                    //Point3f camSpacePt = vol2cam * volPt;
+
+                    camSpacePt += zStep;
+                    if (camSpacePt.z <= 0)
+                        continue;
+
+                    Point3f camPixVec;
+                    Point2f projected = projDepth(camSpacePt, camPixVec);
+                    Point2f projectedRGB = projColor(camSpacePt, camPixVec);
+
+                    depthType v = bilinearDepth(depth, projected);
+                    if (v == 0) {
+                        continue;
+                    }
+                    int _u = projected.x;
+                    int _v = projected.y;
+
+                    int rgb_u = (int)projectedRGB.x;
+                    int rgb_v = (int)projectedRGB.y;
+
+                    if (!(_u >= 0 && _u < depth.cols && _v >= 0 && _v < depth.rows
+                        && rgb_v >= 0 && rgb_v < color.rows && rgb_u >= 0 && rgb_u < color.cols
+                        ))
+                        continue;
+
+                    float pixNorm = pixNorms.at<float>(_v, _u);
+                    // TODO: Add support of 3point and 4 point representation
+                    Vec3f colorRGB = color.at<Vec3f>(rgb_v, rgb_u);
+
+                    // difference between distances of point and of surface to camera
+                    float sdf = pixNorm * (v * dfac - camSpacePt.z);
+                    // possible alternative is:
+                    // kftype sdf = norm(camSpacePt)*(v*dfac/camSpacePt.z - 1);
+                    if (sdf >= -truncDist)
+                    {
+                        TsdfType tsdf = floatToTsdf(fmin(1.f, sdf * truncDistInv));
+
+                        RGBTsdfVoxel& voxel = volDataY[z * volStrides[2]];
+                        WeightType& weight = voxel.weight;
+                        TsdfType& value = voxel.tsdf;
+
+                        ColorType& r = voxel.r;
+                        ColorType& g = voxel.g;
+                        ColorType& b = voxel.b;
+                        // update RGB
+                        if (weight < 1)
+                        {
+                            r = (ColorType)((float)(r * weight) + (colorRGB[0])) / (weight + 1);
+                            g = (ColorType)((float)(g * weight) + (colorRGB[1])) / (weight + 1);
+                            b = (ColorType)((float)(b * weight) + (colorRGB[2])) / (weight + 1);
+                        }
+
+                        // update TSDF
+                        value = floatToTsdf((tsdfToFloat(value) * weight + tsdfToFloat(tsdf)) / (weight + 1));
+                        weight = min(int(weight + 1), int(maxWeight));
+                    }
+                }
+            }
+        }
+    };
+#endif
+    parallel_for_(integrateRange, IntegrateInvoker);
+    //IntegrateInvoker(integrateRange);
+
+    //std::cout << "integrateColorTsdfVolumeUnit() end" << std::endl;
+
+}
+
+
+
+#if USE_INTRINSICS
+// all coordinate checks should be done in inclosing cycle
+
+inline float interpolateColorVoxel(const Mat& volume,
+    const Vec4i& volDims, const Vec8i& neighbourCoords,
+    const v_float32x4& p)
+{
+    // tx, ty, tz = floor(p)
+    v_int32x4 ip = v_floor(p);
+    v_float32x4 t = p - v_cvt_f32(ip);
+    float tx = t.get0();
+    t = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
+    float ty = t.get0();
+    t = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
+    float tz = t.get0();
+
+    int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
+    const RGBTsdfVoxel* volData = volume.ptr<RGBTsdfVoxel>();
+
+    int ix = ip.get0();
+    ip = v_rotate_right<1>(ip);
+    int iy = ip.get0();
+    ip = v_rotate_right<1>(ip);
+    int iz = ip.get0();
+
+    int coordBase = ix * xdim + iy * ydim + iz * zdim;
+
+    TsdfType vx[8];
+    for (int i = 0; i < 8; i++)
+        vx[i] = volData[neighbourCoords[i] + coordBase].tsdf;
+
+    v_float32x4 v0246 = tsdfToFloat_INTR(v_int32x4(vx[0], vx[2], vx[4], vx[6]));
+    v_float32x4 v1357 = tsdfToFloat_INTR(v_int32x4(vx[1], vx[3], vx[5], vx[7]));
+    v_float32x4 vxx = v0246 + v_setall_f32(tz) * (v1357 - v0246);
+
+    v_float32x4 v00_10 = vxx;
+    v_float32x4 v01_11 = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(vxx)));
+
+    v_float32x4 v0_1 = v00_10 + v_setall_f32(ty) * (v01_11 - v00_10);
+    float v0 = v0_1.get0();
+    v0_1 = v_reinterpret_as_f32(v_rotate_right<2>(v_reinterpret_as_u32(v0_1)));
+    float v1 = v0_1.get0();
+
+    return v0 + tx * (v1 - v0);
+}
+
+inline float interpolateColorVoxel(const Mat& volume,
+    const Vec4i& volDims, const Vec8i& neighbourCoords,
+    const Point3f& _p)
+{
+    v_float32x4 p(_p.x, _p.y, _p.z, 0);
+    return interpolateColorVoxel(volume, volDims, neighbourCoords, p);
+}
+
+
+#else
+inline float interpolateColorVoxel(const Mat& volume,
+    const Vec4i& volDims, const Vec8i& neighbourCoords,
+    const Point3f& p)
+{
+    int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
+
+    int ix = cvFloor(p.x);
+    int iy = cvFloor(p.y);
+    int iz = cvFloor(p.z);
+
+    float tx = p.x - ix;
+    float ty = p.y - iy;
+    float tz = p.z - iz;
+
+    int coordBase = ix * xdim + iy * ydim + iz * zdim;
+    const RGBTsdfVoxel* volData = volume.ptr<RGBTsdfVoxel>();
+
+    float vx[8];
+    for (int i = 0; i < 8; i++)
+        vx[i] = tsdfToFloat(volData[neighbourCoords[i] + coordBase].tsdf);
+
+    float v00 = vx[0] + tz * (vx[1] - vx[0]);
+    float v01 = vx[2] + tz * (vx[3] - vx[2]);
+    float v10 = vx[4] + tz * (vx[5] - vx[4]);
+    float v11 = vx[6] + tz * (vx[7] - vx[6]);
+
+    float v0 = v00 + ty * (v01 - v00);
+    float v1 = v10 + ty * (v11 - v10);
+
+    return v0 + tx * (v1 - v0);
+
+}
+#endif
+
+
+#if USE_INTRINSICS
+//gradientDeltaFactor is fixed at 1.0 of voxel size
+
+inline v_float32x4 getNormalColorVoxel(const Mat& volume,
+    const Vec4i& volDims, const Vec8i& neighbourCoords, const Point3i volResolution,
+    const v_float32x4& p)
+{
+    if (v_check_any(p < v_float32x4(1.f, 1.f, 1.f, 0.f)) ||
+        v_check_any(p >= v_float32x4((float)(volResolution.x - 2),
+            (float)(volResolution.y - 2),
+            (float)(volResolution.z - 2), 1.f))
+        )
+        return nanv;
+
+    v_int32x4 ip = v_floor(p);
+    v_float32x4 t = p - v_cvt_f32(ip);
+    float tx = t.get0();
+    t = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
+    float ty = t.get0();
+    t = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
+    float tz = t.get0();
+
+    const int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
+    const RGBTsdfVoxel* volData = volume.ptr<RGBTsdfVoxel>();
+
+    int ix = ip.get0(); ip = v_rotate_right<1>(ip);
+    int iy = ip.get0(); ip = v_rotate_right<1>(ip);
+    int iz = ip.get0();
+
+    int coordBase = ix * xdim + iy * ydim + iz * zdim;
+
+    float CV_DECL_ALIGNED(16) an[4];
+    an[0] = an[1] = an[2] = an[3] = 0.f;
+    for (int c = 0; c < 3; c++)
+    {
+        const int dim = volDims[c];
+        float& nv = an[c];
+
+        float vx[8];
+        for (int i = 0; i < 8; i++)
+            vx[i] = tsdfToFloat(volData[neighbourCoords[i] + coordBase + 1 * dim].tsdf) -
+            tsdfToFloat(volData[neighbourCoords[i] + coordBase - 1 * dim].tsdf);
+
+        v_float32x4 v0246(vx[0], vx[2], vx[4], vx[6]);
+        v_float32x4 v1357(vx[1], vx[3], vx[5], vx[7]);
+        v_float32x4 vxx = v0246 + v_setall_f32(tz) * (v1357 - v0246);
+
+        v_float32x4 v00_10 = vxx;
+        v_float32x4 v01_11 = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(vxx)));
+
+        v_float32x4 v0_1 = v00_10 + v_setall_f32(ty) * (v01_11 - v00_10);
+        float v0 = v0_1.get0();
+        v0_1 = v_reinterpret_as_f32(v_rotate_right<2>(v_reinterpret_as_u32(v0_1)));
+        float v1 = v0_1.get0();
+
+        nv = v0 + tx * (v1 - v0);
+    }
+
+    v_float32x4 n = v_load_aligned(an);
+    v_float32x4 Norm = v_sqrt(v_setall_f32(v_reduce_sum(n * n)));
+
+    return Norm.get0() < 0.0001f ? nanv : n / Norm;
+}
+
+inline Point3f getNormalColorVoxel(const Mat& volume,
+    const Vec4i& volDims, const Vec8i& neighbourCoords, const Point3i volResolution,
+    const Point3f& _p)
+{
+    v_float32x4 p(_p.x, _p.y, _p.z, 0.f);
+    v_float32x4 result = getNormalColorVoxel(volume, volDims, neighbourCoords, volResolution, p);
+    float CV_DECL_ALIGNED(16) ares[4];
+    v_store_aligned(ares, result);
+    return Point3f(ares[0], ares[1], ares[2]);
+}
+#else
+inline Point3f getNormalColorVoxel(const Mat& volume,
+    const Vec4i& volDims, const Vec8i& neighbourCoords, const Point3i volResolution,
+    const Point3f& p)
+{
+    int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
+    const RGBTsdfVoxel* volData = volume.ptr<RGBTsdfVoxel>();
+
+    if (p.x < 1 || p.x >= volResolution.x - 2 ||
+        p.y < 1 || p.y >= volResolution.y - 2 ||
+        p.z < 1 || p.z >= volResolution.z - 2)
+        return nan3;
+
+    int ix = cvFloor(p.x);
+    int iy = cvFloor(p.y);
+    int iz = cvFloor(p.z);
+
+    float tx = p.x - ix;
+    float ty = p.y - iy;
+    float tz = p.z - iz;
+
+    int coordBase = ix * xdim + iy * ydim + iz * zdim;
+
+    Vec3f an;
+    for (int c = 0; c < 3; c++)
+    {
+        const int dim = volDims[c];
+        float& nv = an[c];
+
+        float vx[8];
+        for (int i = 0; i < 8; i++)
+            vx[i] = tsdfToFloat(volData[neighbourCoords[i] + coordBase + 1 * dim].tsdf) -
+            tsdfToFloat(volData[neighbourCoords[i] + coordBase - 1 * dim].tsdf);
+
+        float v00 = vx[0] + tz * (vx[1] - vx[0]);
+        float v01 = vx[2] + tz * (vx[3] - vx[2]);
+        float v10 = vx[4] + tz * (vx[5] - vx[4]);
+        float v11 = vx[6] + tz * (vx[7] - vx[6]);
+
+        float v0 = v00 + ty * (v01 - v00);
+        float v1 = v10 + ty * (v11 - v10);
+
+        nv = v0 + tx * (v1 - v0);
+    }
+
+    float nv = sqrt(an[0] * an[0] +
+        an[1] * an[1] +
+        an[2] * an[2]);
+    return nv < 0.0001f ? nan3 : an / nv;
+}
+#endif
+
+#if USE_INTRINSICS
+inline float interpolateColor(float tx, float ty, float tz, float vx[8])
+{
+    v_float32x4 v0246, v1357;
+    v_load_deinterleave(vx, v0246, v1357);
+
+    v_float32x4 vxx = v0246 + v_setall_f32(tz) * (v1357 - v0246);
+
+    v_float32x4 v00_10 = vxx;
+    v_float32x4 v01_11 = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(vxx)));
+
+    v_float32x4 v0_1 = v00_10 + v_setall_f32(ty) * (v01_11 - v00_10);
+    float v0 = v0_1.get0();
+    v0_1 = v_reinterpret_as_f32(v_rotate_right<2>(v_reinterpret_as_u32(v0_1)));
+    float v1 = v0_1.get0();
+
+    return v0 + tx * (v1 - v0);
+}
+#else
+inline float interpolateColor(float tx, float ty, float tz, float vx[8])
+{
+    float v00 = vx[0] + tz * (vx[1] - vx[0]);
+    float v01 = vx[2] + tz * (vx[3] - vx[2]);
+    float v10 = vx[4] + tz * (vx[5] - vx[4]);
+    float v11 = vx[6] + tz * (vx[7] - vx[6]);
+
+    float v0 = v00 + ty * (v01 - v00);
+    float v1 = v10 + ty * (v11 - v10);
+
+    return v0 + tx * (v1 - v0);
+}
+#endif
+
+
+#if USE_INTRINSICS
+//gradientDeltaFactor is fixed at 1.0 of voxel size
+
+inline v_float32x4 getColorVoxel(const Mat& volume,
+    const Vec4i& volDims, const Vec8i& neighbourCoords, const Point3i volResolution,
+    const float voxelSizeInv, const v_float32x4& p)
+{
+    if (v_check_any(p < v_float32x4(1.f, 1.f, 1.f, 0.f)) ||
+        v_check_any(p >= v_float32x4((float)(volResolution.x - 2),
+            (float)(volResolution.y - 2),
+            (float)(volResolution.z - 2), 1.f))
+        )
+        return nanv;
+
+    v_int32x4 ip = v_floor(p);
+
+    const int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
+    const RGBTsdfVoxel* volData = volume.ptr<RGBTsdfVoxel>();
+
+    int ix = ip.get0(); ip = v_rotate_right<1>(ip);
+    int iy = ip.get0(); ip = v_rotate_right<1>(ip);
+    int iz = ip.get0();
+
+    int coordBase = ix * xdim + iy * ydim + iz * zdim;
+    float CV_DECL_ALIGNED(16) rgb[4];
+
+#if USE_INTERPOLATION_IN_GETNORMAL
+    float r[8], g[8], b[8];
+    for (int i = 0; i < 8; i++)
+    {
+        r[i] = (float)volData[neighbourCoords[i] + coordBase].r;
+        g[i] = (float)volData[neighbourCoords[i] + coordBase].g;
+        b[i] = (float)volData[neighbourCoords[i] + coordBase].b;
+    }
+
+    v_float32x4 vsi(voxelSizeInv, voxelSizeInv, voxelSizeInv, voxelSizeInv);
+    v_float32x4 ptVox = p * vsi;
+    v_int32x4 iptVox = v_floor(ptVox);
+    v_float32x4 t = ptVox - v_cvt_f32(iptVox);
+    float tx = t.get0(); t = v_rotate_right<1>(t);
+    float ty = t.get0(); t = v_rotate_right<1>(t);
+    float tz = t.get0();
+    rgb[0] = interpolateColor(tx, ty, tz, r);
+    rgb[1] = interpolateColor(tx, ty, tz, g);
+    rgb[2] = interpolateColor(tx, ty, tz, b);
+    rgb[3] = 0.f;
+#else
+    rgb[0] = volData[coordBase].r;
+    rgb[1] = volData[coordBase].g;
+    rgb[2] = volData[coordBase].b;
+    rgb[3] = 0.f;
+#endif
+    v_float32x4 res = v_load_aligned(rgb);
+    return res;
+}
+
+inline Point3f getColorVoxel(const Mat& volume,
+    const Vec4i& volDims, const Vec8i& neighbourCoords, const Point3i volResolution,
+    const float voxelSizeInv, const Point3f& _p)
+{
+    v_float32x4 p(_p.x, _p.y, _p.z, 0.f);
+    v_float32x4 result = getColorVoxel(volume, volDims, neighbourCoords, volResolution, voxelSizeInv, p);
+    float CV_DECL_ALIGNED(16) ares[4];
+    v_store_aligned(ares, result);
+    return Point3f(ares[0], ares[1], ares[2]);
+}
+
+
+#else
+inline Point3f getColorVoxel(const Mat& volume,
+    const Vec4i& volDims, const Vec8i& neighbourCoords, const Point3i volResolution,
+    const float voxelSizeInv, const Point3f& p)
+{
+    const int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
+    const RGBTsdfVoxel* volData = volume.ptr<RGBTsdfVoxel>();
+
+    if (p.x < 1 || p.x >= volResolution.x - 2 ||
+        p.y < 1 || p.y >= volResolution.y - 2 ||
+        p.z < 1 || p.z >= volResolution.z - 2)
+        return nan3;
+
+    int ix = cvFloor(p.x);
+    int iy = cvFloor(p.y);
+    int iz = cvFloor(p.z);
+
+    int coordBase = ix * xdim + iy * ydim + iz * zdim;
+    Point3f res;
+
+#if USE_INTERPOLATION_IN_GETNORMAL
+    // TODO: create better interpolation or remove this simple version
+    float r[8], g[8], b[8];
+    for (int i = 0; i < 8; i++)
+    {
+        r[i] = (float)volData[neighbourCoords[i] + coordBase].r;
+        g[i] = (float)volData[neighbourCoords[i] + coordBase].g;
+        b[i] = (float)volData[neighbourCoords[i] + coordBase].b;
+    }
+
+    Point3f ptVox = p * voxelSizeInv;
+    Vec3i iptVox(cvFloor(ptVox.x), cvFloor(ptVox.y), cvFloor(ptVox.z));
+    float tx = ptVox.x - iptVox[0];
+    float ty = ptVox.y - iptVox[1];
+    float tz = ptVox.z - iptVox[2];
+
+    res = Point3f(interpolateColor(tx, ty, tz, r),
+        interpolateColor(tx, ty, tz, g),
+        interpolateColor(tx, ty, tz, b));
+#else
+    res = Point3f(volData[coordBase].r, volData[coordBase].g, volData[coordBase].b);
+#endif
+    colorFix(res);
+    return res;
+}
+#endif
+
+
+
+
+void raycastColorTsdfVolumeUnit(const VolumeSettings& settings, const Matx44f& cameraPose, int height, int width,
+    InputArray _volume, OutputArray _points, OutputArray _normals, OutputArray _colors)
+{
+    //std::cout << "raycastColorTsdfVolumeUnit()" << std::endl;
+
+    Size frameSize(width, height);
+    CV_Assert(frameSize.area() > 0);
+
+    _points.create(frameSize, POINT_TYPE);
+    _normals.create(frameSize, POINT_TYPE);
+    _colors.create(frameSize, COLOR_TYPE);
+
+    Points points = _points.getMat();
+    Normals normals = _normals.getMat();
+    Colors colors = _colors.getMat();
+
+    const Vec4i volDims;
+    settings.getVolumeDimensions(volDims);
+    const Vec8i neighbourCoords = Vec8i(
+        volDims.dot(Vec4i(0, 0, 0)),
+        volDims.dot(Vec4i(0, 0, 1)),
+        volDims.dot(Vec4i(0, 1, 0)),
+        volDims.dot(Vec4i(0, 1, 1)),
+        volDims.dot(Vec4i(1, 0, 0)),
+        volDims.dot(Vec4i(1, 0, 1)),
+        volDims.dot(Vec4i(1, 1, 0)),
+        volDims.dot(Vec4i(1, 1, 1))
+    );
+
+    Vec3i resolution;
+    settings.getVolumeResolution(resolution);
+    const Point3i volResolution = Point3i(resolution);
+    const Point3f volSize = Point3f(volResolution) * settings.getVoxelSize();
+
+    Matx33f intr;
+    settings.getCameraRaycastIntrinsics(intr);
+    const Intr::Reprojector reprojDepth = Intr(intr).makeReprojector();
+
+    Matx44f _pose;
+    settings.getVolumePose(_pose);
+    const Affine3f pose = Affine3f(_pose);
+
+    const Point3f boxMax(volSize - Point3f(settings.getVoxelSize(), settings.getVoxelSize(), settings.getVoxelSize()));
+    const Point3f boxMin = Point3f(0, 0, 0);
+    const Affine3f cam2vol(pose.inv() * Affine3f(cameraPose));
+    const Affine3f vol2cam(Affine3f(cameraPose.inv()) * pose);
+
+    const Mat volume = _volume.getMat();
+    float voxelSize = settings.getVoxelSize();
+    float voxelSizeInv = 1.0f / voxelSize;
+    float tstep = settings.getTsdfTruncateDistance() * settings.getRaycastStepFactor();
+
+    Range raycastRange = Range(0, points.rows);
+
+#if USE_INTRINSICS
+    auto RaycastInvoker = [&](const Range& range)
+    {
+        const v_float32x4 vfxy(reprojDepth.fxinv, reprojDepth.fyinv, 0, 0);
+        const v_float32x4 vcxy(reprojDepth.cx, reprojDepth.cy, 0, 0);
+
+        const float(&cm)[16] = cam2vol.matrix.val;
+        const v_float32x4 camRot0(cm[0], cm[4], cm[8], 0);
+        const v_float32x4 camRot1(cm[1], cm[5], cm[9], 0);
+        const v_float32x4 camRot2(cm[2], cm[6], cm[10], 0);
+        const v_float32x4 camTrans(cm[3], cm[7], cm[11], 0);
+
+        const v_float32x4 boxDown(boxMin.x, boxMin.y, boxMin.z, 0.f);
+        const v_float32x4 boxUp(boxMax.x, boxMax.y, boxMax.z, 0.f);
+
+        const v_float32x4 invVoxelSize = v_float32x4(voxelSizeInv,
+            voxelSizeInv,
+            voxelSizeInv, 1.f);
+
+        const float(&vm)[16] = vol2cam.matrix.val;
+        const v_float32x4 volRot0(vm[0], vm[4], vm[8], 0);
+        const v_float32x4 volRot1(vm[1], vm[5], vm[9], 0);
+        const v_float32x4 volRot2(vm[2], vm[6], vm[10], 0);
+        const v_float32x4 volTrans(vm[3], vm[7], vm[11], 0);
+
+        for (int y = range.start; y < range.end; y++)
+        {
+            ptype* ptsRow = points[y];
+            ptype* nrmRow = normals[y];
+            ptype* clrRow = colors[y];
+
+            for (int x = 0; x < points.cols; x++)
+            {
+                v_float32x4 point = nanv, normal = nanv, color = nanv;
+
+                v_float32x4 orig = camTrans;
+
+                // get direction through pixel in volume space:
+
+                // 1. reproject (x, y) on projecting plane where z = 1.f
+                v_float32x4 planed = (v_float32x4((float)x, (float)y, 0.f, 0.f) - vcxy) * vfxy;
+                planed = v_combine_low(planed, v_float32x4(1.f, 0.f, 0.f, 0.f));
+
+                // 2. rotate to volume space
+                planed = v_matmuladd(planed, camRot0, camRot1, camRot2, v_setzero_f32());
+
+                // 3. normalize
+                v_float32x4 invNorm = v_invsqrt(v_setall_f32(v_reduce_sum(planed * planed)));
+                v_float32x4 dir = planed * invNorm;
+
+                // compute intersection of ray with all six bbox planes
+                v_float32x4 rayinv = v_setall_f32(1.f) / dir;
+                // div by zero should be eliminated by these products
+                v_float32x4 tbottom = rayinv * (boxDown - orig);
+                v_float32x4 ttop = rayinv * (boxUp - orig);
+
+                // re-order intersections to find smallest and largest on each axis
+                v_float32x4 minAx = v_min(ttop, tbottom);
+                v_float32x4 maxAx = v_max(ttop, tbottom);
+
+                // near clipping plane
+                const float clip = 0.f;
+                float _minAx[4], _maxAx[4];
+                v_store(_minAx, minAx);
+                v_store(_maxAx, maxAx);
+                float tmin = max({ _minAx[0], _minAx[1], _minAx[2], clip });
+                float tmax = min({ _maxAx[0], _maxAx[1], _maxAx[2] });
+
+                // precautions against getting coordinates out of bounds
+                tmin = tmin + tstep;
+                tmax = tmax - tstep;
+
+                if (tmin < tmax)
+                {
+                    // interpolation optimized a little
+                    orig *= invVoxelSize;
+                    dir *= invVoxelSize;
+
+                    int xdim = volDims[0];
+                    int ydim = volDims[1];
+                    int zdim = volDims[2];
+                    v_float32x4 rayStep = dir * v_setall_f32(tstep);
+                    v_float32x4 next = (orig + dir * v_setall_f32(tmin));
+                    float f = interpolateColorVoxel(volume, volDims, neighbourCoords, next);
+                    float fnext = f;
+
+                    //raymarch
+                    int steps = 0;
+                    int nSteps = cvFloor((tmax - tmin) / tstep);
+                    for (; steps < nSteps; steps++)
+                    {
+                        next += rayStep;
+                        v_int32x4 ip = v_round(next);
+                        int ix = ip.get0(); ip = v_rotate_right<1>(ip);
+                        int iy = ip.get0(); ip = v_rotate_right<1>(ip);
+                        int iz = ip.get0();
+                        int coord = ix * xdim + iy * ydim + iz * zdim;
+
+                        fnext = tsdfToFloat(volume.at<RGBTsdfVoxel>(coord).tsdf);
+                        if (fnext != f)
+                        {
+                            fnext = interpolateColorVoxel(volume, volDims, neighbourCoords, next);
+
+                            // when ray crosses a surface
+                            if (std::signbit(f) != std::signbit(fnext))
+                                break;
+
+                            f = fnext;
+                        }
+                    }
+
+                    // if ray penetrates a surface from outside
+                    // linearly interpolate t between two f values
+                    if (f > 0.f && fnext < 0.f)
+                    {
+                        v_float32x4 tp = next - rayStep;
+                        float ft = interpolateColorVoxel(volume, volDims, neighbourCoords, tp);
+                        float ftdt = interpolateColorVoxel(volume, volDims, neighbourCoords, next);
+                        float ts = tmin + tstep * (steps - ft / (ftdt - ft));
+
+                        // avoid division by zero
+                        if (!cvIsNaN(ts) && !cvIsInf(ts))
+                        {
+                            v_float32x4 pv = (orig + dir * v_setall_f32(ts));
+                            v_float32x4 nv = getNormalColorVoxel(volume, volDims, neighbourCoords, volResolution, pv);
+                            v_float32x4 cv = getColorVoxel(volume, volDims, neighbourCoords, volResolution, voxelSizeInv, pv);
+
+                            if (!isNaN(nv))
+                            {
+                                color = cv;
+                                //convert pv and nv to camera space
+                                normal = v_matmuladd(nv, volRot0, volRot1, volRot2, v_setzero_f32());
+                                // interpolation optimized a little
+                                point = v_matmuladd(pv * v_float32x4(voxelSize, voxelSize, voxelSize, 1.f),
+                                    volRot0, volRot1, volRot2, volTrans);
+                            }
+                        }
+                    }
+                }
+
+                v_store((float*)(&ptsRow[x]), point);
+                v_store((float*)(&nrmRow[x]), normal);
+                v_store((float*)(&clrRow[x]), color);
+            }
+        }
+    };
+#else
+    auto RaycastInvoker = [&](const Range& range)
+    {
+        const Point3f camTrans = cam2vol.translation();
+        const Matx33f  camRot = cam2vol.rotation();
+        const Matx33f  volRot = vol2cam.rotation();
+
+        for (int y = range.start; y < range.end; y++)
+        {
+            ptype* ptsRow = points[y];
+            ptype* nrmRow = normals[y];
+            ptype* clrRow = colors[y];
+
+            for (int x = 0; x < points.cols; x++)
+            {
+                Point3f point = nan3, normal = nan3, color = nan3;
+
+                Point3f orig = camTrans;
+                // direction through pixel in volume space
+                Point3f dir = normalize(Vec3f(camRot * reprojDepth(Point3f(float(x), float(y), 1.f))));
+
+                // compute intersection of ray with all six bbox planes
+                Vec3f rayinv(1.f / dir.x, 1.f / dir.y, 1.f / dir.z);
+                Point3f tbottom = rayinv.mul(boxMin - orig);
+                Point3f ttop = rayinv.mul(boxMax - orig);
+
+                // re-order intersections to find smallest and largest on each axis
+                Point3f minAx(min(ttop.x, tbottom.x), min(ttop.y, tbottom.y), min(ttop.z, tbottom.z));
+                Point3f maxAx(max(ttop.x, tbottom.x), max(ttop.y, tbottom.y), max(ttop.z, tbottom.z));
+
+                // near clipping plane
+                const float clip = 0.f;
+                //float tmin = max(max(max(minAx.x, minAx.y), max(minAx.x, minAx.z)), clip);
+                //float tmax =     min(min(maxAx.x, maxAx.y), min(maxAx.x, maxAx.z));
+                float tmin = max({ minAx.x, minAx.y, minAx.z, clip });
+                float tmax = min({ maxAx.x, maxAx.y, maxAx.z });
+
+                // precautions against getting coordinates out of bounds
+                tmin = tmin + tstep;
+                tmax = tmax - tstep;
+
+                if (tmin < tmax)
+                {
+                    // interpolation optimized a little
+                    orig = orig * voxelSizeInv;
+                    dir = dir * voxelSizeInv;
+
+                    Point3f rayStep = dir * tstep;
+                    Point3f next = (orig + dir * tmin);
+                    float f = interpolateColorVoxel(volume, volDims, neighbourCoords, next);
+                    float fnext = f;
+
+                    //raymarch
+                    int steps = 0;
+                    int nSteps = int(floor((tmax - tmin) / tstep));
+                    for (; steps < nSteps; steps++)
+                    {
+                        next += rayStep;
+                        int xdim = volDims[0];
+                        int ydim = volDims[1];
+                        int zdim = volDims[2];
+                        int ix = cvRound(next.x);
+                        int iy = cvRound(next.y);
+                        int iz = cvRound(next.z);
+                        fnext = tsdfToFloat(volume.at<RGBTsdfVoxel>(ix * xdim + iy * ydim + iz * zdim).tsdf);
+                        if (fnext != f)
+                        {
+                            fnext = interpolateColorVoxel(volume, volDims, neighbourCoords, next);
+                            // when ray crosses a surface
+                            if (std::signbit(f) != std::signbit(fnext))
+                                break;
+
+                            f = fnext;
+                        }
+                    }
+                    // if ray penetrates a surface from outside
+                    // linearly interpolate t between two f values
+                    if (f > 0.f && fnext < 0.f)
+                    {
+                        Point3f tp = next - rayStep;
+                        float ft = interpolateColorVoxel(volume, volDims, neighbourCoords, tp);
+                        float ftdt = interpolateColorVoxel(volume, volDims, neighbourCoords, next);
+                        // float t = tmin + steps*tstep;
+                        // float ts = t - tstep*ft/(ftdt - ft);
+                        float ts = tmin + tstep * (steps - ft / (ftdt - ft));
+
+                        // avoid division by zero
+                        if (!cvIsNaN(ts) && !cvIsInf(ts))
+                        {
+                            Point3f pv = (orig + dir * ts);
+                            Point3f nv = getNormalColorVoxel(volume, volDims, neighbourCoords, volResolution, pv);
+                            Point3f cv = getColorVoxel(volume, volDims, neighbourCoords, volResolution, voxelSizeInv, pv);
+                            if (!isNaN(nv))
+                            {
+                                //convert pv and nv to camera space
+                                normal = volRot * nv;
+                                color = cv;
+                                // interpolation optimized a little
+                                point = vol2cam * (pv * voxelSize);
+                            }
+                        }
+                    }
+                }
+                ptsRow[x] = toPtype(point);
+                nrmRow[x] = toPtype(normal);
+                clrRow[x] = toPtype(color);
+            }
+        }
+
+    };
+#endif
+
+    parallel_for_(raycastRange, RaycastInvoker);
+    //RaycastInvoker(raycastRange);
+
+    //std::cout << "raycastColorTsdfVolumeUnit() end" << std::endl;
+}
+
+
+void fetchNormalsFromColorTsdfVolumeUnit(const VolumeSettings& settings, InputArray _volume,
+    InputArray _points, OutputArray _normals)
+{
+    //std::cout << "fetchNormalsFromColorTsdfVolumeUnit" << std::endl;
+
+    CV_TRACE_FUNCTION();
+    CV_Assert(!_points.empty());
+    if (!_normals.needed())
+        return;
+
+    Points points = _points.getMat();
+    CV_Assert(points.type() == POINT_TYPE);
+
+    _normals.createSameSize(_points, _points.type());
+    Normals normals = _normals.getMat();
+
+    const Mat volume = _volume.getMat();
+
+    Matx44f _pose;
+    settings.getVolumePose(_pose);
+    const Affine3f pose = Affine3f(_pose);
+    Affine3f invPose(pose.inv());
+    float voxelSizeInv = 1.f / settings.getVoxelSize();
+
+    const Vec4i volDims;
+    settings.getVolumeDimensions(volDims);
+    const Vec8i neighbourCoords = Vec8i(
+        volDims.dot(Vec4i(0, 0, 0)),
+        volDims.dot(Vec4i(0, 0, 1)),
+        volDims.dot(Vec4i(0, 1, 0)),
+        volDims.dot(Vec4i(0, 1, 1)),
+        volDims.dot(Vec4i(1, 0, 0)),
+        volDims.dot(Vec4i(1, 0, 1)),
+        volDims.dot(Vec4i(1, 1, 0)),
+        volDims.dot(Vec4i(1, 1, 1))
+    );
+
+    Vec3i resolution;
+    settings.getVolumeResolution(resolution);
+    const Point3i volResolution = Point3i(resolution);
+
+    auto PushNormals = [&](const ptype& pp, const int* position)
+    {
+        Point3f p = fromPtype(pp);
+        Point3f n = nan3;
+        if (!isNaN(p))
+        {
+            Point3f voxPt = (invPose * p);
+            voxPt = voxPt * voxelSizeInv;
+            n = pose.rotation() * getNormalColorVoxel(volume, volDims, neighbourCoords, volResolution, voxPt);
+        }
+        normals(position[0], position[1]) = toPtype(n);
+    };
+    points.forEach(PushNormals);
+
+}
+
+inline void coord(
+    const Mat& volume, const RGBTsdfVoxel* volDataStart, std::vector<ptype>& points, std::vector<ptype>& normals, std::vector<ptype>& colors,
+    const Point3i volResolution, const Vec4i volDims, const Vec8i neighbourCoords, const Affine3f pose,
+    const float voxelSize, const float voxelSizeInv, bool needNormals, bool needColors, int x, int y, int z, Point3f V, float v0, int axis)
+{
+    // 0 for x, 1 for y, 2 for z
+    bool limits = false;
+    Point3i shift;
+    float Vc = 0.f;
+    if (axis == 0)
+    {
+        shift = Point3i(1, 0, 0);
+        limits = (x + 1 < volResolution.x);
+        Vc = V.x;
+    }
+    if (axis == 1)
+    {
+        shift = Point3i(0, 1, 0);
+        limits = (y + 1 < volResolution.y);
+        Vc = V.y;
+    }
+    if (axis == 2)
+    {
+        shift = Point3i(0, 0, 1);
+        limits = (z + 1 < volResolution.z);
+        Vc = V.z;
+    }
+
+    if (limits)
+    {
+        const RGBTsdfVoxel& voxeld = volDataStart[(x + shift.x) * volDims[0] +
+            (y + shift.y) * volDims[1] +
+            (z + shift.z) * volDims[2]];
+        float vd = tsdfToFloat(voxeld.tsdf);
+
+        if (voxeld.weight != 0 && vd != 1.f)
+        {
+            if ((v0 > 0 && vd < 0) || (v0 < 0 && vd > 0))
+            {
+                //linearly interpolate coordinate
+                float Vn = Vc + voxelSize;
+                float dinv = 1.f / (abs(v0) + abs(vd));
+                float inter = (Vc * abs(vd) + Vn * abs(v0)) * dinv;
+
+                Point3f p(shift.x ? inter : V.x,
+                    shift.y ? inter : V.y,
+                    shift.z ? inter : V.z);
+                {
+                    points.push_back(toPtype(pose * p));
+                    if (needNormals)
+                        normals.push_back(toPtype(pose.rotation() *
+                            getNormalColorVoxel(volume, volDims, neighbourCoords, volResolution, p * voxelSizeInv)));
+                    if (needColors)
+                        colors.push_back(toPtype(pose.rotation() *
+                            getColorVoxel(volume, volDims, neighbourCoords, volResolution, voxelSizeInv, p * voxelSizeInv)));
+                }
+            }
+        }
+    }
+}
+
+void fetchPointsNormalsFromColorTsdfVolumeUnit(const VolumeSettings& settings, InputArray _volume,
+    OutputArray _points, OutputArray _normals)
+{
+    fetchPointsNormalsColorsFromColorTsdfVolumeUnit(settings, _volume, _points, _normals, noArray());
+}
+
+void fetchPointsNormalsColorsFromColorTsdfVolumeUnit(const VolumeSettings& settings, InputArray _volume,
+    OutputArray _points, OutputArray _normals, OutputArray _colors)
+{
+    if (!_points.needed())
+        return;
+    const Mat volume = _volume.getMat();
+
+    Matx44f _pose;
+    settings.getVolumePose(_pose);
+    const Affine3f pose = Affine3f(_pose);
+    float voxelSize = settings.getVoxelSize();
+    float voxelSizeInv = 1.f / settings.getVoxelSize();
+
+    const Vec4i volDims;
+    settings.getVolumeDimensions(volDims);
+    const Vec8i neighbourCoords = Vec8i(
+        volDims.dot(Vec4i(0, 0, 0)),
+        volDims.dot(Vec4i(0, 0, 1)),
+        volDims.dot(Vec4i(0, 1, 0)),
+        volDims.dot(Vec4i(0, 1, 1)),
+        volDims.dot(Vec4i(1, 0, 0)),
+        volDims.dot(Vec4i(1, 0, 1)),
+        volDims.dot(Vec4i(1, 1, 0)),
+        volDims.dot(Vec4i(1, 1, 1))
+    );
+
+    Vec3i resolution;
+    settings.getVolumeResolution(resolution);
+    const Point3i volResolution = Point3i(resolution);
+
+    bool needNormals = _normals.needed();
+    bool needColors  = _colors.needed();
+
+    std::vector<std::vector<ptype>> pVecs, nVecs, cVecs;
+    Range fetchRange(0, volResolution.x);
+    const int nstripes = -1;
+    const RGBTsdfVoxel* volDataStart = volume.ptr<RGBTsdfVoxel>();
+    Mutex mutex;
+    auto FetchPointsNormalsInvoker = [&](const Range& range) {
+
+        std::vector<ptype> points, normals, colors;
+        for (int x = range.start; x < range.end; x++)
+        {
+            const RGBTsdfVoxel* volDataX = volDataStart + x * volDims[0];
+            for (int y = 0; y < volResolution.y; y++)
+            {
+                const RGBTsdfVoxel* volDataY = volDataX + y * volDims[1];
+                for (int z = 0; z < volResolution.z; z++)
+                {
+                    const RGBTsdfVoxel& voxel0 = volDataY[z * volDims[2]];
+                    float v0 = tsdfToFloat(voxel0.tsdf);
+                    if (voxel0.weight != 0 && v0 != 1.f)
+                    {
+                        Point3f V(Point3f((float)x + 0.5f, (float)y + 0.5f, (float)z + 0.5f) * voxelSize);
+
+                        coord(volume, volDataStart, points, normals, colors, volResolution, volDims, neighbourCoords, pose, voxelSize, voxelSizeInv, needNormals, needColors, x, y, z, V, v0, 0);
+                        coord(volume, volDataStart, points, normals, colors, volResolution, volDims, neighbourCoords, pose, voxelSize, voxelSizeInv, needNormals, needColors, x, y, z, V, v0, 1);
+                        coord(volume, volDataStart, points, normals, colors, volResolution, volDims, neighbourCoords, pose, voxelSize, voxelSizeInv, needNormals, needColors, x, y, z, V, v0, 2);
+
+                    } // if voxel is not empty
+                }
+            }
+        }
+        AutoLock al(mutex);
+        pVecs.push_back(points);
+        nVecs.push_back(normals);
+        cVecs.push_back(colors);
+    };
+
+    parallel_for_(fetchRange, FetchPointsNormalsInvoker, nstripes);
+
+    std::vector<ptype> points, normals, colors;
+    for (size_t i = 0; i < pVecs.size(); i++)
+    {
+        points.insert(points.end(), pVecs[i].begin(), pVecs[i].end());
+        normals.insert(normals.end(), nVecs[i].begin(), nVecs[i].end());
+        colors.insert(colors.end(), cVecs[i].begin(), cVecs[i].end());
+    }
+
+    _points.create((int)points.size(), 1, POINT_TYPE);
+    if (!points.empty())
+        Mat((int)points.size(), 1, POINT_TYPE, &points[0]).copyTo(_points.getMat());
+
+    if (_normals.needed())
+    {
+        _normals.create((int)normals.size(), 1, POINT_TYPE);
+        if (!normals.empty())
+            Mat((int)normals.size(), 1, POINT_TYPE, &normals[0]).copyTo(_normals.getMat());
+    }
+
+    if (_colors.needed())
+    {
+        _colors.create((int)colors.size(), 1, COLOR_TYPE);
+        if (!colors.empty())
+            Mat((int)colors.size(), 1, COLOR_TYPE, &colors[0]).copyTo(_colors.getMat());
+    }
+}
+
+
+} // namespace cv
diff --git a/modules/3d/src/rgbd/color_tsdf_functions.hpp b/modules/3d/src/rgbd/color_tsdf_functions.hpp
new file mode 100644
index 0000000000..5d99784278
--- /dev/null
+++ b/modules/3d/src/rgbd/color_tsdf_functions.hpp
@@ -0,0 +1,43 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// Partially rewritten from https://github.com/Nerei/kinfu_remake
+// Copyright(c) 2012, Anatoly Baksheev. All rights reserved.
+
+#ifndef OPENCV_3D_COLORED_TSDF_FUNCTIONS_HPP
+#define OPENCV_3D_COLORED_TSDF_FUNCTIONS_HPP
+
+#include <unordered_set>
+
+#include "utils.hpp"
+#include "tsdf_functions.hpp"
+
+#define USE_INTERPOLATION_IN_GETNORMAL 1
+
+namespace cv
+{
+
+void integrateColorTsdfVolumeUnit(const VolumeSettings& settings, const Matx44f& cameraPose,
+    InputArray _depth, InputArray _rgb, InputArray _pixNorms, InputArray _volume);
+
+void integrateColorTsdfVolumeUnit(
+    const VolumeSettings& settings, const Matx44f& volumePose, const Matx44f& cameraPose,
+    InputArray _depth, InputArray _rgb, InputArray _pixNorms, InputArray _volume);
+
+void raycastColorTsdfVolumeUnit(const VolumeSettings& settings, const Matx44f& cameraPose, int height, int width,
+    InputArray _volume, OutputArray _points, OutputArray _normals, OutputArray _colors);
+
+void fetchNormalsFromColorTsdfVolumeUnit(const VolumeSettings& settings, InputArray _volume,
+    InputArray _points, OutputArray _normals);
+
+void fetchPointsNormalsFromColorTsdfVolumeUnit(const VolumeSettings& settings, InputArray _volume,
+    OutputArray _points, OutputArray _normals);
+
+void fetchPointsNormalsColorsFromColorTsdfVolumeUnit(const VolumeSettings& settings, InputArray _volume,
+    OutputArray _points, OutputArray _normals, OutputArray _colors);
+
+
+} // namespace cv
+
+#endif
diff --git a/modules/3d/src/rgbd/colored_tsdf.cpp b/modules/3d/src/rgbd/colored_tsdf.cpp
deleted file mode 100644
index a1b17cf48d..0000000000
--- a/modules/3d/src/rgbd/colored_tsdf.cpp
+++ /dev/null
@@ -1,1024 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html
-
-// Partially rewritten from https://github.com/Nerei/kinfu_remake
-// Copyright(c) 2012, Anatoly Baksheev. All rights reserved.
-
-#include "../precomp.hpp"
-#include "colored_tsdf.hpp"
-
-#define USE_INTERPOLATION_IN_GETNORMAL 1
-
-namespace cv {
-
-ColoredTSDFVolume::ColoredTSDFVolume(float _voxelSize, Matx44f _pose, float _raycastStepFactor, float _truncDist,
-                       int _maxWeight, Point3i _resolution, bool zFirstMemOrder)
-    : Volume(_voxelSize, _pose, _raycastStepFactor),
-      volResolution(_resolution),
-      maxWeight( WeightType(_maxWeight) )
-{
-    CV_Assert(_maxWeight < 255);
-    // Unlike original code, this should work with any volume size
-    // Not only when (x,y,z % 32) == 0
-    volSize   = Point3f(volResolution) * voxelSize;
-    truncDist = std::max(_truncDist, 2.1f * voxelSize);
-
-    // (xRes*yRes*zRes) array
-    // Depending on zFirstMemOrder arg:
-    // &elem(x, y, z) = data + x*zRes*yRes + y*zRes + z;
-    // &elem(x, y, z) = data + x + y*xRes + z*xRes*yRes;
-    int xdim, ydim, zdim;
-    if(zFirstMemOrder)
-    {
-        xdim = volResolution.z * volResolution.y;
-        ydim = volResolution.z;
-        zdim = 1;
-    }
-    else
-    {
-        xdim = 1;
-        ydim = volResolution.x;
-        zdim = volResolution.x * volResolution.y;
-    }
-
-    volDims = Vec4i(xdim, ydim, zdim);
-    neighbourCoords = Vec8i(
-        volDims.dot(Vec4i(0, 0, 0)),
-        volDims.dot(Vec4i(0, 0, 1)),
-        volDims.dot(Vec4i(0, 1, 0)),
-        volDims.dot(Vec4i(0, 1, 1)),
-        volDims.dot(Vec4i(1, 0, 0)),
-        volDims.dot(Vec4i(1, 0, 1)),
-        volDims.dot(Vec4i(1, 1, 0)),
-        volDims.dot(Vec4i(1, 1, 1))
-    );
-}
-
-class ColoredTSDFVolumeCPU : public ColoredTSDFVolume
-{
-public:
-    // dimension in voxels, size in meters
-    ColoredTSDFVolumeCPU(float _voxelSize, cv::Matx44f _pose, float _raycastStepFactor, float _truncDist,
-                         int _maxWeight, Vec3i _resolution, bool zFirstMemOrder = true);
-    virtual void integrate(InputArray, float, const Matx44f&, const Matx33f&, const int) override
-    { CV_Error(Error::StsNotImplemented, "Not implemented"); };
-    virtual void integrate(InputArray _depth, InputArray _rgb, float depthFactor, const Matx44f& cameraPose,
-                           const Matx33f& depth_intrinsics, const Matx33f& rgb_intrinsics, const int frameId = 0) override;
-    virtual void raycast(const Matx44f& cameraPose, const Matx33f& depth_intrinsics, const Size& frameSize,
-                         OutputArray points, OutputArray normals, OutputArray colors) const override;
-    virtual void raycast(const Matx44f&, const Matx33f&, const Size&, OutputArray, OutputArray) const override
-    { CV_Error(Error::StsNotImplemented, "Not implemented"); };
-
-    virtual void fetchNormals(InputArray points, OutputArray _normals) const override;
-    virtual void fetchPointsNormals(OutputArray points, OutputArray normals) const override;
-
-    virtual void reset() override;
-    virtual RGBTsdfVoxel at(const Vec3i& volumeIdx) const;
-
-    float interpolateVoxel(const cv::Point3f& p) const;
-    Point3f getNormalVoxel(const cv::Point3f& p) const;
-    float interpolateColor(float tx, float ty, float tz, float vx[8]) const;
-    Point3f getColorVoxel(const cv::Point3f& p) const;
-
-#if USE_INTRINSICS
-    float interpolateVoxel(const v_float32x4& p) const;
-    v_float32x4 getNormalVoxel(const v_float32x4& p) const;
-    v_float32x4 getColorVoxel(const v_float32x4& p) const;
-#endif
-
-    Vec4i volStrides;
-    Vec6f frameParams;
-    Mat pixNorms;
-    // See zFirstMemOrder arg of parent class constructor
-    // for the array layout info
-    // Consist of Voxel elements
-    Mat volume;
-};
-
-// dimension in voxels, size in meters
-ColoredTSDFVolumeCPU::ColoredTSDFVolumeCPU(float _voxelSize, cv::Matx44f _pose, float _raycastStepFactor,
-                             float _truncDist, int _maxWeight, Vec3i _resolution,
-                             bool zFirstMemOrder)
-    : ColoredTSDFVolume(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, _resolution,
-                 zFirstMemOrder)
-{
-    int xdim, ydim, zdim;
-    if (zFirstMemOrder)
-    {
-        xdim = volResolution.z * volResolution.y;
-        ydim = volResolution.z;
-        zdim = 1;
-    }
-    else
-    {
-        xdim = 1;
-        ydim = volResolution.x;
-        zdim = volResolution.x * volResolution.y;
-    }
-    volStrides = Vec4i(xdim, ydim, zdim);
-
-    volume = Mat(1, volResolution.x * volResolution.y * volResolution.z, rawType<RGBTsdfVoxel>());
-
-    reset();
-}
-
-// zero volume, leave rest params the same
-void ColoredTSDFVolumeCPU::reset()
-{
-    CV_TRACE_FUNCTION();
-
-    volume.forEach<VecRGBTsdfVoxel>([](VecRGBTsdfVoxel& vv, const int* /* position */)
-    {
-        RGBTsdfVoxel& v = reinterpret_cast<RGBTsdfVoxel&>(vv);
-        v.tsdf = floatToTsdf(0.0f); v.weight = 0;
-    });
-}
-
-RGBTsdfVoxel ColoredTSDFVolumeCPU::at(const Vec3i& volumeIdx) const
-{
-    //! Out of bounds
-    if ((volumeIdx[0] >= volResolution.x || volumeIdx[0] < 0) ||
-        (volumeIdx[1] >= volResolution.y || volumeIdx[1] < 0) ||
-        (volumeIdx[2] >= volResolution.z || volumeIdx[2] < 0))
-    {
-        return RGBTsdfVoxel(floatToTsdf(1.f), 0, 160, 160, 160);
-    }
-
-    const RGBTsdfVoxel* volData = volume.ptr<RGBTsdfVoxel>();
-    int coordBase =
-        volumeIdx[0] * volDims[0] + volumeIdx[1] * volDims[1] + volumeIdx[2] * volDims[2];
-    return volData[coordBase];
-}
-
-// use depth instead of distance (optimization)
-void ColoredTSDFVolumeCPU::integrate(InputArray _depth, InputArray _rgb, float depthFactor, const Matx44f& cameraPose,
-                                     const Matx33f& _depth_intrinsics, const Matx33f& _rgb_intrinsics, const int frameId)
-{
-    CV_TRACE_FUNCTION();
-    CV_UNUSED(frameId);
-    CV_Assert(_depth.type() == DEPTH_TYPE);
-    CV_Assert(!_depth.empty());
-    Depth depth = _depth.getMat();
-    Colors rgb = _rgb.getMat();
-    Intr depth_intrinsics(_depth_intrinsics);
-    Intr rgb_intrinsics(_rgb_intrinsics);
-    Vec6f newParams((float)depth.rows, (float)depth.cols,
-                    depth_intrinsics.fx, depth_intrinsics.fy,
-                    depth_intrinsics.cx, depth_intrinsics.cy);
-    if (!(frameParams == newParams))
-    {
-        frameParams = newParams;
-        pixNorms = preCalculationPixNorm(depth.size(), depth_intrinsics);
-    }
-
-    integrateRGBVolumeUnit(truncDist, voxelSize, maxWeight, (this->pose).matrix, volResolution, volStrides, depth, rgb,
-        depthFactor, cameraPose, depth_intrinsics, rgb_intrinsics, pixNorms, volume);
-}
-
-#if USE_INTRINSICS
-// all coordinate checks should be done in inclosing cycle
-inline float ColoredTSDFVolumeCPU::interpolateVoxel(const Point3f& _p) const
-{
-    v_float32x4 p(_p.x, _p.y, _p.z, 0);
-    return interpolateVoxel(p);
-}
-
-inline float ColoredTSDFVolumeCPU::interpolateVoxel(const v_float32x4& p) const
-{
-    // tx, ty, tz = floor(p)
-    v_int32x4 ip = v_floor(p);
-    v_float32x4 t = p - v_cvt_f32(ip);
-    float tx = t.get0();
-    t = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
-    float ty = t.get0();
-    t = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
-    float tz = t.get0();
-
-    int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
-    const RGBTsdfVoxel* volData = volume.ptr<RGBTsdfVoxel>();
-
-    int ix = ip.get0();
-    ip = v_rotate_right<1>(ip);
-    int iy = ip.get0();
-    ip = v_rotate_right<1>(ip);
-    int iz = ip.get0();
-
-    int coordBase = ix * xdim + iy * ydim + iz * zdim;
-
-    TsdfType vx[8];
-    for (int i = 0; i < 8; i++)
-        vx[i] = volData[neighbourCoords[i] + coordBase].tsdf;
-
-    v_float32x4 v0246 = tsdfToFloat_INTR(v_int32x4(vx[0], vx[2], vx[4], vx[6]));
-    v_float32x4 v1357 = tsdfToFloat_INTR(v_int32x4(vx[1], vx[3], vx[5], vx[7]));
-    v_float32x4 vxx = v0246 + v_setall_f32(tz) * (v1357 - v0246);
-
-    v_float32x4 v00_10 = vxx;
-    v_float32x4 v01_11 = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(vxx)));
-
-    v_float32x4 v0_1 = v00_10 + v_setall_f32(ty) * (v01_11 - v00_10);
-    float v0 = v0_1.get0();
-    v0_1 = v_reinterpret_as_f32(v_rotate_right<2>(v_reinterpret_as_u32(v0_1)));
-    float v1 = v0_1.get0();
-
-    return v0 + tx * (v1 - v0);
-}
-#else
-inline float ColoredTSDFVolumeCPU::interpolateVoxel(const Point3f& p) const
-{
-    int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
-
-    int ix = cvFloor(p.x);
-    int iy = cvFloor(p.y);
-    int iz = cvFloor(p.z);
-
-    float tx = p.x - ix;
-    float ty = p.y - iy;
-    float tz = p.z - iz;
-
-    int coordBase = ix*xdim + iy*ydim + iz*zdim;
-    const RGBTsdfVoxel* volData = volume.ptr<RGBTsdfVoxel>();
-
-    float vx[8];
-    for (int i = 0; i < 8; i++)
-        vx[i] = tsdfToFloat(volData[neighbourCoords[i] + coordBase].tsdf);
-
-    float v00 = vx[0] + tz*(vx[1] - vx[0]);
-    float v01 = vx[2] + tz*(vx[3] - vx[2]);
-    float v10 = vx[4] + tz*(vx[5] - vx[4]);
-    float v11 = vx[6] + tz*(vx[7] - vx[6]);
-
-    float v0 = v00 + ty*(v01 - v00);
-    float v1 = v10 + ty*(v11 - v10);
-
-    return v0 + tx*(v1 - v0);
-}
-#endif
-
-
-#if USE_INTRINSICS
-//gradientDeltaFactor is fixed at 1.0 of voxel size
-inline Point3f ColoredTSDFVolumeCPU::getNormalVoxel(const Point3f& _p) const
-{
-    v_float32x4 p(_p.x, _p.y, _p.z, 0.f);
-    v_float32x4 result = getNormalVoxel(p);
-    float CV_DECL_ALIGNED(16) ares[4];
-    v_store_aligned(ares, result);
-    return Point3f(ares[0], ares[1], ares[2]);
-}
-
-inline v_float32x4 ColoredTSDFVolumeCPU::getNormalVoxel(const v_float32x4& p) const
-{
-    if (v_check_any(p < v_float32x4(1.f, 1.f, 1.f, 0.f)) ||
-        v_check_any(p >= v_float32x4((float)(volResolution.x - 2),
-            (float)(volResolution.y - 2),
-            (float)(volResolution.z - 2), 1.f))
-        )
-        return nanv;
-
-    v_int32x4 ip = v_floor(p);
-    v_float32x4 t = p - v_cvt_f32(ip);
-    float tx = t.get0();
-    t = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
-    float ty = t.get0();
-    t = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
-    float tz = t.get0();
-
-    const int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
-    const RGBTsdfVoxel* volData = volume.ptr<RGBTsdfVoxel>();
-
-    int ix = ip.get0(); ip = v_rotate_right<1>(ip);
-    int iy = ip.get0(); ip = v_rotate_right<1>(ip);
-    int iz = ip.get0();
-
-    int coordBase = ix * xdim + iy * ydim + iz * zdim;
-
-    float CV_DECL_ALIGNED(16) an[4];
-    an[0] = an[1] = an[2] = an[3] = 0.f;
-    for (int c = 0; c < 3; c++)
-    {
-        const int dim = volDims[c];
-        float& nv = an[c];
-
-        float vx[8];
-        for (int i = 0; i < 8; i++)
-            vx[i] = tsdfToFloat(volData[neighbourCoords[i] + coordBase + 1 * dim].tsdf) -
-                    tsdfToFloat(volData[neighbourCoords[i] + coordBase - 1 * dim].tsdf);
-
-        v_float32x4 v0246(vx[0], vx[2], vx[4], vx[6]);
-        v_float32x4 v1357(vx[1], vx[3], vx[5], vx[7]);
-        v_float32x4 vxx = v0246 + v_setall_f32(tz) * (v1357 - v0246);
-
-        v_float32x4 v00_10 = vxx;
-        v_float32x4 v01_11 = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(vxx)));
-
-        v_float32x4 v0_1 = v00_10 + v_setall_f32(ty) * (v01_11 - v00_10);
-        float v0 = v0_1.get0();
-        v0_1 = v_reinterpret_as_f32(v_rotate_right<2>(v_reinterpret_as_u32(v0_1)));
-        float v1 = v0_1.get0();
-
-        nv = v0 + tx * (v1 - v0);
-    }
-
-    v_float32x4 n = v_load_aligned(an);
-    v_float32x4 Norm = v_sqrt(v_setall_f32(v_reduce_sum(n * n)));
-
-    return Norm.get0() < 0.0001f ? nanv : n / Norm;
-}
-#else
-inline Point3f ColoredTSDFVolumeCPU::getNormalVoxel(const Point3f& p) const
-{
-    const int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
-    const RGBTsdfVoxel* volData = volume.ptr<RGBTsdfVoxel>();
-
-    if(p.x < 1 || p.x >= volResolution.x - 2 ||
-       p.y < 1 || p.y >= volResolution.y - 2 ||
-       p.z < 1 || p.z >= volResolution.z - 2)
-        return nan3;
-
-    int ix = cvFloor(p.x);
-    int iy = cvFloor(p.y);
-    int iz = cvFloor(p.z);
-
-    float tx = p.x - ix;
-    float ty = p.y - iy;
-    float tz = p.z - iz;
-
-    int coordBase = ix*xdim + iy*ydim + iz*zdim;
-
-    Vec3f an;
-    for(int c = 0; c < 3; c++)
-    {
-        const int dim = volDims[c];
-        float& nv = an[c];
-
-        float vx[8];
-        for(int i = 0; i < 8; i++)
-            vx[i] = tsdfToFloat(volData[neighbourCoords[i] + coordBase + 1 * dim].tsdf) -
-                    tsdfToFloat(volData[neighbourCoords[i] + coordBase - 1 * dim].tsdf);
-
-        float v00 = vx[0] + tz*(vx[1] - vx[0]);
-        float v01 = vx[2] + tz*(vx[3] - vx[2]);
-        float v10 = vx[4] + tz*(vx[5] - vx[4]);
-        float v11 = vx[6] + tz*(vx[7] - vx[6]);
-
-        float v0 = v00 + ty*(v01 - v00);
-        float v1 = v10 + ty*(v11 - v10);
-
-        nv = v0 + tx*(v1 - v0);
-    }
-
-    float nv = sqrt(an[0] * an[0] +
-                    an[1] * an[1] +
-                    an[2] * an[2]);
-    return nv < 0.0001f ? nan3 : an / nv;
-}
-#endif
-
-#if USE_INTRINSICS
-inline float ColoredTSDFVolumeCPU::interpolateColor(float tx, float ty, float tz, float vx[8]) const
-{
-    v_float32x4 v0246, v1357;
-    v_load_deinterleave(vx, v0246, v1357);
-
-    v_float32x4 vxx = v0246 + v_setall_f32(tz) * (v1357 - v0246);
-
-    v_float32x4 v00_10 = vxx;
-    v_float32x4 v01_11 = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(vxx)));
-
-    v_float32x4 v0_1 = v00_10 + v_setall_f32(ty) * (v01_11 - v00_10);
-    float v0 = v0_1.get0();
-    v0_1 = v_reinterpret_as_f32(v_rotate_right<2>(v_reinterpret_as_u32(v0_1)));
-    float v1 = v0_1.get0();
-
-    return v0 + tx * (v1 - v0);
-}
-#else
-inline float ColoredTSDFVolumeCPU::interpolateColor(float tx, float ty, float tz, float vx[8]) const
-{
-    float v00 = vx[0] + tz * (vx[1] - vx[0]);
-    float v01 = vx[2] + tz * (vx[3] - vx[2]);
-    float v10 = vx[4] + tz * (vx[5] - vx[4]);
-    float v11 = vx[6] + tz * (vx[7] - vx[6]);
-
-    float v0 = v00 + ty * (v01 - v00);
-    float v1 = v10 + ty * (v11 - v10);
-
-    return v0 + tx * (v1 - v0);
-}
-#endif
-
-#if USE_INTRINSICS
-//gradientDeltaFactor is fixed at 1.0 of voxel size
-inline Point3f ColoredTSDFVolumeCPU::getColorVoxel(const Point3f& _p) const
-{
-    v_float32x4 p(_p.x, _p.y, _p.z, 0.f);
-    v_float32x4 result = getColorVoxel(p);
-    float CV_DECL_ALIGNED(16) ares[4];
-    v_store_aligned(ares, result);
-    return Point3f(ares[0], ares[1], ares[2]);
-}
-inline v_float32x4 ColoredTSDFVolumeCPU::getColorVoxel(const v_float32x4& p) const
-{
-    if (v_check_any(p < v_float32x4(1.f, 1.f, 1.f, 0.f)) ||
-        v_check_any(p >= v_float32x4((float)(volResolution.x - 2),
-            (float)(volResolution.y - 2),
-            (float)(volResolution.z - 2), 1.f))
-        )
-        return nanv;
-
-    v_int32x4 ip = v_floor(p);
-
-    const int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
-    const RGBTsdfVoxel* volData = volume.ptr<RGBTsdfVoxel>();
-
-    int ix = ip.get0(); ip = v_rotate_right<1>(ip);
-    int iy = ip.get0(); ip = v_rotate_right<1>(ip);
-    int iz = ip.get0();
-
-    int coordBase = ix * xdim + iy * ydim + iz * zdim;
-    float CV_DECL_ALIGNED(16) rgb[4];
-
-#if USE_INTERPOLATION_IN_GETNORMAL
-    float r[8], g[8], b[8];
-    for (int i = 0; i < 8; i++)
-    {
-        r[i] = (float)volData[neighbourCoords[i] + coordBase].r;
-        g[i] = (float)volData[neighbourCoords[i] + coordBase].g;
-        b[i] = (float)volData[neighbourCoords[i] + coordBase].b;
-    }
-
-    v_float32x4 vsi(voxelSizeInv, voxelSizeInv, voxelSizeInv, voxelSizeInv);
-    v_float32x4 ptVox = p * vsi;
-    v_int32x4 iptVox = v_floor(ptVox);
-    v_float32x4 t = ptVox - v_cvt_f32(iptVox);
-    float tx = t.get0(); t = v_rotate_right<1>(t);
-    float ty = t.get0(); t = v_rotate_right<1>(t);
-    float tz = t.get0();
-    rgb[0] = interpolateColor(tx, ty, tz, r);
-    rgb[1] = interpolateColor(tx, ty, tz, g);
-    rgb[2] = interpolateColor(tx, ty, tz, b);
-    rgb[3] = 0.f;
-#else
-    rgb[0] = volData[coordBase].r;
-    rgb[1] = volData[coordBase].g;
-    rgb[2] = volData[coordBase].b;
-    rgb[3] = 0.f;
-#endif
-    v_float32x4 res = v_load_aligned(rgb);
-    return res;
-}
-#else
-inline Point3f ColoredTSDFVolumeCPU::getColorVoxel(const Point3f& p) const
-{
-    const int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
-    const RGBTsdfVoxel* volData = volume.ptr<RGBTsdfVoxel>();
-
-
-
-    if(p.x < 1 || p.x >= volResolution.x - 2 ||
-       p.y < 1 || p.y >= volResolution.y - 2 ||
-       p.z < 1 || p.z >= volResolution.z - 2)
-        return nan3;
-
-    int ix = cvFloor(p.x);
-    int iy = cvFloor(p.y);
-    int iz = cvFloor(p.z);
-
-    int coordBase = ix*xdim + iy*ydim + iz*zdim;
-    Point3f res;
-
-#if USE_INTERPOLATION_IN_GETNORMAL
-    // TODO: create better interpolation or remove this simple version
-    float r[8], g[8], b[8];
-    for (int i = 0; i < 8; i++)
-    {
-        r[i] = (float) volData[neighbourCoords[i] + coordBase].r;
-        g[i] = (float) volData[neighbourCoords[i] + coordBase].g;
-        b[i] = (float) volData[neighbourCoords[i] + coordBase].b;
-    }
-
-    Point3f ptVox = p * voxelSizeInv;
-    Vec3i iptVox(cvFloor(ptVox.x), cvFloor(ptVox.y), cvFloor(ptVox.z));
-    float tx = ptVox.x - iptVox[0];
-    float ty = ptVox.y - iptVox[1];
-    float tz = ptVox.z - iptVox[2];
-
-    res=Point3f(interpolateColor(tx, ty, tz, r),
-                interpolateColor(tx, ty, tz, g),
-                interpolateColor(tx, ty, tz, b));
-#else
-    res=Point3f(volData[coordBase].r, volData[coordBase].g, volData[coordBase].b);
-#endif
-    colorFix(res);
-    return res;
-}
-#endif
-
-struct ColorRaycastInvoker : ParallelLoopBody
-{
-    ColorRaycastInvoker(Points& _points, Normals& _normals, Colors& _colors, const Matx44f& cameraPose,
-                        const Intr& depth_intrinsics, const ColoredTSDFVolumeCPU& _volume) :
-        ParallelLoopBody(),
-        points(_points),
-        normals(_normals),
-        colors(_colors),
-        volume(_volume),
-        tstep(volume.truncDist * volume.raycastStepFactor),
-        // We do subtract voxel size to minimize checks after
-        // Note: origin of volume coordinate is placed
-        // in the center of voxel (0,0,0), not in the corner of the voxel!
-        boxMax(volume.volSize - Point3f(volume.voxelSize,
-                                        volume.voxelSize,
-                                        volume.voxelSize)),
-        boxMin(),
-        cam2vol(volume.pose.inv() * Affine3f(cameraPose)),
-        vol2cam(Affine3f(cameraPose.inv()) * volume.pose),
-        reprojDepth(depth_intrinsics.makeReprojector())
-    {  }
-#if USE_INTRINSICS
-    virtual void operator() (const Range& range) const override
-    {
-        const v_float32x4 vfxy(reprojDepth.fxinv, reprojDepth.fyinv, 0, 0);
-        const v_float32x4 vcxy(reprojDepth.cx, reprojDepth.cy, 0, 0);
-
-        const float(&cm)[16] = cam2vol.matrix.val;
-        const v_float32x4 camRot0(cm[0], cm[4], cm[8], 0);
-        const v_float32x4 camRot1(cm[1], cm[5], cm[9], 0);
-        const v_float32x4 camRot2(cm[2], cm[6], cm[10], 0);
-        const v_float32x4 camTrans(cm[3], cm[7], cm[11], 0);
-
-        const v_float32x4 boxDown(boxMin.x, boxMin.y, boxMin.z, 0.f);
-        const v_float32x4 boxUp(boxMax.x, boxMax.y, boxMax.z, 0.f);
-
-        const v_float32x4 invVoxelSize = v_float32x4(volume.voxelSizeInv,
-            volume.voxelSizeInv,
-            volume.voxelSizeInv, 1.f);
-
-        const float(&vm)[16] = vol2cam.matrix.val;
-        const v_float32x4 volRot0(vm[0], vm[4], vm[8], 0);
-        const v_float32x4 volRot1(vm[1], vm[5], vm[9], 0);
-        const v_float32x4 volRot2(vm[2], vm[6], vm[10], 0);
-        const v_float32x4 volTrans(vm[3], vm[7], vm[11], 0);
-
-        for (int y = range.start; y < range.end; y++)
-        {
-            ptype* ptsRow = points[y];
-            ptype* nrmRow = normals[y];
-            ptype* clrRow = colors[y];
-
-            for (int x = 0; x < points.cols; x++)
-            {
-                v_float32x4 point = nanv, normal = nanv, color = nanv;
-
-                v_float32x4 orig = camTrans;
-
-                // get direction through pixel in volume space:
-
-                // 1. reproject (x, y) on projecting plane where z = 1.f
-                v_float32x4 planed = (v_float32x4((float)x, (float)y, 0.f, 0.f) - vcxy) * vfxy;
-                planed = v_combine_low(planed, v_float32x4(1.f, 0.f, 0.f, 0.f));
-
-                // 2. rotate to volume space
-                planed = v_matmuladd(planed, camRot0, camRot1, camRot2, v_setzero_f32());
-
-                // 3. normalize
-                v_float32x4 invNorm = v_invsqrt(v_setall_f32(v_reduce_sum(planed * planed)));
-                v_float32x4 dir = planed * invNorm;
-
-                // compute intersection of ray with all six bbox planes
-                v_float32x4 rayinv = v_setall_f32(1.f) / dir;
-                // div by zero should be eliminated by these products
-                v_float32x4 tbottom = rayinv * (boxDown - orig);
-                v_float32x4 ttop = rayinv * (boxUp - orig);
-
-                // re-order intersections to find smallest and largest on each axis
-                v_float32x4 minAx = v_min(ttop, tbottom);
-                v_float32x4 maxAx = v_max(ttop, tbottom);
-
-                // near clipping plane
-                const float clip = 0.f;
-                float _minAx[4], _maxAx[4];
-                v_store(_minAx, minAx);
-                v_store(_maxAx, maxAx);
-                float tmin = max({ _minAx[0], _minAx[1], _minAx[2], clip });
-                float tmax = min({ _maxAx[0], _maxAx[1], _maxAx[2] });
-
-                // precautions against getting coordinates out of bounds
-                tmin = tmin + tstep;
-                tmax = tmax - tstep;
-
-                if (tmin < tmax)
-                {
-                    // interpolation optimized a little
-                    orig *= invVoxelSize;
-                    dir *= invVoxelSize;
-
-                    int xdim = volume.volDims[0];
-                    int ydim = volume.volDims[1];
-                    int zdim = volume.volDims[2];
-                    v_float32x4 rayStep = dir * v_setall_f32(tstep);
-                    v_float32x4 next = (orig + dir * v_setall_f32(tmin));
-                    float f = volume.interpolateVoxel(next), fnext = f;
-
-                    //raymarch
-                    int steps = 0;
-                    int nSteps = cvFloor((tmax - tmin) / tstep);
-                    for (; steps < nSteps; steps++)
-                    {
-                        next += rayStep;
-                        v_int32x4 ip = v_round(next);
-                        int ix = ip.get0(); ip = v_rotate_right<1>(ip);
-                        int iy = ip.get0(); ip = v_rotate_right<1>(ip);
-                        int iz = ip.get0();
-                        int coord = ix * xdim + iy * ydim + iz * zdim;
-
-                        fnext = tsdfToFloat(volume.volume.at<RGBTsdfVoxel>(coord).tsdf);
-                        if (fnext != f)
-                        {
-                            fnext = volume.interpolateVoxel(next);
-
-                            // when ray crosses a surface
-                            if (std::signbit(f) != std::signbit(fnext))
-                                break;
-
-                            f = fnext;
-                        }
-                    }
-
-                    // if ray penetrates a surface from outside
-                    // linearly interpolate t between two f values
-                    if (f > 0.f && fnext < 0.f)
-                    {
-                        v_float32x4 tp = next - rayStep;
-                        float ft = volume.interpolateVoxel(tp);
-                        float ftdt = volume.interpolateVoxel(next);
-                        float ts = tmin + tstep * (steps - ft / (ftdt - ft));
-
-                        // avoid division by zero
-                        if (!cvIsNaN(ts) && !cvIsInf(ts))
-                        {
-                            v_float32x4 pv = (orig + dir * v_setall_f32(ts));
-                            v_float32x4 nv = volume.getNormalVoxel(pv);
-                            v_float32x4 cv = volume.getColorVoxel(pv);
-
-                            if (!isNaN(nv))
-                            {
-                                color = cv;
-                                //convert pv and nv to camera space
-                                normal = v_matmuladd(nv, volRot0, volRot1, volRot2, v_setzero_f32());
-                                // interpolation optimized a little
-                                point = v_matmuladd(pv * v_float32x4(volume.voxelSize,
-                                    volume.voxelSize,
-                                    volume.voxelSize, 1.f),
-                                    volRot0, volRot1, volRot2, volTrans);
-                            }
-                        }
-                    }
-                }
-
-                v_store((float*)(&ptsRow[x]), point);
-                v_store((float*)(&nrmRow[x]), normal);
-                v_store((float*)(&clrRow[x]), color);
-            }
-        }
-    }
-#else
-    virtual void operator() (const Range& range) const override
-    {
-        const Point3f camTrans = cam2vol.translation();
-        const Matx33f  camRot  = cam2vol.rotation();
-        const Matx33f  volRot  = vol2cam.rotation();
-
-        for(int y = range.start; y < range.end; y++)
-        {
-            ptype* ptsRow = points[y];
-            ptype* nrmRow = normals[y];
-            ptype* clrRow = colors[y];
-
-            for(int x = 0; x < points.cols; x++)
-            {
-                Point3f point = nan3, normal = nan3, color = nan3;
-
-                Point3f orig = camTrans;
-                // direction through pixel in volume space
-                Point3f dir = normalize(Vec3f(camRot * reprojDepth(Point3f(float(x), float(y), 1.f))));
-
-                // compute intersection of ray with all six bbox planes
-                Vec3f rayinv(1.f/dir.x, 1.f/dir.y, 1.f/dir.z);
-                Point3f tbottom = rayinv.mul(boxMin - orig);
-                Point3f ttop    = rayinv.mul(boxMax - orig);
-
-                // re-order intersections to find smallest and largest on each axis
-                Point3f minAx(min(ttop.x, tbottom.x), min(ttop.y, tbottom.y), min(ttop.z, tbottom.z));
-                Point3f maxAx(max(ttop.x, tbottom.x), max(ttop.y, tbottom.y), max(ttop.z, tbottom.z));
-
-                // near clipping plane
-                const float clip = 0.f;
-                //float tmin = max(max(max(minAx.x, minAx.y), max(minAx.x, minAx.z)), clip);
-                //float tmax =     min(min(maxAx.x, maxAx.y), min(maxAx.x, maxAx.z));
-                float tmin = max({ minAx.x, minAx.y, minAx.z, clip });
-                float tmax = min({ maxAx.x, maxAx.y, maxAx.z });
-
-                // precautions against getting coordinates out of bounds
-                tmin = tmin + tstep;
-                tmax = tmax - tstep;
-
-                if(tmin < tmax)
-                {
-                    // interpolation optimized a little
-                    orig = orig*volume.voxelSizeInv;
-                    dir  =  dir*volume.voxelSizeInv;
-
-                    Point3f rayStep = dir * tstep;
-                    Point3f next = (orig + dir * tmin);
-                    float f = volume.interpolateVoxel(next), fnext = f;
-
-                    //raymarch
-                    int steps = 0;
-                    int nSteps = int(floor((tmax - tmin)/tstep));
-                    for(; steps < nSteps; steps++)
-                    {
-                        next += rayStep;
-                        int xdim = volume.volDims[0];
-                        int ydim = volume.volDims[1];
-                        int zdim = volume.volDims[2];
-                        int ix = cvRound(next.x);
-                        int iy = cvRound(next.y);
-                        int iz = cvRound(next.z);
-                        fnext = tsdfToFloat(volume.volume.at<RGBTsdfVoxel>(ix*xdim + iy*ydim + iz*zdim).tsdf);
-                        if(fnext != f)
-                        {
-                            fnext = volume.interpolateVoxel(next);
-                            // when ray crosses a surface
-                            if(std::signbit(f) != std::signbit(fnext))
-                                break;
-
-                            f = fnext;
-                        }
-                    }
-                    // if ray penetrates a surface from outside
-                    // linearly interpolate t between two f values
-                    if(f > 0.f && fnext < 0.f)
-                    {
-                        Point3f tp    = next - rayStep;
-                        float ft   = volume.interpolateVoxel(tp);
-                        float ftdt = volume.interpolateVoxel(next);
-                        // float t = tmin + steps*tstep;
-                        // float ts = t - tstep*ft/(ftdt - ft);
-                        float ts = tmin + tstep*(steps - ft/(ftdt - ft));
-
-                        // avoid division by zero
-                        if(!cvIsNaN(ts) && !cvIsInf(ts))
-                        {
-                            Point3f pv = (orig + dir*ts);
-                            Point3f nv = volume.getNormalVoxel(pv);
-                            Point3f cv = volume.getColorVoxel(pv);
-                            if(!isNaN(nv))
-                            {
-                                //convert pv and nv to camera space
-                                normal = volRot * nv;
-                                color = cv;
-                                // interpolation optimized a little
-                                point = vol2cam * (pv*volume.voxelSize);
-                            }
-                        }
-                    }
-                }
-                ptsRow[x] = toPtype(point);
-                nrmRow[x] = toPtype(normal);
-                clrRow[x] = toPtype(color);
-            }
-        }
-    }
-#endif
-
-    Points& points;
-    Normals& normals;
-    Colors& colors;
-    const ColoredTSDFVolumeCPU& volume;
-
-    const float tstep;
-
-    const Point3f boxMax;
-    const Point3f boxMin;
-
-    const Affine3f cam2vol;
-    const Affine3f vol2cam;
-    const Intr::Reprojector reprojDepth;
-};
-
-
-void ColoredTSDFVolumeCPU::raycast(const Matx44f& cameraPose, const Matx33f& depth_intrinsics, const Size& frameSize,
-                                   OutputArray _points, OutputArray _normals, OutputArray _colors) const
-{
-    CV_TRACE_FUNCTION();
-
-    CV_Assert(frameSize.area() > 0);
-
-    _points.create (frameSize, POINT_TYPE);
-    _normals.create(frameSize, POINT_TYPE);
-    _colors.create(frameSize, POINT_TYPE);
-
-    Points points   =  _points.getMat();
-    Normals normals = _normals.getMat();
-    Colors colors = _colors.getMat();
-    ColorRaycastInvoker ri(points, normals, colors, cameraPose, Intr(depth_intrinsics), *this);
-
-    const int nstripes = -1;
-    parallel_for_(Range(0, points.rows), ri, nstripes);
-}
-
-
-struct ColorFetchPointsNormalsInvoker : ParallelLoopBody
-{
-    ColorFetchPointsNormalsInvoker(const ColoredTSDFVolumeCPU& _volume,
-                              std::vector<std::vector<ptype>>& _pVecs,
-                              std::vector<std::vector<ptype>>& _nVecs,
-                              bool _needNormals) :
-        ParallelLoopBody(),
-        vol(_volume),
-        pVecs(_pVecs),
-        nVecs(_nVecs),
-        needNormals(_needNormals)
-    {
-        volDataStart = vol.volume.ptr<RGBTsdfVoxel>();
-    }
-
-    inline void coord(std::vector<ptype>& points, std::vector<ptype>& normals,
-                      int x, int y, int z, Point3f V, float v0, int axis) const
-    {
-        // 0 for x, 1 for y, 2 for z
-        bool limits = false;
-        Point3i shift;
-        float Vc = 0.f;
-        if(axis == 0)
-        {
-            shift  = Point3i(1, 0, 0);
-            limits = (x + 1 < vol.volResolution.x);
-            Vc     = V.x;
-        }
-        if(axis == 1)
-        {
-            shift  = Point3i(0, 1, 0);
-            limits = (y + 1 < vol.volResolution.y);
-            Vc     = V.y;
-        }
-        if(axis == 2)
-        {
-            shift  = Point3i(0, 0, 1);
-            limits = (z + 1 < vol.volResolution.z);
-            Vc     = V.z;
-        }
-
-        if(limits)
-        {
-            const RGBTsdfVoxel& voxeld = volDataStart[(x+shift.x)*vol.volDims[0] +
-                                                      (y+shift.y)*vol.volDims[1] +
-                                                      (z+shift.z)*vol.volDims[2]];
-            float vd = tsdfToFloat(voxeld.tsdf);
-
-            if(voxeld.weight != 0 && vd != 1.f)
-            {
-                if((v0 > 0 && vd < 0) || (v0 < 0 && vd > 0))
-                {
-                    //linearly interpolate coordinate
-                    float Vn    = Vc + vol.voxelSize;
-                    float dinv  = 1.f/(abs(v0)+abs(vd));
-                    float inter = (Vc*abs(vd) + Vn*abs(v0))*dinv;
-
-                    Point3f p(shift.x ? inter : V.x,
-                              shift.y ? inter : V.y,
-                              shift.z ? inter : V.z);
-                    {
-                        points.push_back(toPtype(vol.pose * p));
-                        if(needNormals)
-                            normals.push_back(toPtype(vol.pose.rotation() *
-                                                      vol.getNormalVoxel(p*vol.voxelSizeInv)));
-                    }
-                }
-            }
-        }
-    }
-
-    virtual void operator() (const Range& range) const override
-    {
-        std::vector<ptype> points, normals;
-        for(int x = range.start; x < range.end; x++)
-        {
-            const RGBTsdfVoxel* volDataX = volDataStart + x*vol.volDims[0];
-            for(int y = 0; y < vol.volResolution.y; y++)
-            {
-                const RGBTsdfVoxel* volDataY = volDataX + y*vol.volDims[1];
-                for(int z = 0; z < vol.volResolution.z; z++)
-                {
-                    const RGBTsdfVoxel& voxel0 = volDataY[z*vol.volDims[2]];
-                    float v0             = tsdfToFloat(voxel0.tsdf);
-                    if(voxel0.weight != 0 && v0 != 1.f)
-                    {
-                        Point3f V(Point3f((float)x + 0.5f, (float)y + 0.5f, (float)z + 0.5f)*vol.voxelSize);
-
-                        coord(points, normals, x, y, z, V, v0, 0);
-                        coord(points, normals, x, y, z, V, v0, 1);
-                        coord(points, normals, x, y, z, V, v0, 2);
-
-                    } // if voxel is not empty
-                }
-            }
-        }
-
-        AutoLock al(mutex);
-        pVecs.push_back(points);
-        nVecs.push_back(normals);
-    }
-
-    const ColoredTSDFVolumeCPU& vol;
-    std::vector<std::vector<ptype>>& pVecs;
-    std::vector<std::vector<ptype>>& nVecs;
-    const RGBTsdfVoxel* volDataStart;
-    bool needNormals;
-    mutable Mutex mutex;
-};
-
-void ColoredTSDFVolumeCPU::fetchPointsNormals(OutputArray _points, OutputArray _normals) const
-{
-    CV_TRACE_FUNCTION();
-
-    if(_points.needed())
-    {
-        std::vector<std::vector<ptype>> pVecs, nVecs;
-        ColorFetchPointsNormalsInvoker fi(*this, pVecs, nVecs, _normals.needed());
-        Range range(0, volResolution.x);
-        const int nstripes = -1;
-        parallel_for_(range, fi, nstripes);
-        std::vector<ptype> points, normals;
-        for(size_t i = 0; i < pVecs.size(); i++)
-        {
-            points.insert(points.end(), pVecs[i].begin(), pVecs[i].end());
-            normals.insert(normals.end(), nVecs[i].begin(), nVecs[i].end());
-        }
-
-        _points.create((int)points.size(), 1, POINT_TYPE);
-        if(!points.empty())
-            Mat((int)points.size(), 1, POINT_TYPE, &points[0]).copyTo(_points.getMat());
-
-        if(_normals.needed())
-        {
-            _normals.create((int)normals.size(), 1, POINT_TYPE);
-            if(!normals.empty())
-                Mat((int)normals.size(), 1, POINT_TYPE, &normals[0]).copyTo(_normals.getMat());
-        }
-    }
-}
-
-void ColoredTSDFVolumeCPU::fetchNormals(InputArray _points, OutputArray _normals) const
-{
-    CV_TRACE_FUNCTION();
-    CV_Assert(!_points.empty());
-    if(_normals.needed())
-    {
-        Points points = _points.getMat();
-        CV_Assert(points.type() == POINT_TYPE);
-
-        _normals.createSameSize(_points, _points.type());
-        Normals normals = _normals.getMat();
-
-        const ColoredTSDFVolumeCPU& _vol = *this;
-        auto PushNormals = [&](const ptype& pp, const int* position)
-        {
-            const ColoredTSDFVolumeCPU& vol(_vol);
-            Affine3f invPose(vol.pose.inv());
-            Point3f p = fromPtype(pp);
-            Point3f n = nan3;
-            if (!isNaN(p))
-            {
-                Point3f voxPt = (invPose * p);
-                voxPt = voxPt * vol.voxelSizeInv;
-                n = vol.pose.rotation() * vol.getNormalVoxel(voxPt);
-            }
-            normals(position[0], position[1]) = toPtype(n);
-        };
-        points.forEach(PushNormals);
-    }
-}
-
-Ptr<ColoredTSDFVolume> makeColoredTSDFVolume(float _voxelSize, Matx44f _pose, float _raycastStepFactor,
-                                   float _truncDist, int _maxWeight, Point3i _resolution)
-{
-    return makePtr<ColoredTSDFVolumeCPU>(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, _resolution);
-}
-
-Ptr<ColoredTSDFVolume> makeColoredTSDFVolume(const VolumeParams& _params)
-{
-    Mat pm = _params.pose;
-    CV_Assert(pm.size() == Size(4, 4));
-    CV_Assert(pm.type() == CV_32F);
-    Matx44f pose = pm;
-
-    cv::Point3i resolution(_params.resolutionX, _params.resolutionY, _params.resolutionZ);
-    return makePtr<ColoredTSDFVolumeCPU>(_params.voxelSize, pose, _params.raycastStepFactor,
-                                         _params.tsdfTruncDist, _params.maxWeight, resolution);
-}
-
-} // namespace cv
diff --git a/modules/3d/src/rgbd/colored_tsdf.hpp b/modules/3d/src/rgbd/colored_tsdf.hpp
deleted file mode 100644
index c66e01e535..0000000000
--- a/modules/3d/src/rgbd/colored_tsdf.hpp
+++ /dev/null
@@ -1,39 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html
-
-#ifndef OPENCV_3D_COLORED_TSDF_HPP
-#define OPENCV_3D_COLORED_TSDF_HPP
-
-#include "../precomp.hpp"
-#include "tsdf_functions.hpp"
-
-namespace cv
-{
-
-class ColoredTSDFVolume : public Volume
-{
-   public:
-    // dimension in voxels, size in meters
-    ColoredTSDFVolume(float _voxelSize, Matx44f _pose, float _raycastStepFactor, float _truncDist,
-                      int _maxWeight, Point3i _resolution, bool zFirstMemOrder = true);
-    virtual ~ColoredTSDFVolume() = default;
-
-   public:
-
-    Point3i volResolution;
-    WeightType maxWeight;
-
-    Point3f volSize;
-    float truncDist;
-    Vec4i volDims;
-    Vec8i neighbourCoords;
-};
-
-Ptr<ColoredTSDFVolume> makeColoredTSDFVolume(float _voxelSize, Matx44f _pose, float _raycastStepFactor,
-                                             float _truncDist, int _maxWeight, Point3i _resolution);
-Ptr<ColoredTSDFVolume> makeColoredTSDFVolume(const VolumeParams& _params);
-
-}  // namespace cv
-
-#endif // include guard
diff --git a/modules/3d/src/rgbd/hash_tsdf.cpp b/modules/3d/src/rgbd/hash_tsdf.cpp
deleted file mode 100644
index b57ddc19f1..0000000000
--- a/modules/3d/src/rgbd/hash_tsdf.cpp
+++ /dev/null
@@ -1,1795 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html
-
-#include "../precomp.hpp"
-#include "hash_tsdf.hpp"
-#include "opencl_kernels_3d.hpp"
-
-#define USE_INTERPOLATION_IN_GETNORMAL 1
-#define VOLUMES_SIZE 8192
-
-namespace cv
-{
-
-HashTSDFVolume::HashTSDFVolume(float _voxelSize, cv::Matx44f _pose, float _raycastStepFactor,
-                               float _truncDist, int _maxWeight, float _truncateThreshold,
-                               int _volumeUnitRes, bool _zFirstMemOrder)
-    : Volume(_voxelSize, _pose, _raycastStepFactor),
-    maxWeight(_maxWeight),
-    truncateThreshold(_truncateThreshold),
-    volumeUnitResolution(_volumeUnitRes),
-    volumeUnitSize(voxelSize* volumeUnitResolution),
-    zFirstMemOrder(_zFirstMemOrder)
-{
-    truncDist = std::max(_truncDist, 4.0f * voxelSize);
-
-    if (!(volumeUnitResolution & (volumeUnitResolution - 1)))
-    {
-        // vuRes is a power of 2, let's get this power
-        volumeUnitDegree = trailingZeros32(volumeUnitResolution);
-    }
-    else
-    {
-        CV_Error(Error::StsBadArg, "Volume unit resolution should be a power of 2");
-    }
-
-    int xdim, ydim, zdim;
-    if (zFirstMemOrder)
-    {
-        xdim = volumeUnitResolution * volumeUnitResolution;
-        ydim = volumeUnitResolution;
-        zdim = 1;
-    }
-    else
-    {
-        xdim = 1;
-        ydim = volumeUnitResolution;
-        zdim = volumeUnitResolution * volumeUnitResolution;
-    }
-    volStrides = Vec4i(xdim, ydim, zdim);
-}
-
-//! Spatial hashing
-struct tsdf_hash
-{
-    size_t operator()(const Vec3i& x) const noexcept
-    {
-        size_t seed = 0;
-        constexpr uint32_t GOLDEN_RATIO = 0x9e3779b9;
-        for (uint16_t i = 0; i < 3; i++)
-        {
-            seed ^= std::hash<int>()(x[i]) + GOLDEN_RATIO + (seed << 6) + (seed >> 2);
-        }
-        return seed;
-    }
-};
-
-struct VolumeUnit
-{
-    cv::Vec3i coord;
-    int index;
-    cv::Matx44f pose;
-    int lastVisibleIndex = 0;
-    bool isActive;
-};
-
-typedef std::unordered_set<cv::Vec3i, tsdf_hash> VolumeUnitIndexSet;
-typedef std::unordered_map<cv::Vec3i, VolumeUnit, tsdf_hash> VolumeUnitIndexes;
-
-class HashTSDFVolumeCPU : public HashTSDFVolume
-{
-public:
-    // dimension in voxels, size in meters
-    HashTSDFVolumeCPU(float _voxelSize, const Matx44f& _pose, float _raycastStepFactor, float _truncDist, int _maxWeight,
-        float _truncateThreshold, int _volumeUnitRes, bool zFirstMemOrder = true);
-
-    HashTSDFVolumeCPU(const VolumeParams& _volumeParams, bool zFirstMemOrder = true);
-
-    virtual void integrate(InputArray, InputArray, float, const Matx44f&, const Matx33f&, const Matx33f&, const int) override
-    { CV_Error(Error::StsNotImplemented, "Not implemented"); }
-    void integrate(InputArray _depth, float depthFactor, const Matx44f& cameraPose, const Matx33f& intrinsics,
-                   const int frameId = 0) override;
-    void raycast(const Matx44f& cameraPose, const Matx33f& intrinsics, const Size& frameSize, OutputArray points,
-                 OutputArray normals) const override;
-    void raycast(const Matx44f&, const Matx33f&, const Size&, OutputArray, OutputArray, OutputArray) const override
-    { CV_Error(Error::StsNotImplemented, "Not implemented"); }
-    void fetchNormals(InputArray points, OutputArray _normals) const override;
-    void fetchPointsNormals(OutputArray points, OutputArray normals) const override;
-
-    void reset() override;
-    size_t getTotalVolumeUnits() const override { return volumeUnits.size(); }
-    int getVisibleBlocks(int currFrameId, int frameThreshold) const override;
-
-    //! Return the voxel given the voxel index in the universal volume (1 unit = 1 voxel_length)
-    TsdfVoxel at(const Vec3i& volumeIdx) const;
-
-    //! Return the voxel given the point in volume coordinate system i.e., (metric scale 1 unit =
-    //! 1m)
-    virtual TsdfVoxel at(const cv::Point3f& point) const;
-    virtual TsdfVoxel _at(const cv::Vec3i& volumeIdx, int indx) const;
-
-    TsdfVoxel atVolumeUnit(const Vec3i& point, const Vec3i& volumeUnitIdx, VolumeUnitIndexes::const_iterator it) const;
-
-
-    float interpolateVoxelPoint(const Point3f& point) const;
-    float interpolateVoxel(const cv::Point3f& point) const;
-    Point3f getNormalVoxel(const cv::Point3f& p) const;
-
-    //! Utility functions for coordinate transformations
-    Vec3i volumeToVolumeUnitIdx(const Point3f& point) const;
-    Point3f volumeUnitIdxToVolume(const Vec3i& volumeUnitIdx) const;
-
-    Point3f voxelCoordToVolume(const Vec3i& voxelIdx) const;
-    Vec3i volumeToVoxelCoord(const Point3f& point) const;
-
-public:
-    Vec6f frameParams;
-    Mat pixNorms;
-    VolumeUnitIndexes volumeUnits;
-    cv::Mat volUnitsData;
-    int lastVolIndex;
-};
-
-
-HashTSDFVolumeCPU::HashTSDFVolumeCPU(float _voxelSize, const Matx44f& _pose, float _raycastStepFactor, float _truncDist,
-                                     int _maxWeight, float _truncateThreshold, int _volumeUnitRes, bool _zFirstMemOrder)
-    :HashTSDFVolume(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, _truncateThreshold, _volumeUnitRes,
-                    _zFirstMemOrder)
-{
-    reset();
-}
-
-HashTSDFVolumeCPU::HashTSDFVolumeCPU(const VolumeParams& _params, bool _zFirstMemOrder)
-    : HashTSDFVolumeCPU(_params.voxelSize,
-                        Matx44f(_params.pose),
-                        _params.raycastStepFactor, _params.tsdfTruncDist, _params.maxWeight,
-                        _params.depthTruncThreshold, _params.unitResolution, _zFirstMemOrder)
-{
-}
-
-// zero volume, leave rest params the same
-void HashTSDFVolumeCPU::reset()
-{
-    CV_TRACE_FUNCTION();
-    lastVolIndex = 0;
-    volUnitsData = cv::Mat(VOLUMES_SIZE, volumeUnitResolution * volumeUnitResolution * volumeUnitResolution, rawType<TsdfVoxel>());
-    frameParams = Vec6f();
-    pixNorms = Mat();
-    volumeUnits = VolumeUnitIndexes();
-}
-
-void HashTSDFVolumeCPU::integrate(InputArray _depth, float depthFactor, const Matx44f& cameraPose, const Matx33f& _intrinsics, const int frameId)
-{
-    CV_TRACE_FUNCTION();
-
-    CV_Assert(_depth.type() == DEPTH_TYPE);
-    Depth depth = _depth.getMat();
-
-    //! Compute volumes to be allocated
-    const int depthStride = volumeUnitDegree;
-    const float invDepthFactor = 1.f / depthFactor;
-    const Intr intrinsics(_intrinsics);
-    const Intr::Reprojector reproj(intrinsics.makeReprojector());
-    const Affine3f cam2vol(pose.inv() * Affine3f(cameraPose));
-    const Point3f truncPt(truncDist, truncDist, truncDist);
-    VolumeUnitIndexSet newIndices;
-    Mutex mutex;
-    Range allocateRange(0, depth.rows);
-
-    auto AllocateVolumeUnitsInvoker = [&](const Range& range) {
-        VolumeUnitIndexSet localAccessVolUnits;
-        for (int y = range.start; y < range.end; y += depthStride)
-        {
-            const depthType* depthRow = depth[y];
-            for (int x = 0; x < depth.cols; x += depthStride)
-            {
-                depthType z = depthRow[x] * invDepthFactor;
-                if (z <= 0 || z > this->truncateThreshold)
-                    continue;
-                Point3f camPoint = reproj(Point3f((float)x, (float)y, z));
-                Point3f volPoint = cam2vol * camPoint;
-                //! Find accessed TSDF volume unit for valid 3D vertex
-                Vec3i lower_bound = this->volumeToVolumeUnitIdx(volPoint - truncPt);
-                Vec3i upper_bound = this->volumeToVolumeUnitIdx(volPoint + truncPt);
-
-                for (int i = lower_bound[0]; i <= upper_bound[0]; i++)
-                    for (int j = lower_bound[1]; j <= upper_bound[1]; j++)
-                        for (int k = lower_bound[2]; k <= upper_bound[2]; k++)
-                        {
-                            const Vec3i tsdf_idx = Vec3i(i, j, k);
-                            if (localAccessVolUnits.count(tsdf_idx) <= 0 && this->volumeUnits.count(tsdf_idx) <= 0)
-                            {
-                                //! This volume unit will definitely be required for current integration
-                                localAccessVolUnits.emplace(tsdf_idx);
-                            }
-                        }
-            }
-        }
-
-        mutex.lock();
-        for (const auto& tsdf_idx : localAccessVolUnits)
-        {
-            //! If the insert into the global set passes
-            if (!newIndices.count(tsdf_idx))
-            {
-                // Volume allocation can be performed outside of the lock
-                newIndices.emplace(tsdf_idx);
-            }
-        }
-        mutex.unlock();
-    };
-    parallel_for_(allocateRange, AllocateVolumeUnitsInvoker);
-
-    //! Perform the allocation
-    for (auto idx : newIndices)
-    {
-        VolumeUnit& vu = this->volumeUnits.emplace(idx, VolumeUnit()).first->second;
-
-        Matx44f subvolumePose = pose.translate(volumeUnitIdxToVolume(idx)).matrix;
-
-        vu.pose = subvolumePose;
-        vu.index = lastVolIndex; lastVolIndex++;
-        if (lastVolIndex > int(volUnitsData.size().height))
-        {
-            volUnitsData.resize((lastVolIndex - 1) * 2);
-        }
-        volUnitsData.row(vu.index).forEach<VecTsdfVoxel>([](VecTsdfVoxel& vv, const int* /* position */)
-            {
-                TsdfVoxel& v = reinterpret_cast<TsdfVoxel&>(vv);
-                v.tsdf = floatToTsdf(0.0f); v.weight = 0;
-            });
-        //! This volume unit will definitely be required for current integration
-        vu.lastVisibleIndex = frameId;
-        vu.isActive = true;
-    }
-
-    //! Get keys for all the allocated volume Units
-    std::vector<Vec3i> totalVolUnits;
-    for (const auto& keyvalue : volumeUnits)
-    {
-        totalVolUnits.push_back(keyvalue.first);
-    }
-
-    //! Mark volumes in the camera frustum as active
-    Range inFrustumRange(0, (int)volumeUnits.size());
-    parallel_for_(inFrustumRange, [&](const Range& range) {
-        const Affine3f vol2cam(Affine3f(cameraPose.inv()) * pose);
-        const Intr::Projector proj(intrinsics.makeProjector());
-
-        for (int i = range.start; i < range.end; ++i)
-        {
-            Vec3i tsdf_idx = totalVolUnits[i];
-            VolumeUnitIndexes::iterator it = volumeUnits.find(tsdf_idx);
-            if (it == volumeUnits.end())
-                continue;
-
-            Point3f volumeUnitPos = volumeUnitIdxToVolume(it->first);
-            Point3f volUnitInCamSpace = vol2cam * volumeUnitPos;
-            if (volUnitInCamSpace.z < 0 || volUnitInCamSpace.z > truncateThreshold)
-            {
-                it->second.isActive = false;
-                continue;
-            }
-            Point2f cameraPoint = proj(volUnitInCamSpace);
-            if (cameraPoint.x >= 0 && cameraPoint.y >= 0 && cameraPoint.x < depth.cols && cameraPoint.y < depth.rows)
-            {
-                assert(it != volumeUnits.end());
-                it->second.lastVisibleIndex = frameId;
-                it->second.isActive         = true;
-            }
-        }
-        });
-
-    Vec6f newParams((float)depth.rows, (float)depth.cols,
-                    intrinsics.fx, intrinsics.fy,
-                    intrinsics.cx, intrinsics.cy);
-    if ( !(frameParams==newParams) )
-    {
-        frameParams = newParams;
-        pixNorms = preCalculationPixNorm(depth.size(), intrinsics);
-    }
-
-    //! Integrate the correct volumeUnits
-    parallel_for_(Range(0, (int)totalVolUnits.size()), [&](const Range& range) {
-        for (int i = range.start; i < range.end; i++)
-        {
-            Vec3i tsdf_idx = totalVolUnits[i];
-            VolumeUnitIndexes::iterator it = volumeUnits.find(tsdf_idx);
-            if (it == volumeUnits.end())
-                return;
-
-            VolumeUnit& volumeUnit = it->second;
-            if (volumeUnit.isActive)
-            {
-                //! The volume unit should already be added into the Volume from the allocator
-                integrateVolumeUnit(truncDist, voxelSize, maxWeight, volumeUnit.pose,
-                                    Point3i(volumeUnitResolution, volumeUnitResolution, volumeUnitResolution), volStrides, depth,
-                                    depthFactor, cameraPose, intrinsics, pixNorms, volUnitsData.row(volumeUnit.index));
-
-                //! Ensure all active volumeUnits are set to inactive for next integration
-                volumeUnit.isActive = false;
-            }
-        }
-        });
-}
-
-cv::Vec3i HashTSDFVolumeCPU::volumeToVolumeUnitIdx(const cv::Point3f& p) const
-{
-    return cv::Vec3i(cvFloor(p.x / volumeUnitSize), cvFloor(p.y / volumeUnitSize),
-                     cvFloor(p.z / volumeUnitSize));
-}
-
-cv::Point3f HashTSDFVolumeCPU::volumeUnitIdxToVolume(const cv::Vec3i& volumeUnitIdx) const
-{
-    return cv::Point3f(volumeUnitIdx[0] * volumeUnitSize, volumeUnitIdx[1] * volumeUnitSize,
-                       volumeUnitIdx[2] * volumeUnitSize);
-}
-
-cv::Point3f HashTSDFVolumeCPU::voxelCoordToVolume(const cv::Vec3i& voxelIdx) const
-{
-    return cv::Point3f(voxelIdx[0] * voxelSize, voxelIdx[1] * voxelSize, voxelIdx[2] * voxelSize);
-}
-
-cv::Vec3i HashTSDFVolumeCPU::volumeToVoxelCoord(const cv::Point3f& point) const
-{
-    return cv::Vec3i(cvFloor(point.x * voxelSizeInv), cvFloor(point.y * voxelSizeInv),
-                     cvFloor(point.z * voxelSizeInv));
-}
-
-inline TsdfVoxel HashTSDFVolumeCPU::_at(const cv::Vec3i& volumeIdx, int indx) const
-{
-    //! Out of bounds
-    if ((volumeIdx[0] >= volumeUnitResolution || volumeIdx[0] < 0) ||
-        (volumeIdx[1] >= volumeUnitResolution || volumeIdx[1] < 0) ||
-        (volumeIdx[2] >= volumeUnitResolution || volumeIdx[2] < 0))
-    {
-        return TsdfVoxel(floatToTsdf(1.f), 0);
-    }
-
-    const TsdfVoxel* volData = volUnitsData.ptr<TsdfVoxel>(indx);
-    int coordBase =
-        volumeIdx[0] * volStrides[0] + volumeIdx[1] * volStrides[1] + volumeIdx[2] * volStrides[2];
-    return volData[coordBase];
-}
-
-inline TsdfVoxel HashTSDFVolumeCPU::at(const cv::Vec3i& volumeIdx) const
-{
-    Vec3i volumeUnitIdx = Vec3i(volumeIdx[0] >> volumeUnitDegree,
-                                volumeIdx[1] >> volumeUnitDegree,
-                                volumeIdx[2] >> volumeUnitDegree);
-
-    VolumeUnitIndexes::const_iterator it = volumeUnits.find(volumeUnitIdx);
-
-    if (it == volumeUnits.end())
-    {
-        return TsdfVoxel(floatToTsdf(1.f), 0);
-    }
-
-    cv::Vec3i volUnitLocalIdx = volumeIdx - cv::Vec3i(volumeUnitIdx[0] << volumeUnitDegree,
-                                                      volumeUnitIdx[1] << volumeUnitDegree,
-                                                      volumeUnitIdx[2] << volumeUnitDegree);
-
-    volUnitLocalIdx =
-        cv::Vec3i(abs(volUnitLocalIdx[0]), abs(volUnitLocalIdx[1]), abs(volUnitLocalIdx[2]));
-    return _at(volUnitLocalIdx, it->second.index);
-
-}
-
-TsdfVoxel HashTSDFVolumeCPU::at(const Point3f& point) const
-{
-    cv::Vec3i volumeUnitIdx = volumeToVolumeUnitIdx(point);
-    VolumeUnitIndexes::const_iterator it = volumeUnits.find(volumeUnitIdx);
-
-    if (it == volumeUnits.end())
-    {
-        return TsdfVoxel(floatToTsdf(1.f), 0);
-    }
-
-    cv::Point3f volumeUnitPos = volumeUnitIdxToVolume(volumeUnitIdx);
-    cv::Vec3i volUnitLocalIdx = volumeToVoxelCoord(point - volumeUnitPos);
-    volUnitLocalIdx =
-        cv::Vec3i(abs(volUnitLocalIdx[0]), abs(volUnitLocalIdx[1]), abs(volUnitLocalIdx[2]));
-    return _at(volUnitLocalIdx, it->second.index);
-}
-
-TsdfVoxel HashTSDFVolumeCPU::atVolumeUnit(const Vec3i& point, const Vec3i& volumeUnitIdx, VolumeUnitIndexes::const_iterator it) const
-{
-    if (it == volumeUnits.end())
-    {
-        return TsdfVoxel(floatToTsdf(1.f), 0);
-    }
-    Vec3i volUnitLocalIdx = point - Vec3i(volumeUnitIdx[0] << volumeUnitDegree,
-                                          volumeUnitIdx[1] << volumeUnitDegree,
-                                          volumeUnitIdx[2] << volumeUnitDegree);
-
-    // expanding at(), removing bounds check
-    const TsdfVoxel* volData = volUnitsData.ptr<TsdfVoxel>(it->second.index);
-    int coordBase = volUnitLocalIdx[0] * volStrides[0] + volUnitLocalIdx[1] * volStrides[1] + volUnitLocalIdx[2] * volStrides[2];
-    return volData[coordBase];
-}
-
-#if USE_INTRINSICS
-inline float interpolate(float tx, float ty, float tz, float vx[8])
-{
-    v_float32x4 v0246, v1357;
-    v_load_deinterleave(vx, v0246, v1357);
-
-    v_float32x4 vxx = v0246 + v_setall_f32(tz) * (v1357 - v0246);
-
-    v_float32x4 v00_10 = vxx;
-    v_float32x4 v01_11 = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(vxx)));
-
-    v_float32x4 v0_1 = v00_10 + v_setall_f32(ty) * (v01_11 - v00_10);
-    float v0 = v0_1.get0();
-    v0_1 = v_reinterpret_as_f32(v_rotate_right<2>(v_reinterpret_as_u32(v0_1)));
-    float v1 = v0_1.get0();
-
-    return v0 + tx * (v1 - v0);
-}
-
-#else
-inline float interpolate(float tx, float ty, float tz, float vx[8])
-{
-    float v00 = vx[0] + tz * (vx[1] - vx[0]);
-    float v01 = vx[2] + tz * (vx[3] - vx[2]);
-    float v10 = vx[4] + tz * (vx[5] - vx[4]);
-    float v11 = vx[6] + tz * (vx[7] - vx[6]);
-
-    float v0 = v00 + ty * (v01 - v00);
-    float v1 = v10 + ty * (v11 - v10);
-
-    return v0 + tx * (v1 - v0);
-}
-#endif
-
-float HashTSDFVolumeCPU::interpolateVoxelPoint(const Point3f& point) const
-{
-    const Vec3i neighbourCoords[] = { {0, 0, 0}, {0, 0, 1}, {0, 1, 0}, {0, 1, 1},
-                                      {1, 0, 0}, {1, 0, 1}, {1, 1, 0}, {1, 1, 1} };
-
-    // A small hash table to reduce a number of find() calls
-    bool queried[8];
-    VolumeUnitIndexes::const_iterator iterMap[8];
-    for (int i = 0; i < 8; i++)
-    {
-        iterMap[i] = volumeUnits.end();
-        queried[i] = false;
-    }
-
-    int ix = cvFloor(point.x);
-    int iy = cvFloor(point.y);
-    int iz = cvFloor(point.z);
-
-    float tx = point.x - ix;
-    float ty = point.y - iy;
-    float tz = point.z - iz;
-
-    Vec3i iv(ix, iy, iz);
-    float vx[8];
-    for (int i = 0; i < 8; i++)
-    {
-        Vec3i pt = iv + neighbourCoords[i];
-
-        Vec3i volumeUnitIdx = Vec3i(pt[0] >> volumeUnitDegree, pt[1] >> volumeUnitDegree, pt[2] >> volumeUnitDegree);
-        int dictIdx = (volumeUnitIdx[0] & 1) + (volumeUnitIdx[1] & 1) * 2 + (volumeUnitIdx[2] & 1) * 4;
-        auto it = iterMap[dictIdx];
-        if (!queried[dictIdx])
-        {
-            it = volumeUnits.find(volumeUnitIdx);
-            iterMap[dictIdx] = it;
-            queried[dictIdx] = true;
-        }
-
-        vx[i] = atVolumeUnit(pt, volumeUnitIdx, it).tsdf;
-    }
-
-    return interpolate(tx, ty, tz, vx);
-}
-
-inline float HashTSDFVolumeCPU::interpolateVoxel(const cv::Point3f& point) const
-{
-    return interpolateVoxelPoint(point * voxelSizeInv);
-}
-
-
-Point3f HashTSDFVolumeCPU::getNormalVoxel(const Point3f &point) const
-{
-    Vec3f normal = Vec3f(0, 0, 0);
-
-    Point3f ptVox = point * voxelSizeInv;
-    Vec3i iptVox(cvFloor(ptVox.x), cvFloor(ptVox.y), cvFloor(ptVox.z));
-
-    // A small hash table to reduce a number of find() calls
-    bool queried[8];
-    VolumeUnitIndexes::const_iterator iterMap[8];
-    for (int i = 0; i < 8; i++)
-    {
-        iterMap[i] = volumeUnits.end();
-        queried[i] = false;
-    }
-
-#if !USE_INTERPOLATION_IN_GETNORMAL
-    const Vec3i offsets[] = { { 1,  0,  0}, {-1,  0,  0}, { 0,  1,  0}, // 0-3
-                              { 0, -1,  0}, { 0,  0,  1}, { 0,  0, -1}  // 4-7
-    };
-    const int nVals = 6;
-
-#else
-    const Vec3i offsets[] = { { 0,  0,  0}, { 0,  0,  1}, { 0,  1,  0}, { 0,  1,  1}, //  0-3
-                              { 1,  0,  0}, { 1,  0,  1}, { 1,  1,  0}, { 1,  1,  1}, //  4-7
-                              {-1,  0,  0}, {-1,  0,  1}, {-1,  1,  0}, {-1,  1,  1}, //  8-11
-                              { 2,  0,  0}, { 2,  0,  1}, { 2,  1,  0}, { 2,  1,  1}, // 12-15
-                              { 0, -1,  0}, { 0, -1,  1}, { 1, -1,  0}, { 1, -1,  1}, // 16-19
-                              { 0,  2,  0}, { 0,  2,  1}, { 1,  2,  0}, { 1,  2,  1}, // 20-23
-                              { 0,  0, -1}, { 0,  1, -1}, { 1,  0, -1}, { 1,  1, -1}, // 24-27
-                              { 0,  0,  2}, { 0,  1,  2}, { 1,  0,  2}, { 1,  1,  2}, // 28-31
-    };
-    const int nVals = 32;
-#endif
-
-    float vals[nVals];
-    for (int i = 0; i < nVals; i++)
-    {
-        Vec3i pt = iptVox + offsets[i];
-
-        Vec3i volumeUnitIdx = Vec3i(pt[0] >> volumeUnitDegree, pt[1] >> volumeUnitDegree, pt[2] >> volumeUnitDegree);
-
-        int dictIdx = (volumeUnitIdx[0] & 1) + (volumeUnitIdx[1] & 1) * 2 + (volumeUnitIdx[2] & 1) * 4;
-        auto it = iterMap[dictIdx];
-        if (!queried[dictIdx])
-        {
-            it = volumeUnits.find(volumeUnitIdx);
-            iterMap[dictIdx] = it;
-            queried[dictIdx] = true;
-        }
-
-        vals[i] = tsdfToFloat(atVolumeUnit(pt, volumeUnitIdx, it).tsdf);
-    }
-
-#if !USE_INTERPOLATION_IN_GETNORMAL
-    for (int c = 0; c < 3; c++)
-    {
-        normal[c] = vals[c * 2] - vals[c * 2 + 1];
-    }
-#else
-
-    float cxv[8], cyv[8], czv[8];
-
-    // How these numbers were obtained:
-    // 1. Take the basic interpolation sequence:
-    // 000, 001, 010, 011, 100, 101, 110, 111
-    // where each digit corresponds to shift by x, y, z axis respectively.
-    // 2. Add +1 for next or -1 for prev to each coordinate to corresponding axis
-    // 3. Search corresponding values in offsets
-    const int idxxp[8] = {  8,  9, 10, 11,  0,  1,  2,  3 };
-    const int idxxn[8] = {  4,  5,  6,  7, 12, 13, 14, 15 };
-    const int idxyp[8] = { 16, 17,  0,  1, 18, 19,  4,  5 };
-    const int idxyn[8] = {  2,  3, 20, 21,  6,  7, 22, 23 };
-    const int idxzp[8] = { 24,  0, 25,  2, 26,  4, 27,  6 };
-    const int idxzn[8] = {  1, 28,  3, 29,  5, 30,  7, 31 };
-
-#if !USE_INTRINSICS
-    for (int i = 0; i < 8; i++)
-    {
-        cxv[i] = vals[idxxn[i]] - vals[idxxp[i]];
-        cyv[i] = vals[idxyn[i]] - vals[idxyp[i]];
-        czv[i] = vals[idxzn[i]] - vals[idxzp[i]];
-    }
-#else
-
-# if CV_SIMD >= 32
-    v_float32x8 cxp = v_lut(vals, idxxp);
-    v_float32x8 cxn = v_lut(vals, idxxn);
-
-    v_float32x8 cyp = v_lut(vals, idxyp);
-    v_float32x8 cyn = v_lut(vals, idxyn);
-
-    v_float32x8 czp = v_lut(vals, idxzp);
-    v_float32x8 czn = v_lut(vals, idxzn);
-
-    v_float32x8 vcxv = cxn - cxp;
-    v_float32x8 vcyv = cyn - cyp;
-    v_float32x8 vczv = czn - czp;
-
-    v_store(cxv, vcxv);
-    v_store(cyv, vcyv);
-    v_store(czv, vczv);
-# else
-    v_float32x4 cxp0 = v_lut(vals, idxxp + 0); v_float32x4 cxp1 = v_lut(vals, idxxp + 4);
-    v_float32x4 cxn0 = v_lut(vals, idxxn + 0); v_float32x4 cxn1 = v_lut(vals, idxxn + 4);
-
-    v_float32x4 cyp0 = v_lut(vals, idxyp + 0); v_float32x4 cyp1 = v_lut(vals, idxyp + 4);
-    v_float32x4 cyn0 = v_lut(vals, idxyn + 0); v_float32x4 cyn1 = v_lut(vals, idxyn + 4);
-
-    v_float32x4 czp0 = v_lut(vals, idxzp + 0); v_float32x4 czp1 = v_lut(vals, idxzp + 4);
-    v_float32x4 czn0 = v_lut(vals, idxzn + 0); v_float32x4 czn1 = v_lut(vals, idxzn + 4);
-
-    v_float32x4 cxv0 = cxn0 - cxp0; v_float32x4 cxv1 = cxn1 - cxp1;
-    v_float32x4 cyv0 = cyn0 - cyp0; v_float32x4 cyv1 = cyn1 - cyp1;
-    v_float32x4 czv0 = czn0 - czp0; v_float32x4 czv1 = czn1 - czp1;
-
-    v_store(cxv + 0, cxv0); v_store(cxv + 4, cxv1);
-    v_store(cyv + 0, cyv0); v_store(cyv + 4, cyv1);
-    v_store(czv + 0, czv0); v_store(czv + 4, czv1);
-#endif
-
-#endif
-
-    float tx = ptVox.x - iptVox[0];
-    float ty = ptVox.y - iptVox[1];
-    float tz = ptVox.z - iptVox[2];
-
-    normal[0] = interpolate(tx, ty, tz, cxv);
-    normal[1] = interpolate(tx, ty, tz, cyv);
-    normal[2] = interpolate(tx, ty, tz, czv);
-#endif
-
-    float nv = sqrt(normal[0] * normal[0] +
-                    normal[1] * normal[1] +
-                    normal[2] * normal[2]);
-    return nv < 0.0001f ? nan3 : normal / nv;
-}
-
-void HashTSDFVolumeCPU::raycast(const Matx44f& cameraPose, const Matx33f& _intrinsics, const Size& frameSize,
-                                OutputArray _points, OutputArray _normals) const
-{
-    CV_TRACE_FUNCTION();
-    CV_Assert(frameSize.area() > 0);
-
-    _points.create(frameSize, POINT_TYPE);
-    _normals.create(frameSize, POINT_TYPE);
-
-    Points points1   = _points.getMat();
-    Normals normals1 = _normals.getMat();
-
-    Points& points(points1);
-    Normals& normals(normals1);
-    const HashTSDFVolumeCPU& volume(*this);
-    const float tstep(volume.truncDist * volume.raycastStepFactor);
-    const Affine3f cam2vol(volume.pose.inv() * Affine3f(cameraPose));
-    const Affine3f vol2cam(Affine3f(cameraPose.inv()) * volume.pose);
-    const Intr intrinsics(_intrinsics);
-    const Intr::Reprojector reproj(intrinsics.makeReprojector());
-
-    const int nstripes = -1;
-
-    auto _HashRaycastInvoker = [&](const Range& range)
-    {
-        const Point3f cam2volTrans = cam2vol.translation();
-        const Matx33f cam2volRot = cam2vol.rotation();
-        const Matx33f vol2camRot = vol2cam.rotation();
-
-        const float blockSize = volume.volumeUnitSize;
-
-        for (int y = range.start; y < range.end; y++)
-        {
-            ptype* ptsRow = points[y];
-            ptype* nrmRow = normals[y];
-
-            for (int x = 0; x < points.cols; x++)
-            {
-                //! Initialize default value
-                Point3f point = nan3, normal = nan3;
-
-                //! Ray origin and direction in the volume coordinate frame
-                Point3f orig    = cam2volTrans;
-                Point3f rayDirV = normalize(Vec3f(cam2volRot * reproj(Point3f(float(x), float(y), 1.f))));
-
-                float tmin = 0;
-                float tmax = volume.truncateThreshold;
-                float tcurr = tmin;
-
-                cv::Vec3i prevVolumeUnitIdx =
-                    cv::Vec3i(std::numeric_limits<int>::min(), std::numeric_limits<int>::min(),
-                        std::numeric_limits<int>::min());
-
-                float tprev = tcurr;
-                float prevTsdf = volume.truncDist;
-                while (tcurr < tmax)
-                {
-                    Point3f currRayPos = orig + tcurr * rayDirV;
-                    cv::Vec3i currVolumeUnitIdx = volume.volumeToVolumeUnitIdx(currRayPos);
-
-                    VolumeUnitIndexes::const_iterator it = volume.volumeUnits.find(currVolumeUnitIdx);
-
-                    float currTsdf = prevTsdf;
-                    int currWeight = 0;
-                    float stepSize = 0.5f * blockSize;
-                    cv::Vec3i volUnitLocalIdx;
-
-
-                    //! The subvolume exists in hashtable
-                    if (it != volume.volumeUnits.end())
-                    {
-                        cv::Point3f currVolUnitPos =
-                            volume.volumeUnitIdxToVolume(currVolumeUnitIdx);
-                        volUnitLocalIdx = volume.volumeToVoxelCoord(currRayPos - currVolUnitPos);
-
-                        //! TODO: Figure out voxel interpolation
-                        TsdfVoxel currVoxel = _at(volUnitLocalIdx, it->second.index);
-                        currTsdf = tsdfToFloat(currVoxel.tsdf);
-                        currWeight = currVoxel.weight;
-                        stepSize = tstep;
-                    }
-                    //! Surface crossing
-                    if (prevTsdf > 0.f && currTsdf <= 0.f && currWeight > 0)
-                    {
-                        float tInterp = (tcurr * prevTsdf - tprev * currTsdf) / (prevTsdf - currTsdf);
-                        if (!cvIsNaN(tInterp) && !cvIsInf(tInterp))
-                        {
-                            Point3f pv = orig + tInterp * rayDirV;
-                            Point3f nv = volume.getNormalVoxel(pv);
-
-                            if (!isNaN(nv))
-                            {
-                                normal = vol2camRot * nv;
-                                point = vol2cam * pv;
-                            }
-                        }
-                        break;
-                    }
-                    prevVolumeUnitIdx = currVolumeUnitIdx;
-                    prevTsdf = currTsdf;
-                    tprev = tcurr;
-                    tcurr += stepSize;
-                }
-                ptsRow[x] = toPtype(point);
-                nrmRow[x] = toPtype(normal);
-            }
-        }
-    };
-
-    parallel_for_(Range(0, points.rows), _HashRaycastInvoker, nstripes);
-}
-
-void HashTSDFVolumeCPU::fetchPointsNormals(OutputArray _points, OutputArray _normals) const
-{
-    CV_TRACE_FUNCTION();
-
-    if (_points.needed())
-    {
-        std::vector<std::vector<ptype>> pVecs, nVecs;
-
-        std::vector<Vec3i> totalVolUnits;
-        for (const auto& keyvalue : volumeUnits)
-        {
-            totalVolUnits.push_back(keyvalue.first);
-        }
-        Range fetchRange(0, (int)totalVolUnits.size());
-        const int nstripes = -1;
-
-        const HashTSDFVolumeCPU& volume(*this);
-        bool needNormals(_normals.needed());
-        Mutex mutex;
-
-        auto HashFetchPointsNormalsInvoker = [&](const Range& range)
-        {
-            std::vector<ptype> points, normals;
-            for (int i = range.start; i < range.end; i++)
-            {
-                cv::Vec3i tsdf_idx = totalVolUnits[i];
-
-                VolumeUnitIndexes::const_iterator it = volume.volumeUnits.find(tsdf_idx);
-                Point3f base_point = volume.volumeUnitIdxToVolume(tsdf_idx);
-                if (it != volume.volumeUnits.end())
-                {
-                    std::vector<ptype> localPoints;
-                    std::vector<ptype> localNormals;
-                    for (int x = 0; x < volume.volumeUnitResolution; x++)
-                        for (int y = 0; y < volume.volumeUnitResolution; y++)
-                            for (int z = 0; z < volume.volumeUnitResolution; z++)
-                            {
-                                cv::Vec3i voxelIdx(x, y, z);
-                                TsdfVoxel voxel = _at(voxelIdx, it->second.index);
-
-                                if (voxel.tsdf != -128 && voxel.weight != 0)
-                                {
-                                    Point3f point = base_point + volume.voxelCoordToVolume(voxelIdx);
-                                    localPoints.push_back(toPtype(this->pose * point));
-                                    if (needNormals)
-                                    {
-                                        Point3f normal = volume.getNormalVoxel(point);
-                                        localNormals.push_back(toPtype(this->pose.rotation() * normal));
-                                    }
-                                }
-                            }
-
-                    AutoLock al(mutex);
-                    pVecs.push_back(localPoints);
-                    nVecs.push_back(localNormals);
-                }
-            }
-        };
-
-        parallel_for_(fetchRange, HashFetchPointsNormalsInvoker, nstripes);
-
-        std::vector<ptype> points, normals;
-        for (size_t i = 0; i < pVecs.size(); i++)
-        {
-            points.insert(points.end(), pVecs[i].begin(), pVecs[i].end());
-            normals.insert(normals.end(), nVecs[i].begin(), nVecs[i].end());
-        }
-
-        _points.create((int)points.size(), 1, POINT_TYPE);
-        if (!points.empty())
-            Mat((int)points.size(), 1, POINT_TYPE, &points[0]).copyTo(_points.getMat());
-
-        if (_normals.needed())
-        {
-            _normals.create((int)normals.size(), 1, POINT_TYPE);
-            if (!normals.empty())
-                Mat((int)normals.size(), 1, POINT_TYPE, &normals[0]).copyTo(_normals.getMat());
-        }
-    }
-}
-
-void HashTSDFVolumeCPU::fetchNormals(InputArray _points, OutputArray _normals) const
-{
-    CV_TRACE_FUNCTION();
-
-    if (_normals.needed())
-    {
-        Points points = _points.getMat();
-        CV_Assert(points.type() == POINT_TYPE);
-
-        _normals.createSameSize(_points, _points.type());
-        Normals normals = _normals.getMat();
-
-        const HashTSDFVolumeCPU& _volume = *this;
-        auto HashPushNormals             = [&](const ptype& point, const int* position) {
-            const HashTSDFVolumeCPU& volume(_volume);
-            Affine3f invPose(volume.pose.inv());
-            Point3f p = fromPtype(point);
-            Point3f n = nan3;
-            if (!isNaN(p))
-            {
-                Point3f voxelPoint = invPose * p;
-                n                  = volume.pose.rotation() * volume.getNormalVoxel(voxelPoint);
-            }
-            normals(position[0], position[1]) = toPtype(n);
-        };
-        points.forEach(HashPushNormals);
-    }
-}
-
-int HashTSDFVolumeCPU::getVisibleBlocks(int currFrameId, int frameThreshold) const
-{
-    int numVisibleBlocks = 0;
-    //! TODO: Iterate over map parallely?
-    for (const auto& keyvalue : volumeUnits)
-    {
-        const VolumeUnit& volumeUnit = keyvalue.second;
-        if (volumeUnit.lastVisibleIndex > (currFrameId - frameThreshold))
-            numVisibleBlocks++;
-    }
-    return numVisibleBlocks;
-}
-
-
-///////// GPU implementation /////////
-
-#ifdef HAVE_OPENCL
-
-class HashTSDFVolumeGPU : public HashTSDFVolume
-{
-public:
-    HashTSDFVolumeGPU(float _voxelSize, const Matx44f& _pose, float _raycastStepFactor, float _truncDist, int _maxWeight,
-        float _truncateThreshold, int _volumeUnitRes, bool zFirstMemOrder = false);
-
-    HashTSDFVolumeGPU(const VolumeParams& _volumeParams, bool zFirstMemOrder = false);
-
-    void reset() override;
-
-    void integrateAllVolumeUnitsGPU(const UMat& depth, float depthFactor, const Matx44f& cameraPose, const Intr& intrinsics);
-
-    void allocateVolumeUnits(const UMat& depth, float depthFactor, const Matx44f& cameraPose, const Intr& intrinsics);
-
-    void markActive(const Matx44f& cameraPose, const Intr& intrinsics, const Size frameSz, const int frameId);
-
-    virtual void integrate(InputArray, InputArray, float, const Matx44f&, const Matx33f&, const Matx33f&, const int) override
-    { CV_Error(Error::StsNotImplemented, "Not implemented"); };
-    void integrate(InputArray _depth, float depthFactor, const Matx44f& cameraPose, const Matx33f& intrinsics,
-                   const int frameId = 0) override;
-    void raycast(const Matx44f& cameraPose, const Matx33f& intrinsics, const Size& frameSize, OutputArray points,
-                 OutputArray normals) const override;
-    void raycast(const Matx44f&, const Matx33f&, const Size&, OutputArray, OutputArray, OutputArray) const override
-    { CV_Error(Error::StsNotImplemented, "Not implemented"); };
-
-    void fetchNormals(InputArray points, OutputArray _normals) const override;
-    void fetchPointsNormals(OutputArray points, OutputArray normals) const override;
-
-    size_t getTotalVolumeUnits() const override { return size_t(hashTable.last); }
-    int getVisibleBlocks(int currFrameId, int frameThreshold) const override;
-
-
-
-    //! Return the voxel given the point in volume coordinate system i.e., (metric scale 1 unit =
-    //! 1m)
-    virtual TsdfVoxel new_at(const cv::Vec3i& volumeIdx, int indx) const;
-    TsdfVoxel new_atVolumeUnit(const Vec3i& point, const Vec3i& volumeUnitIdx, int indx) const;
-
-
-    float interpolateVoxelPoint(const Point3f& point) const;
-    float interpolateVoxel(const cv::Point3f& point) const;
-    Point3f getNormalVoxel(const cv::Point3f& p) const;
-
-    //! Utility functions for coordinate transformations
-    Vec3i volumeToVolumeUnitIdx(const Point3f& point) const;
-    Point3f volumeUnitIdxToVolume(const Vec3i& volumeUnitIdx) const;
-
-    Point3f voxelCoordToVolume(const Vec3i& voxelIdx) const;
-    Vec3i volumeToVoxelCoord(const Point3f& point) const;
-
-public:
-    Vec6f frameParams;
-    int bufferSizeDegree;
-
-    // per-volume-unit data
-    cv::UMat lastVisibleIndices;
-
-    cv::UMat isActiveFlags;
-
-    cv::UMat volUnitsData;
-    //TODO: remove it when there's no CPU parts
-    cv::Mat volUnitsDataCopy;
-
-    cv::UMat pixNorms;
-
-    //TODO: move indexes.volumes to GPU
-    CustomHashSet hashTable;
-};
-
-HashTSDFVolumeGPU::HashTSDFVolumeGPU(float _voxelSize, const Matx44f& _pose, float _raycastStepFactor, float _truncDist, int _maxWeight,
-    float _truncateThreshold, int _volumeUnitRes, bool _zFirstMemOrder)
-    :HashTSDFVolume(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, _truncateThreshold, _volumeUnitRes, _zFirstMemOrder)
-{
-    if (truncDist >= volumeUnitSize)
-    {
-        std::cout << "truncDist exceeds volume unit size, allocation may work incorrectly" << std::endl;
-    }
-
-    reset();
-}
-
-HashTSDFVolumeGPU::HashTSDFVolumeGPU(const VolumeParams & _params, bool _zFirstMemOrder)
-    : HashTSDFVolumeGPU(_params.voxelSize,
-                        Matx44f(_params.pose),
-                        _params.raycastStepFactor, _params.tsdfTruncDist, _params.maxWeight,
-                        _params.depthTruncThreshold, _params.unitResolution, _zFirstMemOrder)
-{
-}
-
-// zero volume, leave rest params the same
-void HashTSDFVolumeGPU::reset()
-{
-    CV_TRACE_FUNCTION();
-
-    bufferSizeDegree = 15;
-    int buff_lvl = (int) (1 << bufferSizeDegree);
-
-    int volCubed = volumeUnitResolution * volumeUnitResolution * volumeUnitResolution;
-    volUnitsDataCopy = cv::Mat(buff_lvl, volCubed, rawType<TsdfVoxel>());
-
-    volUnitsData = cv::UMat(buff_lvl, volCubed, CV_8UC2);
-
-    lastVisibleIndices = cv::UMat(buff_lvl, 1, CV_32S);
-
-    isActiveFlags = cv::UMat(buff_lvl, 1, CV_8U);
-
-    hashTable = CustomHashSet();
-
-    frameParams = Vec6f();
-    pixNorms = UMat();
-}
-
-
-void HashTSDFVolumeGPU::integrateAllVolumeUnitsGPU(const UMat& depth, float depthFactor, const Matx44f& cameraPose, const Intr& intrinsics)
-{
-    CV_TRACE_FUNCTION();
-    CV_Assert(!depth.empty());
-
-    String errorStr;
-    String name = "integrateAllVolumeUnits";
-    ocl::ProgramSource source = ocl::_3d::hash_tsdf_oclsrc;
-    String options = "-cl-mad-enable";
-    ocl::Kernel k;
-    k.create(name.c_str(), source, options, &errorStr);
-
-    if (k.empty())
-        throw std::runtime_error("Failed to create kernel: " + errorStr);
-
-    float dfac = 1.f / depthFactor;
-    Vec2f fxy(intrinsics.fx, intrinsics.fy), cxy(intrinsics.cx, intrinsics.cy);
-    Matx44f vol2camMatrix = (Affine3f(cameraPose).inv() * pose).matrix;
-    Matx44f camInvMatrix = Affine3f(cameraPose).inv().matrix;
-
-    UMat hashesGpu = Mat(hashTable.hashes, false).getUMat(ACCESS_READ);
-    UMat hashDataGpu = Mat(hashTable.data, false).getUMat(ACCESS_READ);
-
-    k.args(ocl::KernelArg::ReadOnly(depth),
-           ocl::KernelArg::PtrReadOnly(hashesGpu),
-           ocl::KernelArg::PtrReadOnly(hashDataGpu),
-           ocl::KernelArg::ReadWrite(volUnitsData),
-           ocl::KernelArg::ReadOnly(pixNorms),
-           ocl::KernelArg::ReadOnly(isActiveFlags),
-           vol2camMatrix,
-           camInvMatrix,
-           voxelSize,
-           volumeUnitResolution,
-           volStrides.val,
-           fxy.val,
-           cxy.val,
-           dfac,
-           truncDist,
-           int(maxWeight)
-    );
-
-    int resol = volumeUnitResolution;
-    size_t globalSize[3];
-    globalSize[0] = (size_t)resol;
-    globalSize[1] = (size_t)resol;
-    globalSize[2] = (size_t)hashTable.last; // num of volume units
-
-    if (!k.run(3, globalSize, NULL, true))
-        throw std::runtime_error("Failed to run kernel");
-}
-
-
-void HashTSDFVolumeGPU::allocateVolumeUnits(const UMat& _depth, float depthFactor, const Matx44f& cameraPose, const Intr& intrinsics)
-{
-    constexpr int pixCapacity = 16;
-    typedef std::array<Vec3i, pixCapacity> LocalVolUnits;
-
-    Depth depth = _depth.getMat(ACCESS_READ);
-
-    //! Compute volumes to be allocated
-    const int depthStride = volumeUnitDegree;
-    const float invDepthFactor = 1.f / depthFactor;
-    const Intr::Reprojector reproj(intrinsics.makeReprojector());
-    const Affine3f cam2vol(pose.inv() * Affine3f(cameraPose));
-    const Point3f truncPt(truncDist, truncDist, truncDist);
-    Mutex mutex;
-
-    // for new indices
-    CustomHashSet thm;
-
-    auto fillLocalAcessVolUnits = [&](const Range& xrange, const Range& yrange, CustomHashSet& ghm)
-    {
-        for (int y = yrange.start; y < yrange.end; y += depthStride)
-        {
-            const depthType* depthRow = depth[y];
-            for (int x = xrange.start; x < xrange.end; x += depthStride)
-            {
-                depthType z = depthRow[x] * invDepthFactor;
-                if (z <= 0 || z > this->truncateThreshold)
-                    continue;
-                Point3f camPoint = reproj(Point3f((float)x, (float)y, z));
-                Point3f volPoint = cam2vol * camPoint;
-                //! Find accessed TSDF volume unit for valid 3D vertex
-                Vec3i lower_bound = this->volumeToVolumeUnitIdx(volPoint - truncPt);
-                Vec3i upper_bound = this->volumeToVolumeUnitIdx(volPoint + truncPt);
-
-                int pixLocalCounter = 0;
-                LocalVolUnits pixLocalVolUnits;
-                for (int i = lower_bound[0]; i <= upper_bound[0]; i++)
-                    for (int j = lower_bound[1]; j <= upper_bound[1]; j++)
-                        for (int k = lower_bound[2]; k <= upper_bound[2]; k++)
-                        {
-                            const Vec3i tsdf_idx = Vec3i(i, j, k);
-
-                            if (hashTable.find(tsdf_idx) < 0)
-                            {
-                                bool found = false;
-                                for (int c = 0; c < pixLocalCounter; c++)
-                                {
-                                    if (pixLocalVolUnits[c] == tsdf_idx)
-                                    {
-                                        found = true; break;
-                                    }
-                                }
-                                if (!found)
-                                {
-                                    pixLocalVolUnits[pixLocalCounter++] = tsdf_idx;
-                                    if (pixLocalCounter >= pixCapacity)
-                                    {
-                                        return;
-                                    }
-                                }
-                            }
-                        }
-
-                // lock localAccessVolUnits somehow
-                for (int i = 0; i < pixLocalCounter; i++)
-                {
-                    Vec3i idx = pixLocalVolUnits[i];
-                    if (!ghm.insert(idx))
-                    {
-                        //return;
-                    }
-                }
-                // unlock
-            }
-        }
-    };
-
-    Rect dim(0, 0, depth.cols, depth.rows);
-    Size gsz(32, 32);
-    Size gg(divUp(dim.width, gsz.width), divUp(dim.height, gsz.height));
-
-    bool needReallocation = false;
-    auto allocateLambda = [&](const Range& r)
-    {
-
-    for (int yg = r.start; yg < r.end; yg++)
-    {
-        for (int xg = 0; xg < gg.width; xg++)
-        {
-            Rect gr(xg * gsz.width, yg * gsz.height, (xg + 1) * gsz.width, (yg + 1) * gsz.height);
-            gr = gr & dim;
-            Range xr(gr.tl().x, gr.br().x), yr(gr.tl().y, gr.br().y);
-
-            CustomHashSet ghm;
-
-            fillLocalAcessVolUnits(xr, yr, ghm);
-
-            if (ghm.last)
-            {
-                std::lock_guard<std::recursive_mutex> al(mutex);
-
-                for (int i = 0; i < ghm.last; i++)
-                {
-                    Vec4i node = ghm.data[i];
-                    Vec3i idx(node[0], node[1], node[2]);
-
-                    //TODO: 1. add to separate hash map instead, then merge on GPU side
-
-                    int result = thm.insert(idx);
-                    if (!result)
-                    {
-                        needReallocation = true;
-                        return;
-                    }
-                }
-            }
-        }
-    }
-
-    };
-
-    do
-    {
-        if (needReallocation)
-        {
-            thm.capacity *= 2;
-            thm.data.resize(thm.capacity);
-
-            needReallocation = false;
-        }
-
-        parallel_for_(Range(0, gg.height), allocateLambda);
-    } while (needReallocation);
-
-
-    auto pushToGlobal = [](const CustomHashSet _thm, CustomHashSet& _globalHashMap,
-                           bool& _needReallocation, Mutex& _mutex)
-    {
-        for (int i = 0; i < _thm.last; i++)
-        {
-                Vec4i node = _thm.data[i];
-                Vec3i idx(node[0], node[1], node[2]);
-
-                std::lock_guard<std::recursive_mutex> al(_mutex);
-
-                int result = _globalHashMap.insert(idx);
-                if (result == 0)
-                {
-                    _needReallocation = true;
-                    return;
-                }
-        }
-    };
-
-    needReallocation = false;
-    do
-    {
-        if (needReallocation)
-        {
-            hashTable.capacity *= 2;
-            hashTable.data.resize(hashTable.capacity);
-
-            needReallocation = false;
-        }
-
-        pushToGlobal(thm, hashTable, needReallocation, mutex);
-    } while (needReallocation);
-}
-
-
-void HashTSDFVolumeGPU::markActive(const Matx44f& cameraPose, const Intr& intrinsics, const Size frameSz, const int frameId)
-{
-    //! Mark volumes in the camera frustum as active
-    String errorStr;
-    String name = "markActive";
-    ocl::ProgramSource source = ocl::_3d::hash_tsdf_oclsrc;
-    String options = "-cl-mad-enable";
-    ocl::Kernel k;
-    k.create(name.c_str(), source, options, &errorStr);
-
-    if (k.empty())
-        throw std::runtime_error("Failed to create kernel: " + errorStr);
-
-    const Affine3f vol2cam(Affine3f(cameraPose.inv()) * pose);
-    const Intr::Projector proj(intrinsics.makeProjector());
-    Vec2f fxy(proj.fx, proj.fy), cxy(proj.cx, proj.cy);
-
-    UMat hashDataGpu = Mat(hashTable.data, false).getUMat(ACCESS_READ);
-
-    k.args(
-        ocl::KernelArg::PtrReadOnly(hashDataGpu),
-        ocl::KernelArg::WriteOnly(isActiveFlags),
-        ocl::KernelArg::WriteOnly(lastVisibleIndices),
-        vol2cam.matrix,
-        fxy,
-        cxy,
-        frameSz,
-        volumeUnitSize,
-        hashTable.last,
-        truncateThreshold,
-        frameId
-    );
-
-    size_t globalSize[1] = { (size_t)hashTable.last };
-    if (!k.run(1, globalSize, nullptr, true))
-        throw std::runtime_error("Failed to run kernel");
-}
-
-
-void HashTSDFVolumeGPU::integrate(InputArray _depth, float depthFactor, const Matx44f& cameraPose, const Matx33f& _intrinsics, const int frameId)
-{
-    CV_TRACE_FUNCTION();
-
-    CV_Assert(_depth.type() == DEPTH_TYPE);
-    UMat depth = _depth.getUMat();
-
-    Intr intrinsics(_intrinsics);
-
-    // Save length to fill new data in ranges
-    int sizeBefore = hashTable.last;
-    allocateVolumeUnits(depth, depthFactor, cameraPose, intrinsics);
-    int sizeAfter = hashTable.last;
-    //! Perform the allocation
-
-    // Grow buffers
-    int buff_lvl = (int)(1 << bufferSizeDegree);
-    if (sizeAfter >= buff_lvl)
-    {
-        bufferSizeDegree = (int)(log2(sizeAfter) + 1); // clz() would be better
-        int oldBuffSize = buff_lvl;
-        buff_lvl = (int)pow(2, bufferSizeDegree);
-
-        volUnitsDataCopy.resize(buff_lvl);
-
-        Range oldr(0, oldBuffSize);
-        int volCubed = volumeUnitResolution * volumeUnitResolution * volumeUnitResolution;
-        UMat newData(buff_lvl, volCubed, CV_8UC2);
-        volUnitsData.copyTo(newData.rowRange(oldr));
-        volUnitsData = newData;
-
-        UMat newLastVisibleIndices(buff_lvl, 1, CV_32S);
-        lastVisibleIndices.copyTo(newLastVisibleIndices.rowRange(oldr));
-        lastVisibleIndices = newLastVisibleIndices;
-
-        UMat newIsActiveFlags(buff_lvl, 1, CV_8U);
-        isActiveFlags.copyTo(newIsActiveFlags.rowRange(oldr));
-        isActiveFlags = newIsActiveFlags;
-    }
-
-    // Fill data for new volume units
-    Range r(sizeBefore, sizeAfter);
-    if (r.start < r.end)
-    {
-        lastVisibleIndices.rowRange(r) = frameId;
-        isActiveFlags.rowRange(r) = 1;
-
-        TsdfVoxel emptyVoxel(floatToTsdf(0.0f), 0);
-        volUnitsData.rowRange(r) = Vec2b((uchar)(emptyVoxel.tsdf), (uchar)(emptyVoxel.weight));
-    }
-
-    //! Mark volumes in the camera frustum as active
-    markActive(cameraPose, intrinsics, depth.size(), frameId);
-
-    Vec6f newParams((float)depth.rows, (float)depth.cols,
-                     intrinsics.fx, intrinsics.fy,
-                     intrinsics.cx, intrinsics.cy);
-    if (!(frameParams == newParams))
-    {
-        frameParams = newParams;
-        pixNorms = preCalculationPixNormGPU(depth.size(), intrinsics);
-    }
-
-    //! Integrate the correct volumeUnits
-    integrateAllVolumeUnitsGPU(depth, depthFactor, cameraPose, intrinsics);
-}
-
-
-//TODO: remove these functions when everything is done on GPU
-cv::Vec3i HashTSDFVolumeGPU::volumeToVolumeUnitIdx(const cv::Point3f& p) const
-{
-    return cv::Vec3i(cvFloor(p.x / volumeUnitSize), cvFloor(p.y / volumeUnitSize),
-        cvFloor(p.z / volumeUnitSize));
-}
-
-cv::Point3f HashTSDFVolumeGPU::volumeUnitIdxToVolume(const cv::Vec3i& volumeUnitIdx) const
-{
-    return cv::Point3f(volumeUnitIdx[0] * volumeUnitSize, volumeUnitIdx[1] * volumeUnitSize,
-        volumeUnitIdx[2] * volumeUnitSize);
-}
-
-cv::Point3f HashTSDFVolumeGPU::voxelCoordToVolume(const cv::Vec3i& voxelIdx) const
-{
-    return cv::Point3f(voxelIdx[0] * voxelSize, voxelIdx[1] * voxelSize, voxelIdx[2] * voxelSize);
-}
-
-cv::Vec3i HashTSDFVolumeGPU::volumeToVoxelCoord(const cv::Point3f& point) const
-{
-    return cv::Vec3i(cvFloor(point.x * voxelSizeInv), cvFloor(point.y * voxelSizeInv),
-        cvFloor(point.z * voxelSizeInv));
-}
-
-inline TsdfVoxel HashTSDFVolumeGPU::new_at(const cv::Vec3i& volumeIdx, int indx) const
-{
-    //! Out of bounds
-    if ((volumeIdx[0] >= volumeUnitResolution || volumeIdx[0] < 0) ||
-        (volumeIdx[1] >= volumeUnitResolution || volumeIdx[1] < 0) ||
-        (volumeIdx[2] >= volumeUnitResolution || volumeIdx[2] < 0))
-    {
-        return TsdfVoxel(floatToTsdf(1.0f), 0);
-    }
-
-    const TsdfVoxel* volData = volUnitsDataCopy.ptr<TsdfVoxel>(indx);
-    int coordBase =
-        volumeIdx[0] * volStrides[0] +
-        volumeIdx[1] * volStrides[1] +
-        volumeIdx[2] * volStrides[2];
-    return volData[coordBase];
-}
-
-TsdfVoxel HashTSDFVolumeGPU::new_atVolumeUnit(const Vec3i& point, const Vec3i& volumeUnitIdx, int indx) const
-{
-    if (indx < 0)
-    {
-        return TsdfVoxel(floatToTsdf(1.f), 0);
-    }
-    Vec3i volUnitLocalIdx = point - Vec3i(volumeUnitIdx[0] << volumeUnitDegree,
-                                          volumeUnitIdx[1] << volumeUnitDegree,
-                                          volumeUnitIdx[2] << volumeUnitDegree);
-
-    // expanding at(), removing bounds check
-    const TsdfVoxel* volData = volUnitsDataCopy.ptr<TsdfVoxel>(indx);
-    int coordBase = volUnitLocalIdx[0] * volStrides[0] +
-        volUnitLocalIdx[1] * volStrides[1] +
-        volUnitLocalIdx[2] * volStrides[2];
-    return volData[coordBase];
-}
-
-float HashTSDFVolumeGPU::interpolateVoxelPoint(const Point3f& point) const
-{
-    const Vec3i local_neighbourCoords[] = { {0, 0, 0}, {0, 0, 1}, {0, 1, 0}, {0, 1, 1},
-                                            {1, 0, 0}, {1, 0, 1}, {1, 1, 0}, {1, 1, 1} };
-
-    // A small hash table to reduce a number of find() calls
-    // -2 and lower means not queried yet
-    // -1 means not found
-    // 0+ means found
-    int iterMap[8];
-    for (int i = 0; i < 8; i++)
-    {
-        iterMap[i] = -2;
-    }
-
-    int ix = cvFloor(point.x);
-    int iy = cvFloor(point.y);
-    int iz = cvFloor(point.z);
-
-    float tx = point.x - ix;
-    float ty = point.y - iy;
-    float tz = point.z - iz;
-
-    Vec3i iv(ix, iy, iz);
-    float vx[8];
-    for (int i = 0; i < 8; i++)
-    {
-        Vec3i pt = iv + local_neighbourCoords[i];
-
-        Vec3i volumeUnitIdx = Vec3i(pt[0] >> volumeUnitDegree, pt[1] >> volumeUnitDegree, pt[2] >> volumeUnitDegree);
-        int dictIdx = (volumeUnitIdx[0] & 1) + (volumeUnitIdx[1] & 1) * 2 + (volumeUnitIdx[2] & 1) * 4;
-        auto it = iterMap[dictIdx];
-        if (it < -1)
-        {
-            it = hashTable.find(volumeUnitIdx);
-            iterMap[dictIdx] = it;
-        }
-
-        vx[i] = new_atVolumeUnit(pt, volumeUnitIdx, it).tsdf;
-    }
-
-    return interpolate(tx, ty, tz, vx);
-}
-
-inline float HashTSDFVolumeGPU::interpolateVoxel(const cv::Point3f& point) const
-{
-    return interpolateVoxelPoint(point * voxelSizeInv);
-}
-
-Point3f HashTSDFVolumeGPU::getNormalVoxel(const Point3f& point) const
-{
-    Vec3f normal = Vec3f(0, 0, 0);
-
-    Point3f ptVox = point * voxelSizeInv;
-    Vec3i iptVox(cvFloor(ptVox.x), cvFloor(ptVox.y), cvFloor(ptVox.z));
-
-    // A small hash table to reduce a number of find() calls
-    // -2 and lower means not queried yet
-    // -1 means not found
-    // 0+ means found
-    int iterMap[8];
-    for (int i = 0; i < 8; i++)
-    {
-        iterMap[i] = -2;
-    }
-
-#if !USE_INTERPOLATION_IN_GETNORMAL
-    const Vec3i offsets[] = { { 1,  0,  0}, {-1,  0,  0}, { 0,  1,  0}, // 0-3
-                              { 0, -1,  0}, { 0,  0,  1}, { 0,  0, -1}  // 4-7
-    };
-    const int nVals = 6;
-
-#else
-    const Vec3i offsets[] = { { 0,  0,  0}, { 0,  0,  1}, { 0,  1,  0}, { 0,  1,  1}, //  0-3
-                              { 1,  0,  0}, { 1,  0,  1}, { 1,  1,  0}, { 1,  1,  1}, //  4-7
-                              {-1,  0,  0}, {-1,  0,  1}, {-1,  1,  0}, {-1,  1,  1}, //  8-11
-                              { 2,  0,  0}, { 2,  0,  1}, { 2,  1,  0}, { 2,  1,  1}, // 12-15
-                              { 0, -1,  0}, { 0, -1,  1}, { 1, -1,  0}, { 1, -1,  1}, // 16-19
-                              { 0,  2,  0}, { 0,  2,  1}, { 1,  2,  0}, { 1,  2,  1}, // 20-23
-                              { 0,  0, -1}, { 0,  1, -1}, { 1,  0, -1}, { 1,  1, -1}, // 24-27
-                              { 0,  0,  2}, { 0,  1,  2}, { 1,  0,  2}, { 1,  1,  2}, // 28-31
-    };
-    const int nVals = 32;
-#endif
-
-    float vals[nVals];
-    for (int i = 0; i < nVals; i++)
-    {
-        Vec3i pt = iptVox + offsets[i];
-
-        Vec3i volumeUnitIdx = Vec3i(pt[0] >> volumeUnitDegree, pt[1] >> volumeUnitDegree, pt[2] >> volumeUnitDegree);
-
-        int dictIdx = (volumeUnitIdx[0] & 1) + (volumeUnitIdx[1] & 1) * 2 + (volumeUnitIdx[2] & 1) * 4;
-        auto it = iterMap[dictIdx];
-        if (it < -1)
-        {
-            it = hashTable.find(volumeUnitIdx);
-            iterMap[dictIdx] = it;
-        }
-
-        vals[i] = tsdfToFloat(new_atVolumeUnit(pt, volumeUnitIdx, it).tsdf);
-    }
-
-#if !USE_INTERPOLATION_IN_GETNORMAL
-    for (int c = 0; c < 3; c++)
-    {
-        normal[c] = vals[c * 2] - vals[c * 2 + 1];
-    }
-#else
-
-    float cxv[8], cyv[8], czv[8];
-
-    // How these numbers were obtained:
-    // 1. Take the basic interpolation sequence:
-    // 000, 001, 010, 011, 100, 101, 110, 111
-    // where each digit corresponds to shift by x, y, z axis respectively.
-    // 2. Add +1 for next or -1 for prev to each coordinate to corresponding axis
-    // 3. Search corresponding values in offsets
-    const int idxxp[8] = { 8,  9, 10, 11,  0,  1,  2,  3 };
-    const int idxxn[8] = { 4,  5,  6,  7, 12, 13, 14, 15 };
-    const int idxyp[8] = { 16, 17,  0,  1, 18, 19,  4,  5 };
-    const int idxyn[8] = { 2,  3, 20, 21,  6,  7, 22, 23 };
-    const int idxzp[8] = { 24,  0, 25,  2, 26,  4, 27,  6 };
-    const int idxzn[8] = { 1, 28,  3, 29,  5, 30,  7, 31 };
-
-#if !USE_INTRINSICS
-    for (int i = 0; i < 8; i++)
-    {
-        cxv[i] = vals[idxxn[i]] - vals[idxxp[i]];
-        cyv[i] = vals[idxyn[i]] - vals[idxyp[i]];
-        czv[i] = vals[idxzn[i]] - vals[idxzp[i]];
-    }
-#else
-
-# if CV_SIMD >= 32
-    v_float32x8 cxp = v_lut(vals, idxxp);
-    v_float32x8 cxn = v_lut(vals, idxxn);
-
-    v_float32x8 cyp = v_lut(vals, idxyp);
-    v_float32x8 cyn = v_lut(vals, idxyn);
-
-    v_float32x8 czp = v_lut(vals, idxzp);
-    v_float32x8 czn = v_lut(vals, idxzn);
-
-    v_float32x8 vcxv = cxn - cxp;
-    v_float32x8 vcyv = cyn - cyp;
-    v_float32x8 vczv = czn - czp;
-
-    v_store(cxv, vcxv);
-    v_store(cyv, vcyv);
-    v_store(czv, vczv);
-# else
-    v_float32x4 cxp0 = v_lut(vals, idxxp + 0); v_float32x4 cxp1 = v_lut(vals, idxxp + 4);
-    v_float32x4 cxn0 = v_lut(vals, idxxn + 0); v_float32x4 cxn1 = v_lut(vals, idxxn + 4);
-
-    v_float32x4 cyp0 = v_lut(vals, idxyp + 0); v_float32x4 cyp1 = v_lut(vals, idxyp + 4);
-    v_float32x4 cyn0 = v_lut(vals, idxyn + 0); v_float32x4 cyn1 = v_lut(vals, idxyn + 4);
-
-    v_float32x4 czp0 = v_lut(vals, idxzp + 0); v_float32x4 czp1 = v_lut(vals, idxzp + 4);
-    v_float32x4 czn0 = v_lut(vals, idxzn + 0); v_float32x4 czn1 = v_lut(vals, idxzn + 4);
-
-    v_float32x4 cxv0 = cxn0 - cxp0; v_float32x4 cxv1 = cxn1 - cxp1;
-    v_float32x4 cyv0 = cyn0 - cyp0; v_float32x4 cyv1 = cyn1 - cyp1;
-    v_float32x4 czv0 = czn0 - czp0; v_float32x4 czv1 = czn1 - czp1;
-
-    v_store(cxv + 0, cxv0); v_store(cxv + 4, cxv1);
-    v_store(cyv + 0, cyv0); v_store(cyv + 4, cyv1);
-    v_store(czv + 0, czv0); v_store(czv + 4, czv1);
-#endif
-
-#endif
-
-    float tx = ptVox.x - iptVox[0];
-    float ty = ptVox.y - iptVox[1];
-    float tz = ptVox.z - iptVox[2];
-
-    normal[0] = interpolate(tx, ty, tz, cxv);
-    normal[1] = interpolate(tx, ty, tz, cyv);
-    normal[2] = interpolate(tx, ty, tz, czv);
-#endif
-    float nv = sqrt(normal[0] * normal[0] +
-                    normal[1] * normal[1] +
-                    normal[2] * normal[2]);
-    return nv < 0.0001f ? nan3 : normal / nv;
-}
-
-
-void HashTSDFVolumeGPU::raycast(const Matx44f& cameraPose, const Matx33f& _intrinsics, const Size& frameSize,
-                                OutputArray _points, OutputArray _normals) const
-{
-    CV_TRACE_FUNCTION();
-    CV_Assert(frameSize.area() > 0);
-
-    String errorStr;
-    String name = "raycast";
-    ocl::ProgramSource source = ocl::_3d::hash_tsdf_oclsrc;
-    String options = "-cl-mad-enable";
-    ocl::Kernel k;
-    k.create(name.c_str(), source, options, &errorStr);
-
-    if (k.empty())
-        throw std::runtime_error("Failed to create kernel: " + errorStr);
-
-    _points.create(frameSize, CV_32FC4);
-    _normals.create(frameSize, CV_32FC4);
-
-    UMat points = _points.getUMat();
-    UMat normals = _normals.getUMat();
-
-    Intr intrinsics(_intrinsics);
-    Intr::Reprojector r = intrinsics.makeReprojector();
-    Vec2f finv(r.fxinv, r.fyinv), cxy(r.cx, r.cy);
-
-    Vec4f boxMin, boxMax(volumeUnitSize - voxelSize,
-                         volumeUnitSize - voxelSize,
-                         volumeUnitSize - voxelSize);
-
-    float tstep = truncDist * raycastStepFactor;
-
-    const HashTSDFVolumeGPU& volume(*this);
-    const Affine3f cam2vol(volume.pose.inv() * Affine3f(cameraPose));
-    const Affine3f vol2cam(Affine3f(cameraPose.inv()) * volume.pose);
-
-    Matx44f cam2volRotGPU = cam2vol.matrix;
-    Matx44f vol2camRotGPU = vol2cam.matrix;
-
-    UMat volPoseGpu = Mat(pose.matrix).getUMat(ACCESS_READ);
-    UMat invPoseGpu = Mat(pose.inv().matrix).getUMat(ACCESS_READ);
-
-    UMat hashesGpu = Mat(hashTable.hashes, false).getUMat(ACCESS_READ);
-    UMat hashDataGpu = Mat(hashTable.data, false).getUMat(ACCESS_READ);
-
-    k.args(
-        ocl::KernelArg::PtrReadOnly(hashesGpu),
-        ocl::KernelArg::PtrReadOnly(hashDataGpu),
-        ocl::KernelArg::WriteOnlyNoSize(points),
-        ocl::KernelArg::WriteOnlyNoSize(normals),
-        frameSize,
-        ocl::KernelArg::ReadOnly(volUnitsData),
-        cam2volRotGPU,
-        vol2camRotGPU,
-        float(volume.truncateThreshold),
-        finv.val, cxy.val,
-        boxMin.val, boxMax.val,
-        tstep,
-        voxelSize,
-        voxelSizeInv,
-        volumeUnitSize,
-        volume.truncDist,
-        volumeUnitDegree,
-        volStrides
-    );
-
-    size_t globalSize[2];
-    globalSize[0] = (size_t)frameSize.width;
-    globalSize[1] = (size_t)frameSize.height;
-
-    if (!k.run(2, globalSize, NULL, true))
-        throw std::runtime_error("Failed to run kernel");
-}
-
-void HashTSDFVolumeGPU::fetchPointsNormals(OutputArray _points, OutputArray _normals) const
-{
-    CV_TRACE_FUNCTION();
-
-    if (_points.needed())
-    {
-        //TODO: remove it when it works w/o CPU code
-        volUnitsData.copyTo(volUnitsDataCopy);
-        //TODO: remove it when it works w/o CPU code
-        //TODO: enable it when it's on GPU
-        //UMat hashDataGpu(hashMap.capacity, 1, CV_32SC4);
-        //Mat(hashMap.data, false).copyTo(hashDataGpu);
-
-        std::vector<std::vector<ptype>> pVecs, nVecs;
-
-        Range _fetchRange(0, hashTable.last);
-
-        const int nstripes = -1;
-
-        const HashTSDFVolumeGPU& volume(*this);
-        bool needNormals(_normals.needed());
-        Mutex mutex;
-
-        auto _HashFetchPointsNormalsInvoker = [&](const Range& range)
-        {
-            std::vector<ptype> points, normals;
-            for (int row = range.start; row < range.end; row++)
-            {
-                cv::Vec4i idx4 = hashTable.data[row];
-                cv::Vec3i idx(idx4[0], idx4[1], idx4[2]);
-
-                Point3f base_point = volume.volumeUnitIdxToVolume(idx);
-
-                std::vector<ptype> localPoints;
-                std::vector<ptype> localNormals;
-                for (int x = 0; x < volume.volumeUnitResolution; x++)
-                    for (int y = 0; y < volume.volumeUnitResolution; y++)
-                        for (int z = 0; z < volume.volumeUnitResolution; z++)
-                        {
-                            cv::Vec3i voxelIdx(x, y, z);
-                            TsdfVoxel voxel = new_at(voxelIdx, row);
-
-                            if (voxel.tsdf != -128 && voxel.weight != 0)
-                            {
-                                Point3f point = base_point + volume.voxelCoordToVolume(voxelIdx);
-                                localPoints.push_back(toPtype(this->pose * point));
-                                if (needNormals)
-                                {
-                                    Point3f normal = volume.getNormalVoxel(point);
-                                    localNormals.push_back(toPtype(this->pose.rotation() * normal));
-                                }
-                            }
-                        }
-
-                AutoLock al(mutex);
-                pVecs.push_back(localPoints);
-                nVecs.push_back(localNormals);
-            }
-        };
-
-        parallel_for_(_fetchRange, _HashFetchPointsNormalsInvoker, nstripes);
-
-        std::vector<ptype> points, normals;
-        for (size_t i = 0; i < pVecs.size(); i++)
-        {
-            points.insert(points.end(), pVecs[i].begin(), pVecs[i].end());
-            normals.insert(normals.end(), nVecs[i].begin(), nVecs[i].end());
-        }
-
-        _points.create((int)points.size(), 1, POINT_TYPE);
-        if (!points.empty())
-            Mat((int)points.size(), 1, POINT_TYPE, &points[0]).copyTo(_points.getMat());
-
-        if (_normals.needed())
-        {
-            _normals.create((int)normals.size(), 1, POINT_TYPE);
-            if (!normals.empty())
-                Mat((int)normals.size(), 1, POINT_TYPE, &normals[0]).copyTo(_normals.getMat());
-        }
-    }
-}
-
-void HashTSDFVolumeGPU::fetchNormals(InputArray _points, OutputArray _normals) const
-{
-    CV_TRACE_FUNCTION();
-
-    if (_normals.needed())
-    {
-        //TODO: remove it when it works w/o CPU code
-        volUnitsData.copyTo(volUnitsDataCopy);
-
-        Points points = _points.getMat();
-        CV_Assert(points.type() == POINT_TYPE);
-        _normals.createSameSize(_points, _points.type());
-        Normals normals = _normals.getMat();
-        const HashTSDFVolumeGPU& _volume = *this;
-        auto HashPushNormals             = [&](const ptype& point, const int* position) {
-            const HashTSDFVolumeGPU& volume(_volume);
-            Affine3f invPose(volume.pose.inv());
-            Point3f p = fromPtype(point);
-            Point3f n = nan3;
-            if (!isNaN(p))
-            {
-                Point3f voxelPoint = invPose * p;
-                n                  = volume.pose.rotation() * volume.getNormalVoxel(voxelPoint);
-            }
-            normals(position[0], position[1]) = toPtype(n);
-        };
-        points.forEach(HashPushNormals);
-    }
-
-}
-
-int HashTSDFVolumeGPU::getVisibleBlocks(int currFrameId, int frameThreshold) const
-{
-    Mat cpuIndices = lastVisibleIndices.getMat(ACCESS_READ);
-
-    int numVisibleBlocks = 0;
-    //! TODO: Iterate over map parallely?
-    for (int i = 0; i < hashTable.last; i++)
-    {
-        if (cpuIndices.at<int>(i) > (currFrameId - frameThreshold))
-            numVisibleBlocks++;
-    }
-    return numVisibleBlocks;
-}
-
-#endif
-
-//template<typename T>
-Ptr<HashTSDFVolume> makeHashTSDFVolume(const VolumeParams& _params)
-{
-    Mat pm = _params.pose;
-    CV_Assert(pm.size() == Size(4, 4));
-    CV_Assert(pm.type() == CV_32F);
-    Matx44f pose = pm;
-#ifdef HAVE_OPENCL
-    if (ocl::useOpenCL())
-        return makePtr<HashTSDFVolumeGPU>(_params.voxelSize, pose, _params.raycastStepFactor, _params.tsdfTruncDist, _params.maxWeight,
-                                          _params.depthTruncThreshold, _params.unitResolution);
-#endif
-    return makePtr<HashTSDFVolumeCPU>(_params.voxelSize, pose, _params.raycastStepFactor, _params.tsdfTruncDist, _params.maxWeight,
-                                      _params.depthTruncThreshold, _params.unitResolution);
-}
-
-//template<typename T>
-Ptr<HashTSDFVolume> makeHashTSDFVolume(float _voxelSize, Matx44f _pose, float _raycastStepFactor, float _truncDist,
-    int _maxWeight, float truncateThreshold, int volumeUnitResolution)
-{
-#ifdef HAVE_OPENCL
-    if (ocl::useOpenCL())
-        return makePtr<HashTSDFVolumeGPU>(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, truncateThreshold,
-            volumeUnitResolution);
-#endif
-    return makePtr<HashTSDFVolumeCPU>(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, truncateThreshold,
-        volumeUnitResolution);
-}
-
-}  // namespace cv
diff --git a/modules/3d/src/rgbd/hash_tsdf.hpp b/modules/3d/src/rgbd/hash_tsdf.hpp
deleted file mode 100644
index 6e8ce5f966..0000000000
--- a/modules/3d/src/rgbd/hash_tsdf.hpp
+++ /dev/null
@@ -1,47 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html
-
-#ifndef OPENCV_3D_HASH_TSDF_HPP
-#define OPENCV_3D_HASH_TSDF_HPP
-
-#include "../precomp.hpp"
-#include "tsdf_functions.hpp"
-
-namespace cv
-{
-
-class HashTSDFVolume : public Volume
-{
-   public:
-    // dimension in voxels, size in meters
-    //! Use fixed volume cuboid
-    HashTSDFVolume(float _voxelSize, cv::Matx44f _pose, float _raycastStepFactor, float _truncDist,
-                   int _maxWeight, float _truncateThreshold, int _volumeUnitRes,
-                   bool zFirstMemOrder = true);
-
-    virtual ~HashTSDFVolume() = default;
-
-    virtual int getVisibleBlocks(int currFrameId, int frameThreshold) const override = 0;
-    virtual size_t getTotalVolumeUnits() const override = 0;
-
-   public:
-    int maxWeight;
-    float truncDist;
-    float truncateThreshold;
-    int volumeUnitResolution;
-    int volumeUnitDegree;
-    float volumeUnitSize;
-    bool zFirstMemOrder;
-    Vec4i volStrides;
-};
-
-//template<typename T>
-Ptr<HashTSDFVolume> makeHashTSDFVolume(const VolumeParams& _volumeParams);
-//template<typename T>
-Ptr<HashTSDFVolume> makeHashTSDFVolume(float _voxelSize, Matx44f _pose, float _raycastStepFactor, float _truncDist,
-                                       int _maxWeight, float truncateThreshold, int volumeUnitResolution = 16);
-
-}  // namespace cv
-
-#endif // include guard
diff --git a/modules/3d/src/rgbd/hash_tsdf_functions.cpp b/modules/3d/src/rgbd/hash_tsdf_functions.cpp
new file mode 100644
index 0000000000..6f63721e64
--- /dev/null
+++ b/modules/3d/src/rgbd/hash_tsdf_functions.cpp
@@ -0,0 +1,1534 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// Partially rewritten from https://github.com/Nerei/kinfu_remake
+// Copyright(c) 2012, Anatoly Baksheev. All rights reserved.
+
+#include "../precomp.hpp"
+#include "hash_tsdf_functions.hpp"
+#include "opencl_kernels_3d.hpp"
+
+namespace cv {
+
+static Vec3i volumeToVolumeUnitIdx(const Point3f& point, const float volumeUnitSize)
+{
+    return cv::Vec3i(
+        cvFloor(point.x / volumeUnitSize),
+        cvFloor(point.y / volumeUnitSize),
+        cvFloor(point.z / volumeUnitSize));
+}
+
+static cv::Point3f volumeUnitIdxToVolume(const cv::Vec3i& volumeUnitIdx, const float volumeUnitSize)
+{
+    return cv::Point3f(
+        volumeUnitIdx[0] * volumeUnitSize,
+        volumeUnitIdx[1] * volumeUnitSize,
+        volumeUnitIdx[2] * volumeUnitSize);
+}
+
+static cv::Point3f voxelCoordToVolume(const cv::Vec3i& voxelIdx, const float voxelSize)
+{
+    return cv::Point3f(
+        voxelIdx[0] * voxelSize,
+        voxelIdx[1] * voxelSize,
+        voxelIdx[2] * voxelSize);
+}
+
+static cv::Vec3i volumeToVoxelCoord(const cv::Point3f& point, const float voxelSizeInv)
+{
+    return cv::Vec3i(
+        cvFloor(point.x * voxelSizeInv),
+        cvFloor(point.y * voxelSizeInv),
+        cvFloor(point.z * voxelSizeInv));
+}
+
+int calcVolumeUnitDegree(Point3i volumeResolution)
+{
+    if (!(volumeResolution.x & (volumeResolution.x - 1)))
+    {
+        // vuRes is a power of 2, let's get this power
+        return trailingZeros32(volumeResolution.x);
+    }
+    else
+    {
+        CV_Error(Error::StsBadArg, "Volume unit resolution should be a power of 2");
+    }
+}
+
+void allocateVolumeUnits(
+    const UMat& _depth, float depthFactor, const Affine3f volumePose, const Matx44f& cameraPose, const Intr& intrinsics,
+    CustomHashSet& hashTable, const int volumeUnitDegree, const float truncDist, const float maxDepth, const float volumeUnitSize);
+
+TsdfVoxel atVolumeUnit(
+    const Mat& volUnitsData, const VolumeUnitIndexes& volumeUnits,
+    const Vec3i& point, const Vec3i& volumeUnitIdx, VolumeUnitIndexes::const_iterator it,
+    const int volumeUnitDegree, const Vec4i volStrides);
+
+TsdfVoxel new_atVolumeUnit(
+    const Mat& volUnitsDataCopy,
+    const Vec3i& point, const Vec3i& volumeUnitIdx, const int indx,
+    const int volumeUnitDegree, const Vec4i volStrides);
+
+Point3f getNormalVoxel(
+    const Point3f& point, const float voxelSizeInv,
+    const int volumeUnitDegree, const Vec4i volStrides,
+    const Mat& volUnitsDataCopy, const VolumeUnitIndexes& volumeUnits);
+
+
+#ifdef HAVE_OPENCL
+void markActive(
+    const Matx44f& cameraPose, const Intr& intrinsics, const Size frameSz, const int frameId,
+    const Affine3f volumePose, CustomHashSet& hashTable, UMat& isActiveFlags, UMat& lastVisibleIndices,
+    const float maxDepth, const float volumeUnitSize);
+
+Point3f ocl_getNormalVoxel(
+    const Point3f& point, const float voxelSizeInv,
+    const int volumeUnitDegree, const Vec4i volStrides,
+    const Mat& volUnitsData, const CustomHashSet& hashTable);
+#endif
+
+void integrateHashTsdfVolumeUnit(
+    const VolumeSettings& settings, const Matx44f& cameraPose, int& lastVolIndex, const int frameId, const int volumeUnitDegree,
+    InputArray _depth, InputArray _pixNorms, InputArray _volUnitsData, VolumeUnitIndexes& volumeUnits)
+{
+    //std::cout << "integrateHashTsdfVolumeUnit()" << std::endl;
+
+    CV_TRACE_FUNCTION();
+
+    CV_Assert(_depth.type() == DEPTH_TYPE);
+    Depth depth = _depth.getMat();
+    Mat volUnitsData = _volUnitsData.getMat();
+    Mat pixNorms = _pixNorms.getMat();
+
+    //! Compute volumes to be allocated
+    const int depthStride = volumeUnitDegree;
+    const float invDepthFactor = 1.f / settings.getDepthFactor();
+    const float truncDist = settings.getTsdfTruncateDistance();
+    const float maxDepth = settings.getMaxDepth();
+    const float voxelSize = settings.getVoxelSize();
+
+    Vec3i resolution;
+    settings.getVolumeResolution(resolution);
+    const float volumeUnitSize = voxelSize * resolution[0];
+
+    Matx33f intr;
+    settings.getCameraIntegrateIntrinsics(intr);
+    const Intr intrinsics(intr);
+    const Intr::Reprojector reproj(intrinsics.makeReprojector());
+
+    Matx44f _pose;
+    settings.getVolumePose(_pose);
+    const Affine3f pose = Affine3f(_pose);
+    const Affine3f cam2vol(pose.inv() * Affine3f(cameraPose));
+
+    const Point3f truncPt(truncDist, truncDist, truncDist);
+    VolumeUnitIndexSet newIndices;
+    Mutex mutex;
+    Range allocateRange(0, depth.rows);
+
+    auto AllocateVolumeUnitsInvoker = [&](const Range& range) {
+        VolumeUnitIndexSet localAccessVolUnits;
+        for (int y = range.start; y < range.end; y += depthStride)
+        {
+            const depthType* depthRow = depth[y];
+            for (int x = 0; x < depth.cols; x += depthStride)
+            {
+                depthType z = depthRow[x] * invDepthFactor;
+
+                if (z <= 0 || z > maxDepth)
+                    continue;
+
+                Point3f camPoint = reproj(Point3f((float)x, (float)y, z));
+                Point3f volPoint = cam2vol * camPoint;
+                //! Find accessed TSDF volume unit for valid 3D vertex
+                Vec3i lower_bound = volumeToVolumeUnitIdx(volPoint - truncPt, volumeUnitSize);
+                Vec3i upper_bound = volumeToVolumeUnitIdx(volPoint + truncPt, volumeUnitSize);
+
+                for (int i = lower_bound[0]; i <= upper_bound[0]; i++)
+                    for (int j = lower_bound[1]; j <= upper_bound[1]; j++)
+                        for (int k = lower_bound[2]; k <= upper_bound[2]; k++)
+                        {
+                            const Vec3i tsdf_idx = Vec3i(i, j, k);
+                            if (localAccessVolUnits.count(tsdf_idx) <= 0 && volumeUnits.count(tsdf_idx) <= 0)
+                            {
+                                //! This volume unit will definitely be required for current integration
+                                localAccessVolUnits.emplace(tsdf_idx);
+                            }
+                        }
+            }
+        }
+
+        mutex.lock();
+        for (const auto& tsdf_idx : localAccessVolUnits)
+        {
+            //! If the insert into the global set passes
+            if (!newIndices.count(tsdf_idx))
+            {
+                // Volume allocation can be performed outside of the lock
+                newIndices.emplace(tsdf_idx);
+            }
+        }
+        mutex.unlock();
+    };
+    parallel_for_(allocateRange, AllocateVolumeUnitsInvoker);
+    //AllocateVolumeUnitsInvoker(allocateRange);
+
+    //! Perform the allocation
+    for (auto idx : newIndices)
+    {
+        VolumeUnit& vu = volumeUnits.emplace(idx, VolumeUnit()).first->second;
+
+        Matx44f subvolumePose = pose.translate(volumeUnitIdxToVolume(idx, volumeUnitSize)).matrix;
+
+        vu.pose = subvolumePose;
+        vu.index = lastVolIndex; lastVolIndex++;
+        if (lastVolIndex > int(volUnitsData.size().height))
+        {
+            volUnitsData.resize((lastVolIndex - 1) * 2);
+        }
+        volUnitsData.row(vu.index).forEach<VecTsdfVoxel>([](VecTsdfVoxel& vv, const int* /* position */)
+            {
+                TsdfVoxel& v = reinterpret_cast<TsdfVoxel&>(vv);
+                v.tsdf = floatToTsdf(0.0f); v.weight = 0;
+            });
+        //! This volume unit will definitely be required for current integration
+        vu.lastVisibleIndex = frameId;
+        vu.isActive = true;
+    }
+
+    //! Get keys for all the allocated volume Units
+    std::vector<Vec3i> totalVolUnits;
+    for (const auto& keyvalue : volumeUnits)
+    {
+        totalVolUnits.push_back(keyvalue.first);
+    }
+
+
+    //! Mark volumes in the camera frustum as active
+    Range inFrustumRange(0, (int)volumeUnits.size());
+    parallel_for_(inFrustumRange, [&](const Range& range) {
+        const Affine3f vol2cam(Affine3f(cameraPose.inv()) * pose);
+        const Intr::Projector proj(intrinsics.makeProjector());
+
+        for (int i = range.start; i < range.end; ++i)
+        {
+            Vec3i tsdf_idx = totalVolUnits[i];
+            VolumeUnitIndexes::iterator it = volumeUnits.find(tsdf_idx);
+            if (it == volumeUnits.end())
+                continue;
+
+            Point3f volumeUnitPos = volumeUnitIdxToVolume(it->first, volumeUnitSize);
+            Point3f volUnitInCamSpace = vol2cam * volumeUnitPos;
+            if (volUnitInCamSpace.z < 0 || volUnitInCamSpace.z > maxDepth)
+            {
+                it->second.isActive = false;
+                continue;
+            }
+            Point2f cameraPoint = proj(volUnitInCamSpace);
+            if (cameraPoint.x >= 0 && cameraPoint.y >= 0 && cameraPoint.x < depth.cols && cameraPoint.y < depth.rows)
+            {
+                assert(it != volumeUnits.end());
+                it->second.lastVisibleIndex = frameId;
+                it->second.isActive = true;
+            }
+        }
+        });
+
+
+    //! Integrate the correct volumeUnits
+    parallel_for_(Range(0, (int)totalVolUnits.size()), [&](const Range& range) {
+        for (int i = range.start; i < range.end; i++)
+        {
+            Vec3i tsdf_idx = totalVolUnits[i];
+            VolumeUnitIndexes::iterator it = volumeUnits.find(tsdf_idx);
+            if (it == volumeUnits.end())
+                return;
+
+            VolumeUnit& volumeUnit = it->second;
+            if (volumeUnit.isActive)
+            {
+                //! The volume unit should already be added into the Volume from the allocator
+                integrateTsdfVolumeUnit(settings, volumeUnit.pose, cameraPose, depth, pixNorms, volUnitsData.row(volumeUnit.index));
+
+                //! Ensure all active volumeUnits are set to inactive for next integration
+                volumeUnit.isActive = false;
+            }
+        }
+        });
+}
+
+
+void allocateVolumeUnits(
+    const UMat& _depth, float depthFactor, const Affine3f volumePose, const Matx44f& cameraPose, const Intr& intrinsics,
+    CustomHashSet& hashTable, const int volumeUnitDegree, const float truncDist, const float maxDepth, const float volumeUnitSize)
+{
+    constexpr int pixCapacity = 16;
+    typedef std::array<Vec3i, pixCapacity> LocalVolUnits;
+
+    Depth depth = _depth.getMat(ACCESS_READ);
+
+    //! Compute volumes to be allocated
+    const int depthStride = volumeUnitDegree;
+    const float invDepthFactor = 1.f / depthFactor;
+    const Intr::Reprojector reproj(intrinsics.makeReprojector());
+    const Affine3f cam2vol(volumePose.inv() * Affine3f(cameraPose));
+    const Point3f truncPt(truncDist, truncDist, truncDist);
+    Mutex mutex;
+
+    // for new indices
+    CustomHashSet thm;
+
+    auto fillLocalAcessVolUnits = [&](const Range& xrange, const Range& yrange, CustomHashSet& ghm)
+    {
+        for (int y = yrange.start; y < yrange.end; y += depthStride)
+        {
+            const depthType* depthRow = depth[y];
+            for (int x = xrange.start; x < xrange.end; x += depthStride)
+            {
+                depthType z = depthRow[x] * invDepthFactor;
+                if (z <= 0 || z > maxDepth)
+                    continue;
+                Point3f camPoint = reproj(Point3f((float)x, (float)y, z));
+                Point3f volPoint = cam2vol * camPoint;
+                //! Find accessed TSDF volume unit for valid 3D vertex
+                Vec3i lower_bound = volumeToVolumeUnitIdx(volPoint - truncPt, volumeUnitSize);
+                Vec3i upper_bound = volumeToVolumeUnitIdx(volPoint + truncPt, volumeUnitSize);
+
+                int pixLocalCounter = 0;
+                LocalVolUnits pixLocalVolUnits;
+                for (int i = lower_bound[0]; i <= upper_bound[0]; i++)
+                    for (int j = lower_bound[1]; j <= upper_bound[1]; j++)
+                        for (int k = lower_bound[2]; k <= upper_bound[2]; k++)
+                        {
+                            const Vec3i tsdf_idx = Vec3i(i, j, k);
+
+                            if (hashTable.find(tsdf_idx) < 0)
+                            {
+                                bool found = false;
+                                for (int c = 0; c < pixLocalCounter; c++)
+                                {
+                                    if (pixLocalVolUnits[c] == tsdf_idx)
+                                    {
+                                        found = true; break;
+                                    }
+                                }
+                                if (!found)
+                                {
+                                    pixLocalVolUnits[pixLocalCounter++] = tsdf_idx;
+                                    if (pixLocalCounter >= pixCapacity)
+                                    {
+                                        return;
+                                    }
+                                }
+                            }
+                        }
+
+                // lock localAccessVolUnits somehow
+                for (int i = 0; i < pixLocalCounter; i++)
+                {
+                    Vec3i idx = pixLocalVolUnits[i];
+                    if (!ghm.insert(idx))
+                    {
+                        //return;
+                    }
+                }
+                // unlock
+            }
+        }
+    };
+
+    Rect dim(0, 0, depth.cols, depth.rows);
+    Size gsz(32, 32);
+    Size gg(divUp(dim.width, gsz.width), divUp(dim.height, gsz.height));
+
+    bool needReallocation = false;
+    auto allocateLambda = [&](const Range& r)
+    {
+
+        for (int yg = r.start; yg < r.end; yg++)
+        {
+            for (int xg = 0; xg < gg.width; xg++)
+            {
+                Rect gr(xg * gsz.width, yg * gsz.height, (xg + 1) * gsz.width, (yg + 1) * gsz.height);
+                gr = gr & dim;
+                Range xr(gr.tl().x, gr.br().x), yr(gr.tl().y, gr.br().y);
+
+                CustomHashSet ghm;
+
+                fillLocalAcessVolUnits(xr, yr, ghm);
+
+                if (ghm.last)
+                {
+                    cv::AutoLock al(mutex);
+
+                    for (int i = 0; i < ghm.last; i++)
+                    {
+                        Vec4i node = ghm.data[i];
+                        Vec3i idx(node[0], node[1], node[2]);
+
+                        //TODO: 1. add to separate hash map instead, then merge on GPU side
+
+                        int result = thm.insert(idx);
+                        if (!result)
+                        {
+                            needReallocation = true;
+                            return;
+                        }
+                    }
+                }
+            }
+        }
+
+    };
+
+    do
+    {
+        if (needReallocation)
+        {
+            thm.capacity *= 2;
+            thm.data.resize(thm.capacity);
+
+            needReallocation = false;
+        }
+
+        parallel_for_(Range(0, gg.height), allocateLambda);
+    } while (needReallocation);
+
+
+    auto pushToGlobal = [](const CustomHashSet _thm, CustomHashSet& _globalHashMap,
+        bool& _needReallocation, Mutex& _mutex)
+    {
+        for (int i = 0; i < _thm.last; i++)
+        {
+            Vec4i node = _thm.data[i];
+            Vec3i idx(node[0], node[1], node[2]);
+
+            std::lock_guard<std::recursive_mutex> al(_mutex);
+
+            int result = _globalHashMap.insert(idx);
+            if (result == 0)
+            {
+                _needReallocation = true;
+                return;
+            }
+        }
+    };
+
+    needReallocation = false;
+    do
+    {
+        if (needReallocation)
+        {
+            hashTable.capacity *= 2;
+            hashTable.data.resize(hashTable.capacity);
+
+            needReallocation = false;
+        }
+
+        pushToGlobal(thm, hashTable, needReallocation, mutex);
+    } while (needReallocation);
+}
+
+#ifdef HAVE_OPENCL
+
+void markActive(
+    const Matx44f& cameraPose, const Intr& intrinsics, const Size frameSz, const int frameId,
+    const Affine3f volumePose, CustomHashSet& hashTable, UMat& isActiveFlags, UMat& lastVisibleIndices,
+    const float maxDepth, const float volumeUnitSize)
+{
+    //! Mark volumes in the camera frustum as active
+    String errorStr;
+    String name = "markActive";
+    ocl::ProgramSource source = ocl::_3d::hash_tsdf_oclsrc;
+    String options = "-cl-mad-enable";
+    ocl::Kernel k;
+    k.create(name.c_str(), source, options, &errorStr);
+
+    if (k.empty())
+        throw std::runtime_error("Failed to create kernel: " + errorStr);
+
+    const Affine3f vol2cam(Affine3f(cameraPose.inv()) * volumePose);
+    const Intr::Projector proj(intrinsics.makeProjector());
+    Vec2f fxy(proj.fx, proj.fy), cxy(proj.cx, proj.cy);
+
+    UMat hashDataGpu = Mat(hashTable.data, false).getUMat(ACCESS_READ);
+
+    k.args(
+        ocl::KernelArg::PtrReadOnly(hashDataGpu),
+        ocl::KernelArg::WriteOnly(isActiveFlags),
+        ocl::KernelArg::WriteOnly(lastVisibleIndices),
+        vol2cam.matrix,
+        fxy,
+        cxy,
+        frameSz,
+        volumeUnitSize,
+        hashTable.last,
+        maxDepth,
+        frameId
+    );
+
+    size_t globalSize[1] = { (size_t)hashTable.last };
+    if (!k.run(1, globalSize, nullptr, true))
+        throw std::runtime_error("Failed to run kernel");
+}
+
+
+void ocl_integrateHashTsdfVolumeUnit(
+    const VolumeSettings& settings, const Matx44f& cameraPose, int& lastVolIndex, const int frameId, int& bufferSizeDegree, const int volumeUnitDegree,
+    InputArray _depth, InputArray _pixNorms, InputArray _lastVisibleIndices, InputArray _volUnitsDataCopy,  InputArray _volUnitsData, CustomHashSet& hashTable, InputArray _isActiveFlags)
+{
+    CV_TRACE_FUNCTION();
+    UMat depth = _depth.getUMat();
+    CV_Assert(!depth.empty());
+    CV_Assert(lastVolIndex >= 0);
+    UMat pixNorms = _pixNorms.getUMat();
+    UMat volUnitsData = _volUnitsData.getUMat();
+    Mat volUnitsDataCopy = _volUnitsDataCopy.getMat();
+    UMat isActiveFlags = _isActiveFlags.getUMat();
+    UMat lastVisibleIndices = _lastVisibleIndices.getUMat();
+
+    Matx33f intr;
+    settings.getCameraIntegrateIntrinsics(intr);
+    const Intr intrinsics(intr);
+
+    Vec4i volStrides;
+    settings.getVolumeDimensions(volStrides);
+
+    Vec3i resolution;
+    settings.getVolumeResolution(resolution);
+    const int volumeUnitResolution = resolution[0];
+    const int maxWeight = settings.getMaxWeight();
+    const float truncDist = settings.getTsdfTruncateDistance();
+    const float maxDepth = settings.getMaxDepth();
+    const float voxelSize = settings.getVoxelSize();
+    const float depthFactor = settings.getDepthFactor();
+    const float dfac = 1.f / depthFactor;
+    const float volumeUnitSize = voxelSize * resolution[0];
+
+    Matx44f _pose;
+    settings.getVolumePose(_pose);
+    const Affine3f pose = Affine3f(_pose);
+    Matx44f vol2camMatrix = (Affine3f(cameraPose).inv() * pose).matrix;
+    Matx44f camInvMatrix = Affine3f(cameraPose).inv().matrix;
+
+    // Save length to fill new data in ranges
+    int sizeBefore = hashTable.last;
+    allocateVolumeUnits(depth, depthFactor, pose, cameraPose, intrinsics, hashTable, volumeUnitDegree, truncDist, maxDepth, volumeUnitSize);
+    int sizeAfter = hashTable.last;
+    //! Perform the allocation
+
+    // Grow buffers
+    int buff_lvl = (int)(1 << bufferSizeDegree);
+    if (sizeAfter >= buff_lvl)
+    {
+        bufferSizeDegree = (int)(log2(sizeAfter) + 1); // clz() would be better
+        int oldBuffSize = buff_lvl;
+        buff_lvl = (int)pow(2, bufferSizeDegree);
+
+        volUnitsDataCopy.resize(buff_lvl);
+
+        Range oldr(0, oldBuffSize);
+        int volCubed = volumeUnitResolution * volumeUnitResolution * volumeUnitResolution;
+        UMat newData(buff_lvl, volCubed, CV_8UC2);
+        volUnitsData.copyTo(newData.rowRange(oldr));
+        volUnitsData = newData;
+
+        UMat newLastVisibleIndices(buff_lvl, 1, CV_32S);
+        lastVisibleIndices.copyTo(newLastVisibleIndices.rowRange(oldr));
+        lastVisibleIndices = newLastVisibleIndices;
+
+        UMat newIsActiveFlags(buff_lvl, 1, CV_8U);
+        isActiveFlags.copyTo(newIsActiveFlags.rowRange(oldr));
+        isActiveFlags = newIsActiveFlags;
+    }
+
+    // Fill data for new volume units
+    Range r(sizeBefore, sizeAfter);
+    if (r.start < r.end)
+    {
+        lastVisibleIndices.rowRange(r) = frameId;
+        isActiveFlags.rowRange(r) = 1;
+
+        TsdfVoxel emptyVoxel(floatToTsdf(0.0f), 0);
+        volUnitsData.rowRange(r) = Vec2b((uchar)(emptyVoxel.tsdf), (uchar)(emptyVoxel.weight));
+    }
+
+    //! Mark volumes in the camera frustum as active
+    markActive(cameraPose, intrinsics, depth.size(), frameId, pose, hashTable, isActiveFlags, lastVisibleIndices, maxDepth, volumeUnitSize);
+
+    //! Integrate the correct volumeUnits
+    String errorStr;
+    String name = "integrateAllVolumeUnits";
+    ocl::ProgramSource source = ocl::_3d::hash_tsdf_oclsrc;
+    String options = "-cl-mad-enable";
+    ocl::Kernel k;
+    k.create(name.c_str(), source, options, &errorStr);
+
+    if (k.empty())
+        throw std::runtime_error("Failed to create kernel: " + errorStr);
+
+    Vec2f fxy(intrinsics.fx, intrinsics.fy), cxy(intrinsics.cx, intrinsics.cy);
+
+    UMat hashesGpu = Mat(hashTable.hashes, false).getUMat(ACCESS_READ);
+    UMat hashDataGpu = Mat(hashTable.data, false).getUMat(ACCESS_READ);
+
+    k.args(ocl::KernelArg::ReadOnly(depth),
+        ocl::KernelArg::PtrReadOnly(hashesGpu),
+        ocl::KernelArg::PtrReadOnly(hashDataGpu),
+        ocl::KernelArg::ReadWrite(volUnitsData),
+        ocl::KernelArg::ReadOnly(pixNorms),
+        ocl::KernelArg::ReadOnly(isActiveFlags),
+        vol2camMatrix,
+        camInvMatrix,
+        voxelSize,
+        volumeUnitResolution,
+        volStrides.val,
+        fxy.val,
+        cxy.val,
+        dfac,
+        truncDist,
+        int(maxWeight)
+    );
+
+    int resol = volumeUnitResolution;
+    size_t globalSize[3];
+    globalSize[0] = (size_t)resol;
+    globalSize[1] = (size_t)resol;
+    globalSize[2] = (size_t)hashTable.last; // num of volume units
+
+    if (!k.run(3, globalSize, NULL, true))
+        throw std::runtime_error("Failed to run kernel");
+
+    //std::cout << "ocl_integrateHashTsdfVolumeUnit() end" << std::endl;
+}
+#endif
+
+inline TsdfVoxel _at(Mat& volUnitsData, const cv::Vec3i& volumeIdx, int indx,
+    const int volumeUnitResolution, const Vec4i volStrides)
+{
+    //! Out of bounds
+    if ((volumeIdx[0] >= volumeUnitResolution || volumeIdx[0] < 0) ||
+        (volumeIdx[1] >= volumeUnitResolution || volumeIdx[1] < 0) ||
+        (volumeIdx[2] >= volumeUnitResolution || volumeIdx[2] < 0))
+    {
+        return TsdfVoxel(floatToTsdf(1.f), 0);
+    }
+
+    const TsdfVoxel* volData = volUnitsData.ptr<TsdfVoxel>(indx);
+    int coordBase =
+        volumeIdx[0] * volStrides[0] + volumeIdx[1] * volStrides[1] + volumeIdx[2] * volStrides[2];
+    return volData[coordBase];
+}
+
+#if USE_INTRINSICS
+inline float interpolate(float tx, float ty, float tz, float vx[8])
+{
+    v_float32x4 v0246, v1357;
+    v_load_deinterleave(vx, v0246, v1357);
+
+    v_float32x4 vxx = v0246 + v_setall_f32(tz) * (v1357 - v0246);
+
+    v_float32x4 v00_10 = vxx;
+    v_float32x4 v01_11 = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(vxx)));
+
+    v_float32x4 v0_1 = v00_10 + v_setall_f32(ty) * (v01_11 - v00_10);
+    float v0 = v0_1.get0();
+    v0_1 = v_reinterpret_as_f32(v_rotate_right<2>(v_reinterpret_as_u32(v0_1)));
+    float v1 = v0_1.get0();
+
+    return v0 + tx * (v1 - v0);
+}
+
+#else
+inline float interpolate(float tx, float ty, float tz, float vx[8])
+{
+    float v00 = vx[0] + tz * (vx[1] - vx[0]);
+    float v01 = vx[2] + tz * (vx[3] - vx[2]);
+    float v10 = vx[4] + tz * (vx[5] - vx[4]);
+    float v11 = vx[6] + tz * (vx[7] - vx[6]);
+
+    float v0 = v00 + ty * (v01 - v00);
+    float v1 = v10 + ty * (v11 - v10);
+
+    return v0 + tx * (v1 - v0);
+}
+#endif
+
+TsdfVoxel atVolumeUnit(
+    const Mat& volUnitsData, const VolumeUnitIndexes& volumeUnits,
+    const Vec3i& point, const Vec3i& volumeUnitIdx, VolumeUnitIndexes::const_iterator it,
+    const int volumeUnitDegree, const Vec4i volStrides)
+{
+    if (it == volumeUnits.end())
+    {
+        return TsdfVoxel(floatToTsdf(1.f), 0);
+    }
+    Vec3i volUnitLocalIdx = point - Vec3i(volumeUnitIdx[0] << volumeUnitDegree,
+        volumeUnitIdx[1] << volumeUnitDegree,
+        volumeUnitIdx[2] << volumeUnitDegree);
+
+    // expanding at(), removing bounds check
+    const TsdfVoxel* volData = volUnitsData.ptr<TsdfVoxel>(it->second.index);
+    int coordBase = volUnitLocalIdx[0] * volStrides[0] + volUnitLocalIdx[1] * volStrides[1] + volUnitLocalIdx[2] * volStrides[2];
+    return volData[coordBase];
+}
+
+
+TsdfVoxel new_atVolumeUnit(
+    const Mat& volUnitsDataCopy,
+    const Vec3i& point, const Vec3i& volumeUnitIdx, const int indx,
+    const int volumeUnitDegree, const Vec4i volStrides)
+{
+    if (indx < 0)
+    {
+        return TsdfVoxel(floatToTsdf(1.f), 0);
+    }
+    Vec3i volUnitLocalIdx = point - Vec3i(volumeUnitIdx[0] << volumeUnitDegree,
+        volumeUnitIdx[1] << volumeUnitDegree,
+        volumeUnitIdx[2] << volumeUnitDegree);
+
+    // expanding at(), removing bounds check
+    const TsdfVoxel* volData = volUnitsDataCopy.ptr<TsdfVoxel>(indx);
+    int coordBase = volUnitLocalIdx[0] * volStrides[0] +
+        volUnitLocalIdx[1] * volStrides[1] +
+        volUnitLocalIdx[2] * volStrides[2];
+    return volData[coordBase];
+}
+
+
+
+Point3f getNormalVoxel(
+    const Point3f& point, const float voxelSizeInv,
+    const int volumeUnitDegree,  const Vec4i volStrides,
+    const Mat& volUnitsDataCopy, const VolumeUnitIndexes& volumeUnits)
+{
+    Vec3f normal = Vec3f(0, 0, 0);
+
+    Point3f ptVox = point * voxelSizeInv;
+    Vec3i iptVox(cvFloor(ptVox.x), cvFloor(ptVox.y), cvFloor(ptVox.z));
+
+    // A small hash table to reduce a number of find() calls
+    bool queried[8];
+    VolumeUnitIndexes::const_iterator iterMap[8];
+    for (int i = 0; i < 8; i++)
+    {
+        iterMap[i] = volumeUnits.end();
+        queried[i] = false;
+    }
+
+#if !USE_INTERPOLATION_IN_GETNORMAL
+    const Vec3i offsets[] = { { 1,  0,  0}, {-1,  0,  0}, { 0,  1,  0}, // 0-3
+                              { 0, -1,  0}, { 0,  0,  1}, { 0,  0, -1}  // 4-7
+    };
+    const int nVals = 6;
+
+#else
+    const Vec3i offsets[] = { { 0,  0,  0}, { 0,  0,  1}, { 0,  1,  0}, { 0,  1,  1}, //  0-3
+                              { 1,  0,  0}, { 1,  0,  1}, { 1,  1,  0}, { 1,  1,  1}, //  4-7
+                              {-1,  0,  0}, {-1,  0,  1}, {-1,  1,  0}, {-1,  1,  1}, //  8-11
+                              { 2,  0,  0}, { 2,  0,  1}, { 2,  1,  0}, { 2,  1,  1}, // 12-15
+                              { 0, -1,  0}, { 0, -1,  1}, { 1, -1,  0}, { 1, -1,  1}, // 16-19
+                              { 0,  2,  0}, { 0,  2,  1}, { 1,  2,  0}, { 1,  2,  1}, // 20-23
+                              { 0,  0, -1}, { 0,  1, -1}, { 1,  0, -1}, { 1,  1, -1}, // 24-27
+                              { 0,  0,  2}, { 0,  1,  2}, { 1,  0,  2}, { 1,  1,  2}, // 28-31
+    };
+    const int nVals = 32;
+#endif
+
+    float vals[nVals];
+    for (int i = 0; i < nVals; i++)
+    {
+        Vec3i pt = iptVox + offsets[i];
+
+        Vec3i volumeUnitIdx = Vec3i(pt[0] >> volumeUnitDegree, pt[1] >> volumeUnitDegree, pt[2] >> volumeUnitDegree);
+
+        int dictIdx = (volumeUnitIdx[0] & 1) + (volumeUnitIdx[1] & 1) * 2 + (volumeUnitIdx[2] & 1) * 4;
+        auto it = iterMap[dictIdx];
+        if (!queried[dictIdx])
+        {
+            it = volumeUnits.find(volumeUnitIdx);
+            iterMap[dictIdx] = it;
+            queried[dictIdx] = true;
+        }
+
+        vals[i] = tsdfToFloat(atVolumeUnit(volUnitsDataCopy, volumeUnits, pt, volumeUnitIdx, it, volumeUnitDegree, volStrides).tsdf);
+    }
+
+#if !USE_INTERPOLATION_IN_GETNORMAL
+    for (int c = 0; c < 3; c++)
+    {
+        normal[c] = vals[c * 2] - vals[c * 2 + 1];
+    }
+#else
+
+    float cxv[8], cyv[8], czv[8];
+
+    // How these numbers were obtained:
+    // 1. Take the basic interpolation sequence:
+    // 000, 001, 010, 011, 100, 101, 110, 111
+    // where each digit corresponds to shift by x, y, z axis respectively.
+    // 2. Add +1 for next or -1 for prev to each coordinate to corresponding axis
+    // 3. Search corresponding values in offsets
+    const int idxxp[8] = { 8,  9, 10, 11,  0,  1,  2,  3 };
+    const int idxxn[8] = { 4,  5,  6,  7, 12, 13, 14, 15 };
+    const int idxyp[8] = { 16, 17,  0,  1, 18, 19,  4,  5 };
+    const int idxyn[8] = { 2,  3, 20, 21,  6,  7, 22, 23 };
+    const int idxzp[8] = { 24,  0, 25,  2, 26,  4, 27,  6 };
+    const int idxzn[8] = { 1, 28,  3, 29,  5, 30,  7, 31 };
+
+#if !USE_INTRINSICS
+    for (int i = 0; i < 8; i++)
+    {
+        cxv[i] = vals[idxxn[i]] - vals[idxxp[i]];
+        cyv[i] = vals[idxyn[i]] - vals[idxyp[i]];
+        czv[i] = vals[idxzn[i]] - vals[idxzp[i]];
+    }
+#else
+
+# if CV_SIMD_WIDTH >= 32
+    v_float32x8 cxp = v_lut(vals, idxxp);
+    v_float32x8 cxn = v_lut(vals, idxxn);
+
+    v_float32x8 cyp = v_lut(vals, idxyp);
+    v_float32x8 cyn = v_lut(vals, idxyn);
+
+    v_float32x8 czp = v_lut(vals, idxzp);
+    v_float32x8 czn = v_lut(vals, idxzn);
+
+    v_float32x8 vcxv = cxn - cxp;
+    v_float32x8 vcyv = cyn - cyp;
+    v_float32x8 vczv = czn - czp;
+
+    v_store(cxv, vcxv);
+    v_store(cyv, vcyv);
+    v_store(czv, vczv);
+# else
+    v_float32x4 cxp0 = v_lut(vals, idxxp + 0); v_float32x4 cxp1 = v_lut(vals, idxxp + 4);
+    v_float32x4 cxn0 = v_lut(vals, idxxn + 0); v_float32x4 cxn1 = v_lut(vals, idxxn + 4);
+
+    v_float32x4 cyp0 = v_lut(vals, idxyp + 0); v_float32x4 cyp1 = v_lut(vals, idxyp + 4);
+    v_float32x4 cyn0 = v_lut(vals, idxyn + 0); v_float32x4 cyn1 = v_lut(vals, idxyn + 4);
+
+    v_float32x4 czp0 = v_lut(vals, idxzp + 0); v_float32x4 czp1 = v_lut(vals, idxzp + 4);
+    v_float32x4 czn0 = v_lut(vals, idxzn + 0); v_float32x4 czn1 = v_lut(vals, idxzn + 4);
+
+    v_float32x4 cxv0 = cxn0 - cxp0; v_float32x4 cxv1 = cxn1 - cxp1;
+    v_float32x4 cyv0 = cyn0 - cyp0; v_float32x4 cyv1 = cyn1 - cyp1;
+    v_float32x4 czv0 = czn0 - czp0; v_float32x4 czv1 = czn1 - czp1;
+
+    v_store(cxv + 0, cxv0); v_store(cxv + 4, cxv1);
+    v_store(cyv + 0, cyv0); v_store(cyv + 4, cyv1);
+    v_store(czv + 0, czv0); v_store(czv + 4, czv1);
+#endif
+
+#endif
+
+    float tx = ptVox.x - iptVox[0];
+    float ty = ptVox.y - iptVox[1];
+    float tz = ptVox.z - iptVox[2];
+
+    normal[0] = interpolate(tx, ty, tz, cxv);
+    normal[1] = interpolate(tx, ty, tz, cyv);
+    normal[2] = interpolate(tx, ty, tz, czv);
+#endif
+
+    float nv = sqrt(normal[0] * normal[0] +
+        normal[1] * normal[1] +
+        normal[2] * normal[2]);
+    return nv < 0.0001f ? nan3 : normal / nv;
+}
+
+#ifdef HAVE_OPENCL
+
+Point3f ocl_getNormalVoxel(
+    const Point3f& point, const float voxelSizeInv,
+    const int volumeUnitDegree, const Vec4i volStrides,
+    const Mat& volUnitsData, const CustomHashSet& hashTable)
+{
+    Vec3f normal = Vec3f(0, 0, 0);
+
+    Point3f ptVox = point * voxelSizeInv;
+    Vec3i iptVox(cvFloor(ptVox.x), cvFloor(ptVox.y), cvFloor(ptVox.z));
+
+    // A small hash table to reduce a number of find() calls
+    // -2 and lower means not queried yet
+    // -1 means not found
+    // 0+ means found
+    int iterMap[8];
+    for (int i = 0; i < 8; i++)
+    {
+        iterMap[i] = -2;
+    }
+
+#if !USE_INTERPOLATION_IN_GETNORMAL
+    const Vec3i offsets[] = { { 1,  0,  0}, {-1,  0,  0}, { 0,  1,  0}, // 0-3
+                              { 0, -1,  0}, { 0,  0,  1}, { 0,  0, -1}  // 4-7
+    };
+    const int nVals = 6;
+
+#else
+    const Vec3i offsets[] = { { 0,  0,  0}, { 0,  0,  1}, { 0,  1,  0}, { 0,  1,  1}, //  0-3
+                              { 1,  0,  0}, { 1,  0,  1}, { 1,  1,  0}, { 1,  1,  1}, //  4-7
+                              {-1,  0,  0}, {-1,  0,  1}, {-1,  1,  0}, {-1,  1,  1}, //  8-11
+                              { 2,  0,  0}, { 2,  0,  1}, { 2,  1,  0}, { 2,  1,  1}, // 12-15
+                              { 0, -1,  0}, { 0, -1,  1}, { 1, -1,  0}, { 1, -1,  1}, // 16-19
+                              { 0,  2,  0}, { 0,  2,  1}, { 1,  2,  0}, { 1,  2,  1}, // 20-23
+                              { 0,  0, -1}, { 0,  1, -1}, { 1,  0, -1}, { 1,  1, -1}, // 24-27
+                              { 0,  0,  2}, { 0,  1,  2}, { 1,  0,  2}, { 1,  1,  2}, // 28-31
+    };
+    const int nVals = 32;
+#endif
+
+    float vals[nVals];
+    for (int i = 0; i < nVals; i++)
+    {
+        Vec3i pt = iptVox + offsets[i];
+
+        Vec3i volumeUnitIdx = Vec3i(pt[0] >> volumeUnitDegree, pt[1] >> volumeUnitDegree, pt[2] >> volumeUnitDegree);
+
+        int dictIdx = (volumeUnitIdx[0] & 1) + (volumeUnitIdx[1] & 1) * 2 + (volumeUnitIdx[2] & 1) * 4;
+        auto it = iterMap[dictIdx];
+        if (it < -1)
+        {
+            it = hashTable.find(volumeUnitIdx);
+            iterMap[dictIdx] = it;
+        }
+
+        vals[i] = tsdfToFloat(new_atVolumeUnit(volUnitsData, pt, volumeUnitIdx, it, volumeUnitDegree, volStrides).tsdf);
+    }
+
+#if !USE_INTERPOLATION_IN_GETNORMAL
+    for (int c = 0; c < 3; c++)
+    {
+        normal[c] = vals[c * 2] - vals[c * 2 + 1];
+    }
+#else
+
+    float cxv[8], cyv[8], czv[8];
+
+    // How these numbers were obtained:
+    // 1. Take the basic interpolation sequence:
+    // 000, 001, 010, 011, 100, 101, 110, 111
+    // where each digit corresponds to shift by x, y, z axis respectively.
+    // 2. Add +1 for next or -1 for prev to each coordinate to corresponding axis
+    // 3. Search corresponding values in offsets
+    const int idxxp[8] = { 8,  9, 10, 11,  0,  1,  2,  3 };
+    const int idxxn[8] = { 4,  5,  6,  7, 12, 13, 14, 15 };
+    const int idxyp[8] = { 16, 17,  0,  1, 18, 19,  4,  5 };
+    const int idxyn[8] = { 2,  3, 20, 21,  6,  7, 22, 23 };
+    const int idxzp[8] = { 24,  0, 25,  2, 26,  4, 27,  6 };
+    const int idxzn[8] = { 1, 28,  3, 29,  5, 30,  7, 31 };
+
+#if !USE_INTRINSICS
+    for (int i = 0; i < 8; i++)
+    {
+        cxv[i] = vals[idxxn[i]] - vals[idxxp[i]];
+        cyv[i] = vals[idxyn[i]] - vals[idxyp[i]];
+        czv[i] = vals[idxzn[i]] - vals[idxzp[i]];
+    }
+#else
+
+# if CV_SIMD_WIDTH >= 32
+    v_float32x8 cxp = v_lut(vals, idxxp);
+    v_float32x8 cxn = v_lut(vals, idxxn);
+
+    v_float32x8 cyp = v_lut(vals, idxyp);
+    v_float32x8 cyn = v_lut(vals, idxyn);
+
+    v_float32x8 czp = v_lut(vals, idxzp);
+    v_float32x8 czn = v_lut(vals, idxzn);
+
+    v_float32x8 vcxv = cxn - cxp;
+    v_float32x8 vcyv = cyn - cyp;
+    v_float32x8 vczv = czn - czp;
+
+    v_store(cxv, vcxv);
+    v_store(cyv, vcyv);
+    v_store(czv, vczv);
+# else
+    v_float32x4 cxp0 = v_lut(vals, idxxp + 0); v_float32x4 cxp1 = v_lut(vals, idxxp + 4);
+    v_float32x4 cxn0 = v_lut(vals, idxxn + 0); v_float32x4 cxn1 = v_lut(vals, idxxn + 4);
+
+    v_float32x4 cyp0 = v_lut(vals, idxyp + 0); v_float32x4 cyp1 = v_lut(vals, idxyp + 4);
+    v_float32x4 cyn0 = v_lut(vals, idxyn + 0); v_float32x4 cyn1 = v_lut(vals, idxyn + 4);
+
+    v_float32x4 czp0 = v_lut(vals, idxzp + 0); v_float32x4 czp1 = v_lut(vals, idxzp + 4);
+    v_float32x4 czn0 = v_lut(vals, idxzn + 0); v_float32x4 czn1 = v_lut(vals, idxzn + 4);
+
+    v_float32x4 cxv0 = cxn0 - cxp0; v_float32x4 cxv1 = cxn1 - cxp1;
+    v_float32x4 cyv0 = cyn0 - cyp0; v_float32x4 cyv1 = cyn1 - cyp1;
+    v_float32x4 czv0 = czn0 - czp0; v_float32x4 czv1 = czn1 - czp1;
+
+    v_store(cxv + 0, cxv0); v_store(cxv + 4, cxv1);
+    v_store(cyv + 0, cyv0); v_store(cyv + 4, cyv1);
+    v_store(czv + 0, czv0); v_store(czv + 4, czv1);
+#endif
+
+#endif
+
+    float tx = ptVox.x - iptVox[0];
+    float ty = ptVox.y - iptVox[1];
+    float tz = ptVox.z - iptVox[2];
+
+    normal[0] = interpolate(tx, ty, tz, cxv);
+    normal[1] = interpolate(tx, ty, tz, cyv);
+    normal[2] = interpolate(tx, ty, tz, czv);
+#endif
+
+    float nv = sqrt(normal[0] * normal[0] +
+        normal[1] * normal[1] +
+        normal[2] * normal[2]);
+    return nv < 0.0001f ? nan3 : normal / nv;
+}
+
+#endif
+
+void raycastHashTsdfVolumeUnit(
+    const VolumeSettings& settings, const Matx44f& cameraPose, int height, int width, const int volumeUnitDegree,
+    InputArray _volUnitsData, const VolumeUnitIndexes& volumeUnits, OutputArray _points, OutputArray _normals)
+{
+    //std::cout << "raycastHashTsdfVolumeUnit()" << std::endl;
+
+    CV_TRACE_FUNCTION();
+    Size frameSize(width, height);
+    CV_Assert(frameSize.area() > 0);
+
+    Mat volUnitsData = _volUnitsData.getMat();
+
+    _points.create(frameSize, POINT_TYPE);
+    _normals.create(frameSize, POINT_TYPE);
+
+    Points points1 = _points.getMat();
+    Normals normals1 = _normals.getMat();
+
+    Points& points(points1);
+    Normals& normals(normals1);
+
+    const float truncDist = settings.getTsdfTruncateDistance();
+    const float raycastStepFactor = settings.getRaycastStepFactor();
+    const float tstep = truncDist * raycastStepFactor;
+    const float maxDepth = settings.getMaxDepth();
+    const float voxelSize = settings.getVoxelSize();
+    const float voxelSizeInv = 1.f / voxelSize;
+
+    const Vec4i volDims;
+    settings.getVolumeDimensions(volDims);
+    Vec3i resolution;
+    settings.getVolumeResolution(resolution);
+    const Point3i volResolution = Point3i(resolution);
+    const float volumeUnitSize = voxelSize * resolution[0];
+
+    Matx44f _pose;
+    settings.getVolumePose(_pose);
+    const Affine3f pose = Affine3f(_pose);
+    const Affine3f cam2vol(pose.inv() * Affine3f(cameraPose));
+    const Affine3f vol2cam(Affine3f(cameraPose.inv()) * pose);
+
+    Matx33f intr;
+    settings.getCameraRaycastIntrinsics(intr);
+    const Intr intrinsics(intr);
+    const Intr::Reprojector reproj(intrinsics.makeReprojector());
+
+    const int nstripes = -1;
+
+    auto _HashRaycastInvoker = [&](const Range& range)
+    {
+        const Point3f cam2volTrans = cam2vol.translation();
+        const Matx33f cam2volRot = cam2vol.rotation();
+        const Matx33f vol2camRot = vol2cam.rotation();
+
+        const float blockSize = volumeUnitSize;
+
+        for (int y = range.start; y < range.end; y++)
+        {
+            ptype* ptsRow = points[y];
+            ptype* nrmRow = normals[y];
+
+            for (int x = 0; x < points.cols; x++)
+            {
+                //! Initialize default value
+                Point3f point = nan3, normal = nan3;
+
+                //! Ray origin and direction in the volume coordinate frame
+                Point3f orig = cam2volTrans;
+                Point3f rayDirV = normalize(Vec3f(cam2volRot * reproj(Point3f(float(x), float(y), 1.f))));
+
+                float tmin = 0;
+                float tmax = maxDepth;
+                float tcurr = tmin;
+
+                cv::Vec3i prevVolumeUnitIdx =
+                    cv::Vec3i(std::numeric_limits<int>::min(), std::numeric_limits<int>::min(),
+                        std::numeric_limits<int>::min());
+
+                float tprev = tcurr;
+                float prevTsdf = truncDist;
+                while (tcurr < tmax)
+                {
+
+                    Point3f currRayPos = orig + tcurr * rayDirV;
+                    cv::Vec3i currVolumeUnitIdx = volumeToVolumeUnitIdx(currRayPos, volumeUnitSize);
+
+                    VolumeUnitIndexes::const_iterator it = volumeUnits.find(currVolumeUnitIdx);
+
+                    float currTsdf = prevTsdf;
+                    int currWeight = 0;
+                    float stepSize = 0.5f * blockSize;
+                    cv::Vec3i volUnitLocalIdx;
+
+
+                    //! The subvolume exists in hashtable
+                    if (it != volumeUnits.end())
+                    {
+                        cv::Point3f currVolUnitPos = volumeUnitIdxToVolume(currVolumeUnitIdx, volumeUnitSize);
+                        volUnitLocalIdx = volumeToVoxelCoord(currRayPos - currVolUnitPos, voxelSizeInv);
+
+                        //! TODO: Figure out voxel interpolation
+                        TsdfVoxel currVoxel = _at(volUnitsData, volUnitLocalIdx, it->second.index, volResolution.x, volDims);
+                        currTsdf = tsdfToFloat(currVoxel.tsdf);
+                        currWeight = currVoxel.weight;
+                        stepSize = tstep;
+                    }
+
+                    //std::cout << prevTsdf << " " << currTsdf << " " << currWeight << std::endl;
+                    //! Surface crossing
+                    if (prevTsdf > 0.f && currTsdf <= 0.f && currWeight > 0)
+                    {
+                        float tInterp = (tcurr * prevTsdf - tprev * currTsdf) / (prevTsdf - currTsdf);
+                        if (!cvIsNaN(tInterp) && !cvIsInf(tInterp))
+                        {
+                            Point3f pv = orig + tInterp * rayDirV;
+                            Point3f nv = getNormalVoxel(pv, voxelSizeInv, volumeUnitDegree, volDims, volUnitsData, volumeUnits);
+
+                            if (!isNaN(nv))
+                            {
+                                normal = vol2camRot * nv;
+                                point = vol2cam * pv;
+                            }
+                        }
+                        break;
+                    }
+                    prevVolumeUnitIdx = currVolumeUnitIdx;
+                    prevTsdf = currTsdf;
+                    tprev = tcurr;
+                    tcurr += stepSize;
+                }
+                ptsRow[x] = toPtype(point);
+                nrmRow[x] = toPtype(normal);
+            }
+        }
+    };
+
+    parallel_for_(Range(0, points.rows), _HashRaycastInvoker, nstripes);
+
+    //std::cout << "raycastHashTsdfVolumeUnit() end" << std::endl;
+}
+
+#ifdef HAVE_OPENCL
+
+void ocl_raycastHashTsdfVolumeUnit(
+    const VolumeSettings& settings, const Matx44f& cameraPose, int height, int width, const int volumeUnitDegree,
+    const CustomHashSet& hashTable, InputArray _volUnitsData, OutputArray _points, OutputArray _normals)
+{
+    CV_TRACE_FUNCTION();
+    Size frameSize(width, height);
+    CV_Assert(frameSize.area() > 0);
+
+    UMat volUnitsData = _volUnitsData.getUMat();
+
+    String errorStr;
+    String name = "raycast";
+    ocl::ProgramSource source = ocl::_3d::hash_tsdf_oclsrc;
+    String options = "-cl-mad-enable";
+    ocl::Kernel k;
+    k.create(name.c_str(), source, options, &errorStr);
+
+    if (k.empty())
+        throw std::runtime_error("Failed to create kernel: " + errorStr);
+
+    _points.create(frameSize, CV_32FC4);
+    _normals.create(frameSize, CV_32FC4);
+
+    UMat points = _points.getUMat();
+    UMat normals = _normals.getUMat();
+
+    Matx33f intr;
+    settings.getCameraRaycastIntrinsics(intr);
+    Intr intrinsics(intr);
+    Intr::Reprojector r = intrinsics.makeReprojector();
+    Vec2f finv(r.fxinv, r.fyinv), cxy(r.cx, r.cy);
+
+    const float truncDist = settings.getTsdfTruncateDistance();
+    const float raycastStepFactor = settings.getRaycastStepFactor();
+    const float tstep = truncDist * raycastStepFactor;
+    const float maxDepth = settings.getMaxDepth();
+    const float voxelSize = settings.getVoxelSize();
+    const float voxelSizeInv = 1.f / voxelSize;
+
+    const Vec4i volStrides;
+    settings.getVolumeDimensions(volStrides);
+    Vec3i resolution;
+    settings.getVolumeResolution(resolution);
+    const Point3i volResolution = Point3i(resolution);
+    const float volumeUnitSize = voxelSize * volResolution.x;
+
+    Vec4f boxMin, boxMax(volumeUnitSize - voxelSize,
+        volumeUnitSize - voxelSize,
+        volumeUnitSize - voxelSize);
+
+    Matx44f _pose;
+    settings.getVolumePose(_pose);
+    const Affine3f pose = Affine3f(_pose);
+    const Affine3f cam2vol(pose.inv() * Affine3f(cameraPose));
+    const Affine3f vol2cam(Affine3f(cameraPose.inv()) * pose);
+
+    Matx44f cam2volRotGPU = cam2vol.matrix;
+    Matx44f vol2camRotGPU = vol2cam.matrix;
+
+    UMat volPoseGpu = Mat(pose.matrix).getUMat(ACCESS_READ);
+    UMat invPoseGpu = Mat(pose.inv().matrix).getUMat(ACCESS_READ);
+
+    UMat hashesGpu = Mat(hashTable.hashes, false).getUMat(ACCESS_READ);
+    UMat hashDataGpu = Mat(hashTable.data, false).getUMat(ACCESS_READ);
+
+    k.args(
+        ocl::KernelArg::PtrReadOnly(hashesGpu),
+        ocl::KernelArg::PtrReadOnly(hashDataGpu),
+        ocl::KernelArg::WriteOnlyNoSize(points),
+        ocl::KernelArg::WriteOnlyNoSize(normals),
+        frameSize,
+        ocl::KernelArg::ReadOnly(volUnitsData),
+        cam2volRotGPU,
+        vol2camRotGPU,
+        float(maxDepth),
+        finv.val, cxy.val,
+        boxMin.val, boxMax.val,
+        tstep,
+        voxelSize,
+        voxelSizeInv,
+        volumeUnitSize,
+        truncDist,
+        volumeUnitDegree,
+        volStrides
+    );
+
+    size_t globalSize[2];
+    globalSize[0] = (size_t)frameSize.width;
+    globalSize[1] = (size_t)frameSize.height;
+
+    if (!k.run(2, globalSize, NULL, true))
+        throw std::runtime_error("Failed to run kernel");
+
+}
+#endif
+
+void fetchNormalsFromHashTsdfVolumeUnit(
+    const VolumeSettings& settings, InputArray _volUnitsData, const VolumeUnitIndexes& volumeUnits,
+    const int volumeUnitDegree, InputArray _points, OutputArray _normals)
+{
+    CV_TRACE_FUNCTION();
+
+    if (!_normals.needed())
+        return;
+
+    Points points = _points.getMat();
+    CV_Assert(points.type() == POINT_TYPE);
+
+    _normals.createSameSize(_points, _points.type());
+    Normals normals = _normals.getMat();
+    Mat volUnitsData = _volUnitsData.getMat();
+
+    const float voxelSize = settings.getVoxelSize();
+    const float voxelSizeInv = 1.f / voxelSize;
+
+    const Vec4i volDims;
+    settings.getVolumeDimensions(volDims);
+
+    Matx44f _pose;
+    settings.getVolumePose(_pose);
+    const Affine3f pose = Affine3f(_pose);
+
+    auto HashPushNormals = [&](const ptype& point, const int* position) {
+        //Affine3f invPose(pose.inv());
+        Point3f p = fromPtype(point);
+        Point3f n = nan3;
+        if (!isNaN(p))
+        {
+            //Point3f voxelPoint = invPose * p;
+            Point3f voxelPoint = p;
+            n = pose.rotation() * getNormalVoxel(voxelPoint, voxelSizeInv, volumeUnitDegree, volDims, volUnitsData, volumeUnits);
+        }
+        normals(position[0], position[1]) = toPtype(n);
+    };
+    points.forEach(HashPushNormals);
+
+}
+
+#ifdef HAVE_OPENCL
+void olc_fetchNormalsFromHashTsdfVolumeUnit(
+    const VolumeSettings& settings, const int volumeUnitDegree, InputArray _volUnitsData, InputArray _volUnitsDataCopy,
+    const CustomHashSet& hashTable, InputArray _points, OutputArray _normals)
+{
+    CV_TRACE_FUNCTION();
+    UMat volUnitsData = _volUnitsData.getUMat();
+    Mat volUnitsDataCopy = _volUnitsDataCopy.getMat();
+    if (!_normals.needed())
+        return;
+
+    //TODO: remove it when it works w/o CPU code
+    volUnitsData.copyTo(volUnitsDataCopy);
+
+    Points points = _points.getMat();
+    CV_Assert(points.type() == POINT_TYPE);
+    _normals.createSameSize(_points, _points.type());
+    Normals normals = _normals.getMat();
+
+    const float voxelSize = settings.getVoxelSize();
+    const float voxelSizeInv = 1.f / voxelSize;
+
+    const Vec4i volDims;
+    settings.getVolumeDimensions(volDims);
+
+    Matx44f _pose;
+    settings.getVolumePose(_pose);
+    const Affine3f pose = Affine3f(_pose);
+
+    auto HashPushNormals = [&](const ptype& point, const int* position) {
+        Affine3f invPose(pose.inv());
+        Point3f p = fromPtype(point);
+        Point3f n = nan3;
+        if (!isNaN(p))
+        {
+            Point3f voxelPoint = invPose * p;
+            n = pose.rotation() * ocl_getNormalVoxel(voxelPoint, voxelSizeInv, volumeUnitDegree, volDims, volUnitsDataCopy, hashTable);
+        }
+        normals(position[0], position[1]) = toPtype(n);
+    };
+    points.forEach(HashPushNormals);
+
+}
+#endif
+
+
+void fetchPointsNormalsFromHashTsdfVolumeUnit(
+    const VolumeSettings& settings, InputArray _volUnitsData, const VolumeUnitIndexes& volumeUnits,
+    const int volumeUnitDegree, OutputArray _points, OutputArray _normals)
+{
+    //std::cout << "fetchNormalsFromHashTsdfVolumeUnit()" << std::endl;
+    CV_TRACE_FUNCTION();
+
+    if (!_points.needed())
+        return;
+
+    std::vector<std::vector<ptype>> pVecs, nVecs;
+    Mat volUnitsData = _volUnitsData.getMat();
+
+    const float voxelSize = settings.getVoxelSize();
+    const float voxelSizeInv = 1.f / voxelSize;
+
+    Vec3i resolution;
+    settings.getVolumeResolution(resolution);
+    const Point3i volResolution = Point3i(resolution);
+    const int volumeUnitResolution = volResolution.x;
+    const float volumeUnitSize = voxelSize * resolution[0];
+
+    const Vec4i volDims;
+    settings.getVolumeDimensions(volDims);
+
+    std::vector<Vec3i> totalVolUnits;
+    for (const auto& keyvalue : volumeUnits)
+    {
+        totalVolUnits.push_back(keyvalue.first);
+    }
+    Range fetchRange(0, (int)totalVolUnits.size());
+    const int nstripes = -1;
+
+    bool needNormals(_normals.needed());
+    Mutex mutex;
+
+    auto HashFetchPointsNormalsInvoker = [&](const Range& range)
+    {
+        std::vector<ptype> points, normals;
+        for (int i = range.start; i < range.end; i++)
+        {
+            cv::Vec3i tsdf_idx = totalVolUnits[i];
+
+            VolumeUnitIndexes::const_iterator it = volumeUnits.find(tsdf_idx);
+            Point3f base_point = volumeUnitIdxToVolume(tsdf_idx, volumeUnitSize);
+            if (it != volumeUnits.end())
+            {
+                std::vector<ptype> localPoints;
+                std::vector<ptype> localNormals;
+                for (int x = 0; x < volumeUnitResolution; x++)
+                    for (int y = 0; y < volumeUnitResolution; y++)
+                        for (int z = 0; z < volumeUnitResolution; z++)
+                        {
+                            cv::Vec3i voxelIdx(x, y, z);
+                            TsdfVoxel voxel = _at(volUnitsData, voxelIdx, it->second.index, volResolution.x, volDims);
+
+                            if (voxel.tsdf != -128 && voxel.weight != 0)
+                            {
+                                Point3f point = base_point + voxelCoordToVolume(voxelIdx, voxelSize);
+                                localPoints.push_back(toPtype(point));
+                                if (needNormals)
+                                {
+                                    Point3f normal = getNormalVoxel(point, voxelSizeInv, volumeUnitDegree, volDims, volUnitsData, volumeUnits);
+                                    localNormals.push_back(toPtype(normal));
+                                }
+                            }
+                        }
+
+                AutoLock al(mutex);
+                pVecs.push_back(localPoints);
+                nVecs.push_back(localNormals);
+            }
+        }
+    };
+
+    parallel_for_(fetchRange, HashFetchPointsNormalsInvoker, nstripes);
+
+    std::vector<ptype> points, normals;
+    for (size_t i = 0; i < pVecs.size(); i++)
+    {
+        points.insert(points.end(), pVecs[i].begin(), pVecs[i].end());
+        normals.insert(normals.end(), nVecs[i].begin(), nVecs[i].end());
+    }
+
+    _points.create((int)points.size(), 1, POINT_TYPE);
+    if (!points.empty())
+        Mat((int)points.size(), 1, POINT_TYPE, &points[0]).copyTo(_points.getMat());
+
+    if (_normals.needed())
+    {
+        _normals.create((int)normals.size(), 1, POINT_TYPE);
+        if (!normals.empty())
+            Mat((int)normals.size(), 1, POINT_TYPE, &normals[0]).copyTo(_normals.getMat());
+    }
+}
+
+#ifdef HAVE_OPENCL
+
+inline TsdfVoxel new_at(
+    Mat& volUnitsDataCopy, const cv::Vec3i& volumeIdx, int indx,
+    const int volumeUnitResolution, const Vec4i volStrides)
+{
+    //! Out of bounds
+    if ((volumeIdx[0] >= volumeUnitResolution || volumeIdx[0] < 0) ||
+        (volumeIdx[1] >= volumeUnitResolution || volumeIdx[1] < 0) ||
+        (volumeIdx[2] >= volumeUnitResolution || volumeIdx[2] < 0))
+    {
+        return TsdfVoxel(floatToTsdf(1.0f), 0);
+    }
+
+    const TsdfVoxel* volData = volUnitsDataCopy.ptr<TsdfVoxel>(indx);
+    int coordBase =
+        volumeIdx[0] * volStrides[0] +
+        volumeIdx[1] * volStrides[1] +
+        volumeIdx[2] * volStrides[2];
+    return volData[coordBase];
+}
+
+
+void ocl_fetchPointsNormalsFromHashTsdfVolumeUnit(
+    const VolumeSettings& settings, const int volumeUnitDegree, InputArray _volUnitsData, InputArray _volUnitsDataCopy,
+    const CustomHashSet& hashTable, OutputArray _points, OutputArray _normals)
+{
+
+    CV_TRACE_FUNCTION();
+
+    if (!_points.needed())
+        return;
+
+    UMat volUnitsData = _volUnitsData.getUMat();
+    Mat volUnitsDataCopy = _volUnitsDataCopy.getMat();
+    //TODO: remove it when it works w/o CPU code
+    volUnitsData.copyTo(volUnitsDataCopy);
+    //TODO: remove it when it works w/o CPU code
+    //TODO: enable it when it's on GPU
+    //UMat hashDataGpu(hashMap.capacity, 1, CV_32SC4);
+    //Mat(hashMap.data, false).copyTo(hashDataGpu);
+
+    std::vector<std::vector<ptype>> pVecs, nVecs;
+
+    const float voxelSize = settings.getVoxelSize();
+    const float voxelSizeInv = 1.f / voxelSize;
+
+    Vec3i resolution;
+    settings.getVolumeResolution(resolution);
+    const Point3i volResolution = Point3i(resolution);
+    const int volumeUnitResolution = volResolution.x;
+    const float volumeUnitSize = voxelSize * resolution[0];
+
+    const Vec4i volDims;
+    settings.getVolumeDimensions(volDims);
+
+    Range _fetchRange(0, hashTable.last);
+
+    const int nstripes = -1;
+
+    bool needNormals(_normals.needed());
+    Mutex mutex;
+
+    auto _HashFetchPointsNormalsInvoker = [&](const Range& range)
+    {
+        std::vector<ptype> points, normals;
+        for (int row = range.start; row < range.end; row++)
+        {
+            cv::Vec4i idx4 = hashTable.data[row];
+            cv::Vec3i idx(idx4[0], idx4[1], idx4[2]);
+
+            Point3f base_point = volumeUnitIdxToVolume(idx, volumeUnitSize);
+
+            std::vector<ptype> localPoints;
+            std::vector<ptype> localNormals;
+            for (int x = 0; x < volumeUnitResolution; x++)
+                for (int y = 0; y < volumeUnitResolution; y++)
+                    for (int z = 0; z < volumeUnitResolution; z++)
+                    {
+                        cv::Vec3i voxelIdx(x, y, z);
+                        TsdfVoxel voxel = new_at(volUnitsDataCopy, voxelIdx, row, volumeUnitResolution, volDims);
+
+                        if (voxel.tsdf != -128 && voxel.weight != 0)
+                        {
+                            Point3f point = base_point + voxelCoordToVolume(voxelIdx, voxelSize);
+
+                            localPoints.push_back(toPtype(point));
+                            if (needNormals)
+                            {
+                                Point3f normal = ocl_getNormalVoxel(point, voxelSizeInv, volumeUnitDegree, volDims, volUnitsDataCopy, hashTable);
+                                localNormals.push_back(toPtype(normal));
+                            }
+                        }
+                    }
+
+            AutoLock al(mutex);
+            pVecs.push_back(localPoints);
+            nVecs.push_back(localNormals);
+        }
+    };
+
+    parallel_for_(_fetchRange, _HashFetchPointsNormalsInvoker, nstripes);
+
+    std::vector<ptype> points, normals;
+    for (size_t i = 0; i < pVecs.size(); i++)
+    {
+        points.insert(points.end(), pVecs[i].begin(), pVecs[i].end());
+        normals.insert(normals.end(), nVecs[i].begin(), nVecs[i].end());
+    }
+
+    _points.create((int)points.size(), 1, POINT_TYPE);
+    if (!points.empty())
+        Mat((int)points.size(), 1, POINT_TYPE, &points[0]).copyTo(_points.getMat());
+
+    if (_normals.needed())
+    {
+        _normals.create((int)normals.size(), 1, POINT_TYPE);
+        if (!normals.empty())
+            Mat((int)normals.size(), 1, POINT_TYPE, &normals[0]).copyTo(_normals.getMat());
+    }
+
+}
+#endif
+
+} // namespace cv
diff --git a/modules/3d/src/rgbd/hash_tsdf_functions.hpp b/modules/3d/src/rgbd/hash_tsdf_functions.hpp
new file mode 100644
index 0000000000..de83574fe7
--- /dev/null
+++ b/modules/3d/src/rgbd/hash_tsdf_functions.hpp
@@ -0,0 +1,325 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// Partially rewritten from https://github.com/Nerei/kinfu_remake
+// Copyright(c) 2012, Anatoly Baksheev. All rights reserved.
+
+#ifndef OPENCV_3D_HASH_TSDF_FUNCTIONS_HPP
+#define OPENCV_3D_HASH_TSDF_FUNCTIONS_HPP
+
+#include <unordered_set>
+
+#include "utils.hpp"
+#include "tsdf_functions.hpp"
+
+#define USE_INTERPOLATION_IN_GETNORMAL 1
+#define VOLUMES_SIZE 8192
+
+namespace cv
+{
+
+//! Spatial hashing
+struct tsdf_hash
+{
+    size_t operator()(const Vec3i& x) const noexcept
+    {
+        size_t seed = 0;
+        constexpr uint32_t GOLDEN_RATIO = 0x9e3779b9;
+        for (uint16_t i = 0; i < 3; i++)
+        {
+            seed ^= std::hash<int>()(x[i]) + GOLDEN_RATIO + (seed << 6) + (seed >> 2);
+        }
+        return seed;
+    }
+};
+
+struct VolumeUnit
+{
+    cv::Vec3i coord;
+    int index;
+    cv::Matx44f pose;
+    int lastVisibleIndex = 0;
+    bool isActive;
+};
+
+class CustomHashSet
+{
+public:
+    static const int hashDivisor = 32768;
+    static const int startCapacity = 2048;
+
+    std::vector<int> hashes;
+    // 0-3 for key, 4th for internal use
+    // don't keep keep value
+    std::vector<Vec4i> data;
+    int capacity;
+    int last;
+
+    CustomHashSet()
+    {
+        hashes.resize(hashDivisor);
+        for (int i = 0; i < hashDivisor; i++)
+            hashes[i] = -1;
+        capacity = startCapacity;
+
+        data.resize(capacity);
+        for (int i = 0; i < capacity; i++)
+            data[i] = { 0, 0, 0, -1 };
+
+        last = 0;
+    }
+
+    ~CustomHashSet() { }
+
+    inline size_t calc_hash(Vec3i x) const
+    {
+        uint32_t seed = 0;
+        constexpr uint32_t GOLDEN_RATIO = 0x9e3779b9;
+        for (int i = 0; i < 3; i++)
+        {
+            seed ^= x[i] + GOLDEN_RATIO + (seed << 6) + (seed >> 2);
+        }
+        return seed;
+    }
+
+    // should work on existing elements too
+    // 0 - need resize
+    // 1 - idx is inserted
+    // 2 - idx already exists
+    int insert(Vec3i idx)
+    {
+        if (last < capacity)
+        {
+            int hash = int(calc_hash(idx) % hashDivisor);
+            int place = hashes[hash];
+            if (place >= 0)
+            {
+                int oldPlace = place;
+                while (place >= 0)
+                {
+                    if (data[place][0] == idx[0] &&
+                        data[place][1] == idx[1] &&
+                        data[place][2] == idx[2])
+                        return 2;
+                    else
+                    {
+                        oldPlace = place;
+                        place = data[place][3];
+                        //std::cout << "place=" << place << std::endl;
+                    }
+                }
+
+                // found, create here
+                data[oldPlace][3] = last;
+            }
+            else
+            {
+                // insert at last
+                hashes[hash] = last;
+            }
+
+            data[last][0] = idx[0];
+            data[last][1] = idx[1];
+            data[last][2] = idx[2];
+            data[last][3] = -1;
+            last++;
+
+            return 1;
+        }
+        else
+            return 0;
+    }
+
+    int find(Vec3i idx) const
+    {
+        int hash = int(calc_hash(idx) % hashDivisor);
+        int place = hashes[hash];
+        // search a place
+        while (place >= 0)
+        {
+            if (data[place][0] == idx[0] &&
+                data[place][1] == idx[1] &&
+                data[place][2] == idx[2])
+                break;
+            else
+            {
+                place = data[place][3];
+            }
+        }
+
+        return place;
+    }
+};
+
+// TODO: remove this structure as soon as HashTSDFGPU data is completely on GPU;
+// until then CustomHashTable can be replaced by this one if needed
+
+const int NAN_ELEMENT = -2147483647;
+
+struct Volume_NODE
+{
+    Vec4i idx = Vec4i(NAN_ELEMENT);
+    int32_t row = -1;
+    int32_t nextVolumeRow = -1;
+    int32_t dummy = 0;
+    int32_t dummy2 = 0;
+};
+
+const int _hash_divisor = 32768;
+const int _list_size = 4;
+
+class VolumesTable
+{
+public:
+    const int hash_divisor = _hash_divisor;
+    const int list_size = _list_size;
+    const int32_t free_row = -1;
+    const int32_t free_isActive = 0;
+
+    const cv::Vec4i nan4 = cv::Vec4i(NAN_ELEMENT);
+
+    int bufferNums;
+    cv::Mat volumes;
+
+    VolumesTable() : bufferNums(1)
+    {
+        this->volumes = cv::Mat(hash_divisor * list_size, 1, rawType<Volume_NODE>());
+        for (int i = 0; i < volumes.size().height; i++)
+        {
+            Volume_NODE* v = volumes.ptr<Volume_NODE>(i);
+            v->idx = nan4;
+            v->row = -1;
+            v->nextVolumeRow = -1;
+        }
+    }
+    const VolumesTable& operator=(const VolumesTable& vt)
+    {
+        this->volumes = vt.volumes;
+        this->bufferNums = vt.bufferNums;
+        return *this;
+    }
+    ~VolumesTable() {};
+
+    bool insert(Vec3i idx, int row)
+    {
+        CV_Assert(row >= 0);
+
+        int bufferNum = 0;
+        int hash = int(calc_hash(idx) % hash_divisor);
+        int start = getPos(idx, bufferNum);
+        int i = start;
+
+        while (i >= 0)
+        {
+            Volume_NODE* v = volumes.ptr<Volume_NODE>(i);
+
+            if (v->idx[0] == NAN_ELEMENT)
+            {
+                Vec4i idx4(idx[0], idx[1], idx[2], 0);
+
+                bool extend = false;
+                if (i != start && i % list_size == 0)
+                {
+                    if (bufferNum >= bufferNums - 1)
+                    {
+                        extend = true;
+                        volumes.resize(hash_divisor * bufferNums);
+                        bufferNums++;
+                    }
+                    bufferNum++;
+                    v->nextVolumeRow = (bufferNum * hash_divisor + hash) * list_size;
+                }
+                else
+                {
+                    v->nextVolumeRow = i + 1;
+                }
+
+                v->idx = idx4;
+                v->row = row;
+
+                return extend;
+            }
+
+            i = v->nextVolumeRow;
+        }
+        return false;
+    }
+    int findRow(Vec3i idx) const
+    {
+        int bufferNum = 0;
+        int i = getPos(idx, bufferNum);
+
+        while (i >= 0)
+        {
+            const Volume_NODE* v = volumes.ptr<Volume_NODE>(i);
+
+            if (v->idx == Vec4i(idx[0], idx[1], idx[2], 0))
+                return v->row;
+            else
+                i = v->nextVolumeRow;
+        }
+
+        return -1;
+    }
+
+    inline int getPos(Vec3i idx, int bufferNum) const
+    {
+        int hash = int(calc_hash(idx) % hash_divisor);
+        return (bufferNum * hash_divisor + hash) * list_size;
+    }
+
+    inline size_t calc_hash(Vec3i x) const
+    {
+        uint32_t seed = 0;
+        constexpr uint32_t GOLDEN_RATIO = 0x9e3779b9;
+        for (int i = 0; i < 3; i++)
+        {
+            seed ^= x[i] + GOLDEN_RATIO + (seed << 6) + (seed >> 2);
+        }
+        return seed;
+    }
+};
+
+int calcVolumeUnitDegree(Point3i volumeResolution);
+
+typedef std::unordered_set<cv::Vec3i, tsdf_hash> VolumeUnitIndexSet;
+typedef std::unordered_map<cv::Vec3i, VolumeUnit, tsdf_hash> VolumeUnitIndexes;
+
+void integrateHashTsdfVolumeUnit(
+    const VolumeSettings& settings, const Matx44f& cameraPose, int& lastVolIndex, const int frameId, const int volumeUnitDegree,
+    InputArray _depth, InputArray _pixNorms, InputArray _volUnitsData, VolumeUnitIndexes& volumeUnits);
+
+void raycastHashTsdfVolumeUnit(
+    const VolumeSettings& settings, const Matx44f& cameraPose, int height, int width, const int volumeUnitDegree,
+    InputArray _volUnitsData, const VolumeUnitIndexes& volumeUnits, OutputArray _points, OutputArray _normals);
+
+void fetchNormalsFromHashTsdfVolumeUnit(
+    const VolumeSettings& settings, InputArray _volUnitsData, const VolumeUnitIndexes& volumeUnits,
+    const int volumeUnitDegree, InputArray _points, OutputArray _normals);
+
+void fetchPointsNormalsFromHashTsdfVolumeUnit(
+    const VolumeSettings& settings, InputArray _volUnitsData, const VolumeUnitIndexes& volumeUnits,
+    const int volumeUnitDegree, OutputArray _points, OutputArray _normals);
+
+#ifdef HAVE_OPENCL
+void ocl_integrateHashTsdfVolumeUnit(
+    const VolumeSettings& settings, const Matx44f& cameraPose, int& lastVolIndex, const int frameId, int& bufferSizeDegree, const int volumeUnitDegree,
+    InputArray _depth, InputArray _pixNorms, InputArray _lastVisibleIndices, InputArray _volUnitsDataCopy, InputArray _volUnitsData, CustomHashSet& hashTable, InputArray _isActiveFlags);
+
+void ocl_raycastHashTsdfVolumeUnit(
+    const VolumeSettings& settings, const Matx44f& cameraPose, int height, int width, const int volumeUnitDegree,
+    const CustomHashSet& hashTable, InputArray _volUnitsData, OutputArray _points, OutputArray _normals);
+
+void olc_fetchNormalsFromHashTsdfVolumeUnit(
+    const VolumeSettings& settings, const int volumeUnitDegree, InputArray _volUnitsData, InputArray _volUnitsDataCopy,
+    const CustomHashSet& hashTable, InputArray _points, OutputArray _normals);
+
+void ocl_fetchPointsNormalsFromHashTsdfVolumeUnit(
+    const VolumeSettings& settings, const int volumeUnitDegree, InputArray _volUnitsData, InputArray _volUnitsDataCopy,
+    const CustomHashSet& hashTable, OutputArray _points, OutputArray _normals);
+#endif
+
+} // namespace cv
+
+#endif
diff --git a/modules/3d/src/rgbd/odometry_frame_impl.cpp b/modules/3d/src/rgbd/odometry_frame_impl.cpp
index 6088c64b91..cf3963c845 100644
--- a/modules/3d/src/rgbd/odometry_frame_impl.cpp
+++ b/modules/3d/src/rgbd/odometry_frame_impl.cpp
@@ -21,6 +21,7 @@ public:
     virtual void getGrayImage(OutputArray image) const = 0;
     virtual void setDepth(InputArray  depth) = 0;
     virtual void getDepth(OutputArray depth) const = 0;
+    virtual void getScaledDepth(OutputArray depth) const = 0;
     virtual void setMask(InputArray  mask) = 0;
     virtual void getMask(OutputArray mask) const = 0;
     virtual void setNormals(InputArray  normals) = 0;
@@ -46,6 +47,7 @@ public:
     virtual void getGrayImage(OutputArray image) const override;
     virtual void setDepth(InputArray  depth) override;
     virtual void getDepth(OutputArray depth) const override;
+    virtual void getScaledDepth(OutputArray depth) const override;
     virtual void setMask(InputArray  mask) override;
     virtual void getMask(OutputArray mask) const override;
     virtual void setNormals(InputArray  normals) override;
@@ -64,6 +66,7 @@ private:
     TMat image;
     TMat imageGray;
     TMat depth;
+    TMat scaledDepth;
     TMat mask;
     TMat normals;
     std::vector< std::vector<TMat> > pyramids;
@@ -87,6 +90,7 @@ void OdometryFrame::getImage(OutputArray image) const { this->impl->getImage(ima
 void OdometryFrame::getGrayImage(OutputArray image) const { this->impl->getGrayImage(image); }
 void OdometryFrame::setDepth(InputArray  depth) { this->impl->setDepth(depth); }
 void OdometryFrame::getDepth(OutputArray depth) const { this->impl->getDepth(depth); }
+void OdometryFrame::getScaledDepth(OutputArray depth) const { this->impl->getScaledDepth(depth); }
 void OdometryFrame::setMask(InputArray  mask) { this->impl->setMask(mask); }
 void OdometryFrame::getMask(OutputArray mask) const { this->impl->getMask(mask); }
 void OdometryFrame::setNormals(InputArray  normals) { this->impl->setNormals(normals); }
@@ -140,7 +144,9 @@ void OdometryFrameImplTMat<TMat>::getGrayImage(OutputArray _image) const
 template<typename TMat>
 void OdometryFrameImplTMat<TMat>::setDepth(InputArray _depth)
 {
-    TMat depth_tmp, depth_flt;
+    TMat depth_tmp;
+    Mat depth_flt;
+
     depth_tmp = getTMat<TMat>(_depth);
     // Odometry works well with depth values in range [0, 10)
     // If it's bigger, let's scale it down by 5000, a typical depth factor
@@ -149,12 +155,14 @@ void OdometryFrameImplTMat<TMat>::setDepth(InputArray _depth)
     if (max > 10)
     {
         depth_tmp.convertTo(depth_flt, CV_32FC1, 1.f / 5000.f);
-        TMat depthMask;
-        cv::compare(depth_flt, Scalar(FLT_EPSILON), depthMask, cv::CMP_LT);
-        depth_flt.setTo(std::numeric_limits<float>::quiet_NaN(), depthMask);
-        depth_tmp = depth_flt;
+        // getTMat<Mat>(depth_flt) < FLT_EPSILON dont work with UMat
+        // depth_flt.setTo(std::numeric_limits<float>::quiet_NaN(), getTMat<Mat>(depth_flt) < FLT_EPSILON);
+        depth_flt.setTo(std::numeric_limits<float>::quiet_NaN(), depth_flt < FLT_EPSILON);
+        depth_tmp = getTMat<TMat>(depth_flt);
+
     }
-    this->depth = depth_tmp;
+    this->depth = getTMat<TMat>(_depth);
+    this->scaledDepth = depth_tmp;
     this->findMask(_depth);
 }
 
@@ -164,6 +172,12 @@ void OdometryFrameImplTMat<TMat>::getDepth(OutputArray _depth) const
     _depth.assign(this->depth);
 }
 
+template<typename TMat>
+void OdometryFrameImplTMat<TMat>::getScaledDepth(OutputArray _depth) const
+{
+    _depth.assign(this->scaledDepth);
+}
+
 template<typename TMat>
 void OdometryFrameImplTMat<TMat>::setMask(InputArray _mask)
 {
diff --git a/modules/3d/src/rgbd/odometry_functions.cpp b/modules/3d/src/rgbd/odometry_functions.cpp
index c376728c0c..4a75aae78a 100644
--- a/modules/3d/src/rgbd/odometry_functions.cpp
+++ b/modules/3d/src/rgbd/odometry_functions.cpp
@@ -71,6 +71,7 @@ void prepareRGBFrameBase(OdometryFrame& frame, OdometrySettings settings, bool u
     checkImage(image);
 
     TMat depth;
+
     if (useDepth)
     {
         frame.getDepth(depth);
@@ -177,7 +178,7 @@ void prepareICPFrameBase(OdometryFrame& frame, OdometrySettings settings)
     typedef Mat TMat;
 
     TMat depth;
-    frame.getDepth(depth);
+    frame.getScaledDepth(depth);
     if (depth.empty())
     {
         if (frame.getPyramidLevels(OdometryFramePyramidType::PYR_DEPTH) > 0)
@@ -256,7 +257,7 @@ void prepareICPFrameDst(OdometryFrame& frame, OdometrySettings settings)
     settings.getCameraMatrix(cameraMatrix);
 
     TMat depth, mask, normals;
-    frame.getDepth(depth);
+    frame.getScaledDepth(depth);
     frame.getMask(mask);
     frame.getNormals(normals);
 
@@ -886,9 +887,9 @@ void computeCorresps(const Matx33f& _K, const Matx33f& _K_inv, const Mat& Rt,
         for (int u1 = 0; u1 < depth1.cols; u1++)
         {
             float d1 = depth1_row[u1];
-            if (mask1_row[u1])
+            if (mask1_row[u1] && !cvIsNaN(d1))
             {
-                CV_DbgAssert(!cvIsNaN(d1));
+                //CV_DbgAssert(!cvIsNaN(d1));
                 float transformed_d1 = static_cast<float>(d1 * (KRK_inv6_u1[u1] + KRK_inv7_v1_plus_KRK_inv8[v1]) +
                     Kt_ptr[2]);
 
diff --git a/modules/3d/src/rgbd/tsdf.cpp b/modules/3d/src/rgbd/tsdf.cpp
deleted file mode 100644
index 7047e2670e..0000000000
--- a/modules/3d/src/rgbd/tsdf.cpp
+++ /dev/null
@@ -1,1228 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html
-
-// Partially rewritten from https://github.com/Nerei/kinfu_remake
-// Copyright(c) 2012, Anatoly Baksheev. All rights reserved.
-
-#include "../precomp.hpp"
-#include "tsdf.hpp"
-#include "opencl_kernels_3d.hpp"
-
-namespace cv {
-
-class TSDFVolumeCPU : public TSDFVolume
-{
-   public:
-    // dimension in voxels, size in meters
-    TSDFVolumeCPU(float _voxelSize, cv::Matx44f _pose, float _raycastStepFactor, float _truncDist,
-                  int _maxWeight, Vec3i _resolution, bool zFirstMemOrder = true);
-
-    virtual void integrate(InputArray _depth, float depthFactor, const Matx44f& cameraPose,
-                           const Matx33f& intrinsics, const int frameId = 0) override;
-    virtual void raycast(const Matx44f& cameraPose, const Matx33f& intrinsics, const Size& frameSize,
-                         OutputArray points, OutputArray normals) const override;
-    virtual void integrate(InputArray, InputArray, float, const Matx44f&, const Matx33f&, const Matx33f&, const int) override
-    { CV_Error(Error::StsNotImplemented, "Not implemented"); };
-    virtual void raycast(const Matx44f&, const Matx33f&, const Size&, OutputArray, OutputArray, OutputArray) const override
-    { CV_Error(Error::StsNotImplemented, "Not implemented"); };
-
-    virtual void fetchNormals(InputArray points, OutputArray _normals) const override;
-    virtual void fetchPointsNormals(OutputArray points, OutputArray normals) const override;
-
-    virtual void reset() override;
-    virtual TsdfVoxel at(const Vec3i& volumeIdx) const;
-
-    float interpolateVoxel(const cv::Point3f& p) const;
-    Point3f getNormalVoxel(const cv::Point3f& p) const;
-
-#if USE_INTRINSICS
-    float interpolateVoxel(const v_float32x4& p) const;
-    v_float32x4 getNormalVoxel(const v_float32x4& p) const;
-#endif
-    Vec4i volStrides;
-    Vec6f frameParams;
-    Mat pixNorms;
-    // See zFirstMemOrder arg of parent class constructor
-    // for the array layout info
-    // Consist of Voxel elements
-    Mat volume;
-};
-
-
-TSDFVolume::TSDFVolume(float _voxelSize, Matx44f _pose, float _raycastStepFactor, float _truncDist,
-                       int _maxWeight, Point3i _resolution, bool zFirstMemOrder)
-    : Volume(_voxelSize, _pose, _raycastStepFactor),
-      volResolution(_resolution),
-      maxWeight( WeightType(_maxWeight) )
-{
-    CV_Assert(_maxWeight < 255);
-    // Unlike original code, this should work with any volume size
-    // Not only when (x,y,z % 32) == 0
-    volSize   = Point3f(volResolution) * voxelSize;
-    truncDist = std::max(_truncDist, 2.1f * voxelSize);
-
-    // (xRes*yRes*zRes) array
-    // Depending on zFirstMemOrder arg:
-    // &elem(x, y, z) = data + x*zRes*yRes + y*zRes + z;
-    // &elem(x, y, z) = data + x + y*xRes + z*xRes*yRes;
-    int xdim, ydim, zdim;
-    if(zFirstMemOrder)
-    {
-        xdim = volResolution.z * volResolution.y;
-        ydim = volResolution.z;
-        zdim = 1;
-    }
-    else
-    {
-        xdim = 1;
-        ydim = volResolution.x;
-        zdim = volResolution.x * volResolution.y;
-    }
-
-    volDims = Vec4i(xdim, ydim, zdim);
-    neighbourCoords = Vec8i(
-        volDims.dot(Vec4i(0, 0, 0)),
-        volDims.dot(Vec4i(0, 0, 1)),
-        volDims.dot(Vec4i(0, 1, 0)),
-        volDims.dot(Vec4i(0, 1, 1)),
-        volDims.dot(Vec4i(1, 0, 0)),
-        volDims.dot(Vec4i(1, 0, 1)),
-        volDims.dot(Vec4i(1, 1, 0)),
-        volDims.dot(Vec4i(1, 1, 1))
-    );
-}
-
-// dimension in voxels, size in meters
-TSDFVolumeCPU::TSDFVolumeCPU(float _voxelSize, cv::Matx44f _pose, float _raycastStepFactor,
-                             float _truncDist, int _maxWeight, Vec3i _resolution,
-                             bool zFirstMemOrder)
-    : TSDFVolume(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, _resolution,
-                 zFirstMemOrder)
-{
-    int xdim, ydim, zdim;
-    if (zFirstMemOrder)
-    {
-        xdim = volResolution.z * volResolution.y;
-        ydim = volResolution.z;
-        zdim = 1;
-    }
-    else
-    {
-        xdim = 1;
-        ydim = volResolution.x;
-        zdim = volResolution.x * volResolution.y;
-    }
-    volStrides = Vec4i(xdim, ydim, zdim);
-
-    volume = Mat(1, volResolution.x * volResolution.y * volResolution.z, rawType<TsdfVoxel>());
-
-    reset();
-}
-
-// zero volume, leave rest params the same
-void TSDFVolumeCPU::reset()
-{
-    CV_TRACE_FUNCTION();
-
-    volume.forEach<VecTsdfVoxel>([](VecTsdfVoxel& vv, const int* /* position */)
-    {
-        TsdfVoxel& v = reinterpret_cast<TsdfVoxel&>(vv);
-        v.tsdf = floatToTsdf(0.0f); v.weight = 0;
-    });
-}
-
-TsdfVoxel TSDFVolumeCPU::at(const Vec3i& volumeIdx) const
-{
-    //! Out of bounds
-    if ((volumeIdx[0] >= volResolution.x || volumeIdx[0] < 0) ||
-        (volumeIdx[1] >= volResolution.y || volumeIdx[1] < 0) ||
-        (volumeIdx[2] >= volResolution.z || volumeIdx[2] < 0))
-    {
-        return TsdfVoxel(floatToTsdf(1.f), 0);
-    }
-
-    const TsdfVoxel* volData = volume.ptr<TsdfVoxel>();
-    int coordBase =
-        volumeIdx[0] * volDims[0] + volumeIdx[1] * volDims[1] + volumeIdx[2] * volDims[2];
-    return volData[coordBase];
-}
-
-// use depth instead of distance (optimization)
-void TSDFVolumeCPU::integrate(InputArray _depth, float depthFactor, const Matx44f& cameraPose,
-                              const Matx33f& _intrinsics, const int frameId)
-{
-    CV_TRACE_FUNCTION();
-    CV_UNUSED(frameId);
-    CV_Assert(_depth.type() == DEPTH_TYPE);
-    CV_Assert(!_depth.empty());
-    Depth depth = _depth.getMat();
-
-    Intr intrinsics(_intrinsics);
-    Vec6f newParams((float)depth.rows, (float)depth.cols,
-                    intrinsics.fx, intrinsics.fy,
-                    intrinsics.cx, intrinsics.cy);
-    if (!(frameParams == newParams))
-    {
-        frameParams = newParams;
-        pixNorms = preCalculationPixNorm(depth.size(), intrinsics);
-    }
-
-    integrateVolumeUnit(truncDist, voxelSize, maxWeight, (this->pose).matrix, volResolution, volStrides, depth,
-        depthFactor, cameraPose, intrinsics, pixNorms, volume);
-}
-
-#if USE_INTRINSICS
-// all coordinate checks should be done in inclosing cycle
-inline float TSDFVolumeCPU::interpolateVoxel(const Point3f& _p) const
-{
-    v_float32x4 p(_p.x, _p.y, _p.z, 0);
-    return interpolateVoxel(p);
-}
-
-inline float TSDFVolumeCPU::interpolateVoxel(const v_float32x4& p) const
-{
-    // tx, ty, tz = floor(p)
-    v_int32x4 ip  = v_floor(p);
-    v_float32x4 t = p - v_cvt_f32(ip);
-    float tx      = t.get0();
-    t             = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
-    float ty      = t.get0();
-    t             = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
-    float tz      = t.get0();
-
-    int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
-    const TsdfVoxel* volData = volume.ptr<TsdfVoxel>();
-
-    int ix = ip.get0();
-    ip     = v_rotate_right<1>(ip);
-    int iy = ip.get0();
-    ip     = v_rotate_right<1>(ip);
-    int iz = ip.get0();
-
-    int coordBase = ix*xdim + iy*ydim + iz*zdim;
-
-    TsdfType vx[8];
-    for(int i = 0; i < 8; i++)
-        vx[i] = volData[neighbourCoords[i] + coordBase].tsdf;
-
-    v_float32x4 v0246 = tsdfToFloat_INTR(v_int32x4(vx[0], vx[2], vx[4], vx[6]));
-    v_float32x4 v1357 = tsdfToFloat_INTR(v_int32x4(vx[1], vx[3], vx[5], vx[7]));
-    v_float32x4 vxx = v0246 + v_setall_f32(tz)*(v1357 - v0246);
-
-    v_float32x4 v00_10 = vxx;
-    v_float32x4 v01_11 = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(vxx)));
-
-    v_float32x4 v0_1 = v00_10 + v_setall_f32(ty)*(v01_11 - v00_10);
-    float v0         = v0_1.get0();
-    v0_1             = v_reinterpret_as_f32(v_rotate_right<2>(v_reinterpret_as_u32(v0_1)));
-    float v1         = v0_1.get0();
-
-    return v0 + tx*(v1 - v0);
-}
-#else
-inline float TSDFVolumeCPU::interpolateVoxel(const Point3f& p) const
-{
-    int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
-
-    int ix = cvFloor(p.x);
-    int iy = cvFloor(p.y);
-    int iz = cvFloor(p.z);
-
-    float tx = p.x - ix;
-    float ty = p.y - iy;
-    float tz = p.z - iz;
-
-    int coordBase = ix*xdim + iy*ydim + iz*zdim;
-    const TsdfVoxel* volData = volume.ptr<TsdfVoxel>();
-
-    float vx[8];
-    for (int i = 0; i < 8; i++)
-        vx[i] = tsdfToFloat(volData[neighbourCoords[i] + coordBase].tsdf);
-
-    float v00 = vx[0] + tz*(vx[1] - vx[0]);
-    float v01 = vx[2] + tz*(vx[3] - vx[2]);
-    float v10 = vx[4] + tz*(vx[5] - vx[4]);
-    float v11 = vx[6] + tz*(vx[7] - vx[6]);
-
-    float v0 = v00 + ty*(v01 - v00);
-    float v1 = v10 + ty*(v11 - v10);
-
-    return v0 + tx*(v1 - v0);
-
-}
-#endif
-
-#if USE_INTRINSICS
-//gradientDeltaFactor is fixed at 1.0 of voxel size
-inline Point3f TSDFVolumeCPU::getNormalVoxel(const Point3f& _p) const
-{
-    v_float32x4 p(_p.x, _p.y, _p.z, 0.f);
-    v_float32x4 result = getNormalVoxel(p);
-    float CV_DECL_ALIGNED(16) ares[4];
-    v_store_aligned(ares, result);
-    return Point3f(ares[0], ares[1], ares[2]);
-}
-
-inline v_float32x4 TSDFVolumeCPU::getNormalVoxel(const v_float32x4& p) const
-{
-    if(v_check_any (p < v_float32x4(1.f, 1.f, 1.f, 0.f)) ||
-       v_check_any (p >= v_float32x4((float)(volResolution.x-2),
-                                     (float)(volResolution.y-2),
-                                     (float)(volResolution.z-2), 1.f))
-                   )
-        return nanv;
-
-    v_int32x4 ip  = v_floor(p);
-    v_float32x4 t = p - v_cvt_f32(ip);
-    float tx      = t.get0();
-    t             = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
-    float ty      = t.get0();
-    t             = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
-    float tz      = t.get0();
-
-    const int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
-    const TsdfVoxel* volData = volume.ptr<TsdfVoxel>();
-
-    int ix = ip.get0(); ip = v_rotate_right<1>(ip);
-    int iy = ip.get0(); ip = v_rotate_right<1>(ip);
-    int iz = ip.get0();
-
-    int coordBase = ix*xdim + iy*ydim + iz*zdim;
-
-    float CV_DECL_ALIGNED(16) an[4];
-    an[0] = an[1] = an[2] = an[3] = 0.f;
-    for(int c = 0; c < 3; c++)
-    {
-        const int dim = volDims[c];
-        float& nv     = an[c];
-
-        float vx[8];
-        for(int i = 0; i < 8; i++)
-            vx[i] = tsdfToFloat(volData[neighbourCoords[i] + coordBase + 1*dim].tsdf) -
-                    tsdfToFloat(volData[neighbourCoords[i] + coordBase - 1*dim].tsdf);
-
-        v_float32x4 v0246 (vx[0], vx[2], vx[4], vx[6]);
-        v_float32x4 v1357 (vx[1], vx[3], vx[5], vx[7]);
-        v_float32x4 vxx = v0246 + v_setall_f32(tz)*(v1357 - v0246);
-
-        v_float32x4 v00_10 = vxx;
-        v_float32x4 v01_11 = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(vxx)));
-
-        v_float32x4 v0_1 = v00_10 + v_setall_f32(ty)*(v01_11 - v00_10);
-        float v0         = v0_1.get0();
-        v0_1             = v_reinterpret_as_f32(v_rotate_right<2>(v_reinterpret_as_u32(v0_1)));
-        float v1         = v0_1.get0();
-
-        nv = v0 + tx*(v1 - v0);
-    }
-
-    v_float32x4 n       = v_load_aligned(an);
-    v_float32x4 Norm = v_sqrt(v_setall_f32(v_reduce_sum(n*n)));
-
-    return Norm.get0() < 0.0001f ? nanv : n/Norm;
-}
-#else
-inline Point3f TSDFVolumeCPU::getNormalVoxel(const Point3f& p) const
-{
-    const int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
-    const TsdfVoxel* volData = volume.ptr<TsdfVoxel>();
-
-    if(p.x < 1 || p.x >= volResolution.x - 2 ||
-       p.y < 1 || p.y >= volResolution.y - 2 ||
-       p.z < 1 || p.z >= volResolution.z - 2)
-        return nan3;
-
-    int ix = cvFloor(p.x);
-    int iy = cvFloor(p.y);
-    int iz = cvFloor(p.z);
-
-    float tx = p.x - ix;
-    float ty = p.y - iy;
-    float tz = p.z - iz;
-
-    int coordBase = ix*xdim + iy*ydim + iz*zdim;
-
-    Vec3f an;
-    for(int c = 0; c < 3; c++)
-    {
-        const int dim = volDims[c];
-        float& nv = an[c];
-
-        float vx[8];
-        for (int i = 0; i < 8; i++)
-            vx[i] = tsdfToFloat(volData[neighbourCoords[i] + coordBase + 1 * dim].tsdf) -
-                    tsdfToFloat(volData[neighbourCoords[i] + coordBase - 1 * dim].tsdf);
-
-        float v00 = vx[0] + tz*(vx[1] - vx[0]);
-        float v01 = vx[2] + tz*(vx[3] - vx[2]);
-        float v10 = vx[4] + tz*(vx[5] - vx[4]);
-        float v11 = vx[6] + tz*(vx[7] - vx[6]);
-
-        float v0 = v00 + ty*(v01 - v00);
-        float v1 = v10 + ty*(v11 - v10);
-
-        nv = v0 + tx*(v1 - v0);
-    }
-
-    float nv = sqrt(an[0] * an[0] +
-                    an[1] * an[1] +
-                    an[2] * an[2]);
-    return nv < 0.0001f ? nan3 : an / nv;
-}
-#endif
-
-struct RaycastInvoker : ParallelLoopBody
-{
-    RaycastInvoker(Points& _points, Normals& _normals, const Matx44f& cameraPose,
-                  const Intr& intrinsics, const TSDFVolumeCPU& _volume) :
-        ParallelLoopBody(),
-        points(_points),
-        normals(_normals),
-        volume(_volume),
-        tstep(volume.truncDist * volume.raycastStepFactor),
-        // We do subtract voxel size to minimize checks after
-        // Note: origin of volume coordinate is placed
-        // in the center of voxel (0,0,0), not in the corner of the voxel!
-        boxMax(volume.volSize - Point3f(volume.voxelSize,
-                                        volume.voxelSize,
-                                        volume.voxelSize)),
-        boxMin(),
-        cam2vol(volume.pose.inv() * Affine3f(cameraPose)),
-        vol2cam(Affine3f(cameraPose.inv()) * volume.pose),
-        reproj(intrinsics.makeReprojector())
-    {  }
-
-#if USE_INTRINSICS
-    virtual void operator() (const Range& range) const override
-    {
-        const v_float32x4 vfxy(reproj.fxinv, reproj.fyinv, 0, 0);
-        const v_float32x4 vcxy(reproj.cx, reproj.cy, 0, 0);
-
-        const float (&cm)[16] = cam2vol.matrix.val;
-        const v_float32x4 camRot0(cm[0], cm[4], cm[8], 0);
-        const v_float32x4 camRot1(cm[1], cm[5], cm[9], 0);
-        const v_float32x4 camRot2(cm[2], cm[6], cm[10], 0);
-        const v_float32x4 camTrans(cm[3], cm[7], cm[11], 0);
-
-        const v_float32x4 boxDown(boxMin.x, boxMin.y, boxMin.z, 0.f);
-        const v_float32x4 boxUp(boxMax.x, boxMax.y, boxMax.z, 0.f);
-
-        const v_float32x4 invVoxelSize = v_float32x4(volume.voxelSizeInv,
-                                                     volume.voxelSizeInv,
-                                                     volume.voxelSizeInv, 1.f);
-
-        const float (&vm)[16] = vol2cam.matrix.val;
-        const v_float32x4 volRot0(vm[0], vm[4], vm[8], 0);
-        const v_float32x4 volRot1(vm[1], vm[5], vm[9], 0);
-        const v_float32x4 volRot2(vm[2], vm[6], vm[10], 0);
-        const v_float32x4 volTrans(vm[3], vm[7], vm[11], 0);
-
-        for(int y = range.start; y < range.end; y++)
-        {
-            ptype* ptsRow = points[y];
-            ptype* nrmRow = normals[y];
-
-            for(int x = 0; x < points.cols; x++)
-            {
-                v_float32x4 point = nanv, normal = nanv;
-
-                v_float32x4 orig = camTrans;
-
-                // get direction through pixel in volume space:
-
-                // 1. reproject (x, y) on projecting plane where z = 1.f
-                v_float32x4 planed = (v_float32x4((float)x, (float)y, 0.f, 0.f) - vcxy)*vfxy;
-                planed             = v_combine_low(planed, v_float32x4(1.f, 0.f, 0.f, 0.f));
-
-                // 2. rotate to volume space
-                planed = v_matmuladd(planed, camRot0, camRot1, camRot2, v_setzero_f32());
-
-                // 3. normalize
-                v_float32x4 invNorm = v_invsqrt(v_setall_f32(v_reduce_sum(planed*planed)));
-                v_float32x4 dir     = planed*invNorm;
-
-                // compute intersection of ray with all six bbox planes
-                v_float32x4 rayinv = v_setall_f32(1.f)/dir;
-                // div by zero should be eliminated by these products
-                v_float32x4 tbottom = rayinv*(boxDown - orig);
-                v_float32x4 ttop    = rayinv*(boxUp   - orig);
-
-                // re-order intersections to find smallest and largest on each axis
-                v_float32x4 minAx = v_min(ttop, tbottom);
-                v_float32x4 maxAx = v_max(ttop, tbottom);
-
-                // near clipping plane
-                const float clip = 0.f;
-                float _minAx[4], _maxAx[4];
-                v_store(_minAx, minAx);
-                v_store(_maxAx, maxAx);
-                float tmin       = max( {_minAx[0], _minAx[1], _minAx[2], clip} );
-                float tmax       = min( {_maxAx[0], _maxAx[1], _maxAx[2]} );
-
-                // precautions against getting coordinates out of bounds
-                tmin = tmin + tstep;
-                tmax = tmax - tstep;
-
-                if(tmin < tmax)
-                {
-                    // interpolation optimized a little
-                    orig *= invVoxelSize;
-                    dir  *= invVoxelSize;
-
-                    int xdim            = volume.volDims[0];
-                    int ydim            = volume.volDims[1];
-                    int zdim            = volume.volDims[2];
-                    v_float32x4 rayStep = dir * v_setall_f32(tstep);
-                    v_float32x4 next    = (orig + dir * v_setall_f32(tmin));
-                    float f = volume.interpolateVoxel(next), fnext = f;
-
-                    //raymarch
-                    int steps = 0;
-                    int nSteps = cvFloor((tmax - tmin)/tstep);
-                    for(; steps < nSteps; steps++)
-                    {
-                        next += rayStep;
-                        v_int32x4 ip = v_round(next);
-                        int ix = ip.get0(); ip = v_rotate_right<1>(ip);
-                        int iy = ip.get0(); ip = v_rotate_right<1>(ip);
-                        int iz = ip.get0();
-                        int coord = ix*xdim + iy*ydim + iz*zdim;
-
-                        fnext = tsdfToFloat(volume.volume.at<TsdfVoxel>(coord).tsdf);
-                        if(fnext != f)
-                        {
-                            fnext = volume.interpolateVoxel(next);
-
-                            // when ray crosses a surface
-                            if(std::signbit(f) != std::signbit(fnext))
-                                break;
-
-                            f = fnext;
-                        }
-                    }
-
-                    // if ray penetrates a surface from outside
-                    // linearly interpolate t between two f values
-                    if(f > 0.f && fnext < 0.f)
-                    {
-                        v_float32x4 tp = next - rayStep;
-                        float ft    = volume.interpolateVoxel(tp);
-                        float ftdt  = volume.interpolateVoxel(next);
-                        float ts = tmin + tstep*(steps - ft/(ftdt - ft));
-
-                        // avoid division by zero
-                        if(!cvIsNaN(ts) && !cvIsInf(ts))
-                        {
-                            v_float32x4 pv = (orig + dir*v_setall_f32(ts));
-                            v_float32x4 nv = volume.getNormalVoxel(pv);
-
-                            if(!isNaN(nv))
-                            {
-                                //convert pv and nv to camera space
-                                normal = v_matmuladd(nv, volRot0, volRot1, volRot2, v_setzero_f32());
-                                // interpolation optimized a little
-                                point = v_matmuladd(pv*v_float32x4(volume.voxelSize,
-                                                                   volume.voxelSize,
-                                                                   volume.voxelSize, 1.f),
-                                                    volRot0, volRot1, volRot2, volTrans);
-                            }
-                        }
-                    }
-                }
-
-                v_store((float*)(&ptsRow[x]), point);
-                v_store((float*)(&nrmRow[x]), normal);
-            }
-        }
-    }
-#else
-    virtual void operator() (const Range& range) const override
-    {
-        const Point3f camTrans = cam2vol.translation();
-        const Matx33f  camRot  = cam2vol.rotation();
-        const Matx33f  volRot  = vol2cam.rotation();
-
-        for(int y = range.start; y < range.end; y++)
-        {
-            ptype* ptsRow = points[y];
-            ptype* nrmRow = normals[y];
-
-            for(int x = 0; x < points.cols; x++)
-            {
-                Point3f point = nan3, normal = nan3;
-
-                Point3f orig = camTrans;
-                // direction through pixel in volume space
-                Point3f dir = normalize(Vec3f(camRot * reproj(Point3f(float(x), float(y), 1.f))));
-
-                // compute intersection of ray with all six bbox planes
-                Vec3f rayinv(1.f/dir.x, 1.f/dir.y, 1.f/dir.z);
-                Point3f tbottom = rayinv.mul(boxMin - orig);
-                Point3f ttop    = rayinv.mul(boxMax - orig);
-
-                // re-order intersections to find smallest and largest on each axis
-                Point3f minAx(min(ttop.x, tbottom.x), min(ttop.y, tbottom.y), min(ttop.z, tbottom.z));
-                Point3f maxAx(max(ttop.x, tbottom.x), max(ttop.y, tbottom.y), max(ttop.z, tbottom.z));
-
-                // near clipping plane
-                const float clip = 0.f;
-                //float tmin = max(max(max(minAx.x, minAx.y), max(minAx.x, minAx.z)), clip);
-                //float tmax =     min(min(maxAx.x, maxAx.y), min(maxAx.x, maxAx.z));
-                float tmin = max({ minAx.x, minAx.y, minAx.z, clip });
-                float tmax = min({ maxAx.x, maxAx.y, maxAx.z });
-
-                // precautions against getting coordinates out of bounds
-                tmin = tmin + tstep;
-                tmax = tmax - tstep;
-
-                if(tmin < tmax)
-                {
-                    // interpolation optimized a little
-                    orig = orig*volume.voxelSizeInv;
-                    dir  =  dir*volume.voxelSizeInv;
-
-                    Point3f rayStep = dir * tstep;
-                    Point3f next = (orig + dir * tmin);
-                    float f = volume.interpolateVoxel(next), fnext = f;
-
-                    //raymarch
-                    int steps = 0;
-                    int nSteps = int(floor((tmax - tmin)/tstep));
-                    for(; steps < nSteps; steps++)
-                    {
-                        next += rayStep;
-                        int xdim = volume.volDims[0];
-                        int ydim = volume.volDims[1];
-                        int zdim = volume.volDims[2];
-                        int ix = cvRound(next.x);
-                        int iy = cvRound(next.y);
-                        int iz = cvRound(next.z);
-                        fnext = tsdfToFloat(volume.volume.at<TsdfVoxel>(ix*xdim + iy*ydim + iz*zdim).tsdf);
-                        if(fnext != f)
-                        {
-                            fnext = volume.interpolateVoxel(next);
-                            // when ray crosses a surface
-                            if(std::signbit(f) != std::signbit(fnext))
-                                break;
-
-                            f = fnext;
-                        }
-                    }
-                    // if ray penetrates a surface from outside
-                    // linearly interpolate t between two f values
-                    if(f > 0.f && fnext < 0.f)
-                    {
-                        Point3f tp    = next - rayStep;
-                        float ft   = volume.interpolateVoxel(tp);
-                        float ftdt = volume.interpolateVoxel(next);
-                        // float t = tmin + steps*tstep;
-                        // float ts = t - tstep*ft/(ftdt - ft);
-                        float ts = tmin + tstep*(steps - ft/(ftdt - ft));
-
-                        // avoid division by zero
-                        if(!cvIsNaN(ts) && !cvIsInf(ts))
-                        {
-                            Point3f pv = (orig + dir*ts);
-                            Point3f nv = volume.getNormalVoxel(pv);
-
-                            if(!isNaN(nv))
-                            {
-                                //convert pv and nv to camera space
-                                normal = volRot * nv;
-                                // interpolation optimized a little
-                                point = vol2cam * (pv*volume.voxelSize);
-                            }
-                        }
-                    }
-                }
-                ptsRow[x] = toPtype(point);
-                nrmRow[x] = toPtype(normal);
-            }
-        }
-    }
-#endif
-
-    Points& points;
-    Normals& normals;
-    const TSDFVolumeCPU& volume;
-
-    const float tstep;
-
-    const Point3f boxMax;
-    const Point3f boxMin;
-
-    const Affine3f cam2vol;
-    const Affine3f vol2cam;
-    const Intr::Reprojector reproj;
-};
-
-
-void TSDFVolumeCPU::raycast(const Matx44f& cameraPose, const Matx33f& intrinsics, const Size& frameSize,
-                            OutputArray _points, OutputArray _normals) const
-{
-    CV_TRACE_FUNCTION();
-
-    CV_Assert(frameSize.area() > 0);
-
-    _points.create (frameSize, POINT_TYPE);
-    _normals.create(frameSize, POINT_TYPE);
-
-    Points points   =  _points.getMat();
-    Normals normals = _normals.getMat();
-
-    RaycastInvoker ri(points, normals, cameraPose, Intr(intrinsics), *this);
-
-    const int nstripes = -1;
-    parallel_for_(Range(0, points.rows), ri, nstripes);
-}
-
-
-struct FetchPointsNormalsInvoker : ParallelLoopBody
-{
-    FetchPointsNormalsInvoker(const TSDFVolumeCPU& _volume,
-                              std::vector<std::vector<ptype>>& _pVecs,
-                              std::vector<std::vector<ptype>>& _nVecs,
-                              bool _needNormals) :
-        ParallelLoopBody(),
-        vol(_volume),
-        pVecs(_pVecs),
-        nVecs(_nVecs),
-        needNormals(_needNormals)
-    {
-        volDataStart = vol.volume.ptr<TsdfVoxel>();
-    }
-
-    inline void coord(std::vector<ptype>& points, std::vector<ptype>& normals,
-                      int x, int y, int z, Point3f V, float v0, int axis) const
-    {
-        // 0 for x, 1 for y, 2 for z
-        bool limits = false;
-        Point3i shift;
-        float Vc = 0.f;
-        if(axis == 0)
-        {
-            shift  = Point3i(1, 0, 0);
-            limits = (x + 1 < vol.volResolution.x);
-            Vc     = V.x;
-        }
-        if(axis == 1)
-        {
-            shift  = Point3i(0, 1, 0);
-            limits = (y + 1 < vol.volResolution.y);
-            Vc     = V.y;
-        }
-        if(axis == 2)
-        {
-            shift  = Point3i(0, 0, 1);
-            limits = (z + 1 < vol.volResolution.z);
-            Vc     = V.z;
-        }
-
-        if(limits)
-        {
-            const TsdfVoxel& voxeld = volDataStart[(x+shift.x)*vol.volDims[0] +
-                                                   (y+shift.y)*vol.volDims[1] +
-                                                   (z+shift.z)*vol.volDims[2]];
-            float vd = tsdfToFloat(voxeld.tsdf);
-            if(voxeld.weight != 0 && vd != 1.f)
-            {
-                if((v0 > 0 && vd < 0) || (v0 < 0 && vd > 0))
-                {
-                    //linearly interpolate coordinate
-                    float Vn    = Vc + vol.voxelSize;
-                    float dinv  = 1.f/(abs(v0)+abs(vd));
-                    float inter = (Vc*abs(vd) + Vn*abs(v0))*dinv;
-
-                    Point3f p(shift.x ? inter : V.x,
-                              shift.y ? inter : V.y,
-                              shift.z ? inter : V.z);
-                    {
-                        points.push_back(toPtype(vol.pose * p));
-                        if(needNormals)
-                            normals.push_back(toPtype(vol.pose.rotation() *
-                                                      vol.getNormalVoxel(p*vol.voxelSizeInv)));
-                    }
-                }
-            }
-        }
-    }
-
-    virtual void operator() (const Range& range) const override
-    {
-        std::vector<ptype> points, normals;
-        for(int x = range.start; x < range.end; x++)
-        {
-            const TsdfVoxel* volDataX = volDataStart + x*vol.volDims[0];
-            for(int y = 0; y < vol.volResolution.y; y++)
-            {
-                const TsdfVoxel* volDataY = volDataX + y*vol.volDims[1];
-                for(int z = 0; z < vol.volResolution.z; z++)
-                {
-                    const TsdfVoxel& voxel0 = volDataY[z*vol.volDims[2]];
-                    float v0             = tsdfToFloat(voxel0.tsdf);
-                    if(voxel0.weight != 0 && v0 != 1.f)
-                    {
-                        Point3f V(Point3f((float)x + 0.5f, (float)y + 0.5f, (float)z + 0.5f)*vol.voxelSize);
-
-                        coord(points, normals, x, y, z, V, v0, 0);
-                        coord(points, normals, x, y, z, V, v0, 1);
-                        coord(points, normals, x, y, z, V, v0, 2);
-
-                    } // if voxel is not empty
-                }
-            }
-        }
-
-        AutoLock al(mutex);
-        pVecs.push_back(points);
-        nVecs.push_back(normals);
-    }
-
-    const TSDFVolumeCPU& vol;
-    std::vector<std::vector<ptype>>& pVecs;
-    std::vector<std::vector<ptype>>& nVecs;
-    const TsdfVoxel* volDataStart;
-    bool needNormals;
-    mutable Mutex mutex;
-};
-
-void TSDFVolumeCPU::fetchPointsNormals(OutputArray _points, OutputArray _normals) const
-{
-    CV_TRACE_FUNCTION();
-
-    if(_points.needed())
-    {
-        std::vector<std::vector<ptype>> pVecs, nVecs;
-        FetchPointsNormalsInvoker fi(*this, pVecs, nVecs, _normals.needed());
-        Range range(0, volResolution.x);
-        const int nstripes = -1;
-        parallel_for_(range, fi, nstripes);
-
-        std::vector<ptype> points, normals;
-        for(size_t i = 0; i < pVecs.size(); i++)
-        {
-            points.insert(points.end(), pVecs[i].begin(), pVecs[i].end());
-            normals.insert(normals.end(), nVecs[i].begin(), nVecs[i].end());
-        }
-
-        _points.create((int)points.size(), 1, POINT_TYPE);
-        if(!points.empty())
-            Mat((int)points.size(), 1, POINT_TYPE, &points[0]).copyTo(_points.getMat());
-
-        if(_normals.needed())
-        {
-            _normals.create((int)normals.size(), 1, POINT_TYPE);
-            if(!normals.empty())
-                Mat((int)normals.size(), 1, POINT_TYPE, &normals[0]).copyTo(_normals.getMat());
-        }
-    }
-}
-
-void TSDFVolumeCPU::fetchNormals(InputArray _points, OutputArray _normals) const
-{
-    CV_TRACE_FUNCTION();
-    CV_Assert(!_points.empty());
-    if(_normals.needed())
-    {
-        Points points = _points.getMat();
-        CV_Assert(points.type() == POINT_TYPE);
-
-        _normals.createSameSize(_points, _points.type());
-        Normals normals = _normals.getMat();
-
-        const TSDFVolumeCPU& _vol = *this;
-        auto PushNormals = [&](const ptype& pp, const int* position)
-        {
-            const TSDFVolumeCPU& vol(_vol);
-            Affine3f invPose(vol.pose.inv());
-            Point3f p = fromPtype(pp);
-            Point3f n = nan3;
-            if (!isNaN(p))
-            {
-                Point3f voxPt = (invPose * p);
-                voxPt = voxPt * vol.voxelSizeInv;
-                n = vol.pose.rotation() * vol.getNormalVoxel(voxPt);
-            }
-            normals(position[0], position[1]) = toPtype(n);
-        };
-        points.forEach(PushNormals);
-    }
-}
-
-///////// GPU implementation /////////
-
-#ifdef HAVE_OPENCL
-
-class TSDFVolumeGPU : public TSDFVolume
-{
-   public:
-    // dimension in voxels, size in meters
-    TSDFVolumeGPU(float _voxelSize, Matx44f _pose, float _raycastStepFactor, float _truncDist,
-                  int _maxWeight, Point3i _resolution);
-
-    virtual void integrate(InputArray _depth, float depthFactor, const Matx44f& cameraPose,
-                           const Matx33f& intrinsics, const int frameId = 0) override;
-    virtual void raycast(const Matx44f& cameraPose, const Matx33f& intrinsics, const Size& frameSize,
-                         OutputArray _points, OutputArray _normals) const override;
-    virtual void integrate(InputArray, InputArray, float, const Matx44f&, const Matx33f&, const Matx33f&, const int) override
-    { CV_Error(Error::StsNotImplemented, "Not implemented"); };
-    virtual void raycast(const Matx44f&, const Matx33f&, const Size&, OutputArray, OutputArray, OutputArray) const override
-    { CV_Error(Error::StsNotImplemented, "Not implemented"); };
-
-    virtual void fetchPointsNormals(OutputArray points, OutputArray normals) const override;
-    virtual void fetchNormals(InputArray points, OutputArray normals) const override;
-
-    virtual void reset() override;
-
-    Vec6f frameParams;
-    UMat pixNorms;
-    // See zFirstMemOrder arg of parent class constructor
-    // for the array layout info
-    // Array elem is CV_8UC2, read as (int8, uint8)
-    UMat volume;
-};
-
-TSDFVolumeGPU::TSDFVolumeGPU(float _voxelSize, Matx44f _pose, float _raycastStepFactor, float _truncDist, int _maxWeight,
-                             Point3i _resolution) :
-    TSDFVolume(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, _resolution, false)
-{
-    volume = UMat(1, volResolution.x * volResolution.y * volResolution.z, CV_8UC2);
-
-    reset();
-}
-
-
-// zero volume, leave rest params the same
-void TSDFVolumeGPU::reset()
-{
-    CV_TRACE_FUNCTION();
-
-    volume.setTo(Scalar(0, 0));
-}
-
-// use depth instead of distance (optimization)
-void TSDFVolumeGPU::integrate(InputArray _depth, float depthFactor,
-                              const Matx44f& cameraPose, const Matx33f& _intrinsics, const int frameId)
-{
-    CV_TRACE_FUNCTION();
-    CV_UNUSED(frameId);
-    CV_Assert(!_depth.empty());
-
-    UMat depth = _depth.getUMat();
-
-    String errorStr;
-    String name           = "integrate";
-    ocl::ProgramSource source = ocl::_3d::tsdf_oclsrc;
-    String options        = "-cl-mad-enable";
-    ocl::Kernel k;
-    k.create(name.c_str(), source, options, &errorStr);
-
-    if(k.empty())
-        throw std::runtime_error("Failed to create kernel: " + errorStr);
-
-    UMat vol2camGpu;
-    Affine3f vol2cam(Affine3f(cameraPose.inv()) * pose);
-    Mat(vol2cam.matrix).copyTo(vol2camGpu);
-
-    float dfac = 1.f/depthFactor;
-    Vec4i volResGpu(volResolution.x, volResolution.y, volResolution.z);
-    Intr intrinsics(_intrinsics);
-    Vec2f fxy(intrinsics.fx, intrinsics.fy), cxy(intrinsics.cx, intrinsics.cy);
-    Vec6f newParams((float)depth.rows, (float)depth.cols,
-                    intrinsics.fx, intrinsics.fy,
-                    intrinsics.cx, intrinsics.cy);
-    if (!(frameParams == newParams))
-    {
-        frameParams = newParams;
-        pixNorms = preCalculationPixNormGPU(depth.size(), intrinsics);
-    }
-
-    // TODO: optimization possible
-    // Use sampler for depth (mask needed)
-    k.args(ocl::KernelArg::ReadOnly(depth),
-           ocl::KernelArg::PtrReadWrite(volume),
-           ocl::KernelArg::PtrReadOnly(vol2camGpu),
-           voxelSize,
-           volResGpu.val,
-           volDims.val,
-           fxy.val,
-           cxy.val,
-           dfac,
-           truncDist,
-           int(maxWeight),
-           ocl::KernelArg::PtrReadOnly(pixNorms));
-
-    size_t globalSize[2];
-    globalSize[0] = (size_t)volResolution.x;
-    globalSize[1] = (size_t)volResolution.y;
-
-    if(!k.run(2, globalSize, NULL, true))
-        throw std::runtime_error("Failed to run kernel");
-}
-
-
-void TSDFVolumeGPU::raycast(const Matx44f& cameraPose, const Matx33f& _intrinsics, const Size& frameSize,
-                            OutputArray _points, OutputArray _normals) const
-{
-    CV_TRACE_FUNCTION();
-
-    CV_Assert(frameSize.area() > 0);
-
-    String errorStr;
-    String name           = "raycast";
-    ocl::ProgramSource source = ocl::_3d::tsdf_oclsrc;
-    String options        = "-cl-mad-enable";
-    ocl::Kernel k;
-    k.create(name.c_str(), source, options, &errorStr);
-
-    if(k.empty())
-        throw std::runtime_error("Failed to create kernel: " + errorStr);
-
-    _points.create (frameSize, CV_32FC4);
-    _normals.create(frameSize, CV_32FC4);
-
-    UMat points  =  _points.getUMat();
-    UMat normals = _normals.getUMat();
-
-    UMat vol2camGpu, cam2volGpu;
-    Affine3f vol2cam = Affine3f(cameraPose.inv()) * pose;
-    Affine3f cam2vol = pose.inv() * Affine3f(cameraPose);
-    Mat(cam2vol.matrix).copyTo(cam2volGpu);
-    Mat(vol2cam.matrix).copyTo(vol2camGpu);
-    Intr intrinsics(_intrinsics);
-    Intr::Reprojector r = intrinsics.makeReprojector();
-    // We do subtract voxel size to minimize checks after
-    // Note: origin of volume coordinate is placed
-    // in the center of voxel (0,0,0), not in the corner of the voxel!
-    Vec4f boxMin, boxMax(volSize.x - voxelSize,
-                         volSize.y - voxelSize,
-                         volSize.z - voxelSize);
-    Vec2f finv(r.fxinv, r.fyinv), cxy(r.cx, r.cy);
-    float tstep = truncDist * raycastStepFactor;
-
-    Vec4i volResGpu(volResolution.x, volResolution.y, volResolution.z);
-
-    k.args(ocl::KernelArg::WriteOnlyNoSize(points),
-           ocl::KernelArg::WriteOnlyNoSize(normals),
-           frameSize,
-           ocl::KernelArg::PtrReadOnly(volume),
-           ocl::KernelArg::PtrReadOnly(vol2camGpu),
-           ocl::KernelArg::PtrReadOnly(cam2volGpu),
-           finv.val, cxy.val,
-           boxMin.val, boxMax.val,
-           tstep,
-           voxelSize,
-           volResGpu.val,
-           volDims.val,
-           neighbourCoords.val);
-
-    size_t globalSize[2];
-    globalSize[0] = (size_t)frameSize.width;
-    globalSize[1] = (size_t)frameSize.height;
-
-    if(!k.run(2, globalSize, NULL, true))
-        throw std::runtime_error("Failed to run kernel");
-}
-
-
-void TSDFVolumeGPU::fetchNormals(InputArray _points, OutputArray _normals) const
-{
-    CV_TRACE_FUNCTION();
-    CV_Assert(!_points.empty());
-
-    if(_normals.needed())
-    {
-        UMat points = _points.getUMat();
-        CV_Assert(points.type() == POINT_TYPE);
-
-        _normals.createSameSize(_points, POINT_TYPE);
-        UMat normals = _normals.getUMat();
-
-        String errorStr;
-        String name           = "getNormals";
-        ocl::ProgramSource source = ocl::_3d::tsdf_oclsrc;
-        String options        = "-cl-mad-enable";
-        ocl::Kernel k;
-        k.create(name.c_str(), source, options, &errorStr);
-
-        if(k.empty())
-            throw std::runtime_error("Failed to create kernel: " + errorStr);
-
-        UMat volPoseGpu, invPoseGpu;
-        Mat(pose      .matrix).copyTo(volPoseGpu);
-        Mat(pose.inv().matrix).copyTo(invPoseGpu);
-        Vec4i volResGpu(volResolution.x, volResolution.y, volResolution.z);
-        Size frameSize = points.size();
-
-        k.args(ocl::KernelArg::ReadOnlyNoSize(points),
-               ocl::KernelArg::WriteOnlyNoSize(normals),
-               frameSize,
-               ocl::KernelArg::PtrReadOnly(volume),
-               ocl::KernelArg::PtrReadOnly(volPoseGpu),
-               ocl::KernelArg::PtrReadOnly(invPoseGpu),
-               voxelSizeInv,
-               volResGpu.val,
-               volDims.val,
-               neighbourCoords.val);
-
-        size_t globalSize[2];
-        globalSize[0] = (size_t)points.cols;
-        globalSize[1] = (size_t)points.rows;
-
-        if(!k.run(2, globalSize, NULL, true))
-            throw std::runtime_error("Failed to run kernel");
-    }
-}
-
-void TSDFVolumeGPU::fetchPointsNormals(OutputArray points, OutputArray normals) const
-{
-    CV_TRACE_FUNCTION();
-
-    if(points.needed())
-    {
-        bool needNormals = normals.needed();
-
-        // 1. scan to count points in each group and allocate output arrays
-
-        ocl::Kernel kscan;
-
-        String errorStr;
-        ocl::ProgramSource source = ocl::_3d::tsdf_oclsrc;
-        String options        = "-cl-mad-enable";
-
-        kscan.create("scanSize", source, options, &errorStr);
-
-        if(kscan.empty())
-            throw std::runtime_error("Failed to create kernel: " + errorStr);
-
-        size_t globalSize[3];
-        globalSize[0] = (size_t)volResolution.x;
-        globalSize[1] = (size_t)volResolution.y;
-        globalSize[2] = (size_t)volResolution.z;
-
-        const ocl::Device& device = ocl::Device::getDefault();
-        size_t wgsLimit           = device.maxWorkGroupSize();
-        size_t memSize            = device.localMemSize();
-        // local mem should keep a point (and a normal) for each thread in a group
-        // use 4 float per each point and normal
-        size_t elemSize     = (sizeof(float)*4)*(needNormals ? 2 : 1);
-        const size_t lcols  = 8;
-        const size_t lrows  = 8;
-        size_t lplanes      = min(memSize/elemSize, wgsLimit)/lcols/lrows;
-        lplanes             = roundDownPow2(lplanes);
-        size_t localSize[3] = {lcols, lrows, lplanes};
-        Vec3i ngroups((int)divUp(globalSize[0], (unsigned int)localSize[0]),
-                      (int)divUp(globalSize[1], (unsigned int)localSize[1]),
-                      (int)divUp(globalSize[2], (unsigned int)localSize[2]));
-
-        const size_t counterSize = sizeof(int);
-        size_t lszscan = localSize[0]*localSize[1]*localSize[2]*counterSize;
-
-        const int gsz[3] = {ngroups[2], ngroups[1], ngroups[0]};
-        UMat groupedSum(3, gsz, CV_32S, Scalar(0));
-
-        UMat volPoseGpu;
-        Mat(pose.matrix).copyTo(volPoseGpu);
-        Vec4i volResGpu(volResolution.x, volResolution.y, volResolution.z);
-
-        kscan.args(ocl::KernelArg::PtrReadOnly(volume),
-                   volResGpu.val,
-                   volDims.val,
-                   neighbourCoords.val,
-                   ocl::KernelArg::PtrReadOnly(volPoseGpu),
-                   voxelSize,
-                   voxelSizeInv,
-                   ocl::KernelArg::Local(lszscan),
-                   ocl::KernelArg::WriteOnlyNoSize(groupedSum));
-
-        if(!kscan.run(3, globalSize, localSize, true))
-            throw std::runtime_error("Failed to run kernel");
-
-        Mat groupedSumCpu = groupedSum.getMat(ACCESS_READ);
-        int gpuSum        = (int)sum(groupedSumCpu)[0];
-        // should be no CPU copies when new kernel is executing
-        groupedSumCpu.release();
-
-        // 2. fill output arrays according to per-group points count
-
-        points.create(gpuSum, 1, POINT_TYPE);
-        UMat pts = points.getUMat();
-        UMat nrm;
-        if(needNormals)
-        {
-            normals.create(gpuSum, 1, POINT_TYPE);
-            nrm = normals.getUMat();
-        }
-        else
-        {
-            // it won't be accessed but empty args are forbidden
-            nrm = UMat(1, 1, POINT_TYPE);
-        }
-
-        if (gpuSum)
-        {
-            ocl::Kernel kfill;
-            kfill.create("fillPtsNrm", source, options, &errorStr);
-
-            if(kfill.empty())
-                throw std::runtime_error("Failed to create kernel: " + errorStr);
-
-            UMat atomicCtr(1, 1, CV_32S, Scalar(0));
-
-            // mem size to keep pts (and normals optionally) for all work-items in a group
-            size_t lszfill = localSize[0]*localSize[1]*localSize[2]*elemSize;
-
-            kfill.args(ocl::KernelArg::PtrReadOnly(volume),
-                       volResGpu.val,
-                       volDims.val,
-                       neighbourCoords.val,
-                       ocl::KernelArg::PtrReadOnly(volPoseGpu),
-                       voxelSize,
-                       voxelSizeInv,
-                       ((int)needNormals),
-                       ocl::KernelArg::Local(lszfill),
-                       ocl::KernelArg::PtrReadWrite(atomicCtr),
-                       ocl::KernelArg::ReadOnlyNoSize(groupedSum),
-                       ocl::KernelArg::WriteOnlyNoSize(pts),
-                       ocl::KernelArg::WriteOnlyNoSize(nrm)
-                       );
-
-            if(!kfill.run(3, globalSize, localSize, true))
-                throw std::runtime_error("Failed to run kernel");
-        }
-    }
-}
-
-#endif
-
-Ptr<TSDFVolume> makeTSDFVolume(float _voxelSize, Matx44f _pose, float _raycastStepFactor,
-                               float _truncDist, int _maxWeight, Point3i _resolution)
-{
-#ifdef HAVE_OPENCL
-    if (ocl::useOpenCL())
-        return makePtr<TSDFVolumeGPU>(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight,
-                                      _resolution);
-#endif
-    return makePtr<TSDFVolumeCPU>(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight,
-                                  _resolution);
-}
-
-Ptr<TSDFVolume> makeTSDFVolume(const VolumeParams& _params)
-{
-    Mat pm = _params.pose;
-    CV_Assert(pm.size() == Size(4, 4));
-    CV_Assert(pm.type() == CV_32F);
-    Matx44f pose = pm;
-    cv::Point3i resolution(_params.resolutionX, _params.resolutionY, _params.resolutionZ);
-#ifdef HAVE_OPENCL
-    if (ocl::useOpenCL())
-        return makePtr<TSDFVolumeGPU>(_params.voxelSize, pose, _params.raycastStepFactor,
-                                      _params.tsdfTruncDist, _params.maxWeight, resolution);
-#endif
-    return makePtr<TSDFVolumeCPU>(_params.voxelSize, pose, _params.raycastStepFactor,
-                                  _params.tsdfTruncDist, _params.maxWeight, resolution);
-
-}
-
-} // namespace cv
diff --git a/modules/3d/src/rgbd/tsdf.hpp b/modules/3d/src/rgbd/tsdf.hpp
deleted file mode 100644
index 572957e23c..0000000000
--- a/modules/3d/src/rgbd/tsdf.hpp
+++ /dev/null
@@ -1,41 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html
-
-// Partially rewritten from https://github.com/Nerei/kinfu_remake
-// Copyright(c) 2012, Anatoly Baksheev. All rights reserved.
-
-#ifndef OPENCV_3D_TSDF_HPP
-#define OPENCV_3D_TSDF_HPP
-
-#include "../precomp.hpp"
-#include "tsdf_functions.hpp"
-
-namespace cv
-{
-
-class TSDFVolume : public Volume
-{
-   public:
-    // dimension in voxels, size in meters
-    TSDFVolume(float _voxelSize, Matx44f _pose, float _raycastStepFactor, float _truncDist,
-               int _maxWeight, Point3i _resolution, bool zFirstMemOrder = true);
-    virtual ~TSDFVolume() = default;
-
-   public:
-
-    Point3i volResolution;
-    WeightType maxWeight;
-
-    Point3f volSize;
-    float truncDist;
-    Vec4i volDims;
-    Vec8i neighbourCoords;
-};
-
-Ptr<TSDFVolume> makeTSDFVolume(float _voxelSize, Matx44f _pose, float _raycastStepFactor,
-                               float _truncDist, int _maxWeight, Point3i _resolution);
-Ptr<TSDFVolume> makeTSDFVolume(const VolumeParams& _params);
-}  // namespace cv
-
-#endif // include guard
diff --git a/modules/3d/src/rgbd/tsdf_functions.cpp b/modules/3d/src/rgbd/tsdf_functions.cpp
index 0ced2f7a9b..df6717d4cd 100644
--- a/modules/3d/src/rgbd/tsdf_functions.cpp
+++ b/modules/3d/src/rgbd/tsdf_functions.cpp
@@ -11,11 +11,13 @@
 
 namespace cv {
 
-cv::Mat preCalculationPixNorm(Size size, const Intr& intrinsics)
+void preCalculationPixNorm(Size size, const Intr& intrinsics, Mat& pixNorm)
 {
+    //std::cout << "preCalculationPixNorm" << std::endl;
+
     Point2f fl(intrinsics.fx, intrinsics.fy);
     Point2f pp(intrinsics.cx, intrinsics.cy);
-    Mat pixNorm(size.height, size.width, CV_32F);
+    pixNorm = Mat(size.height, size.width, CV_32F);
     std::vector<float> x(size.width);
     std::vector<float> y(size.height);
     for (int i = 0; i < size.width; i++)
@@ -30,45 +32,65 @@ cv::Mat preCalculationPixNorm(Size size, const Intr& intrinsics)
             pixNorm.at<float>(i, j) = sqrtf(x[j] * x[j] + y[i] * y[i] + 1.0f);
         }
     }
-    return pixNorm;
+
 }
 
 #ifdef HAVE_OPENCL
-cv::UMat preCalculationPixNormGPU(Size size, const Intr& intrinsics)
+void ocl_preCalculationPixNorm(Size size, const Intr& intrinsics, UMat& pixNorm)
 {
-    // calculating this on CPU then uploading to GPU is faster than calculating this on GPU
-    Mat cpuPixNorm = preCalculationPixNorm(size, intrinsics);
+    //std::cout << "ocl_preCalculationPixNorm" << std::endl;
 
-    UMat pixNorm(size, CV_32F);
+    // calculating this on CPU then uploading to GPU is faster than calculating this on GPU
+    Mat cpuPixNorm;
+    preCalculationPixNorm(size, intrinsics, cpuPixNorm);
     cpuPixNorm.copyTo(pixNorm);
 
-    return pixNorm;
 }
 #endif
 
+// Integrate
 
-void integrateVolumeUnit(
-    float truncDist, float voxelSize, int maxWeight,
-    cv::Matx44f _pose, Point3i volResolution, Vec4i volStrides,
-    InputArray _depth, float depthFactor, const cv::Matx44f& cameraPose,
-    const cv::Intr& intrinsics, InputArray _pixNorms, InputArray _volume)
+void integrateTsdfVolumeUnit(const VolumeSettings& settings, const Matx44f& cameraPose,
+    InputArray _depth, InputArray _pixNorms, InputArray _volume)
 {
-    CV_TRACE_FUNCTION();
+    Matx44f volumePose;
+    settings.getVolumePose(volumePose);
+    integrateTsdfVolumeUnit(settings, volumePose, cameraPose, _depth, _pixNorms, _volume);
+}
+
+
+void integrateTsdfVolumeUnit(
+    const VolumeSettings& settings, const Matx44f& volumePose, const Matx44f& cameraPose,
+    InputArray _depth, InputArray _pixNorms, InputArray _volume)
+{
+    //std::cout << "integrateTsdfVolumeUnit" << std::endl;
 
-    CV_Assert(_depth.type() == DEPTH_TYPE);
-    CV_Assert(!_depth.empty());
-    cv::Affine3f vpose(_pose);
     Depth depth = _depth.getMat();
-
-    Range integrateRange(0, volResolution.x);
-
     Mat volume = _volume.getMat();
     Mat pixNorms = _pixNorms.getMat();
-    const Intr::Projector proj(intrinsics.makeProjector());
-    const cv::Affine3f vol2cam(Affine3f(cameraPose.inv()) * vpose);
-    const float truncDistInv(1.f / truncDist);
-    const float dfac(1.f / depthFactor);
-    TsdfVoxel* volDataStart = volume.ptr<TsdfVoxel>();;
+
+    TsdfVoxel* volDataStart = volume.ptr<TsdfVoxel>();
+
+    Vec4i volStrides;
+    settings.getVolumeDimensions(volStrides);
+
+    Vec3i resolution;
+    settings.getVolumeResolution(resolution);
+    const Point3i volResolution = Point3i(resolution);
+    float voxelSize = settings.getVoxelSize();
+
+    const Affine3f pose = Affine3f(volumePose);
+    const Affine3f vol2cam(Affine3f(cameraPose.inv()) * pose);
+
+    Matx33f intr;
+    settings.getCameraIntegrateIntrinsics(intr);
+    const Intr::Projector proj = Intr(intr).makeProjector();
+    const float dfac(1.f / settings.getDepthFactor());
+    const float truncDist = settings.getTsdfTruncateDistance();
+    const float truncDistInv = 1.f / truncDist;
+    const int maxWeight = settings.getMaxWeight();
+
+    Range integrateRange(0, volResolution.x);
 
 #if USE_INTRINSICS
     auto IntegrateInvoker = [&](const Range& range)
@@ -197,6 +219,7 @@ void integrateVolumeUnit(
                     float sdf = pixNorm * (v * dfac - zCamSpace);
                     // possible alternative is:
                     // kftype sdf = norm(camSpacePt)*(v*dfac/camSpacePt.z - 1);
+
                     if (sdf >= -truncDist)
                     {
                         TsdfType tsdf = floatToTsdf(fmin(1.f, sdf * truncDistInv));
@@ -207,7 +230,7 @@ void integrateVolumeUnit(
 
                         // update TSDF
                         value = floatToTsdf((tsdfToFloat(value) * weight + tsdfToFloat(tsdf)) / (weight + 1));
-                        weight = (weight + 1) < maxWeight ? (weight + 1) : (WeightType) maxWeight;
+                        weight = (weight + 1) < maxWeight ? (weight + 1) : (WeightType)maxWeight;
                     }
                 }
             }
@@ -278,11 +301,11 @@ void integrateVolumeUnit(
                     if (v == 0) {
                         continue;
                     }
-
                     int _u = projected.x;
                     int _v = projected.y;
                     if (!(_u >= 0 && _u < depth.cols && _v >= 0 && _v < depth.rows))
                         continue;
+
                     float pixNorm = pixNorms.at<float>(_v, _u);
 
                     // difference between distances of point and of surface to camera
@@ -306,310 +329,1132 @@ void integrateVolumeUnit(
         }
     };
 #endif
-
     parallel_for_(integrateRange, IntegrateInvoker);
+    //IntegrateInvoker(integrateRange);
 }
 
-
-void integrateRGBVolumeUnit(
-    float truncDist, float voxelSize, int maxWeight,
-    cv::Matx44f _pose, Point3i volResolution, Vec4i volStrides,
-    InputArray _depth, InputArray _rgb, float depthFactor, const cv::Matx44f& cameraPose,
-    const cv::Intr& depth_intrinsics, const cv::Intr& rgb_intrinsics, InputArray _pixNorms, InputArray _volume)
+#ifdef HAVE_OPENCL
+void ocl_integrateTsdfVolumeUnit(const VolumeSettings& settings, const Matx44f& cameraPose,
+    InputArray _depth, InputArray _pixNorms, InputArray _volume)
 {
+    //std::cout << "ocl_integrateTsdfVolumeUnit" << std::endl;
+
     CV_TRACE_FUNCTION();
-
-    CV_Assert(_depth.type() == DEPTH_TYPE);
+    //CV_UNUSED(frameId);
     CV_Assert(!_depth.empty());
-    cv::Affine3f vpose(_pose);
-    Depth depth = _depth.getMat();
-    Colors color = _rgb.getMat();
-    Range integrateRange(0, volResolution.x);
 
-    Mat volume = _volume.getMat();
-    Mat pixNorms = _pixNorms.getMat();
-    const Intr::Projector projDepth(depth_intrinsics.makeProjector());
-    const Intr::Projector projRGB(rgb_intrinsics);
-    const cv::Affine3f vol2cam(Affine3f(cameraPose.inv()) * vpose);
-    const float truncDistInv(1.f / truncDist);
-    const float dfac(1.f / depthFactor);
-    RGBTsdfVoxel* volDataStart = volume.ptr<RGBTsdfVoxel>();
+    UMat depth = _depth.getUMat();
+    UMat volume = _volume.getUMat();
+    UMat pixNorms = _pixNorms.getUMat();
+
+    String errorStr;
+    String name = "integrate";
+    ocl::ProgramSource source = ocl::_3d::tsdf_oclsrc;
+    String options = "-cl-mad-enable";
+    ocl::Kernel k;
+    k.create(name.c_str(), source, options, &errorStr);
+
+    if (k.empty())
+        throw std::runtime_error("Failed to create kernel: " + errorStr);
+
+    Matx44f _pose;
+    settings.getVolumePose(_pose);
+    const Affine3f pose = Affine3f(_pose);
+    UMat vol2camGpu;
+    Affine3f vol2cam(Affine3f(cameraPose.inv()) * pose);
+    Mat(vol2cam.matrix).copyTo(vol2camGpu);
+
+    float dfac = 1.f / settings.getDepthFactor();
+    Vec3i resolution;
+    settings.getVolumeResolution(resolution);
+    const Point3i volResolution = Point3i(resolution);
+    Vec4i volResGpu(volResolution.x, volResolution.y, volResolution.z);
+    Matx33f intr;
+    settings.getCameraIntegrateIntrinsics(intr);
+    Intr intrinsics(intr);
+    Vec2f fxy(intrinsics.fx, intrinsics.fy), cxy(intrinsics.cx, intrinsics.cy);
+    const Vec4i volDims;
+    settings.getVolumeDimensions(volDims);
+
+    const float voxelSize = settings.getVoxelSize();
+    const float truncatedDistance = settings.getTsdfTruncateDistance();
+    const int maxWeight = settings.getMaxWeight();
+
+    // TODO: optimization possible
+    // Use sampler for depth (mask needed)
+    k.args(ocl::KernelArg::ReadOnly(depth),
+        ocl::KernelArg::PtrReadWrite(volume),
+        ocl::KernelArg::PtrReadOnly(vol2camGpu),
+        voxelSize,
+        volResGpu.val,
+        volDims.val,
+        fxy.val,
+        cxy.val,
+        dfac,
+        truncatedDistance,
+        maxWeight,
+        ocl::KernelArg::PtrReadOnly(pixNorms));
+
+    size_t globalSize[2];
+    globalSize[0] = (size_t)volResolution.x;
+    globalSize[1] = (size_t)volResolution.y;
+
+    if (!k.run(2, globalSize, NULL, true))
+        throw std::runtime_error("Failed to run kernel");
+
+}
+#endif
+
+
+// Raycast
 
 #if USE_INTRINSICS
-    auto IntegrateInvoker = [&](const Range& range)
+// all coordinate checks should be done in inclosing cycle
+inline float interpolateTsdfVoxel(const Mat& volume,
+    const Vec4i& volDims, const Vec8i& neighbourCoords,
+    const v_float32x4& p)
+{
+    // tx, ty, tz = floor(p)
+    v_int32x4 ip = v_floor(p);
+    v_float32x4 t = p - v_cvt_f32(ip);
+    float tx = t.get0();
+    t = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
+    float ty = t.get0();
+    t = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
+    float tz = t.get0();
+
+    int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
+    const TsdfVoxel* volData = volume.ptr<TsdfVoxel>();
+
+    int ix = ip.get0();
+    ip = v_rotate_right<1>(ip);
+    int iy = ip.get0();
+    ip = v_rotate_right<1>(ip);
+    int iz = ip.get0();
+
+    int coordBase = ix * xdim + iy * ydim + iz * zdim;
+
+    TsdfType vx[8];
+    for (int i = 0; i < 8; i++)
+        vx[i] = volData[neighbourCoords[i] + coordBase].tsdf;
+
+    v_float32x4 v0246 = tsdfToFloat_INTR(v_int32x4(vx[0], vx[2], vx[4], vx[6]));
+    v_float32x4 v1357 = tsdfToFloat_INTR(v_int32x4(vx[1], vx[3], vx[5], vx[7]));
+    v_float32x4 vxx = v0246 + v_setall_f32(tz) * (v1357 - v0246);
+
+    v_float32x4 v00_10 = vxx;
+    v_float32x4 v01_11 = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(vxx)));
+
+    v_float32x4 v0_1 = v00_10 + v_setall_f32(ty) * (v01_11 - v00_10);
+    float v0 = v0_1.get0();
+    v0_1 = v_reinterpret_as_f32(v_rotate_right<2>(v_reinterpret_as_u32(v0_1)));
+    float v1 = v0_1.get0();
+
+    return v0 + tx * (v1 - v0);
+}
+
+inline float interpolateTsdfVoxel( const Mat& volume,
+    const Vec4i& volDims, const Vec8i& neighbourCoords,
+    const Point3f& _p)
+{
+    v_float32x4 p(_p.x, _p.y, _p.z, 0);
+    return interpolateTsdfVoxel(volume, volDims, neighbourCoords, p);
+}
+
+#else
+inline float interpolateTsdfVoxel( const Mat& volume,
+    const Vec4i& volDims, const Vec8i& neighbourCoords,
+    const Point3f& p)
+{
+    int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
+
+    int ix = cvFloor(p.x);
+    int iy = cvFloor(p.y);
+    int iz = cvFloor(p.z);
+
+    float tx = p.x - ix;
+    float ty = p.y - iy;
+    float tz = p.z - iz;
+
+    int coordBase = ix * xdim + iy * ydim + iz * zdim;
+    const TsdfVoxel* volData = volume.ptr<TsdfVoxel>();
+
+    float vx[8];
+    for (int i = 0; i < 8; i++)
+        vx[i] = tsdfToFloat(volData[neighbourCoords[i] + coordBase].tsdf);
+
+    float v00 = vx[0] + tz * (vx[1] - vx[0]);
+    float v01 = vx[2] + tz * (vx[3] - vx[2]);
+    float v10 = vx[4] + tz * (vx[5] - vx[4]);
+    float v11 = vx[6] + tz * (vx[7] - vx[6]);
+
+    float v0 = v00 + ty * (v01 - v00);
+    float v1 = v10 + ty * (v11 - v10);
+
+    return v0 + tx * (v1 - v0);
+
+}
+#endif
+
+
+#if USE_INTRINSICS
+//gradientDeltaFactor is fixed at 1.0 of voxel size
+inline v_float32x4 getNormalVoxel( const Mat& volume,
+    const Vec4i& volDims, const Vec8i& neighbourCoords, const Point3i volResolution,
+    const v_float32x4& p)
+{
+    if (v_check_any(p < v_float32x4(1.f, 1.f, 1.f, 0.f)) ||
+        v_check_any(p >= v_float32x4((float)(volResolution.x - 2),
+            (float)(volResolution.y - 2),
+            (float)(volResolution.z - 2), 1.f))
+        )
+        return nanv;
+
+    v_int32x4 ip = v_floor(p);
+    v_float32x4 t = p - v_cvt_f32(ip);
+    float tx = t.get0();
+    t = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
+    float ty = t.get0();
+    t = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
+    float tz = t.get0();
+
+    const int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
+    const TsdfVoxel* volData = volume.ptr<TsdfVoxel>();
+
+    int ix = ip.get0(); ip = v_rotate_right<1>(ip);
+    int iy = ip.get0(); ip = v_rotate_right<1>(ip);
+    int iz = ip.get0();
+
+    int coordBase = ix * xdim + iy * ydim + iz * zdim;
+
+    float CV_DECL_ALIGNED(16) an[4];
+    an[0] = an[1] = an[2] = an[3] = 0.f;
+    for (int c = 0; c < 3; c++)
     {
-        // zStep == vol2cam*(Point3f(x, y, 1)*voxelSize) - basePt;
-        Point3f zStepPt = Point3f(vol2cam.matrix(0, 2),
-            vol2cam.matrix(1, 2),
-            vol2cam.matrix(2, 2)) * voxelSize;
+        const int dim = volDims[c];
+        float& nv = an[c];
 
-        v_float32x4 zStep(zStepPt.x, zStepPt.y, zStepPt.z, 0);
-        v_float32x4 vfxy(projDepth.fx, projDepth.fy, 0.f, 0.f), vcxy(projDepth.cx, projDepth.cy, 0.f, 0.f);
-        v_float32x4 rgb_vfxy(projRGB.fx, projRGB.fy, 0.f, 0.f), rgb_vcxy(projRGB.cx, projRGB.cy, 0.f, 0.f);
-        const v_float32x4 upLimits = v_cvt_f32(v_int32x4(depth.cols - 1, depth.rows - 1, 0, 0));
+        float vx[8];
+        for (int i = 0; i < 8; i++)
+            vx[i] = tsdfToFloat(volData[neighbourCoords[i] + coordBase + 1 * dim].tsdf) -
+            tsdfToFloat(volData[neighbourCoords[i] + coordBase - 1 * dim].tsdf);
 
-        for (int x = range.start; x < range.end; x++)
+        v_float32x4 v0246(vx[0], vx[2], vx[4], vx[6]);
+        v_float32x4 v1357(vx[1], vx[3], vx[5], vx[7]);
+        v_float32x4 vxx = v0246 + v_setall_f32(tz) * (v1357 - v0246);
+
+        v_float32x4 v00_10 = vxx;
+        v_float32x4 v01_11 = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(vxx)));
+
+        v_float32x4 v0_1 = v00_10 + v_setall_f32(ty) * (v01_11 - v00_10);
+        float v0 = v0_1.get0();
+        v0_1 = v_reinterpret_as_f32(v_rotate_right<2>(v_reinterpret_as_u32(v0_1)));
+        float v1 = v0_1.get0();
+
+        nv = v0 + tx * (v1 - v0);
+    }
+
+    v_float32x4 n = v_load_aligned(an);
+    v_float32x4 Norm = v_sqrt(v_setall_f32(v_reduce_sum(n * n)));
+
+    return Norm.get0() < 0.0001f ? nanv : n / Norm;
+}
+
+inline Point3f getNormalVoxel( const Mat& volume,
+    const Vec4i& volDims, const Vec8i& neighbourCoords, const Point3i volResolution,
+    const Point3f& _p)
+{
+    v_float32x4 p(_p.x, _p.y, _p.z, 0.f);
+    v_float32x4 result = getNormalVoxel(volume, volDims, neighbourCoords, volResolution, p);
+    float CV_DECL_ALIGNED(16) ares[4];
+    v_store_aligned(ares, result);
+    return Point3f(ares[0], ares[1], ares[2]);
+}
+#else
+inline Point3f getNormalVoxel( const Mat& volume,
+    const Vec4i& volDims, const Vec8i& neighbourCoords, const Point3i volResolution,
+    const Point3f& p)
+{
+    int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
+    const TsdfVoxel* volData = volume.ptr<TsdfVoxel>();
+
+    if (p.x < 1 || p.x >= volResolution.x - 2 ||
+        p.y < 1 || p.y >= volResolution.y - 2 ||
+        p.z < 1 || p.z >= volResolution.z - 2)
+        return nan3;
+
+    int ix = cvFloor(p.x);
+    int iy = cvFloor(p.y);
+    int iz = cvFloor(p.z);
+
+    float tx = p.x - ix;
+    float ty = p.y - iy;
+    float tz = p.z - iz;
+
+    int coordBase = ix * xdim + iy * ydim + iz * zdim;
+
+    Vec3f an;
+    for (int c = 0; c < 3; c++)
+    {
+        const int dim = volDims[c];
+        float& nv = an[c];
+
+        float vx[8];
+        for (int i = 0; i < 8; i++)
+            vx[i] = tsdfToFloat(volData[neighbourCoords[i] + coordBase + 1 * dim].tsdf) -
+            tsdfToFloat(volData[neighbourCoords[i] + coordBase - 1 * dim].tsdf);
+
+        float v00 = vx[0] + tz * (vx[1] - vx[0]);
+        float v01 = vx[2] + tz * (vx[3] - vx[2]);
+        float v10 = vx[4] + tz * (vx[5] - vx[4]);
+        float v11 = vx[6] + tz * (vx[7] - vx[6]);
+
+        float v0 = v00 + ty * (v01 - v00);
+        float v1 = v10 + ty * (v11 - v10);
+
+        nv = v0 + tx * (v1 - v0);
+    }
+
+    float nv = sqrt(an[0] * an[0] +
+        an[1] * an[1] +
+        an[2] * an[2]);
+    return nv < 0.0001f ? nan3 : an / nv;
+}
+#endif
+
+void raycastTsdfVolumeUnit(const VolumeSettings& settings, const Matx44f& cameraPose, int height, int width,
+                       InputArray _volume, OutputArray _points, OutputArray _normals)
+{
+    //std::cout << "raycastVolumeUnit" << std::endl;
+
+    const Size frameSize(width, height);
+    //CV_Assert(frameSize.area() > 0);
+    _points.create(frameSize, POINT_TYPE);
+    _normals.create(frameSize, POINT_TYPE);
+
+    Points points = _points.getMat();
+    Normals normals = _normals.getMat();
+
+    const Vec4i volDims;
+    settings.getVolumeDimensions(volDims);
+    const Vec8i neighbourCoords = Vec8i(
+        volDims.dot(Vec4i(0, 0, 0)),
+        volDims.dot(Vec4i(0, 0, 1)),
+        volDims.dot(Vec4i(0, 1, 0)),
+        volDims.dot(Vec4i(0, 1, 1)),
+        volDims.dot(Vec4i(1, 0, 0)),
+        volDims.dot(Vec4i(1, 0, 1)),
+        volDims.dot(Vec4i(1, 1, 0)),
+        volDims.dot(Vec4i(1, 1, 1))
+    );
+
+    Vec3i resolution;
+    settings.getVolumeResolution(resolution);
+    const Point3i volResolution = Point3i(resolution);
+    const Point3f volSize = Point3f(volResolution) * settings.getVoxelSize();
+
+    Matx33f intr;
+    settings.getCameraRaycastIntrinsics(intr);
+
+    Matx44f _pose;
+    settings.getVolumePose(_pose);
+    const Affine3f pose = Affine3f(_pose);
+
+    const Point3f boxMax(volSize - Point3f(settings.getVoxelSize(), settings.getVoxelSize(), settings.getVoxelSize()));
+    const Point3f boxMin = Point3f(0, 0, 0);
+    const Affine3f cam2vol(pose.inv() * Affine3f(cameraPose));
+    const Affine3f vol2cam(Affine3f(cameraPose.inv()) * pose);
+
+    const Mat volume = _volume.getMat();
+    float voxelSize = settings.getVoxelSize();
+    float voxelSizeInv = 1.0f / voxelSize;
+    const Intr::Reprojector reproj = Intr(intr).makeReprojector();
+    float tstep = settings.getTsdfTruncateDistance() * settings.getRaycastStepFactor();
+
+    Range raycastRange = Range(0, points.rows);
+    //TODO::  swap realization, they are missplaced :)
+#if USE_INTRINSICS
+    auto RaycastInvoker = [&](const Range& range)
+    {
+        const v_float32x4 vfxy(reproj.fxinv, reproj.fyinv, 0, 0);
+        const v_float32x4 vcxy(reproj.cx, reproj.cy, 0, 0);
+
+        const float(&cm)[16] = cam2vol.matrix.val;
+        const v_float32x4 camRot0(cm[0], cm[4], cm[8], 0);
+        const v_float32x4 camRot1(cm[1], cm[5], cm[9], 0);
+        const v_float32x4 camRot2(cm[2], cm[6], cm[10], 0);
+        const v_float32x4 camTrans(cm[3], cm[7], cm[11], 0);
+
+        const v_float32x4 boxDown(boxMin.x, boxMin.y, boxMin.z, 0.f);
+        const v_float32x4 boxUp(boxMax.x, boxMax.y, boxMax.z, 0.f);
+
+        const v_float32x4 invVoxelSize = v_float32x4(voxelSizeInv, voxelSizeInv, voxelSizeInv, 1.f);
+
+        const float(&vm)[16] = vol2cam.matrix.val;
+        const v_float32x4 volRot0(vm[0], vm[4], vm[8], 0);
+        const v_float32x4 volRot1(vm[1], vm[5], vm[9], 0);
+        const v_float32x4 volRot2(vm[2], vm[6], vm[10], 0);
+        const v_float32x4 volTrans(vm[3], vm[7], vm[11], 0);
+
+        for (int y = range.start; y < range.end; y++)
         {
-            RGBTsdfVoxel* volDataX = volDataStart + x * volStrides[0];
-            for (int y = 0; y < volResolution.y; y++)
+            ptype* ptsRow = points[y];
+            ptype* nrmRow = normals[y];
+
+            for (int x = 0; x < points.cols; x++)
             {
-                RGBTsdfVoxel* volDataY = volDataX + y * volStrides[1];
-                // optimization of camSpace transformation (vector addition instead of matmul at each z)
-                Point3f basePt = vol2cam * (Point3f((float)x, (float)y, 0) * voxelSize);
-                v_float32x4 camSpacePt(basePt.x, basePt.y, basePt.z, 0);
+                v_float32x4 point = nanv, normal = nanv;
 
-                int startZ, endZ;
-                if (abs(zStepPt.z) > 1e-5)
+                v_float32x4 orig = camTrans;
+
+                // get direction through pixel in volume space:
+
+                // 1. reproject (x, y) on projecting plane where z = 1.f
+                v_float32x4 planed = (v_float32x4((float)x, (float)y, 0.f, 0.f) - vcxy) * vfxy;
+                planed = v_combine_low(planed, v_float32x4(1.f, 0.f, 0.f, 0.f));
+
+                // 2. rotate to volume space
+                planed = v_matmuladd(planed, camRot0, camRot1, camRot2, v_setzero_f32());
+
+                // 3. normalize
+                v_float32x4 invNorm = v_invsqrt(v_setall_f32(v_reduce_sum(planed * planed)));
+                v_float32x4 dir = planed * invNorm;
+
+                // compute intersection of ray with all six bbox planes
+                v_float32x4 rayinv = v_setall_f32(1.f) / dir;
+                // div by zero should be eliminated by these products
+                v_float32x4 tbottom = rayinv * (boxDown - orig);
+                v_float32x4 ttop = rayinv * (boxUp - orig);
+
+                // re-order intersections to find smallest and largest on each axis
+                v_float32x4 minAx = v_min(ttop, tbottom);
+                v_float32x4 maxAx = v_max(ttop, tbottom);
+
+                // near clipping plane
+                const float clip = 0.f;
+                float _minAx[4], _maxAx[4];
+                v_store(_minAx, minAx);
+                v_store(_maxAx, maxAx);
+                float tmin = max({ _minAx[0], _minAx[1], _minAx[2], clip });
+                float tmax = min({ _maxAx[0], _maxAx[1], _maxAx[2] });
+
+                // precautions against getting coordinates out of bounds
+                tmin = tmin + tstep;
+                tmax = tmax - tstep;
+
+                if (tmin < tmax)
                 {
-                    int baseZ = (int)(-basePt.z / zStepPt.z);
-                    if (zStepPt.z > 0)
+                    // interpolation optimized a little
+                    orig *= invVoxelSize;
+                    dir *= invVoxelSize;
+
+                    int xdim = volDims[0];
+                    int ydim = volDims[1];
+                    int zdim = volDims[2];
+                    v_float32x4 rayStep = dir * v_setall_f32(tstep);
+                    v_float32x4 next = (orig + dir * v_setall_f32(tmin));
+                    float f = interpolateTsdfVoxel(volume, volDims, neighbourCoords, next);
+                    float fnext = f;
+
+                    //raymarch
+                    int steps = 0;
+                    int nSteps = cvFloor((tmax - tmin) / tstep);
+                    for (; steps < nSteps; steps++)
                     {
-                        startZ = baseZ;
-                        endZ = volResolution.z;
-                    }
-                    else
-                    {
-                        startZ = 0;
-                        endZ = baseZ;
-                    }
-                }
-                else
-                {
-                    if (basePt.z > 0)
-                    {
-                        startZ = 0;
-                        endZ = volResolution.z;
-                    }
-                    else
-                    {
-                        // z loop shouldn't be performed
-                        startZ = endZ = 0;
-                    }
-                }
-                startZ = max(0, startZ);
-                endZ = min(int(volResolution.z), endZ);
-                for (int z = startZ; z < endZ; z++)
-                {
-                    // optimization of the following:
-                    //Point3f volPt = Point3f(x, y, z)*voxelSize;
-                    //Point3f camSpacePt = vol2cam * volPt;
-                    camSpacePt += zStep;
+                        next += rayStep;
+                        v_int32x4 ip = v_round(next);
+                        int ix = ip.get0(); ip = v_rotate_right<1>(ip);
+                        int iy = ip.get0(); ip = v_rotate_right<1>(ip);
+                        int iz = ip.get0();
+                        int coord = ix * xdim + iy * ydim + iz * zdim;
 
-                    float zCamSpace = v_reinterpret_as_f32(v_rotate_right<2>(v_reinterpret_as_u32(camSpacePt))).get0();
-                    if (zCamSpace <= 0.f)
-                        continue;
-
-                    v_float32x4 camPixVec = camSpacePt / v_setall_f32(zCamSpace);
-                    v_float32x4 projected = v_muladd(camPixVec, vfxy, vcxy);
-                    // leave only first 2 lanes
-                    projected = v_reinterpret_as_f32(v_reinterpret_as_u32(projected) &
-                        v_uint32x4(0xFFFFFFFF, 0xFFFFFFFF, 0, 0));
-
-                    depthType v;
-                    // bilinearly interpolate depth at projected
-                    {
-                        const v_float32x4& pt = projected;
-                        // check coords >= 0 and < imgSize
-                        v_uint32x4 limits = v_reinterpret_as_u32(pt < v_setzero_f32()) |
-                            v_reinterpret_as_u32(pt >= upLimits);
-                        limits = limits | v_rotate_right<1>(limits);
-                        if (limits.get0())
-                            continue;
-
-                        // xi, yi = floor(pt)
-                        v_int32x4 ip = v_floor(pt);
-                        v_int32x4 ipshift = ip;
-                        int xi = ipshift.get0();
-                        ipshift = v_rotate_right<1>(ipshift);
-                        int yi = ipshift.get0();
-
-                        const depthType* row0 = depth[yi + 0];
-                        const depthType* row1 = depth[yi + 1];
-
-                        // v001 = [v(xi + 0, yi + 0), v(xi + 1, yi + 0)]
-                        v_float32x4 v001 = v_load_low(row0 + xi);
-                        // v101 = [v(xi + 0, yi + 1), v(xi + 1, yi + 1)]
-                        v_float32x4 v101 = v_load_low(row1 + xi);
-
-                        v_float32x4 vall = v_combine_low(v001, v101);
-
-                        // assume correct depth is positive
-                        // don't fix missing data
-                        if (v_check_all(vall > v_setzero_f32()))
+                        fnext = tsdfToFloat(volume.at<TsdfVoxel>(coord).tsdf);
+                        if (fnext != f)
                         {
-                            v_float32x4 t = pt - v_cvt_f32(ip);
-                            float tx = t.get0();
-                            t = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
-                            v_float32x4 ty = v_setall_f32(t.get0());
-                            // vx is y-interpolated between rows 0 and 1
-                            v_float32x4 vx = v001 + ty * (v101 - v001);
-                            float v0 = vx.get0();
-                            vx = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(vx)));
-                            float v1 = vx.get0();
-                            v = v0 + tx * (v1 - v0);
+                            fnext = interpolateTsdfVoxel(volume, volDims, neighbourCoords, next);
+
+                            // when ray crosses a surface
+                            if (std::signbit(f) != std::signbit(fnext))
+                                break;
+
+                            f = fnext;
                         }
-                        else
-                            continue;
                     }
 
-                    v_float32x4 projectedRGB = v_muladd(camPixVec, rgb_vfxy, rgb_vcxy);
-                    // leave only first 2 lanes
-                    projectedRGB = v_reinterpret_as_f32(v_reinterpret_as_u32(projected) &
-                        v_uint32x4(0xFFFFFFFF, 0xFFFFFFFF, 0, 0));
-
-                    // norm(camPixVec) produces double which is too slow
-                    int _u = (int)projected.get0();
-                    int _v = (int)v_rotate_right<1>(projected).get0();
-                    int rgb_u = (int)projectedRGB.get0();
-                    int rgb_v = (int)v_rotate_right<1>(projectedRGB).get0();
-
-                    if (!(_u >= 0 && _u < depth.cols && _v >= 0 && _v < depth.rows &&
-                        rgb_v >= 0 && rgb_v < color.rows && rgb_u >= 0 && rgb_u < color.cols))
-                        continue;
-                    float pixNorm = pixNorms.at<float>(_v, _u);
-                    Vec4f colorRGB = color.at<Vec4f>(rgb_v, rgb_u);
-                    //float pixNorm = sqrt(v_reduce_sum(camPixVec*camPixVec));
-                    // difference between distances of point and of surface to camera
-                    float sdf = pixNorm * (v * dfac - zCamSpace);
-                    // possible alternative is:
-                    // kftype sdf = norm(camSpacePt)*(v*dfac/camSpacePt.z - 1);
-                    if (sdf >= -truncDist)
+                    // if ray penetrates a surface from outside
+                    // linearly interpolate t between two f values
+                    if (f > 0.f && fnext < 0.f)
                     {
-                        TsdfType tsdf = floatToTsdf(fmin(1.f, sdf * truncDistInv));
+                        v_float32x4 tp = next - rayStep;
+                        float ft = interpolateTsdfVoxel(volume, volDims, neighbourCoords, tp);
+                        float ftdt = interpolateTsdfVoxel(volume, volDims, neighbourCoords, next);
+                        float ts = tmin + tstep * (steps - ft / (ftdt - ft));
 
-                        RGBTsdfVoxel& voxel = volDataY[z * volStrides[2]];
-                        WeightType& weight = voxel.weight;
-                        TsdfType& value = voxel.tsdf;
-                        ColorType& r = voxel.r;
-                        ColorType& g = voxel.g;
-                        ColorType& b = voxel.b;
+                        // avoid division by zero
+                        if (!cvIsNaN(ts) && !cvIsInf(ts))
+                        {
+                            v_float32x4 pv = (orig + dir * v_setall_f32(ts));
+                            v_float32x4 nv = getNormalVoxel(volume, volDims, neighbourCoords, volResolution, pv);
 
-                        // update RGB
-                        r = (ColorType)((float)(r * weight) + (colorRGB[0])) / (weight + 1);
-                        g = (ColorType)((float)(g * weight) + (colorRGB[1])) / (weight + 1);
-                        b = (ColorType)((float)(b * weight) + (colorRGB[2])) / (weight + 1);
-                        colorFix(r, g, b);
-                        // update TSDF
-                        value = floatToTsdf((tsdfToFloat(value) * weight + tsdfToFloat(tsdf)) / (weight + 1));
-                        weight = WeightType(min(int(weight + 1), int(maxWeight)));
+                            if (!isNaN(nv))
+                            {
+                                //convert pv and nv to camera space
+                                normal = v_matmuladd(nv, volRot0, volRot1, volRot2, v_setzero_f32());
+                                // interpolation optimized a little
+                                point = v_matmuladd(pv * v_float32x4(voxelSize, voxelSize, voxelSize, 1.f),
+                                    volRot0, volRot1, volRot2, volTrans);
+                            }
+                        }
                     }
                 }
+
+                v_store((float*)(&ptsRow[x]), point);
+                v_store((float*)(&nrmRow[x]), normal);
             }
         }
     };
 #else
-    auto IntegrateInvoker = [&](const Range& range)
+    auto RaycastInvoker = [&](const Range& range)
     {
-        for (int x = range.start; x < range.end; x++)
+        const Point3f camTrans = cam2vol.translation();
+        const Matx33f  camRot = cam2vol.rotation();
+        const Matx33f  volRot = vol2cam.rotation();
+
+        for (int y = range.start; y < range.end; y++)
         {
-            RGBTsdfVoxel* volDataX = volDataStart + x * volStrides[0];
-            for (int y = 0; y < volResolution.y; y++)
+            ptype* ptsRow = points[y];
+            ptype* nrmRow = normals[y];
+
+            for (int x = 0; x < points.cols; x++)
             {
-                RGBTsdfVoxel* volDataY = volDataX + y * volStrides[1];
-                // optimization of camSpace transformation (vector addition instead of matmul at each z)
-                Point3f basePt = vol2cam * (Point3f(float(x), float(y), 0.0f) * voxelSize);
-                Point3f camSpacePt = basePt;
-                // zStep == vol2cam*(Point3f(x, y, 1)*voxelSize) - basePt;
-                // zStep == vol2cam*[Point3f(x, y, 1) - Point3f(x, y, 0)]*voxelSize
-                Point3f zStep = Point3f(vol2cam.matrix(0, 2),
-                    vol2cam.matrix(1, 2),
-                    vol2cam.matrix(2, 2)) * voxelSize;
-                int startZ, endZ;
-                if (abs(zStep.z) > 1e-5)
+                Point3f point = nan3, normal = nan3;
+
+                Point3f orig = camTrans;
+                // direction through pixel in volume space
+                Point3f dir = normalize(Vec3f(camRot * reproj(Point3f(float(x), float(y), 1.f))));
+
+                // compute intersection of ray with all six bbox planes
+                Vec3f rayinv(1.f / dir.x, 1.f / dir.y, 1.f / dir.z);
+                Point3f tbottom = rayinv.mul(boxMin - orig);
+                Point3f ttop = rayinv.mul(boxMax - orig);
+
+                // re-order intersections to find smallest and largest on each axis
+                Point3f minAx(min(ttop.x, tbottom.x), min(ttop.y, tbottom.y), min(ttop.z, tbottom.z));
+                Point3f maxAx(max(ttop.x, tbottom.x), max(ttop.y, tbottom.y), max(ttop.z, tbottom.z));
+
+                // near clipping plane
+                const float clip = 0.f;
+                //float tmin = max(max(max(minAx.x, minAx.y), max(minAx.x, minAx.z)), clip);
+                //float tmax =     min(min(maxAx.x, maxAx.y), min(maxAx.x, maxAx.z));
+                float tmin = max({ minAx.x, minAx.y, minAx.z, clip });
+                float tmax = min({ maxAx.x, maxAx.y, maxAx.z });
+
+                // precautions against getting coordinates out of bounds
+                tmin = tmin + tstep;
+                tmax = tmax - tstep;
+
+                if (tmin < tmax)
                 {
-                    int baseZ = int(-basePt.z / zStep.z);
-                    if (zStep.z > 0)
+                    // interpolation optimized a little
+                    orig = orig * voxelSizeInv;
+                    dir = dir * voxelSizeInv;
+
+                    Point3f rayStep = dir * tstep;
+                    Point3f next = (orig + dir * tmin);
+                    float f = interpolateTsdfVoxel(volume, volDims, neighbourCoords, next);
+                    float fnext = f;
+
+                    //raymarch
+                    int steps = 0;
+                    int nSteps = int(floor((tmax - tmin) / tstep));
+                    for (; steps < nSteps; steps++)
                     {
-                        startZ = baseZ;
-                        endZ = volResolution.z;
-                    }
-                    else
-                    {
-                        startZ = 0;
-                        endZ = baseZ;
-                    }
-                }
-                else
-                {
-                    if (basePt.z > 0)
-                    {
-                        startZ = 0;
-                        endZ = volResolution.z;
-                    }
-                    else
-                    {
-                        // z loop shouldn't be performed
-                        startZ = endZ = 0;
-                    }
-                }
-                startZ = max(0, startZ);
-                endZ = min(int(volResolution.z), endZ);
-
-                for (int z = startZ; z < endZ; z++)
-                {
-                    // optimization of the following:
-                    //Point3f volPt = Point3f(x, y, z)*volume.voxelSize;
-                    //Point3f camSpacePt = vol2cam * volPt;
-
-                    camSpacePt += zStep;
-                    if (camSpacePt.z <= 0)
-                        continue;
-
-                    Point3f camPixVec;
-                    Point2f projected = projDepth(camSpacePt, camPixVec);
-                    Point2f projectedRGB = projRGB(camSpacePt, camPixVec);
-
-
-                    depthType v = bilinearDepth(depth, projected);
-                    if (v == 0) {
-                        continue;
-                    }
-
-                    int _u = (int) projected.x;
-                    int _v = (int) projected.y;
-
-                    int rgb_u = (int) projectedRGB.x;
-                    int rgb_v = (int) projectedRGB.y;
-
-                    if (!(_v >= 0 && _v < depth.rows && _u >= 0 && _u < depth.cols  &&
-                        rgb_v >= 0 && rgb_v < color.rows && rgb_u >= 0 && rgb_u < color.cols))
-                        continue;
-
-                    float pixNorm = pixNorms.at<float>(_v, _u);
-                    Vec4f colorRGB = color.at<Vec4f>(rgb_v, rgb_u);
-                    // difference between distances of point and of surface to camera
-                    float sdf = pixNorm * (v * dfac - camSpacePt.z);
-                    // possible alternative is:
-                    // kftype sdf = norm(camSpacePt)*(v*dfac/camSpacePt.z - 1);
-                    if (sdf >= -truncDist)
-                    {
-                        TsdfType tsdf = floatToTsdf(fmin(1.f, sdf * truncDistInv));
-
-                        RGBTsdfVoxel& voxel = volDataY[z * volStrides[2]];
-                        WeightType& weight = voxel.weight;
-                        TsdfType& value = voxel.tsdf;
-                        ColorType& r = voxel.r;
-                        ColorType& g = voxel.g;
-                        ColorType& b = voxel.b;
-
-                        // update RGB
-                        if (weight < 1)
+                        next += rayStep;
+                        int xdim = volDims[0];
+                        int ydim = volDims[1];
+                        int zdim = volDims[2];
+                        int ix = cvRound(next.x);
+                        int iy = cvRound(next.y);
+                        int iz = cvRound(next.z);
+                        fnext = tsdfToFloat(volume.at<TsdfVoxel>(ix * xdim + iy * ydim + iz * zdim).tsdf);
+                        if (fnext != f)
                         {
-                            r = (ColorType)((float)(r * weight) + (colorRGB[0])) / (weight + 1);
-                            g = (ColorType)((float)(g * weight) + (colorRGB[1])) / (weight + 1);
-                            b = (ColorType)((float)(b * weight) + (colorRGB[2])) / (weight + 1);
-                        }
+                            fnext = interpolateTsdfVoxel(volume, volDims, neighbourCoords, next);
+                            // when ray crosses a surface
+                            if (std::signbit(f) != std::signbit(fnext))
+                                break;
 
-                        // update TSDF
-                        value = floatToTsdf((tsdfToFloat(value) * weight + tsdfToFloat(tsdf)) / (weight + 1));
-                        weight = WeightType( min(int(weight + 1), int(maxWeight)) );
+                            f = fnext;
+                        }
+                    }
+                    // if ray penetrates a surface from outside
+                    // linearly interpolate t between two f values
+                    if (f > 0.f && fnext < 0.f)
+                    {
+                        Point3f tp = next - rayStep;
+                        float ft = interpolateTsdfVoxel(volume, volDims, neighbourCoords, tp);
+                        float ftdt = interpolateTsdfVoxel(volume, volDims, neighbourCoords, next);
+                        // float t = tmin + steps*tstep;
+                        // float ts = t - tstep*ft/(ftdt - ft);
+                        float ts = tmin + tstep * (steps - ft / (ftdt - ft));
+
+                        // avoid division by zero
+                        if (!cvIsNaN(ts) && !cvIsInf(ts))
+                        {
+                            Point3f pv = (orig + dir * ts);
+                            Point3f nv = getNormalVoxel(volume, volDims, neighbourCoords, volResolution, pv);
+
+                            if (!isNaN(nv))
+                            {
+                                //convert pv and nv to camera space
+                                normal = volRot * nv;
+                                // interpolation optimized a little
+                                point = vol2cam * (pv * voxelSize);
+                            }
+                        }
                     }
                 }
+                ptsRow[x] = toPtype(point);
+                nrmRow[x] = toPtype(normal);
             }
         }
     };
+
 #endif
-    parallel_for_(integrateRange, IntegrateInvoker);
+    parallel_for_(raycastRange, RaycastInvoker);
+    //RaycastInvoker(raycastRange);
 }
 
+
+#ifdef HAVE_OPENCL
+void ocl_raycastTsdfVolumeUnit(const VolumeSettings& settings, const Matx44f& cameraPose, int height, int width,
+    InputArray _volume, OutputArray _points, OutputArray _normals)
+{
+    //std::cout << "ocl_raycastVolumeUnit" << std::endl;
+
+    CV_TRACE_FUNCTION();
+
+    const Size frameSize(width, height);
+    CV_Assert(frameSize.area() > 0);
+
+    String errorStr;
+    String name = "raycast";
+    ocl::ProgramSource source = ocl::_3d::tsdf_oclsrc;
+    String options = "-cl-mad-enable";
+    ocl::Kernel k;
+    k.create(name.c_str(), source, options, &errorStr);
+
+    if (k.empty())
+        throw std::runtime_error("Failed to create kernel: " + errorStr);
+
+    _points.create(frameSize, CV_32FC4);
+    _normals.create(frameSize, CV_32FC4);
+
+    UMat points = _points.getUMat();
+    UMat normals = _normals.getUMat();
+
+    const Vec4i volDims;
+    settings.getVolumeDimensions(volDims);
+    const Vec8i neighbourCoords = Vec8i(
+        volDims.dot(Vec4i(0, 0, 0)),
+        volDims.dot(Vec4i(0, 0, 1)),
+        volDims.dot(Vec4i(0, 1, 0)),
+        volDims.dot(Vec4i(0, 1, 1)),
+        volDims.dot(Vec4i(1, 0, 0)),
+        volDims.dot(Vec4i(1, 0, 1)),
+        volDims.dot(Vec4i(1, 1, 0)),
+        volDims.dot(Vec4i(1, 1, 1))
+    );
+
+    Vec3i resolution;
+    settings.getVolumeResolution(resolution);
+    const Point3i volResolution = Point3i(resolution);
+    const Point3f volSize = Point3f(volResolution) * settings.getVoxelSize();
+
+    Matx44f _pose;
+    settings.getVolumePose(_pose);
+    const Affine3f pose = Affine3f(_pose);
+
+    UMat vol2camGpu, cam2volGpu;
+    Affine3f vol2cam = Affine3f(cameraPose.inv()) * pose;
+    Affine3f cam2vol = pose.inv() * Affine3f(cameraPose);
+    Mat(cam2vol.matrix).copyTo(cam2volGpu);
+    Mat(vol2cam.matrix).copyTo(vol2camGpu);
+    Matx33f intr;
+    settings.getCameraRaycastIntrinsics(intr);
+    Intr intrinsics(intr);
+    Intr::Reprojector r = intrinsics.makeReprojector();
+
+    const UMat volume = _volume.getUMat();
+    float voxelSize = settings.getVoxelSize();
+    float raycastStepFactor = settings.getRaycastStepFactor();
+    float truncatedDistance = settings.getTsdfTruncateDistance();
+
+    // We do subtract voxel size to minimize checks after
+    // Note: origin of volume coordinate is placed
+    // in the center of voxel (0,0,0), not in the corner of the voxel!
+    Vec4f boxMin, boxMax(volSize.x - voxelSize,
+        volSize.y - voxelSize,
+        volSize.z - voxelSize);
+    Vec2f finv(r.fxinv, r.fyinv), cxy(r.cx, r.cy);
+    float tstep = truncatedDistance * raycastStepFactor;
+
+    Vec4i volResGpu(volResolution.x, volResolution.y, volResolution.z);
+
+    k.args(ocl::KernelArg::WriteOnlyNoSize(points),
+        ocl::KernelArg::WriteOnlyNoSize(normals),
+        frameSize,
+        ocl::KernelArg::PtrReadOnly(volume),
+        ocl::KernelArg::PtrReadOnly(vol2camGpu),
+        ocl::KernelArg::PtrReadOnly(cam2volGpu),
+        finv.val, cxy.val,
+        boxMin.val, boxMax.val,
+        tstep,
+        voxelSize,
+        volResGpu.val,
+        volDims.val,
+        neighbourCoords.val);
+
+    size_t globalSize[2];
+    globalSize[0] = (size_t)frameSize.width;
+    globalSize[1] = (size_t)frameSize.height;
+
+    if (!k.run(2, globalSize, NULL, true))
+        throw std::runtime_error("Failed to run kernel");
+}
+#endif
+
+// Fetch
+
+void fetchNormalsFromTsdfVolumeUnit(const VolumeSettings& settings, InputArray _volume, InputArray _points, OutputArray _normals)
+{
+    //std::cout << "fetchNormalsFromTsdfVolumeUnit" << std::endl;
+
+    CV_TRACE_FUNCTION();
+    CV_Assert(!_points.empty());
+    if (!_normals.needed())
+        return;
+
+    Points points = _points.getMat();
+    CV_Assert(points.type() == POINT_TYPE);
+
+    _normals.createSameSize(_points, _points.type());
+    Normals normals = _normals.getMat();
+
+    const Mat volume = _volume.getMat();
+
+    Matx44f _pose;
+    settings.getVolumePose(_pose);
+    const Affine3f pose = Affine3f(_pose);
+    Affine3f invPose(pose.inv());
+    Matx33f r = pose.rotation();
+    float voxelSizeInv = 1.f / settings.getVoxelSize();
+
+    const Vec4i volDims;
+    settings.getVolumeDimensions(volDims);
+    const Vec8i neighbourCoords = Vec8i(
+        volDims.dot(Vec4i(0, 0, 0)),
+        volDims.dot(Vec4i(0, 0, 1)),
+        volDims.dot(Vec4i(0, 1, 0)),
+        volDims.dot(Vec4i(0, 1, 1)),
+        volDims.dot(Vec4i(1, 0, 0)),
+        volDims.dot(Vec4i(1, 0, 1)),
+        volDims.dot(Vec4i(1, 1, 0)),
+        volDims.dot(Vec4i(1, 1, 1))
+    );
+
+    Vec3i resolution;
+    settings.getVolumeResolution(resolution);
+    const Point3i volResolution = Point3i(resolution);
+
+    auto PushNormals = [&](const ptype& pp, const int* position)
+    {
+        Point3f p = fromPtype(pp);
+        Point3f n = nan3;
+        if (!isNaN(p))
+        {
+            Point3f voxPt = (invPose * p);
+            voxPt = voxPt * voxelSizeInv;
+            n = r * getNormalVoxel(volume, volDims, neighbourCoords, volResolution, voxPt);
+        }
+        normals(position[0], position[1]) = toPtype(n);
+    };
+    points.forEach(PushNormals);
+}
+
+#ifdef HAVE_OPENCL
+void ocl_fetchNormalsFromTsdfVolumeUnit(const VolumeSettings& settings, InputArray _volume, InputArray _points, OutputArray _normals)
+{
+    CV_TRACE_FUNCTION();
+    CV_Assert(!_points.empty());
+    if (!_normals.needed())
+        return;
+
+    UMat points = _points.getUMat();
+    CV_Assert(points.type() == POINT_TYPE);
+
+    _normals.createSameSize(_points, POINT_TYPE);
+    UMat normals = _normals.getUMat();
+
+    const UMat volume = _volume.getUMat();
+
+    Matx44f _pose;
+    settings.getVolumePose(_pose);
+    const Affine3f pose = Affine3f(_pose);
+    float voxelSizeInv = 1.f / settings.getVoxelSize();
+
+    const Vec4i volDims;
+    settings.getVolumeDimensions(volDims);
+    const Vec8i neighbourCoords = Vec8i(
+        volDims.dot(Vec4i(0, 0, 0)),
+        volDims.dot(Vec4i(0, 0, 1)),
+        volDims.dot(Vec4i(0, 1, 0)),
+        volDims.dot(Vec4i(0, 1, 1)),
+        volDims.dot(Vec4i(1, 0, 0)),
+        volDims.dot(Vec4i(1, 0, 1)),
+        volDims.dot(Vec4i(1, 1, 0)),
+        volDims.dot(Vec4i(1, 1, 1))
+    );
+
+    Vec3i resolution;
+    settings.getVolumeResolution(resolution);
+    const Point3i volResolution = Point3i(resolution);
+
+    String errorStr;
+    String name = "getNormals";
+    ocl::ProgramSource source = ocl::_3d::tsdf_oclsrc;
+    String options = "-cl-mad-enable";
+    ocl::Kernel k;
+    k.create(name.c_str(), source, options, &errorStr);
+
+    if (k.empty())
+        throw std::runtime_error("Failed to create kernel: " + errorStr);
+
+    UMat volPoseGpu, invPoseGpu;
+    Mat(pose.matrix).copyTo(volPoseGpu);
+    Mat(pose.inv().matrix).copyTo(invPoseGpu);
+    Vec4i volResGpu(volResolution.x, volResolution.y, volResolution.z);
+    Size frameSize = points.size();
+
+    k.args(ocl::KernelArg::ReadOnlyNoSize(points),
+        ocl::KernelArg::WriteOnlyNoSize(normals),
+        frameSize,
+        ocl::KernelArg::PtrReadOnly(volume),
+        ocl::KernelArg::PtrReadOnly(volPoseGpu),
+        ocl::KernelArg::PtrReadOnly(invPoseGpu),
+        voxelSizeInv,
+        volResGpu.val,
+        volDims.val,
+        neighbourCoords.val);
+
+    size_t globalSize[2];
+    globalSize[0] = (size_t)points.cols;
+    globalSize[1] = (size_t)points.rows;
+
+    if (!k.run(2, globalSize, NULL, true))
+        throw std::runtime_error("Failed to run kernel");
+
+}
+#endif
+
+inline void coord(const Mat& volume, const TsdfVoxel* volDataStart, std::vector<ptype>& points, std::vector<ptype>& normals,
+    const Point3i volResolution, const Vec4i volDims, const Vec8i neighbourCoords, const Affine3f pose,
+    const float voxelSize, const float voxelSizeInv, bool needNormals, int x, int y, int z, Point3f V, float v0, int axis)
+{
+    // 0 for x, 1 for y, 2 for z
+    bool limits = false;
+    Point3i shift;
+    float Vc = 0.f;
+    if (axis == 0)
+    {
+        shift = Point3i(1, 0, 0);
+        limits = (x + 1 < volResolution.x);
+        Vc = V.x;
+    }
+    if (axis == 1)
+    {
+        shift = Point3i(0, 1, 0);
+        limits = (y + 1 < volResolution.y);
+        Vc = V.y;
+    }
+    if (axis == 2)
+    {
+        shift = Point3i(0, 0, 1);
+        limits = (z + 1 < volResolution.z);
+        Vc = V.z;
+    }
+
+    if (limits)
+    {
+        const TsdfVoxel& voxeld = volDataStart[(x + shift.x) * volDims[0] +
+            (y + shift.y) * volDims[1] +
+            (z + shift.z) * volDims[2]];
+        float vd = tsdfToFloat(voxeld.tsdf);
+        if (voxeld.weight != 0 && vd != 1.f)
+        {
+            if ((v0 > 0 && vd < 0) || (v0 < 0 && vd > 0))
+            {
+                //linearly interpolate coordinate
+                float Vn = Vc + voxelSize;
+                float dinv = 1.f / (abs(v0) + abs(vd));
+                float inter = (Vc * abs(vd) + Vn * abs(v0)) * dinv;
+
+                Point3f p(shift.x ? inter : V.x,
+                    shift.y ? inter : V.y,
+                    shift.z ? inter : V.z);
+                {
+                    points.push_back(toPtype(pose * p));
+                    if (needNormals)
+                        normals.push_back(toPtype(pose.rotation() *
+                            getNormalVoxel(volume, volDims, neighbourCoords, volResolution, p * voxelSizeInv)));
+                }
+            }
+        }
+    }
+}
+
+
+void fetchPointsNormalsFromTsdfVolumeUnit(const VolumeSettings& settings, InputArray _volume, OutputArray _points, OutputArray _normals)
+{
+    if (!_points.needed())
+        return;
+    const Mat volume = _volume.getMat();
+
+    Matx44f _pose;
+    settings.getVolumePose(_pose);
+    const Affine3f pose = Affine3f(_pose);
+    float voxelSize = settings.getVoxelSize();
+    float voxelSizeInv = 1.f / settings.getVoxelSize();
+
+    const Vec4i volDims;
+    settings.getVolumeDimensions(volDims);
+    const Vec8i neighbourCoords = Vec8i(
+        volDims.dot(Vec4i(0, 0, 0)),
+        volDims.dot(Vec4i(0, 0, 1)),
+        volDims.dot(Vec4i(0, 1, 0)),
+        volDims.dot(Vec4i(0, 1, 1)),
+        volDims.dot(Vec4i(1, 0, 0)),
+        volDims.dot(Vec4i(1, 0, 1)),
+        volDims.dot(Vec4i(1, 1, 0)),
+        volDims.dot(Vec4i(1, 1, 1))
+    );
+
+    Vec3i resolution;
+    settings.getVolumeResolution(resolution);
+    const Point3i volResolution = Point3i(resolution);
+
+    bool needNormals = _normals.needed();
+
+    std::vector<std::vector<ptype>> pVecs, nVecs;
+    Range fetchRange(0, volResolution.x);
+    const int nstripes = -1;
+    const TsdfVoxel* volDataStart = volume.ptr<TsdfVoxel>();
+    Mutex mutex;
+
+    auto FetchPointsNormalsInvoker = [&](const Range& range) {
+        std::vector<ptype> points, normals;
+        for (int x = range.start; x < range.end; x++)
+        {
+            const TsdfVoxel* volDataX = volDataStart + x * volDims[0];
+            for (int y = 0; y < volResolution.y; y++)
+            {
+                const TsdfVoxel* volDataY = volDataX + y * volDims[1];
+                for (int z = 0; z < volResolution.z; z++)
+                {
+                    const TsdfVoxel& voxel0 = volDataY[z * volDims[2]];
+                    float v0 = tsdfToFloat(voxel0.tsdf);
+                    if (voxel0.weight != 0 && v0 != 1.f)
+                    {
+                        Point3f V(Point3f((float)x + 0.5f, (float)y + 0.5f, (float)z + 0.5f) * voxelSize);
+
+                        coord(volume, volDataStart, points, normals, volResolution, volDims, neighbourCoords, pose, voxelSize, voxelSizeInv, needNormals, x, y, z, V, v0, 0);
+                        coord(volume, volDataStart, points, normals, volResolution, volDims, neighbourCoords, pose, voxelSize, voxelSizeInv, needNormals, x, y, z, V, v0, 1);
+                        coord(volume, volDataStart, points, normals, volResolution, volDims, neighbourCoords, pose, voxelSize, voxelSizeInv, needNormals, x, y, z, V, v0, 2);
+
+                    } // if voxel is not empty
+                }
+            }
+        }
+
+        AutoLock al(mutex);
+        pVecs.push_back(points);
+        nVecs.push_back(normals);
+    };
+
+    parallel_for_(fetchRange, FetchPointsNormalsInvoker, nstripes);
+
+
+
+    std::vector<ptype> points, normals;
+    for (size_t i = 0; i < pVecs.size(); i++)
+    {
+        points.insert(points.end(), pVecs[i].begin(), pVecs[i].end());
+        normals.insert(normals.end(), nVecs[i].begin(), nVecs[i].end());
+    }
+
+    _points.create((int)points.size(), 1, POINT_TYPE);
+    if (!points.empty())
+        Mat((int)points.size(), 1, POINT_TYPE, &points[0]).copyTo(_points.getMat());
+
+    if (_normals.needed())
+    {
+        _normals.create((int)normals.size(), 1, POINT_TYPE);
+        if (!normals.empty())
+            Mat((int)normals.size(), 1, POINT_TYPE, &normals[0]).copyTo(_normals.getMat());
+    }
+
+}
+
+
+#ifdef HAVE_OPENCL
+void ocl_fetchPointsNormalsFromTsdfVolumeUnit(const VolumeSettings& settings, InputArray _volume, OutputArray points, OutputArray normals)
+{
+    CV_TRACE_FUNCTION();
+
+    if (!points.needed())
+        return;
+
+
+    const UMat volume = _volume.getUMat();
+
+    Matx44f _pose;
+    settings.getVolumePose(_pose);
+    const Affine3f pose = Affine3f(_pose);
+
+    float voxelSize = settings.getVoxelSize();
+    float voxelSizeInv = 1.f / settings.getVoxelSize();
+
+    const Vec4i volDims;
+    settings.getVolumeDimensions(volDims);
+    const Vec8i neighbourCoords = Vec8i(
+        volDims.dot(Vec4i(0, 0, 0)),
+        volDims.dot(Vec4i(0, 0, 1)),
+        volDims.dot(Vec4i(0, 1, 0)),
+        volDims.dot(Vec4i(0, 1, 1)),
+        volDims.dot(Vec4i(1, 0, 0)),
+        volDims.dot(Vec4i(1, 0, 1)),
+        volDims.dot(Vec4i(1, 1, 0)),
+        volDims.dot(Vec4i(1, 1, 1))
+    );
+
+    Vec3i resolution;
+    settings.getVolumeResolution(resolution);
+    const Point3i volResolution = Point3i(resolution);
+
+
+    bool needNormals = normals.needed();
+
+    // 1. scan to count points in each group and allocate output arrays
+
+    ocl::Kernel kscan;
+
+    String errorStr;
+    ocl::ProgramSource source = ocl::_3d::tsdf_oclsrc;
+    String options = "-cl-mad-enable";
+
+    kscan.create("scanSize", source, options, &errorStr);
+
+    if (kscan.empty())
+        throw std::runtime_error("Failed to create kernel: " + errorStr);
+
+    size_t globalSize[3];
+    globalSize[0] = (size_t)volResolution.x;
+    globalSize[1] = (size_t)volResolution.y;
+    globalSize[2] = (size_t)volResolution.z;
+
+    const ocl::Device& device = ocl::Device::getDefault();
+    size_t wgsLimit = device.maxWorkGroupSize();
+    size_t memSize = device.localMemSize();
+    // local mem should keep a point (and a normal) for each thread in a group
+    // use 4 float per each point and normal
+    size_t elemSize = (sizeof(float) * 4) * (needNormals ? 2 : 1);
+    const size_t lcols = 8;
+    const size_t lrows = 8;
+    size_t lplanes = min(memSize / elemSize, wgsLimit) / lcols / lrows;
+    lplanes = roundDownPow2(lplanes);
+    size_t localSize[3] = { lcols, lrows, lplanes };
+    Vec3i ngroups((int)divUp(globalSize[0], (unsigned int)localSize[0]),
+        (int)divUp(globalSize[1], (unsigned int)localSize[1]),
+        (int)divUp(globalSize[2], (unsigned int)localSize[2]));
+
+    const size_t counterSize = sizeof(int);
+    size_t lszscan = localSize[0] * localSize[1] * localSize[2] * counterSize;
+
+    const int gsz[3] = { ngroups[2], ngroups[1], ngroups[0] };
+    UMat groupedSum(3, gsz, CV_32S, Scalar(0));
+
+    UMat volPoseGpu;
+    Mat(pose.matrix).copyTo(volPoseGpu);
+    Vec4i volResGpu(volResolution.x, volResolution.y, volResolution.z);
+
+    kscan.args(ocl::KernelArg::PtrReadOnly(volume),
+        volResGpu.val,
+        volDims.val,
+        neighbourCoords.val,
+        ocl::KernelArg::PtrReadOnly(volPoseGpu),
+        voxelSize,
+        voxelSizeInv,
+        ocl::KernelArg::Local(lszscan),
+        ocl::KernelArg::WriteOnlyNoSize(groupedSum));
+
+    if (!kscan.run(3, globalSize, localSize, true))
+        throw std::runtime_error("Failed to run kernel");
+
+    Mat groupedSumCpu = groupedSum.getMat(ACCESS_READ);
+    int gpuSum = (int)sum(groupedSumCpu)[0];
+    // should be no CPU copies when new kernel is executing
+    groupedSumCpu.release();
+
+    // 2. fill output arrays according to per-group points count
+
+    points.create(gpuSum, 1, POINT_TYPE);
+    UMat pts = points.getUMat();
+    UMat nrm;
+    if (needNormals)
+    {
+        normals.create(gpuSum, 1, POINT_TYPE);
+        nrm = normals.getUMat();
+    }
+    else
+    {
+        // it won't be accessed but empty args are forbidden
+        nrm = UMat(1, 1, POINT_TYPE);
+    }
+
+    if (gpuSum)
+    {
+        ocl::Kernel kfill;
+        kfill.create("fillPtsNrm", source, options, &errorStr);
+
+        if (kfill.empty())
+            throw std::runtime_error("Failed to create kernel: " + errorStr);
+
+        UMat atomicCtr(1, 1, CV_32S, Scalar(0));
+
+        // mem size to keep pts (and normals optionally) for all work-items in a group
+        size_t lszfill = localSize[0] * localSize[1] * localSize[2] * elemSize;
+
+        kfill.args(ocl::KernelArg::PtrReadOnly(volume),
+            volResGpu.val,
+            volDims.val,
+            neighbourCoords.val,
+            ocl::KernelArg::PtrReadOnly(volPoseGpu),
+            voxelSize,
+            voxelSizeInv,
+            ((int)needNormals),
+            ocl::KernelArg::Local(lszfill),
+            ocl::KernelArg::PtrReadWrite(atomicCtr),
+            ocl::KernelArg::ReadOnlyNoSize(groupedSum),
+            ocl::KernelArg::WriteOnlyNoSize(pts),
+            ocl::KernelArg::WriteOnlyNoSize(nrm)
+        );
+
+        if (!kfill.run(3, globalSize, localSize, true))
+            throw std::runtime_error("Failed to run kernel");
+    }
+}
+#endif
+
+
+
 } // namespace cv
diff --git a/modules/3d/src/rgbd/tsdf_functions.hpp b/modules/3d/src/rgbd/tsdf_functions.hpp
index e5c8abb305..9294dc1eb5 100644
--- a/modules/3d/src/rgbd/tsdf_functions.hpp
+++ b/modules/3d/src/rgbd/tsdf_functions.hpp
@@ -74,8 +74,10 @@ inline void colorFix(Point3f& c)
     if (c.z > 255) c.z = 255;
 }
 
-cv::Mat preCalculationPixNorm(Size size, const Intr& intrinsics);
-cv::UMat preCalculationPixNormGPU(Size size, const Intr& intrinsics);
+void preCalculationPixNorm(Size size, const Intr& intrinsics, Mat& pixNorm);
+#ifdef HAVE_OPENCL
+void ocl_preCalculationPixNorm(Size size, const Intr& intrinsics, UMat& pixNorm);
+#endif
 
 inline depthType bilinearDepth(const Depth& m, cv::Point2f pt)
 {
@@ -151,256 +153,56 @@ inline depthType bilinearDepth(const Depth& m, cv::Point2f pt)
     }
 }
 
-void integrateVolumeUnit(
+void _integrateVolumeUnit(
     float truncDist, float voxelSize, int maxWeight,
     cv::Matx44f _pose, Point3i volResolution, Vec4i volStrides,
     InputArray _depth, float depthFactor, const cv::Matx44f& cameraPose,
     const cv::Intr& intrinsics, InputArray _pixNorms, InputArray _volume);
 
-void integrateRGBVolumeUnit(
+void _integrateRGBVolumeUnit(
     float truncDist, float voxelSize, int maxWeight,
     cv::Matx44f _pose, Point3i volResolution, Vec4i volStrides,
     InputArray _depth, InputArray _rgb, float depthFactor, const cv::Matx44f& cameraPose,
     const cv::Intr& depth_intrinsics, const cv::Intr& rgb_intrinsics, InputArray _pixNorms, InputArray _volume);
 
 
-class CustomHashSet
-{
-public:
-    static const int hashDivisor = 32768;
-    static const int startCapacity = 2048;
+void integrateTsdfVolumeUnit(
+    const VolumeSettings& settings, const Matx44f& cameraPose,
+    InputArray _depth, InputArray _pixNorms, InputArray _volume);
 
-    std::vector<int> hashes;
-    // 0-3 for key, 4th for internal use
-    // don't keep keep value
-    std::vector<Vec4i> data;
-    int capacity;
-    int last;
+void integrateTsdfVolumeUnit(
+    const VolumeSettings& settings, const Matx44f& volumePose, const Matx44f& cameraPose,
+    InputArray _depth, InputArray _pixNorms, InputArray _volume);
 
-    CustomHashSet()
-    {
-        hashes.resize(hashDivisor);
-        for (int i = 0; i < hashDivisor; i++)
-            hashes[i] = -1;
-        capacity = startCapacity;
 
-        data.resize(capacity);
-        for (int i = 0; i < capacity; i++)
-            data[i] = { 0, 0, 0, -1 };
+void raycastTsdfVolumeUnit(const VolumeSettings& settings, const Matx44f& cameraPose, int height, int width,
+                       InputArray _volume, OutputArray _points, OutputArray _normals);
 
-        last = 0;
-    }
+void fetchNormalsFromTsdfVolumeUnit(const VolumeSettings& settings, InputArray _volume,
+                                    InputArray _points, OutputArray _normals);
 
-    ~CustomHashSet() { }
+void fetchPointsNormalsFromTsdfVolumeUnit(const VolumeSettings& settings, InputArray _volume,
+                                          OutputArray points, OutputArray normals);
 
-    inline size_t calc_hash(Vec3i x) const
-    {
-        uint32_t seed = 0;
-        constexpr uint32_t GOLDEN_RATIO = 0x9e3779b9;
-        for (int i = 0; i < 3; i++)
-        {
-            seed ^= x[i] + GOLDEN_RATIO + (seed << 6) + (seed >> 2);
-        }
-        return seed;
-    }
 
-    // should work on existing elements too
-    // 0 - need resize
-    // 1 - idx is inserted
-    // 2 - idx already exists
-    int insert(Vec3i idx)
-    {
-        if (last < capacity)
-        {
-            int hash = int(calc_hash(idx) % hashDivisor);
-            int place = hashes[hash];
-            if (place >= 0)
-            {
-                int oldPlace = place;
-                while (place >= 0)
-                {
-                    if (data[place][0] == idx[0] &&
-                        data[place][1] == idx[1] &&
-                        data[place][2] == idx[2])
-                        return 2;
-                    else
-                    {
-                        oldPlace = place;
-                        place = data[place][3];
-                        //std::cout << "place=" << place << std::endl;
-                    }
-                }
+#ifdef HAVE_OPENCL
+void ocl_integrateTsdfVolumeUnit(
+    const VolumeSettings& settings, const Matx44f& cameraPose,
+    InputArray _depth, InputArray _pixNorms, InputArray _volume);
 
-                // found, create here
-                data[oldPlace][3] = last;
-            }
-            else
-            {
-                // insert at last
-                hashes[hash] = last;
-            }
+void ocl_raycastTsdfVolumeUnit(
+    const VolumeSettings& settings, const Matx44f& cameraPose, int height, int width,
+    InputArray _volume, OutputArray _points, OutputArray _normals);
 
-            data[last][0] = idx[0];
-            data[last][1] = idx[1];
-            data[last][2] = idx[2];
-            data[last][3] = -1;
-            last++;
+void ocl_fetchNormalsFromTsdfVolumeUnit(
+    const VolumeSettings& settings, InputArray _volume,
+    InputArray _points, OutputArray _normals);
 
-            return 1;
-        }
-        else
-            return 0;
-    }
+void ocl_fetchPointsNormalsFromTsdfVolumeUnit(
+    const VolumeSettings& settings, InputArray _volume,
+    OutputArray _points, OutputArray _normals);
+#endif
 
-    int find(Vec3i idx) const
-    {
-        int hash = int(calc_hash(idx) % hashDivisor);
-        int place = hashes[hash];
-        // search a place
-        while (place >= 0)
-        {
-            if (data[place][0] == idx[0] &&
-                data[place][1] == idx[1] &&
-                data[place][2] == idx[2])
-                break;
-            else
-            {
-                place = data[place][3];
-            }
-        }
-
-        return place;
-    }
-};
-
-// TODO: remove this structure as soon as HashTSDFGPU data is completely on GPU;
-// until then CustomHashTable can be replaced by this one if needed
-
-const int NAN_ELEMENT = -2147483647;
-
-struct Volume_NODE
-{
-    Vec4i idx = Vec4i(NAN_ELEMENT);
-    int32_t row = -1;
-    int32_t nextVolumeRow = -1;
-    int32_t dummy = 0;
-    int32_t dummy2 = 0;
-};
-
-const int _hash_divisor = 32768;
-const int _list_size = 4;
-
-class VolumesTable
-{
-public:
-    const int hash_divisor = _hash_divisor;
-    const int list_size = _list_size;
-    const int32_t free_row = -1;
-    const int32_t free_isActive = 0;
-
-    const cv::Vec4i nan4 = cv::Vec4i(NAN_ELEMENT);
-
-    int bufferNums;
-    cv::Mat volumes;
-
-    VolumesTable() : bufferNums(1)
-    {
-        this->volumes = cv::Mat(hash_divisor * list_size, 1, rawType<Volume_NODE>());
-        for (int i = 0; i < volumes.size().height; i++)
-        {
-            Volume_NODE* v = volumes.ptr<Volume_NODE>(i);
-            v->idx = nan4;
-            v->row = -1;
-            v->nextVolumeRow = -1;
-        }
-    }
-    const VolumesTable& operator=(const VolumesTable& vt)
-    {
-        this->volumes = vt.volumes;
-        this->bufferNums = vt.bufferNums;
-        return *this;
-    }
-    ~VolumesTable() {};
-
-    bool insert(Vec3i idx, int row)
-    {
-        CV_Assert(row >= 0);
-
-        int bufferNum = 0;
-        int hash = int(calc_hash(idx) % hash_divisor);
-        int start = getPos(idx, bufferNum);
-        int i = start;
-
-        while (i >= 0)
-        {
-            Volume_NODE* v = volumes.ptr<Volume_NODE>(i);
-
-            if (v->idx[0] == NAN_ELEMENT)
-            {
-                Vec4i idx4(idx[0], idx[1], idx[2], 0);
-
-                bool extend = false;
-                if (i != start && i % list_size == 0)
-                {
-                    if (bufferNum >= bufferNums - 1)
-                    {
-                        extend = true;
-                        volumes.resize(hash_divisor * bufferNums);
-                        bufferNums++;
-                    }
-                    bufferNum++;
-                    v->nextVolumeRow = (bufferNum * hash_divisor + hash) * list_size;
-                }
-                else
-                {
-                    v->nextVolumeRow = i + 1;
-                }
-
-                v->idx = idx4;
-                v->row = row;
-
-                return extend;
-            }
-
-            i = v->nextVolumeRow;
-        }
-        return false;
-    }
-    int findRow(Vec3i idx) const
-    {
-        int bufferNum = 0;
-        int i = getPos(idx, bufferNum);
-
-        while (i >= 0)
-        {
-            const Volume_NODE* v = volumes.ptr<Volume_NODE>(i);
-
-            if (v->idx == Vec4i(idx[0], idx[1], idx[2], 0))
-                return v->row;
-            else
-                i = v->nextVolumeRow;
-        }
-
-        return -1;
-    }
-
-    inline int getPos(Vec3i idx, int bufferNum) const
-    {
-        int hash = int(calc_hash(idx) % hash_divisor);
-        return (bufferNum * hash_divisor + hash) * list_size;
-    }
-
-    inline size_t calc_hash(Vec3i x) const
-    {
-        uint32_t seed = 0;
-        constexpr uint32_t GOLDEN_RATIO = 0x9e3779b9;
-        for (int i = 0; i < 3; i++)
-        {
-            seed ^= x[i] + GOLDEN_RATIO + (seed << 6) + (seed >> 2);
-        }
-        return seed;
-    }
-};
 
 }  // namespace cv
 
diff --git a/modules/3d/src/rgbd/volume.cpp b/modules/3d/src/rgbd/volume.cpp
deleted file mode 100644
index 6c07d27b25..0000000000
--- a/modules/3d/src/rgbd/volume.cpp
+++ /dev/null
@@ -1,120 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html
-
-#include "../precomp.hpp"
-#include "tsdf.hpp"
-#include "hash_tsdf.hpp"
-#include "colored_tsdf.hpp"
-
-namespace cv
-{
-
-Ptr<VolumeParams> VolumeParams::defaultParams(int _volumeKind)
-{
-    VolumeParams params;
-    params.kind              = _volumeKind;
-    params.maxWeight         = 64;
-    params.raycastStepFactor = 0.25f;
-    params.unitResolution    = 0;  // unitResolution not used for TSDF
-    float volumeSize         = 3.0f;
-    Matx44f pose = Affine3f().translate(Vec3f(-volumeSize / 2.f, -volumeSize / 2.f, 0.5f)).matrix;
-    params.pose = Mat(pose);
-
-    if(params.kind == VolumeKind::TSDF)
-    {
-        params.resolutionX = 512;
-        params.resolutionY = 512;
-        params.resolutionZ = 512;
-        params.voxelSize           = volumeSize / 512.f;
-        params.depthTruncThreshold = 0.f;  // depthTruncThreshold not required for TSDF
-        params.tsdfTruncDist = 7 * params.voxelSize;  //! About 0.04f in meters
-        return makePtr<VolumeParams>(params);
-    }
-    else if(params.kind == VolumeKind::HASHTSDF)
-    {
-        params.unitResolution      = 16;
-        params.voxelSize           = volumeSize / 512.f;
-        params.depthTruncThreshold = 4.f;
-        params.tsdfTruncDist = 7 * params.voxelSize;  //! About 0.04f in meters
-        return makePtr<VolumeParams>(params);
-    }
-    else if (params.kind == VolumeKind::COLOREDTSDF)
-    {
-        params.resolutionX = 512;
-        params.resolutionY = 512;
-        params.resolutionZ = 512;
-        params.voxelSize = volumeSize / 512.f;
-        params.depthTruncThreshold = 0.f;  // depthTruncThreshold not required for TSDF
-        params.tsdfTruncDist = 7 * params.voxelSize;  //! About 0.04f in meters
-        return makePtr<VolumeParams>(params);
-    }
-    CV_Error(Error::StsBadArg, "Invalid VolumeType does not have parameters");
-}
-
-Ptr<VolumeParams> VolumeParams::coarseParams(int _volumeKind)
-{
-    Ptr<VolumeParams> params = defaultParams(_volumeKind);
-
-    params->raycastStepFactor = 0.75f;
-    float volumeSize          = 3.0f;
-    if(params->kind == VolumeKind::TSDF)
-    {
-        params->resolutionX = 128;
-        params->resolutionY = 128;
-        params->resolutionZ = 128;
-        params->voxelSize  = volumeSize / 128.f;
-        params->tsdfTruncDist = 2 * params->voxelSize;  //! About 0.04f in meters
-        return params;
-    }
-    else if(params->kind == VolumeKind::HASHTSDF)
-    {
-        params->voxelSize = volumeSize / 128.f;
-        params->tsdfTruncDist = 2 * params->voxelSize;  //! About 0.04f in meters
-        return params;
-    }
-    else if (params->kind == VolumeKind::COLOREDTSDF)
-    {
-        params->resolutionX = 128;
-        params->resolutionY = 128;
-        params->resolutionZ = 128;
-        params->voxelSize = volumeSize / 128.f;
-        params->tsdfTruncDist = 2 * params->voxelSize;  //! About 0.04f in meters
-        return params;
-    }
-    CV_Error(Error::StsBadArg, "Invalid VolumeType does not have parameters");
-}
-
-Ptr<Volume> makeVolume(const Ptr<VolumeParams>& _volumeParams)
-{
-    int kind = _volumeParams->kind;
-    if(kind == VolumeParams::VolumeKind::TSDF)
-        return makeTSDFVolume(*_volumeParams);
-    else if(kind == VolumeParams::VolumeKind::HASHTSDF)
-        return makeHashTSDFVolume(*_volumeParams);
-    else if(kind == VolumeParams::VolumeKind::COLOREDTSDF)
-        return makeColoredTSDFVolume(*_volumeParams);
-    CV_Error(Error::StsBadArg, "Invalid VolumeType does not have parameters");
-}
-
-Ptr<Volume> makeVolume(int _volumeKind, float _voxelSize, Matx44f _pose,
-                       float _raycastStepFactor, float _truncDist, int _maxWeight, float _truncateThreshold,
-                       int _resolutionX, int _resolutionY, int _resolutionZ)
-{
-    Point3i _presolution(_resolutionX, _resolutionY, _resolutionZ);
-    if (_volumeKind == VolumeParams::VolumeKind::TSDF)
-    {
-        return makeTSDFVolume(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, _presolution);
-    }
-    else if (_volumeKind == VolumeParams::VolumeKind::HASHTSDF)
-    {
-        return makeHashTSDFVolume(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, _truncateThreshold);
-    }
-    else if (_volumeKind == VolumeParams::VolumeKind::COLOREDTSDF)
-    {
-        return makeColoredTSDFVolume(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, _presolution);
-    }
-    CV_Error(Error::StsBadArg, "Invalid VolumeType does not have parameters");
-}
-
-}  // namespace cv
diff --git a/modules/3d/src/rgbd/volume_impl.cpp b/modules/3d/src/rgbd/volume_impl.cpp
new file mode 100644
index 0000000000..a2edc063e4
--- /dev/null
+++ b/modules/3d/src/rgbd/volume_impl.cpp
@@ -0,0 +1,540 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+
+#include <iostream>
+#include "volume_impl.hpp"
+#include "tsdf_functions.hpp"
+#include "hash_tsdf_functions.hpp"
+#include "color_tsdf_functions.hpp"
+#include "opencv2/imgproc.hpp"
+
+namespace cv
+{
+
+Volume::Impl::Impl(const VolumeSettings& _settings) :
+    settings(_settings)
+#ifdef HAVE_OPENCL
+    , useGPU(ocl::useOpenCL())
+#endif
+{}
+
+// TSDF
+
+TsdfVolume::TsdfVolume(const VolumeSettings& _settings) :
+    Volume::Impl(_settings)
+{
+    Vec3i volResolution;
+    settings.getVolumeResolution(volResolution);
+#ifndef HAVE_OPENCL
+    volume = Mat(1, volResolution[0] * volResolution[1] * volResolution[2], rawType<TsdfVoxel>());
+#else
+    if (useGPU)
+        gpu_volume = UMat(1, volResolution[0] * volResolution[1] * volResolution[2], rawType<TsdfVoxel>());
+    else
+        cpu_volume = Mat(1, volResolution[0] * volResolution[1] * volResolution[2], rawType<TsdfVoxel>());
+#endif
+
+    reset();
+}
+TsdfVolume::~TsdfVolume() {}
+
+void TsdfVolume::integrate(const OdometryFrame& frame, InputArray _cameraPose)
+{
+    CV_TRACE_FUNCTION();
+#ifndef HAVE_OPENCL
+    Mat depth;
+#else
+    UMat depth;
+#endif
+    frame.getDepth(depth);
+    integrate(depth, _cameraPose);
+}
+
+void TsdfVolume::integrate(InputArray _depth, InputArray _cameraPose)
+{
+    CV_TRACE_FUNCTION();
+#ifndef HAVE_OPENCL
+    Mat depth = _depth.getMat();
+#else
+    UMat depth = _depth.getUMat();
+#endif
+    CV_Assert(!depth.empty());
+
+    Matx33f intr;
+    settings.getCameraIntegrateIntrinsics(intr);
+    Intr intrinsics(intr);
+    Vec6f newParams((float)depth.rows, (float)depth.cols,
+        intrinsics.fx, intrinsics.fy,
+        intrinsics.cx, intrinsics.cy);
+    if (!(frameParams == newParams))
+    {
+        frameParams = newParams;
+#ifndef HAVE_OPENCL
+        preCalculationPixNorm(depth.size(), intrinsics, pixNorms);
+#else
+        if (useGPU)
+            ocl_preCalculationPixNorm(depth.size(), intrinsics, gpu_pixNorms);
+        else
+            preCalculationPixNorm(depth.size(), intrinsics, cpu_pixNorms);
+#endif
+    }
+    const Matx44f cameraPose = _cameraPose.getMat();
+
+#ifndef HAVE_OPENCL
+    integrateTsdfVolumeUnit(settings, cameraPose, depth, pixNorms, volume);
+#else
+    if (useGPU)
+        ocl_integrateTsdfVolumeUnit(settings, cameraPose, depth, gpu_pixNorms, gpu_volume);
+    else
+        integrateTsdfVolumeUnit(settings, cameraPose, depth, cpu_pixNorms, cpu_volume);
+#endif
+}
+void TsdfVolume::integrate(InputArray, InputArray, InputArray)
+{
+    CV_Error(cv::Error::StsBadFunc, "This volume doesn't support vertex colors");
+}
+
+void TsdfVolume::raycast(InputArray cameraPose, OdometryFrame& outFrame) const
+{
+    raycast(cameraPose, settings.getRaycastHeight(), settings.getRaycastWidth(), outFrame);
+}
+
+void TsdfVolume::raycast(InputArray cameraPose, OutputArray points, OutputArray normals, OutputArray colors)  const
+{
+    raycast(cameraPose, settings.getRaycastHeight(), settings.getRaycastWidth(), points, normals, colors);
+}
+
+void TsdfVolume::raycast(InputArray cameraPose, int height, int width, OdometryFrame& outFrame) const
+{
+#ifndef HAVE_OPENCL
+    Mat points, normals;
+    raycast(cameraPose, height, width, points, normals, noArray());
+    outFrame.setPyramidLevel(1, OdometryFramePyramidType::PYR_CLOUD);
+    outFrame.setPyramidLevel(1, OdometryFramePyramidType::PYR_NORM);
+    outFrame.setPyramidAt(points, OdometryFramePyramidType::PYR_CLOUD, 0);
+    outFrame.setPyramidAt(normals, OdometryFramePyramidType::PYR_NORM, 0);
+#else
+    if (useGPU)
+    {
+        UMat points, normals;
+        raycast(cameraPose, height, width, points, normals, noArray());
+        outFrame.setPyramidLevel(1, OdometryFramePyramidType::PYR_CLOUD);
+        outFrame.setPyramidLevel(1, OdometryFramePyramidType::PYR_NORM);
+        outFrame.setPyramidAt(points, OdometryFramePyramidType::PYR_CLOUD, 0);
+        outFrame.setPyramidAt(normals, OdometryFramePyramidType::PYR_NORM, 0);
+    }
+    else
+    {
+        Mat points, normals;
+        raycast(cameraPose, height, width, points, normals, noArray());
+        outFrame.setPyramidLevel(1, OdometryFramePyramidType::PYR_CLOUD);
+        outFrame.setPyramidLevel(1, OdometryFramePyramidType::PYR_NORM);
+        outFrame.setPyramidAt(points, OdometryFramePyramidType::PYR_CLOUD, 0);
+        outFrame.setPyramidAt(normals, OdometryFramePyramidType::PYR_NORM, 0);
+    }
+#endif
+}
+
+
+void TsdfVolume::raycast(InputArray _cameraPose, int height, int width, OutputArray _points, OutputArray _normals, OutputArray _colors) const
+{
+    if (_colors.needed())
+        CV_Error(cv::Error::StsBadFunc, "This volume doesn't support vertex colors");
+
+    CV_Assert(height > 0);
+    CV_Assert(width > 0);
+
+    const Matx44f cameraPose = _cameraPose.getMat();
+#ifndef HAVE_OPENCL
+    raycastTsdfVolumeUnit(settings, cameraPose, height, width, volume, _points, _normals);
+#else
+    if (useGPU)
+        ocl_raycastTsdfVolumeUnit(settings, cameraPose, height, width, gpu_volume, _points, _normals);
+    else
+        raycastTsdfVolumeUnit(settings, cameraPose, height, width, cpu_volume, _points, _normals);
+#endif
+}
+
+void TsdfVolume::fetchNormals(InputArray points, OutputArray normals) const
+{
+#ifndef HAVE_OPENCL
+    fetchNormalsFromTsdfVolumeUnit(settings, volume, points, normals);
+#else
+    if (useGPU)
+        ocl_fetchNormalsFromTsdfVolumeUnit(settings, gpu_volume, points, normals);
+    else
+        fetchNormalsFromTsdfVolumeUnit(settings, cpu_volume, points, normals);
+#endif
+}
+
+void TsdfVolume::fetchPointsNormals(OutputArray points, OutputArray normals) const
+{
+#ifndef HAVE_OPENCL
+    fetchPointsNormalsFromTsdfVolumeUnit(settings, volume, points, normals);
+#else
+    if (useGPU)
+        ocl_fetchPointsNormalsFromTsdfVolumeUnit(settings, gpu_volume, points, normals);
+    else
+        fetchPointsNormalsFromTsdfVolumeUnit(settings, cpu_volume, points, normals);
+
+#endif
+}
+
+void TsdfVolume::fetchPointsNormalsColors(OutputArray, OutputArray, OutputArray) const
+{
+    CV_Error(cv::Error::StsBadFunc, "This volume doesn't support vertex colors");
+}
+
+void TsdfVolume::reset()
+{
+    CV_TRACE_FUNCTION();
+#ifndef HAVE_OPENCL
+    //TODO: use setTo(Scalar(0, 0))
+    volume.forEach<VecTsdfVoxel>([](VecTsdfVoxel& vv, const int* /* position */)
+        {
+            TsdfVoxel& v = reinterpret_cast<TsdfVoxel&>(vv);
+            v.tsdf = floatToTsdf(0.0f); v.weight = 0;
+        });
+#else
+    if (useGPU)
+        gpu_volume.setTo(Scalar(0, 0));
+    else
+        //TODO: use setTo(Scalar(0, 0))
+        cpu_volume.forEach<VecTsdfVoxel>([](VecTsdfVoxel& vv, const int* /* position */)
+            {
+                TsdfVoxel& v = reinterpret_cast<TsdfVoxel&>(vv);
+                v.tsdf = floatToTsdf(0.0f); v.weight = 0;
+            });
+#endif
+}
+int TsdfVolume::getVisibleBlocks() const { return 1; }
+size_t TsdfVolume::getTotalVolumeUnits() const { return 1; }
+
+
+
+// HASH_TSDF
+
+HashTsdfVolume::HashTsdfVolume(const VolumeSettings& _settings) :
+    Volume::Impl(_settings)
+{
+    Vec3i resolution;
+    settings.getVolumeResolution(resolution);
+    const Point3i volResolution = Point3i(resolution);
+    volumeUnitDegree = calcVolumeUnitDegree(volResolution);
+
+#ifndef HAVE_OPENCL
+    volUnitsData = cv::Mat(VOLUMES_SIZE, resolution[0] * resolution[1] * resolution[2], rawType<TsdfVoxel>());
+    reset();
+#else
+    if (useGPU)
+    {
+        reset();
+    }
+    else
+    {
+        cpu_volUnitsData = cv::Mat(VOLUMES_SIZE, resolution[0] * resolution[1] * resolution[2], rawType<TsdfVoxel>());
+        reset();
+    }
+#endif
+}
+
+HashTsdfVolume::~HashTsdfVolume() {}
+
+void HashTsdfVolume::integrate(const OdometryFrame& frame, InputArray _cameraPose)
+{
+    CV_TRACE_FUNCTION();
+#ifndef HAVE_OPENCL
+    Mat depth;
+#else
+    UMat depth;
+#endif
+    frame.getDepth(depth);
+    integrate(depth, _cameraPose);
+}
+
+void HashTsdfVolume::integrate(InputArray _depth, InputArray _cameraPose)
+{
+#ifndef HAVE_OPENCL
+    Mat depth = _depth.getMat();
+#else
+    UMat depth = _depth.getUMat();
+#endif
+    const Matx44f cameraPose = _cameraPose.getMat();
+    Matx33f intr;
+    settings.getCameraIntegrateIntrinsics(intr);
+    Intr intrinsics(intr);
+    Vec6f newParams((float)depth.rows, (float)depth.cols,
+        intrinsics.fx, intrinsics.fy,
+        intrinsics.cx, intrinsics.cy);
+    if (!(frameParams == newParams))
+    {
+        frameParams = newParams;
+#ifndef HAVE_OPENCL
+        preCalculationPixNorm(depth.size(), intrinsics, pixNorms);
+#else
+        if (useGPU)
+            ocl_preCalculationPixNorm(depth.size(), intrinsics, gpu_pixNorms);
+        else
+            preCalculationPixNorm(depth.size(), intrinsics, cpu_pixNorms);
+#endif
+    }
+#ifndef HAVE_OPENCL
+    integrateHashTsdfVolumeUnit(settings, cameraPose, lastVolIndex, lastFrameId, volumeUnitDegree, depth, pixNorms, volUnitsData, volumeUnits);
+    lastFrameId++;
+#else
+    if (useGPU)
+    {
+        ocl_integrateHashTsdfVolumeUnit(settings, cameraPose, lastVolIndex, lastFrameId, bufferSizeDegree, volumeUnitDegree, depth, gpu_pixNorms, lastVisibleIndices, volUnitsDataCopy, gpu_volUnitsData, hashTable, isActiveFlags);
+    }
+    else
+    {
+        integrateHashTsdfVolumeUnit(settings, cameraPose, lastVolIndex, lastFrameId, volumeUnitDegree, depth, cpu_pixNorms, cpu_volUnitsData, cpu_volumeUnits);
+        lastFrameId++;
+    }
+#endif
+}
+
+void HashTsdfVolume::integrate(InputArray, InputArray, InputArray)
+{
+    CV_Error(cv::Error::StsBadFunc, "This volume doesn't support vertex colors");
+}
+
+void HashTsdfVolume::raycast(InputArray cameraPose, OdometryFrame& outFrame) const
+{
+    raycast(cameraPose, settings.getRaycastHeight(), settings.getRaycastWidth(), outFrame);
+}
+
+void HashTsdfVolume::raycast(InputArray cameraPose, OutputArray points, OutputArray normals, OutputArray colors)  const
+{
+    raycast(cameraPose, settings.getRaycastHeight(), settings.getRaycastWidth(), points, normals, colors);
+}
+
+void HashTsdfVolume::raycast(InputArray cameraPose, int height, int width, OdometryFrame& outFrame) const
+{
+#ifndef HAVE_OPENCL
+    Mat points, normals;
+    raycast(cameraPose, height, width, points, normals, noArray());
+    outFrame.setPyramidLevel(1, OdometryFramePyramidType::PYR_CLOUD);
+    outFrame.setPyramidLevel(1, OdometryFramePyramidType::PYR_NORM);
+    outFrame.setPyramidAt(points, OdometryFramePyramidType::PYR_CLOUD, 0);
+    outFrame.setPyramidAt(normals, OdometryFramePyramidType::PYR_NORM, 0);
+#else
+    if (useGPU)
+    {
+        UMat points, normals;
+        raycast(cameraPose, height, width, points, normals, noArray());
+        outFrame.setPyramidLevel(1, OdometryFramePyramidType::PYR_CLOUD);
+        outFrame.setPyramidLevel(1, OdometryFramePyramidType::PYR_NORM);
+        outFrame.setPyramidAt(points, OdometryFramePyramidType::PYR_CLOUD, 0);
+        outFrame.setPyramidAt(normals, OdometryFramePyramidType::PYR_NORM, 0);
+    }
+    else
+    {
+        Mat points, normals;
+        raycast(cameraPose, height, width, points, normals, noArray());
+        outFrame.setPyramidLevel(1, OdometryFramePyramidType::PYR_CLOUD);
+        outFrame.setPyramidLevel(1, OdometryFramePyramidType::PYR_NORM);
+        outFrame.setPyramidAt(points, OdometryFramePyramidType::PYR_CLOUD, 0);
+        outFrame.setPyramidAt(normals, OdometryFramePyramidType::PYR_NORM, 0);
+
+    }
+#endif
+}
+void HashTsdfVolume::raycast(InputArray _cameraPose, int height, int width, OutputArray _points, OutputArray _normals, OutputArray _colors) const
+{
+    if (_colors.needed())
+        CV_Error(cv::Error::StsBadFunc, "This volume doesn't support vertex colors");
+
+    const Matx44f cameraPose = _cameraPose.getMat();
+
+#ifndef HAVE_OPENCL
+    raycastHashTsdfVolumeUnit(settings, cameraPose, height, width, volumeUnitDegree, volUnitsData, volumeUnits, _points, _normals);
+#else
+    if (useGPU)
+        ocl_raycastHashTsdfVolumeUnit(settings, cameraPose, height, width, volumeUnitDegree, hashTable, gpu_volUnitsData, _points, _normals);
+    else
+        raycastHashTsdfVolumeUnit(settings, cameraPose, height, width, volumeUnitDegree, cpu_volUnitsData, cpu_volumeUnits, _points, _normals);
+#endif
+}
+
+void HashTsdfVolume::fetchNormals(InputArray points, OutputArray normals) const
+{
+#ifndef HAVE_OPENCL
+    fetchNormalsFromHashTsdfVolumeUnit(settings, volUnitsData, volumeUnits, volumeUnitDegree, points, normals);
+#else
+    if (useGPU)
+        olc_fetchNormalsFromHashTsdfVolumeUnit(settings, volumeUnitDegree, gpu_volUnitsData, volUnitsDataCopy, hashTable, points, normals);
+    else
+        fetchNormalsFromHashTsdfVolumeUnit(settings, cpu_volUnitsData, cpu_volumeUnits, volumeUnitDegree, points, normals);
+
+#endif
+}
+void HashTsdfVolume::fetchPointsNormals(OutputArray points, OutputArray normals) const
+{
+#ifndef HAVE_OPENCL
+    fetchPointsNormalsFromHashTsdfVolumeUnit(settings, volUnitsData, volumeUnits, volumeUnitDegree, points, normals);
+#else
+    if (useGPU)
+        ocl_fetchPointsNormalsFromHashTsdfVolumeUnit(settings, volumeUnitDegree, gpu_volUnitsData, volUnitsDataCopy, hashTable, points, normals);
+    else
+        fetchPointsNormalsFromHashTsdfVolumeUnit(settings, cpu_volUnitsData, cpu_volumeUnits, volumeUnitDegree, points, normals);
+#endif
+}
+
+void HashTsdfVolume::fetchPointsNormalsColors(OutputArray, OutputArray, OutputArray) const
+{
+    CV_Error(cv::Error::StsBadFunc, "This volume doesn't support vertex colors");
+};
+
+void HashTsdfVolume::reset()
+{
+    CV_TRACE_FUNCTION();
+    lastVolIndex = 0;
+    lastFrameId = 0;
+#ifndef HAVE_OPENCL
+    volUnitsData.forEach<VecTsdfVoxel>([](VecTsdfVoxel& vv, const int* /* position */)
+        {
+            TsdfVoxel& v = reinterpret_cast<TsdfVoxel&>(vv);
+            v.tsdf = floatToTsdf(0.0f); v.weight = 0;
+        });
+    volumeUnits = VolumeUnitIndexes();
+#else
+    if (useGPU)
+    {
+        Vec3i resolution;
+        settings.getVolumeResolution(resolution);
+
+        bufferSizeDegree = 15;
+        int buff_lvl = (int)(1 << bufferSizeDegree);
+        int volCubed = resolution[0] * resolution[1] * resolution[2];
+
+        volUnitsDataCopy = cv::Mat(buff_lvl, volCubed, rawType<TsdfVoxel>());
+        gpu_volUnitsData = cv::UMat(buff_lvl, volCubed, CV_8UC2);
+        lastVisibleIndices = cv::UMat(buff_lvl, 1, CV_32S);
+        isActiveFlags = cv::UMat(buff_lvl, 1, CV_8U);
+        hashTable = CustomHashSet();
+        frameParams = Vec6f();
+        gpu_pixNorms = UMat();
+    }
+    else
+    {
+        cpu_volUnitsData.forEach<VecTsdfVoxel>([](VecTsdfVoxel& vv, const int* /* position */)
+            {
+                TsdfVoxel& v = reinterpret_cast<TsdfVoxel&>(vv);
+                v.tsdf = floatToTsdf(0.0f); v.weight = 0;
+            });
+        cpu_volumeUnits = VolumeUnitIndexes();
+    }
+#endif
+}
+
+int HashTsdfVolume::getVisibleBlocks() const { return 1; }
+size_t HashTsdfVolume::getTotalVolumeUnits() const { return 1; }
+
+// COLOR_TSDF
+
+ColorTsdfVolume::ColorTsdfVolume(const VolumeSettings& _settings) :
+    Volume::Impl(_settings)
+{
+    Vec3i volResolution;
+    settings.getVolumeResolution(volResolution);
+    volume = Mat(1, volResolution[0] * volResolution[1] * volResolution[2], rawType<RGBTsdfVoxel>());
+    reset();
+}
+
+ColorTsdfVolume::~ColorTsdfVolume() {}
+
+void ColorTsdfVolume::integrate(const OdometryFrame& frame, InputArray cameraPose)
+{
+    CV_TRACE_FUNCTION();
+    Mat depth;
+    frame.getDepth(depth);
+    Mat rgb;
+    frame.getImage(rgb);
+
+    integrate(depth, rgb, cameraPose);
+}
+
+void ColorTsdfVolume::integrate(InputArray, InputArray)
+{
+    CV_Error(cv::Error::StsBadFunc, "Color data should be passed for this volume type");
+}
+
+void ColorTsdfVolume::integrate(InputArray _depth, InputArray _image, InputArray _cameraPose)
+{
+    Mat depth = _depth.getMat();
+    Colors image = _image.getMat();
+    const Matx44f cameraPose = _cameraPose.getMat();
+    Matx33f intr;
+    settings.getCameraIntegrateIntrinsics(intr);
+    Intr intrinsics(intr);
+    Vec6f newParams((float)depth.rows, (float)depth.cols,
+        intrinsics.fx, intrinsics.fy,
+        intrinsics.cx, intrinsics.cy);
+    if (!(frameParams == newParams))
+    {
+        frameParams = newParams;
+        preCalculationPixNorm(depth.size(), intrinsics, pixNorms);
+    }
+    integrateColorTsdfVolumeUnit(settings, cameraPose, depth, image, pixNorms, volume);
+}
+
+void ColorTsdfVolume::raycast(InputArray cameraPose, OdometryFrame& outFrame) const
+{
+    raycast(cameraPose, settings.getRaycastHeight(), settings.getRaycastWidth(), outFrame);
+}
+void ColorTsdfVolume::raycast(InputArray cameraPose, OutputArray points, OutputArray normals, OutputArray colors)  const
+{
+    raycast(cameraPose, settings.getRaycastHeight(), settings.getRaycastWidth(), points, normals, colors);
+}
+
+void ColorTsdfVolume::raycast(InputArray cameraPose, int height, int width, OdometryFrame& outFrame) const
+{
+    Mat points, normals, colors;
+    raycast(cameraPose, height, width, points, normals, colors);
+    outFrame.setPyramidLevel(1, OdometryFramePyramidType::PYR_CLOUD);
+    outFrame.setPyramidLevel(1, OdometryFramePyramidType::PYR_NORM);
+    outFrame.setPyramidLevel(1, OdometryFramePyramidType::PYR_IMAGE);
+    outFrame.setPyramidAt(points, OdometryFramePyramidType::PYR_CLOUD, 0);
+    outFrame.setPyramidAt(normals, OdometryFramePyramidType::PYR_NORM, 0);
+    outFrame.setPyramidAt(colors, OdometryFramePyramidType::PYR_IMAGE, 0);
+}
+
+void ColorTsdfVolume::raycast(InputArray _cameraPose, int height, int width, OutputArray _points, OutputArray _normals, OutputArray _colors) const
+{
+    const Matx44f cameraPose = _cameraPose.getMat();
+    raycastColorTsdfVolumeUnit(settings, cameraPose, height, width, volume, _points, _normals, _colors);
+}
+
+void ColorTsdfVolume::fetchNormals(InputArray points, OutputArray normals) const
+{
+    fetchNormalsFromColorTsdfVolumeUnit(settings, volume, points, normals);
+}
+
+void ColorTsdfVolume::fetchPointsNormals(OutputArray points, OutputArray normals) const
+{
+    fetchPointsNormalsFromColorTsdfVolumeUnit(settings, volume, points, normals);
+}
+
+void ColorTsdfVolume::fetchPointsNormalsColors(OutputArray points, OutputArray normals, OutputArray colors) const
+{
+    fetchPointsNormalsColorsFromColorTsdfVolumeUnit(settings, volume, points, normals, colors);
+}
+
+void ColorTsdfVolume::reset()
+{
+    CV_TRACE_FUNCTION();
+
+    volume.forEach<VecRGBTsdfVoxel>([](VecRGBTsdfVoxel& vv, const int* /* position */)
+        {
+            RGBTsdfVoxel& v = reinterpret_cast<RGBTsdfVoxel&>(vv);
+            v.tsdf = floatToTsdf(0.0f); v.weight = 0;
+        });
+}
+
+int ColorTsdfVolume::getVisibleBlocks() const { return 1; }
+size_t ColorTsdfVolume::getTotalVolumeUnits() const { return 1; }
+
+}
diff --git a/modules/3d/src/rgbd/volume_impl.hpp b/modules/3d/src/rgbd/volume_impl.hpp
new file mode 100644
index 0000000000..9ad51f0f8c
--- /dev/null
+++ b/modules/3d/src/rgbd/volume_impl.hpp
@@ -0,0 +1,220 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+#ifndef OPENCV_3D_VOLUME_IMPL_HPP
+#define OPENCV_3D_VOLUME_IMPL_HPP
+
+#include <iostream>
+
+#include "../precomp.hpp"
+#include "hash_tsdf_functions.hpp"
+
+namespace cv
+{
+
+class Volume::Impl
+{
+private:
+    // TODO: make debug function, which show histogram of volume points values
+    // make this function run with debug lvl == 10
+public:
+    Impl(const VolumeSettings& settings);
+    virtual ~Impl() {};
+
+    virtual void integrate(const OdometryFrame& frame, InputArray pose) = 0;
+    virtual void integrate(InputArray depth, InputArray pose) = 0;
+    virtual void integrate(InputArray depth, InputArray image, InputArray pose) = 0;
+
+    virtual void raycast(InputArray cameraPose, OdometryFrame& outFrame) const = 0;
+    virtual void raycast(InputArray cameraPose, OutputArray points, OutputArray normals, OutputArray colors) const = 0;
+
+    virtual void raycast(InputArray cameraPose, int height, int width, OdometryFrame& outFrame) const = 0;
+    virtual void raycast(InputArray cameraPose, int height, int width, OutputArray points, OutputArray normals, OutputArray colors) const = 0;
+
+    virtual void fetchNormals(InputArray points, OutputArray normals) const = 0;
+    virtual void fetchPointsNormals(OutputArray points, OutputArray normals) const = 0;
+    virtual void fetchPointsNormalsColors(OutputArray points, OutputArray normals, OutputArray colors) const = 0;
+
+    virtual void reset() = 0;
+    virtual int getVisibleBlocks() const = 0;
+    virtual size_t getTotalVolumeUnits() const = 0;
+
+public:
+    const VolumeSettings& settings;
+#ifdef HAVE_OPENCL
+    const bool useGPU;
+#endif
+};
+
+
+class TsdfVolume : public Volume::Impl
+{
+public:
+    TsdfVolume(const VolumeSettings& settings);
+    ~TsdfVolume();
+
+    virtual void integrate(const OdometryFrame& frame, InputArray pose) override;
+    virtual void integrate(InputArray depth, InputArray pose) override;
+    virtual void integrate(InputArray depth, InputArray image, InputArray pose) override;
+    virtual void raycast(InputArray cameraPose, OdometryFrame& outFrame) const override;
+    virtual void raycast(InputArray cameraPose, OutputArray points, OutputArray normals, OutputArray colors) const override;
+    virtual void raycast(InputArray cameraPose, int height, int width, OdometryFrame& outFrame) const override;
+    virtual void raycast(InputArray cameraPose, int height, int width, OutputArray points, OutputArray normals, OutputArray colors) const override;
+
+    virtual void fetchNormals(InputArray points, OutputArray normals) const override;
+    virtual void fetchPointsNormals(OutputArray points, OutputArray normals) const override;
+    virtual void fetchPointsNormalsColors(OutputArray points, OutputArray normals, OutputArray colors) const override;
+
+    virtual void reset() override;
+    virtual int getVisibleBlocks() const override;
+    virtual size_t getTotalVolumeUnits() const override;
+
+public:
+    Vec6f frameParams;
+#ifndef HAVE_OPENCL
+    Mat pixNorms;
+    // See zFirstMemOrder arg of parent class constructor
+    // for the array layout info
+    // Consist of Voxel elements
+    Mat volume;
+#else
+    //temporary solution
+    Mat cpu_pixNorms;
+    Mat cpu_volume;
+    UMat gpu_pixNorms;
+    UMat gpu_volume;
+#endif
+};
+
+
+typedef std::unordered_set<cv::Vec3i, tsdf_hash> VolumeUnitIndexSet;
+typedef std::unordered_map<cv::Vec3i, VolumeUnit, tsdf_hash> VolumeUnitIndexes;
+
+class HashTsdfVolume : public Volume::Impl
+{
+public:
+    HashTsdfVolume(const VolumeSettings& settings);
+    ~HashTsdfVolume();
+
+    virtual void integrate(const OdometryFrame& frame, InputArray pose) override;
+    virtual void integrate(InputArray depth, InputArray pose) override;
+    virtual void integrate(InputArray depth, InputArray image, InputArray pose) override;
+    virtual void raycast(InputArray cameraPose, OdometryFrame& outFrame) const override;
+    virtual void raycast(InputArray cameraPose, OutputArray points, OutputArray normals, OutputArray colors) const override;
+    virtual void raycast(InputArray cameraPose, int height, int width, OdometryFrame& outFrame) const override;
+    virtual void raycast(InputArray cameraPose, int height, int width, OutputArray points, OutputArray normals, OutputArray colors) const override;
+
+    virtual void fetchNormals(InputArray points, OutputArray normals) const override;
+    virtual void fetchPointsNormals(OutputArray points, OutputArray normals) const override;
+    virtual void fetchPointsNormalsColors(OutputArray points, OutputArray normals, OutputArray colors) const override;
+
+    virtual void reset() override;
+    virtual int getVisibleBlocks() const override;
+    virtual size_t getTotalVolumeUnits() const override;
+public:
+    int lastVolIndex;
+    int lastFrameId;
+    Vec6f frameParams;
+    int volumeUnitDegree;
+
+#ifndef HAVE_OPENCL
+    Mat volUnitsData;
+    Mat pixNorms;
+    VolumeUnitIndexes volumeUnits;
+#else
+    VolumeUnitIndexes cpu_volumeUnits;
+
+    Mat cpu_volUnitsData;
+    Mat cpu_pixNorms;
+    UMat gpu_volUnitsData;
+    UMat gpu_pixNorms;
+
+    int bufferSizeDegree;
+    // per-volume-unit data
+    UMat lastVisibleIndices;
+    UMat isActiveFlags;
+    //TODO: remove it when there's no CPU parts
+    Mat volUnitsDataCopy;
+    //TODO: move indexes.volumes to GPU
+    CustomHashSet hashTable;
+#endif
+};
+
+
+class ColorTsdfVolume : public Volume::Impl
+{
+public:
+    ColorTsdfVolume(const VolumeSettings& settings);
+    ~ColorTsdfVolume();
+
+    virtual void integrate(const OdometryFrame& frame, InputArray pose) override;
+    virtual void integrate(InputArray depth, InputArray pose) override;
+    virtual void integrate(InputArray depth, InputArray image, InputArray pose) override;
+    virtual void raycast(InputArray cameraPose, OdometryFrame& outFrame) const override;
+    virtual void raycast(InputArray cameraPose, OutputArray points, OutputArray normals, OutputArray colors) const override;
+    virtual void raycast(InputArray cameraPose, int height, int width, OdometryFrame& outFrame) const override;
+    virtual void raycast(InputArray cameraPose, int height, int width, OutputArray points, OutputArray normals, OutputArray colors) const override;
+
+    virtual void fetchNormals(InputArray points, OutputArray normals) const override;
+    virtual void fetchPointsNormals(OutputArray points, OutputArray normals) const override;
+    virtual void fetchPointsNormalsColors(OutputArray points, OutputArray normals, OutputArray colors) const override;
+
+    virtual void reset() override;
+    virtual int getVisibleBlocks() const override;
+    virtual size_t getTotalVolumeUnits() const override;
+private:
+    Vec4i volStrides;
+    Vec6f frameParams;
+    Mat pixNorms;
+    // See zFirstMemOrder arg of parent class constructor
+    // for the array layout info
+    // Consist of Voxel elements
+    Mat volume;
+};
+
+
+Volume::Volume()
+{
+    VolumeSettings settings;
+    this->impl = makePtr<TsdfVolume>(settings);
+}
+Volume::Volume(VolumeType vtype, const VolumeSettings& settings)
+{
+    switch (vtype)
+    {
+    case VolumeType::TSDF:
+        this->impl = makePtr<TsdfVolume>(settings);
+        break;
+    case VolumeType::HashTSDF:
+        this->impl = makePtr<HashTsdfVolume>(settings);
+        break;
+    case VolumeType::ColorTSDF:
+        this->impl = makePtr<ColorTsdfVolume>(settings);
+        break;
+    default:
+        CV_Error(Error::StsInternal, "Incorrect OdometryType, you are able to use only { ICP, RGB, RGBD }");
+        break;
+    }
+}
+Volume::~Volume() {}
+
+void Volume::integrate(const OdometryFrame& frame, InputArray pose) { this->impl->integrate(frame, pose); }
+void Volume::integrate(InputArray depth, InputArray pose) { this->impl->integrate(depth, pose); }
+void Volume::integrate(InputArray depth, InputArray image, InputArray pose) { this->impl->integrate(depth, image, pose); }
+void Volume::raycast(InputArray cameraPose, OdometryFrame& outFrame) const { this->impl->raycast(cameraPose, outFrame); }
+void Volume::raycast(InputArray cameraPose, OutputArray _points, OutputArray _normals, OutputArray _colors) const { this->impl->raycast(cameraPose, _points, _normals, _colors); }
+void Volume::raycast(InputArray cameraPose, int height, int width, OdometryFrame& outFrame) const { this->impl->raycast(cameraPose, height, width, outFrame); }
+void Volume::raycast(InputArray cameraPose, int height, int width, OutputArray _points, OutputArray _normals, OutputArray _colors) const { this->impl->raycast(cameraPose, height, width, _points, _normals, _colors); }
+void Volume::fetchNormals(InputArray points, OutputArray normals) const { this->impl->fetchNormals(points, normals); }
+void Volume::fetchPointsNormals(OutputArray points, OutputArray normals) const { this->impl->fetchPointsNormals(points, normals); }
+void Volume::fetchPointsNormalsColors(OutputArray points, OutputArray normals, OutputArray colors) const { this->impl->fetchPointsNormalsColors(points, normals, colors); };
+
+void Volume::reset() { this->impl->reset(); }
+int Volume::getVisibleBlocks() const { return this->impl->getVisibleBlocks(); }
+size_t Volume::getTotalVolumeUnits() const { return this->impl->getTotalVolumeUnits(); }
+
+
+}
+
+#endif // !OPENCV_3D_VOLUME_IMPL_HPP
diff --git a/modules/3d/src/rgbd/volume_settings_impl.cpp b/modules/3d/src/rgbd/volume_settings_impl.cpp
new file mode 100644
index 0000000000..b5bc1c8b89
--- /dev/null
+++ b/modules/3d/src/rgbd/volume_settings_impl.cpp
@@ -0,0 +1,532 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+
+#include "../precomp.hpp"
+
+namespace cv
+{
+
+Vec4i calcVolumeDimensions(Point3i volumeResolution, bool ZFirstMemOrder);
+
+class VolumeSettings::Impl
+{
+public:
+    Impl() {};
+    virtual ~Impl() {};
+
+    virtual void  setIntegrateWidth(int  val) = 0;
+    virtual int   getIntegrateWidth() const = 0;
+    virtual void  setIntegrateHeight(int  val) = 0;
+    virtual int   getIntegrateHeight() const = 0;
+    virtual void  setRaycastWidth(int  val) = 0;
+    virtual int   getRaycastWidth() const = 0;
+    virtual void  setRaycastHeight(int  val) = 0;
+    virtual int   getRaycastHeight() const = 0;
+    virtual void  setDepthFactor(float val) = 0;
+    virtual float getDepthFactor() const = 0;
+    virtual void  setVoxelSize(float  val) = 0;
+    virtual float getVoxelSize() const = 0;
+    virtual void  setTsdfTruncateDistance(float val) = 0;
+    virtual float getTsdfTruncateDistance() const = 0;
+    virtual void  setMaxDepth(float val) = 0;
+    virtual float getMaxDepth() const = 0;
+    virtual void  setMaxWeight(int val) = 0;
+    virtual int   getMaxWeight() const = 0;
+    virtual void  setRaycastStepFactor(float val) = 0;
+    virtual float getRaycastStepFactor() const = 0;
+
+    virtual void setVolumePose(InputArray val) = 0;
+    virtual void getVolumePose(OutputArray val) const = 0;
+    virtual void setVolumeResolution(InputArray val) = 0;
+    virtual void getVolumeResolution(OutputArray val) const = 0;
+    virtual void getVolumeDimensions(OutputArray val) const = 0;
+    virtual void setCameraIntegrateIntrinsics(InputArray val) = 0;
+    virtual void getCameraIntegrateIntrinsics(OutputArray val) const = 0;
+    virtual void setCameraRaycastIntrinsics(InputArray val) = 0;
+    virtual void getCameraRaycastIntrinsics(OutputArray val) const = 0;
+};
+
+class VolumeSettingsImpl : public VolumeSettings::Impl
+{
+public:
+    VolumeSettingsImpl();
+    VolumeSettingsImpl(VolumeType volumeType);
+    ~VolumeSettingsImpl();
+
+    virtual void  setIntegrateWidth(int  val) override;
+    virtual int   getIntegrateWidth() const override;
+    virtual void  setIntegrateHeight(int  val) override;
+    virtual int   getIntegrateHeight() const override;
+    virtual void  setRaycastWidth(int  val) override;
+    virtual int   getRaycastWidth() const override;
+    virtual void  setRaycastHeight(int  val) override;
+    virtual int   getRaycastHeight() const override;
+    virtual void  setDepthFactor(float val) override;
+    virtual float getDepthFactor() const override;
+    virtual void  setVoxelSize(float  val) override;
+    virtual float getVoxelSize() const override;
+    virtual void  setTsdfTruncateDistance(float val) override;
+    virtual float getTsdfTruncateDistance() const override;
+    virtual void  setMaxDepth(float val) override;
+    virtual float getMaxDepth() const override;
+    virtual void  setMaxWeight(int val) override;
+    virtual int   getMaxWeight() const override;
+    virtual void  setRaycastStepFactor(float val) override;
+    virtual float getRaycastStepFactor() const override;
+
+    virtual void setVolumePose(InputArray val) override;
+    virtual void getVolumePose(OutputArray val) const override;
+    virtual void setVolumeResolution(InputArray val) override;
+    virtual void getVolumeResolution(OutputArray val) const override;
+    virtual void getVolumeDimensions(OutputArray val) const override;
+    virtual void setCameraIntegrateIntrinsics(InputArray val) override;
+    virtual void getCameraIntegrateIntrinsics(OutputArray val) const override;
+    virtual void setCameraRaycastIntrinsics(InputArray val) override;
+    virtual void getCameraRaycastIntrinsics(OutputArray val) const override;
+
+private:
+    VolumeType volumeType;
+
+    int   integrateWidth;
+    int   integrateHeight;
+    int   raycastWidth;
+    int   raycastHeight;
+    float depthFactor;
+    float voxelSize;
+    float tsdfTruncateDistance;
+    float maxDepth;
+    int   maxWeight;
+    float raycastStepFactor;
+    bool  zFirstMemOrder;
+
+    Matx44f volumePose;
+    Point3i volumeResolution;
+    Vec4i volumeDimensions;
+    Matx33f cameraIntegrateIntrinsics;
+    Matx33f cameraRaycastIntrinsics;
+
+public:
+    // duplicate classes for all volumes
+
+    class DefaultTsdfSets {
+    public:
+        static const int integrateWidth  = 640;
+        static const int integrateHeight = 480;
+        float ifx = 525.f; // focus point x axis
+        float ify = 525.f; // focus point y axis
+        float icx = float(integrateWidth) / 2.f - 0.5f;  // central point x axis
+        float icy = float(integrateHeight) / 2.f - 0.5f; // central point y axis
+        const Matx33f  cameraIntegrateIntrinsics = Matx33f(ifx, 0, icx, 0, ify, icy, 0, 0, 1); // camera settings
+
+        static const int raycastWidth = 640;
+        static const int raycastHeight = 480;
+        float rfx = 525.f; // focus point x axis
+        float rfy = 525.f; // focus point y axis
+        float rcx = float(raycastWidth) / 2.f - 0.5f;  // central point x axis
+        float rcy = float(raycastHeight) / 2.f - 0.5f; // central point y axis
+        const Matx33f  cameraRaycastIntrinsics = Matx33f(rfx, 0, rcx, 0, rfy, rcy, 0, 0, 1); // camera settings
+
+        static constexpr float depthFactor = 5000.f; // 5000 for the 16-bit PNG files, 1 for the 32-bit float images in the ROS bag files
+        static constexpr float volumeSize = 3.f; // meters
+        static constexpr float voxelSize = volumeSize / 128.f; //meters
+        static constexpr float tsdfTruncateDistance = 2 * voxelSize;
+        static constexpr float maxDepth = 0.f;
+        static const int maxWeight = 64; // number of frames
+        static constexpr float raycastStepFactor = 0.75f;
+        static const bool zFirstMemOrder = true; // order of voxels in volume
+
+        const Affine3f volumePose = Affine3f().translate(Vec3f(-volumeSize / 2.f, -volumeSize / 2.f, 0.5f));
+        const Matx44f  volumePoseMatrix = volumePose.matrix;
+        // Unlike original code, this should work with any volume size
+        // Not only when (x,y,z % 32) == 0
+        const Point3i  volumeResolution = Vec3i::all(128); //number of voxels
+    };
+
+    class DefaultHashTsdfSets {
+    public:
+        static const int integrateWidth = 640;
+        static const int integrateHeight = 480;
+        float ifx = 525.f; // focus point x axis
+        float ify = 525.f; // focus point y axis
+        float icx = float(integrateWidth) / 2.f - 0.5f;  // central point x axis
+        float icy = float(integrateHeight) / 2.f - 0.5f; // central point y axis
+        const Matx33f  cameraIntegrateIntrinsics = Matx33f(ifx, 0, icx, 0, ify, icy, 0, 0, 1); // camera settings
+
+        static const int raycastWidth = 640;
+        static const int raycastHeight = 480;
+        float rfx = 525.f; // focus point x axis
+        float rfy = 525.f; // focus point y axis
+        float rcx = float(raycastWidth) / 2.f - 0.5f;  // central point x axis
+        float rcy = float(raycastHeight) / 2.f - 0.5f; // central point y axis
+        const Matx33f  cameraRaycastIntrinsics = Matx33f(rfx, 0, rcx, 0, rfy, rcy, 0, 0, 1); // camera settings
+
+        static constexpr float depthFactor = 5000.f; // 5000 for the 16-bit PNG files, 1 for the 32-bit float images in the ROS bag files
+        static constexpr float volumeSize = 3.f; // meters
+        static constexpr float voxelSize = volumeSize / 512.f; //meters
+        static constexpr float tsdfTruncateDistance = 7 * voxelSize;
+        static constexpr float maxDepth = 4.f;
+        static const int maxWeight = 64; // number of frames
+        static constexpr float raycastStepFactor = 0.25f;
+        static const bool zFirstMemOrder = true; // order of voxels in volume
+
+        const Affine3f volumePose = Affine3f().translate(Vec3f(-volumeSize / 2.f, -volumeSize / 2.f, 0.5f));
+        const Matx44f  volumePoseMatrix = volumePose.matrix;
+        // Unlike original code, this should work with any volume size
+        // Not only when (x,y,z % 32) == 0
+        const Point3i  volumeResolution = Vec3i::all(16); //number of voxels
+    };
+
+    class DefaultColorTsdfSets {
+    public:
+        static const int integrateWidth = 640;
+        static const int integrateHeight = 480;
+        float ifx = 525.f; // focus point x axis
+        float ify = 525.f; // focus point y axis
+        float icx = float(integrateWidth) / 2.f - 0.5f;  // central point x axis
+        float icy = float(integrateHeight) / 2.f - 0.5f; // central point y axis
+        float rgb_ifx = 525.f; // focus point x axis
+        float rgb_ify = 525.f; // focus point y axis
+        float rgb_icx = float(integrateWidth) / 2.f - 0.5f;  // central point x axis
+        float rgb_icy = float(integrateHeight) / 2.f - 0.5f; // central point y axis
+        const Matx33f  cameraIntegrateIntrinsics = Matx33f(ifx, 0, icx, 0, ify, icy, 0, 0, 1); // camera settings
+
+        static const int raycastWidth = 640;
+        static const int raycastHeight = 480;
+        float rfx = 525.f; // focus point x axis
+        float rfy = 525.f; // focus point y axis
+        float rcx = float(raycastWidth) / 2.f - 0.5f;  // central point x axis
+        float rcy = float(raycastHeight) / 2.f - 0.5f; // central point y axis
+        float rgb_rfx = 525.f; // focus point x axis
+        float rgb_rfy = 525.f; // focus point y axis
+        float rgb_rcx = float(raycastWidth) / 2.f - 0.5f;  // central point x axis
+        float rgb_rcy = float(raycastHeight) / 2.f - 0.5f; // central point y axis
+        const Matx33f  cameraRaycastIntrinsics = Matx33f(rfx, 0, rcx, 0, rfy, rcy, 0, 0, 1); // camera settings
+
+        static constexpr float depthFactor = 5000.f; // 5000 for the 16-bit PNG files, 1 for the 32-bit float images in the ROS bag files
+        static constexpr float volumeSize = 3.f; // meters
+        static constexpr float voxelSize = volumeSize / 128.f; //meters
+        static constexpr float tsdfTruncateDistance = 2 * voxelSize;
+        static constexpr float maxDepth = 0.f;
+        static const int maxWeight = 64; // number of frames
+        static constexpr float raycastStepFactor = 0.75f;
+        static const bool zFirstMemOrder = true; // order of voxels in volume
+
+        const Affine3f volumePose = Affine3f().translate(Vec3f(-volumeSize / 2.f, -volumeSize / 2.f, 0.5f));
+        const Matx44f  volumePoseMatrix = volumePose.matrix;
+        // Unlike original code, this should work with any volume size
+        // Not only when (x,y,z % 32) == 0
+        const Point3i  volumeResolution = Vec3i::all(128); //number of voxels
+    };
+
+};
+
+
+VolumeSettings::VolumeSettings()
+{
+    this->impl = makePtr<VolumeSettingsImpl>();
+}
+
+VolumeSettings::VolumeSettings(VolumeType volumeType)
+{
+    this->impl = makePtr<VolumeSettingsImpl>(volumeType);
+}
+
+VolumeSettings::~VolumeSettings() {}
+
+void  VolumeSettings::setIntegrateWidth(int val) { this->impl->setIntegrateWidth(val); };
+int   VolumeSettings::getIntegrateWidth() const { return this->impl->getIntegrateWidth(); };
+void  VolumeSettings::setIntegrateHeight(int val) { this->impl->setIntegrateHeight(val); };
+int   VolumeSettings::getIntegrateHeight() const { return this->impl->getIntegrateHeight(); };
+void  VolumeSettings::setRaycastWidth(int val) { this->impl->setRaycastWidth(val); };
+int   VolumeSettings::getRaycastWidth() const { return this->impl->getRaycastWidth(); };
+void  VolumeSettings::setRaycastHeight(int val) { this->impl->setRaycastHeight(val); };
+int   VolumeSettings::getRaycastHeight() const { return this->impl->getRaycastHeight(); };
+void  VolumeSettings::setVoxelSize(float val) { this->impl->setVoxelSize(val); };
+float VolumeSettings::getVoxelSize() const { return this->impl->getVoxelSize(); };
+void  VolumeSettings::setRaycastStepFactor(float val) { this->impl->setRaycastStepFactor(val); };
+float VolumeSettings::getRaycastStepFactor() const { return this->impl->getRaycastStepFactor(); };
+void  VolumeSettings::setTsdfTruncateDistance(float val) { this->impl->setTsdfTruncateDistance(val); };
+float VolumeSettings::getTsdfTruncateDistance() const { return this->impl->getTsdfTruncateDistance(); };
+void  VolumeSettings::setMaxDepth(float val) { this->impl->setMaxDepth(val); };
+float VolumeSettings::getMaxDepth() const { return this->impl->getMaxDepth(); };
+void  VolumeSettings::setDepthFactor(float val) { this->impl->setDepthFactor(val); };
+float VolumeSettings::getDepthFactor() const { return this->impl->getDepthFactor(); };
+void  VolumeSettings::setMaxWeight(int val) { this->impl->setMaxWeight(val); };
+int   VolumeSettings::getMaxWeight() const { return this->impl->getMaxWeight(); };
+
+void VolumeSettings::setVolumePose(InputArray val) { this->impl->setVolumePose(val); };
+void VolumeSettings::getVolumePose(OutputArray val) const { this->impl->getVolumePose(val); };
+void VolumeSettings::setVolumeResolution(InputArray val) { this->impl->setVolumeResolution(val); };
+void VolumeSettings::getVolumeResolution(OutputArray val) const { this->impl->getVolumeResolution(val); };
+void VolumeSettings::getVolumeDimensions(OutputArray val) const { this->impl->getVolumeDimensions(val); };
+void VolumeSettings::setCameraIntegrateIntrinsics(InputArray val) { this->impl->setCameraIntegrateIntrinsics(val); };
+void VolumeSettings::getCameraIntegrateIntrinsics(OutputArray val) const { this->impl->getCameraIntegrateIntrinsics(val); };
+void VolumeSettings::setCameraRaycastIntrinsics(InputArray val) { this->impl->setCameraRaycastIntrinsics(val); };
+void VolumeSettings::getCameraRaycastIntrinsics(OutputArray val) const { this->impl->getCameraRaycastIntrinsics(val); };
+
+
+VolumeSettingsImpl::VolumeSettingsImpl()
+    : VolumeSettingsImpl(VolumeType::TSDF)
+{
+}
+
+VolumeSettingsImpl::VolumeSettingsImpl(VolumeType _volumeType)
+{
+    volumeType = _volumeType;
+    if (volumeType == VolumeType::TSDF)
+    {
+        DefaultTsdfSets ds = DefaultTsdfSets();
+
+        this->integrateWidth = ds.integrateWidth;
+        this->integrateHeight = ds.integrateHeight;
+        this->raycastWidth = ds.raycastWidth;
+        this->raycastHeight = ds.raycastHeight;
+        this->depthFactor = ds.depthFactor;
+        this->voxelSize = ds.voxelSize;
+        this->tsdfTruncateDistance = ds.tsdfTruncateDistance;
+        this->maxDepth = ds.maxDepth;
+        this->maxWeight = ds.maxWeight;
+        this->raycastStepFactor = ds.raycastStepFactor;
+        this->zFirstMemOrder = ds.zFirstMemOrder;
+
+        this->volumePose = ds.volumePoseMatrix;
+        this->volumeResolution = ds.volumeResolution;
+        this->volumeDimensions = calcVolumeDimensions(ds.volumeResolution, ds.zFirstMemOrder);
+        this->cameraIntegrateIntrinsics = ds.cameraIntegrateIntrinsics;
+        this->cameraRaycastIntrinsics = ds.cameraRaycastIntrinsics;
+    }
+    else if (volumeType == VolumeType::HashTSDF)
+    {
+        DefaultHashTsdfSets ds = DefaultHashTsdfSets();
+
+        this->integrateWidth = ds.integrateWidth;
+        this->integrateHeight = ds.integrateHeight;
+        this->raycastWidth = ds.raycastWidth;
+        this->raycastHeight = ds.raycastHeight;
+        this->depthFactor = ds.depthFactor;
+        this->voxelSize = ds.voxelSize;
+        this->tsdfTruncateDistance = ds.tsdfTruncateDistance;
+        this->maxDepth = ds.maxDepth;
+        this->maxWeight = ds.maxWeight;
+        this->raycastStepFactor = ds.raycastStepFactor;
+        this->zFirstMemOrder = ds.zFirstMemOrder;
+
+        this->volumePose = ds.volumePoseMatrix;
+        this->volumeResolution = ds.volumeResolution;
+        this->volumeDimensions = calcVolumeDimensions(ds.volumeResolution, ds.zFirstMemOrder);
+        this->cameraIntegrateIntrinsics = ds.cameraIntegrateIntrinsics;
+        this->cameraRaycastIntrinsics = ds.cameraRaycastIntrinsics;
+    }
+    else if (volumeType == VolumeType::ColorTSDF)
+    {
+        DefaultColorTsdfSets ds = DefaultColorTsdfSets();
+
+        this->integrateWidth = ds.integrateWidth;
+        this->integrateHeight = ds.integrateHeight;
+        this->raycastWidth = ds.raycastWidth;
+        this->raycastHeight = ds.raycastHeight;
+        this->depthFactor = ds.depthFactor;
+        this->voxelSize = ds.voxelSize;
+        this->tsdfTruncateDistance = ds.tsdfTruncateDistance;
+        this->maxDepth = ds.maxDepth;
+        this->maxWeight = ds.maxWeight;
+        this->raycastStepFactor = ds.raycastStepFactor;
+        this->zFirstMemOrder = ds.zFirstMemOrder;
+
+        this->volumePose = ds.volumePoseMatrix;
+        this->volumeResolution = ds.volumeResolution;
+        this->volumeDimensions = calcVolumeDimensions(ds.volumeResolution, ds.zFirstMemOrder);
+        this->cameraIntegrateIntrinsics = ds.cameraIntegrateIntrinsics;
+        this->cameraRaycastIntrinsics = ds.cameraRaycastIntrinsics;
+    }
+}
+
+
+VolumeSettingsImpl::~VolumeSettingsImpl() {}
+
+
+void VolumeSettingsImpl::setIntegrateWidth(int val)
+{
+    this->integrateWidth = val;
+}
+
+int VolumeSettingsImpl::getIntegrateWidth() const
+{
+    return this->integrateWidth;
+}
+
+void VolumeSettingsImpl::setIntegrateHeight(int val)
+{
+    this->integrateHeight = val;
+}
+
+int VolumeSettingsImpl::getIntegrateHeight() const
+{
+    return this->integrateHeight;
+}
+
+void VolumeSettingsImpl::setRaycastWidth(int val)
+{
+    this->raycastWidth = val;
+}
+
+int VolumeSettingsImpl::getRaycastWidth() const
+{
+    return this->raycastWidth;
+}
+
+void VolumeSettingsImpl::setRaycastHeight(int val)
+{
+    this->raycastHeight = val;
+}
+
+int VolumeSettingsImpl::getRaycastHeight() const
+{
+    return this->raycastHeight;
+}
+
+void VolumeSettingsImpl::setDepthFactor(float val)
+{
+    this->depthFactor = val;
+}
+
+float VolumeSettingsImpl::getDepthFactor() const
+{
+    return this->depthFactor;
+}
+
+void VolumeSettingsImpl::setVoxelSize(float  val)
+{
+    this->voxelSize = val;
+}
+
+float VolumeSettingsImpl::getVoxelSize() const
+{
+    return this->voxelSize;
+}
+
+void VolumeSettingsImpl::setTsdfTruncateDistance(float val)
+{
+    this->tsdfTruncateDistance = val;
+}
+
+float VolumeSettingsImpl::getTsdfTruncateDistance() const
+{
+    return this->tsdfTruncateDistance;
+}
+
+void VolumeSettingsImpl::setMaxDepth(float val)
+{
+    this->maxDepth = val;
+}
+
+float VolumeSettingsImpl::getMaxDepth() const
+{
+    return this->maxDepth;
+}
+
+void VolumeSettingsImpl::setMaxWeight(int val)
+{
+    this->maxWeight = val;
+}
+
+int VolumeSettingsImpl::getMaxWeight() const
+{
+    return this->maxWeight;
+}
+
+void VolumeSettingsImpl::setRaycastStepFactor(float val)
+{
+    this->raycastStepFactor = val;
+}
+
+float VolumeSettingsImpl::getRaycastStepFactor() const
+{
+    return this->raycastStepFactor;
+}
+
+void VolumeSettingsImpl::setVolumePose(InputArray val)
+{
+    if (!val.empty())
+    {
+        val.copyTo(this->volumePose);
+    }
+}
+
+void VolumeSettingsImpl::getVolumePose(OutputArray val) const
+{
+    Mat(this->volumePose).copyTo(val);
+}
+
+void VolumeSettingsImpl::setVolumeResolution(InputArray val)
+{
+    if (!val.empty())
+    {
+        this->volumeResolution = Point3i(val.getMat());
+        this->volumeDimensions = calcVolumeDimensions(this->volumeResolution, this->zFirstMemOrder);
+    }
+}
+
+void VolumeSettingsImpl::getVolumeResolution(OutputArray val) const
+{
+    Mat(this->volumeResolution).copyTo(val);
+}
+
+void VolumeSettingsImpl::getVolumeDimensions(OutputArray val) const
+{
+    Mat(this->volumeDimensions).copyTo(val);
+}
+
+void VolumeSettingsImpl::setCameraIntegrateIntrinsics(InputArray val)
+{
+    if (!val.empty())
+    {
+        this->cameraIntegrateIntrinsics = Matx33f(val.getMat());
+    }
+}
+
+void VolumeSettingsImpl::getCameraIntegrateIntrinsics(OutputArray val) const
+{
+    Mat(this->cameraIntegrateIntrinsics).copyTo(val);
+}
+
+
+void VolumeSettingsImpl::setCameraRaycastIntrinsics(InputArray val)
+{
+    if (!val.empty())
+    {
+        this->cameraRaycastIntrinsics = Matx33f(val.getMat());
+    }
+}
+
+void VolumeSettingsImpl::getCameraRaycastIntrinsics(OutputArray val) const
+{
+    Mat(this->cameraRaycastIntrinsics).copyTo(val);
+}
+
+
+Vec4i calcVolumeDimensions(Point3i volumeResolution, bool ZFirstMemOrder)
+{
+    // (xRes*yRes*zRes) array
+    // Depending on zFirstMemOrder arg:
+    // &elem(x, y, z) = data + x*zRes*yRes + y*zRes + z;
+    // &elem(x, y, z) = data + x + y*xRes + z*xRes*yRes;
+    int xdim, ydim, zdim;
+    if (ZFirstMemOrder)
+    {
+        xdim = volumeResolution.z * volumeResolution.y;
+        ydim = volumeResolution.z;
+        zdim = 1;
+    }
+    else
+    {
+        xdim = 1;
+        ydim = volumeResolution.x;
+        zdim = volumeResolution.x * volumeResolution.y;
+    }
+    return Vec4i(xdim, ydim, zdim);
+}
+
+}
diff --git a/modules/3d/test/ocl/test_tsdf.cpp b/modules/3d/test/ocl/test_tsdf.cpp
index d3214fe1b5..63f7b8c415 100644
--- a/modules/3d/test/ocl/test_tsdf.cpp
+++ b/modules/3d/test/ocl/test_tsdf.cpp
@@ -276,83 +276,6 @@ void renderPointsNormals(InputArray _points, InputArray _normals, OutputArray im
 }
 // ----------------------------
 
-static const bool display = false;
-static const bool parallelCheck = false;
-
-class Settings
-{
-public:
-    float depthFactor;
-    Matx33f intr;
-    Size frameSize;
-    Vec3f lightPose;
-
-    Ptr<Volume> volume;
-    Ptr<Scene> scene;
-    std::vector<Affine3f> poses;
-
-    Settings(bool useHashTSDF, bool onlySemisphere)
-    {
-        frameSize = Size(640, 480);
-
-        float fx, fy, cx, cy;
-        fx = fy = 525.f;
-        cx = frameSize.width / 2 - 0.5f;
-        cy = frameSize.height / 2 - 0.5f;
-        intr = Matx33f(fx,  0, cx,
-                        0, fy, cy,
-                        0,  0,  1);
-
-        // 5000 for the 16-bit PNG files
-        // 1 for the 32-bit float images in the ROS bag files
-        depthFactor = 5000;
-
-        Vec3i volumeDims = Vec3i::all(512); //number of voxels
-
-        float volSize = 3.f;
-        float voxelSize = volSize / 512.f; //meters
-
-        // default pose of volume cube
-        Affine3f volumePose = Affine3f().translate(Vec3f(-volSize / 2.f, -volSize / 2.f, 0.5f));
-        float tsdf_trunc_dist = 7 * voxelSize; // about 0.04f in meters
-        int tsdf_max_weight = 64;   //frames
-
-        float raycast_step_factor = 0.25f;  //in voxel sizes
-        // gradient delta factor is fixed at 1.0f and is not used
-        //p.gradient_delta_factor = 0.5f; //in voxel sizes
-
-        //p.lightPose = p.volume_pose.translation()/4; //meters
-        lightPose = Vec3f::all(0.f); //meters
-
-        // depth truncation is not used by default but can be useful in some scenes
-        float truncateThreshold = 0.f; //meters
-
-        VolumeParams::VolumeKind volumeKind = VolumeParams::VolumeKind::TSDF;
-
-        if (useHashTSDF)
-        {
-            volumeKind = VolumeParams::VolumeKind::HASHTSDF;
-            truncateThreshold = 4.f;
-        }
-        else
-        {
-            volSize = 3.f;
-            volumeDims = Vec3i::all(128); //number of voxels
-            voxelSize = volSize / 128.f;
-            tsdf_trunc_dist = 2 * voxelSize; // 0.04f in meters
-
-            raycast_step_factor = 0.75f;  //in voxel sizes
-        }
-
-        volume = makeVolume(volumeKind, voxelSize, volumePose.matrix,
-                            raycast_step_factor, tsdf_trunc_dist, tsdf_max_weight,
-                            truncateThreshold, volumeDims[0], volumeDims[1], volumeDims[2]);
-
-        scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
-        poses = scene->getPoses();
-    }
-};
-
 void displayImage(Mat depth, Mat points, Mat normals, float depthFactor, Vec3f lightPose)
 {
     Mat image;
@@ -361,6 +284,7 @@ void displayImage(Mat depth, Mat points, Mat normals, float depthFactor, Vec3f l
     renderPointsNormals(points, normals, image, lightPose);
     imshow("render", image);
     waitKey(2000);
+    destroyAllWindows();
 }
 
 void normalsCheck(Mat normals)
@@ -400,123 +324,412 @@ int counterOfValid(Mat points)
     return count;
 }
 
-void normal_test(bool isHashTSDF, bool isRaycast, bool isFetchPointsNormals, bool isFetchNormals)
+enum class VolumeTestFunction
 {
-    Settings settings(isHashTSDF, false);
+    RAYCAST = 0,
+    FETCH_NORMALS = 1,
+    FETCH_POINTS_NORMALS = 2
+};
 
-    Mat depth = settings.scene->depth(settings.poses[0]);
+enum class VolumeTestSrcType
+{
+    MAT = 0,
+    ODOMETRY_FRAME = 1
+};
+
+void normal_test_custom_framesize(VolumeType volumeType, VolumeTestFunction testFunction, VolumeTestSrcType testSrcType)
+{
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = true;
+    float depthFactor = vs.getDepthFactor();
+    Vec3f lightPose = Vec3f::all(0.f);
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    Mat depth = scene->depth(poses[0]);
     UMat udepth;
     depth.copyTo(udepth);
     UMat upoints, unormals, utmpnormals;
-    UMat unewPoints, unewNormals;
-    Mat  points, normals;
+    Mat points, normals;
     AccessFlag af = ACCESS_READ;
 
-    settings.volume->integrate(udepth, settings.depthFactor, settings.poses[0].matrix, settings.intr);
+    OdometryFrame odf(OdometryFrameStoreType::UMAT);
+    odf.setDepth(udepth);
 
-    if (isRaycast)
+    if (testSrcType == VolumeTestSrcType::MAT)
+        volume.integrate(depth, poses[0].matrix);
+    else
+        volume.integrate(odf, poses[0].matrix);
+
+    if (testFunction == VolumeTestFunction::RAYCAST)
     {
-        settings.volume->raycast(settings.poses[0].matrix, settings.intr, settings.frameSize, upoints, unormals);
+        if (testSrcType == VolumeTestSrcType::MAT)
+        {
+            volume.raycast(poses[0].matrix, frameSize.height, frameSize.width, upoints, unormals);
+        }
+        else if (testSrcType == VolumeTestSrcType::ODOMETRY_FRAME)
+        {
+            volume.raycast(poses[0].matrix, frameSize.height, frameSize.width, odf);
+            odf.getPyramidAt(upoints, OdometryFramePyramidType::PYR_CLOUD, 0);
+            odf.getPyramidAt(unormals, OdometryFramePyramidType::PYR_NORM, 0);
+        }
     }
-    if (isFetchPointsNormals)
+    else if (testFunction == VolumeTestFunction::FETCH_NORMALS)
     {
-        settings.volume->fetchPointsNormals(upoints, unormals);
+        if (testSrcType == VolumeTestSrcType::MAT)
+        {
+            // takes only point from raycast for checking fetched normals on the display
+            volume.raycast(poses[0].matrix, frameSize.height,frameSize.width, upoints, utmpnormals);
+            //volume.fetchPointsNormals(upoints, utmpnormals);
+            volume.fetchNormals(upoints, unormals);
+        }
     }
-    if (isFetchNormals)
+    else if (testFunction == VolumeTestFunction::FETCH_POINTS_NORMALS)
     {
-        settings.volume->fetchPointsNormals(upoints, utmpnormals);
-        settings.volume->fetchNormals(upoints, unormals);
+        if (testSrcType == VolumeTestSrcType::MAT) // Odometry frame or Mats
+        {
+            volume.fetchPointsNormals(upoints, unormals);
+        }
     }
 
     normals = unormals.getMat(af);
-    points  = upoints.getMat(af);
+    points = upoints.getMat(af);
 
-    auto normalCheck = [](Vec4f& vector, const int*)
-    {
-        if (!cvIsNaN(vector[0]))
-        {
-            float length = vector[0] * vector[0] +
-                vector[1] * vector[1] +
-                vector[2] * vector[2];
-            ASSERT_LT(abs(1 - length), 0.0001f) << "There is normal with length != 1";
-        }
-    };
+    if (testFunction == VolumeTestFunction::RAYCAST && cvtest::debugLevel > 0)
+        displayImage(depth, points, normals, depthFactor, lightPose);
 
-    if (parallelCheck)
-        normals.forEach<Vec4f>(normalCheck);
-    else
-        normalsCheck(normals);
-
-    if (isRaycast && display)
-        displayImage(depth, points, normals, settings.depthFactor, settings.lightPose);
-
-    if (isRaycast)
-    {
-        settings.volume->raycast(settings.poses[17].matrix, settings.intr, settings.frameSize, unewPoints, unewNormals);
-        normals = unewNormals.getMat(af);
-        points  = unewPoints.getMat(af);
-        normalsCheck(normals);
-
-        if (parallelCheck)
-            normals.forEach<Vec4f>(normalCheck);
-        else
-            normalsCheck(normals);
-
-        if (display)
-            displayImage(depth, points, normals, settings.depthFactor, settings.lightPose);
-    }
+    normalsCheck(normals);
 }
 
-void valid_points_test(bool isHashTSDF)
+void normal_test_common_framesize(VolumeType volumeType, VolumeTestFunction testFunction, VolumeTestSrcType testSrcType)
 {
-    Settings settings(isHashTSDF, true);
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
 
-    Mat depth = settings.scene->depth(settings.poses[0]);
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = true;
+    float depthFactor = vs.getDepthFactor();
+    Vec3f lightPose = Vec3f::all(0.f);
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    Mat depth = scene->depth(poses[0]);
     UMat udepth;
     depth.copyTo(udepth);
-    UMat upoints, unormals, unewPoints, unewNormals;
+    UMat upoints, unormals, utmpnormals;
+    Mat points, normals;
+    AccessFlag af = ACCESS_READ;
+
+    OdometryFrame odf(OdometryFrameStoreType::UMAT);
+    odf.setDepth(udepth);
+
+    if (testSrcType == VolumeTestSrcType::MAT)
+        volume.integrate(depth, poses[0].matrix);
+    else
+        volume.integrate(odf, poses[0].matrix);
+
+    if (testFunction == VolumeTestFunction::RAYCAST)
+    {
+        if (testSrcType == VolumeTestSrcType::MAT)
+        {
+            volume.raycast(poses[0].matrix, upoints, unormals);
+        }
+        else if (testSrcType == VolumeTestSrcType::ODOMETRY_FRAME)
+        {
+            volume.raycast(poses[0].matrix, odf);
+            odf.getPyramidAt(upoints, OdometryFramePyramidType::PYR_CLOUD, 0);
+            odf.getPyramidAt(unormals, OdometryFramePyramidType::PYR_NORM, 0);
+        }
+    }
+    else if (testFunction == VolumeTestFunction::FETCH_NORMALS)
+    {
+        if (testSrcType == VolumeTestSrcType::MAT)
+        {
+            // takes only point from raycast for checking fetched normals on the display
+            volume.raycast(poses[0].matrix, upoints, utmpnormals);
+            //volume.fetchPointsNormals(upoints, utmpnormals);
+            volume.fetchNormals(upoints, unormals);
+        }
+    }
+    else if (testFunction == VolumeTestFunction::FETCH_POINTS_NORMALS)
+    {
+        if (testSrcType == VolumeTestSrcType::MAT) // Odometry frame or Mats
+        {
+            volume.fetchPointsNormals(upoints, unormals);
+        }
+    }
+
+    normals = unormals.getMat(af);
+    points = upoints.getMat(af);
+
+    if (testFunction == VolumeTestFunction::RAYCAST && cvtest::debugLevel > 0)
+        displayImage(depth, points, normals, depthFactor, lightPose);
+
+    normalsCheck(normals);
+}
+
+void valid_points_test_custom_framesize(VolumeType volumeType, VolumeTestSrcType testSrcType)
+{
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = true;
+    float depthFactor = vs.getDepthFactor();
+    Vec3f lightPose = Vec3f::all(0.f);
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    Mat depth = scene->depth(poses[0]);
+    UMat udepth;
+    depth.copyTo(udepth);
+    UMat upoints, unormals;
+    UMat upoints1, unormals1;
+    Mat points, normals;
     AccessFlag af = ACCESS_READ;
-    Mat  points, normals;
     int anfas, profile;
 
-    settings.volume->integrate(udepth, settings.depthFactor, settings.poses[0].matrix, settings.intr);
-    settings.volume->raycast(settings.poses[0].matrix, settings.intr, settings.frameSize, upoints, unormals);
+    OdometryFrame odf(OdometryFrameStoreType::UMAT);
+    odf.setDepth(udepth);
+
+    if (testSrcType == VolumeTestSrcType::MAT)
+        volume.integrate(depth, poses[0].matrix);
+    else
+        volume.integrate(odf, poses[0].matrix);
+
+
+    if (testSrcType == VolumeTestSrcType::MAT) // Odometry frame or Mats
+    {
+        volume.raycast(poses[0].matrix, frameSize.height, frameSize.width, upoints, unormals);
+    }
+    else if (testSrcType == VolumeTestSrcType::ODOMETRY_FRAME)
+    {
+        volume.raycast(poses[0].matrix, frameSize.height, frameSize.width, odf);
+        odf.getPyramidAt(upoints, OdometryFramePyramidType::PYR_CLOUD, 0);
+        odf.getPyramidAt(unormals, OdometryFramePyramidType::PYR_NORM, 0);
+    }
+
     normals = unormals.getMat(af);
     points  = upoints.getMat(af);
     patchNaNs(points);
     anfas = counterOfValid(points);
 
-    ASSERT_NE(anfas, 0) << "There is no points in anfas";
+    if (cvtest::debugLevel > 0)
+        displayImage(depth, points, normals, depthFactor, lightPose);
 
-    if (display)
-        displayImage(depth, points, normals, settings.depthFactor, settings.lightPose);
+    if (testSrcType == VolumeTestSrcType::MAT) // Odometry frame or Mats
+    {
+        volume.raycast(poses[17].matrix, frameSize.height, frameSize.width, upoints1, unormals1);
+    }
+    else if (testSrcType == VolumeTestSrcType::ODOMETRY_FRAME)
+    {
+        volume.raycast(poses[17].matrix, frameSize.height, frameSize.width, odf);
+        odf.getPyramidAt(upoints1, OdometryFramePyramidType::PYR_CLOUD, 0);
+        odf.getPyramidAt(unormals1, OdometryFramePyramidType::PYR_NORM, 0);
+    }
 
-    settings.volume->raycast(settings.poses[17].matrix, settings.intr, settings.frameSize, unewPoints, unewNormals);
-    normals = unewNormals.getMat(af);
-    points  = unewPoints.getMat(af);
+    normals = unormals1.getMat(af);
+    points  = upoints1.getMat(af);
     patchNaNs(points);
     profile = counterOfValid(points);
 
-    ASSERT_NE(profile, 0) << "There is no points in profile";
+    if (cvtest::debugLevel > 0)
+        displayImage(depth, points, normals, depthFactor, lightPose);
 
     // TODO: why profile == 2*anfas ?
     float percentValidity = float(anfas) / float(profile);
 
+    ASSERT_NE(profile, 0) << "There is no points in profile";
+    ASSERT_NE(anfas, 0) << "There is no points in anfas";
     ASSERT_LT(abs(0.5 - percentValidity), 0.3) << "percentValidity out of [0.3; 0.7] (percentValidity=" << percentValidity << ")";
-
-    if (display)
-        displayImage(depth, points, normals, settings.depthFactor, settings.lightPose);
 }
 
-TEST(TSDF_GPU, raycast_normals)      { normal_test(false, true, false, false); }
-TEST(TSDF_GPU, fetch_points_normals) { normal_test(false, false, true, false); }
-TEST(TSDF_GPU, fetch_normals)        { normal_test(false, false, false, true); }
-TEST(TSDF_GPU, valid_points)         { valid_points_test(false); }
+void valid_points_test_common_framesize(VolumeType volumeType, VolumeTestSrcType testSrcType)
+{
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
 
-TEST(HashTSDF_GPU, raycast_normals)      { normal_test(true, true, false, false); }
-TEST(HashTSDF_GPU, fetch_points_normals) { normal_test(true, false, true, false); }
-TEST(HashTSDF_GPU, fetch_normals)        { normal_test(true, false, false, true); }
-TEST(HashTSDF_GPU, valid_points)         { valid_points_test(true); }
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = true;
+    float depthFactor = vs.getDepthFactor();
+    Vec3f lightPose = Vec3f::all(0.f);
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    Mat depth = scene->depth(poses[0]);
+    UMat udepth;
+    depth.copyTo(udepth);
+    UMat upoints, unormals;
+    UMat upoints1, unormals1;
+    Mat points, normals;
+    AccessFlag af = ACCESS_READ;
+    int anfas, profile;
+
+    OdometryFrame odf(OdometryFrameStoreType::UMAT);
+    odf.setDepth(udepth);
+
+    if (testSrcType == VolumeTestSrcType::MAT)
+        volume.integrate(depth, poses[0].matrix);
+    else
+        volume.integrate(odf, poses[0].matrix);
+
+
+    if (testSrcType == VolumeTestSrcType::MAT) // Odometry frame or Mats
+    {
+        volume.raycast(poses[0].matrix, upoints, unormals);
+    }
+    else if (testSrcType == VolumeTestSrcType::ODOMETRY_FRAME)
+    {
+        volume.raycast(poses[0].matrix, odf);
+        odf.getPyramidAt(upoints, OdometryFramePyramidType::PYR_CLOUD, 0);
+        odf.getPyramidAt(unormals, OdometryFramePyramidType::PYR_NORM, 0);
+    }
+
+    normals = unormals.getMat(af);
+    points  = upoints.getMat(af);
+    patchNaNs(points);
+    anfas = counterOfValid(points);
+
+    if (cvtest::debugLevel > 0)
+        displayImage(depth, points, normals, depthFactor, lightPose);
+
+    if (testSrcType == VolumeTestSrcType::MAT) // Odometry frame or Mats
+    {
+        volume.raycast(poses[17].matrix, upoints1, unormals1);
+    }
+    else if (testSrcType == VolumeTestSrcType::ODOMETRY_FRAME)
+    {
+        volume.raycast(poses[17].matrix, odf);
+        odf.getPyramidAt(upoints1, OdometryFramePyramidType::PYR_CLOUD, 0);
+        odf.getPyramidAt(unormals1, OdometryFramePyramidType::PYR_NORM, 0);
+    }
+
+    normals = unormals1.getMat(af);
+    points  = upoints1.getMat(af);
+    patchNaNs(points);
+    profile = counterOfValid(points);
+
+    if (cvtest::debugLevel > 0)
+        displayImage(depth, points, normals, depthFactor, lightPose);
+
+    // TODO: why profile == 2*anfas ?
+    float percentValidity = float(anfas) / float(profile);
+
+    ASSERT_NE(profile, 0) << "There is no points in profile";
+    ASSERT_NE(anfas, 0) << "There is no points in anfas";
+    ASSERT_LT(abs(0.5 - percentValidity), 0.3) << "percentValidity out of [0.3; 0.7] (percentValidity=" << percentValidity << ")";
+}
+
+TEST(TSDF_GPU, raycast_custom_framesize_normals_mat)
+{
+    normal_test_custom_framesize(VolumeType::TSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::MAT);
+}
+
+TEST(TSDF_GPU, raycast_custom_framesize_normals_frame)
+{
+    normal_test_custom_framesize(VolumeType::TSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::ODOMETRY_FRAME);
+}
+
+TEST(TSDF_GPU, raycast_common_framesize_normals_mat)
+{
+    normal_test_common_framesize(VolumeType::TSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::MAT);
+}
+
+TEST(TSDF_GPU, raycast_common_framesize_normals_frame)
+{
+    normal_test_common_framesize(VolumeType::TSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::ODOMETRY_FRAME);
+}
+
+TEST(TSDF_GPU, fetch_points_normals)
+{
+    normal_test_custom_framesize(VolumeType::TSDF, VolumeTestFunction::FETCH_POINTS_NORMALS, VolumeTestSrcType::MAT);
+}
+
+TEST(TSDF_GPU, fetch_normals)
+{
+    normal_test_custom_framesize(VolumeType::TSDF, VolumeTestFunction::FETCH_NORMALS, VolumeTestSrcType::MAT);
+}
+
+TEST(TSDF_GPU, valid_points_custom_framesize_mat)
+{
+    valid_points_test_custom_framesize(VolumeType::TSDF, VolumeTestSrcType::MAT);
+}
+
+TEST(TSDF_GPU, valid_points_custom_framesize_frame)
+{
+    valid_points_test_custom_framesize(VolumeType::TSDF, VolumeTestSrcType::ODOMETRY_FRAME);
+}
+
+TEST(TSDF_GPU, valid_points_common_framesize_mat)
+{
+    valid_points_test_common_framesize(VolumeType::TSDF, VolumeTestSrcType::MAT);
+}
+
+TEST(TSDF_GPU, valid_points_common_framesize_frame)
+{
+    valid_points_test_common_framesize(VolumeType::TSDF, VolumeTestSrcType::ODOMETRY_FRAME);
+}
+
+TEST(HashTSDF_GPU, raycast_custom_framesize_normals_mat)
+{
+    normal_test_custom_framesize(VolumeType::HashTSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::MAT);
+}
+
+TEST(HashTSDF_GPU, raycast_custom_framesize_normals_frame)
+{
+    normal_test_custom_framesize(VolumeType::HashTSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::ODOMETRY_FRAME);
+}
+
+TEST(HashTSDF_GPU, raycast_common_framesize_normals_mat)
+{
+    normal_test_common_framesize(VolumeType::HashTSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::MAT);
+}
+
+TEST(HashTSDF_GPU, raycast_common_framesize_normals_frame)
+{
+    normal_test_common_framesize(VolumeType::HashTSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::ODOMETRY_FRAME);
+}
+
+TEST(HashTSDF_GPU, fetch_points_normals)
+{
+    normal_test_custom_framesize(VolumeType::HashTSDF, VolumeTestFunction::FETCH_POINTS_NORMALS, VolumeTestSrcType::MAT);
+}
+
+TEST(HashTSDF_GPU, fetch_normals)
+{
+    normal_test_custom_framesize(VolumeType::HashTSDF, VolumeTestFunction::FETCH_NORMALS, VolumeTestSrcType::MAT);
+}
+
+TEST(HashTSDF_GPU, valid_points_custom_framesize_mat)
+{
+    valid_points_test_custom_framesize(VolumeType::HashTSDF, VolumeTestSrcType::MAT);
+}
+
+TEST(HashTSDF_GPU, valid_points_custom_framesize_frame)
+{
+    valid_points_test_custom_framesize(VolumeType::HashTSDF, VolumeTestSrcType::ODOMETRY_FRAME);
+}
+
+TEST(HashTSDF_GPU, valid_points_common_framesize_mat)
+{
+    valid_points_test_common_framesize(VolumeType::HashTSDF, VolumeTestSrcType::MAT);
+}
+
+TEST(HashTSDF_GPU, valid_points_common_framesize_frame)
+{
+    valid_points_test_common_framesize(VolumeType::HashTSDF, VolumeTestSrcType::ODOMETRY_FRAME);
+}
 
 }
 }  // namespace
diff --git a/modules/3d/test/test_tsdf.cpp b/modules/3d/test/test_tsdf.cpp
index 91fc45355b..eafcc53e84 100644
--- a/modules/3d/test/test_tsdf.cpp
+++ b/modules/3d/test/test_tsdf.cpp
@@ -88,11 +88,78 @@ struct RenderInvoker : ParallelLoopBody
     bool onlySemisphere;
 };
 
+template<class Scene>
+struct RenderColorInvoker : ParallelLoopBody
+{
+    RenderColorInvoker(Mat_<Vec3f>& _frame, Affine3f _pose,
+        Reprojector _reproj,
+        float _depthFactor, bool _onlySemisphere) : ParallelLoopBody(),
+        frame(_frame),
+        pose(_pose),
+        reproj(_reproj),
+        depthFactor(_depthFactor),
+        onlySemisphere(_onlySemisphere)
+    { }
+
+    virtual void operator ()(const cv::Range& r) const
+    {
+        for (int y = r.start; y < r.end; y++)
+        {
+            Vec3f* frameRow = frame[y];
+            for (int x = 0; x < frame.cols; x++)
+            {
+                Vec3f pix = 0;
+
+                Point3f orig = pose.translation();
+                // direction through pixel
+                Point3f screenVec = reproj(Point3f((float)x, (float)y, 1.f));
+                Point3f dir = normalize(Vec3f(pose.rotation() * screenVec));
+                // screen space axis
+                dir.y = -dir.y;
+
+                const float maxDepth = 20.f;
+                const float maxSteps = 256;
+                float t = 0.f;
+                for (int step = 0; step < maxSteps && t < maxDepth; step++)
+                {
+                    Point3f p = orig + dir * t;
+                    float d = Scene::map(p, onlySemisphere);
+                    if (d < 0.000001f)
+                    {
+                        float m = 0.25f;
+                        float p0 = float(abs(fmod(p.x, m)) > m / 2.f);
+                        float p1 = float(abs(fmod(p.y, m)) > m / 2.f);
+                        float p2 = float(abs(fmod(p.z, m)) > m / 2.f);
+
+                        pix[0] = p0 + p1;
+                        pix[1] = p1 + p2;
+                        pix[2] = p0 + p2;
+
+                        pix *= 128.f;
+                        break;
+                    }
+                    t += d;
+                }
+
+                frameRow[x] = pix;
+            }
+        }
+    }
+
+    Mat_<Vec3f>& frame;
+    Affine3f pose;
+    Reprojector reproj;
+    float depthFactor;
+    bool onlySemisphere;
+};
+
+
 struct Scene
 {
     virtual ~Scene() {}
     static Ptr<Scene> create(Size sz, Matx33f _intr, float _depthFactor, bool onlySemisphere);
     virtual Mat depth(Affine3f pose) = 0;
+    virtual Mat rgb(Affine3f pose) = 0;
     virtual std::vector<Affine3f> getPoses() = 0;
 };
 
@@ -143,6 +210,17 @@ struct SemisphereScene : Scene
         return std::move(frame);
     }
 
+    Mat rgb(Affine3f pose) override
+    {
+        Mat_<Vec3f> frame(frameSize);
+        Reprojector reproj(intr);
+
+        Range range(0, frame.rows);
+        parallel_for_(range, RenderColorInvoker<SemisphereScene>(frame, pose, reproj, depthFactor, onlySemisphere));
+
+        return std::move(frame);
+    }
+
     std::vector<Affine3f> getPoses() override
     {
         std::vector<Affine3f> poses;
@@ -173,6 +251,7 @@ Ptr<Scene> Scene::create(Size sz, Matx33f _intr, float _depthFactor, bool _onlyS
 
 typedef cv::Vec4f ptype;
 typedef cv::Mat_< ptype > Points;
+typedef cv::Mat_< ptype > Colors;
 typedef Points Normals;
 typedef Size2i Size;
 
@@ -271,84 +350,49 @@ void renderPointsNormals(InputArray _points, InputArray _normals, OutputArray im
             }
         }, nstripes);
 }
-// ----------------------------
-
-static const bool display = false;
-static const bool parallelCheck = false;
-
-class Settings
+void renderPointsNormalsColors(InputArray _points, InputArray, InputArray _colors, OutputArray image, Affine3f)
 {
-public:
-    float depthFactor;
-    Matx33f intr;
-    Size frameSize;
-    Vec3f lightPose;
+    Size sz = _points.size();
+    image.create(sz, CV_8UC4);
 
-    Ptr<Volume> volume;
-    Ptr<Scene> scene;
-    std::vector<Affine3f> poses;
+    Points  points  = _points.getMat();
+    Colors  colors  = _colors.getMat();
 
-    Settings(bool useHashTSDF, bool onlySemisphere)
-    {
-        frameSize = Size(640, 480);
+    Mat_<Vec4b> img = image.getMat();
 
-        float fx, fy, cx, cy;
-        fx = fy = 525.f;
-        cx = frameSize.width / 2 - 0.5f;
-        cy = frameSize.height / 2 - 0.5f;
-        intr = Matx33f(fx,  0, cx,
-                        0, fy, cy,
-                        0,  0,  1);
-
-        // 5000 for the 16-bit PNG files
-        // 1 for the 32-bit float images in the ROS bag files
-        depthFactor = 5000;
-
-        Vec3i volumeDims = Vec3i::all(512); //number of voxels
-
-        float volSize = 3.f;
-        float voxelSize = volSize / 512.f; //meters
-
-        // default pose of volume cube
-        Affine3f volumePose = Affine3f().translate(Vec3f(-volSize / 2.f, -volSize / 2.f, 0.5f));
-        float tsdf_trunc_dist = 7 * voxelSize; // about 0.04f in meters
-        int tsdf_max_weight = 64;   //frames
-
-        float raycast_step_factor = 0.25f;  //in voxel sizes
-        // gradient delta factor is fixed at 1.0f and is not used
-        //p.gradient_delta_factor = 0.5f; //in voxel sizes
-
-        //p.lightPose = p.volume_pose.translation()/4; //meters
-        lightPose = Vec3f::all(0.f); //meters
-
-        // depth truncation is not used by default but can be useful in some scenes
-        float truncateThreshold = 0.f; //meters
-
-        VolumeParams::VolumeKind volumeKind = VolumeParams::VolumeKind::TSDF;
-
-        if (useHashTSDF)
+    Range range(0, sz.height);
+    const int nstripes = -1;
+    parallel_for_(range, [&](const Range&)
         {
-            volumeKind = VolumeParams::VolumeKind::HASHTSDF;
-            truncateThreshold = 4.f;
-        }
-        else
-        {
-            volSize = 3.f;
-            volumeDims = Vec3i::all(128); //number of voxels
-            voxelSize = volSize / 128.f;
-            tsdf_trunc_dist = 2 * voxelSize; // 0.04f in meters
+            for (int y = range.start; y < range.end; y++)
+            {
+                Vec4b* imgRow = img[y];
+                const ptype* ptsRow = points[y];
+                const ptype* clrRow = colors[y];
 
-            raycast_step_factor = 0.75f;  //in voxel sizes
-        }
+                for (int x = 0; x < sz.width; x++)
+                {
+                    Point3f p = fromPtype(ptsRow[x]);
+                    Point3f c = fromPtype(clrRow[x]);
 
-        volume = makeVolume(volumeKind, voxelSize, volumePose.matrix,
-                            raycast_step_factor, tsdf_trunc_dist, tsdf_max_weight,
-                            truncateThreshold, volumeDims[0], volumeDims[1], volumeDims[2]);
+                    Vec4b color;
 
-        scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
-        poses = scene->getPoses();
-    }
-};
+                    if (cvIsNaN(p.x) || cvIsNaN(p.y) || cvIsNaN(p.z)
+                        || cvIsNaN(c.x) || cvIsNaN(c.y) || cvIsNaN(c.z))
+                    {
+                        color = Vec4b(0, 32, 0, 0);
+                    }
+                    else
+                    {
+                        color = Vec4b((uchar)c.x, (uchar)c.y, (uchar)c.z, (uchar)0);
+                    }
+
+                    imgRow[x] = color;
+                }
+            }
+        }, nstripes);
+}
+// ----------------------------
 
 void displayImage(Mat depth, Mat points, Mat normals, float depthFactor, Vec3f lightPose)
 {
@@ -358,22 +402,38 @@ void displayImage(Mat depth, Mat points, Mat normals, float depthFactor, Vec3f l
     renderPointsNormals(points, normals, image, lightPose);
     imshow("render", image);
     waitKey(2000);
+    destroyAllWindows();
+}
+
+void displayColorImage(Mat depth, Mat rgb, Mat points, Mat normals, Mat colors, float depthFactor, Vec3f lightPose)
+{
+    Mat image;
+    patchNaNs(points);
+    imshow("depth", depth * (1.f / depthFactor / 4.f));
+    imshow("rgb", rgb * (1.f / 255.f));
+    renderPointsNormalsColors(points, normals, colors, image, lightPose);
+    imshow("render", image);
+    waitKey(2000);
+    destroyAllWindows();
 }
 
 void normalsCheck(Mat normals)
 {
     Vec4f vector;
+    int counter = 0;
     for (auto pvector = normals.begin<Vec4f>(); pvector < normals.end<Vec4f>(); pvector++)
     {
         vector = *pvector;
         if (!cvIsNaN(vector[0]))
         {
+            counter++;
             float length = vector[0] * vector[0] +
                 vector[1] * vector[1] +
                 vector[2] * vector[2];
             ASSERT_LT(abs(1 - length), 0.0001f) << "There is normal with length != 1";
         }
     }
+    ASSERT_GT(counter, 0) << "There are not normals";
 }
 
 int counterOfValid(Mat points)
@@ -397,101 +457,269 @@ int counterOfValid(Mat points)
     return count;
 }
 
-void normal_test(bool isHashTSDF, bool isRaycast, bool isFetchPointsNormals, bool isFetchNormals)
+
+enum class VolumeTestFunction
 {
-    auto normalCheck = [](Vec4f& vector, const int*)
+    RAYCAST = 0,
+    FETCH_NORMALS = 1,
+    FETCH_POINTS_NORMALS = 2
+};
+
+enum class VolumeTestSrcType
+{
+    MAT = 0,
+    ODOMETRY_FRAME = 1
+};
+
+void normal_test_custom_framesize(VolumeType volumeType, VolumeTestFunction testFunction, VolumeTestSrcType testSrcType)
+{
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = false;
+    float depthFactor = vs.getDepthFactor();
+    Vec3f lightPose = Vec3f::all(0.f);
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    Mat depth = scene->depth(poses[0]);
+    Mat rgb = scene->rgb(poses[0]);
+    Mat points, normals, tmpnormals, colors;
+
+    OdometryFrame odf;
+    odf.setDepth(depth);
+    odf.setImage(rgb);
+
+    if (testSrcType == VolumeTestSrcType::MAT)
     {
-        if (!cvIsNaN(vector[0]))
-        {
-            float length = vector[0] * vector[0] +
-                vector[1] * vector[1] +
-                vector[2] * vector[2];
-            ASSERT_LT(abs(1 - length), 0.0001f) << "There is normal with length != 1";
-        }
-    };
-
-    Settings settings(isHashTSDF, false);
-
-    Mat depth = settings.scene->depth(settings.poses[0]);
-    UMat _points, _normals, _tmpnormals;
-    UMat _newPoints, _newNormals;
-    Mat  points, normals;
-    AccessFlag af = ACCESS_READ;
-
-    settings.volume->integrate(depth, settings.depthFactor, settings.poses[0].matrix, settings.intr);
-
-    if (isRaycast)
-    {
-        settings.volume->raycast(settings.poses[0].matrix, settings.intr, settings.frameSize, _points, _normals);
-    }
-    if (isFetchPointsNormals)
-    {
-        settings.volume->fetchPointsNormals(_points, _normals);
-    }
-    if (isFetchNormals)
-    {
-        settings.volume->fetchPointsNormals(_points, _tmpnormals);
-        settings.volume->fetchNormals(_points, _normals);
-    }
-
-    normals = _normals.getMat(af);
-    points = _points.getMat(af);
-
-    if (parallelCheck)
-        normals.forEach<Vec4f>(normalCheck);
-    else
-        normalsCheck(normals);
-
-    if (isRaycast && display)
-        displayImage(depth, points, normals, settings.depthFactor, settings.lightPose);
-
-    if (isRaycast)
-    {
-        settings.volume->raycast(settings.poses[17].matrix, settings.intr, settings.frameSize, _newPoints, _newNormals);
-        normals = _newNormals.getMat(af);
-        points = _newPoints.getMat(af);
-        normalsCheck(normals);
-
-        if (parallelCheck)
-            normals.forEach<Vec4f>(normalCheck);
+        if (volumeType == VolumeType::ColorTSDF)
+            volume.integrate(depth, rgb, poses[0].matrix);
         else
-            normalsCheck(normals);
-
-        if (display)
-            displayImage(depth, points, normals, settings.depthFactor, settings.lightPose);
+            volume.integrate(depth, poses[0].matrix);
+    }
+    else
+    {
+        volume.integrate(odf, poses[0].matrix);
     }
 
-    points.release(); normals.release();
+    if (testFunction == VolumeTestFunction::RAYCAST)
+    {
+        if (testSrcType == VolumeTestSrcType::MAT)
+        {
+            if (volumeType == VolumeType::ColorTSDF)
+                volume.raycast(poses[0].matrix, frameSize.height, frameSize.width, points, normals, colors);
+            else
+                volume.raycast(poses[0].matrix, frameSize.height, frameSize.width, points, normals);
+        }
+        else if (testSrcType == VolumeTestSrcType::ODOMETRY_FRAME)
+        {
+            volume.raycast(poses[0].matrix, frameSize.height, frameSize.width, odf);
+            odf.getPyramidAt(points, OdometryFramePyramidType::PYR_CLOUD, 0);
+            odf.getPyramidAt(normals, OdometryFramePyramidType::PYR_NORM, 0);
+            if (volumeType == VolumeType::ColorTSDF)
+                odf.getPyramidAt(colors, OdometryFramePyramidType::PYR_IMAGE, 0);
+        }
+    }
+    else if (testFunction == VolumeTestFunction::FETCH_NORMALS)
+    {
+        if (volumeType == VolumeType::ColorTSDF)
+            volume.raycast(poses[0].matrix, frameSize.height, frameSize.width, points, tmpnormals, colors);
+        else
+            // hash_tsdf cpu don't works with raycast normals
+            //volume.raycast(poses[0].matrix, frameSize.height, frameSize.width, points, tmpnormals);
+            volume.fetchPointsNormals(points, tmpnormals);
+
+        volume.fetchNormals(points, normals);
+    }
+    else if (testFunction == VolumeTestFunction::FETCH_POINTS_NORMALS)
+    {
+        volume.fetchPointsNormals(points, normals);
+    }
+
+    if (testFunction == VolumeTestFunction::RAYCAST && cvtest::debugLevel > 0)
+    {
+        if (volumeType == VolumeType::ColorTSDF)
+            displayColorImage(depth, rgb, points, normals, colors, depthFactor, lightPose);
+        else
+            displayImage(depth, points, normals, depthFactor, lightPose);
+    }
+
+    normalsCheck(normals);
 }
 
-void valid_points_test(bool isHashTSDF)
+void normal_test_common_framesize(VolumeType volumeType, VolumeTestFunction testFunction, VolumeTestSrcType testSrcType)
 {
-    Settings settings(isHashTSDF, true);
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
 
-    Mat depth = settings.scene->depth(settings.poses[0]);
-    UMat _points, _normals, _newPoints, _newNormals;
-    AccessFlag af = ACCESS_READ;
-    Mat  points, normals;
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = false;
+    float depthFactor = vs.getDepthFactor();
+    Vec3f lightPose = Vec3f::all(0.f);
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    Mat depth = scene->depth(poses[0]);
+    Mat rgb = scene->rgb(poses[0]);
+    Mat points, normals, tmpnormals, colors;
+
+    OdometryFrame odf;
+    odf.setDepth(depth);
+    odf.setImage(rgb);
+
+    if (testSrcType == VolumeTestSrcType::MAT)
+    {
+        if (volumeType == VolumeType::ColorTSDF)
+            volume.integrate(depth, rgb, poses[0].matrix);
+        else
+            volume.integrate(depth, poses[0].matrix);
+    }
+    else
+    {
+        volume.integrate(odf, poses[0].matrix);
+    }
+
+    if (testFunction == VolumeTestFunction::RAYCAST)
+    {
+        if (testSrcType == VolumeTestSrcType::MAT)
+        {
+            if (volumeType == VolumeType::ColorTSDF)
+                volume.raycast(poses[0].matrix, points, normals, colors);
+            else
+                volume.raycast(poses[0].matrix, points, normals);
+        }
+        else if (testSrcType == VolumeTestSrcType::ODOMETRY_FRAME)
+        {
+            volume.raycast(poses[0].matrix, odf);
+            odf.getPyramidAt(points, OdometryFramePyramidType::PYR_CLOUD, 0);
+            odf.getPyramidAt(normals, OdometryFramePyramidType::PYR_NORM, 0);
+            if (volumeType == VolumeType::ColorTSDF)
+                odf.getPyramidAt(colors, OdometryFramePyramidType::PYR_IMAGE, 0);
+        }
+    }
+    else if (testFunction == VolumeTestFunction::FETCH_NORMALS)
+    {
+        if (volumeType == VolumeType::ColorTSDF)
+            volume.raycast(poses[0].matrix, points, tmpnormals, colors);
+        else
+            // hash_tsdf cpu don't works with raycast normals
+            //volume.raycast(poses[0].matrix, points, tmpnormals);
+            volume.fetchPointsNormals(points, tmpnormals);
+
+        volume.fetchNormals(points, normals);
+    }
+    else if (testFunction == VolumeTestFunction::FETCH_POINTS_NORMALS)
+    {
+        volume.fetchPointsNormals(points, normals);
+    }
+
+    if (testFunction == VolumeTestFunction::RAYCAST && cvtest::debugLevel > 0)
+    {
+        if (volumeType == VolumeType::ColorTSDF)
+            displayColorImage(depth, rgb, points, normals, colors, depthFactor, lightPose);
+        else
+            displayImage(depth, points, normals, depthFactor, lightPose);
+    }
+
+    normalsCheck(normals);
+}
+
+void valid_points_test_custom_framesize(VolumeType volumeType, VolumeTestSrcType testSrcType)
+{
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = true;
+    float depthFactor = vs.getDepthFactor();
+    Vec3f lightPose = Vec3f::all(0.f);
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    Mat depth = scene->depth(poses[0]);
+    Mat rgb = scene->rgb(poses[0]);
+    Mat points, normals, colors, newPoints, newNormals;
     int anfas, profile;
 
-    settings.volume->integrate(depth, settings.depthFactor, settings.poses[0].matrix, settings.intr);
-    settings.volume->raycast(settings.poses[0].matrix, settings.intr, settings.frameSize, _points, _normals);
-    normals = _normals.getMat(af);
-    points = _points.getMat(af);
+    OdometryFrame odf;
+    odf.setDepth(depth);
+    odf.setImage(rgb);
+
+    if (testSrcType == VolumeTestSrcType::MAT)
+    {
+        if (volumeType == VolumeType::ColorTSDF)
+            volume.integrate(depth, rgb, poses[0].matrix);
+        else
+            volume.integrate(depth, poses[0].matrix);
+    }
+    else
+    {
+        volume.integrate(odf, poses[0].matrix);
+    }
+
+    if (testSrcType == VolumeTestSrcType::MAT) // Odometry frame or Mats
+    {
+        if (volumeType == VolumeType::ColorTSDF)
+            volume.raycast(poses[0].matrix, frameSize.height, frameSize.width, points, normals, colors);
+        else
+            volume.raycast(poses[0].matrix, frameSize.height, frameSize.width, points, normals);
+    }
+    else if (testSrcType == VolumeTestSrcType::ODOMETRY_FRAME)
+    {
+        volume.raycast(poses[0].matrix, frameSize.height, frameSize.width, odf);
+        odf.getPyramidAt(points, OdometryFramePyramidType::PYR_CLOUD, 0);
+        odf.getPyramidAt(normals, OdometryFramePyramidType::PYR_NORM, 0);
+        if (volumeType == VolumeType::ColorTSDF)
+            odf.getPyramidAt(colors, OdometryFramePyramidType::PYR_IMAGE, 0);
+    }
+
     patchNaNs(points);
     anfas = counterOfValid(points);
 
-    if (display)
-        displayImage(depth, points, normals, settings.depthFactor, settings.lightPose);
+    if (cvtest::debugLevel > 0)
+    {
+        if (volumeType == VolumeType::ColorTSDF)
+            displayColorImage(depth, rgb, points, normals, colors, depthFactor, lightPose);
+        else
+            displayImage(depth, points, normals, depthFactor, lightPose);
+    }
+
+    points.release();
+    normals.release();
+
+    if (testSrcType == VolumeTestSrcType::MAT) // Odometry frame or Mats
+    {
+        if (volumeType == VolumeType::ColorTSDF)
+            volume.raycast(poses[17].matrix, frameSize.height, frameSize.width, points, normals, colors);
+        else
+            volume.raycast(poses[17].matrix, frameSize.height, frameSize.width, points, normals);
+    }
+    else if (testSrcType == VolumeTestSrcType::ODOMETRY_FRAME)
+    {
+        volume.raycast(poses[17].matrix, frameSize.height, frameSize.width, odf);
+        odf.getPyramidAt(points, OdometryFramePyramidType::PYR_CLOUD, 0);
+        odf.getPyramidAt(normals, OdometryFramePyramidType::PYR_NORM, 0);
+        if (volumeType == VolumeType::ColorTSDF)
+            odf.getPyramidAt(colors, OdometryFramePyramidType::PYR_IMAGE, 0);
+    }
 
-    settings.volume->raycast(settings.poses[17].matrix, settings.intr, settings.frameSize, _newPoints, _newNormals);
-    normals = _newNormals.getMat(af);
-    points = _newPoints.getMat(af);
     patchNaNs(points);
     profile = counterOfValid(points);
 
-    if (display)
-        displayImage(depth, points, normals, settings.depthFactor, settings.lightPose);
+    if (cvtest::debugLevel > 0)
+    {
+        if (volumeType == VolumeType::ColorTSDF)
+            displayColorImage(depth, rgb, points, normals, colors, depthFactor, lightPose);
+        else
+            displayImage(depth, points, normals, depthFactor, lightPose);
+    }
 
     // TODO: why profile == 2*anfas ?
     float percentValidity = float(anfas) / float(profile);
@@ -501,72 +729,469 @@ void valid_points_test(bool isHashTSDF)
     ASSERT_LT(abs(0.5 - percentValidity), 0.3) << "percentValidity out of [0.3; 0.7] (percentValidity=" << percentValidity << ")";
 }
 
-#ifndef HAVE_OPENCL
-TEST(TSDF, raycast_normals) { normal_test(false, true, false, false); }
-TEST(TSDF, fetch_points_normals) { normal_test(false, false, true, false); }
-TEST(TSDF, fetch_normals) { normal_test(false, false, false, true); }
-TEST(TSDF, valid_points) { valid_points_test(false); }
+void valid_points_test_common_framesize(VolumeType volumeType, VolumeTestSrcType testSrcType)
+{
+    VolumeSettings vs(volumeType);
+    Volume volume(volumeType, vs);
+
+    Size frameSize(vs.getRaycastWidth(), vs.getRaycastHeight());
+    Matx33f intr;
+    vs.getCameraIntegrateIntrinsics(intr);
+    bool onlySemisphere = true;
+    float depthFactor = vs.getDepthFactor();
+    Vec3f lightPose = Vec3f::all(0.f);
+    Ptr<Scene> scene = Scene::create(frameSize, intr, depthFactor, onlySemisphere);
+    std::vector<Affine3f> poses = scene->getPoses();
+
+    Mat depth = scene->depth(poses[0]);
+    Mat rgb = scene->rgb(poses[0]);
+    Mat points, normals, colors, newPoints, newNormals;
+    int anfas, profile;
+
+    OdometryFrame odf;
+    odf.setDepth(depth);
+    odf.setImage(rgb);
+
+    if (testSrcType == VolumeTestSrcType::MAT)
+    {
+        if (volumeType == VolumeType::ColorTSDF)
+            volume.integrate(depth, rgb, poses[0].matrix);
+        else
+            volume.integrate(depth, poses[0].matrix);
+    }
+    else
+    {
+        volume.integrate(odf, poses[0].matrix);
+    }
+
+    if (testSrcType == VolumeTestSrcType::MAT) // Odometry frame or Mats
+    {
+        if (volumeType == VolumeType::ColorTSDF)
+            volume.raycast(poses[0].matrix, points, normals, colors);
+        else
+            volume.raycast(poses[0].matrix, points, normals);
+    }
+    else if (testSrcType == VolumeTestSrcType::ODOMETRY_FRAME)
+    {
+        volume.raycast(poses[0].matrix, odf);
+        odf.getPyramidAt(points, OdometryFramePyramidType::PYR_CLOUD, 0);
+        odf.getPyramidAt(normals, OdometryFramePyramidType::PYR_NORM, 0);
+        if (volumeType == VolumeType::ColorTSDF)
+            odf.getPyramidAt(colors, OdometryFramePyramidType::PYR_IMAGE, 0);
+    }
+
+    patchNaNs(points);
+    anfas = counterOfValid(points);
+
+    if (cvtest::debugLevel > 0)
+    {
+        if (volumeType == VolumeType::ColorTSDF)
+            displayColorImage(depth, rgb, points, normals, colors, depthFactor, lightPose);
+        else
+            displayImage(depth, points, normals, depthFactor, lightPose);
+    }
+
+    points.release();
+    normals.release();
+
+    if (testSrcType == VolumeTestSrcType::MAT) // Odometry frame or Mats
+    {
+        if (volumeType == VolumeType::ColorTSDF)
+            volume.raycast(poses[17].matrix, points, normals, colors);
+        else
+            volume.raycast(poses[17].matrix, points, normals);
+    }
+    else if (testSrcType == VolumeTestSrcType::ODOMETRY_FRAME)
+    {
+        volume.raycast(poses[17].matrix, odf);
+        odf.getPyramidAt(points, OdometryFramePyramidType::PYR_CLOUD, 0);
+        odf.getPyramidAt(normals, OdometryFramePyramidType::PYR_NORM, 0);
+        if (volumeType == VolumeType::ColorTSDF)
+            odf.getPyramidAt(colors, OdometryFramePyramidType::PYR_IMAGE, 0);
+    }
+
+    patchNaNs(points);
+    profile = counterOfValid(points);
+
+    if (cvtest::debugLevel > 0)
+    {
+        if (volumeType == VolumeType::ColorTSDF)
+            displayColorImage(depth, rgb, points, normals, colors, depthFactor, lightPose);
+        else
+            displayImage(depth, points, normals, depthFactor, lightPose);
+    }
+
+    // TODO: why profile == 2*anfas ?
+    float percentValidity = float(anfas) / float(profile);
+
+    ASSERT_NE(profile, 0) << "There is no points in profile";
+    ASSERT_NE(anfas, 0) << "There is no points in anfas";
+    ASSERT_LT(abs(0.5 - percentValidity), 0.3) << "percentValidity out of [0.3; 0.7] (percentValidity=" << percentValidity << ")";
+}
+
+
+#ifndef HAVE_OPENCL
+TEST(TSDF, raycast_custom_framesize_normals_mat)
+{
+    normal_test_custom_framesize(VolumeType::TSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::MAT);
+}
+
+TEST(TSDF, raycast_custom_framesize_normals_frame)
+{
+    normal_test_custom_framesize(VolumeType::TSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::ODOMETRY_FRAME);
+}
+
+TEST(TSDF, raycast_common_framesize_normals_mat)
+{
+    normal_test_common_framesize(VolumeType::TSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::MAT);
+}
+
+TEST(TSDF, raycast_common_framesize_normals_frame)
+{
+    normal_test_custom_framesize(VolumeType::TSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::ODOMETRY_FRAME);
+}
+
+TEST(TSDF, fetch_points_normals)
+{
+    normal_test_custom_framesize(VolumeType::TSDF, VolumeTestFunction::FETCH_POINTS_NORMALS, VolumeTestSrcType::MAT);
+}
+
+TEST(TSDF, fetch_normals)
+{
+    normal_test_custom_framesize(VolumeType::TSDF, VolumeTestFunction::FETCH_NORMALS, VolumeTestSrcType::MAT);
+}
+
+TEST(TSDF, valid_points_custom_framesize_mat)
+{
+    valid_points_test_custom_framesize(VolumeType::TSDF, VolumeTestSrcType::MAT);
+}
+
+TEST(TSDF, valid_points_custom_framesize_frame)
+{
+    valid_points_test_custom_framesize(VolumeType::TSDF, VolumeTestSrcType::ODOMETRY_FRAME);
+}
+
+TEST(TSDF, valid_points_common_framesize_mat)
+{
+    valid_points_test_common_framesize(VolumeType::TSDF, VolumeTestSrcType::MAT);
+}
+
+TEST(TSDF, valid_points_common_framesize_frame)
+{
+    valid_points_test_common_framesize(VolumeType::TSDF, VolumeTestSrcType::ODOMETRY_FRAME);
+}
+
+TEST(HashTSDF, raycast_custom_framesize_normals_mat)
+{
+    normal_test_custom_framesize(VolumeType::HashTSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::MAT);
+}
+
+TEST(HashTSDF, raycast_custom_framesize_normals_frame)
+{
+    normal_test_custom_framesize(VolumeType::HashTSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::ODOMETRY_FRAME);
+}
+
+TEST(HashTSDF, raycast_common_framesize_normals_mat)
+{
+    normal_test_custom_framesize(VolumeType::HashTSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::MAT);
+}
+
+TEST(HashTSDF, raycast_common_framesize_normals_frame)
+{
+    normal_test_common_framesize(VolumeType::HashTSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::ODOMETRY_FRAME);
+}
+
+TEST(HashTSDF, fetch_points_normals)
+{
+    normal_test_custom_framesize(VolumeType::HashTSDF, VolumeTestFunction::FETCH_POINTS_NORMALS, VolumeTestSrcType::MAT);
+}
+
+TEST(HashTSDF, fetch_normals)
+{
+    normal_test_custom_framesize(VolumeType::HashTSDF, VolumeTestFunction::FETCH_NORMALS, VolumeTestSrcType::MAT);
+}
+
+TEST(HashTSDF, valid_points_custom_framesize_mat)
+{
+    valid_points_test_custom_framesize(VolumeType::HashTSDF, VolumeTestSrcType::MAT);
+}
+
+TEST(HashTSDF, valid_points_custom_framesize_frame)
+{
+    valid_points_test_custom_framesize(VolumeType::HashTSDF, VolumeTestSrcType::ODOMETRY_FRAME);
+}
+
+TEST(HashTSDF, valid_points_common_framesize_mat)
+{
+    valid_points_test_common_framesize(VolumeType::HashTSDF, VolumeTestSrcType::MAT);
+}
+
+TEST(HashTSDF, valid_points_common_framesize_frame)
+{
+    valid_points_test_common_framesize(VolumeType::HashTSDF, VolumeTestSrcType::ODOMETRY_FRAME);
+}
+
+TEST(ColorTSDF, raycast_custom_framesize_normals_mat)
+{
+    normal_test_custom_framesize(VolumeType::ColorTSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::MAT);
+}
+
+TEST(ColorTSDF, raycast_custom_framesize_normals_frame)
+{
+    normal_test_custom_framesize(VolumeType::ColorTSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::ODOMETRY_FRAME);
+}
+
+TEST(ColorTSDF, raycast_common_framesize_normals_mat)
+{
+    normal_test_common_framesize(VolumeType::ColorTSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::MAT);
+}
+
+TEST(ColorTSDF, raycast_common_framesize_normals_frame)
+{
+    normal_test_common_framesize(VolumeType::ColorTSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::ODOMETRY_FRAME);
+}
+
+TEST(ColorTSDF, fetch_normals)
+{
+    normal_test_custom_framesize(VolumeType::ColorTSDF, VolumeTestFunction::FETCH_NORMALS, VolumeTestSrcType::MAT);
+}
+
+TEST(ColorTSDF, fetch_points_normals)
+{
+    normal_test_custom_framesize(VolumeType::ColorTSDF, VolumeTestFunction::FETCH_POINTS_NORMALS, VolumeTestSrcType::MAT);
+}
+
+TEST(ColorTSDF, valid_points_custom_framesize_mat)
+{
+    valid_points_test_custom_framesize(VolumeType::ColorTSDF, VolumeTestSrcType::MAT);
+}
+
+TEST(ColorTSDF, valid_points_custom_framesize_fetch)
+{
+    valid_points_test_custom_framesize(VolumeType::ColorTSDF, VolumeTestSrcType::ODOMETRY_FRAME);
+}
+
+TEST(ColorTSDF, valid_points_common_framesize_mat)
+{
+    valid_points_test_common_framesize(VolumeType::ColorTSDF, VolumeTestSrcType::MAT);
+}
+
+TEST(ColorTSDF, valid_points_common_framesize_fetch)
+{
+    valid_points_test_common_framesize(VolumeType::ColorTSDF, VolumeTestSrcType::ODOMETRY_FRAME);
+}
 
-TEST(HashTSDF, raycast_normals) { normal_test(true, true, false, false); }
-TEST(HashTSDF, fetch_points_normals) { normal_test(true, false, true, false); }
-TEST(HashTSDF, fetch_normals) { normal_test(true, false, false, true); }
-TEST(HashTSDF, valid_points) { valid_points_test(true); }
 #else
-TEST(TSDF_CPU, raycast_normals)
+TEST(TSDF_CPU, raycast_custom_framesize_normals_mat)
 {
     cv::ocl::setUseOpenCL(false);
-    normal_test(false, true, false, false);
+    normal_test_custom_framesize(VolumeType::TSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::MAT);
+    cv::ocl::setUseOpenCL(true);
+}
+
+TEST(TSDF_CPU, raycast_custom_framesize_normals_frame)
+{
+    cv::ocl::setUseOpenCL(false);
+    normal_test_custom_framesize(VolumeType::TSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::ODOMETRY_FRAME);
+    cv::ocl::setUseOpenCL(true);
+}
+
+TEST(TSDF_CPU, raycast_common_framesize_normals_mat)
+{
+    cv::ocl::setUseOpenCL(false);
+    normal_test_common_framesize(VolumeType::TSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::MAT);
+    cv::ocl::setUseOpenCL(true);
+}
+
+TEST(TSDF_CPU, raycast_common_framesize_normals_frame)
+{
+    cv::ocl::setUseOpenCL(false);
+    normal_test_custom_framesize(VolumeType::TSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::ODOMETRY_FRAME);
     cv::ocl::setUseOpenCL(true);
 }
 
 TEST(TSDF_CPU, fetch_points_normals)
 {
     cv::ocl::setUseOpenCL(false);
-    normal_test(false, false, true, false);
+    normal_test_custom_framesize(VolumeType::TSDF, VolumeTestFunction::FETCH_POINTS_NORMALS, VolumeTestSrcType::MAT);
     cv::ocl::setUseOpenCL(true);
 }
 
 TEST(TSDF_CPU, fetch_normals)
 {
     cv::ocl::setUseOpenCL(false);
-    normal_test(false, false, false, true);
+    normal_test_custom_framesize(VolumeType::TSDF, VolumeTestFunction::FETCH_NORMALS, VolumeTestSrcType::MAT);
     cv::ocl::setUseOpenCL(true);
 }
 
-TEST(TSDF_CPU, valid_points)
+TEST(TSDF_CPU, valid_points_custom_framesize_mat)
 {
     cv::ocl::setUseOpenCL(false);
-    valid_points_test(false);
+    valid_points_test_custom_framesize(VolumeType::TSDF, VolumeTestSrcType::MAT);
     cv::ocl::setUseOpenCL(true);
 }
 
-TEST(HashTSDF_CPU, raycast_normals)
+TEST(TSDF_CPU, valid_points_custom_framesize_frame)
 {
     cv::ocl::setUseOpenCL(false);
-    normal_test(true, true, false, false);
+    valid_points_test_custom_framesize(VolumeType::TSDF, VolumeTestSrcType::ODOMETRY_FRAME);
+    cv::ocl::setUseOpenCL(true);
+}
+
+TEST(TSDF_CPU, valid_points_common_framesize_mat)
+{
+    cv::ocl::setUseOpenCL(false);
+    valid_points_test_common_framesize(VolumeType::TSDF, VolumeTestSrcType::MAT);
+    cv::ocl::setUseOpenCL(true);
+}
+
+TEST(TSDF_CPU, valid_points_common_framesize_frame)
+{
+    cv::ocl::setUseOpenCL(false);
+    valid_points_test_common_framesize(VolumeType::TSDF, VolumeTestSrcType::ODOMETRY_FRAME);
+    cv::ocl::setUseOpenCL(true);
+}
+
+TEST(HashTSDF_CPU, raycast_custom_framesize_normals_mat)
+{
+    cv::ocl::setUseOpenCL(false);
+    normal_test_custom_framesize(VolumeType::HashTSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::MAT);
+    cv::ocl::setUseOpenCL(true);
+}
+
+TEST(HashTSDF_CPU, raycast_custom_framesize_normals_frame)
+{
+    cv::ocl::setUseOpenCL(false);
+    normal_test_custom_framesize(VolumeType::HashTSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::ODOMETRY_FRAME);
+    cv::ocl::setUseOpenCL(true);
+}
+
+TEST(HashTSDF_CPU, raycast_common_framesize_normals_mat)
+{
+    cv::ocl::setUseOpenCL(false);
+    normal_test_custom_framesize(VolumeType::HashTSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::MAT);
+    cv::ocl::setUseOpenCL(true);
+}
+
+TEST(HashTSDF_CPU, raycast_common_framesize_normals_frame)
+{
+    cv::ocl::setUseOpenCL(false);
+    normal_test_common_framesize(VolumeType::HashTSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::ODOMETRY_FRAME);
     cv::ocl::setUseOpenCL(true);
 }
 
 TEST(HashTSDF_CPU, fetch_points_normals)
 {
     cv::ocl::setUseOpenCL(false);
-    normal_test(true, false, true, false);
+    normal_test_custom_framesize(VolumeType::HashTSDF, VolumeTestFunction::FETCH_POINTS_NORMALS, VolumeTestSrcType::MAT);
     cv::ocl::setUseOpenCL(true);
 }
 
 TEST(HashTSDF_CPU, fetch_normals)
 {
     cv::ocl::setUseOpenCL(false);
-    normal_test(true, false, false, true);
+    normal_test_custom_framesize(VolumeType::HashTSDF, VolumeTestFunction::FETCH_NORMALS, VolumeTestSrcType::MAT);
     cv::ocl::setUseOpenCL(true);
 }
 
-TEST(HashTSDF_CPU, valid_points)
+TEST(HashTSDF_CPU, valid_points_custom_framesize_mat)
 {
     cv::ocl::setUseOpenCL(false);
-    valid_points_test(true);
+    valid_points_test_custom_framesize(VolumeType::HashTSDF, VolumeTestSrcType::MAT);
     cv::ocl::setUseOpenCL(true);
 }
+
+TEST(HashTSDF_CPU, valid_points_custom_framesize_frame)
+{
+    cv::ocl::setUseOpenCL(false);
+    valid_points_test_custom_framesize(VolumeType::HashTSDF, VolumeTestSrcType::ODOMETRY_FRAME);
+    cv::ocl::setUseOpenCL(true);
+}
+
+TEST(HashTSDF_CPU, valid_points_common_framesize_mat)
+{
+    cv::ocl::setUseOpenCL(false);
+    valid_points_test_common_framesize(VolumeType::HashTSDF, VolumeTestSrcType::MAT);
+    cv::ocl::setUseOpenCL(true);
+}
+
+TEST(HashTSDF_CPU, valid_points_common_framesize_frame)
+{
+    cv::ocl::setUseOpenCL(false);
+    valid_points_test_common_framesize(VolumeType::HashTSDF, VolumeTestSrcType::ODOMETRY_FRAME);
+    cv::ocl::setUseOpenCL(true);
+}
+
+TEST(ColorTSDF_CPU, raycast_custom_framesize_normals_mat)
+{
+    cv::ocl::setUseOpenCL(false);
+    normal_test_custom_framesize(VolumeType::ColorTSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::MAT);
+    cv::ocl::setUseOpenCL(true);
+}
+
+TEST(ColorTSDF_CPU, raycast_custom_framesize_normals_frame)
+{
+    cv::ocl::setUseOpenCL(false);
+    normal_test_custom_framesize(VolumeType::ColorTSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::ODOMETRY_FRAME);
+    cv::ocl::setUseOpenCL(true);
+}
+
+TEST(ColorTSDF_CPU, raycast_common_framesize_normals_mat)
+{
+    cv::ocl::setUseOpenCL(false);
+    normal_test_common_framesize(VolumeType::ColorTSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::MAT);
+    cv::ocl::setUseOpenCL(true);
+}
+
+TEST(ColorTSDF_CPU, raycast_common_framesize_normals_frame)
+{
+    cv::ocl::setUseOpenCL(false);
+    normal_test_common_framesize(VolumeType::ColorTSDF, VolumeTestFunction::RAYCAST, VolumeTestSrcType::ODOMETRY_FRAME);
+    cv::ocl::setUseOpenCL(true);
+}
+
+TEST(ColorTSDF_CPU, fetch_normals)
+{
+    cv::ocl::setUseOpenCL(false);
+    normal_test_custom_framesize(VolumeType::ColorTSDF, VolumeTestFunction::FETCH_NORMALS, VolumeTestSrcType::MAT);
+    cv::ocl::setUseOpenCL(true);
+}
+
+TEST(ColorTSDF_CPU, fetch_points_normals)
+{
+    cv::ocl::setUseOpenCL(false);
+    normal_test_custom_framesize(VolumeType::ColorTSDF, VolumeTestFunction::FETCH_POINTS_NORMALS, VolumeTestSrcType::MAT);
+    cv::ocl::setUseOpenCL(true);
+}
+
+TEST(ColorTSDF_CPU, valid_points_custom_framesize_mat)
+{
+    cv::ocl::setUseOpenCL(false);
+    valid_points_test_custom_framesize(VolumeType::ColorTSDF, VolumeTestSrcType::MAT);
+    cv::ocl::setUseOpenCL(true);
+}
+
+TEST(ColorTSDF_CPU, valid_points_custom_framesize_fetch)
+{
+    cv::ocl::setUseOpenCL(false);
+    valid_points_test_custom_framesize(VolumeType::ColorTSDF, VolumeTestSrcType::ODOMETRY_FRAME);
+    cv::ocl::setUseOpenCL(true);
+}
+
+TEST(ColorTSDF_CPU, valid_points_common_framesize_mat)
+{
+    cv::ocl::setUseOpenCL(false);
+    valid_points_test_common_framesize(VolumeType::ColorTSDF, VolumeTestSrcType::MAT);
+    cv::ocl::setUseOpenCL(true);
+}
+
+TEST(ColorTSDF_CPU, valid_points_common_framesize_fetch)
+{
+    cv::ocl::setUseOpenCL(false);
+    valid_points_test_common_framesize(VolumeType::ColorTSDF, VolumeTestSrcType::ODOMETRY_FRAME);
+    cv::ocl::setUseOpenCL(true);
+}
+
 #endif
 }
 }  // namespace