diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp
index 2c8c7bae1b..73365e1fc1 100644
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@@ -6979,7 +6979,11 @@ const char* typeToStr(int type)
         "float", "float2", "float3", "float4", 0, 0, 0, "float8", 0, 0, 0, 0, 0, 0, 0, "float16",
         "double", "double2", "double3", "double4", 0, 0, 0, "double8", 0, 0, 0, 0, 0, 0, 0, "double16",
         "half", "half2", "half3", "half4", 0, 0, 0, "half8", 0, 0, 0, 0, 0, 0, 0, "half16",
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // CV_16BF
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // CV_Bool
+        "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
+        "long", "long2", "long3", "long4", 0, 0, 0, "long8", 0, 0, 0, 0, 0, 0, 0, "long16",
+        "uint", "uint2", "uint3", "uint4", 0, 0, 0, "uint8", 0, 0, 0, 0, 0, 0, 0, "uint16"
     };
     int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
     const char* result = cn > 16 ? nullptr : tab[depth*16 + cn-1];
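Note on the hunk above: typeToStr() resolves the OpenCL type name as tab[depth*16 + cn-1], i.e. 16 channel slots per depth, so the new ulong/long/uint rows must sit exactly at the enum offsets of the 64-bit and 32-bit unsigned depths. A minimal sketch of the lookup (illustrative only; it assumes a build where these depth constants exist):

    #include <opencv2/core.hpp>
    #include <opencv2/core/ocl.hpp>
    #include <iostream>

    int main()
    {
        // depth*16 + cn-1: row selected by depth, column by channel count.
        std::cout << cv::ocl::typeToStr(CV_32SC1) << "\n";                 // "int"
        std::cout << cv::ocl::typeToStr(CV_MAKETYPE(CV_64S, 4)) << "\n";  // "long4", from the new row
        return 0;
    }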
diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
index 1f495d33bb..5824e1467c 100644
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -1188,6 +1188,11 @@ CV__DNN_INLINE_NS_BEGIN
         static Ptr<GroupNormLayer> create(const LayerParams &params);
     };

+    class CV_EXPORTS CastLayer : public Layer {
+    public:
+        static Ptr<CastLayer> create(const LayerParams &params);
+    };
+
 //! @}
 //! @}
 CV__DNN_INLINE_NS_END
diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp
index e8450c18f9..d0ce9c0057 100644
--- a/modules/dnn/src/init.cpp
+++ b/modules/dnn/src/init.cpp
@@ -164,6 +164,7 @@ void initializeLayerFactory()
     CV_DNN_REGISTER_LAYER_CLASS(InstanceNormalization, InstanceNormLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Attention, AttentionLayer);
     CV_DNN_REGISTER_LAYER_CLASS(GroupNormalization, GroupNormLayer);
+    CV_DNN_REGISTER_LAYER_CLASS(Cast, CastLayer);

     CV_DNN_REGISTER_LAYER_CLASS(Crop, CropLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Eltwise, EltwiseLayer);
diff --git a/modules/dnn/src/layers/cast_layer.cpp b/modules/dnn/src/layers/cast_layer.cpp
new file mode 100644
index 0000000000..4c812bd700
--- /dev/null
+++ b/modules/dnn/src/layers/cast_layer.cpp
@@ -0,0 +1,95 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#include "../precomp.hpp"
+#include "layers_common.hpp"
+
+
+namespace cv { namespace dnn {
+
+class CastLayerImpl CV_FINAL : public CastLayer
+{
+public:
+    CastLayerImpl(const LayerParams& params)
+    {
+        setParamsFrom(params);
+        outputType = params.get<int>("outputType");
+    }
+
+    virtual bool supportBackend(int backendId) CV_OVERRIDE
+    {
+        return backendId == DNN_BACKEND_OPENCV;
+    }
+
+    virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
+                                 const int requiredOutputs,
+                                 std::vector<MatShape> &outputs,
+                                 std::vector<MatShape> &internals) const CV_OVERRIDE
+    {
+        CV_CheckEQ(inputs.size(), (size_t)1, "");
+        outputs.assign(1, inputs[0]);
+        return false;
+    }
+
+    virtual void getTypes(const std::vector<MatType>& inputs,
+                          const int requiredOutputs,
+                          const int requiredInternals,
+                          std::vector<MatType>& outputs,
+                          std::vector<MatType>& internals) const CV_OVERRIDE
+    {
+        if (preferableTarget == DNN_TARGET_OPENCL_FP16 && outputType == CV_32F)
+            outputs.assign(1, CV_16F);
+        else
+            outputs.assign(1, outputType);
+    }
+
+#ifdef HAVE_OPENCL
+    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
+    {
+        std::vector<UMat> inputs, outputs;
+
+        inputs_.getUMatVector(inputs);
+        outputs_.getUMatVector(outputs);
+        CV_CheckEQ(inputs.size(), (size_t)1, "");
+        CV_CheckEQ(outputs.size(), (size_t)1, "");
+
+        if (inputs[0].depth() == outputs[0].depth())
+            inputs[0].copyTo(outputs[0]);
+        else
+            inputs[0].convertTo(outputs[0], outputs[0].depth());
+        return true;
+    }
+#endif
+
+    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
+    {
+        CV_TRACE_FUNCTION();
+        CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
+                   forward_ocl(inputs_arr, outputs_arr, internals_arr));
+
+        std::vector<Mat> inputs, outputs;
+        inputs_arr.getMatVector(inputs);
+        outputs_arr.getMatVector(outputs);
+
+        CV_CheckEQ(inputs.size(), (size_t)1, "");
+        CV_CheckEQ(outputs.size(), (size_t)1, "");
+
+        if (inputs[0].depth() == outputs[0].depth())
+            inputs[0].copyTo(outputs[0]);
+        else
+            inputs[0].convertTo(outputs[0], outputs[0].depth());
+    }
+
+private:
+    int outputType;
+};
+
+Ptr<CastLayer> CastLayer::create(const LayerParams& params)
+{
+    return makePtr<CastLayerImpl>(params);
+}
+
+}} // namespace cv::dnn
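The new layer is driven entirely by the "outputType" parameter and delegates to copyTo()/convertTo(), so it can be exercised from the public API. A minimal usage sketch (assumes a build with this patch applied, since it feeds a CV_64S input through Net; names are illustrative):

    #include <opencv2/dnn.hpp>
    using namespace cv;
    using namespace cv::dnn;

    int main()
    {
        LayerParams lp;
        lp.type = "Cast";              // registered above in init.cpp
        lp.name = "cast0";
        lp.set("outputType", CV_32S);  // parameter read by CastLayerImpl

        Net net;
        net.addLayerToPrev(lp.name, lp.type, lp);

        Mat input(std::vector<int>{2, 3}, CV_64S, Scalar(42));
        net.setInput(input);
        Mat out = net.forward();
        CV_Assert(out.depth() == CV_32S);
        return 0;
    }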
diff --git a/modules/dnn/src/layers/expand_layer.cpp b/modules/dnn/src/layers/expand_layer.cpp
index 752e741a97..d233f5be62 100644
--- a/modules/dnn/src/layers/expand_layer.cpp
+++ b/modules/dnn/src/layers/expand_layer.cpp
@@ -74,6 +74,20 @@ public:
         return false;
     }

+    void getTypes(const std::vector<MatType>& inputs,
+                  const int requiredOutputs,
+                  const int requiredInternals,
+                  std::vector<MatType>& outputs,
+                  std::vector<MatType>& internals) const CV_OVERRIDE
+    {
+        CV_Assert(inputs.size());
+        for (auto input : inputs)
+            CV_CheckType(input, input == CV_32F || input == CV_16F || input == CV_8S || input == CV_32S || input == CV_64S, "");
+
+        outputs.assign(requiredOutputs, inputs[0]);
+    }
+
+
     virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
     {
         std::vector<Mat> inputs;
         inputs_arr.getMatVector(inputs);
@@ -105,12 +119,6 @@ public:
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        if (inputs_arr.depth() == CV_16F)
-        {
-            forward_fallback(inputs_arr, outputs_arr, internals_arr);
-            return;
-        }
-
         std::vector<Mat> inputs, outputs;
         inputs_arr.getMatVector(inputs);
         outputs_arr.getMatVector(outputs);
diff --git a/modules/dnn/src/layers/gather_elements_layer.cpp b/modules/dnn/src/layers/gather_elements_layer.cpp
index 327c017f64..4bc0f41cde 100644
--- a/modules/dnn/src/layers/gather_elements_layer.cpp
+++ b/modules/dnn/src/layers/gather_elements_layer.cpp
@@ -64,7 +64,7 @@ public:
                   std::vector<MatType>& internals) const CV_OVERRIDE
     {
         CV_CheckEQ(inputs.size(), (size_t)2, "");
-        CV_CheckType(inputs[0], inputs[0] == CV_32F || inputs[0] == CV_32S || inputs[0] == CV_16F || inputs[0] == CV_8U, "");
+        CV_CheckType(inputs[0], inputs[0] == CV_32F || inputs[0] == CV_32S || inputs[0] == CV_64S || inputs[0] == CV_16F || inputs[0] == CV_8U, "");
         CV_CheckType(inputs[1], inputs[1] == CV_64S || inputs[1] == CV_32S, "");
         outputs.assign(1, inputs[0]);
     }
@@ -165,6 +165,9 @@ public:
             case CV_32S:
                 forward_impl<int32_t>(std::forward<Args>(args)...);
                 break;
+            case CV_64S:
+                forward_impl<int64_t>(std::forward<Args>(args)...);
+                break;
             case CV_32F:
                 forward_impl<float>(std::forward<Args>(args)...);
                 break;
diff --git a/modules/dnn/src/layers/gather_layer.cpp b/modules/dnn/src/layers/gather_layer.cpp
index 4542385deb..ed31f85747 100644
--- a/modules/dnn/src/layers/gather_layer.cpp
+++ b/modules/dnn/src/layers/gather_layer.cpp
@@ -47,7 +47,7 @@ public:
                   std::vector<MatType>& internals) const CV_OVERRIDE
     {
         CV_CheckEQ(inputs.size(), (size_t)2, "");
-        CV_CheckType(inputs[0], inputs[0] == CV_32F || inputs[0] == CV_32S || inputs[0] == CV_16F || inputs[0] == CV_8U, "");
+        CV_CheckType(inputs[0], inputs[0] == CV_32F || inputs[0] == CV_32S || inputs[0] == CV_64S || inputs[0] == CV_16F || inputs[0] == CV_8U, "");
         CV_CheckType(inputs[1], inputs[1] == CV_64S || inputs[1] == CV_32S, "");
         outputs.assign(1, inputs[0]);
     }
diff --git a/modules/dnn/src/layers/permute_layer.cpp b/modules/dnn/src/layers/permute_layer.cpp
index cd07c4da73..ea619512c6 100644
--- a/modules/dnn/src/layers/permute_layer.cpp
+++ b/modules/dnn/src/layers/permute_layer.cpp
@@ -190,9 +190,9 @@ public:
             if (preferableTarget == DNN_TARGET_CUDA_FP16 || preferableTarget == DNN_TARGET_CUDA)
                 CV_CheckTypeEQ(input, CV_32F, "Unsupported type");
             else if (preferableTarget == DNN_TARGET_OPENCL_FP16)
-                CV_CheckType(input, input == CV_16F || input == CV_8S || input == CV_32S, "");
+                CV_CheckType(input, input == CV_16F || input == CV_8S || input == CV_32S || input == CV_64S, "");
             else
-                CV_CheckType(input, input == CV_32F || input == CV_8S || input == CV_32S, "");
+                CV_CheckType(input, input == CV_32F || input == CV_8S || input == CV_32S || input == CV_64S, "");
         }

         outputs.assign(requiredOutputs, inputs[0]);
@@ -367,7 +367,7 @@ public:
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
-                  inputs_arr.depth() != CV_8S && inputs_arr.depth() != CV_32S,
+                  inputs_arr.depth() != CV_8S && inputs_arr.depth() != CV_64S,
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16F)
@@ -392,69 +392,65 @@ public:
        }
        else
        {
-           size_t i, j, count = _count, numAxes = _numAxes;
-           const size_t* newStride = &_newStride[0];
-           const size_t* oldStride = &_oldStride[0];
-           const size_t* order = &_order[0];
-
            for (k = 0; k < ninputs; k++)
            {
-               const Mat& inp = inputs[k];
-               Mat& out = outputs[k];
+               CV_Assert(inputs[k].dims == _numAxes && inputs[k].size == inputs[0].size);
+               CV_Assert(outputs[k].dims == _numAxes && outputs[k].size == outputs[0].size);

-               CV_Assert(inp.dims == numAxes && inp.size == inputs[0].size);
-               CV_Assert(out.dims == numAxes && out.size == outputs[0].size);
-
-               CV_Assert(inp.isContinuous() && out.isContinuous());
-               // CV_Assert(inp.type() == CV_32F && out.type() == CV_32F);
-
-               if( numAxes == 4 )
+               switch (inputs[k].depth())
                {
-                   int nstripes = getNumThreads();
-                   if (inp.type() == CV_8S)
-                       PermuteInvoker<int8_t>::run(inp, out, _order, nstripes);
-                   else
-                       PermuteInvoker<float>::run(inp, out, _order, nstripes);
+               case CV_32F:
+                   forward_impl<float>(inputs[k], outputs[k]);
+                   break;
+               case CV_16F:
+                   forward_impl<int16_t>(inputs[k], outputs[k]);
+                   break;
+               case CV_32S:
+                   forward_impl<int32_t>(inputs[k], outputs[k]);
+                   break;
+               case CV_64S:
+                   forward_impl<int64_t>(inputs[k], outputs[k]);
+                   break;
+               case CV_8S:
+                   forward_impl<int8_t>(inputs[k], outputs[k]);
+                   break;
+               default:
+                   CV_Error(Error::BadDepth, "unsupported mat type");
                }
-               else
+           }
+       }
+   }
+
+   template <typename T>
+   void forward_impl(const Mat& inp, Mat& out)
+   {
+       const size_t* newStride = &_newStride[0];
+       const size_t* oldStride = &_oldStride[0];
+       const size_t* order = &_order[0];
+
+       CV_Assert(inp.isContinuous() && out.isContinuous());
+
+       if( _numAxes == 4 )
+       {
+           int nstripes = getNumThreads();
+           PermuteInvoker<T>::run(inp, out, _order, nstripes);
+       }
+       else
+       {
+           const T *srcData = inp.ptr<T>();
+           T *dstData = out.ptr<T>();
+
+           for (size_t i = 0; i < _count; ++i)
+           {
+               size_t oldPosition = 0;
+               size_t newPosition = i;
+
+               for (size_t j = 0; j < _numAxes; ++j)
                {
-                   if (inp.type() == CV_8S)
-                   {
-                       const int8_t *srcData = inp.ptr<int8_t>();
-                       int8_t *dstData = out.ptr<int8_t>();
-
-                       for (i = 0; i < count; ++i)
-                       {
-                           size_t oldPosition = 0;
-                           size_t newPosition = i;
-
-                           for (j = 0; j < numAxes; ++j)
-                           {
-                               oldPosition += (newPosition / newStride[j]) * oldStride[order[j]];
-                               newPosition %= newStride[j];
-                           }
-                           dstData[i] = srcData[oldPosition];
-                       }
-                   }
-                   else
-                   {
-                       const float *srcData = inp.ptr<float>();
-                       float *dstData = out.ptr<float>();
-
-                       for (i = 0; i < count; ++i)
-                       {
-                           size_t oldPosition = 0;
-                           size_t newPosition = i;
-
-                           for (j = 0; j < numAxes; ++j)
-                           {
-                               oldPosition += (newPosition / newStride[j]) * oldStride[order[j]];
-                               newPosition %= newStride[j];
-                           }
-                           dstData[i] = srcData[oldPosition];
-                       }
-                   }
+                   oldPosition += (newPosition / newStride[j]) * oldStride[order[j]];
+                   newPosition %= newStride[j];
                }
+               dstData[i] = srcData[oldPosition];
            }
        }
    }
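The scalar path of the new forward_impl() is plain stride arithmetic: decompose the flat destination index with the new (permuted) strides, then re-accumulate a source offset with the old strides indexed through `order`. A self-contained illustration of that arithmetic (not OpenCV code):

    #include <cstddef>
    #include <cstdio>

    int main()
    {
        // Transpose a 2x3 array, i.e. permute with order {1, 0}.
        const size_t oldStride[2] = {3, 1};  // strides of the 2x3 source
        const size_t newStride[2] = {2, 1};  // strides of the 3x2 destination
        const size_t order[2] = {1, 0};
        const int src[6] = {0, 1, 2, 3, 4, 5};
        int dst[6];

        for (size_t i = 0; i < 6; ++i)
        {
            size_t oldPosition = 0;
            size_t newPosition = i;
            for (size_t j = 0; j < 2; ++j)
            {
                oldPosition += (newPosition / newStride[j]) * oldStride[order[j]];
                newPosition %= newStride[j];
            }
            dst[i] = src[oldPosition];
        }
        for (int v : dst)
            std::printf("%d ", v);  // prints: 0 3 1 4 2 5 (the transpose)
        return 0;
    }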
diff --git a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
index 7b8dd483c7..439d0eaee8 100644
--- a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
+++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
@@ -359,8 +359,8 @@ class AttentionSubGraph : public Subgraph {
         // get attrs - qkv_hidden_sizes
         qkv_hidden_sizes.clear();
         auto fill_qkv_hidden_sizes = [&] (const int slice_node_id) {
-            int slice_start = extractConstant(net, matchedNodesIds[slice_node_id], 1).at<int>(0);
-            int slice_end = extractConstant(net, matchedNodesIds[slice_node_id], 2).at<int>(0);
+            int slice_start = extractConstant(net, matchedNodesIds[slice_node_id], 1).at<int64_t>(0);
+            int slice_end = extractConstant(net, matchedNodesIds[slice_node_id], 2).at<int64_t>(0);
             if (slice_end == std::numeric_limits<int>::max()) {
                 qkv_hidden_sizes.push_back(0); // workaround for Slice with end=INT_MAX
             } else {
@@ -374,7 +374,7 @@ class AttentionSubGraph : public Subgraph {
         CV_CheckEQ(qkv_hidden_sizes.size(), static_cast<size_t>(3), "ONNXSimplifier/Attention: invalid qkv hidden sizes");
         CV_CheckEQ(int(qkv_hidden_sizes[0]), int(qkv_hidden_sizes[1]), "ONNXSimplifier/Attention: invalid qkv hidden sizes, q_hidden_size == v_hidden_size is required");
         // get attrs - num_heads, scale
-        num_heads = extractConstant(net, matchedNodesIds[reshape_q], 1).at<int>(1);
+        num_heads = extractConstant(net, matchedNodesIds[reshape_q], 1).at<int64_t>(1);
         scale = extractConstant(net, matchedNodesIds[div_q], 1).at<float>(0);

         output_ndims = extractConstant(net, matchedNodesIds[last_reshape], 1).size[0];
@@ -470,8 +470,8 @@ class AttentionSingleHeadSubGraph : public Subgraph {
         // get attrs - qkv_hidden_sizes
         qkv_hidden_sizes.clear();
         auto fill_qkv_hidden_sizes = [&] (const int slice_node_id) {
-            int slice_start = extractConstant(net, matchedNodesIds[slice_node_id], 1).at<int>(0);
-            int slice_end = extractConstant(net, matchedNodesIds[slice_node_id], 2).at<int>(0);
+            int slice_start = extractConstant(net, matchedNodesIds[slice_node_id], 1).at<int64_t>(0);
+            int slice_end = extractConstant(net, matchedNodesIds[slice_node_id], 2).at<int64_t>(0);
             if (slice_end == std::numeric_limits<int>::max()) {
                 qkv_hidden_sizes.push_back(0); // workaround for Slice with end=INT_MAX
             } else {
@@ -1222,9 +1222,12 @@ public:
        }
        Mat mat_value = getMatFromTensor(attr.t());
        switch (mat_value.type()) {
-           case CV_32S: {
+           case CV_32S:
                val = static_cast<int64_t>(mat_value.at<int32_t>());
-           } break;
+               break;
+           case CV_64S:
+               val = mat_value.at<int64_t>();
+               break;
            default: return 0;
        }
        return 1;
@@ -1700,12 +1703,9 @@ Mat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto)
    }
    else if (datatype == opencv_onnx::TensorProto_DataType_INT64)
    {
-       blob.create(sizes, CV_32SC1);
-       int32_t* dst = reinterpret_cast<int32_t*>(blob.data);
-
        if (!tensor_proto.int64_data().empty()) {
            ::google::protobuf::RepeatedField< ::google::protobuf::int64> src = tensor_proto.int64_data();
-           convertInt64ToInt32(src, dst, blob.total());
+           Mat(sizes, CV_64SC1, (void*)src.data()).copyTo(blob);
        }
        else
        {
@@ -1723,7 +1723,7 @@ Mat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto)
            }
#endif
            const int64_t* src = reinterpret_cast<const int64_t*>(val);
-           convertInt64ToInt32(src, dst, blob.total());
+           Mat(sizes, CV_64SC1, (void*)src).copyTo(blob);
        }
    }
    else if (datatype == opencv_onnx::TensorProto_DataType_INT8 ||
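The getMatFromTensor() hunks above drop the old int64-to-int32 down-conversion: the raw protobuf storage is wrapped in a temporary CV_64S Mat header (no copy), then copyTo() gives the blob its own buffer. The same pattern in isolation, with a stand-in array for the tensor data:

    #include <opencv2/core.hpp>
    #include <cstdint>

    int main()
    {
        int64_t raw[6] = {1, 2, 3, 4, 5, 6};  // stands in for tensor_proto int64 data
        int sizes[2] = {2, 3};

        cv::Mat blob;
        cv::Mat(2, sizes, CV_64SC1, (void*)raw).copyTo(blob);  // wrap header, then deep-copy

        CV_Assert(blob.type() == CV_64SC1);
        CV_Assert(blob.at<int64_t>(1, 2) == 6);
        return 0;
    }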
diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index 31de39c30a..c07a94b4f7 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -84,6 +84,7 @@ class ONNXImporter
                               const opencv_onnx::GraphProto& graph_proto);
     Mat getBlob(const opencv_onnx::NodeProto& node_proto, int index);
     Mat getBlob(const std::string& input_name);
+    Mat getIntBlob(const opencv_onnx::NodeProto& node_proto, int index);
     TensorInfo getBlobExtraInfo(const opencv_onnx::NodeProto& node_proto, int index);
     TensorInfo getBlobExtraInfo(const std::string& input_name);

@@ -596,6 +597,20 @@ Mat ONNXImporter::getBlob(const std::string& input_name)
     return constBlob->second;
 }

+Mat ONNXImporter::getIntBlob(const opencv_onnx::NodeProto& node_proto, int index)
+{
+    Mat blob = getBlob(node_proto, index);
+    if (blob.depth() == CV_32S)
+        return blob;
+    if (blob.depth() == CV_64S) {
+        Mat blobInt32;
+        blob.convertTo(blobInt32, CV_32S);
+        return blobInt32;
+    }
+    CV_Error(Error::BadDepth, "blob should have integer type");
+    return Mat();
+}
+
 ONNXImporter::TensorInfo ONNXImporter::getBlobExtraInfo(const opencv_onnx::NodeProto &node_proto, int index)
 {
     CV_Assert(index < node_proto.input_size());
@@ -1182,7 +1197,7 @@ void ONNXImporter::parseReduce(LayerParams& layerParams, const opencv_onnx::Node
     // "axes" is turned to one of the inputs since opset 18,
     // except for ReduceSum, which has "axes" input since opset 13.
     if (!layerParams.has("axes") && num_inputs == 2 && constBlobs.find(node_proto.input(1)) != constBlobs.end()) {
-        Mat mat_axes = getBlob(node_proto, 1);
+        Mat mat_axes = getIntBlob(node_proto, 1);
         int num_axes = (int)mat_axes.total();
         std::vector<int> axes(num_axes);
         for (int i = 0; i < num_axes; ++i)
@@ -1228,15 +1243,15 @@ void ONNXImporter::parseSlice(LayerParams& layerParams, const opencv_onnx::NodeP
         {
             CV_Assert(constBlobs.find(node_proto.input(i)) != constBlobs.end());
         }
-        Mat start_blob = getBlob(node_proto, 1);
-        Mat end_blob = getBlob(node_proto, 2);
+        Mat start_blob = getIntBlob(node_proto, 1);
+        Mat end_blob = getIntBlob(node_proto, 2);
         CV_Assert(start_blob.total() == end_blob.total());

         starts_ = DictValue::arrayInt(start_blob.begin<int>(), start_blob.total());
         ends_ = DictValue::arrayInt(end_blob.begin<int>(), end_blob.total());

         if (inp_size > 3 && !getBlob(node_proto, 3).empty())
         {
-            Mat axes_blob = getBlob(node_proto, 3);
+            Mat axes_blob = getIntBlob(node_proto, 3);
             CV_Assert(axes_blob.total() == start_blob.total());
             axes_ = DictValue::arrayInt(axes_blob.begin<int>(), axes_blob.total());
             axis = axes_.getIntValue(0) < 0 ? axes_.getIntValue(0) + dims : axes_.getIntValue(0);
@@ -1245,7 +1260,7 @@ void ONNXImporter::parseSlice(LayerParams& layerParams, const opencv_onnx::NodeP

         if (inp_size == 5 && !getBlob(node_proto, 4).empty())
         {
-            Mat step_blob = getBlob(node_proto, 4);
+            Mat step_blob = getIntBlob(node_proto, 4);
             CV_Assert(step_blob.total() == start_blob.total());
             steps_ = DictValue::arrayInt(step_blob.begin<int>(), step_blob.total());
             steps.resize(dims, 1);
@@ -1351,7 +1366,7 @@ void ONNXImporter::parseSplit(LayerParams& layerParams, const opencv_onnx::NodeP
     else if (node_proto.input_size() == 2)  // opset >= 13, the split will be stored at the second input, instead of the attribute.
     {
         CV_Assert(constBlobs.find(node_proto.input(1)) != constBlobs.end());
-        Mat splitsBlob = getBlob(node_proto, 1);
+        Mat splitsBlob = getIntBlob(node_proto, 1);
         int splitSize = splitsBlob.total();
         if (splitSize == 1)
         {
@@ -1650,7 +1665,7 @@ void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodePr

     if (4 < lstm_proto.input_size() && !lstm_proto.input(4).empty())
     {
-        Mat blob = getBlob(lstm_proto, 4);
+        Mat blob = getIntBlob(lstm_proto, 4);
         CV_Assert(blob.total() == batch_size);
         for (MatIterator_<int32_t> it = blob.begin<int32_t>(); it != blob.end<int32_t>(); ++it)
         {
@@ -2101,9 +2116,7 @@ void ONNXImporter::parseSqueeze(LayerParams& layerParams, const opencv_onnx::Nod
     {
         if (constBlobs.find(node_proto.input(1)) != constBlobs.end())
         {
-            Mat axesMat = getBlob(node_proto, 1);
-            if (axesMat.depth() == CV_32F)
-                axesMat.convertTo(axesMat, CV_32S);
+            Mat axesMat = getIntBlob(node_proto, 1);
             size_t axesLen = axesMat.total();
             for (int i = 0; i < axesLen; i++)
             {
@@ -2232,7 +2245,7 @@ void ONNXImporter::parseUnsqueeze(LayerParams& layerParams, const opencv_onnx::N
     DictValue axes;
     if (node_proto.input_size() == 2)
     {
-        Mat blob = getBlob(node_proto, 1);
+        Mat blob = getIntBlob(node_proto, 1);
         axes = DictValue::arrayInt(blob.ptr<int>(), blob.total());
     }
     else
@@ -2301,7 +2314,7 @@ void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::Node
     CV_CheckTrue(constBlobs.find(node_proto.input(1)) != constBlobs.end(),
                  "DNN/ONNXImporter-Expand: input shape must be constant");
-    Mat mat_input_shape = getBlob(node_proto, 1);
+    Mat mat_input_shape = getIntBlob(node_proto, 1);
     CV_CheckTypeEQ(mat_input_shape.depth(), CV_32S, "DNN/ONNXImporter-Expand: data type of input shape must be CV_32S");
     for (int i = 0; i < mat_input_shape.total(); ++i) {
         CV_Check(i, *(mat_input_shape.ptr<int>() + i) >= 0, "DNN/ONNXImporter-Expand: invalid shape dimension");
@@ -2336,7 +2349,7 @@ void ONNXImporter::parseReshape(LayerParams& layerParams, const opencv_onnx::Nod
     layerParams.type += (depth == CV_8S) ? "Int8" : "";
     if (node_proto.input_size() == 2)
     {
-        Mat blob = getBlob(node_proto, 1);
+        Mat blob = getIntBlob(node_proto, 1);
         CV_Assert(blob.type() == CV_32SC1);

         layerParams.set("dim", DictValue::arrayInt(blob.ptr<int>(), blob.total()));
@@ -2381,7 +2394,7 @@ void ONNXImporter::parsePad(LayerParams& layerParams, const opencv_onnx::NodePro
     {
         // Paddings are in order begin0, begin1, .. beginN, end0, end1, ..., endN.
         // We need to shuffle it to begin0, end0, begin1, end1, ...
-        Mat paddings = getBlob(node_proto, 1).reshape(1, 2);
+        Mat paddings = getIntBlob(node_proto, 1).reshape(1, 2);
         paddings = paddings.t();
         layerParams.set("paddings", DictValue::arrayInt(paddings.ptr<int>(), paddings.total()));
@@ -2411,13 +2424,13 @@ void ONNXImporter::parseShape(LayerParams& layerParams, const opencv_onnx::NodeP
     int dims = static_cast<int>(inpShape.size());
     if (isInput1D)
         dims = 1;
-    Mat shapeMat(1, dims, CV_32S);
+    Mat shapeMat(1, dims, CV_64S);
     bool isDynamicShape = false;
     for (int j = 0; j < dims; ++j)
     {
         int sz = inpShape[j];
         isDynamicShape |= (sz == 0);
-        shapeMat.at<int>(j) = sz;
+        shapeMat.at<int64_t>(j) = sz;
     }
     shapeMat.dims = 1;  // FIXIT Mat 1D
@@ -2431,6 +2444,20 @@ void ONNXImporter::parseShape(LayerParams& layerParams, const opencv_onnx::NodeP

 void ONNXImporter::parseCast(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
 {
+    int type;
+    switch (layerParams.get<int>("to"))
+    {
+        case opencv_onnx::TensorProto_DataType_FLOAT: type = CV_32F; break;
+        case opencv_onnx::TensorProto_DataType_UINT8: type = CV_8U; break;
+        case opencv_onnx::TensorProto_DataType_UINT16: type = CV_16U; break;
+        case opencv_onnx::TensorProto_DataType_FLOAT16: type = CV_16F; break;
+        case opencv_onnx::TensorProto_DataType_INT8: type = CV_8S; break;
+        case opencv_onnx::TensorProto_DataType_INT16: type = CV_16S; break;
+        case opencv_onnx::TensorProto_DataType_INT32: type = CV_32S; break;
+        case opencv_onnx::TensorProto_DataType_INT64: type = CV_64S; break;
+        default: CV_Error(Error::BadDepth, "Unsupported type");
+    }
+
     if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
     {
         Mat blob = getBlob(node_proto, 0);
@@ -2438,27 +2465,15 @@ void ONNXImporter::parseCast(LayerParams& layerParams, const opencv_onnx::NodePr
         {
             constBlobsExtraInfo.insert(std::make_pair(node_proto.output(0), getBlobExtraInfo(node_proto, 0)));
         }
-        int type;
-        switch (layerParams.get<int>("to"))
-        {
-            case opencv_onnx::TensorProto_DataType_FLOAT: type = CV_32F; break;
-            case opencv_onnx::TensorProto_DataType_UINT8: type = CV_8U; break;
-            case opencv_onnx::TensorProto_DataType_UINT16: type = CV_16U; break;
-            case opencv_onnx::TensorProto_DataType_FLOAT16: type = CV_16F; break;
-            case opencv_onnx::TensorProto_DataType_INT8:
-            case opencv_onnx::TensorProto_DataType_INT16:
-            case opencv_onnx::TensorProto_DataType_INT32:
-            case opencv_onnx::TensorProto_DataType_INT64: type = CV_32S; break;
-            default: type = blob.type();
-        }

         Mat dst;
         blob.convertTo(dst, type);
         dst.dims = blob.dims;
         addConstant(node_proto.output(0), dst);
         return;
     }
-    else
-        layerParams.type = "Identity";
+
+    layerParams.type = "Cast";
+    layerParams.set("outputType", type);

     addLayer(layerParams, node_proto);
 }
@@ -2477,7 +2492,7 @@ void ONNXImporter::parseConstantFill(LayerParams& layerParams, const opencv_onnx
     else
         fill_value = layerParams.get("value", 0);

-    MatShape inpShape = getBlob(node_proto, 0);
+    MatShape inpShape = getIntBlob(node_proto, 0);
     for (int i = 0; i < inpShape.size(); i++)
         CV_CheckGT(inpShape[i], 0, "");
     Mat tensor(inpShape.size(), &inpShape[0], depth, Scalar(fill_value));
@@ -2498,15 +2513,12 @@ void ONNXImporter::parseGather(LayerParams& layerParams, const opencv_onnx::Node
         std::vector<Mat> inputs, output;

         Mat input = getBlob(node_proto, 0);
-        int type = input.type();
-        input.convertTo(input, CV_32FC1);
         inputs.push_back(input);

         Mat indices = getBlob(node_proto, 1);
         inputs.push_back(indices);

         runLayer(layerParams, inputs, output);
-        output.back().convertTo(output.back(), type);
         //output.back().dims = std::max(input.dims - real_ndims, 1);
         addConstant(node_proto.output(0), output.back());
         return;
@@ -2547,9 +2559,6 @@ void ONNXImporter::parseGatherElements(LayerParams& layerParams, const opencv_on
         std::vector<Mat> inputs, output;
         for (size_t i = 0; i < node_proto.input_size(); i++) {
             Mat blob = getBlob(node_proto, i);
-            if (i == 1) { // indices, from int32/int64 to float32 for compatibility
-                blob.convertTo(blob, CV_32F);
-            }
             inputs.push_back(blob);
         }
         runLayer(layerParams, inputs, output);
@@ -2560,9 +2569,6 @@ void ONNXImporter::parseGatherElements(LayerParams& layerParams, const opencv_on
     for (size_t i = 0; i < node_proto.input_size(); i++) {
         if (constBlobs.find(node_proto.input(i)) != constBlobs.end()) {
             Mat blob = getBlob(node_proto, i);
-            if (i == 1) { // indices, from int32/int64 to float32 for compatibility
-                blob.convertTo(blob, CV_32F);
-            }

             LayerParams constParams;
             constParams.name = node_proto.input(i);
@@ -2691,11 +2697,8 @@ void ONNXImporter::parseResize(LayerParams& layerParams, const opencv_onnx::Node
         const std::string& inputSizes = node_proto.input(3);
         if (constBlobs.find(inputSizes) != constBlobs.end())
         {
-            Mat shapes = getBlob(inputSizes);
+            Mat shapes = getIntBlob(node_proto, 3);
             CV_CheckEQ(shapes.total(), (size_t)4, "HCHW layout is expected");
-            CV_CheckDepth(shapes.depth(), shapes.depth() == CV_32S || shapes.depth() == CV_32F, "");
-            if (shapes.depth() == CV_32F)
-                shapes.convertTo(shapes, CV_32S);
             layerParams.set("width", shapes.at<int>(3));
             layerParams.set("height", shapes.at<int>(2));
         }
@@ -2809,7 +2812,7 @@ void ONNXImporter::parseCumSum(LayerParams& layerParams, const opencv_onnx::Node

     if (constBlobs.find(input1) != constBlobs.end())
     {
-        Mat axis_blob = getBlob(input1);
+        Mat axis_blob = getIntBlob(node_proto, 1);
         CV_Assert(axis_blob.total() == 1u);
         layerParams.set("axis", axis_blob.at<int>(0));
     }
@@ -2989,15 +2992,15 @@ void ONNXImporter::parseRange(LayerParams& layerParams, const opencv_onnx::NodeP
     // only supports the case which all inputs are constant
     CV_Assert(const_id.size() == 3);

-    Mat startMat = getBlob(node_proto, 0);
+    Mat startMat = getIntBlob(node_proto, 0);
     CV_Assert(startMat.type() == CV_32SC1);
     int start = startMat.at<int>(0);

-    Mat limitMat = getBlob(node_proto, 1);
+    Mat limitMat = getIntBlob(node_proto, 1);
     CV_Assert(limitMat.type() == CV_32SC1);
     int limit = limitMat.at<int>(0);

-    Mat deltaMat = getBlob(node_proto, 2);
+    Mat deltaMat = getIntBlob(node_proto, 2);
     CV_Assert(deltaMat.type() == CV_32SC1);
     int delta = deltaMat.at<int>(0);
@@ -3043,8 +3046,6 @@ void ONNXImporter::parseScatter(LayerParams& layerParams, const opencv_onnx::Nod
         if (layer_id.find(node_proto.input(i)) == layer_id.end())
         {
             Mat blob = getBlob(node_proto, i);
-            if (i == 1) // indices, from int32/int64 to float32
-                blob.convertTo(blob, CV_32F);

             LayerParams constParams;
             constParams.name = node_proto.input(i);
@@ -3100,14 +3101,14 @@ void ONNXImporter::parseTile(LayerParams& layerParams, const opencv_onnx::NodePr
     // repeats, treated as paramenter
     std::vector<int> repeats_vec(input0_dims, 1);
-    Mat input1_blob = getBlob(node_proto, 1);
+    Mat input1_blob = getIntBlob(node_proto, 1);
     if (is_opset_1)
     {
         // input1 in tile-1: tiles, 1d tensor of shape [1]
         CV_CheckEQ(input1_blob.total(), 1ull, "ONNX/Tile: tiles must be a 0D tensor or 1D tensor of shape [1].");
         int tiles = input1_blob.at<int>(0);
         // input2 in tile-1: axis, 1d tensor of shape [1]
-        Mat input2_blob = getBlob(node_proto, 2);
+        Mat input2_blob = getIntBlob(node_proto, 2);
         CV_CheckEQ(input2_blob.total(), 1ull, "ONNX/Tile: axis must be a 0D tensor or 1D tensor of shape [1].");
         int axis = input2_blob.at<int>(0);
         repeats_vec[axis] = tiles;
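getIntBlob() exists because constant inputs (axes, shapes, steps, splits) now arrive as CV_64S, while the layer parameters built from them stay int32: the helper passes CV_32S blobs through, converts CV_64S down (values outside int32 range saturate), and rejects everything else. A behavioral sketch using a hypothetical free function, since the real helper is a private ONNXImporter method:

    #include <opencv2/core.hpp>
    #include <cstdint>

    static cv::Mat getIntBlobLike(const cv::Mat& blob)  // hypothetical stand-in
    {
        if (blob.depth() == CV_32S)
            return blob;
        if (blob.depth() == CV_64S)
        {
            cv::Mat blobInt32;
            blob.convertTo(blobInt32, CV_32S);  // saturating narrow
            return blobInt32;
        }
        CV_Error(cv::Error::BadDepth, "blob should have integer type");
        return cv::Mat();
    }

    int main()
    {
        cv::Mat axes(1, 3, CV_64SC1);
        axes.at<int64_t>(0, 0) = 0;
        axes.at<int64_t>(0, 1) = 2;
        axes.at<int64_t>(0, 2) = 3;

        cv::Mat asInt = getIntBlobLike(axes);
        CV_Assert(asInt.type() == CV_32SC1 && asInt.at<int>(0, 2) == 3);
        return 0;
    }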
diff --git a/modules/dnn/test/test_int.cpp b/modules/dnn/test/test_int.cpp
index caf6301d1e..283e2f38a9 100644
--- a/modules/dnn/test/test_int.cpp
+++ b/modules/dnn/test/test_int.cpp
@@ -6,9 +6,21 @@
 // Third party copyrights are property of their respective owners.

 #include "test_precomp.hpp"
+#include <opencv2/dnn/shape_utils.hpp>

 namespace opencv_test { namespace {

+int64_t getValueAt(const Mat &m, const int *indices)
+{
+    if (m.type() == CV_32S)
+        return m.at<int32_t>(indices);
+    else if (m.type() == CV_64S)
+        return m.at<int64_t>(indices);
+    else
+        CV_Error(Error::BadDepth, "Unsupported type");
+    return -1;
+}
+
 typedef testing::TestWithParam<tuple<Backend, Target> > Test_int64_sum;
 TEST_P(Test_int64_sum, basic)
 {
@@ -53,4 +65,302 @@ INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_int64_sum,
     dnnBackendsAndTargets()
 );

+typedef testing::TestWithParam<tuple<int, tuple<Backend, Target> > > Test_Expand_Int;
+TEST_P(Test_Expand_Int, random)
+{
+    int matType = get<0>(GetParam());
+    tuple<Backend, Target> backend_target = get<1>(GetParam());
+    Backend backend = get<0>(backend_target);
+    Target target = get<1>(backend_target);
+
+    std::vector<int> inShape{2, 3, 1, 5};
+    int64_t low = matType == CV_64S ? 1000000000000000ll : 1000000000;
+    Mat input(inShape, matType);
+    cv::randu(input, low, low + 100);
+    std::vector<int> outShape{2, 1, 4, 5};
+
+    Net net;
+    LayerParams lp;
+    lp.type = "Expand";
+    lp.name = "testLayer";
+    lp.set("shape", DictValue::arrayInt(&outShape[0], outShape.size()));
+    net.addLayerToPrev(lp.name, lp.type, lp);
+
+    net.setInput(input);
+    net.setPreferableBackend(backend);
+    net.setPreferableTarget(target);
+
+    Mat re;
+    re = net.forward();
+    EXPECT_EQ(re.depth(), matType);
+    EXPECT_EQ(re.size.dims(), 4);
+    EXPECT_EQ(re.size[0], 2);
+    EXPECT_EQ(re.size[1], 3);
+    EXPECT_EQ(re.size[2], 4);
+    EXPECT_EQ(re.size[3], 5);
+
+    std::vector<int> inIndices(4);
+    std::vector<int> reIndices(4);
+    for (int i0 = 0; i0 < re.size[0]; ++i0)
+    {
+        inIndices[0] = i0 % inShape[0];
+        reIndices[0] = i0;
+        for (int i1 = 0; i1 < re.size[1]; ++i1)
+        {
+            inIndices[1] = i1 % inShape[1];
+            reIndices[1] = i1;
+            for (int i2 = 0; i2 < re.size[2]; ++i2)
+            {
+                inIndices[2] = i2 % inShape[2];
+                reIndices[2] = i2;
+                for (int i3 = 0; i3 < re.size[3]; ++i3)
+                {
+                    inIndices[3] = i3 % inShape[3];
+                    reIndices[3] = i3;
+                    EXPECT_EQ(getValueAt(re, reIndices.data()), getValueAt(input, inIndices.data()));
+                }
+            }
+        }
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(/**/, Test_Expand_Int, Combine(
+    testing::Values(CV_32S, CV_64S),
+    dnnBackendsAndTargets()
+));
+
+typedef testing::TestWithParam<tuple<int, tuple<Backend, Target> > > Test_Permute_Int;
+TEST_P(Test_Permute_Int, random)
+{
+    int matType = get<0>(GetParam());
+    tuple<Backend, Target> backend_target = get<1>(GetParam());
+    Backend backend = get<0>(backend_target);
+    Target target = get<1>(backend_target);
+
+    if (backend == DNN_BACKEND_CUDA)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA);
+
+    std::vector<int> inShape{2, 3, 4, 5};
+    int64_t low = matType == CV_64S ? 1000000000000000ll : 1000000000;
+    Mat input(inShape, matType);
+    cv::randu(input, low, low + 100);
+    std::vector<int> order{0, 2, 3, 1};
+
+    Net net;
+    LayerParams lp;
+    lp.type = "Permute";
+    lp.name = "testLayer";
+    lp.set("order", DictValue::arrayInt(&order[0], order.size()));
+    net.addLayerToPrev(lp.name, lp.type, lp);
+
+    net.setInput(input);
+    net.setPreferableBackend(backend);
+    net.setPreferableTarget(target);
+
+    Mat re;
+    re = net.forward();
+    EXPECT_EQ(re.depth(), matType);
+    EXPECT_EQ(re.size.dims(), 4);
+    EXPECT_EQ(re.size[0], 2);
+    EXPECT_EQ(re.size[1], 4);
+    EXPECT_EQ(re.size[2], 5);
+    EXPECT_EQ(re.size[3], 3);
+
+    std::vector<int> inIndices(4);
+    std::vector<int> reIndices(4);
+    for (int i0 = 0; i0 < input.size[0]; ++i0)
+    {
+        inIndices[0] = i0;
+        reIndices[0] = i0;
+        for (int i1 = 0; i1 < input.size[1]; ++i1)
+        {
+            inIndices[1] = i1;
+            reIndices[3] = i1;
+            for (int i2 = 0; i2 < input.size[2]; ++i2)
+            {
+                inIndices[2] = i2;
+                reIndices[1] = i2;
+                for (int i3 = 0; i3 < input.size[3]; ++i3)
+                {
+                    inIndices[3] = i3;
+                    reIndices[2] = i3;
+                    EXPECT_EQ(getValueAt(re, reIndices.data()), getValueAt(input, inIndices.data()));
+                }
+            }
+        }
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(/**/, Test_Permute_Int, Combine(
+    testing::Values(CV_32S, CV_64S),
+    dnnBackendsAndTargets()
+));
+
+typedef testing::TestWithParam<tuple<int, int, tuple<Backend, Target> > > Test_GatherElements_Int;
+TEST_P(Test_GatherElements_Int, random)
+{
+    int matType = get<0>(GetParam());
+    int indicesType = get<1>(GetParam());
+    tuple<Backend, Target> backend_target = get<2>(GetParam());
+    Backend backend = get<0>(backend_target);
+    Target target = get<1>(backend_target);
+
+    std::vector<int> inShape{2, 3, 4, 5};
+    int64_t low = matType == CV_64S ? 1000000000000000ll : 1000000000;
+    Mat input(inShape, matType);
+    cv::randu(input, low, low + 100);
+
+    std::vector<int> indicesShape{2, 3, 10, 5};
+    Mat indicesMat(indicesShape, indicesType);
+    cv::randu(indicesMat, 0, 4);
+
+    Net net;
+    LayerParams lp;
+    lp.type = "GatherElements";
+    lp.name = "testLayer";
+    lp.set("axis", 2);
+    int id = net.addLayerToPrev(lp.name, lp.type, lp);
+    net.connect(0, 1, id, 1);
+
+    std::vector<String> inpNames(2);
+    inpNames[0] = "gather_input";
+    inpNames[1] = "gather_indices";
+    net.setInputsNames(inpNames);
+    net.setInput(input, inpNames[0]);
+    net.setInput(indicesMat, inpNames[1]);
+
+    net.setPreferableBackend(backend);
+    net.setPreferableTarget(target);
+
+    Mat re;
+    re = net.forward();
+    EXPECT_EQ(re.depth(), matType);
+    EXPECT_EQ(re.size.dims(), 4);
+    ASSERT_EQ(shape(indicesMat), shape(re));
+
+    std::vector<int> inIndices(4);
+    std::vector<int> reIndices(4);
+    for (int i0 = 0; i0 < input.size[0]; ++i0)
+    {
+        inIndices[0] = i0;
+        reIndices[0] = i0;
+        for (int i1 = 0; i1 < input.size[1]; ++i1)
+        {
+            inIndices[1] = i1;
+            reIndices[1] = i1;
+            for (int i2 = 0; i2 < indicesMat.size[2]; ++i2)
+            {
+                reIndices[2] = i2;
+                for (int i3 = 0; i3 < input.size[3]; ++i3)
+                {
+                    inIndices[3] = i3;
+                    reIndices[3] = i3;
+                    inIndices[2] = getValueAt(indicesMat, reIndices.data());
+                    EXPECT_EQ(getValueAt(re, reIndices.data()), getValueAt(input, inIndices.data()));
+                }
+            }
+        }
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(/**/, Test_GatherElements_Int, Combine(
+    testing::Values(CV_32S, CV_64S),
+    testing::Values(CV_32S, CV_64S),
+    dnnBackendsAndTargets()
+));
+
+typedef testing::TestWithParam<tuple<int, int, tuple<Backend, Target> > > Test_Gather_Int;
+TEST_P(Test_Gather_Int, random)
+{
+    int matType = get<0>(GetParam());
+    int indicesType = get<1>(GetParam());
+    tuple<Backend, Target> backend_target = get<2>(GetParam());
+    Backend backend = get<0>(backend_target);
+    Target target = get<1>(backend_target);
+
+    std::vector<int> inShape{5, 1};
+    int64_t low = matType == CV_64S ? 1000000000000000ll : 1000000000;
+    Mat input(inShape, matType);
+    cv::randu(input, low, low + 100);
+
+    std::vector<int> indices_shape = {1, 1};
+    Mat indicesMat = cv::Mat(indices_shape, indicesType, 0.0);
+
+    std::vector<int> output_shape = {5, 1};
+    cv::Mat outputRef = cv::Mat(output_shape, matType, input(cv::Range::all(), cv::Range(0, 1)).data);
+
+    Net net;
+    LayerParams lp;
+    lp.type = "Gather";
+    lp.name = "testLayer";
+    lp.set("axis", 1);
+    lp.set("real_ndims", 1);
+    int id = net.addLayerToPrev(lp.name, lp.type, lp);
+    net.connect(0, 1, id, 1);
+
+    std::vector<String> inpNames(2);
+    inpNames[0] = "gather_input";
+    inpNames[1] = "gather_indices";
+    net.setInputsNames(inpNames);
+    net.setInput(input, inpNames[0]);
+    net.setInput(indicesMat, inpNames[1]);
+
+    net.setPreferableBackend(backend);
+    net.setPreferableTarget(target);
+
+    Mat re;
+    re = net.forward();
+    EXPECT_EQ(re.depth(), matType);
+
+    ASSERT_EQ(shape(outputRef), shape(re));
+    normAssert(outputRef, re);
+}
+
+INSTANTIATE_TEST_CASE_P(/**/, Test_Gather_Int, Combine(
+    testing::Values(CV_32S, CV_64S),
+    testing::Values(CV_32S, CV_64S),
+    dnnBackendsAndTargets()
+));
+
+typedef testing::TestWithParam<tuple<int, int, tuple<Backend, Target> > > Test_Cast_Int;
+TEST_P(Test_Cast_Int, random)
+{
+    int inMatType = get<0>(GetParam());
+    int outMatType = get<1>(GetParam());
+    tuple<Backend, Target> backend_target = get<2>(GetParam());
+    Backend backend = get<0>(backend_target);
+    Target target = get<1>(backend_target);
+
+    std::vector<int> inShape{2, 3, 4, 5};
+    Mat input(inShape, inMatType);
+    cv::randu(input, 200, 300);
+    Mat outputRef;
+    input.convertTo(outputRef, outMatType);
+
+    Net net;
+    LayerParams lp;
+    lp.type = "Cast";
+    lp.name = "testLayer";
+    lp.set("outputType", outMatType);
+    net.addLayerToPrev(lp.name, lp.type, lp);
+
+    net.setInput(input);
+    net.setPreferableBackend(backend);
+    net.setPreferableTarget(target);
+
+    Mat re;
+    re = net.forward();
+    EXPECT_EQ(re.depth(), outMatType);
+    EXPECT_EQ(re.size.dims(), 4);
+
+    ASSERT_EQ(shape(input), shape(re));
+    normAssert(outputRef, re);
+}
+
+INSTANTIATE_TEST_CASE_P(/**/, Test_Cast_Int, Combine(
+    testing::Values(CV_32S, CV_64S),
+    testing::Values(CV_32S, CV_64S),
+    dnnBackendsAndTargets()
+));
+
 }} // namespace
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index d91309aa24..e99f0d691e 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -2164,7 +2164,7 @@ TEST_P(Test_ONNX_nets, Alexnet)
     expectNoFallbacksFromIE(net);
 }

-TEST_P(Test_ONNX_nets, DISABLED_RAFT)
+TEST_P(Test_ONNX_nets, RAFT)
 {
     applyTestTag(CV_TEST_TAG_LONG, CV_TEST_TAG_DEBUG_VERYLONG, CV_TEST_TAG_MEMORY_2GB);