Mirror of https://github.com/opencv/opencv.git
Merge pull request #24363 from cudawarped:videoio_ffmpeg_add_stream_encapsulation
videoio: Add raw encoded video stream muxing to cv::VideoWriter with CAP_FFMPEG #24363

Allow raw encoded video streams (e.g. h264[5]) to be encapsulated by `cv::VideoWriter` into video containers (e.g. mp4/mkv). This operates in a similar way to https://github.com/opencv/opencv/pull/15290: encapsulation is enabled by setting the `VideoWriterProperties::VIDEOWRITER_PROP_RAW_VIDEO` flag when constructing `cv::VideoWriter`, e.g.

```
VideoWriter container(fileNameOut, api, fourcc, fps, { width, height }, { VideoWriterProperties::VIDEOWRITER_PROP_RAW_VIDEO, 1 });
```

and each raw encoded frame is passed as a single row of a `CV_8U` `cv::Mat`.

The main reason for this PR is to allow `cudacodec::VideoWriter` to output its encoded streams to a suitable container, see https://github.com/opencv/opencv_contrib/pull/3569.

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable. Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
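For orientation before the diff, here is a minimal end-to-end sketch of the intended usage, adapted from the tests added in this PR. The file names, output container and the key-frame interval of 25 are placeholder assumptions; FMP4 (MPEG-4) inputs additionally need their extradata prepended to the first packet, as the tests below do.

```cpp
#include <opencv2/videoio.hpp>

int main()
{
    using namespace cv;

    // Demux the already-encoded stream without decoding it (raw mode).
    VideoCapture capRaw("in.mp4", CAP_FFMPEG, { CAP_PROP_FORMAT, -1 });
    if (!capRaw.isOpened()) return 1;

    const int width  = static_cast<int>(capRaw.get(CAP_PROP_FRAME_WIDTH));
    const int height = static_cast<int>(capRaw.get(CAP_PROP_FRAME_HEIGHT));
    const double fps = capRaw.get(CAP_PROP_FPS);
    const int fourcc = static_cast<int>(capRaw.get(CAP_PROP_FOURCC));

    // Re-mux the packets into a new container; VIDEOWRITER_PROP_RAW_VIDEO tells
    // the FFmpeg backend to encapsulate instead of encode.
    VideoWriter container("out.mkv", CAP_FFMPEG, fourcc, fps, Size(width, height),
                          { VIDEOWRITER_PROP_RAW_VIDEO, 1,
                            VIDEOWRITER_PROP_KEY_INTERVAL, 25 });  // assumed IDR period
    if (!container.isOpened()) return 1;

    Mat rawFrame;  // each read() yields one encoded frame as a 1 x N CV_8UC1 Mat
    while (capRaw.read(rawFrame))
        container.write(rawFrame);
    return 0;
}
```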
This commit is contained in: parent a3b3a589f9, commit 38bc519e4a
@@ -225,10 +225,13 @@ enum VideoWriterProperties {
VIDEOWRITER_PROP_NSTRIPES = 3, //!< Number of stripes for parallel encoding. -1 for auto detection.
VIDEOWRITER_PROP_IS_COLOR = 4, //!< If it is not zero, the encoder will expect and encode color frames, otherwise it
//!< will work with grayscale frames.
VIDEOWRITER_PROP_DEPTH = 5, //!< Defaults to CV_8U.
VIDEOWRITER_PROP_DEPTH = 5, //!< Defaults to \ref CV_8U.
VIDEOWRITER_PROP_HW_ACCELERATION = 6, //!< (**open-only**) Hardware acceleration type (see #VideoAccelerationType). Setting supported only via `params` parameter in VideoWriter constructor / .open() method. Default value is backend-specific.
VIDEOWRITER_PROP_HW_DEVICE = 7, //!< (**open-only**) Hardware device index (select GPU if multiple available). Device enumeration is acceleration type specific.
VIDEOWRITER_PROP_HW_ACCELERATION_USE_OPENCL= 8, //!< (**open-only**) If non-zero, create new OpenCL context and bind it to current thread. The OpenCL context created with Video Acceleration context attached it (if not attached yet) for optimized GPU data copy between cv::UMat and HW accelerated encoder.
VIDEOWRITER_PROP_RAW_VIDEO = 9, //!< (**open-only**) Set to non-zero to enable encapsulation of an encoded raw video stream. Each raw encoded video frame should be passed to VideoWriter::write() as single row or column of a \ref CV_8UC1 Mat. \note If the key frame interval is not 1 then it must be manually specified by the user. This can either be performed during initialization passing \ref VIDEOWRITER_PROP_KEY_INTERVAL as one of the extra encoder params to \ref VideoWriter::VideoWriter(const String &, int, double, const Size &, const std::vector< int > &params) or afterwards by setting the \ref VIDEOWRITER_PROP_KEY_FLAG with \ref VideoWriter::set() before writing each frame. FFMpeg backend only.
VIDEOWRITER_PROP_KEY_INTERVAL = 10, //!< (**open-only**) Set the key frame interval using raw video encapsulation (\ref VIDEOWRITER_PROP_RAW_VIDEO != 0). Defaults to 1 when not set. FFMpeg backend only.
VIDEOWRITER_PROP_KEY_FLAG = 11, //!< Set to non-zero to signal that the following frames are key frames or zero if not, when encapsulating raw video (\ref VIDEOWRITER_PROP_RAW_VIDEO != 0). FFMpeg backend only.
#ifndef CV_DOXYGEN
CV__VIDEOWRITER_PROP_LATEST
#endif
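As the VIDEOWRITER_PROP_RAW_VIDEO documentation above says, a key-frame interval other than 1 has to be supplied by the caller, either once at open time or per frame. A short sketch of the two options; the interval of 25 and the helper names are illustrative only, not part of the patch.

```cpp
#include <opencv2/videoio.hpp>
#include <string>

// Option 1: a fixed interval, given once when the writer is opened.
cv::VideoWriter openRawMuxer(const std::string& path, int fourcc, double fps, cv::Size size)
{
    return cv::VideoWriter(path, cv::CAP_FFMPEG, fourcc, fps, size,
                           { cv::VIDEOWRITER_PROP_RAW_VIDEO, 1,
                             cv::VIDEOWRITER_PROP_KEY_INTERVAL, 25 });  // assumed period
}

// Option 2: flag each key frame explicitly before writing it.
void writePacket(cv::VideoWriter& writer, const cv::Mat& encodedFrame, bool isKeyFrame)
{
    writer.set(cv::VIDEOWRITER_PROP_KEY_FLAG, isKeyFrame ? 1 : 0);
    writer.write(encodedFrame);  // one encoded frame per 1 x N CV_8UC1 row
}
```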
@@ -198,7 +198,11 @@ public:
return ffmpegWriter->getProperty(propId);
}

virtual bool setProperty(int, double) CV_OVERRIDE { return false; }
virtual bool setProperty(int propId, double value) CV_OVERRIDE {
if (!ffmpegWriter)
return 0;
return ffmpegWriter->setProperty(propId, value);
}
virtual bool isOpened() const CV_OVERRIDE { return ffmpegWriter != 0; }

protected:

@@ -1447,7 +1447,8 @@ bool CvCapture_FFMPEG::processRawPacket()
#else
AVCodecContext* ctx = ic->streams[video_stream]->codec;
int err = av_bitstream_filter_filter(bsfc, ctx, NULL, &packet_filtered.data,
&packet_filtered.size, packet.data, packet.size, packet_filtered.flags & AV_PKT_FLAG_KEY);
&packet_filtered.size, packet.data, packet.size, packet.flags & AV_PKT_FLAG_KEY);
if (packet.flags & AV_PKT_FLAG_KEY) packet_filtered.flags |= AV_PKT_FLAG_KEY;
if (err < 0)
{
CV_WARN("Packet filtering failed");
@@ -2069,6 +2070,7 @@ struct CvVideoWriter_FFMPEG
bool writeFrame( const unsigned char* data, int step, int width, int height, int cn, int origin );
bool writeHWFrame(cv::InputArray input);
double getProperty(int propId) const;
bool setProperty(int, double);

void init();

@@ -2092,6 +2094,9 @@ struct CvVideoWriter_FFMPEG
VideoAccelerationType va_type;
int hw_device;
int use_opencl;
bool encode_video;
int idr_period;
bool key_frame;
};

static const char * icvFFMPEGErrStr(int err)

@@ -2157,6 +2162,9 @@ void CvVideoWriter_FFMPEG::init()
hw_device = -1;
use_opencl = 0;
ok = false;
encode_video = true;
idr_period = 0;
key_frame = false;
}

/**

@@ -2202,7 +2210,7 @@ static AVCodecContext * icv_configure_video_stream_FFMPEG(AVFormatContext *oc,
AVStream *st,
const AVCodec* codec,
int w, int h, int bitrate,
double fps, AVPixelFormat pixel_format, int fourcc)
double fps, AVPixelFormat pixel_format, int fourcc, AVCodecID codec_id)
{
#ifdef CV_FFMPEG_CODECPAR
AVCodecContext *c = avcodec_alloc_context3(codec);

@@ -2213,9 +2221,7 @@ static AVCodecContext * icv_configure_video_stream_FFMPEG(AVFormatContext *oc,

int frame_rate, frame_rate_base;

c->codec_id = codec->id;
c->codec_type = AVMEDIA_TYPE_VIDEO;
c->codec_tag = fourcc;
c->codec_id = codec ? codec->id : codec_id;

#ifndef CV_FFMPEG_CODECPAR
// Set per-codec defaults

@@ -2225,6 +2231,9 @@ static AVCodecContext * icv_configure_video_stream_FFMPEG(AVFormatContext *oc,
c->codec_id = c_id;
#endif

c->codec_type = AVMEDIA_TYPE_VIDEO;
c->codec_tag = fourcc;

/* put sample parameters */
int64_t lbit_rate = (int64_t)bitrate;
lbit_rate += (bitrate / 2);

@@ -2323,6 +2332,29 @@ static AVCodecContext * icv_configure_video_stream_FFMPEG(AVFormatContext *oc,

static const int OPENCV_NO_FRAMES_WRITTEN_CODE = 1000;

static int icv_av_encapsulate_video_FFMPEG(AVFormatContext* oc, AVStream* video_st, AVCodecContext* c,
uint8_t* data, int sz, const int frame_idx, const bool key_frame)
{
#if LIBAVFORMAT_BUILD < CALC_FFMPEG_VERSION(57, 0, 0)
AVPacket pkt_;
av_init_packet(&pkt_);
AVPacket* pkt = &pkt_;
#else
AVPacket* pkt = av_packet_alloc();
#endif
if(key_frame)
pkt->flags |= PKT_FLAG_KEY;
pkt->pts = frame_idx;
pkt->size = sz;
pkt->data = data;
av_packet_rescale_ts(pkt, c->time_base, video_st->time_base);
int ret = av_write_frame(oc, pkt);
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(57, 0, 0)
av_packet_free(&pkt);
#endif
return ret;
}

static int icv_av_write_frame_FFMPEG( AVFormatContext * oc, AVStream * video_st, AVCodecContext * c,
uint8_t *, uint32_t,
AVFrame * picture, int frame_idx)
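In icv_av_encapsulate_video_FFMPEG above, pkt->pts is simply the frame index expressed in the codec time base (one tick per frame), and av_packet_rescale_ts() converts it into the stream time base chosen by the muxer. A small self-contained illustration of that arithmetic; the 25 fps rate and the 1/12800 stream time base are assumed example values, not something taken from the patch.

```cpp
#include <cstdint>
#include <cstdio>

// Equivalent of av_rescale_q(pts, src, dst) for this illustration:
// pts * (src_num * dst_den) / (src_den * dst_num), ignoring rounding modes.
static int64_t rescale(int64_t pts, int src_num, int src_den, int dst_num, int dst_den)
{
    return pts * src_num * dst_den / (static_cast<int64_t>(src_den) * dst_num);
}

int main()
{
    // Codec time base 1/25 (25 fps), assumed stream time base 1/12800.
    for (int frame_idx = 0; frame_idx < 4; ++frame_idx)
        std::printf("frame %d -> pts %lld\n", frame_idx,
                    static_cast<long long>(rescale(frame_idx, 1, 25, 1, 12800)));
    // Prints 0, 512, 1024, 1536: consecutive frames are 512 stream ticks
    // (512 / 12800 = 1/25 s) apart.
    return 0;
}
```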
@@ -2404,6 +2436,14 @@ static int icv_av_write_frame_FFMPEG( AVFormatContext * oc, AVStream * video_st,

/// write a frame with FFMPEG
bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int width, int height, int cn, int origin )
{
if (!encode_video) {
CV_Assert(cn == 1 && ((width > 0 && height == 1) || (width == 1 && height > 0 && step == 1)));
const bool set_key_frame = key_frame ? key_frame : idr_period ? frame_idx % idr_period == 0 : 1;
bool ret = icv_av_encapsulate_video_FFMPEG(oc, video_st, context, (uint8_t*)data, width, frame_idx, set_key_frame);
frame_idx++;
return ret;
}

// check parameters
if (input_pix_fmt == AV_PIX_FMT_BGR24) {
if (cn != 3) {
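The CV_Assert in the raw branch of writeFrame() above expects each packet as a single row (or column) of CV_8U data. When the encoded bytes already live in a caller-owned buffer they can be wrapped without copying; the function and parameter names below are illustrative only.

```cpp
#include <cstddef>
#include <opencv2/core.hpp>
#include <opencv2/videoio.hpp>

// Wrap an existing encoded-frame buffer as a 1 x size CV_8UC1 Mat header and
// hand it to a VideoWriter opened with VIDEOWRITER_PROP_RAW_VIDEO. The Mat
// does not copy or own the data, so the buffer must outlive the write() call.
void writeEncodedPacket(cv::VideoWriter& writer, const unsigned char* data, size_t size)
{
    cv::Mat packet(1, static_cast<int>(size), CV_8UC1, const_cast<unsigned char*>(data));
    writer.write(packet);
}
```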
@@ -2592,6 +2632,21 @@ double CvVideoWriter_FFMPEG::getProperty(int propId) const
return 0;
}

bool CvVideoWriter_FFMPEG::setProperty(int property_id, double value)
{
if (!video_st) return false;

switch (property_id)
{
case VIDEOWRITER_PROP_KEY_FLAG:
key_frame = static_cast<bool>(value);
break;
default:
return false;
}
return true;
}

/// close video output stream and free associated memory
void CvVideoWriter_FFMPEG::close()
{

@@ -2601,17 +2656,19 @@ void CvVideoWriter_FFMPEG::close()
// TODO -- do we need to account for latency here?

/* write the trailer, if any */
if (picture && ok && oc)
if ((!encode_video || picture) && ok && oc)
{
#if LIBAVFORMAT_BUILD < CALC_FFMPEG_VERSION(57, 0, 0)
if (!(oc->oformat->flags & AVFMT_RAWPICTURE))
#endif
{
for(;;)
{
int ret = icv_av_write_frame_FFMPEG( oc, video_st, context, outbuf, outbuf_size, NULL, frame_idx);
if( ret == OPENCV_NO_FRAMES_WRITTEN_CODE || ret < 0 )
break;
if (encode_video) {
for (;;)
{
int ret = icv_av_write_frame_FFMPEG(oc, video_st, context, outbuf, outbuf_size, NULL, frame_idx);
if (ret == OPENCV_NO_FRAMES_WRITTEN_CODE || ret < 0)
break;
}
}
}
av_write_trailer(oc);

@@ -2720,6 +2777,8 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc,

close();

encode_video = !params.get(VIDEOWRITER_PROP_RAW_VIDEO, false);
idr_period = params.get(VIDEOWRITER_PROP_KEY_INTERVAL, 0);
const bool is_color = params.get(VIDEOWRITER_PROP_IS_COLOR, true);
const int depth = params.get(VIDEOWRITER_PROP_DEPTH, CV_8U);
const bool is_supported = depth == CV_8U || (depth == CV_16U && !is_color);

@@ -2770,13 +2829,15 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc,
if(fps <= 0)
return false;

// we allow frames of odd width or height, but in this case we truncate
// the rightmost column/the bottom row. Probably, this should be handled more elegantly,
// but some internal functions inside FFMPEG swscale require even width/height.
width &= -2;
height &= -2;
if( width <= 0 || height <= 0 )
return false;
if (encode_video) {
// we allow frames of odd width or height, but in this case we truncate
// the rightmost column/the bottom row. Probably, this should be handled more elegantly,
// but some internal functions inside FFMPEG swscale require even width/height.
width &= -2;
height &= -2;
if (width <= 0 || height <= 0)
return false;
}

/* auto detect the output format from the name and fourcc code. */

@@ -3027,41 +3088,46 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc,
HWAccelIterator accel_iter(va_type, true/*isEncoder*/, dict);
while (accel_iter.good())
{
AVPixelFormat hw_format = AV_PIX_FMT_NONE;
AVHWDeviceType hw_type = AV_HWDEVICE_TYPE_NONE;
#else
do {
#endif
if (encode_video) {
#if USE_AV_HW_CODECS
accel_iter.parse_next();
AVHWDeviceType hw_type = accel_iter.hw_type();
codec = NULL;
AVPixelFormat hw_format = AV_PIX_FMT_NONE;
if (hw_device_ctx)
av_buffer_unref(&hw_device_ctx);
if (hw_type != AV_HWDEVICE_TYPE_NONE)
{
codec = hw_find_codec(codec_id, hw_type, av_codec_is_encoder, accel_iter.disabled_codecs().c_str(), &hw_format);
accel_iter.parse_next();
hw_type = accel_iter.hw_type();
codec = NULL;
hw_format = AV_PIX_FMT_NONE;
if (hw_device_ctx)
av_buffer_unref(&hw_device_ctx);
if (hw_type != AV_HWDEVICE_TYPE_NONE)
{
codec = hw_find_codec(codec_id, hw_type, av_codec_is_encoder, accel_iter.disabled_codecs().c_str(), &hw_format);
if (!codec)
continue;

hw_device_ctx = hw_create_device(hw_type, hw_device, accel_iter.device_subname(), use_opencl != 0);
if (!hw_device_ctx)
continue;
}
else if (hw_type == AV_HWDEVICE_TYPE_NONE)
#endif
{
codec = avcodec_find_encoder(codec_id);
if (!codec) {
CV_LOG_ERROR(NULL, "Could not find encoder for codec_id=" << (int)codec_id << ", error: "
<< icvFFMPEGErrStr(AVERROR_ENCODER_NOT_FOUND));
}
}
if (!codec)
continue;
}

hw_device_ctx = hw_create_device(hw_type, hw_device, accel_iter.device_subname(), use_opencl != 0);
if (!hw_device_ctx)
continue;
}
else if (hw_type == AV_HWDEVICE_TYPE_NONE)
#endif
{
codec = avcodec_find_encoder(codec_id);
if (!codec) {
CV_LOG_ERROR(NULL, "Could not find encoder for codec_id=" << (int)codec_id << ", error: "
<< icvFFMPEGErrStr(AVERROR_ENCODER_NOT_FOUND));
}
}
if (!codec)
continue;
#if USE_AV_HW_CODECS
AVPixelFormat format = (hw_format != AV_PIX_FMT_NONE) ? hw_format : codec_pix_fmt;
AVPixelFormat format = (hw_format != AV_PIX_FMT_NONE) ? hw_format : codec_pix_fmt;
#else
AVPixelFormat format = codec_pix_fmt;
AVPixelFormat format = codec_pix_fmt;
#endif

#ifdef CV_FFMPEG_CODECPAR

@@ -3069,7 +3135,7 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc,
#endif
context = icv_configure_video_stream_FFMPEG(oc, video_st, codec,
width, height, (int) (bitrate + 0.5),
fps, format, fourcc);
fps, format, fourcc, codec_id);
if (!context)
{
continue;

@@ -3082,17 +3148,18 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc,
av_dump_format(oc, 0, filename, 1);
#endif
#endif

if (encode_video) {
#if USE_AV_HW_CODECS
if (hw_device_ctx) {
context->hw_device_ctx = av_buffer_ref(hw_device_ctx);
if (hw_format != AV_PIX_FMT_NONE) {
context->hw_frames_ctx = hw_create_frames(NULL, hw_device_ctx, width, height, hw_format);
if (!context->hw_frames_ctx)
continue;
if (hw_device_ctx) {
context->hw_device_ctx = av_buffer_ref(hw_device_ctx);
if (hw_format != AV_PIX_FMT_NONE) {
context->hw_frames_ctx = hw_create_frames(NULL, hw_device_ctx, width, height, hw_format);
if (!context->hw_frames_ctx)
continue;
}
}
}
#endif
}

int64_t lbit_rate = (int64_t) context->bit_rate;
lbit_rate += (int64_t)(bitrate / 2);

@@ -3101,7 +3168,7 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc,
context->bit_rate = (int) lbit_rate;

/* open the codec */
err = avcodec_open2(context, codec, NULL);
err = !encode_video ? 0 : avcodec_open2(context, codec, NULL);
if (err >= 0) {
#if USE_AV_HW_CODECS
va_type = hw_type_to_va_type(hw_type);

@@ -3137,43 +3204,43 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc,
avcodec_parameters_from_context(video_st->codecpar, context);
#endif

outbuf = NULL;

if (encode_video) {
outbuf = NULL;
#if LIBAVFORMAT_BUILD < CALC_FFMPEG_VERSION(57, 0, 0)
if (!(oc->oformat->flags & AVFMT_RAWPICTURE))
if (!(oc->oformat->flags & AVFMT_RAWPICTURE))
#endif
{
/* allocate output buffer */
/* assume we will never get codec output with more than 4 bytes per pixel... */
outbuf_size = width*height*4;
outbuf = (uint8_t *) av_malloc(outbuf_size);
}
{
/* allocate output buffer */
/* assume we will never get codec output with more than 4 bytes per pixel... */
outbuf_size = width * height * 4;
outbuf = (uint8_t*)av_malloc(outbuf_size);
}

bool need_color_convert;
AVPixelFormat sw_pix_fmt = context->pix_fmt;
bool need_color_convert;
AVPixelFormat sw_pix_fmt = context->pix_fmt;
#if USE_AV_HW_CODECS
if (context->hw_frames_ctx)
sw_pix_fmt = ((AVHWFramesContext*)context->hw_frames_ctx->data)->sw_format;
if (context->hw_frames_ctx)
sw_pix_fmt = ((AVHWFramesContext*)context->hw_frames_ctx->data)->sw_format;
#endif

need_color_convert = (sw_pix_fmt != input_pix_fmt);
need_color_convert = (sw_pix_fmt != input_pix_fmt);

/* allocate the encoded raw picture */
picture = icv_alloc_picture_FFMPEG(sw_pix_fmt, context->width, context->height, need_color_convert);
if (!picture) {
return false;
}

/* if the output format is not our input format, then a temporary
picture of the input format is needed too. It is then converted
to the required output format */
input_picture = NULL;
if ( need_color_convert ) {
input_picture = icv_alloc_picture_FFMPEG(input_pix_fmt, context->width, context->height, false);
if (!input_picture) {
/* allocate the encoded raw picture */
picture = icv_alloc_picture_FFMPEG(sw_pix_fmt, context->width, context->height, need_color_convert);
if (!picture) {
return false;
}

/* if the output format is not our input format, then a temporary
picture of the input format is needed too. It is then converted
to the required output format */
input_picture = NULL;
if (need_color_convert) {
input_picture = icv_alloc_picture_FFMPEG(input_pix_fmt, context->width, context->height, false);
if (!input_picture) {
return false;
}
}
}

/* open the output file, if needed */
@@ -235,6 +235,206 @@ const videoio_container_params_t videoio_container_params[] =

INSTANTIATE_TEST_CASE_P(/**/, videoio_container, testing::ValuesIn(videoio_container_params));

typedef tuple<string, string, int, int> videoio_encapsulate_params_t;
typedef testing::TestWithParam< videoio_encapsulate_params_t > videoio_encapsulate;

TEST_P(videoio_encapsulate, write)
{
#ifdef _WIN32
throw SkipTestException("Test disabled until PR for raw video encapsulation is merged and windows dll is updated");
#else
const VideoCaptureAPIs api = CAP_FFMPEG;
if (!videoio_registry::hasBackend(api))
throw SkipTestException("FFmpeg backend was not found");

const string fileName = findDataFile(get<0>(GetParam()));
const string ext = get<1>(GetParam());
const int idrPeriod = get<2>(GetParam());
const int nFrames = get<3>(GetParam());
const string fileNameOut = tempfile(cv::format("test_encapsulated_stream.%s", ext.c_str()).c_str());

// Use VideoWriter to encapsulate encoded video read with VideoReader
{
VideoCapture capRaw(fileName, api, { CAP_PROP_FORMAT, -1 });
ASSERT_TRUE(capRaw.isOpened());
const int width = static_cast<int>(capRaw.get(CAP_PROP_FRAME_WIDTH));
const int height = static_cast<int>(capRaw.get(CAP_PROP_FRAME_HEIGHT));
const double fps = capRaw.get(CAP_PROP_FPS);
const int codecExtradataIndex = static_cast<int>(capRaw.get(CAP_PROP_CODEC_EXTRADATA_INDEX));
Mat extraData;
capRaw.retrieve(extraData, codecExtradataIndex);
const int fourcc = static_cast<int>(capRaw.get(CAP_PROP_FOURCC));
const bool mpeg4 = (fourcc == fourccFromString("FMP4"));

VideoWriter container(fileNameOut, api, fourcc, fps, { width, height }, { VideoWriterProperties::VIDEOWRITER_PROP_RAW_VIDEO, 1, VideoWriterProperties::VIDEOWRITER_PROP_KEY_INTERVAL, idrPeriod });
ASSERT_TRUE(container.isOpened());
Mat rawFrame;
for (int i = 0; i < nFrames; i++) {
ASSERT_TRUE(capRaw.read(rawFrame));
ASSERT_FALSE(rawFrame.empty());
if (i == 0 && mpeg4) {
Mat tmp = rawFrame.clone();
const size_t newSzt = tmp.total() + extraData.total();
const int newSz = static_cast<int>(newSzt);
ASSERT_TRUE(newSzt == static_cast<size_t>(newSz));
rawFrame = Mat(1, newSz, CV_8UC1);
memcpy(rawFrame.data, extraData.data, extraData.total());
memcpy(rawFrame.data + extraData.total(), tmp.data, tmp.total());
}
container.write(rawFrame);
}
container.release();
}

std::cout << "Checking encapsulated video container: " << fileNameOut << std::endl;

// Check encapsulated video container is "identical" to the original
{
VideoCapture capReference(fileName), capActual(fileNameOut), capActualRaw(fileNameOut, api, { CAP_PROP_FORMAT, -1 });
ASSERT_TRUE(capReference.isOpened());
ASSERT_TRUE(capActual.isOpened());
ASSERT_TRUE(capActualRaw.isOpened());
const double fpsReference = capReference.get(CAP_PROP_FPS);
const double fpsActual = capActual.get(CAP_PROP_FPS);
ASSERT_EQ(fpsReference, fpsActual);
const int nFramesActual = static_cast<int>(capActual.get(CAP_PROP_FRAME_COUNT));
ASSERT_EQ(nFrames, nFramesActual);

Mat reference, actual;
for (int i = 0; i < nFrames; i++) {
ASSERT_TRUE(capReference.read(reference));
ASSERT_FALSE(reference.empty());
ASSERT_TRUE(capActual.read(actual));
ASSERT_FALSE(actual.empty());
ASSERT_EQ(0, cvtest::norm(reference, actual, NORM_INF));

ASSERT_TRUE(capActualRaw.grab());
const bool keyFrameActual = capActualRaw.get(CAP_PROP_LRF_HAS_KEY_FRAME) == 1.;
const bool keyFrameReference = idrPeriod ? i % idrPeriod == 0 : 1;
ASSERT_EQ(keyFrameReference, keyFrameActual);
}
}

ASSERT_EQ(0, remove(fileNameOut.c_str()));
#endif
}

const videoio_encapsulate_params_t videoio_encapsulate_params[] =
{
videoio_encapsulate_params_t("video/big_buck_bunny.h264", "avi", 125, 125),
videoio_encapsulate_params_t("video/big_buck_bunny.h265", "mp4", 125, 125),
videoio_encapsulate_params_t("video/big_buck_bunny.wmv", "wmv", 12, 13),
videoio_encapsulate_params_t("video/big_buck_bunny.mp4", "mp4", 12, 13),
videoio_encapsulate_params_t("video/big_buck_bunny.mjpg.avi", "mp4", 0, 4),
videoio_encapsulate_params_t("video/big_buck_bunny.mov", "mp4", 12, 13),
videoio_encapsulate_params_t("video/big_buck_bunny.avi", "mp4", 125, 125),
videoio_encapsulate_params_t("video/big_buck_bunny.mpg", "mp4", 12, 13),
videoio_encapsulate_params_t("video/VID00003-20100701-2204.wmv", "wmv", 12, 13),
videoio_encapsulate_params_t("video/VID00003-20100701-2204.mpg", "mp4", 12, 13),
videoio_encapsulate_params_t("video/VID00003-20100701-2204.avi", "mp4", 12, 13),
videoio_encapsulate_params_t("video/VID00003-20100701-2204.3GP", "mp4", 51, 52),
videoio_encapsulate_params_t("video/sample_sorenson.avi", "mp4", 12, 13),
videoio_encapsulate_params_t("video/sample_322x242_15frames.yuv420p.libxvid.mp4", "mp4", 3, 4),
videoio_encapsulate_params_t("video/sample_322x242_15frames.yuv420p.mpeg2video.mp4", "mp4", 12, 13),
videoio_encapsulate_params_t("video/sample_322x242_15frames.yuv420p.mjpeg.mp4", "mp4", 0, 5),
videoio_encapsulate_params_t("video/sample_322x242_15frames.yuv420p.libx264.mp4", "avi", 15, 15),
videoio_encapsulate_params_t("../cv/tracking/faceocc2/data/faceocc2.webm", "webm", 128, 129),
videoio_encapsulate_params_t("../cv/video/1920x1080.avi", "mp4", 12, 13),
videoio_encapsulate_params_t("../cv/video/768x576.avi", "avi", 15, 16)
// Not supported by with FFmpeg:
//videoio_encapsulate_params_t("video/sample_322x242_15frames.yuv420p.libx265.mp4", "mp4", 15, 15),
//videoio_encapsulate_params_t("video/sample_322x242_15frames.yuv420p.libvpx-vp9.mp4", "mp4", 15, 15),

};

INSTANTIATE_TEST_CASE_P(/**/, videoio_encapsulate, testing::ValuesIn(videoio_encapsulate_params));
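The videoio_encapsulate test above (and the videoio_encapsulate_set_idr test that follows) splices the codec extradata, obtained via CAP_PROP_CODEC_EXTRADATA_INDEX, onto the first packet of FMP4 streams before writing it. The same step, pulled out as a small helper purely for illustration:

```cpp
#include <cstring>
#include <opencv2/core.hpp>

// Return a new 1 x (extraData + firstPacket) CV_8UC1 Mat with the container
// extradata prepended to the first raw packet, as required for MPEG-4 (FMP4)
// streams when re-muxing them with VIDEOWRITER_PROP_RAW_VIDEO.
static cv::Mat prependExtraData(const cv::Mat& extraData, const cv::Mat& firstPacket)
{
    cv::Mat out(1, static_cast<int>(extraData.total() + firstPacket.total()), CV_8UC1);
    std::memcpy(out.data, extraData.data, extraData.total());
    std::memcpy(out.data + extraData.total(), firstPacket.data, firstPacket.total());
    return out;
}
```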
TEST(videoio_encapsulate_set_idr, write)
{
#ifdef _WIN32
throw SkipTestException("Test disabled until PR for raw video encapsulation is merged and windows dll is updated");
#else
const VideoCaptureAPIs api = CAP_FFMPEG;
if (!videoio_registry::hasBackend(api))
throw SkipTestException("FFmpeg backend was not found");

const string fileName = findDataFile("video/big_buck_bunny.mp4");
const string ext = "mp4";
const string fileNameOut = tempfile(cv::format("test_encapsulated_stream_set_idr.%s", ext.c_str()).c_str());

// Use VideoWriter to encapsulate encoded video read with VideoReader
{
VideoCapture capRaw(fileName, api, { CAP_PROP_FORMAT, -1 });
ASSERT_TRUE(capRaw.isOpened());
const int width = static_cast<int>(capRaw.get(CAP_PROP_FRAME_WIDTH));
const int height = static_cast<int>(capRaw.get(CAP_PROP_FRAME_HEIGHT));
const double fps = capRaw.get(CAP_PROP_FPS);
const int codecExtradataIndex = static_cast<int>(capRaw.get(CAP_PROP_CODEC_EXTRADATA_INDEX));
Mat extraData;
capRaw.retrieve(extraData, codecExtradataIndex);
const int fourcc = static_cast<int>(capRaw.get(CAP_PROP_FOURCC));
const bool mpeg4 = (fourcc == fourccFromString("FMP4"));

VideoWriter container(fileNameOut, api, fourcc, fps, { width, height }, { VideoWriterProperties::VIDEOWRITER_PROP_RAW_VIDEO, 1 });
ASSERT_TRUE(container.isOpened());
Mat rawFrame;
int i = 0;
while (capRaw.read(rawFrame)) {
ASSERT_FALSE(rawFrame.empty());
if (i == 0 && mpeg4) {
Mat tmp = rawFrame.clone();
const size_t newSzt = tmp.total() + extraData.total();
const int newSz = static_cast<int>(newSzt);
ASSERT_TRUE(newSzt == static_cast<size_t>(newSz));
rawFrame = Mat(1, newSz, CV_8UC1);
memcpy(rawFrame.data, extraData.data, extraData.total());
memcpy(rawFrame.data + extraData.total(), tmp.data, tmp.total());
}
if (static_cast<bool>(capRaw.get(CAP_PROP_LRF_HAS_KEY_FRAME)))
container.set(VideoWriterProperties::VIDEOWRITER_PROP_KEY_FLAG, 1);
else
container.set(VideoWriterProperties::VIDEOWRITER_PROP_KEY_FLAG, 0);
container.write(rawFrame);
i++;
}
container.release();
}

std::cout << "Checking encapsulated video container: " << fileNameOut << std::endl;

// Check encapsulated video container is "identical" to the original
{
VideoCapture capReference(fileName), capReferenceRaw(fileName, api, { CAP_PROP_FORMAT, -1 }), capActual(fileNameOut), capActualRaw(fileNameOut, api, { CAP_PROP_FORMAT, -1 });
ASSERT_TRUE(capReference.isOpened());
ASSERT_TRUE(capActual.isOpened());
ASSERT_TRUE(capReferenceRaw.isOpened());
ASSERT_TRUE(capActualRaw.isOpened());
const double fpsReference = capReference.get(CAP_PROP_FPS);
const double fpsActual = capActual.get(CAP_PROP_FPS);
ASSERT_EQ(fpsReference, fpsActual);
const int nFramesReference = static_cast<int>(capReference.get(CAP_PROP_FRAME_COUNT));
const int nFramesActual = static_cast<int>(capActual.get(CAP_PROP_FRAME_COUNT));
ASSERT_EQ(nFramesReference, nFramesActual);

Mat reference, actual;
for (int i = 0; i < nFramesReference; i++) {
ASSERT_TRUE(capReference.read(reference));
ASSERT_FALSE(reference.empty());
ASSERT_TRUE(capActual.read(actual));
ASSERT_FALSE(actual.empty());
ASSERT_EQ(0, cvtest::norm(reference, actual, NORM_INF));
ASSERT_TRUE(capReferenceRaw.grab());
ASSERT_TRUE(capActualRaw.grab());
const bool keyFrameReference = capActualRaw.get(CAP_PROP_LRF_HAS_KEY_FRAME) == 1.;
const bool keyFrameActual = capActualRaw.get(CAP_PROP_LRF_HAS_KEY_FRAME) == 1.;
ASSERT_EQ(keyFrameReference, keyFrameActual);
}
}

ASSERT_EQ(0, remove(fileNameOut.c_str()));
#endif
}

typedef tuple<string, string, int> videoio_skip_params_t;
typedef testing::TestWithParam< videoio_skip_params_t > videoio_skip;