videoio: retrieve encoded frames through FFmpeg backend

- backport 15290
- add extra test case
Author: cudawarped, 2019-11-18 14:07:06 +00:00 (committed by Alexander Alekhin)
parent f4d55d512f
commit aff8c9bd28
4 changed files with 287 additions and 6 deletions


@@ -136,7 +136,8 @@ enum VideoCaptureProperties {
CAP_PROP_FPS =5, //!< Frame rate.
CAP_PROP_FOURCC =6, //!< 4-character code of codec. see VideoWriter::fourcc .
CAP_PROP_FRAME_COUNT =7, //!< Number of frames in the video file.
CAP_PROP_FORMAT =8, //!< Format of the %Mat objects returned by VideoCapture::retrieve().
CAP_PROP_FORMAT =8, //!< Format of the %Mat objects (see Mat::type()) returned by VideoCapture::retrieve().
//!< Set value -1 to fetch undecoded RAW video streams (as Mat 8UC1).
CAP_PROP_MODE =9, //!< Backend-specific value indicating the current capture mode.
CAP_PROP_BRIGHTNESS =10, //!< Brightness of the image (only for those cameras that support).
CAP_PROP_CONTRAST =11, //!< Contrast of the image (only for cameras).
@@ -173,6 +174,7 @@ enum VideoCaptureProperties {
CAP_PROP_CHANNEL =43, //!< Video input or Channel Number (only for those cameras that support)
CAP_PROP_AUTO_WB =44, //!< enable/disable auto white-balance
CAP_PROP_WB_TEMPERATURE=45, //!< white-balance color temperature
CAP_PROP_CODEC_PIXEL_FORMAT =46, //!< (read-only) codec's pixel format. 4-character code - see VideoWriter::fourcc . Subset of [AV_PIX_FMT_*](https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/raw.c) or -1 if unknown
#ifndef CV_DOXYGEN
CV__CAP_PROP_LATEST
#endif
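
For orientation, here is a minimal usage sketch of the raw mode documented above, modeled on the test added at the end of this patch; the file names are placeholders. After setting CAP_PROP_FORMAT to -1 (before the first grab), every retrieved Mat holds one undecoded packet as a 1 x N CV_8UC1 buffer, and CAP_PROP_CODEC_PIXEL_FORMAT reports the codec's pixel format as a fourcc tag.

#include <opencv2/videoio.hpp>
#include <fstream>

int main()
{
    cv::VideoCapture cap("input.h264", cv::CAP_FFMPEG);   // placeholder input file
    if (!cap.isOpened())
        return 1;
    if (!cap.set(cv::CAP_PROP_FORMAT, -1))                 // must be set before the first grab
        return 1;                                          // raw stream extraction not supported
    double pixFmt = cap.get(cv::CAP_PROP_CODEC_PIXEL_FORMAT); // read-only: fourcc tag or -1
    (void)pixFmt;
    std::ofstream out("dump.h264", std::ios::binary);      // placeholder output file
    cv::Mat pkt;
    while (cap.read(pkt))                                   // pkt: 1 x N, CV_8UC1, one encoded packet
        out.write(reinterpret_cast<const char*>(pkt.data), (std::streamsize)pkt.total());
    return 0;
}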


@@ -24,8 +24,10 @@ enum
CV_FFMPEG_CAP_PROP_FPS=5,
CV_FFMPEG_CAP_PROP_FOURCC=6,
CV_FFMPEG_CAP_PROP_FRAME_COUNT=7,
CV_FFMPEG_CAP_PROP_FORMAT=8,
CV_FFMPEG_CAP_PROP_SAR_NUM=40,
CV_FFMPEG_CAP_PROP_SAR_DEN=41
CV_FFMPEG_CAP_PROP_SAR_DEN=41,
CV_FFMPEG_CAP_PROP_CODEC_PIXEL_FORMAT=46
};
typedef struct CvCapture_FFMPEG CvCapture_FFMPEG;


@@ -528,6 +528,17 @@ struct CvCapture_FFMPEG
#if USE_AV_INTERRUPT_CALLBACK
AVInterruptCallbackMetadata interrupt_metadata;
#endif
bool setRaw();
bool processRawPacket();
bool rawMode;
bool rawModeInitialized;
AVPacket packet_filtered;
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
AVBSFContext* bsfc;
#else
AVBitStreamFilterContext* bsfc;
#endif
};
void CvCapture_FFMPEG::init()
@@ -552,6 +563,12 @@ void CvCapture_FFMPEG::init()
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(52, 111, 0)
dict = NULL;
#endif
rawMode = false;
rawModeInitialized = false;
memset(&packet_filtered, 0, sizeof(packet_filtered));
av_init_packet(&packet_filtered);
bsfc = NULL;
}
@@ -620,6 +637,21 @@ void CvCapture_FFMPEG::close()
av_dict_free(&dict);
#endif
if (packet_filtered.data)
{
_opencv_ffmpeg_av_packet_unref(&packet_filtered);
packet_filtered.data = NULL;
}
if (bsfc)
{
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
av_bsf_free(&bsfc);
#else
av_bitstream_filter_close(bsfc);
#endif
}
init();
}
@@ -996,6 +1028,124 @@ exit_func:
return valid;
}
bool CvCapture_FFMPEG::setRaw()
{
if (!rawMode)
{
if (frame_number != 0)
{
CV_WARN("Incorrect usage: do not grab frames before .set(CAP_PROP_FORMAT, -1)");
}
// binary stream filter creation is moved into processRawPacket()
rawMode = true;
}
return true;
}
bool CvCapture_FFMPEG::processRawPacket()
{
if (packet.data == NULL) // EOF
return false;
if (!rawModeInitialized)
{
rawModeInitialized = true;
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
AVCodecID eVideoCodec = ic->streams[video_stream]->codecpar->codec_id;
#elif LIBAVFORMAT_BUILD > 4628
AVCodecID eVideoCodec = video_st->codec->codec_id;
#else
AVCodecID eVideoCodec = video_st->codec.codec_id;
#endif
const char* filterName = NULL;
if (eVideoCodec == CV_CODEC(CODEC_ID_H264)
#if LIBAVCODEC_VERSION_MICRO >= 100 \
&& LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(57, 24, 102) // FFmpeg 3.0
|| eVideoCodec == CV_CODEC(CODEC_ID_H265)
#elif LIBAVCODEC_VERSION_MICRO < 100 \
&& LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(55, 34, 1) // libav v10+
|| eVideoCodec == CV_CODEC(CODEC_ID_HEVC)
#endif
)
{
// check start code prefixed mode (as defined in the Annex B H.264 / H.265 specification)
if (packet.size >= 5
&& !(packet.data[0] == 0 && packet.data[1] == 0 && packet.data[2] == 0 && packet.data[3] == 1)
&& !(packet.data[0] == 0 && packet.data[1] == 0 && packet.data[2] == 1)
)
{
filterName = eVideoCodec == CV_CODEC(CODEC_ID_H264) ? "h264_mp4toannexb" : "hevc_mp4toannexb";
}
}
if (filterName)
{
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
const AVBitStreamFilter * bsf = av_bsf_get_by_name(filterName);
if (!bsf)
{
CV_WARN(cv::format("Bitstream filter is not available: %s", filterName).c_str());
return false;
}
int err = av_bsf_alloc(bsf, &bsfc);
if (err < 0)
{
CV_WARN("Error allocating context for bitstream buffer");
return false;
}
avcodec_parameters_copy(bsfc->par_in, ic->streams[video_stream]->codecpar);
err = av_bsf_init(bsfc);
if (err < 0)
{
CV_WARN("Error initializing bitstream buffer");
return false;
}
#else
bsfc = av_bitstream_filter_init(filterName);
if (!bsfc)
{
CV_WARN(cv::format("Bitstream filter is not available: %s", filterName).c_str());
return false;
}
#endif
}
}
if (bsfc)
{
if (packet_filtered.data)
{
_opencv_ffmpeg_av_packet_unref(&packet_filtered);
}
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
int err = av_bsf_send_packet(bsfc, &packet);
if (err < 0)
{
CV_WARN("Packet submission for filtering failed");
return false;
}
err = av_bsf_receive_packet(bsfc, &packet_filtered);
if (err < 0)
{
CV_WARN("Filtered packet retrieve failed");
return false;
}
#else
#if LIBAVFORMAT_BUILD > 4628
AVCodecContext* ctx = ic->streams[video_stream]->codec;
#else
AVCodecContext* ctx = &ic->streams[video_stream]->codec;
#endif
int err = av_bitstream_filter_filter(bsfc, ctx, NULL, &packet_filtered.data,
&packet_filtered.size, packet.data, packet.size, packet_filtered.flags & AV_PKT_FLAG_KEY);
if (err < 0)
{
CV_WARN("Packet filtering failed");
return false;
}
#endif
return packet_filtered.data != NULL;
}
return packet.data != NULL;
}
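
The start-code test in processRawPacket() above is what selects the h264_mp4toannexb / hevc_mp4toannexb filter: packets that do not already begin with an Annex B start code are assumed to be length-prefixed and are rewritten. A standalone sketch of that predicate, with an illustrative helper name that is not part of the patch, assuming the same 5-byte minimum as the code above:

#include <cstddef>
#include <cstdint>

// Returns true when a packet lacks an Annex B start code (00 00 00 01 or 00 00 01)
// and therefore needs the mp4toannexb bitstream filter; mirrors the check above.
static bool needsAnnexBFilter(const uint8_t* data, size_t size)
{
    if (size < 5)
        return false;
    const bool startCode4 = data[0] == 0 && data[1] == 0 && data[2] == 0 && data[3] == 1;
    const bool startCode3 = data[0] == 0 && data[1] == 0 && data[2] == 1;
    return !startCode4 && !startCode3;
}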
bool CvCapture_FFMPEG::grabFrame()
{
@@ -1047,6 +1197,12 @@ bool CvCapture_FFMPEG::grabFrame()
continue;
}
if (rawMode)
{
valid = processRawPacket();
break;
}
// Decode video frame
#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 2, 0)
avcodec_decode_video2(video_st->codec, picture, &got_picture, &packet);
@@ -1067,7 +1223,6 @@ bool CvCapture_FFMPEG::grabFrame()
if( picture_pts == AV_NOPTS_VALUE_ )
picture_pts = picture->pkt_pts != AV_NOPTS_VALUE_ && picture->pkt_pts != 0 ? picture->pkt_pts : picture->pkt_dts;
frame_number++;
valid = true;
}
else
@@ -1078,7 +1233,10 @@ bool CvCapture_FFMPEG::grabFrame()
}
}
if( valid && first_frame_number < 0 )
if (valid)
frame_number++;
if (!rawMode && valid && first_frame_number < 0)
first_frame_number = dts_to_frame_number(picture_pts);
#if USE_AV_INTERRUPT_CALLBACK
@@ -1086,14 +1244,28 @@ bool CvCapture_FFMPEG::grabFrame()
interrupt_metadata.timeout_after_ms = 0;
#endif
// return if we have a new picture or not
// return if we have a new frame or not
return valid;
}
bool CvCapture_FFMPEG::retrieveFrame(int, unsigned char** data, int* step, int* width, int* height, int* cn)
{
if( !video_st || !picture->data[0] )
if (!video_st)
return false;
if (rawMode)
{
AVPacket& p = bsfc ? packet_filtered : packet;
*data = p.data;
*step = p.size;
*width = p.size;
*height = 1;
*cn = 1;
return p.data != NULL;
}
if (!picture->data[0])
return false;
if( img_convert_ctx == NULL ||
@@ -1216,6 +1388,20 @@ double CvCapture_FFMPEG::getProperty( int property_id ) const
return _opencv_ffmpeg_get_sample_aspect_ratio(ic->streams[video_stream]).num;
case CV_FFMPEG_CAP_PROP_SAR_DEN:
return _opencv_ffmpeg_get_sample_aspect_ratio(ic->streams[video_stream]).den;
case CV_FFMPEG_CAP_PROP_CODEC_PIXEL_FORMAT:
{
#if LIBAVFORMAT_BUILD > 4628
AVPixelFormat pix_fmt = video_st->codec->pix_fmt;
#else
AVPixelFormat pix_fmt = video_st->codec.pix_fmt;
#endif
unsigned int fourcc_tag = avcodec_pix_fmt_to_codec_tag(pix_fmt);
return (fourcc_tag == 0) ? (double)-1 : (double)fourcc_tag;
}
case CV_FFMPEG_CAP_PROP_FORMAT:
if (rawMode)
return -1;
break;
default:
break;
}
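
The CAP_PROP_CODEC_PIXEL_FORMAT getter above returns a packed fourcc tag (or -1 if unknown). As an illustration of reading it back on the application side, here is a hypothetical helper; the tests below use their own fourccToString utility for the same purpose.

#include <string>

// Unpack the double returned by get(CAP_PROP_CODEC_PIXEL_FORMAT) into 4 characters,
// e.g. AV_PIX_FMT_YUV420P maps to the tag "I420"; a negative value means unknown.
static std::string pixelFormatTagToString(double v)
{
    if (v < 0)
        return std::string("unknown");
    const unsigned int tag = (unsigned int)v;
    std::string s(4, ' ');
    for (int i = 0; i < 4; ++i)
        s[i] = (char)((tag >> (8 * i)) & 0xFF);
    return s;
}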
@@ -1385,6 +1571,10 @@ bool CvCapture_FFMPEG::setProperty( int property_id, double value )
picture_pts=(int64_t)value;
}
break;
case CV_FFMPEG_CAP_PROP_FORMAT:
if (value == -1)
return setRaw();
return false;
default:
return false;
}


@@ -218,6 +218,93 @@ TEST(Videoio_Video, ffmpeg_image) { CV_FFmpegReadImageTest test; test.safe_run()
#if defined(HAVE_FFMPEG)
typedef tuple<VideoCaptureAPIs, string, string, string, string, string> videoio_container_params_t;
typedef testing::TestWithParam< videoio_container_params_t > videoio_container;
TEST_P(videoio_container, read)
{
const VideoCaptureAPIs api = get<0>(GetParam());
//if (!videoio_registry::hasBackend(api))
// throw SkipTestException("Backend was not found");
const string path = get<1>(GetParam());
const string ext = get<2>(GetParam());
const string ext_raw = get<3>(GetParam());
const string codec = get<4>(GetParam());
const string pixelFormat = get<5>(GetParam());
const string fileName = path + "." + ext;
const string fileNameOut = tempfile(cv::format("test_container_stream.%s", ext_raw.c_str()).c_str());
// Write encoded video read using VideoContainer to tmp file
size_t totalBytes = 0;
{
VideoCapture container(findDataFile(fileName), api);
if (!container.isOpened())
throw SkipTestException("Video stream is not supported");
if (!container.set(CAP_PROP_FORMAT, -1)) // turn off video decoder (extract stream)
throw SkipTestException("Fetching of RAW video streams is not supported");
ASSERT_EQ(-1.f, container.get(CAP_PROP_FORMAT)); // check
EXPECT_EQ(codec, fourccToString((int)container.get(CAP_PROP_FOURCC)));
EXPECT_EQ(pixelFormat, fourccToString((int)container.get(CAP_PROP_CODEC_PIXEL_FORMAT)));
std::ofstream file(fileNameOut.c_str(), ios::out | ios::trunc | std::ios::binary);
Mat raw_data;
while (true)
{
container >> raw_data;
size_t size = raw_data.total();
if (raw_data.empty())
break;
ASSERT_EQ(CV_8UC1, raw_data.type());
ASSERT_LE(raw_data.dims, 2);
ASSERT_EQ(raw_data.rows, 1);
ASSERT_EQ((size_t)raw_data.cols, raw_data.total());
ASSERT_TRUE(raw_data.isContinuous());
totalBytes += size;
file.write(reinterpret_cast<char*>(raw_data.data), size);
ASSERT_FALSE(file.fail());
}
ASSERT_GE(totalBytes, (size_t)65536) << "Encoded stream is too small";
}
std::cout << "Checking extracted video stream: " << fileNameOut << " (size: " << totalBytes << " bytes)" << std::endl;
// Check decoded frames read from original media are equal to frames decoded from tmp file
{
VideoCapture capReference(findDataFile(fileName), api);
ASSERT_TRUE(capReference.isOpened());
VideoCapture capActual(fileNameOut.c_str(), api);
ASSERT_TRUE(capActual.isOpened());
Mat reference, actual;
int nframes = 0, n_err = 0;
while (capReference.read(reference) && n_err < 3)
{
nframes++;
ASSERT_TRUE(capActual.read(actual)) << nframes;
EXPECT_EQ(0, cvtest::norm(actual, reference, NORM_INF)) << "frame=" << nframes << " err=" << ++n_err;
}
ASSERT_GT(nframes, 0);
}
ASSERT_EQ(0, remove(fileNameOut.c_str()));
}
const videoio_container_params_t videoio_container_params[] =
{
videoio_container_params_t(CAP_FFMPEG, "video/big_buck_bunny", "h264", "h264", "h264", "I420"),
videoio_container_params_t(CAP_FFMPEG, "video/big_buck_bunny", "h265", "h265", "hevc", "I420"),
videoio_container_params_t(CAP_FFMPEG, "video/big_buck_bunny", "mjpg.avi", "mjpg", "MJPG", "I420"),
//videoio_container_params_t(CAP_FFMPEG, "video/big_buck_bunny", "h264.mkv", "mkv.h264", "h264", "I420"),
//videoio_container_params_t(CAP_FFMPEG, "video/big_buck_bunny", "h265.mkv", "mkv.h265", "hevc", "I420"),
//videoio_container_params_t(CAP_FFMPEG, "video/big_buck_bunny", "h264.mp4", "mp4.avc1", "avc1", "I420"),
//videoio_container_params_t(CAP_FFMPEG, "video/big_buck_bunny", "h265.mp4", "mp4.hev1", "hev1", "I420"),
};
INSTANTIATE_TEST_CASE_P(/**/, videoio_container, testing::ValuesIn(videoio_container_params));
//==========================================================================
//////////////////////////////// Parallel VideoWriters and VideoCaptures ////////////////////////////////////
class CreateVideoWriterInvoker :