diff --git a/modules/highgui/src/cap_ffmpeg_impl.hpp b/modules/highgui/src/cap_ffmpeg_impl.hpp index 1b798704fd..6df542afc0 100644 --- a/modules/highgui/src/cap_ffmpeg_impl.hpp +++ b/modules/highgui/src/cap_ffmpeg_impl.hpp @@ -118,11 +118,6 @@ extern "C" { #define CV_WARN(message) fprintf(stderr, "warning: %s (%s:%d)\n", message, __FILE__, __LINE__) #endif -/* PIX_FMT_RGBA32 macro changed in newer ffmpeg versions */ -#ifndef PIX_FMT_RGBA32 -#define PIX_FMT_RGBA32 PIX_FMT_RGB32 -#endif - #define CALC_FFMPEG_VERSION(a,b,c) ( a<<16 | b<<8 | c ) #if defined WIN32 || defined _WIN32 @@ -132,6 +127,11 @@ extern "C" { #include #include #include + #include +#if defined __APPLE__ + #include + #include +#endif #endif #ifndef MIN @@ -156,6 +156,155 @@ extern "C" { # define CV_CODEC(name) name #endif +#if LIBAVUTIL_BUILD < (LIBAVUTIL_VERSION_MICRO >= 100 \ + ? CALC_FFMPEG_VERSION(51, 74, 100) : CALC_FFMPEG_VERSION(51, 42, 0)) +#define AVPixelFormat PixelFormat +#define AV_PIX_FMT_BGR24 PIX_FMT_BGR24 +#define AV_PIX_FMT_RGB24 PIX_FMT_RGB24 +#define AV_PIX_FMT_GRAY8 PIX_FMT_GRAY8 +#define AV_PIX_FMT_YUV422P PIX_FMT_YUV422P +#define AV_PIX_FMT_YUV420P PIX_FMT_YUV420P +#define AV_PIX_FMT_YUV444P PIX_FMT_YUV444P +#define AV_PIX_FMT_YUVJ420P PIX_FMT_YUVJ420P +#define AV_PIX_FMT_GRAY16LE PIX_FMT_GRAY16LE +#define AV_PIX_FMT_GRAY16BE PIX_FMT_GRAY16BE +#endif + +#if LIBAVUTIL_BUILD >= (LIBAVUTIL_VERSION_MICRO >= 100 \ + ? CALC_FFMPEG_VERSION(52, 38, 100) : CALC_FFMPEG_VERSION(52, 13, 0)) +#define USE_AV_FRAME_GET_BUFFER 1 +#else +#define USE_AV_FRAME_GET_BUFFER 0 +#ifndef AV_NUM_DATA_POINTERS // required for 0.7.x/0.8.x ffmpeg releases +#define AV_NUM_DATA_POINTERS 4 +#endif +#endif + + +#ifndef USE_AV_INTERRUPT_CALLBACK +#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 21, 0) +#define USE_AV_INTERRUPT_CALLBACK 1 +#else +#define USE_AV_INTERRUPT_CALLBACK 0 +#endif +#endif + +#if USE_AV_INTERRUPT_CALLBACK +#define LIBAVFORMAT_INTERRUPT_TIMEOUT_MS 30000 + +#ifdef WIN32 +// http://stackoverflow.com/questions/5404277/porting-clock-gettime-to-windows + +static +inline LARGE_INTEGER get_filetime_offset() +{ + SYSTEMTIME s; + FILETIME f; + LARGE_INTEGER t; + + s.wYear = 1970; + s.wMonth = 1; + s.wDay = 1; + s.wHour = 0; + s.wMinute = 0; + s.wSecond = 0; + s.wMilliseconds = 0; + SystemTimeToFileTime(&s, &f); + t.QuadPart = f.dwHighDateTime; + t.QuadPart <<= 32; + t.QuadPart |= f.dwLowDateTime; + return t; +} + +static +inline void get_monotonic_time(timespec *tv) +{ + LARGE_INTEGER t; + FILETIME f; + double microseconds; + static LARGE_INTEGER offset; + static double frequencyToMicroseconds; + static int initialized = 0; + static BOOL usePerformanceCounter = 0; + + if (!initialized) + { + LARGE_INTEGER performanceFrequency; + initialized = 1; + usePerformanceCounter = QueryPerformanceFrequency(&performanceFrequency); + if (usePerformanceCounter) + { + QueryPerformanceCounter(&offset); + frequencyToMicroseconds = (double)performanceFrequency.QuadPart / 1000000.; + } + else + { + offset = get_filetime_offset(); + frequencyToMicroseconds = 10.; + } + } + + if (usePerformanceCounter) + { + QueryPerformanceCounter(&t); + } else { + GetSystemTimeAsFileTime(&f); + t.QuadPart = f.dwHighDateTime; + t.QuadPart <<= 32; + t.QuadPart |= f.dwLowDateTime; + } + + t.QuadPart -= offset.QuadPart; + microseconds = (double)t.QuadPart / frequencyToMicroseconds; + t.QuadPart = microseconds; + tv->tv_sec = t.QuadPart / 1000000; + tv->tv_nsec = (t.QuadPart % 1000000) * 1000; +} +#else +static +inline void get_monotonic_time(timespec *time) +{ +#if defined(__APPLE__) && defined(__MACH__) + clock_serv_t cclock; + mach_timespec_t mts; + host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock); + clock_get_time(cclock, &mts); + mach_port_deallocate(mach_task_self(), cclock); + time->tv_sec = mts.tv_sec; + time->tv_nsec = mts.tv_nsec; +#else + clock_gettime(CLOCK_MONOTONIC, time); +#endif +} +#endif + +static +inline timespec get_monotonic_time_diff(timespec start, timespec end) +{ + timespec temp; + if (end.tv_nsec - start.tv_nsec < 0) + { + temp.tv_sec = end.tv_sec - start.tv_sec - 1; + temp.tv_nsec = 1000000000 + end.tv_nsec - start.tv_nsec; + } + else + { + temp.tv_sec = end.tv_sec - start.tv_sec; + temp.tv_nsec = end.tv_nsec - start.tv_nsec; + } + return temp; +} + +static +inline double get_monotonic_time_diff_ms(timespec time1, timespec time2) +{ + timespec delta = get_monotonic_time_diff(time1, time2); + double milliseconds = delta.tv_sec * 1000 + (double)delta.tv_nsec / 1000000.0; + + return milliseconds; +} +#endif // USE_AV_INTERRUPT_CALLBACK + static int get_number_of_cpus(void) { #if LIBAVFORMAT_BUILD < CALC_FFMPEG_VERSION(52, 111, 0) @@ -205,12 +354,36 @@ struct Image_FFMPEG }; +#if USE_AV_INTERRUPT_CALLBACK +struct AVInterruptCallbackMetadata +{ + timespec value; + unsigned int timeout_after_ms; + int timeout; +}; + +static inline void _opencv_ffmpeg_free(void** ptr) { if(*ptr) free(*ptr); *ptr = 0; } +static +inline int _opencv_ffmpeg_interrupt_callback(void *ptr) +{ + AVInterruptCallbackMetadata* metadata = (AVInterruptCallbackMetadata*)ptr; + assert(metadata); + + timespec now; + get_monotonic_time(&now); + + metadata->timeout = get_monotonic_time_diff_ms(metadata->value, now) > metadata->timeout_after_ms; + + return metadata->timeout ? -1 : 0; +} +#endif + struct CvCapture_FFMPEG { @@ -264,6 +437,10 @@ struct CvCapture_FFMPEG #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(52, 111, 0) AVDictionary *dict; #endif + +#if USE_AV_INTERRUPT_CALLBACK + AVInterruptCallbackMetadata interrupt_metadata; +#endif }; void CvCapture_FFMPEG::init() @@ -301,8 +478,10 @@ void CvCapture_FFMPEG::close() if( picture ) { - // FFmpeg and Libav added avcodec_free_frame in different versions. #if LIBAVCODEC_BUILD >= (LIBAVCODEC_VERSION_MICRO >= 100 \ + ? CALC_FFMPEG_VERSION(55, 45, 101) : CALC_FFMPEG_VERSION(55, 28, 1)) + av_frame_free(&picture); +#elif LIBAVCODEC_BUILD >= (LIBAVCODEC_VERSION_MICRO >= 100 \ ? CALC_FFMPEG_VERSION(54, 59, 100) : CALC_FFMPEG_VERSION(54, 28, 0)) avcodec_free_frame(&picture); #else @@ -333,11 +512,15 @@ void CvCapture_FFMPEG::close() ic = NULL; } +#if USE_AV_FRAME_GET_BUFFER + av_frame_unref(&rgb_picture); +#else if( rgb_picture.data[0] ) { free( rgb_picture.data[0] ); rgb_picture.data[0] = 0; } +#endif // free last packet if exist if (packet.data) { @@ -556,6 +739,16 @@ bool CvCapture_FFMPEG::open( const char* _filename ) close(); +#if USE_AV_INTERRUPT_CALLBACK + /* interrupt callback */ + interrupt_metadata.timeout_after_ms = LIBAVFORMAT_INTERRUPT_TIMEOUT_MS; + get_monotonic_time(&interrupt_metadata.value); + + ic = avformat_alloc_context(); + ic->interrupt_callback.callback = _opencv_ffmpeg_interrupt_callback; + ic->interrupt_callback.opaque = &interrupt_metadata; +#endif + #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(52, 111, 0) av_dict_set(&dict, "rtsp_transport", "tcp", 0); int err = avformat_open_input(&ic, _filename, NULL, &dict); @@ -619,19 +812,18 @@ bool CvCapture_FFMPEG::open( const char* _filename ) video_stream = i; video_st = ic->streams[i]; +#if LIBAVCODEC_BUILD >= (LIBAVCODEC_VERSION_MICRO >= 100 \ + ? CALC_FFMPEG_VERSION(55, 45, 101) : CALC_FFMPEG_VERSION(55, 28, 1)) + picture = av_frame_alloc(); +#else picture = avcodec_alloc_frame(); - - rgb_picture.data[0] = (uint8_t*)malloc( - avpicture_get_size( PIX_FMT_BGR24, - enc->width, enc->height )); - avpicture_fill( (AVPicture*)&rgb_picture, rgb_picture.data[0], - PIX_FMT_BGR24, enc->width, enc->height ); +#endif frame.width = enc->width; frame.height = enc->height; frame.cn = 3; - frame.step = rgb_picture.linesize[0]; - frame.data = rgb_picture.data[0]; + frame.step = 0; + frame.data = NULL; break; } } @@ -668,6 +860,16 @@ bool CvCapture_FFMPEG::grabFrame() // get the next frame while (!valid) { + av_free_packet (&packet); + +#if USE_AV_INTERRUPT_CALLBACK + if (interrupt_metadata.timeout) + { + valid = false; + break; + } +#endif + int ret = av_read_frame(ic, &packet); if (ret == AVERROR(EAGAIN)) continue; @@ -703,6 +905,11 @@ bool CvCapture_FFMPEG::grabFrame() picture_pts = packet.pts != AV_NOPTS_VALUE_ && packet.pts != 0 ? packet.pts : packet.dts; frame_number++; valid = true; + +#if USE_AV_INTERRUPT_CALLBACK + // update interrupt value + get_monotonic_time(&interrupt_metadata.value); +#endif } else { @@ -727,38 +934,59 @@ bool CvCapture_FFMPEG::retrieveFrame(int, unsigned char** data, int* step, int* if( !video_st || !picture->data[0] ) return false; - avpicture_fill((AVPicture*)&rgb_picture, rgb_picture.data[0], PIX_FMT_RGB24, - video_st->codec->width, video_st->codec->height); - if( img_convert_ctx == NULL || frame.width != video_st->codec->width || - frame.height != video_st->codec->height ) + frame.height != video_st->codec->height || + frame.data == NULL ) { - if( img_convert_ctx ) - sws_freeContext(img_convert_ctx); - - frame.width = video_st->codec->width; - frame.height = video_st->codec->height; + // Some sws_scale optimizations have some assumptions about alignment of data/step/width/height + // Also we use coded_width/height to workaround problem with legacy ffmpeg versions (like n0.8) + int buffer_width = video_st->codec->coded_width, buffer_height = video_st->codec->coded_height; img_convert_ctx = sws_getCachedContext( - NULL, - video_st->codec->width, video_st->codec->height, + img_convert_ctx, + buffer_width, buffer_height, video_st->codec->pix_fmt, - video_st->codec->width, video_st->codec->height, - PIX_FMT_BGR24, + buffer_width, buffer_height, + AV_PIX_FMT_BGR24, SWS_BICUBIC, NULL, NULL, NULL ); if (img_convert_ctx == NULL) return false;//CV_Error(0, "Cannot initialize the conversion context!"); + +#if USE_AV_FRAME_GET_BUFFER + av_frame_unref(&rgb_picture); + rgb_picture.format = AV_PIX_FMT_BGR24; + rgb_picture.width = buffer_width; + rgb_picture.height = buffer_height; + if (0 != av_frame_get_buffer(&rgb_picture, 32)) + { + CV_WARN("OutOfMemory"); + return false; + } +#else + int aligns[AV_NUM_DATA_POINTERS]; + avcodec_align_dimensions2(video_st->codec, &buffer_width, &buffer_height, aligns); + rgb_picture.data[0] = (uint8_t*)realloc(rgb_picture.data[0], + avpicture_get_size( AV_PIX_FMT_BGR24, + buffer_width, buffer_height )); + avpicture_fill( (AVPicture*)&rgb_picture, rgb_picture.data[0], + AV_PIX_FMT_BGR24, buffer_width, buffer_height ); +#endif + frame.width = video_st->codec->width; + frame.height = video_st->codec->height; + frame.cn = 3; + frame.data = rgb_picture.data[0]; + frame.step = rgb_picture.linesize[0]; } sws_scale( img_convert_ctx, picture->data, picture->linesize, - 0, video_st->codec->height, + 0, video_st->codec->coded_height, rgb_picture.data, rgb_picture.linesize ); @@ -1099,10 +1327,20 @@ static AVFrame * icv_alloc_picture_FFMPEG(int pix_fmt, int width, int height, bo uint8_t * picture_buf; int size; +#if LIBAVCODEC_BUILD >= (LIBAVCODEC_VERSION_MICRO >= 100 \ + ? CALC_FFMPEG_VERSION(55, 45, 101) : CALC_FFMPEG_VERSION(55, 28, 1)) + picture = av_frame_alloc(); +#else picture = avcodec_alloc_frame(); +#endif if (!picture) return NULL; - size = avpicture_get_size( (PixelFormat) pix_fmt, width, height); + + picture->format = pix_fmt; + picture->width = width; + picture->height = height; + + size = avpicture_get_size( (AVPixelFormat) pix_fmt, width, height); if(alloc){ picture_buf = (uint8_t *) malloc(size); if (!picture_buf) @@ -1111,7 +1349,7 @@ static AVFrame * icv_alloc_picture_FFMPEG(int pix_fmt, int width, int height, bo return NULL; } avpicture_fill((AVPicture *)picture, picture_buf, - (PixelFormat) pix_fmt, width, height); + (AVPixelFormat) pix_fmt, width, height); } else { } @@ -1211,7 +1449,7 @@ static AVStream *icv_add_video_stream_FFMPEG(AVFormatContext *oc, #endif c->gop_size = 12; /* emit one intra frame every twelve frames at most */ - c->pix_fmt = (PixelFormat) pixel_format; + c->pix_fmt = (AVPixelFormat) pixel_format; if (c->codec_id == CV_CODEC(CODEC_ID_MPEG2VIDEO)) { c->max_b_frames = 2; @@ -1372,12 +1610,12 @@ bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int #endif // check parameters - if (input_pix_fmt == PIX_FMT_BGR24) { + if (input_pix_fmt == AV_PIX_FMT_BGR24) { if (cn != 3) { return false; } } - else if (input_pix_fmt == PIX_FMT_GRAY8) { + else if (input_pix_fmt == AV_PIX_FMT_GRAY8) { if (cn != 1) { return false; } @@ -1390,13 +1628,13 @@ bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int assert( input_picture ); // let input_picture point to the raw data buffer of 'image' avpicture_fill((AVPicture *)input_picture, (uint8_t *) data, - (PixelFormat)input_pix_fmt, width, height); + (AVPixelFormat)input_pix_fmt, width, height); if( !img_convert_ctx ) { img_convert_ctx = sws_getContext(width, height, - (PixelFormat)input_pix_fmt, + (AVPixelFormat)input_pix_fmt, c->width, c->height, c->pix_fmt, @@ -1414,7 +1652,7 @@ bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int } else{ avpicture_fill((AVPicture *)picture, (uint8_t *) data, - (PixelFormat)input_pix_fmt, width, height); + (AVPixelFormat)input_pix_fmt, width, height); } picture->pts = frame_idx; @@ -1547,10 +1785,10 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, /* determine optimal pixel format */ if (is_color) { - input_pix_fmt = PIX_FMT_BGR24; + input_pix_fmt = AV_PIX_FMT_BGR24; } else { - input_pix_fmt = PIX_FMT_GRAY8; + input_pix_fmt = AV_PIX_FMT_GRAY8; } /* Lookup codec_id for given fourcc */ @@ -1587,21 +1825,21 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, break; #endif case CV_CODEC(CODEC_ID_HUFFYUV): - codec_pix_fmt = PIX_FMT_YUV422P; + codec_pix_fmt = AV_PIX_FMT_YUV422P; break; case CV_CODEC(CODEC_ID_MJPEG): case CV_CODEC(CODEC_ID_LJPEG): - codec_pix_fmt = PIX_FMT_YUVJ420P; + codec_pix_fmt = AV_PIX_FMT_YUVJ420P; bitrate_scale = 3; break; case CV_CODEC(CODEC_ID_RAWVIDEO): - codec_pix_fmt = input_pix_fmt == PIX_FMT_GRAY8 || - input_pix_fmt == PIX_FMT_GRAY16LE || - input_pix_fmt == PIX_FMT_GRAY16BE ? input_pix_fmt : PIX_FMT_YUV420P; + codec_pix_fmt = input_pix_fmt == AV_PIX_FMT_GRAY8 || + input_pix_fmt == AV_PIX_FMT_GRAY16LE || + input_pix_fmt == AV_PIX_FMT_GRAY16BE ? input_pix_fmt : AV_PIX_FMT_YUV420P; break; default: // good for lossy formats, MPEG, etc. - codec_pix_fmt = PIX_FMT_YUV420P; + codec_pix_fmt = AV_PIX_FMT_YUV420P; break; } @@ -1826,7 +2064,7 @@ struct OutputMediaStream_FFMPEG void write(unsigned char* data, int size, int keyFrame); // add a video output stream to the container - static AVStream* addVideoStream(AVFormatContext *oc, CV_CODEC_ID codec_id, int w, int h, int bitrate, double fps, PixelFormat pixel_format); + static AVStream* addVideoStream(AVFormatContext *oc, CV_CODEC_ID codec_id, int w, int h, int bitrate, double fps, AVPixelFormat pixel_format); AVOutputFormat* fmt_; AVFormatContext* oc_; @@ -1873,7 +2111,7 @@ void OutputMediaStream_FFMPEG::close() } } -AVStream* OutputMediaStream_FFMPEG::addVideoStream(AVFormatContext *oc, CV_CODEC_ID codec_id, int w, int h, int bitrate, double fps, PixelFormat pixel_format) +AVStream* OutputMediaStream_FFMPEG::addVideoStream(AVFormatContext *oc, CV_CODEC_ID codec_id, int w, int h, int bitrate, double fps, AVPixelFormat pixel_format) { #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 10, 0) AVStream* st = avformat_new_stream(oc, 0); @@ -2011,7 +2249,7 @@ bool OutputMediaStream_FFMPEG::open(const char* fileName, int width, int height, oc_->max_delay = (int)(0.7 * AV_TIME_BASE); // This reduces buffer underrun warnings with MPEG // set a few optimal pixel formats for lossless codecs of interest.. - PixelFormat codec_pix_fmt = PIX_FMT_YUV420P; + AVPixelFormat codec_pix_fmt = AV_PIX_FMT_YUV420P; int bitrate_scale = 64; // TODO -- safe to ignore output audio stream? @@ -2150,6 +2388,10 @@ private: AVFormatContext* ctx_; int video_stream_id_; AVPacket pkt_; + +#if USE_AV_INTERRUPT_CALLBACK + AVInterruptCallbackMetadata interrupt_metadata; +#endif }; bool InputMediaStream_FFMPEG::open(const char* fileName, int* codec, int* chroma_format, int* width, int* height) @@ -2160,6 +2402,16 @@ bool InputMediaStream_FFMPEG::open(const char* fileName, int* codec, int* chroma video_stream_id_ = -1; memset(&pkt_, 0, sizeof(AVPacket)); +#if USE_AV_INTERRUPT_CALLBACK + /* interrupt callback */ + interrupt_metadata.timeout_after_ms = LIBAVFORMAT_INTERRUPT_TIMEOUT_MS; + get_monotonic_time(&interrupt_metadata.value); + + ctx_ = avformat_alloc_context(); + ctx_->interrupt_callback.callback = _opencv_ffmpeg_interrupt_callback; + ctx_->interrupt_callback.opaque = &interrupt_metadata; +#endif + #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 13, 0) avformat_network_init(); #endif @@ -2220,15 +2472,15 @@ bool InputMediaStream_FFMPEG::open(const char* fileName, int* codec, int* chroma switch (enc->pix_fmt) { - case PIX_FMT_YUV420P: + case AV_PIX_FMT_YUV420P: *chroma_format = ::VideoChromaFormat_YUV420; break; - case PIX_FMT_YUV422P: + case AV_PIX_FMT_YUV422P: *chroma_format = ::VideoChromaFormat_YUV422; break; - case PIX_FMT_YUV444P: + case AV_PIX_FMT_YUV444P: *chroma_format = ::VideoChromaFormat_YUV444; break; @@ -2276,11 +2528,23 @@ bool InputMediaStream_FFMPEG::read(unsigned char** data, int* size, int* endOfFi // get the next frame for (;;) { +#if USE_AV_INTERRUPT_CALLBACK + if(interrupt_metadata.timeout) + { + break; + } +#endif + int ret = av_read_frame(ctx_, &pkt_); if (ret == AVERROR(EAGAIN)) continue; +#if USE_AV_INTERRUPT_CALLBACK + // update interrupt value + get_monotonic_time(&interrupt_metadata.value); +#endif + if (ret < 0) { if (ret == (int)AVERROR_EOF)