[Libav-user] Why does sws_scale runtime depend on calling frequency?
lukas
lukas.bommes at gmx.de
Thu May 30 12:53:18 EEST 2019
Dear libav-users,
I wrote a program which extracts motion vectors from a video stream and
encountered the problem that the runtime of sws_scale changes depending
on whether I put a sleep command in the main loop of the caller. If no
sleep command is present, sws_scale returns after approximately 0.9 ms on my
machine. With a sleep command of arbitrary length (I tested 1 ms, 25 ms,
500 ms and 1 s) the runtime is around 7 ms.
I am using libswscale as shipped in FFMPEG 4.1 and my implementation is
similar to the code used in OpenCV VideoCapture
(https://github.com/opencv/opencv/blob/master/modules/videoio/src/cap_ffmpeg_impl.hpp#L431)
I would be glad if someone could provide me with at least an idea of
what is going wrong here. My code is attached below.
Best regards,
Lukas
// Compile command: g++ -I ~/boost -I /usr/include/python3.6m/ -fpic
video_cap.cpp -o main -L ~/boost/stage/lib -lboost_python36
-lboost_numpy36 -lpython3.6m `pkg-config --cflags --libs libavformat
libswscale opencv4` -Wl,-Bsymbolic
#include <thread>
#include <iostream>
#include <vector>
#include <chrono>
#include <opencv2/opencv.hpp>
#include <opencv2/core/types.hpp>
#include <opencv2/imgproc.hpp>
// FFMPEG
extern "C" {
#include <libavutil/motion_vector.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libavutil/imgutils.h>
}
// for changing the dtype of motion vector
#define MVS_DTYPE int32_t
#define MVS_DTYPE_NP NPY_INT32
// Plain view onto a BGR24 frame buffer handed to OpenCV. The data pointer is
// NOT owned by this struct — it aliases VideoCap::rgb_frame.data[0].
struct Image_FFMPEG
{
unsigned char* data;  // first byte of pixel data (borrowed, not owned)
int step;             // bytes per row (rgb_frame.linesize[0])
int width;            // frame width in pixels
int height;           // frame height in pixels
int cn;               // number of channels (always 3 = BGR here)
};
class VideoCap {
private:
const char *url;
AVDictionary *opts;
AVCodec *codec;
AVFormatContext *fmt_ctx;
AVCodecContext *video_dec_ctx;
AVStream *video_stream;
int video_stream_idx;
AVFrame *frame;
AVFrame rgb_frame;
Image_FFMPEG picture;
struct SwsContext *img_convert_ctx;
public:
VideoCap() {
this->opts = NULL;
this->codec = NULL;
this->fmt_ctx = NULL;
this->video_dec_ctx = NULL;
this->video_stream = NULL;
this->video_stream_idx = -1;
this->frame = NULL;
this->img_convert_ctx = NULL;
memset(&(this->rgb_frame), 0, sizeof(this->rgb_frame));
memset(&(this->picture), 0, sizeof(this->picture));
}
void free_all() {
if (this->img_convert_ctx) {
sws_freeContext(this->img_convert_ctx);
this->img_convert_ctx = 0;
}
if (this->frame)
av_frame_free(&(this->frame));
av_frame_unref(&(this->rgb_frame));
if (this->video_dec_ctx)
avcodec_free_context(&(this->video_dec_ctx));
if (this->fmt_ctx)
avformat_close_input(&(this->fmt_ctx));
}
void release(void) {
this->free_all();
}
bool open(const char *url) {
this->url = url;
int ret;
// open RTSP stream with TCP
av_dict_set(&(this->opts), "rtsp_transport", "tcp", 0);
ret = avformat_open_input(&(this->fmt_ctx), url, NULL,
&(this->opts));
if (ret < 0) {
std::cerr << "Could not open source file ' " << url << "'"
<< std::endl;
return false;
}
// read packets of a media file to get stream information.
ret = avformat_find_stream_info(this->fmt_ctx, NULL);
if (ret < 0) {
std::cerr << "Could not find stream information" << std::endl;
return false;
}
ret = this->open_codec_context(this->fmt_ctx, AVMEDIA_TYPE_VIDEO);
if (!ret) {
std::cerr << "Could not create codex context" << std::endl;
return false;
}
// print info (duration, bitrate, streams, container, programs,
metadata, side data, codec, time base)
av_dump_format(this->fmt_ctx, 0, url, 0);
if (!this->video_stream) {
std::cerr << "Could not find video stream in the input,
aborting" << std::endl;
this->free_all();
return false;
}
this->frame = av_frame_alloc();
if (!this->frame) {
std::cerr << "Could not allocate frame" << std::endl;
this->free_all();
return false;
}
return true;
}
bool open_codec_context(AVFormatContext *fmt_ctx, enum AVMediaType
type) {
// find the most suitable stream of given type (e.g. video) and
set the codec accordingly
int ret = av_find_best_stream(fmt_ctx, type, -1, -1,
&(this->codec), 0);
if (ret < 0) {
std::cerr << "Could not find " <<
av_get_media_type_string(type) << " stream in input file '" << this->url
<< "'" << std::endl;
return false;
}
else {
// set stream in format context
this->video_stream_idx = ret;
AVStream *st = fmt_ctx->streams[this->video_stream_idx];
// allocate an AVCodecContext and set its fields to default
values
this->video_dec_ctx = avcodec_alloc_context3(this->codec);
if (!this->video_dec_ctx) {
std::cerr << "Failed to allocate codec" << std::endl;
return false;
}
// fill the codec context based on the values from the
supplied codec parameters
ret = avcodec_parameters_to_context(this->video_dec_ctx,
st->codecpar);
if (ret < 0) {
std::cerr << "Failed to copy codec parameters to codec
context" << std::endl;
return false;
}
this->video_dec_ctx->thread_count =
std::thread::hardware_concurrency();
std::cerr << "Using parallel processing with " <<
this->video_dec_ctx->thread_count << " threads" << std::endl;
// backup encoder's width/height
int enc_width = this->video_dec_ctx->width;
int enc_height = this->video_dec_ctx->height;
// Init the video decoder with the codec and set additional
option to extract motion vectors
av_dict_set(&(this->opts), "flags2", "+export_mvs", 0);
ret = avcodec_open2(this->video_dec_ctx, this->codec,
&(this->opts));
if (ret < 0) {
std::cerr << "Failed to open " <<
av_get_media_type_string(type) << " codec" << std::endl;
return false;
}
this->video_stream = fmt_ctx->streams[this->video_stream_idx];
// checking width/height (since decoder can sometimes alter
it, eg. vp6f)
if (enc_width && (this->video_dec_ctx->width != enc_width)) {
this->video_dec_ctx->width = enc_width;
}
if (enc_height && (this->video_dec_ctx->height !=
enc_height)) {
this->video_dec_ctx->height = enc_height;
}
this->picture.width = this->video_dec_ctx->width;
this->picture.height = this->video_dec_ctx->height;
this->picture.cn = 3;
this->picture.step = 0;
this->picture.data = NULL;
}
return true;
}
bool read(cv::OutputArray cv_frame, char *frame_type, MVS_DTYPE
**motion_vectors, MVS_DTYPE *num_mvs) {
uint8_t* data = 0;
int step = 0, width = 0, height = 0, cn = 0;
// loop over different streams (video, audio) in the file
while(1) {
AVPacket pkt = { 0 };
// read next packet from the stream
int ret = av_read_frame(this->fmt_ctx, &pkt);
if (ret < 0) {
return false;
}
// if the packet is not from the video stream don't do
anything and get next packet
if (pkt.stream_index != this->video_stream_idx) {
continue;
}
// if the packet is from the video stream send it to decoder
else {
bool ret = this->decode_packet(&pkt, &data, &step,
&width, &height, &cn, frame_type, motion_vectors, num_mvs);
if (!ret) {
return false;
}
cv::Mat(height, width, CV_MAKETYPE(CV_8U, cn), data,
step).copyTo(cv_frame);
av_packet_unref(&pkt);
return true;
}
}
}
bool frame_to_buffer(uint8_t** data, int* step, int* width, int*
height, int* cn)
{
if (!this->video_stream || !(this->frame->data[0])) {
return false;
}
if (this->img_convert_ctx == NULL ||
this->picture.width != this->video_dec_ctx->width ||
this->picture.height != this->video_dec_ctx->height ||
this->picture.data == NULL) {
// Some sws_scale optimizations have some assumptions about
alignment of data/step/width/height
// Also we use coded_width/height to workaround problem
with legacy ffmpeg versions (like n0.8)
int buffer_width = this->video_dec_ctx->coded_width;
int buffer_height = this->video_dec_ctx->coded_height;
this->img_convert_ctx = sws_getCachedContext(
this->img_convert_ctx,
buffer_width, buffer_height,
this->video_dec_ctx->pix_fmt,
buffer_width, buffer_height,
AV_PIX_FMT_BGR24,
SWS_BICUBIC,
NULL, NULL, NULL
);
if (this->img_convert_ctx == NULL) {
std::cerr << "Allocation of conversion context failed"
<< std::endl;
return false;
}
av_frame_unref(&(this->rgb_frame));
this->rgb_frame.format = AV_PIX_FMT_BGR24;
this->rgb_frame.width = buffer_width;
this->rgb_frame.height = buffer_height;
if (0 != av_frame_get_buffer(&(this->rgb_frame), 32)) {
std::cerr << "Not enough memory to allocate buffer for
frame conversion" << std::endl;
return false;
}
this->picture.width = this->video_dec_ctx->width;
this->picture.height = this->video_dec_ctx->height;
this->picture.cn = 3;
this->picture.data = this->rgb_frame.data[0];
this->picture.step = this->rgb_frame.linesize[0];
}
auto start = std::chrono::high_resolution_clock::now();
sws_scale(
this->img_convert_ctx,
this->frame->data,
this->frame->linesize,
0, this->video_dec_ctx->coded_height,
this->rgb_frame.data,
this->rgb_frame.linesize
);
auto finish = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed = finish - start;
std::cout << "sws_scale (C++): " << elapsed.count() << " s\n";
*data = this->picture.data;
*step = this->picture.step;
*width = this->picture.width;
*height = this->picture.height;
*cn = this->picture.cn;
return true;
}
bool decode_packet(const AVPacket *pkt, uint8_t** data, int* step,
int* width, int* height, int* cn, char *frame_type, MVS_DTYPE
**motion_vectors, MVS_DTYPE *num_mvs) {
// send encoded data packet to the decoder
int ret = avcodec_send_packet(this->video_dec_ctx, pkt);
if (ret < 0) {
std::cerr << "Error while sending a packet to the decoder:
" << ret << std::endl;
return false;
}
// loop over packets until the next frame is fully assembled
while (ret >= 0) {
// try to get the next frame from decoder
ret = avcodec_receive_frame(this->video_dec_ctx, this->frame);
// failed: end of stream or no frame available, stop and
return with success
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
break;
}
// failed: another error occured, return with error
else if (ret < 0) {
std::cerr << "Error while receiving a frame from the
decoder: " << ret << std::endl;
return false;
}
// sucessfully decoded new frame, get motion vectors
else {
AVFrameSideData *sd =
av_frame_get_side_data(this->frame, AV_FRAME_DATA_MOTION_VECTORS);
if (sd) {
AVMotionVector *mvs = (AVMotionVector *)sd->data;
*num_mvs = sd->size / sizeof(*mvs);
if (*num_mvs > 0) {
// allocate memory for motion vectors as 1D array
if (!(*motion_vectors = (MVS_DTYPE *)
malloc(*num_mvs * 10 * sizeof(MVS_DTYPE)))) {
std::cerr << "Memory allocation for motion
vectors failed." << std::endl;
return false;
}
// store the motion vectors in the allocated
memory (C contiguous)
for (MVS_DTYPE i = 0; i < *num_mvs; ++i) {
*(*motion_vectors + i*10 ) =
static_cast<MVS_DTYPE>(mvs[i].source);
*(*motion_vectors + i*10 + 1) =
static_cast<MVS_DTYPE>(mvs[i].w);
*(*motion_vectors + i*10 + 2) =
static_cast<MVS_DTYPE>(mvs[i].h);
*(*motion_vectors + i*10 + 3) =
static_cast<MVS_DTYPE>(mvs[i].src_x);
*(*motion_vectors + i*10 + 4) =
static_cast<MVS_DTYPE>(mvs[i].src_y);
*(*motion_vectors + i*10 + 5) =
static_cast<MVS_DTYPE>(mvs[i].dst_x);
*(*motion_vectors + i*10 + 6) =
static_cast<MVS_DTYPE>(mvs[i].dst_y);
*(*motion_vectors + i*10 + 7) =
static_cast<MVS_DTYPE>(mvs[i].motion_x);
*(*motion_vectors + i*10 + 8) =
static_cast<MVS_DTYPE>(mvs[i].motion_y);
*(*motion_vectors + i*10 + 9) =
static_cast<MVS_DTYPE>(mvs[i].motion_scale);
//*(*motion_vectors + i*11 + 10) =
static_cast<MVS_DTYPE>(mvs[i].flags);
}
}
}
// convert AVFrame to numpy ndarray
if(!this->frame_to_buffer(data, step, width, height, cn)) {
std::cerr << "Conversion of frame failed." <<
std::endl;
return false;
}
// get frame type (I, P, B, etc.) and create a null
terminated c-string
frame_type[0] =
av_get_picture_type_char(this->frame->pict_type);
frame_type[1] = '\0';
}
}
return true;
}
};
//##############################################################################
//
// MAIN
//
//##############################################################################
// Overlay each motion vector on `frame` as a red arrow from its source
// position to its destination position.
void draw_motion_vectors(cv::Mat frame, std::vector<AVMotionVector>
    *motion_vectors) {
    for (const AVMotionVector &mv : *motion_vectors) {
        const cv::Point origin(mv.src_x, mv.src_y);
        const cv::Point tip(mv.dst_x, mv.dst_y);
        cv::arrowedLine(frame, origin, tip, cv::Scalar(0, 0, 255),
            1, cv::LINE_AA, 0, 0.1);
    }
}
// Open "vid.mp4", then loop: read one frame plus its motion vectors,
// print timings and the frame type, display the frame, and exit on ESC
// or on a read error.
int main(int argc, char **argv)
{
    // filename of the video file
    const char *url = "vid.mp4";
    VideoCap cap;
    // open the video file
    bool ret = cap.open(url);
    if (!ret) {
        std::cerr << "Could not open the video url" << std::endl;
        return -1;
    }
    // continuously read and display video frames and motion vectors
    while(1) {
        std::cout << "##########################" << std::endl;
        cv::Mat frame;
        MVS_DTYPE *motion_vectors = NULL;
        MVS_DTYPE num_mvs = 0;
        char frame_type[2] = "?";
        auto start = std::chrono::high_resolution_clock::now();
        // read next video frame and corresponding motion vectors
        bool ret = cap.read(frame, frame_type, &motion_vectors, &num_mvs);
        auto finish = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double> elapsed = finish - start;
        std::cout << "Elapsed time: " << elapsed.count() << " s\n";
        // artificial delay in the main loop -- this is the sleep that
        // changes the observed sws_scale runtime
        std::chrono::milliseconds timespan(25);
        std::this_thread::sleep_for(timespan);
        // if there is an error reading the frame
        if(!ret) {
            // FIX: release the buffer (if any) before bailing out
            free(motion_vectors);
            std::cerr << "Could not read the next frame" << std::endl;
            return -1;
        }
        else {
            // if the frame is not empty
            cv::Size s = frame.size();
            if (s.height > 0 && s.width > 0) {
                // print type of frame (I, P, B, etc)
                std::cout << "Frame type: " << frame_type << std::endl;
                //draw_motion_vectors(frame, &motion_vectors);
                // show frame
                cv::imshow("Frame", frame);
                // if user presses "ESC" stop program
                char c=(char)cv::waitKey(1);
                if(c==27) {
                    free(motion_vectors);
                    break;
                }
            }
            // FIX: the buffer was only freed inside the non-empty-frame
            // branch above, leaking it for every empty frame; free it
            // unconditionally at the end of each iteration instead
            // (free(NULL) is a no-op).
            free(motion_vectors);
            motion_vectors = NULL;
        }
    }
    // when everything done, release the video capture object
    cap.release();
    // close the GUI window
    cv::destroyAllWindows();
    return 0;
}
More information about the Libav-user
mailing list