[FFmpeg-devel] a new benchmarking option for ffmpeg - patch 1 of 1
"René J.V. Bertin"
rjvbertin at gmail.com
Wed Mar 13 15:31:48 CET 2013
updated patch against current head:
-------------------------------
A new benchmarking mode for ffmpeg.
Adds a -benchmark_most option to ffmpeg which collects benchmark
timings much as -benchmark_all does, but only presents a breakdown
of the averages when all else is done. It determines the average
user, system and total (real) execution times, and estimates the
average CPU percentage.
Timing is based on a new (x86 and ppc) timer declared (conditionally)
in libavutil/timer.h that uses the linux/Mac OS X/MS Windows routines
to determine elapsed time (in seconds) with the highest possible
resolution and the lowest possible overhead.
Signed-off-by: René 'RJVB' Bertin <rjvbertin at gmail.com>
---
ffmpeg.c | 176 ++++++++++++++++++++++++++++++++++++++++++++------
ffmpeg.h | 1 +
ffmpeg_opt.c | 5 +-
libavutil/ppc/timer.h | 110 +++++++++++++++++++++++++++++++
libavutil/x86/timer.h | 138 +++++++++++++++++++++++++++++++++++++++
5 files changed, 409 insertions(+), 21 deletions(-)
diff --git a/ffmpeg.c b/ffmpeg.c
index 17a7eab..03b4538 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -104,6 +104,11 @@
#include "libavutil/avassert.h"
+//#include "timing.h"
+// include libavutil/timer.h with the flag set to obtain the highres time functions
+#define AVUTIL_HIGHRES_TIMERS_REQUIRED
+#include "libavutil/timer.h"
+
const char program_name[] = "ffmpeg";
const int program_birth_year = 2000;
@@ -118,8 +123,16 @@ const char *const forced_keyframes_const_names[] = {
NULL
};
+//RJVB
+typedef struct BenchTimes {
+ double user_time, system_time, total_time;
+ double user_cum_time, system_cum_time, total_cum_time;
+ size_t num_samples, num_obs;
+} BenchTimes;
+BenchTimes flush_bench, vid_dec_bench, aud_dec_bench, vid_enc_bench, aud_enc_bench, ffmpeg_bench;
+
static void do_video_stats(OutputStream *ost, int frame_size);
-static int64_t getutime(void);
+static int64_t getutime(BenchTimes *bin);
static int64_t getmaxrss(void);
static int run_as_daemon = 0;
@@ -521,10 +534,17 @@ static void abort_codec_experimental(AVCodec *c, int encoder)
exit(1);
}
-static void update_benchmark(const char *fmt, ...)
+/**
+ * benchmarking updater/priming function. Primes for a new interval measurement if fmt==NULL, or determines
+ * the duration of the current interval.
+ * @param bin the target benchmarking bin that will be updated or primed in benchmark_most mode.
+ * @param fmt If not NULL, a printf style format string that determines how the timing result is printed if in
+ * benchmark_all mode.
+ */
+static void update_benchmark( BenchTimes *bin, const char *fmt, ...)
{
if (do_benchmark_all) {
- int64_t t = getutime();
+ int64_t t = getutime(NULL);
va_list va;
char buf[1024];
@@ -536,6 +556,64 @@ static void update_benchmark(const char *fmt, ...)
}
current_time = t;
}
+ else if( do_benchmark_most && bin ){
+ if( fmt ){
+ double uT0 = bin->user_time, sT0 = bin->system_time, tT0 = bin->total_time;
+ // collect data
+ getutime(bin);
+ bin->user_cum_time += bin->user_time - uT0;
+ bin->system_cum_time += bin->system_time - sT0;
+ bin->total_cum_time += bin->total_time - tT0;
+ bin->num_samples += 1;
+ bin->num_obs += 1;
+ }
+ else{
+ // mark start of benchmarking interval
+ getutime(bin);
+ }
+ }
+}
+
+/**
+ * prints the state of a benchmarking bin and optionally adds the results to a different bin
+ * that will hold the cumulative timings across several input bins. Pass its address in the bin
+ * and cum_bin arguments to print weighed averages.
+ * @param fp the destination file pointer
+ * @param[in] bin the bin with the data to be printed
+ * @param[in] header the string to prefix to the output
+ * @param cum_bin the bin that will hold the cumulative results, or NULL.
+ */
+static void print_bench_bin( FILE *fp, BenchTimes *bin, const char *header, BenchTimes *cum_bin )
+{ double cpu;
+ if( bin && bin->num_samples ){
+ if( cum_bin && cum_bin == bin ){
+ char tots[32];
+ bin->user_cum_time /= bin->num_samples;
+ bin->system_cum_time /= bin->num_samples;
+ bin->total_cum_time /= bin->num_samples;
+ cpu = (bin->user_cum_time + bin->system_cum_time) * 100.0 / bin->total_cum_time;
+ // output the number of samples over the number of observations using an intermediate
+ // string (so that it prints in a fixed number of spaces without internal whitespace).
+ snprintf( tots, sizeof(tots), "%g/%lu", (double) bin->num_samples, bin->num_obs );
+ fprintf( fp, "%s\t%10s\t%10gs\t%10gs\t%10gs\t%10g%%\n", header,
+ tots, bin->user_cum_time, bin->system_cum_time, bin->total_cum_time,
+ cpu );
+ cum_bin = NULL;
+ }
+ else{
+ cpu = (bin->user_cum_time + bin->system_cum_time) * 100.0 / bin->total_cum_time;
+ fprintf( fp, "%s\t%10ld\t%10gs\t%10gs\t%10gs\t%10g%%\n", header,
+ bin->num_samples, bin->user_cum_time, bin->system_cum_time, bin->total_cum_time,
+ cpu );
+ }
+ if( cum_bin ){
+ cum_bin->num_samples += bin->num_samples;
+ cum_bin->user_cum_time += bin->user_cum_time * bin->num_samples;
+ cum_bin->system_cum_time += bin->system_cum_time * bin->num_samples;
+ cum_bin->total_cum_time += bin->total_cum_time * bin->num_samples;
+ cum_bin->num_obs += 1;
+ }
+ }
}
static void write_frame(AVFormatContext *s, AVPacket *pkt, OutputStream *ost)
@@ -672,12 +750,12 @@ static void do_audio_out(AVFormatContext *s, OutputStream *ost,
ost->sync_opts = frame->pts + frame->nb_samples;
av_assert0(pkt.size || !pkt.data);
- update_benchmark(NULL);
+ update_benchmark(&aud_enc_bench, NULL);
if (avcodec_encode_audio2(enc, &pkt, frame, &got_packet) < 0) {
av_log(NULL, AV_LOG_FATAL, "Audio encoding failed (avcodec_encode_audio2)\n");
exit(1);
}
- update_benchmark("encode_audio %d.%d", ost->file_index, ost->index);
+ update_benchmark(&aud_enc_bench, "encode_audio %d.%d", ost->file_index, ost->index);
if (got_packet) {
if (pkt.pts != AV_NOPTS_VALUE)
@@ -964,9 +1042,9 @@ static void do_video_out(AVFormatContext *s,
av_log(NULL, AV_LOG_DEBUG, "Forced keyframe at time %f\n", pts_time);
}
- update_benchmark(NULL);
- ret = avcodec_encode_video2(enc, &pkt, in_picture, &got_packet);
- update_benchmark("encode_video %d.%d", ost->file_index, ost->index);
+ update_benchmark(&vid_enc_bench, NULL);
+ ret = avcodec_encode_video2(enc, &pkt, &in_picture, &got_packet);
+ update_benchmark(&vid_enc_bench, "encode_video %d.%d", ost->file_index, ost->index);
if (ret < 0) {
av_log(NULL, AV_LOG_FATAL, "Video encoding failed\n");
exit(1);
@@ -1362,9 +1440,9 @@ static void flush_encoders(void)
pkt.data = NULL;
pkt.size = 0;
- update_benchmark(NULL);
+ update_benchmark(&flush_bench, NULL);
ret = encode(enc, &pkt, NULL, &got_packet);
- update_benchmark("flush %s %d.%d", desc, ost->file_index, ost->index);
+ update_benchmark(&flush_bench, "flush %s %d.%d", desc, ost->file_index, ost->index);
if (ret < 0) {
av_log(NULL, AV_LOG_FATAL, "%s encoding failed\n", desc);
exit(1);
@@ -1539,9 +1617,9 @@ static int decode_audio(InputStream *ist, AVPacket *pkt, int *got_output)
return AVERROR(ENOMEM);
decoded_frame = ist->decoded_frame;
- update_benchmark(NULL);
+ update_benchmark(&aud_dec_bench, NULL);
ret = avcodec_decode_audio4(avctx, decoded_frame, got_output, pkt);
- update_benchmark("decode_audio %d.%d", ist->file_index, ist->st->index);
+ update_benchmark(&aud_dec_bench, "decode_audio %d.%d", ist->file_index, ist->st->index);
if (ret >= 0 && avctx->sample_rate <= 0) {
av_log(avctx, AV_LOG_ERROR, "Sample rate %d invalid\n", avctx->sample_rate);
@@ -1678,10 +1756,10 @@ static int decode_video(InputStream *ist, AVPacket *pkt, int *got_output)
decoded_frame = ist->decoded_frame;
pkt->dts = av_rescale_q(ist->dts, AV_TIME_BASE_Q, ist->st->time_base);
- update_benchmark(NULL);
+ update_benchmark(&vid_dec_bench, NULL);
ret = avcodec_decode_video2(ist->st->codec,
decoded_frame, got_output, pkt);
- update_benchmark("decode_video %d.%d", ist->file_index, ist->st->index);
+ update_benchmark(&vid_dec_bench, "decode_video %d.%d", ist->file_index, ist->st->index);
if (!*got_output || ret < 0) {
if (!pkt->size) {
for (i = 0; i < ist->nb_filters; i++)
@@ -3259,19 +3337,31 @@ static int transcode(void)
return ret;
}
-
-static int64_t getutime(void)
+static int64_t getutime(BenchTimes *bin)
{
#if HAVE_GETRUSAGE
struct rusage rusage;
getrusage(RUSAGE_SELF, &rusage);
+ if( bin ){
+ bin->user_time = rusage.ru_utime.tv_sec + rusage.ru_utime.tv_usec * 1e-6;
+ bin->system_time = rusage.ru_stime.tv_sec + rusage.ru_stime.tv_usec * 1e-6;
+ bin->total_time = get_hr_time();
+ }
return (rusage.ru_utime.tv_sec * 1000000LL) + rusage.ru_utime.tv_usec;
#elif HAVE_GETPROCESSTIMES
HANDLE proc;
FILETIME c, e, k, u;
proc = GetCurrentProcess();
GetProcessTimes(proc, &c, &e, &k, &u);
+ if( bin ){
+ ULARGE_INTEGER uT, kT;
+ uT.LowPart = u.dwLowDateTime, uT.HighPart = u.dwHighDateTime;
+ kT.LowPart = k.dwLowDateTime, kT.HighPart = k.dwHighDateTime;
+ bin->user_time = uT.QuadPart * 100e-9;
+ bin->system_time = kT.QuadPart * 100e-9;
+ bin->total_time = get_hr_time();
+ }
return ((int64_t) u.dwHighDateTime << 32 | u.dwLowDateTime) / 10;
#else
return av_gettime();
@@ -3307,6 +3397,21 @@ int main(int argc, char **argv)
atexit(exit_program);
+#if HAVE_GETRUSAGE || HAVE_GETPROCESSTIMES
+ init_hr_time();
+ memset( &ffmpeg_bench, 0, sizeof(BenchTimes) );
+ // initialise the overall benchmarking bin. Since this is done before the user's options are parsed
+ // we clamp do_benchmark_most to 1 before calling update_benchmark. Restoring it afterwards is
+ // just done for good practices' sake.
+ { int dbm = do_benchmark_most;
+ do_benchmark_most = 1;
+ update_benchmark( &ffmpeg_bench, NULL );
+ do_benchmark_most = dbm;
+ }
+#endif
+
+ // initialise (zero) benchmarking structures
+ memset( &flush_bench, 0, sizeof(BenchTimes) );
setvbuf(stderr,NULL,_IONBF,0); /* win32 runtime needs this */
av_log_set_flags(AV_LOG_SKIP_REPEATED);
@@ -3348,17 +3453,48 @@ int main(int argc, char **argv)
exit(1);
}
+ // RJVB
+ if( do_benchmark_most ){
+#if !HAVE_GETRUSAGE && !HAVE_GETPROCESSTIMES
+ av_log( NULL, AV_LOG_WARNING, "Cannot provide 'most' benchmark data, resorting to basic benchmarking.\n" );
+#else
+ // initialise (zero) benchmarking structures
+ memset( &flush_bench, 0, sizeof(BenchTimes) );
+ memset( &vid_dec_bench, 0, sizeof(BenchTimes) );
+ memset( &aud_dec_bench, 0, sizeof(BenchTimes) );
+ memset( &vid_enc_bench, 0, sizeof(BenchTimes) );
+ memset( &aud_enc_bench, 0, sizeof(BenchTimes) );
+#endif
+ }
+
// if (nb_input_files == 0) {
// av_log(NULL, AV_LOG_FATAL, "At least one input file must be specified\n");
// exit(1);
// }
- current_time = ti = getutime();
+ current_time = ti = getutime(NULL);
if (transcode() < 0)
exit(1);
- ti = getutime() - ti;
- if (do_benchmark) {
- printf("bench: utime=%0.3fs\n", ti / 1000000.0);
+ ti = getutime(NULL) - ti;
+ if( do_benchmark_most ){
+ BenchTimes cum_bin;
+ update_benchmark( &ffmpeg_bench, "Done" );
+ fprintf( stdout, "Detailed benchmark results:\n" );
+ fprintf( stdout, " \t%10s\t%10s\t%10s\t%10s\t%10s\n", "samples", "user t", "kernel t", "real t", "CPU %" );
+ memset( &cum_bin, 0, sizeof(BenchTimes) );
+ print_bench_bin( stdout, &vid_dec_bench, "Video decode :", &cum_bin );
+ print_bench_bin( stdout, &aud_dec_bench, "Audio decode :", &cum_bin );
+ print_bench_bin( stdout, &vid_enc_bench, "Video encode :", &cum_bin );
+// print_bench_bin( stdout, &cum_bin, "Runny totals :", NULL );
+ print_bench_bin( stdout, &aud_enc_bench, "Audio encode :", &cum_bin );
+ print_bench_bin( stdout, &flush_bench, "Failed loops :", &cum_bin );
+ print_bench_bin( stdout, &cum_bin, "Weighed totals:", &cum_bin );
+ fprintf( stdout, "Overall execution timing:\n" );
+ print_bench_bin( stdout, &ffmpeg_bench, " :", NULL );
+ }
+ else if (do_benchmark) {
+ int maxrss = getmaxrss() / 1024;
+ printf("bench: utime=%0.3fs maxrss=%ikB\n", ti / 1000000.0, maxrss);
}
exit(received_nb_signals ? 255 : 0);
diff --git a/ffmpeg.h b/ffmpeg.h
index f7178ff..d5f10f0 100644
--- a/ffmpeg.h
+++ b/ffmpeg.h
@@ -396,6 +396,7 @@ extern int audio_volume;
extern int audio_sync_method;
extern int video_sync_method;
extern int do_benchmark;
+extern int do_benchmark_most;
extern int do_benchmark_all;
extern int do_deinterlace;
extern int do_hex_dump;
diff --git a/ffmpeg_opt.c b/ffmpeg_opt.c
index 6b3ef3b..6bc86b9 100644
--- a/ffmpeg_opt.c
+++ b/ffmpeg_opt.c
@@ -74,6 +74,7 @@ int audio_sync_method = 0;
int video_sync_method = VSYNC_AUTO;
int do_deinterlace = 0;
int do_benchmark = 0;
+int do_benchmark_most = 0;
int do_benchmark_all = 0;
int do_hex_dump = 0;
int do_pkt_dump = 0;
@@ -2541,8 +2542,10 @@ const OptionDef options[] = {
"set the number of data frames to record", "number" },
{ "benchmark", OPT_BOOL | OPT_EXPERT, { &do_benchmark },
"add timings for benchmarking" },
+ { "benchmark_most", OPT_BOOL | OPT_EXPERT, { &do_benchmark_most },
+ "add timings for each task and present an overview at the end" },
{ "benchmark_all", OPT_BOOL | OPT_EXPERT, { &do_benchmark_all },
- "add timings for each task" },
+ "add and display timings for each task" },
{ "progress", HAS_ARG | OPT_EXPERT, { .func_arg = opt_progress },
"write program-readable progress information", "url" },
{ "stdin", OPT_BOOL | OPT_EXPERT, { &stdin_interaction },
diff --git a/libavutil/ppc/timer.h b/libavutil/ppc/timer.h
index 155fc01..47bcbf8 100644
--- a/libavutil/ppc/timer.h
+++ b/libavutil/ppc/timer.h
@@ -44,4 +44,114 @@ static inline uint64_t read_time(void)
return (((uint64_t)tbu)<<32) | (uint64_t)tbl;
}
+#ifdef AVUTIL_HIGHRES_TIMERS_REQUIRED
+
+# ifdef __MACH__
+# include <mach/mach.h>
+# include <mach/mach_time.h>
+# include <mach/mach_init.h>
+# include <sys/sysctl.h>
+
+static mach_timebase_info_data_t ff_mach_timebase_info;
+static double ff_time_to_seconds= 0;
+
+static void init_hr_time(void)
+{
+ if( !ff_time_to_seconds ){
+ mach_timebase_info(&ff_mach_timebase_info);
+ /* go from absolute time units to seconds (the timebase is calibrated in nanoseconds): */
+ ff_time_to_seconds= 1e-9 * ff_mach_timebase_info.numer / ff_mach_timebase_info.denom;
+ }
+}
+
+static double get_hr_time(void)
+{
+ return mach_absolute_time() * ff_time_to_seconds;
+}
+
+# elif defined(linux)
+# include <time.h>
+# include <sys/time.h>
+# include <unistd.h>
+
+# if defined(CLOCK_MONOTONIC)
+
+static inline double gettime()
+{ struct timespec hrt;
+ clock_gettime( CLOCK_MONOTONIC, &hrt );
+ return hrt.tv_sec + hrt.tv_nsec * 1e-9;
+}
+
+static inline double clock_get_res()
+{ struct timespec hrt;
+ clock_getres( CLOCK_MONOTONIC, &hrt );
+ return hrt.tv_sec + hrt.tv_nsec * 1e-9;
+}
+
+# elif defined(CLOCK_REALTIME)
+
+static inline double gettime()
+{ struct timespec hrt;
+ clock_gettime( CLOCK_REALTIME, &hrt );
+ return hrt.tv_sec + hrt.tv_nsec * 1e-9;
+}
+
+static inline double clock_get_res()
+{ struct timespec hrt;
+ clock_getres( CLOCK_REALTIME, &hrt );
+ return hrt.tv_sec + hrt.tv_nsec * 1e-9;
+}
+
+
+# elif defined(CPUCLOCK_CYCLES_PER_SEC)
+
+static inline gettime()
+{
+ return read_time() / CPUCLOCK_CYCLES_PER_SEC;
+}
+
+# else
+
+static inline gettime()
+{ struct timezone tzp;
+ struct timeval ES_tv;
+
+ gettimeofday( &ES_tv, &tzp );
+ return ES_tv.tv_sec + ES_tv.tv_usec* 1e-6;
+}
+# endif
+
+static void init_hr_time(void)
+{
+#if defined(CLOCK_MONOTONIC) || defined(CLOCK_REALTIME)
+ // just for kicks, probably not necessary:
+ clock_get_res();
+#endif
+}
+
+static double get_hr_time(void)
+{
+ return( gettime() );
+}
+
+# else // __MACH__ or linux or something else
+
+// catchall, use gettimeofday():
+
+static void init_hr_time(void)
+{
+}
+
+static double get_hr_time(void)
+{ struct timezone tzp;
+ struct timeval ES_tv;
+
+ gettimeofday( &ES_tv, &tzp );
+ return ES_tv.tv_sec + ES_tv.tv_usec* 1e-6;
+}
+
+# endif // __MACH__ or linux
+
+#endif // AVUTIL_HIGHRES_TIMERS_REQUIRED
+
#endif /* AVUTIL_PPC_TIMER_H */
diff --git a/libavutil/x86/timer.h b/libavutil/x86/timer.h
index 5969876..a2a7d5e 100644
--- a/libavutil/x86/timer.h
+++ b/libavutil/x86/timer.h
@@ -40,4 +40,142 @@ static inline uint64_t read_time(void)
#endif /* HAVE_INLINE_ASM */
+#ifdef AVUTIL_HIGHRES_TIMERS_REQUIRED
+
+# ifdef __MACH__
+# include <mach/mach.h>
+# include <mach/mach_time.h>
+# include <mach/mach_init.h>
+# include <sys/sysctl.h>
+
+static double ff_time_to_seconds= 0;
+
+static void init_hr_time(void)
+{
+ if( !ff_time_to_seconds ){
+ mach_timebase_info_data_t ff_mach_timebase_info;
+ mach_timebase_info(&ff_mach_timebase_info);
+ /* go from absolute time units to seconds (the timebase is calibrated in nanoseconds): */
+ ff_time_to_seconds= 1e-9 * ff_mach_timebase_info.numer / ff_mach_timebase_info.denom;
+ }
+}
+
+static double get_hr_time(void)
+{
+ return mach_absolute_time() * ff_time_to_seconds;
+}
+
+# elif defined(linux)
+# include <time.h>
+# include <sys/time.h>
+# include <unistd.h>
+
+# if defined(CLOCK_MONOTONIC)
+
+static inline double gettime()
+{ struct timespec hrt;
+ clock_gettime( CLOCK_MONOTONIC, &hrt );
+ return hrt.tv_sec + hrt.tv_nsec * 1e-9;
+}
+
+static inline double clock_get_res()
+{ struct timespec hrt;
+ clock_getres( CLOCK_MONOTONIC, &hrt );
+ return hrt.tv_sec + hrt.tv_nsec * 1e-9;
+}
+
+# elif defined(CLOCK_REALTIME)
+
+static inline double gettime()
+{ struct timespec hrt;
+ clock_gettime( CLOCK_REALTIME, &hrt );
+ return hrt.tv_sec + hrt.tv_nsec * 1e-9;
+}
+
+static inline double clock_get_res()
+{ struct timespec hrt;
+ clock_getres( CLOCK_REALTIME, &hrt );
+ return hrt.tv_sec + hrt.tv_nsec * 1e-9;
+}
+
+
+# elif defined(CPUCLOCK_CYCLES_PER_SEC)
+
+static inline gettime()
+{
+ return read_time() / CPUCLOCK_CYCLES_PER_SEC;
+}
+
+# else
+
+/* Use gettimeofday(): */
+static inline gettime()
+{ struct timezone tzp;
+ struct timeval ES_tv;
+
+ gettimeofday( &ES_tv, &tzp );
+ return ES_tv.tv_sec + ES_tv.tv_usec* 1e-6;
+}
+# endif
+
+static void init_hr_time(void)
+{
+#if defined(CLOCK_MONOTONIC) || defined(CLOCK_REALTIME)
+ // just for kicks, probably not necessary:
+ clock_get_res();
+#endif
+}
+
+static double get_hr_time(void)
+{
+ return( gettime() );
+}
+
+#elif defined(_WINDOWS) || defined(WIN32) || defined(_MSC_VER)
+#pragma mark ---win32---
+
+#include <windows.h>
+
+static double ff_time_to_seconds= 0;
+
+static void init_hr_time(void)
+{
+ if( !ff_time_to_seconds ){
+ LARGE_INTEGER lp_frequency;
+ if( !QueryPerformanceFrequency(&lp_frequency) ){
+ ff_time_to_seconds= 0;
+ }
+ else{
+ ff_time_to_seconds= 1.0 / ((double) lp_frequency.QuadPart);
+ }
+ }
+}
+
+static double get_hr_time(void)
+{ LARGE_INTEGER count;
+
+ QueryPerformanceCounter(&count);
+ return count.QuadPart * ff_time_to_seconds;
+}
+
+# else // __MACH__ or MS Windows or linux or something else
+
+// catchall, use gettimeofday():
+
+static void init_hr_time(void)
+{
+}
+
+static double get_hr_time(void)
+{ struct timezone tzp;
+ struct timeval ES_tv;
+
+ gettimeofday( &ES_tv, &tzp );
+ return ES_tv.tv_sec + ES_tv.tv_usec* 1e-6;
+}
+
+# endif // __MACH__ or MS Windows or linux
+
+#endif // AVUTIL_HIGHRES_TIMERS_REQUIRED
+
#endif /* AVUTIL_X86_TIMER_H */
--
1.8.1.3
More information about the ffmpeg-devel
mailing list