[FFmpeg-devel] [PATCH] hwcontext_vaapi: use the special UC copy for downloading frames.

Jun Zhao mypopydev at gmail.com
Tue Apr 11 10:30:57 EEST 2017


From 9bab458006369f427fa2f4c6248ee89329e81067 Mon Sep 17 00:00:00 2001
From: Jun Zhao <jun.zhao at intel.com>
Date: Tue, 11 Apr 2017 14:37:07 +0800
Subject: [PATCH] hwcontext_vaapi: use the special UC copy for downloading
 frames.

Use the SSE4 uncacheable-memory (UC) copy function for copying image data
from GPU-mapped memory; see
https://software.intel.com/en-us/articles/copying-accelerated-video-decode-frame-buffers

Before this change, the VA-API hwaccel decoder copied image data from
GPU-mapped memory using vaCreateImage/vaGetImage/av_frame_copy; it now
uses vaDeriveImage/av_image_copy_uc_from.

When decoding a 3000-frame 1080p H.264 stream on an Intel(R) Core(TM)
i5-6260U CPU @ 1.80GHz, the CPU usage and decode fps are as follows:

1. Software decoder.
./ffmpeg -i ./skyfall2-trailer.mp4 -f null /dev/null

CPU: 80%, fps: 334fps

2a. vaCreateImage/vaGetImage/av_frame_copy
./ffmpeg -hwaccel vaapi -vaapi_device /dev/dri/renderD128 -i skyfall2-trailer.mp4 -f null /dev/null

CPU: 12%, fps: 147fps

2b. vaDeriveImage/av_image_copy_uc_from
./ffmpeg -hwaccel vaapi -vaapi_device /dev/dri/renderD128 -i skyfall2-trailer.mp4 -f null /dev/null

CPU: 23%, fps: 628fps

Signed-off-by: Jun Zhao <jun.zhao at intel.com>
---
 libavutil/hwcontext_vaapi.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/libavutil/hwcontext_vaapi.c b/libavutil/hwcontext_vaapi.c
index 3b50e95..23899f1 100644
--- a/libavutil/hwcontext_vaapi.c
+++ b/libavutil/hwcontext_vaapi.c
@@ -40,6 +40,7 @@
 #include "mem.h"
 #include "pixdesc.h"
 #include "pixfmt.h"
+#include "imgutils.h"
 
 typedef struct VAAPIDevicePriv {
 #if HAVE_VAAPI_X11
@@ -720,7 +721,7 @@ static int vaapi_map_frame(AVHWFramesContext *hwfc,
     // assume for now that the user is not aware of that and would therefore
     // prefer not to be given direct-mapped memory if they request read access.
     if (ctx->derive_works && dst->format == hwfc->sw_format &&
-        ((flags & AV_HWFRAME_MAP_DIRECT) || !(flags & AV_HWFRAME_MAP_READ))) {
+        ((flags & AV_HWFRAME_MAP_DIRECT) || (flags & AV_HWFRAME_MAP_READ))) {
         vas = vaDeriveImage(hwctx->display, surface_id, &map->image);
         if (vas != VA_STATUS_SUCCESS) {
             av_log(hwfc, AV_LOG_ERROR, "Failed to derive image from "
@@ -736,7 +737,6 @@ static int vaapi_map_frame(AVHWFramesContext *hwfc,
             err = AVERROR(EIO);
             goto fail;
         }
-        map->flags |= AV_HWFRAME_MAP_DIRECT;
     } else {
         vas = vaCreateImage(hwctx->display, image_format,
                             hwfc->width, hwfc->height, &map->image);
@@ -806,7 +806,8 @@ static int vaapi_transfer_data_from(AVHWFramesContext *hwfc,
                                     AVFrame *dst, const AVFrame *src)
 {
     AVFrame *map;
-    int err;
+    int i,err;
+    ptrdiff_t src_linesize[4], dst_linesize[4];
 
     if (dst->width > hwfc->width || dst->height > hwfc->height)
         return AVERROR(EINVAL);
@@ -823,11 +824,12 @@ static int vaapi_transfer_data_from(AVHWFramesContext *hwfc,
     map->width  = dst->width;
     map->height = dst->height;
 
-    err = av_frame_copy(dst, map);
-    if (err)
-        goto fail;
-
-    err = 0;
+    for (i = 0; i < 4; i++) {
+        dst_linesize[i] = dst->linesize[i];
+        src_linesize[i] = map->linesize[i];
+    }
+    av_image_copy_uc_from(dst->data, dst_linesize, map->data, src_linesize,
+                          hwfc->sw_format, src->width, src->height);
 fail:
     av_frame_free(&map);
     return err;
-- 
2.9.3



More information about the ffmpeg-devel mailing list