[FFmpeg-devel] [PATCH 8/8] libavformat/dashenc: add support for assigning streams to AdaptationSets
Peter Große
pegro at friiks.de
Sat Jan 21 16:39:09 EET 2017
This patch is based on the stream assignment code in webmdashenc.
Additional changes:
* Default to one AdaptationSet per stream
Previously all mapped streams of a media type (video, audio) were assigned
to a single AdaptationSet. Using the DASH live profile it is mandatory that
the segments of all representations are aligned, which is currently not
enforced. This leads to problems when using video streams with different
key frame intervals. So to play it safe, default to one AdaptationSet per stream,
unless overridden by explicit assignment
* Make sure all streams are assigned to exactly one AdaptationSet
* Copy "language" and "role" metadata from streams assigned to the set
* Stream assignment in "adaptation_sets" option supports stream identifier
(e.g. v:0)
* If "min_seg_duration" option is set, audio segments will be created
independently of the occurrence of video key frames when exceeding the given
segment duration
* Since the "bandwidth" attribute on a Representation is mandatory, calculate
bandwidth based on the size and duration of the first segment
Signed-off-by: Peter Große <pegro at friiks.de>
---
libavformat/dashenc.c | 295 ++++++++++++++++++++++++++++++++++++++------------
1 file changed, 228 insertions(+), 67 deletions(-)
diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index 522a0eb..bd6bb88 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -24,8 +24,10 @@
#include <unistd.h>
#endif
+#include "libavutil/avutil.h"
#include "libavutil/avassert.h"
#include "libavutil/avstring.h"
+#include "libavutil/eval.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mathematics.h"
#include "libavutil/opt.h"
@@ -59,9 +61,17 @@ typedef struct Segment {
int n;
} Segment;
+/* Per-AdaptationSet state; one entry of DASHContext.as. */
+typedef struct AdaptationSet {
+    char id[10];                 /* printed as the "id" attribute of the set */
+    enum AVMediaType media_type; /* media type shared by all streams in the set */
+    AVRational min_frame_rate;   /* lowest avg_frame_rate seen among the set's video streams */
+    AVRational max_frame_rate;   /* highest avg_frame_rate seen among the set's video streams */
+    int ambiguous_frame_rate;    /* set when a video stream has no positive avg_frame_rate */
+    AVDictionary *metadata;      /* "language" / "role" copied from the assigned streams */
+} AdaptationSet;
+
typedef struct OutputStream {
AVFormatContext *ctx;
- int ctx_inited;
+ int ctx_inited, as_idx;
AVIOContext *out;
int packets_written;
char initfile[1024];
@@ -72,13 +82,14 @@ typedef struct OutputStream {
int64_t first_pts, start_pts, max_pts;
int64_t last_dts;
int bit_rate;
- char bandwidth_str[64];
-
char codec_str[100];
} OutputStream;
typedef struct DASHContext {
const AVClass *class; /* Class for private options. */
+ char *adaptation_sets;
+ AdaptationSet *as;
+ int nb_as;
int window_size;
int extra_window_size;
int min_seg_duration;
@@ -87,7 +98,6 @@ typedef struct DASHContext {
int use_timeline;
int single_file;
OutputStream *streams;
- int has_video, has_audio;
int64_t last_duration;
int64_t total_duration;
char availability_start_time[100];
@@ -96,8 +106,6 @@ typedef struct DASHContext {
const char *init_seg_name;
const char *media_seg_name;
const char *utc_timing_url;
- AVRational min_frame_rate, max_frame_rate;
- int ambiguous_frame_rate;
} DASHContext;
// RFC 6381
@@ -193,6 +201,16 @@ static void dash_free(AVFormatContext *s)
{
DASHContext *c = s->priv_data;
int i, j;
+
+ if (c->as) {
+ for (i = 0; i < c->nb_as; i++) {
+ if(&c->as[i].metadata)
+ av_dict_free(&c->as[i].metadata);
+ }
+ av_freep(&c->as);
+ c->nb_as = 0;
+ }
+
if (!c->streams)
return;
for (i = 0; i < s->nb_streams; i++) {
@@ -453,12 +471,165 @@ static void format_date_now(char *buf, int size)
}
}
+/**
+ * Print one <AdaptationSet> element, together with a <Representation> for
+ * every stream assigned to it, to the manifest.
+ *
+ * @param s        muxer context; s->priv_data is the DASHContext
+ * @param out      I/O context the XML is printed to
+ * @param as_index 0-based index into DASHContext.as of the set to write
+ * @return always 0
+ */
+static int write_adaptation_set(AVFormatContext *s, AVIOContext *out, int as_index)
+{
+    DASHContext *c = s->priv_data;
+    AdaptationSet *as = &c->as[as_index];
+    AVDictionaryEntry *lang, *role;
+    int i;
+
+    avio_printf(out, "\t\t<AdaptationSet id=\"%s\" contentType=\"%s\" segmentAlignment=\"true\" bitstreamSwitching=\"true\"",
+                as->id, as->media_type == AVMEDIA_TYPE_VIDEO ? "video" : "audio");
+
+    lang = av_dict_get(as->metadata, "language", NULL, 0);
+    if (lang)
+        avio_printf(out, " lang=\"%s\"", lang->value);
+
+    // frameRate if all streams agree, maxFrameRate otherwise
+    if (as->max_frame_rate.num && !as->ambiguous_frame_rate)
+        avio_printf(out, " %s=\"%d/%d\"", (av_cmp_q(as->min_frame_rate, as->max_frame_rate) < 0) ? "maxFrameRate" : "frameRate", as->max_frame_rate.num, as->max_frame_rate.den);
+    avio_printf(out, ">\n");
+
+    role = av_dict_get(as->metadata, "role", NULL, 0);
+    if (role)
+        avio_printf(out, "\t\t\t<Role schemeIdUri=\"urn:mpeg:dash:role:2011\" value=\"%s\"/>\n", role->value);
+
+    for (i = 0; i < s->nb_streams; i++) {
+        OutputStream *os = &c->streams[i];
+        AVStream *st = s->streams[i];
+
+        // os->as_idx is 1-based (0 means "unassigned") while as_index is
+        // 0-based, so convert before comparing.
+        if (os->as_idx - 1 != as_index)
+            continue;
+
+        if (as->media_type == AVMEDIA_TYPE_VIDEO) {
+            avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"video/mp4\" codecs=\"%s\" bandwidth=\"%d\" width=\"%d\" height=\"%d\"",
+                        i, os->codec_str, os->bit_rate, st->codecpar->width, st->codecpar->height);
+            if (st->avg_frame_rate.num)
+                avio_printf(out, " frameRate=\"%d/%d\"", st->avg_frame_rate.num, st->avg_frame_rate.den);
+            avio_printf(out, ">\n");
+        } else {
+            avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"audio/mp4\" codecs=\"%s\" bandwidth=\"%d\" audioSamplingRate=\"%d\">\n",
+                        i, os->codec_str, os->bit_rate, st->codecpar->sample_rate);
+            avio_printf(out, "\t\t\t\t<AudioChannelConfiguration schemeIdUri=\"urn:mpeg:dash:23003:3:audio_channel_configuration:2011\" value=\"%d\" />\n",
+                        st->codecpar->channels);
+        }
+        output_segment_list(os, out, c);
+        avio_printf(out, "\t\t\t</Representation>\n");
+    }
+    avio_printf(out, "\t\t</AdaptationSet>\n");
+
+    return 0;
+}
+
+/**
+ * Build the AdaptationSet table and assign every stream to exactly one set.
+ *
+ * Without the "adaptation_sets" option each stream gets its own set.
+ * Otherwise the option string is parsed with the syntax
+ *     id=0,streams=0,1,2 id=1,streams=3,4
+ * where every entry after "streams=" is a stream specifier (e.g. "v:0").
+ *
+ * OutputStream.as_idx is stored 1-based; 0 means "not assigned".
+ *
+ * @param s muxer context; s->priv_data is the DASHContext
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int parse_adaptation_sets(AVFormatContext *s)
+{
+    DASHContext *c = s->priv_data;
+    char *p = c->adaptation_sets;
+    char *q;
+    enum { new_set, parse_id, parsing_streams } state;
+    int i;
+
+    // default: one AdaptationSet for each stream
+    if (!p) {
+        void *mem = av_mallocz(sizeof(*c->as) * s->nb_streams);
+        if (!mem)
+            return AVERROR(ENOMEM);
+        c->as = mem;
+        c->nb_as = s->nb_streams;
+
+        for (i = 0; i < s->nb_streams; i++) {
+            AdaptationSet *as = &c->as[i];
+            OutputStream *os = &c->streams[i];
+            snprintf(as->id, sizeof(as->id), "%d", i);
+            as->metadata = NULL;
+            as->media_type = s->streams[i]->codecpar->codec_type;
+            os->as_idx = i + 1;
+        }
+        return 0;
+    }
+
+    // syntax id=0,streams=0,1,2 id=1,streams=3,4 and so on
+    state = new_set;
+    while (*p) {
+        if (*p == ' ') {
+            // skip separators; p must advance or the loop never terminates
+            p++;
+            continue;
+        }
+
+        if (state == new_set && !strncmp(p, "id=", 3)) {
+            AdaptationSet *as;
+            size_t id_len;
+            void *mem = av_realloc(c->as, sizeof(*c->as) * (c->nb_as + 1));
+            if (!mem)
+                return AVERROR(ENOMEM);
+            c->as = mem;
+            ++c->nb_as;
+
+            as = &c->as[c->nb_as - 1];
+            // av_realloc() leaves the new element uninitialized
+            memset(as, 0, sizeof(*as));
+            as->media_type = AVMEDIA_TYPE_UNKNOWN;
+
+            p += 3; // consume "id="
+            id_len = strcspn(p, ",");
+            if (p[id_len] != ',') {
+                av_log(s, AV_LOG_ERROR, "AdaptationSet id \"%s\" is not followed by a stream list\n", p);
+                return AVERROR(EINVAL);
+            }
+            if (id_len >= sizeof(as->id)) {
+                av_log(s, AV_LOG_ERROR, "AdaptationSet id is too long (max. %d characters)\n",
+                       (int)(sizeof(as->id) - 1));
+                return AVERROR(EINVAL);
+            }
+            memcpy(as->id, p, id_len);
+            as->id[id_len] = 0;
+            p += id_len + 1; // consume the id and the ','
+            state = parse_id;
+        } else if (state == parse_id && !strncmp(p, "streams=", 8)) {
+            p += 8; // consume "streams="
+            state = parsing_streams;
+        } else if (state == parsing_streams) {
+            AdaptationSet *as = &c->as[c->nb_as - 1];
+            char *stream_identifier;
+            int ret = 0, err = 0;
+
+            q = p;
+            while (*q != '\0' && *q != ',' && *q != ' ') q++;
+
+            stream_identifier = av_strndup(p, q - p);
+            if (!stream_identifier)
+                return AVERROR(ENOMEM);
+
+            // assign the first stream matching the specifier to the current set
+            for (i = 0; i < s->nb_streams; i++) {
+                OutputStream *os = &c->streams[i];
+
+                ret = avformat_match_stream_specifier(s, s->streams[i], stream_identifier);
+                if (ret < 0)
+                    break; // invalid specifier, reported below
+                if (!ret)
+                    continue;
+
+                if (os->as_idx) {
+                    av_log(s, AV_LOG_ERROR, "Assigning a stream to more than one AdaptationSet is not allowed\n");
+                    err = AVERROR(EINVAL);
+                } else if (as->media_type == AVMEDIA_TYPE_UNKNOWN) {
+                    as->media_type = s->streams[i]->codecpar->codec_type;
+                } else if (as->media_type != s->streams[i]->codecpar->codec_type) {
+                    av_log(s, AV_LOG_ERROR, "Mixing codec types within an AdaptationSet is not allowed\n");
+                    err = AVERROR(EINVAL);
+                }
+                if (!err)
+                    os->as_idx = c->nb_as;
+                break;
+            }
+
+            // ret == 0: nothing matched; ret < 0: specifier could not be parsed
+            if (!err && ret <= 0) {
+                av_log(s, AV_LOG_ERROR, "Selected stream \"%s\" not found!\n", stream_identifier);
+                err = AVERROR(EINVAL);
+            }
+            av_free(stream_identifier);
+            if (err < 0)
+                return err;
+
+            if (*q == '\0') break;
+            if (*q == ' ') state = new_set;
+            p = ++q;
+        } else {
+            av_log(s, AV_LOG_ERROR, "Invalid adaptation_sets syntax near \"%s\"\n", p);
+            return AVERROR(EINVAL);
+        }
+    }
+
+    // check for unassigned streams
+    for (i = 0; i < s->nb_streams; i++) {
+        OutputStream *os = &c->streams[i];
+        if (!os->as_idx) {
+            av_log(s, AV_LOG_ERROR, "Stream %d is not mapped to an AdaptationSet\n", i);
+            return AVERROR(EINVAL);
+        }
+    }
+    return 0;
+}
+
static int write_manifest(AVFormatContext *s, int final)
{
DASHContext *c = s->priv_data;
AVIOContext *out;
char temp_filename[1024];
- int ret, i, as_id = 0;
+ int ret, i;
AVDictionaryEntry *title = av_dict_get(s->metadata, "title", NULL, 0);
snprintf(temp_filename, sizeof(temp_filename), "%s.tmp", s->filename);
@@ -522,45 +693,13 @@ static int write_manifest(AVFormatContext *s, int final)
avio_printf(out, "\t<Period id=\"0\" start=\"PT0.0S\">\n");
}
- if (c->has_video) {
- avio_printf(out, "\t\t<AdaptationSet id=\"%d\" contentType=\"video\" segmentAlignment=\"true\" bitstreamSwitching=\"true\"", as_id++);
- if (c->max_frame_rate.num && !c->ambiguous_frame_rate)
- avio_printf(out, " %s=\"%d/%d\"", (av_cmp_q(c->min_frame_rate, c->max_frame_rate) < 0) ? "maxFrameRate" : "frameRate", c->max_frame_rate.num, c->max_frame_rate.den);
- avio_printf(out, ">\n");
-
- for (i = 0; i < s->nb_streams; i++) {
- AVStream *st = s->streams[i];
- OutputStream *os = &c->streams[i];
-
- if (st->codecpar->codec_type != AVMEDIA_TYPE_VIDEO)
- continue;
-
- avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"video/mp4\" codecs=\"%s\"%s width=\"%d\" height=\"%d\"", i, os->codec_str, os->bandwidth_str, st->codecpar->width, st->codecpar->height);
- if (st->avg_frame_rate.num)
- avio_printf(out, " frameRate=\"%d/%d\"", st->avg_frame_rate.num, st->avg_frame_rate.den);
- avio_printf(out, ">\n");
-
- output_segment_list(&c->streams[i], out, c);
- avio_printf(out, "\t\t\t</Representation>\n");
+ for (i = 0; i < c->nb_as; i++) {
+ ret = write_adaptation_set(s, out, i);
+ if (ret < 0) {
+ return ret;
}
- avio_printf(out, "\t\t</AdaptationSet>\n");
}
- if (c->has_audio) {
- avio_printf(out, "\t\t<AdaptationSet id=\"%d\" contentType=\"audio\" segmentAlignment=\"true\" bitstreamSwitching=\"true\">\n", as_id++);
- for (i = 0; i < s->nb_streams; i++) {
- AVStream *st = s->streams[i];
- OutputStream *os = &c->streams[i];
- if (st->codecpar->codec_type != AVMEDIA_TYPE_AUDIO)
- continue;
-
- avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"audio/mp4\" codecs=\"%s\"%s audioSamplingRate=\"%d\">\n", i, os->codec_str, os->bandwidth_str, st->codecpar->sample_rate);
- avio_printf(out, "\t\t\t\t<AudioChannelConfiguration schemeIdUri=\"urn:mpeg:dash:23003:3:audio_channel_configuration:2011\" value=\"%d\" />\n", st->codecpar->channels);
- output_segment_list(&c->streams[i], out, c);
- avio_printf(out, "\t\t\t</Representation>\n");
- }
- avio_printf(out, "\t\t</AdaptationSet>\n");
- }
avio_printf(out, "\t</Period>\n");
avio_printf(out, "</MPD>\n");
avio_flush(out);
@@ -568,6 +707,23 @@ static int write_manifest(AVFormatContext *s, int final)
return avpriv_io_move(temp_filename, s->filename);
}
+/**
+ * Copy the entry for key from src into *dst, unless *dst already has an
+ * entry for that key.
+ *
+ * @param dst destination dictionary; must not be NULL (*dst may be NULL)
+ * @param src source dictionary
+ * @param key key of the entry to copy
+ * @return 0 if the entry was copied or nothing had to be done,
+ *         a negative AVERROR code if dst is NULL or av_dict_set() fails
+ */
+static int dict_copy_entry(AVDictionary **dst, const AVDictionary *src, const char *key)
+{
+    AVDictionaryEntry *entry;
+    int ret;
+
+    if (!dst)
+        return AVERROR(EINVAL);
+
+    // do not overwrite
+    if (av_dict_get(*dst, key, NULL, 0))
+        return 0;
+
+    entry = av_dict_get(src, key, NULL, 0);
+    if (!entry)
+        return 0;
+
+    // propagate allocation failures instead of reporting success
+    ret = av_dict_set(dst, key, entry->value, 0);
+    return ret < 0 ? ret : 0;
+}
+
static int dash_init(AVFormatContext *s)
{
DASHContext *c = s->priv_data;
@@ -579,7 +735,6 @@ static int dash_init(AVFormatContext *s)
c->single_file = 1;
if (c->single_file)
c->use_template = 0;
- c->ambiguous_frame_rate = 0;
av_strlcpy(c->dirname, s->filename, sizeof(c->dirname));
ptr = strrchr(c->dirname, '/');
@@ -599,8 +754,13 @@ static int dash_init(AVFormatContext *s)
if (!c->streams)
return AVERROR(ENOMEM);
+ ret = parse_adaptation_sets(s);
+ if (ret < 0)
+ return ret;
+
for (i = 0; i < s->nb_streams; i++) {
OutputStream *os = &c->streams[i];
+ AdaptationSet *as = &c->as[os->as_idx - 1];
AVFormatContext *ctx;
AVStream *st;
AVDictionary *opts = NULL;
@@ -618,10 +778,7 @@ static int dash_init(AVFormatContext *s)
os->bit_rate = 0;
}
}
- if (os->bit_rate) {
- snprintf(os->bandwidth_str, sizeof(os->bandwidth_str),
- " bandwidth=\"%d\"", os->bit_rate);
- } else {
+ if (!os->bit_rate) {
int level = s->strict_std_compliance >= FF_COMPLIANCE_STRICT ?
AV_LOG_ERROR : AV_LOG_WARNING;
av_log(s, level, "No bit rate set for stream %d\n", i);
@@ -629,6 +786,10 @@ static int dash_init(AVFormatContext *s)
return AVERROR(EINVAL);
}
+ // copy AdaptationSet language and role from stream metadata
+ dict_copy_entry(&as->metadata, s->streams[i]->metadata, "language");
+ dict_copy_entry(&as->metadata, s->streams[i]->metadata, "role");
+
ret = avformat_alloc_output_context2(&ctx, NULL, "mp4", NULL);
if (ret < 0)
return AVERROR(ENOMEM);
@@ -684,16 +845,13 @@ static int dash_init(AVFormatContext *s)
if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
AVRational avg_frame_rate = s->streams[i]->avg_frame_rate;
if (avg_frame_rate.num > 0) {
- if (av_cmp_q(avg_frame_rate, c->min_frame_rate) < 0)
- c->min_frame_rate = avg_frame_rate;
- if (av_cmp_q(c->max_frame_rate, avg_frame_rate) < 0)
- c->max_frame_rate = avg_frame_rate;
+ if (av_cmp_q(avg_frame_rate, as->min_frame_rate) < 0)
+ as->min_frame_rate = avg_frame_rate;
+ if (av_cmp_q(as->max_frame_rate, avg_frame_rate) < 0)
+ as->max_frame_rate = avg_frame_rate;
} else {
- c->ambiguous_frame_rate = 1;
+ as->ambiguous_frame_rate = 1;
}
- c->has_video = 1;
- } else if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
- c->has_audio = 1;
}
set_codec_str(s, st->codecpar, os->codec_str, sizeof(os->codec_str));
@@ -702,11 +860,6 @@ static int dash_init(AVFormatContext *s)
os->last_dts = AV_NOPTS_VALUE;
os->segment_index = 1;
}
-
- if (!c->has_video && c->min_seg_duration <= 0) {
- av_log(s, AV_LOG_WARNING, "no video stream and no min seg duration set\n");
- return AVERROR(EINVAL);
- }
return 0;
}
@@ -846,13 +999,13 @@ static int dash_flush(AVFormatContext *s, int final, int stream)
// Flush the single stream that got a keyframe right now.
// Flush all audio streams as well, in sync with video keyframes,
- // but not the other video streams.
+ // but not the other video streams or when audio is flushed explicitly
if (stream >= 0 && i != stream) {
- if (s->streams[i]->codecpar->codec_type != AVMEDIA_TYPE_AUDIO)
+ if (s->streams[i]->codecpar->codec_type != AVMEDIA_TYPE_AUDIO || c->min_seg_duration)
continue;
// Make sure we don't flush audio streams multiple times, when
// all video streams are flushed one at a time.
- if (c->has_video && os->segment_index > cur_flush_segment_index)
+ if (os->segment_index > cur_flush_segment_index)
continue;
}
@@ -879,6 +1032,13 @@ static int dash_flush(AVFormatContext *s, int final, int stream)
if (ret < 0)
break;
}
+
+ if(!os->bit_rate) {
+ double bitrate = (int)( (double) range_length * 8.0 * AV_TIME_BASE / (double)(os->max_pts - os->start_pts) );
+ if (bitrate >= 0 && bitrate <= INT64_MAX)
+ os->bit_rate = bitrate;
+ }
+
add_segment(os, filename, os->start_pts, os->max_pts - os->start_pts, os->pos, range_length, index_length);
av_log(s, AV_LOG_VERBOSE, "Representation %d media segment %d written to: %s\n", i, os->segment_index, full_path);
@@ -948,9 +1108,9 @@ static int dash_write_packet(AVFormatContext *s, AVPacket *pkt)
if (os->first_pts == AV_NOPTS_VALUE)
os->first_pts = pkt->pts;
- if ((!c->has_video || st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) &&
- pkt->flags & AV_PKT_FLAG_KEY && os->packets_written &&
- av_compare_ts(pkt->pts - os->first_pts, st->time_base,
+ if ((st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO && c->min_seg_duration ||
+ st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO && pkt->flags & AV_PKT_FLAG_KEY
+ ) && os->packets_written && av_compare_ts(pkt->pts - os->first_pts, st->time_base,
seg_end_duration, AV_TIME_BASE_Q) >= 0) {
int64_t prev_duration = c->last_duration;
@@ -1049,6 +1209,7 @@ static int dash_check_bitstream(struct AVFormatContext *s, const AVPacket *avpkt
#define OFFSET(x) offsetof(DASHContext, x)
#define E AV_OPT_FLAG_ENCODING_PARAM
static const AVOption options[] = {
+ { "adaptation_sets", "Adaptation sets. Syntax: id=0,streams=0,1,2 id=1,streams=3,4 and so on", OFFSET(adaptation_sets), AV_OPT_TYPE_STRING, { 0 }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
{ "window_size", "number of segments kept in the manifest", OFFSET(window_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, E },
{ "extra_window_size", "number of segments kept outside of the manifest before removing from disk", OFFSET(extra_window_size), AV_OPT_TYPE_INT, { .i64 = 5 }, 0, INT_MAX, E },
{ "min_seg_duration", "minimum segment duration (in microseconds)", OFFSET(min_seg_duration), AV_OPT_TYPE_INT64, { .i64 = 5000000 }, 0, INT_MAX, E },
--
2.10.2
More information about the ffmpeg-devel
mailing list