Folks, This patch makes dashenc.c handle the vp8/vp9 video codecs and the vorbis audio codec. The current webm_chunk and webm_dash_manifest muxers are a two-stage process and not really suitable for realtime "live" dash mpd creation. I have tested this for both webm and mp4 chunk creation, with both live and static mpd types. Since this is the first time I am submitting a patch, I am not sure if I am doing all the right things; I followed the patch submission process described in https://www.ffmpeg.org/developer.html. -Ram
From 34ad6ca077c11f09e21c1ed12fa1af7de6e428f3 Mon Sep 17 00:00:00 2001 From: Ram Natarajan <ram95...@gmail.com> Date: Thu, 28 Jan 2016 10:34:28 -0800 Subject: [PATCH] Added webm support for dash. This would replace/make redundant webm_chunk and webm_dash_manifest muxers. The current webm_chunk and webm_dash_manifest are a two-stage process and not suitable for live streaming. Reorganized chunk creation so that both webm and mp4 chunks can be produced.
--- libavformat/dashenc.c | 277 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 214 insertions(+), 63 deletions(-) diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c index 4509ee4..85d31d5 100644 --- a/libavformat/dashenc.c +++ b/libavformat/dashenc.c @@ -59,6 +59,11 @@ typedef struct Segment { int n; } Segment; +typedef enum { + WEBM_MUXER = 1, + MP4_MUXER = 2 +} MuxerType; + typedef struct OutputStream { AVFormatContext *ctx; int ctx_inited; @@ -74,8 +79,12 @@ typedef struct OutputStream { int64_t last_dts; int bit_rate; char bandwidth_str[64]; + int frame_count; char codec_str[100]; + char filename[1024]; + char full_path[1024]; + int64_t seg_start_pos; } OutputStream; typedef struct DASHContext { @@ -96,15 +105,22 @@ typedef struct DASHContext { const char *single_file_name; const char *init_seg_name; const char *media_seg_name; + char init_seg_name_full[2048]; + char media_seg_name_full[2048]; + int video_bitrate; + int audio_bitrate; AVRational min_frame_rate, max_frame_rate; int ambiguous_frame_rate; + MuxerType muxer_type; } DASHContext; static int dash_write(void *opaque, uint8_t *buf, int buf_size) { OutputStream *os = opaque; - if (os->out) + if (os->out) { + av_log(os->ctx, AV_LOG_DEBUG, "dash_write: writing %d bytes to %p\n", buf_size, os->out); ffurl_write(os->out, buf, buf_size); + } return buf_size; } @@ -114,6 +130,22 @@ static void set_codec_str(AVFormatContext *s, AVCodecContext *codec, { const AVCodecTag *tags[2] = { NULL, NULL }; uint32_t tag; + + switch (codec->codec_id) { + case AV_CODEC_ID_VP8: + snprintf(str, size, "vp8"); + return; + case AV_CODEC_ID_VP9: + snprintf(str, size, "vp9"); + return; + case AV_CODEC_ID_VORBIS: + snprintf(str, size, "vorbis"); + return; + case AV_CODEC_ID_OPUS: + snprintf(str, size, "opus"); + return; + } + if (codec->codec_type == AVMEDIA_TYPE_VIDEO) tags[0] = ff_codec_movvideo_tags; else if (codec->codec_type == AVMEDIA_TYPE_AUDIO) @@ -139,12 +171,13 @@ static void 
set_codec_str(AVFormatContext *s, AVCodecContext *codec, return; if (tag == MKTAG('m', 'p', '4', 'a')) { + int aot = 2; if (codec->extradata_size >= 2) { int aot = codec->extradata[0] >> 3; if (aot == 31) aot = ((AV_RB16(codec->extradata) >> 5) & 0x3f) + 32; - av_strlcatf(str, size, ".%d", aot); } + av_strlcatf(str, size, ".%d", aot); } else if (tag == MKTAG('m', 'p', '4', 'v')) { // Unimplemented, should output ProfileLevelIndication as a decimal number av_log(s, AV_LOG_WARNING, "Incomplete RFC 6381 codec string for mp4v\n"); @@ -210,7 +243,7 @@ static void output_segment_list(OutputStream *os, AVIOContext *out, DASHContext avio_printf(out, "\t\t\t\t<SegmentTemplate timescale=\"%d\" ", timescale); if (!c->use_timeline) avio_printf(out, "duration=\"%"PRId64"\" ", c->last_duration); - avio_printf(out, "initialization=\"%s\" media=\"%s\" startNumber=\"%d\">\n", c->init_seg_name, c->media_seg_name, c->use_timeline ? start_number : 1); + avio_printf(out, "initialization=\"%s\" media=\"%s\" startNumber=\"%d\">\n", c->init_seg_name_full, c->media_seg_name_full, c->use_timeline ? start_number : 1); if (c->use_timeline) { int64_t cur_time = 0; avio_printf(out, "\t\t\t\t\t<SegmentTimeline>\n"); @@ -519,7 +552,8 @@ static int write_manifest(AVFormatContext *s, int final) if (st->codec->codec_type != AVMEDIA_TYPE_VIDEO) continue; - avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"video/mp4\" codecs=\"%s\"%s width=\"%d\" height=\"%d\"", i, os->codec_str, os->bandwidth_str, st->codec->width, st->codec->height); + avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"%s\" codecs=\"%s\"%s width=\"%d\" height=\"%d\"", i, c->muxer_type == MP4_MUXER ? 
"video/mp4" : "video/webm", + os->codec_str, os->bandwidth_str, st->codec->width, st->codec->height); if (st->avg_frame_rate.num) avio_printf(out, " frameRate=\"%d/%d\"", st->avg_frame_rate.num, st->avg_frame_rate.den); avio_printf(out, ">\n"); @@ -538,7 +572,8 @@ static int write_manifest(AVFormatContext *s, int final) if (st->codec->codec_type != AVMEDIA_TYPE_AUDIO) continue; - avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"audio/mp4\" codecs=\"%s\"%s audioSamplingRate=\"%d\">\n", i, os->codec_str, os->bandwidth_str, st->codec->sample_rate); + avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"%s\" codecs=\"%s\"%s audioSamplingRate=\"%d\">\n", i, c->muxer_type == MP4_MUXER ? "audio/mp4" : "audio/webm", + os->codec_str, os->bandwidth_str, st->codec->sample_rate); avio_printf(out, "\t\t\t\t<AudioChannelConfiguration schemeIdUri=\"urn:mpeg:dash:23003:3:audio_channel_configuration:2011\" value=\"%d\" />\n", st->codec->channels); output_segment_list(&c->streams[i], out, c); avio_printf(out, "\t\t\t</Representation>\n"); @@ -552,6 +587,38 @@ static int write_manifest(AVFormatContext *s, int final) return ff_rename(temp_filename, s->filename, s); } +static void write_styp(AVIOContext *pb) +{ + avio_wb32(pb, 24); + ffio_wfourcc(pb, "styp"); + ffio_wfourcc(pb, "msdh"); + avio_wb32(pb, 0); /* minor */ + ffio_wfourcc(pb, "msdh"); + ffio_wfourcc(pb, "msix"); +} + + +static int open_next_segment(DASHContext *c, OutputStream *os, AVFormatContext *s, int stream_idx) +{ + os->seg_start_pos = avio_tell(os->ctx->pb); + if (!c->single_file) { + int ret; + dash_fill_tmpl_params(os->filename, sizeof(os->filename), c->media_seg_name_full, stream_idx, + os->segment_index, os->bit_rate, os->start_pts); + snprintf(os->full_path, sizeof(os->full_path), "%s%s", c->dirname, os->filename); + ret = ffurl_open(&os->out, os->full_path, AVIO_FLAG_WRITE, &s->interrupt_callback, NULL); + if (ret < 0) + return ret; + if (c->muxer_type == MP4_MUXER) + write_styp(os->ctx->pb); 
+ av_log(s, AV_LOG_DEBUG, "open_next_segment: %d: opened %s, %p\n", stream_idx, os->full_path, os->out); + } else { + snprintf(os->full_path, sizeof(os->full_path), "%s%s", c->dirname, os->initfile); + } + return 0; +} + + static int dash_write_header(AVFormatContext *s) { DASHContext *c = s->priv_data; @@ -559,6 +626,7 @@ static int dash_write_header(AVFormatContext *s) AVOutputFormat *oformat; char *ptr; char basename[1024]; + const char * muxer = "mp4"; if (c->single_file_name) c->single_file = 1; @@ -579,8 +647,21 @@ static int dash_write_header(AVFormatContext *s) ptr = strrchr(basename, '.'); if (ptr) *ptr = '\0'; + c->muxer_type = MP4_MUXER; + snprintf(c->init_seg_name_full, sizeof(c->init_seg_name_full), "%s.m4s", c->init_seg_name); + snprintf(c->media_seg_name_full, sizeof(c->media_seg_name_full), "%s.m4s", c->media_seg_name); + for (i = 0; i < s->nb_streams; i++) { + if (s->streams[i]->codec->codec_id == AV_CODEC_ID_VP8 || + s->streams[i]->codec->codec_id == AV_CODEC_ID_VP9) { + muxer = "webm"; + snprintf(c->init_seg_name_full, sizeof(c->init_seg_name_full), "%s.chk", c->init_seg_name); + snprintf(c->media_seg_name_full, sizeof(c->media_seg_name_full), "%s.chk", c->media_seg_name); + c->muxer_type = WEBM_MUXER; + } + } - oformat = av_guess_format("mp4", NULL, NULL); + av_log(s, AV_LOG_DEBUG, "Opening muxer of type %s\n", muxer); + oformat = av_guess_format(muxer, NULL, NULL); if (!oformat) { ret = AVERROR_MUXER_NOT_FOUND; goto fail; @@ -608,21 +689,39 @@ static int dash_write_header(AVFormatContext *s) } else { int level = s->strict_std_compliance >= FF_COMPLIANCE_STRICT ? AV_LOG_ERROR : AV_LOG_WARNING; - av_log(s, level, "No bit rate set for stream %d\n", i); - if (s->strict_std_compliance >= FF_COMPLIANCE_STRICT) { - ret = AVERROR(EINVAL); - goto fail; + os->bit_rate = s->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO ? c->video_bitrate : + s->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO ? 
c->audio_bitrate : 0; + if (os->bit_rate != 0) { + av_log(s, AV_LOG_VERBOSE, "stream %d: using user provided bitrate %d\n", i, os->bit_rate); + snprintf(os->bandwidth_str, sizeof(os->bandwidth_str), + " bandwidth=\"%d\"", os->bit_rate); + } + if (os->bit_rate == 0) { + av_log(s, level, "No bit rate set for stream %d\n", i); + if (s->strict_std_compliance >= FF_COMPLIANCE_STRICT) { + ret = AVERROR(EINVAL); + goto fail; + } } } - ctx = avformat_alloc_context(); + ret = avformat_alloc_output_context2(&ctx, oformat, NULL, NULL); + if (ret < 0) { + goto fail; + } if (!ctx) { ret = AVERROR(ENOMEM); goto fail; } + os->ctx = ctx; - ctx->oformat = oformat; + ctx->interrupt_callback = s->interrupt_callback; + if (c->muxer_type == WEBM_MUXER) { + av_dict_copy(&ctx->metadata, s->metadata, 0); + ctx->max_delay = s->max_delay; + } + if (!(st = avformat_new_stream(ctx, NULL))) { ret = AVERROR(ENOMEM); @@ -639,13 +738,18 @@ static int dash_write_header(AVFormatContext *s) goto fail; } + if (c->muxer_type == WEBM_MUXER) { + ctx->pb->seekable = 0; + } + + // open the init file if (c->single_file) { if (c->single_file_name) dash_fill_tmpl_params(os->initfile, sizeof(os->initfile), c->single_file_name, i, 0, os->bit_rate, 0); else - snprintf(os->initfile, sizeof(os->initfile), "%s-stream%d.m4s", basename, i); + snprintf(os->initfile, sizeof(os->initfile), "%s-stream%d.%s", basename, i, c->muxer_type == MP4_MUXER ? 
"m4s" : "chk"); } else { - dash_fill_tmpl_params(os->initfile, sizeof(os->initfile), c->init_seg_name, i, 0, os->bit_rate, 0); + dash_fill_tmpl_params(os->initfile, sizeof(os->initfile), c->init_seg_name_full, i, 0, os->bit_rate, 0); } snprintf(filename, sizeof(filename), "%s%s", c->dirname, os->initfile); ret = ffurl_open(&os->out, filename, AVIO_FLAG_WRITE, &s->interrupt_callback, NULL); @@ -653,16 +757,67 @@ static int dash_write_header(AVFormatContext *s) goto fail; os->init_start_pos = 0; - av_dict_set(&opts, "movflags", "frag_custom+dash+delay_moov", 0); - if ((ret = avformat_write_header(ctx, &opts)) < 0) { - goto fail; + av_log(s, AV_LOG_VERBOSE, "Representation %d init segment will be written to: %s\n", i, filename); + + // write the header and close the init file + if (c->muxer_type == MP4_MUXER) { + av_dict_set(&opts, "movflags", "frag_custom+dash+delay_moov", 0); + if ((ret = avformat_write_header(ctx, &opts)) < 0) { + goto fail; + } + av_write_frame(ctx, NULL); + avio_flush(ctx->pb); + os->init_range_length = avio_tell(ctx->pb); + if (!c->single_file) { + av_log(s, AV_LOG_DEBUG, "dash_write_header: mp4: %d: closing os->out %p, init_range_length = %d\n", i, os->out, os->init_range_length); + ffurl_close(os->out); + os->out = NULL; + + } + } + if (c->muxer_type == WEBM_MUXER) { + char cd[64]; + char index[64]; + int chunk_duration = (int)(c->min_seg_duration/1000); + + snprintf(cd, sizeof(cd), "%d", chunk_duration); + snprintf(index, sizeof(index), "%d", i); + + av_dict_set(&opts, "dash", "1", 0); + av_dict_set(&opts, "cluster_time_limit", cd, 0); + av_dict_set(&opts, "live", "1", 0); + av_dict_set(&opts, "dash_track_number", index, 0); + if ((ret = avformat_write_header(ctx, &opts)) < 0) { + goto fail; + } + // for webm, avformat_write_header fails to write the header because check_bitstream is set. 
+ // force writing the header so we can close the init file; the call to avformat_write_header + // is still required as a future call to ff_write_chained will fail otherwise + if ((ret = ctx->oformat->write_header(ctx)) < 0) { + goto fail; + } + av_write_frame(ctx, NULL); + avio_flush(ctx->pb); + os->init_range_length = avio_tell(ctx->pb); + if (!c->single_file) { + av_log(s, AV_LOG_DEBUG, "dash_write_header: webm: %d: closing os->out %p, init_range_length = %d\n", + i, os->out, os->init_range_length); + ffurl_close(os->out); + os->out = NULL; + } } + os->segment_index = 1; + + // open the next segment + ret = open_next_segment(c, os, s, i); + if (ret < 0) + goto fail; + + av_log(s, AV_LOG_DEBUG, "dash_write_header: %d: avio_tell()= %"PRId64"\n", i, avio_tell(ctx->pb)); + os->ctx_inited = 1; - avio_flush(ctx->pb); av_dict_free(&opts); - av_log(s, AV_LOG_VERBOSE, "Representation %d init segment will be written to: %s\n", i, filename); - s->streams[i]->time_base = st->time_base; // If the muxer wants to shift timestamps, request to have them shifted // already before being handed to this muxer, so we don't have mismatches @@ -687,7 +842,6 @@ static int dash_write_header(AVFormatContext *s) os->first_pts = AV_NOPTS_VALUE; os->max_pts = AV_NOPTS_VALUE; os->last_dts = AV_NOPTS_VALUE; - os->segment_index = 1; } if (!c->has_video && c->min_seg_duration <= 0) { @@ -733,21 +887,13 @@ static int add_segment(OutputStream *os, const char *file, seg->start_pos = start_pos; seg->range_length = range_length; seg->index_length = index_length; + av_log(os->ctx, AV_LOG_DEBUG, "Adding segment: start_pos = %"PRId64", range_length = %"PRId64", index_length = %"PRId64"\n", + start_pos, range_length, index_length); os->segments[os->nb_segments++] = seg; os->segment_index++; return 0; } -static void write_styp(AVIOContext *pb) -{ - avio_wb32(pb, 24); - ffio_wfourcc(pb, "styp"); - ffio_wfourcc(pb, "msdh"); - avio_wb32(pb, 0); /* minor */ - ffio_wfourcc(pb, "msdh"); - ffio_wfourcc(pb, 
"msix"); -} - static void find_index_range(AVFormatContext *s, const char *full_path, int64_t pos, int *index_length) { @@ -799,13 +945,13 @@ static int dash_flush(AVFormatContext *s, int final, int stream) DASHContext *c = s->priv_data; int i, ret = 0; int cur_flush_segment_index = 0; + + av_log(s, AV_LOG_DEBUG, "dash_flush: final = %d, stream = %d\n", final, stream); if (stream >= 0) cur_flush_segment_index = c->streams[stream].segment_index; for (i = 0; i < s->nb_streams; i++) { OutputStream *os = &c->streams[i]; - char filename[1024] = "", full_path[1024], temp_path[1024]; - int64_t start_pos; int range_length, index_length = 0; if (!os->packets_written) @@ -823,45 +969,25 @@ static int dash_flush(AVFormatContext *s, int final, int stream) continue; } - if (!os->init_range_length) { - av_write_frame(os->ctx, NULL); - os->init_range_length = avio_tell(os->ctx->pb); - if (!c->single_file) { - ffurl_close(os->out); - os->out = NULL; - } - } - start_pos = avio_tell(os->ctx->pb); - - if (!c->single_file) { - dash_fill_tmpl_params(filename, sizeof(filename), c->media_seg_name, i, os->segment_index, os->bit_rate, os->start_pts); - snprintf(full_path, sizeof(full_path), "%s%s", c->dirname, filename); - snprintf(temp_path, sizeof(temp_path), "%s.tmp", full_path); - ret = ffurl_open(&os->out, temp_path, AVIO_FLAG_WRITE, &s->interrupt_callback, NULL); - if (ret < 0) - break; - write_styp(os->ctx->pb); - } else { - snprintf(full_path, sizeof(full_path), "%s%s", c->dirname, os->initfile); - } av_write_frame(os->ctx, NULL); avio_flush(os->ctx->pb); os->packets_written = 0; - range_length = avio_tell(os->ctx->pb) - start_pos; + range_length = avio_tell(os->ctx->pb) - os->seg_start_pos; if (c->single_file) { - find_index_range(s, full_path, start_pos, &index_length); + find_index_range(s, os->full_path, os->seg_start_pos, &index_length); } else { + av_log(s, AV_LOG_DEBUG, "dash_flush: closing os->out = %p, %s\n", os->out, os->full_path); ffurl_close(os->out); os->out = NULL; - 
ret = ff_rename(temp_path, full_path, s); - if (ret < 0) - break; } - add_segment(os, filename, os->start_pts, os->max_pts - os->start_pts, start_pos, range_length, index_length); - av_log(s, AV_LOG_VERBOSE, "Representation %d media segment %d written to: %s\n", i, os->segment_index, full_path); + add_segment(os, os->filename, os->start_pts, os->max_pts - os->start_pts, os->seg_start_pos, range_length, index_length); + av_log(s, AV_LOG_VERBOSE, "Representation %d media segment %d written to: %s\n", i, os->segment_index, os->full_path); + ret = open_next_segment(c, os, s, i); + if (ret < 0) + break; } if (c->window_size || (final && c->remove_at_exit)) { @@ -921,6 +1047,27 @@ static int dash_write_packet(AVFormatContext *s, AVPacket *pkt) if (os->first_pts == AV_NOPTS_VALUE) os->first_pts = pkt->pts; + av_log(s, AV_LOG_DEBUG, "Stream %d: handling pkt of size %d\n", pkt->stream_index, pkt->size); + + if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO) { + if (pkt->flags & AV_PKT_FLAG_KEY) { + int64_t duration = av_rescale_q(pkt->pts - os->first_pts, + st->time_base, + AV_TIME_BASE_Q); + + int seconds = duration / AV_TIME_BASE; + int fractions = duration % AV_TIME_BASE; + int minutes = seconds / 60; + int hours = minutes / 60; + int segment_ended = av_compare_ts(pkt->pts - os->first_pts, st->time_base, seg_end_duration, AV_TIME_BASE_Q); + seconds %= 60; + minutes %= 60; + av_log(s, AV_LOG_DEBUG, "Duration @ frame %d = %dH:%dM:%d.%dS\n", os->frame_count, hours, minutes, seconds, fractions / (AV_TIME_BASE / 10)); + av_log(s, AV_LOG_DEBUG, "Key frame at pkt %d, segment_end_duration = %"PRId64" usecs\n", os->frame_count, seg_end_duration); + av_log(s, AV_LOG_DEBUG, "Segment ended = %d\n", segment_ended); + } + os->frame_count++; + } if ((!c->has_video || st->codec->codec_type == AVMEDIA_TYPE_VIDEO) && pkt->flags & AV_PKT_FLAG_KEY && os->packets_written && @@ -935,6 +1082,7 @@ static int dash_write_packet(AVFormatContext *s, AVPacket *pkt) st->time_base, AV_TIME_BASE_Q); + 
av_log(s, AV_LOG_DEBUG, "Last duration = %"PRId64"\n", c->last_duration); if ((!c->use_timeline || !c->use_template) && prev_duration) { if (c->last_duration < prev_duration*9/10 || c->last_duration > prev_duration*11/10) { @@ -962,6 +1110,7 @@ static int dash_write_packet(AVFormatContext *s, AVPacket *pkt) else os->max_pts = FFMAX(os->max_pts, pkt->pts + pkt->duration); os->packets_written++; + return ff_write_chained(os->ctx, 0, pkt, s, 0); } @@ -1009,8 +1158,10 @@ static const AVOption options[] = { { "use_timeline", "Use SegmentTimeline in SegmentTemplate", OFFSET(use_timeline), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, E }, { "single_file", "Store all segments in one file, accessed using byte ranges", OFFSET(single_file), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, E }, { "single_file_name", "DASH-templated name to be used for baseURL. Implies storing all segments in one file, accessed using byte ranges", OFFSET(single_file_name), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, E }, - { "init_seg_name", "DASH-templated name to used for the initialization segment", OFFSET(init_seg_name), AV_OPT_TYPE_STRING, {.str = "init-stream$RepresentationID$.m4s"}, 0, 0, E }, - { "media_seg_name", "DASH-templated name to used for the media segments", OFFSET(media_seg_name), AV_OPT_TYPE_STRING, {.str = "chunk-stream$RepresentationID$-$Number%05d$.m4s"}, 0, 0, E }, + { "init_seg_name", "DASH-templated name to used for the initialization segment", OFFSET(init_seg_name), AV_OPT_TYPE_STRING, {.str = "init-stream$RepresentationID$"}, 0, 0, E }, + { "media_seg_name", "DASH-templated name to used for the media segments", OFFSET(media_seg_name), AV_OPT_TYPE_STRING, {.str = "chunk-stream$RepresentationID$-$Number%05d$"}, 0, 0, E }, + { "video_bitrate", "Set video bitrate in the mpd if one is not found in the stream", OFFSET(video_bitrate), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, E }, + { "audio_bitrate", "Set audio bitrate in the mpd if one is not found in the stream", OFFSET(audio_bitrate), 
AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, E }, { NULL }, }; -- 1.9.1
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel