ffmpeg | branch: master | Dennis Sädtler <den...@obsproject.com> | Mon Apr 1 18:16:49 2024 +0200| [d8d0175d3a8782f84069378105520861c1bdccaf] | committer: Timo Rothenpieler
avformat/flvenc: implement support for multi-track video Based on enhanced-rtmp v2 spec published by Veovera: https://veovera.github.io/enhanced-rtmp/docs/enhanced/enhanced-rtmp-v2 This implementation maintains some backwards compatibility by only writing the track information for track indices > 0. This means that older FFmpeg versions - and possibly other software - can still read the first video track properly and skip over unsupported packets. Signed-off-by: Dennis Sädtler <den...@obsproject.com> Signed-off-by: Timo Rothenpieler <t...@rothenpieler.org> > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d8d0175d3a8782f84069378105520861c1bdccaf --- libavformat/flv.h | 7 +++ libavformat/flvenc.c | 160 ++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 120 insertions(+), 47 deletions(-) diff --git a/libavformat/flv.h b/libavformat/flv.h index f710963b92..82eabf7749 100644 --- a/libavformat/flv.h +++ b/libavformat/flv.h @@ -125,6 +125,13 @@ enum { PacketTypeCodedFramesX = 3, PacketTypeMetadata = 4, PacketTypeMPEG2TSSequenceStart = 5, + PacketTypeMultitrack = 6, +}; + +enum { + MultitrackTypeOneTrack = 0x00, + MultitrackTypeManyTracks = 0x10, + MultitrackTypeManyTracksManyCodecs = 0x20, }; enum { diff --git a/libavformat/flvenc.c b/libavformat/flvenc.c index 1c7ba61776..b40770bb8a 100644 --- a/libavformat/flvenc.c +++ b/libavformat/flvenc.c @@ -126,8 +126,9 @@ typedef struct FLVContext { AVCodecParameters *data_par; int flags; - int64_t last_ts[FLV_STREAM_TYPE_NB]; - int metadata_pkt_written; + int64_t *last_ts; + int *metadata_pkt_written; + int *video_track_idx_map; } FLVContext; static int get_audio_flags(AVFormatContext *s, AVCodecParameters *par) @@ -485,7 +486,7 @@ static void write_metadata(AVFormatContext *s, unsigned int ts) avio_wb32(pb, flv->metadata_totalsize + 11); } -static void flv_write_metadata_packet(AVFormatContext *s, AVCodecParameters *par, unsigned int ts) +static void flv_write_metadata_packet(AVFormatContext *s, AVCodecParameters *par, unsigned int ts, int stream_idx) { AVIOContext *pb = s->pb; FLVContext *flv = s->priv_data; @@ -495,7 +496,9 @@ static void flv_write_metadata_packet(AVFormatContext *s, AVCodecParameters *par int64_t total_size = 0; const AVPacketSideData *side_data = NULL; - if (flv->metadata_pkt_written) return; + if (flv->metadata_pkt_written[stream_idx]) + return; + if (par->codec_id == AV_CODEC_ID_HEVC || par->codec_id == AV_CODEC_ID_AV1 || par->codec_id == AV_CODEC_ID_VP9) { int flags_size = 5; @@ -617,7 +620,7 @@ static void flv_write_metadata_packet(AVFormatContext *s, AVCodecParameters *par avio_wb24(pb, total_size); avio_skip(pb, total_size + 10 - 3); avio_wb32(pb, total_size + 11); // previous tag size - flv->metadata_pkt_written = 1; + flv->metadata_pkt_written[stream_idx] = 1; } } @@ -632,7 +635,7 @@ static int unsupported_codec(AVFormatContext *s, return AVERROR(ENOSYS); } -static void flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par, int64_t ts) { +static void flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par, int64_t ts, int stream_index) { int64_t data_size; AVIOContext *pb = s->pb; FLVContext *flv = s->priv_data; @@ -682,12 +685,32 @@ static void flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par, i } avio_write(pb, par->extradata, par->extradata_size); } else { - if (par->codec_id == AV_CODEC_ID_HEVC) { - avio_w8(pb, FLV_IS_EX_HEADER | PacketTypeSequenceStart | FLV_FRAME_KEY); // ExVideoTagHeader mode with PacketTypeSequenceStart - avio_write(pb, "hvc1", 4); - } else if (par->codec_id == AV_CODEC_ID_AV1 || par->codec_id == AV_CODEC_ID_VP9) { - avio_w8(pb, FLV_IS_EX_HEADER | PacketTypeSequenceStart | FLV_FRAME_KEY); - avio_write(pb, par->codec_id == AV_CODEC_ID_AV1 ? "av01" : "vp09", 4); + int track_idx = flv->video_track_idx_map[stream_index]; + // If video stream has track_idx > 0 we need to send H.264 as extended video packet + int extended_flv = (par->codec_id == AV_CODEC_ID_H264 && track_idx) || + par->codec_id == AV_CODEC_ID_HEVC || + par->codec_id == AV_CODEC_ID_AV1 || + par->codec_id == AV_CODEC_ID_VP9; + + if (extended_flv) { + if (track_idx) { + avio_w8(pb, FLV_IS_EX_HEADER | PacketTypeMultitrack | FLV_FRAME_KEY); + avio_w8(pb, MultitrackTypeOneTrack | PacketTypeSequenceStart); + } else { + avio_w8(pb, FLV_IS_EX_HEADER | PacketTypeSequenceStart | FLV_FRAME_KEY); + } + + if (par->codec_id == AV_CODEC_ID_H264) + avio_write(pb, "avc1", 4); + else if (par->codec_id == AV_CODEC_ID_HEVC) + avio_write(pb, "hvc1", 4); + else if (par->codec_id == AV_CODEC_ID_AV1) + avio_write(pb, "av01", 4); + else if (par->codec_id == AV_CODEC_ID_VP9) + avio_write(pb, "vp09", 4); + + if (track_idx) + avio_w8(pb, track_idx); } else { avio_w8(pb, par->codec_tag | FLV_FRAME_KEY); // flags avio_w8(pb, 0); // AVC sequence header @@ -770,13 +793,14 @@ static int shift_data(AVFormatContext *s) static int flv_init(struct AVFormatContext *s) { int i; + int video_ctr = 0; FLVContext *flv = s->priv_data; - if (s->nb_streams > FLV_STREAM_TYPE_NB) { - av_log(s, AV_LOG_ERROR, "invalid number of streams %d\n", - s->nb_streams); - return AVERROR(EINVAL); - } + flv->last_ts = av_calloc(s->nb_streams, sizeof(*flv->last_ts)); + flv->metadata_pkt_written = av_calloc(s->nb_streams, sizeof(*flv->metadata_pkt_written)); + flv->video_track_idx_map = av_calloc(s->nb_streams, sizeof(*flv->video_track_idx_map)); + if (!flv->last_ts || !flv->metadata_pkt_written || !flv->video_track_idx_map) + return AVERROR(ENOMEM); for (i = 0; i < s->nb_streams; i++) { AVCodecParameters *par = s->streams[i]->codecpar; @@ -787,12 +811,17 @@ static int flv_init(struct AVFormatContext *s) s->streams[i]->avg_frame_rate.num) { flv->framerate = av_q2d(s->streams[i]->avg_frame_rate); } - if (flv->video_par) { + flv->video_track_idx_map[i] = video_ctr++; + if (flv->video_par && flv->flags & FLV_ADD_KEYFRAME_INDEX) { av_log(s, AV_LOG_ERROR, - "at most one video stream is supported in flv\n"); + "at most one video stream is supported in flv with keyframe index\n"); return AVERROR(EINVAL); + } else if (flv->video_par) { + av_log(s, AV_LOG_WARNING, + "more than one video stream is not supported by most flv demuxers.\n"); } - flv->video_par = par; + if (!flv->video_par) + flv->video_par = par; if (!ff_codec_get_tag(flv_video_codec_ids, par->codec_id)) return unsupported_codec(s, "Video", par->codec_id); @@ -882,7 +911,7 @@ static int flv_write_header(AVFormatContext *s) } for (i = 0; i < s->nb_streams; i++) { - flv_write_codec_header(s, s->streams[i]->codecpar, 0); + flv_write_codec_header(s, s->streams[i]->codecpar, 0, i); } flv->datastart_offset = avio_tell(pb); @@ -990,6 +1019,7 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt) uint8_t frametype = pkt->flags & AV_PKT_FLAG_KEY ? FLV_FRAME_KEY : FLV_FRAME_INTER; int flags = -1, flags_size, ret = 0; int64_t cur_offset = avio_tell(pb); + int track_idx = flv->video_track_idx_map[pkt->stream_index]; if (par->codec_type == AVMEDIA_TYPE_AUDIO && !pkt->size) { av_log(s, AV_LOG_WARNING, "Empty audio Packet\n"); @@ -1006,7 +1036,12 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt) else flags_size = 1; - if (par->codec_id == AV_CODEC_ID_HEVC && pkt->pts != pkt->dts) + if (par->codec_type == AVMEDIA_TYPE_VIDEO && track_idx) + flags_size += 2; // additional header bytes for multi-track video + + if ((par->codec_id == AV_CODEC_ID_HEVC || + (par->codec_id == AV_CODEC_ID_H264 && track_idx)) + && pkt->pts != pkt->dts) flags_size += 3; if (par->codec_id == AV_CODEC_ID_AAC || par->codec_id == AV_CODEC_ID_H264 @@ -1019,9 +1054,9 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt) if (ret < 0) return ret; memcpy(par->extradata, side, side_size); - flv_write_codec_header(s, par, pkt->dts); + flv_write_codec_header(s, par, pkt->dts, pkt->stream_index); } - flv_write_metadata_packet(s, par, pkt->dts); + flv_write_metadata_packet(s, par, pkt->dts, pkt->stream_index); } if (flv->delay == AV_NOPTS_VALUE) @@ -1143,32 +1178,59 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt) avio_seek(pb, data_size + 10 - 3, SEEK_CUR); avio_wb32(pb, data_size + 11); } else { - av_assert1(flags>=0); - if (par->codec_id == AV_CODEC_ID_HEVC) { - int pkttype = (pkt->pts != pkt->dts) ? PacketTypeCodedFrames : PacketTypeCodedFramesX; - avio_w8(pb, FLV_IS_EX_HEADER | pkttype | frametype); // ExVideoTagHeader mode with PacketTypeCodedFrames(X) - avio_write(pb, "hvc1", 4); - if (pkttype == PacketTypeCodedFrames) + int extended_flv = (par->codec_id == AV_CODEC_ID_H264 && track_idx) || + par->codec_id == AV_CODEC_ID_HEVC || + par->codec_id == AV_CODEC_ID_AV1 || + par->codec_id == AV_CODEC_ID_VP9; + + if (extended_flv) { + int h2645 = par->codec_id == AV_CODEC_ID_H264 || + par->codec_id == AV_CODEC_ID_HEVC; + int pkttype = PacketTypeCodedFrames; + // Optimisation for HEVC/H264: Do not send composition time if DTS == PTS + if (h2645 && pkt->pts == pkt->dts) + pkttype = PacketTypeCodedFramesX; + + if (track_idx) { + avio_w8(pb, FLV_IS_EX_HEADER | PacketTypeMultitrack | frametype); + avio_w8(pb, MultitrackTypeOneTrack | pkttype); + } else { + avio_w8(pb, FLV_IS_EX_HEADER | pkttype | frametype); + } + + if (par->codec_id == AV_CODEC_ID_H264) + avio_write(pb, "avc1", 4); + else if (par->codec_id == AV_CODEC_ID_HEVC) + avio_write(pb, "hvc1", 4); + else if (par->codec_id == AV_CODEC_ID_AV1) + avio_write(pb, "av01", 4); + else if (par->codec_id == AV_CODEC_ID_VP9) + avio_write(pb, "vp09", 4); + + if (track_idx) + avio_w8(pb, track_idx); + if (h2645 && pkttype == PacketTypeCodedFrames) avio_wb24(pb, pkt->pts - pkt->dts); - } else if (par->codec_id == AV_CODEC_ID_AV1 || par->codec_id == AV_CODEC_ID_VP9) { - avio_w8(pb, FLV_IS_EX_HEADER | PacketTypeCodedFrames | frametype); - avio_write(pb, par->codec_id == AV_CODEC_ID_AV1 ? "av01" : "vp09", 4); } else { + av_assert1(flags >= 0); avio_w8(pb, flags); - } - if (par->codec_id == AV_CODEC_ID_VP6) - avio_w8(pb,0); - if (par->codec_id == AV_CODEC_ID_VP6F || par->codec_id == AV_CODEC_ID_VP6A) { - if (par->extradata_size) - avio_w8(pb, par->extradata[0]); - else - avio_w8(pb, ((FFALIGN(par->width, 16) - par->width) << 4) | - (FFALIGN(par->height, 16) - par->height)); - } else if (par->codec_id == AV_CODEC_ID_AAC) - avio_w8(pb, 1); // AAC raw - else if (par->codec_id == AV_CODEC_ID_H264 || par->codec_id == AV_CODEC_ID_MPEG4) { - avio_w8(pb, 1); // AVC NALU - avio_wb24(pb, pkt->pts - pkt->dts); + + if (par->codec_id == AV_CODEC_ID_VP6) { + avio_w8(pb,0); + } else if (par->codec_id == AV_CODEC_ID_VP6F || + par->codec_id == AV_CODEC_ID_VP6A) { + if (par->extradata_size) + avio_w8(pb, par->extradata[0]); + else + avio_w8(pb, ((FFALIGN(par->width, 16) - par->width) << 4) | + (FFALIGN(par->height, 16) - par->height)); + } else if (par->codec_id == AV_CODEC_ID_AAC) { + avio_w8(pb, 1); // AAC raw + } else if (par->codec_id == AV_CODEC_ID_H264 || + par->codec_id == AV_CODEC_ID_MPEG4) { + avio_w8(pb, 1); // AVC NALU + avio_wb24(pb, pkt->pts - pkt->dts); + } } avio_write(pb, data ? data : pkt->data, size); @@ -1235,6 +1297,10 @@ static void flv_deinit(AVFormatContext *s) } flv->filepositions = flv->head_filepositions = NULL; flv->filepositions_count = 0; + + av_freep(&flv->last_ts); + av_freep(&flv->metadata_pkt_written); + av_freep(&flv->video_track_idx_map); } static const AVOption options[] = { _______________________________________________ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".