Hi (also, apologies if my client mangles the inline version of the patch; it's the first time I've tried to review an attached patch with it)...
On Monday, December 2nd, 2024 at 9:57 AM, Chris Hodges <chris.hod...@axis.com> wrote: > Hi, > > On 11/25/24 18:47, Tristan Matthews via ffmpeg-devel wrote: > > > One nit I'd add is that since the RTP AV1 spec is still in draft (according > > to https://aomediacodec.github.io/av1-rtp-spec/) this feature should > > probably be marked experimental as is done for VP9 in RTP, see: > > https://git.ffmpeg.org/gitweb/ffmpeg.git/blob/f8e91ab05ff3d111626ab8a3b5d570865a934f07:/libavformat/rtpenc.c#l221 > > > > in which case CLI users will have to add `-strict experimental` to their > > options. > > > I've added this as suggested. > > > For the keyframe detection issue I'm not sure if this is something missing > > in FFMPEG's RTP stack (e.g. I've noticed that both GStreamer and libwebrtc > > signal that a buffer contains a keyframe at a higher level), but if not > > could you set it if you're dealing with a FRAME OBU of type 0 (keyframe) or > > 2 (intra-only)? You'd need to parse the OBU to extract that however. > > > It turns out this was a lapsus on my side, when I used the flags field > in the RTPMuxContext structure instead of the flags in AVPacket. After > transferring the key frame information to the encoder function, I could > remove the workaround. > > I also added the key frame info to the AVPacket during demuxing that was > missing before. > > New patch attached. > > -- > Best regards, Chris_______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". 
> From: Chris Hodges <chris...@axis.com> > Date: Mon, 26 Aug 2024 11:29:29 +0200 > Subject: [PATCH] avformat: add AV1 RTP depacketizer and packetizer > X-Unsent: 1 > To: ffmpeg-devel@ffmpeg.org > Content-Type: text/plain > MIME-Version: 1.0 > > Add RTP packetizer and depacketizer according to (most) > of the official AV1 RTP specification. This enables > streaming via RTSP between ffmpeg and ffmpeg and has > also been tested to work with AV1 RTSP streams via > GStreamer. > > It also adds the required SDP attributes for AV1. > > AV1 RTP encoding is marked as experimental due to > draft specification status (courtesy of Tristan). > > Change-Id: Ie7f984b97be54d86d06bc73fa97c6faa8ffabf89 > Signed-off-by: Chris Hodges <chris...@axis.com> > --- > libavformat/Makefile | 2 + > libavformat/demux.c | 1 + > libavformat/rtp_av1.h | 128 +++++++++++ > libavformat/rtpdec.c | 1 + > libavformat/rtpdec_av1.c | 421 +++++++++++++++++++++++++++++++++++ > libavformat/rtpdec_formats.h | 1 + > libavformat/rtpenc.c | 14 ++ > libavformat/rtpenc.h | 1 + > libavformat/rtpenc_av1.c | 294 ++++++++++++++++++++++++ > libavformat/sdp.c | 30 +++ > 10 files changed, 893 insertions(+) > create mode 100644 libavformat/rtp_av1.h > create mode 100644 libavformat/rtpdec_av1.c > create mode 100644 libavformat/rtpenc_av1.c > > diff --git a/libavformat/Makefile b/libavformat/Makefile > index 7ca68a7036..1200668a2f 100644 > --- a/libavformat/Makefile > +++ b/libavformat/Makefile > @@ -47,6 +47,7 @@ OBJS-$(CONFIG_RTPDEC) += rdt.o > \ > rtpdec_ac3.o \ > rtpdec_amr.o \ > rtpdec_asf.o \ > + rtpdec_av1.o \ > rtpdec_dv.o \ > rtpdec_g726.o \ > rtpdec_h261.o \ > @@ -515,6 +516,7 @@ OBJS-$(CONFIG_RTP_MUXER) += rtp.o > \ > rtpenc_aac.o \ > rtpenc_latm.o \ > rtpenc_amr.o \ > + rtpenc_av1.o \ > rtpenc_h261.o \ > rtpenc_h263.o \ > rtpenc_h263_rfc2190.o \ > diff --git a/libavformat/demux.c b/libavformat/demux.c > index cba1f2e4df..8357a3bff1 100644 > --- a/libavformat/demux.c > +++ b/libavformat/demux.c > @@ -111,6 +111,7 
@@ static int set_codec_from_probe_data(AVFormatContext *s, > AVStream *st, > { "aac", AV_CODEC_ID_AAC, AVMEDIA_TYPE_AUDIO }, > { "ac3", AV_CODEC_ID_AC3, AVMEDIA_TYPE_AUDIO }, > { "aptx", AV_CODEC_ID_APTX, AVMEDIA_TYPE_AUDIO }, > + { "av1", AV_CODEC_ID_AV1, AVMEDIA_TYPE_VIDEO }, > { "dts", AV_CODEC_ID_DTS, AVMEDIA_TYPE_AUDIO }, > { "dvbsub", AV_CODEC_ID_DVB_SUBTITLE, AVMEDIA_TYPE_SUBTITLE }, > { "dvbtxt", AV_CODEC_ID_DVB_TELETEXT, AVMEDIA_TYPE_SUBTITLE }, > diff --git a/libavformat/rtp_av1.h b/libavformat/rtp_av1.h > new file mode 100644 > index 0000000000..a353fc0e4e > --- /dev/null > +++ b/libavformat/rtp_av1.h > @@ -0,0 +1,128 @@ > +/* > + * Shared definitions and helper functions for > + * AV1 (de)packetization. > + * Copyright (c) 2024 Axis Communications > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +/** > + * @file > + * @brief shared defines and functions for AV1 RTP dec/enc > + * @author Chris Hodges <chris.hod...@axis.com> > + */ > + > +#ifndef AVFORMAT_RTP_AV1_H > +#define AVFORMAT_RTP_AV1_H > + > +// define a couple of flags and bit fields > +#define AV1B_OBU_FORBIDDEN 7 > +#define AV1F_OBU_FORBIDDEN (1u << AV1B_OBU_FORBIDDEN) > +#define AV1S_OBU_TYPE 3 > +#define AV1M_OBU_TYPE 15 > +#define AV1B_OBU_EXTENSION_FLAG 2 > +#define AV1F_OBU_EXTENSION_FLAG (1u << AV1B_OBU_EXTENSION_FLAG) > +#define AV1B_OBU_HAS_SIZE_FIELD 1 > +#define AV1F_OBU_HAS_SIZE_FIELD (1u << AV1B_OBU_HAS_SIZE_FIELD) > +#define AV1B_OBU_RESERVED_1BIT 0 > +#define AV1F_OBU_RESERVED_1BIT (1u << AV1B_OBU_RESERVED_1BIT) > + > +#define AV1B_AGGR_HDR_FRAG_CONT 7 > +#define AV1F_AGGR_HDR_FRAG_CONT (1u << AV1B_AGGR_HDR_FRAG_CONT) > +#define AV1B_AGGR_HDR_LAST_FRAG 6 > +#define AV1F_AGGR_HDR_LAST_FRAG (1u << AV1B_AGGR_HDR_LAST_FRAG) > +#define AV1S_AGGR_HDR_NUM_OBUS 4 > +#define AV1M_AGGR_HDR_NUM_OBUS 3 > +#define AV1B_AGGR_HDR_FIRST_PKT 3 > +#define AV1F_AGGR_HDR_FIRST_PKT (1u << AV1B_AGGR_HDR_FIRST_PKT) > + > +/// calculate number of required LEB bytes for the given length > +static inline unsigned int calc_leb_size(uint32_t length) { > + unsigned int num_lebs = 0; > + do { > + num_lebs++; > + length >>= 7; > + } while (length); > + return num_lebs; > +} > + > +/// write out variable number of LEB bytes for the given length > +static inline unsigned int write_leb(uint8_t *lebptr, uint32_t length) { > + unsigned int num_lebs = 0; > + do { > + num_lebs++; > + if (length < 0x80) { > + *lebptr = length; > + break; > + } > + *lebptr++ = length | 0x80; // no need to mask out > + length >>= 7; > + } while (1); > + return num_lebs; > +} > + > +/// write out fixed 
number of LEB bytes (may have "unused" bytes) > +static inline void write_leb_n(uint8_t *lebptr, uint32_t length, unsigned > int num_lebs) { > + for (int i = 0; i < num_lebs; i++) { > + if (i == num_lebs - 1) { > + *lebptr = length & 0x7f; > + } else { > + *lebptr++ = length | 0x80; // no need to mask out > + } > + length >>= 7; > + } > +} > + > +/// securely parse LEB bytes and return the resulting encoded length > +static inline unsigned int parse_leb(AVFormatContext *ctx, const uint8_t > *buf_ptr, > + uint32_t buffer_size, uint32_t > *obu_size) { > + uint8_t leb128; > + unsigned int num_lebs = 0; > + *obu_size = 0; > + do { > + uint32_t leb7; > + if (!buffer_size) { > + av_log(ctx, AV_LOG_ERROR, "AV1: Out of data in OBU size field > AV1 RTP packet\n"); > + return 0; > + } > + leb128 = *buf_ptr++; > + leb7 = leb128 & 0x7f; > + buffer_size--; > + /* AV1 spec says that the maximum value returned from leb128 must > fit in > + * 32 bits, so if the next byte will shift data out, we have some > kind > + * of violation here. It is legal, though, to have the most > significant > + * bytes with all zero bits (in the lower 7 bits). */ > + if (((num_lebs == 4) && (leb7 >= 0x10)) || ((num_lebs > 4) && leb7)) > { > + av_log(ctx, AV_LOG_ERROR, "AV1: OBU size field exceeds 32 bit in > AV1 RTP packet\n"); > + return 0; > + } > + if ((num_lebs == 7) && (leb128 >= 0x80)) { > + /* leb128 is defined to be up to 8 bytes (why???), 8th byte MUST > NOT > + * indicate continuation */ > + av_log(ctx, AV_LOG_ERROR, "AV1: OBU size field consists of too > many bytes in AV1 RTP packet\n"); > + return 0; > + } > + // shifts >= 32 are undefined in C! 
> + if (num_lebs <= 4) { > + *obu_size |= leb7 << (7 * num_lebs); > + } > + num_lebs++; > + } while (leb128 >= 0x80); > + return num_lebs; > +} > + > +#endif /* AVFORMAT_RTP_AV1_H */ > diff --git a/libavformat/rtpdec.c b/libavformat/rtpdec.c > index 729bf83685..a7d5a79a83 100644 > --- a/libavformat/rtpdec.c > +++ b/libavformat/rtpdec.c > @@ -83,6 +83,7 @@ static const RTPDynamicProtocolHandler *const > rtp_dynamic_protocol_handler_list[ > &ff_ac3_dynamic_handler, > &ff_amr_nb_dynamic_handler, > &ff_amr_wb_dynamic_handler, > + &ff_av1_dynamic_handler, > &ff_dv_dynamic_handler, > &ff_g726_16_dynamic_handler, > &ff_g726_24_dynamic_handler, > diff --git a/libavformat/rtpdec_av1.c b/libavformat/rtpdec_av1.c > new file mode 100644 > index 0000000000..878847df06 > --- /dev/null > +++ b/libavformat/rtpdec_av1.c > @@ -0,0 +1,421 @@ > +/* > + * Depacketization for RTP Payload Format For AV1 (v1.0) > + * https://aomediacodec.github.io/av1-rtp-spec/ > + * Copyright (c) 2024 Axis Communications > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +/** > + * @file > + * @brief AV1 / RTP depacketization code (RTP Payload Format For AV1 (v1.0)) > + * @author Chris Hodges <chris.hod...@axis.com> > + * @note The process will restore TDs and put back size fields into headers. > + * It will also try to keep complete OBUs and remove partial OBUs > + * caused by packet drops and thus keep the stream syntactically > intact. > + */ > + > +#include "libavutil/avstring.h" > +#include "libavutil/mem.h" > +#include "avformat.h" > + > +#include "rtpdec.h" > +#include "libavcodec/av1.h" > +#include "rtp_av1.h" > + > +/** > + * RTP/AV1 specific private data. > + */ > +struct PayloadContext { > + uint32_t timestamp; ///< last received timestamp for frame > + uint8_t profile; ///< profile (main/high/professional) > + uint8_t level_idx; ///< level (0-31) > + uint8_t tier; ///< main tier or high tier > + uint16_t prev_seq; ///< sequence number of previous packet > + unsigned int frag_obu_size; ///< current total size of fragmented OBU > + unsigned int frag_pkt_leb_pos; ///< offset in buffer where OBU LEB > starts > + unsigned int frag_lebs_res; ///< number of bytes reserved for LEB > + unsigned int frag_header_size; ///< size of OBU header (1 or 2) > + int needs_td; ///< indicates that a TD should be output > + int drop_fragment; ///< drop all fragments until next frame > +}; > + > +static int sdp_parse_fmtp_config_av1(AVFormatContext *s, > + AVStream *stream, > + PayloadContext *av1_data, > + const char *attr, const char *value) { > + if (!strcmp(attr, "profile")) { > + av1_data->profile = atoi(value); > + av_log(s, AV_LOG_DEBUG, "RTP AV1 profile: %u\n", av1_data->profile); > + } else if (!strcmp(attr, "level-idx")) { > + av1_data->level_idx = atoi(value); > + av_log(s, AV_LOG_DEBUG, "RTP 
AV1 level: %u\n", av1_data->profile); > + } else if (!strcmp(attr, "tier")) { > + av1_data->tier = atoi(value); > + av_log(s, AV_LOG_DEBUG, "RTP AV1 tier: %u\n", av1_data->tier); > + } > + return 0; > +} > + > +// return 0 on complete packet, -1 on partial packet > +static int av1_handle_packet(AVFormatContext *ctx, PayloadContext *data, > + AVStream *st, AVPacket *pkt, uint32_t > *timestamp, > + const uint8_t *buf, int len, uint16_t seq, > + int flags) { > + uint8_t aggr_hdr; > + int result = 0; > + int is_frag_cont; > + int is_last_fragmented; > + int is_first_pkt; > + unsigned int num_obus; > + unsigned int obu_cnt = 1; > + unsigned int rem_pkt_size = len; > + unsigned int pktpos; > + const uint8_t *buf_ptr = buf; > + uint16_t expected_seq = data->prev_seq + 1; > + > + data->prev_seq = seq; > + > + if (!len) { > + av_log(ctx, AV_LOG_ERROR, "Empty AV1 RTP packet\n"); > + return AVERROR_INVALIDDATA; > + } > + if (len < 2) { > + av_log(ctx, AV_LOG_ERROR, "AV1 RTP packet too short\n"); > + return AVERROR_INVALIDDATA; > + } > + > + /* The payload structure is supposed to be straight-forward, but there > are a > + * couple of edge cases which need to be tackled and make things a bit > more > + * complex. > + * These are mainly due to: > + * - To reconstruct the OBU size for fragmented packets and place it the > OBU > + * header, the final size will not be known until the last fragment has > + * been parsed. However, the number LEBs in the header is variable > + * depending on the length of the payload. > + * - We are increasing the out-packet size while we are getting > fragmented > + * OBUs. If an RTP packet gets dropped, we would create corrupted OBUs. > + * In this case we decide to drop the whole frame. 
> + */ > + > + av_log(ctx, AV_LOG_DEBUG, "RTP Packet %d in (%x), len=%d:\n", > + seq, flags, len); > + av_hex_dump_log(ctx, AV_LOG_TRACE, buf, FFMIN(len, 128)); I think this is probably too verbose for DEBUG level (since it's called every packet)...maybe ifdef this bit out. > + > + /* 8 bit aggregate header: Z Y W W N - - - */ > + aggr_hdr = *buf_ptr++; > + rem_pkt_size--; > + > + /* Z: MUST be set to 1 if the first OBU element is an OBU fragment that > is a > + * continuation of an OBU fragment from the previous packet, and MUST be > set > + * to 0 otherwise */ > + is_frag_cont = (aggr_hdr >> AV1B_AGGR_HDR_FRAG_CONT) & 1; > + > + /* Y: MUST be set to 1 if the last OBU element is an OBU fragment that > will > + * continue in the next packet, and MUST be set to 0 otherwise */ > + is_last_fragmented = (aggr_hdr >> AV1B_AGGR_HDR_LAST_FRAG) & 1; > + > + /* W: two bit field that describes the number of OBU elements in the > packet. > + * This field MUST be set equal to 0 or equal to the number of OBU > elements > + * contained in the packet. > + * If set to 0, each OBU element MUST be preceded by a length field. > + * If not set to 0 (i.e., W = 1, 2 or 3) the last OBU element MUST NOT be > + * preceded by a length field (it's derived from RTP packet size minus > other > + * known lengths). 
*/ > + num_obus = (aggr_hdr >> AV1S_AGGR_HDR_NUM_OBUS) & AV1M_AGGR_HDR_NUM_OBUS; > + > + /* N: MUST be set to 1 if the packet is the first packet of a coded video > + * sequence, and MUST be set to 0 otherwise.*/ > + is_first_pkt = (aggr_hdr >> AV1B_AGGR_HDR_FIRST_PKT) & 1; > + > + if (is_frag_cont) { > + int16_t seq_diff = seq - expected_seq; > + > + if (data->drop_fragment) { > + return AVERROR_INVALIDDATA; > + } > + if (is_first_pkt) { > + av_log(ctx, AV_LOG_ERROR, "Illegal aggregation header in first > AV1 RTP packet\n"); > + return AVERROR_INVALIDDATA; > + } > + if (seq_diff) { > + av_log(ctx, AV_LOG_WARNING, "AV1 RTP packet sequence mismatch > (%d != %d), dropping fragment\n", > + seq, expected_seq); > + goto drop_fragment; > + } > + if (!pkt->size || !data->frag_obu_size) { > + av_log(ctx, AV_LOG_WARNING, "Unexpected fragment continuation in > AV1 RTP packet\n"); > + goto drop_fragment; // avoid repeated output for the same > fragment > + } > + } else { > + data->drop_fragment = 0; > + if ((data->timestamp != *timestamp) || is_first_pkt) { > + av_log(ctx, AV_LOG_TRACE, "Timestamp changed to %d (or first > pkt), forcing TD\n", *timestamp); > + data->needs_td = 1; > + data->frag_obu_size = 0; // new temporal unit might have been > caused by dropped packets > + } > + if (data->frag_obu_size) { > + data->frag_obu_size = 0; // make sure we recover > + av_log(ctx, AV_LOG_ERROR, "Missing fragment continuation in AV1 > RTP packet\n"); > + return AVERROR_INVALIDDATA; > + } > + // update the timestamp in the frame packet with the one from the > RTP packet > + data->timestamp = *timestamp; > + } > + pktpos = pkt->size; > + av_log(ctx, AV_LOG_DEBUG, "Input buffer size %d, aggr head 0x%02x fc %d, > lf %d, no %d, fp %d\n", > + len, aggr_hdr, is_frag_cont, is_last_fragmented, num_obus, > is_first_pkt); > + > + if (is_first_pkt) { > + pkt->flags |= AV_PKT_FLAG_KEY; > + } > + > + // loop over OBU elements > + while (rem_pkt_size) { > + uint32_t obu_size; > + int num_lebs; > 
+ int needs_size_field; > + int output_size; > + unsigned int obu_payload_size; > + uint8_t obu_hdr; > + > + obu_size = rem_pkt_size; > + if (!num_obus || obu_cnt < num_obus) { > + // read out explicit OBU element size (which almost corresponds > to the original OBU size) > + num_lebs = parse_leb(ctx, buf_ptr, rem_pkt_size, &obu_size); > + if (!num_lebs) { > + return AVERROR_INVALIDDATA; > + } > + rem_pkt_size -= num_lebs; > + buf_ptr += num_lebs; > + } > + // read first byte (which is the header byte only for non-fragmented > elements) > + obu_hdr = *buf_ptr; > + if (obu_size > rem_pkt_size) { > + av_log(ctx, AV_LOG_ERROR, "AV1 OBU size %u larger than remaining > pkt size %d\n", obu_size, rem_pkt_size); > + return AVERROR_INVALIDDATA; > + } > + > + if (!obu_size) { > + av_log(ctx, AV_LOG_ERROR, "Unreasonable AV1 OBU size %u\n", > obu_size); > + return AVERROR_INVALIDDATA; > + } > + > + if (!is_frag_cont) { > + uint8_t obu_type = (obu_hdr >> AV1S_OBU_TYPE) & AV1M_OBU_TYPE; > + if (obu_hdr & AV1F_OBU_FORBIDDEN) { > + av_log(ctx, AV_LOG_ERROR, "Forbidden bit set in AV1 OBU > header (0x%02x)\n", obu_hdr); > + return AVERROR_INVALIDDATA; > + } > + // ignore and remove OBUs according to spec > + if ((obu_type == AV1_OBU_TEMPORAL_DELIMITER) || > + (obu_type == AV1_OBU_TILE_LIST)) { > + pktpos += obu_size; > + rem_pkt_size -= obu_size; > + // TODO: This probably breaks if the OBU_TILE_LIST is > fragmented > + // into the next RTP packet, so at least check and fail here > + if (rem_pkt_size == 0 && is_last_fragmented) { > + av_log(ctx, AV_LOG_ERROR, "AV1 OBU_TILE_LIST fragmented, > unsupported\n"); > + return AVERROR_INVALIDDATA; > + } > + obu_cnt++; > + continue; > + } > + } > + > + // If we need to add a size field, out size will be different > + output_size = obu_size; > + // Spec says the OBUs should have their size fields removed, > + // but this is not mandatory > + if (is_frag_cont || (obu_hdr & AV1F_OBU_HAS_SIZE_FIELD)) { > + needs_size_field = 0; > + } else { > + 
needs_size_field = 1; > + // (re)calculate number of LEB bytes needed (if it was implicit, > there were no LEBs) > + output_size += calc_leb_size(obu_size - (1 + ((obu_hdr & > AV1F_OBU_EXTENSION_FLAG) ? 1 : 0))); > + } > + > + if (!is_frag_cont && (obu_cnt == 1)) { > + if (data->needs_td) { > + av_log(ctx, AV_LOG_TRACE, "TD will be added\n"); > + output_size += 2; // for Temporal Delimiter (TD) > + } > + if (pkt->data) { > + if ((result = av_grow_packet(pkt, output_size)) < 0) > + return result; > + } else { > + if ((result = av_new_packet(pkt, output_size) < 0)) > + return result; > + } > + > + if (data->needs_td) { > + // restore TD > + pkt->data[pktpos++] = 0x12; > + pkt->data[pktpos++] = 0x00; > + } > + data->needs_td = 0; > + } else { > + if ((result = av_grow_packet(pkt, output_size)) < 0) > + return result; > + } > + > + obu_payload_size = obu_size; > + // do we need to restore the OBU size field? > + if (needs_size_field) { > + // set obu_has_size_field in header byte > + pkt->data[pktpos++] = *buf_ptr++ | AV1F_OBU_HAS_SIZE_FIELD; > + data->frag_header_size = 1; > + obu_payload_size--; > + > + // copy extension byte, if available > + if (obu_hdr & AV1F_OBU_EXTENSION_FLAG) { > + /* TODO we cannot handle the edge case where last element is > a > + * fragment of exactly one byte AND the header has the > extension > + * flag set. 
Note that it would be more efficient to not > send a > + * fragment of one byte and instead drop the size field of > the > + * prior element */ > + if (!obu_payload_size) { > + av_log(ctx, AV_LOG_ERROR, "AV1 OBU too short for > extension byte (0x%02x)\n", > + obu_hdr); > + return AVERROR_INVALIDDATA; > + } > + pkt->data[pktpos++] = *buf_ptr++; > + data->frag_header_size = 2; > + obu_payload_size--; > + } > + > + // remember start position of LEB for possibly fragmented packet > to > + // fixup OBU size later > + data->frag_pkt_leb_pos = pktpos; > + // write intermediate OBU size field > + num_lebs = write_leb(pkt->data + pktpos, obu_payload_size); > + data->frag_lebs_res = num_lebs; > + pktpos += num_lebs; > + } > + // copy verbatim or without above header size patch > + memcpy(pkt->data + pktpos, buf_ptr, obu_payload_size); > + pktpos += obu_payload_size; > + buf_ptr += obu_payload_size; > + rem_pkt_size -= obu_size; > + > + // if we were handling a fragmented packet and this was the last > + // fragment, correct OBU size field > + if (data->frag_obu_size && (rem_pkt_size || !is_last_fragmented)) { > + uint32_t final_obu_size = data->frag_obu_size + obu_size - > data->frag_header_size; > + uint8_t *lebptr = pkt->data + data->frag_pkt_leb_pos; > + num_lebs = calc_leb_size(final_obu_size); > + > + // check if we had allocated enough LEB bytes in header, > + // otherwise make some extra space > + if (num_lebs > data->frag_lebs_res) { > + int extra_bytes = num_lebs - data->frag_lebs_res; > + if ((result = av_grow_packet(pkt, extra_bytes)) < 0) > + return result; > + // update pointer in case buffer address changed > + lebptr = pkt->data + data->frag_pkt_leb_pos; > + // move existing data for OBU back a bit > + memmove(lebptr + extra_bytes, lebptr, > + pkt->size - extra_bytes - data->frag_pkt_leb_pos); > + } > + > + // update OBU size field > + write_leb(lebptr, final_obu_size); > + > + data->frag_obu_size = 0; // signal end of fragment > + } else if 
(is_last_fragmented && !rem_pkt_size) { > + // add to total OBU size, so we can fix that in OBU header > + // (but only if the OBU size was missing!) > + if (needs_size_field || data->frag_obu_size) { > + data->frag_obu_size += obu_size; > + } > + // packet not yet finished! > + result = -1; > + } > + is_frag_cont = 0; > + > + if (!rem_pkt_size && !num_obus && (num_obus != obu_cnt)) { > + av_log(ctx, AV_LOG_WARNING, "AV1 aggregation header indicated %u > OBU elements, was %u\n", > + num_obus, obu_cnt); > + } > + obu_cnt++; > + } > + > + if (flags & RTP_FLAG_MARKER) { > + av_log(ctx, AV_LOG_TRACE, "TD on next packet due to marker\n"); > + data->needs_td = 1; > + } > + > + if (!is_last_fragmented) { > + data->frag_obu_size = 0; > + data->frag_pkt_leb_pos = 0; > + } > + > + if (!result) { > + av_log(ctx, AV_LOG_DEBUG, "AV1 out pkt-size: %d\n", pkt->size); > + av_hex_dump_log(ctx, AV_LOG_TRACE, pkt->data, FFMIN(pkt->size, 128)); > + } > + pkt->stream_index = st->index; > + > + return result; > + > +drop_fragment: > + data->drop_fragment = 1; > + data->frag_obu_size = 0; > + data->needs_td = 1; > + if (pkt->size) { > + av_log(ctx, AV_LOG_TRACE, "Dumping current AV1 frame packet\n"); > + // we can't seem to deallocate the fragmented packet, but we can > shrink it to 0 > + av_shrink_packet(pkt, 0); > + } > + return AVERROR_INVALIDDATA; > +} > + > +static void av1_close_context(PayloadContext *data) { > +} > + > +static int parse_av1_sdp_line(AVFormatContext *s, int st_index, > + PayloadContext *av1_data, const char *line) { > + AVStream * stream; > + const char *p = line; > + int result = 0; > + > + if (st_index < 0) > + return 0; > + > + stream = s->streams[st_index]; > + > + /* Optional parameters are profile, level-idx, and tier. 
> + * See Section 7.2.1 of https://aomediacodec.github.io/av1-rtp-spec/ */ > + if (av_strstart(p, "fmtp:", &p)) { > + result = ff_parse_fmtp(s, stream, av1_data, p, > sdp_parse_fmtp_config_av1); > + av_log(s, AV_LOG_DEBUG,"RTP AV1 Profile: %u, Level: %u, Tier: %u\n", > + av1_data->profile, av1_data->level_idx, av1_data->tier); > + } > + > + return result; > +} > + > +const RTPDynamicProtocolHandler ff_av1_dynamic_handler = { > + .enc_name = "AV1", > + .codec_type = AVMEDIA_TYPE_VIDEO, > + .codec_id = AV_CODEC_ID_AV1, > + .need_parsing = AVSTREAM_PARSE_FULL, > + .priv_data_size = sizeof(PayloadContext), > + .parse_sdp_a_line = parse_av1_sdp_line, > + .close = av1_close_context, > + .parse_packet = av1_handle_packet, > +}; > diff --git a/libavformat/rtpdec_formats.h b/libavformat/rtpdec_formats.h > index dad2b8ac1b..72a8f16a90 100644 > --- a/libavformat/rtpdec_formats.h > +++ b/libavformat/rtpdec_formats.h > @@ -50,6 +50,7 @@ void ff_h264_parse_framesize(AVCodecParameters *par, const > char *p); > extern const RTPDynamicProtocolHandler ff_ac3_dynamic_handler; > extern const RTPDynamicProtocolHandler ff_amr_nb_dynamic_handler; > extern const RTPDynamicProtocolHandler ff_amr_wb_dynamic_handler; > +extern const RTPDynamicProtocolHandler ff_av1_dynamic_handler; > extern const RTPDynamicProtocolHandler ff_dv_dynamic_handler; > extern const RTPDynamicProtocolHandler ff_g726_16_dynamic_handler; > extern const RTPDynamicProtocolHandler ff_g726_24_dynamic_handler; > diff --git a/libavformat/rtpenc.c b/libavformat/rtpenc.c > index 7b4ae37d13..3db13ee0b2 100644 > --- a/libavformat/rtpenc.c > +++ b/libavformat/rtpenc.c > @@ -79,6 +79,7 @@ static int is_supported(enum AVCodecID id) > case AV_CODEC_ID_THEORA: > case AV_CODEC_ID_VP8: > case AV_CODEC_ID_VP9: > + case AV_CODEC_ID_AV1: > case AV_CODEC_ID_ADPCM_G722: > case AV_CODEC_ID_ADPCM_G726: > case AV_CODEC_ID_ADPCM_G726LE: > @@ -228,6 +229,16 @@ static int rtp_write_header(AVFormatContext *s1) > goto fail; > } > break; > + case 
AV_CODEC_ID_AV1: > + if (s1->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) { > + av_log(s, AV_LOG_ERROR, > + "Packetizing AV1 is experimental and its specification is > " > + "still in draft state. " > + "Please set -strict experimental in order to enable > it.\n"); > + ret = AVERROR_EXPERIMENTAL; > + goto fail; > + } > + break; > case AV_CODEC_ID_VORBIS: > case AV_CODEC_ID_THEORA: > s->max_frames_per_packet = 15; > @@ -579,6 +590,9 @@ static int rtp_write_packet(AVFormatContext *s1, AVPacket > *pkt) > case AV_CODEC_ID_AMR_WB: > ff_rtp_send_amr(s1, pkt->data, size); > break; > + case AV_CODEC_ID_AV1: > + ff_rtp_send_av1(s1, pkt->data, size, (pkt->flags & AV_PKT_FLAG_KEY) > ? 1 : 0); > + break; > case AV_CODEC_ID_MPEG2TS: > rtp_send_mpegts_raw(s1, pkt->data, size); > break; > diff --git a/libavformat/rtpenc.h b/libavformat/rtpenc.h > index 854bf07f0e..ba88bfefc0 100644 > --- a/libavformat/rtpenc.h > +++ b/libavformat/rtpenc.h > @@ -94,6 +94,7 @@ void ff_rtp_send_xiph(AVFormatContext *s1, const uint8_t > *buff, int size); > void ff_rtp_send_vc2hq(AVFormatContext *s1, const uint8_t *buf, int size, > int interlaced); > void ff_rtp_send_vp8(AVFormatContext *s1, const uint8_t *buff, int size); > void ff_rtp_send_vp9(AVFormatContext *s1, const uint8_t *buff, int size); > +void ff_rtp_send_av1(AVFormatContext *s1, const uint8_t *buf1, int size, int > is_keyframe); > void ff_rtp_send_jpeg(AVFormatContext *s1, const uint8_t *buff, int size); > void ff_rtp_send_raw_rfc4175(AVFormatContext *s1, const uint8_t *buf, int > size, int interlaced, int field); > > diff --git a/libavformat/rtpenc_av1.c b/libavformat/rtpenc_av1.c > new file mode 100644 > index 0000000000..5d6403183b > --- /dev/null > +++ b/libavformat/rtpenc_av1.c > @@ -0,0 +1,294 @@ > +/* > + * Packetization for RTP Payload Format For AV1 (v1.0) > + * https://aomediacodec.github.io/av1-rtp-spec/ > + * Copyright (c) 2024 Axis Communications > + * > + * This file is part of FFmpeg. 
> + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +/** > + * @file > + * @brief AV1 / RTP packetization code (RTP Payload Format For AV1 (v1.0)) > + * @author Chris Hodges <chris.hod...@axis.com> > + * @note This will remove TDs and OBU size fields > + */ > + > +#include "avformat.h" > +#include "rtpenc.h" > +#include "libavcodec/av1.h" > +#include "rtp_av1.h" > + > +void ff_rtp_send_av1(AVFormatContext *ctx, const uint8_t *frame_buf, int > frame_size, int is_keyframe) { > + uint8_t aggr_hdr = 0; > + int last_packet_of_frame = 0; > + RTPMuxContext *rtp_ctx = ctx->priv_data; > + const uint8_t *obu_ptr = frame_buf; > + int start_new_packet = 0; > + unsigned int num_obus = 0; > + unsigned int rem_pkt_size = rtp_ctx->max_payload_size - 1; > + uint8_t *pkt_ptr = NULL; > + > + const uint8_t *curr_obu_ptr = NULL; > + uint32_t curr_elem_size = 0; > + int curr_obu_hdr = -1; > + int curr_obu_ext = -1; > + const uint8_t *last_obu_ptr = NULL; > + uint32_t last_elem_size = 0; > + int last_obu_hdr = -1; > + int last_obu_ext = -1; > + > + rtp_ctx->timestamp = rtp_ctx->cur_timestamp; > + > + /* The payload structure is supposed to be straight-forward, but there > are a > + * couple of edge cases to be tackled and make things very complex. 
> + * These are mainly due to: > + * - the OBU element size being optional for the last element, but > MANDATORY > + * if there are more than 3 elements > + * - the size field of the element is made up of a variable number of > + * LEB bytes > + * - the latter in combination with the desire to fill the max packet > size > + * could cause a catch22 > + * - if there's less than 2 bytes remaining (depending on the required > LEB), > + * one would not have space for the payload of an element and must > instead > + * start the next packet > + * - if there's less than 3 bytes remaining, the header byte plus the > + * optional extension byte will not fit in the fragment making the > + * handling even more complicated > + * - as some OBU types are supposed to be filtered out, it is hard to > decide > + * via the remaining length whether the outputted OBU element will > + * actually be the last one > + * > + * There are two major ways to tackle that: Pre-parsing of all OBUs > within a > + * frame (adds memory complexity) or lazy copying of the prior element. > + * Here, the latter is implemented. > + */ > + > + if (is_keyframe) { > + av_log(ctx, AV_LOG_DEBUG, "Marking FIRST packet\n"); > + aggr_hdr |= AV1F_AGGR_HDR_FIRST_PKT; > + } I noticed that other implementations only set this bit if we're dealing with a keyframe that is also accompanied by a sequence header (since an encoder may emit a keyframe without one)...the spec is a bit vague about this but it may be safer? > + > + rem_pkt_size = rtp_ctx->max_payload_size - 1; > + pkt_ptr = rtp_ctx->buf + 1; > + > + av_log(ctx, AV_LOG_TRACE, "AV1 Frame %d in (%x), size=%d:\n", > + rtp_ctx->seq, rtp_ctx->flags, frame_size); > + av_hex_dump_log(ctx, AV_LOG_TRACE, frame_buf, FFMIN(frame_size, 128)); This is also too frequent/verbose I think. 
> + > + while (frame_size) { > + uint32_t obu_size; > + int num_lebs = 0; > + int ext_byte = -1; > + > + uint8_t obu_hdr = *obu_ptr++; > + uint8_t obu_type = (obu_hdr >> AV1S_OBU_TYPE) & AV1M_OBU_TYPE; > + frame_size--; > + > + if (obu_hdr & AV1F_OBU_FORBIDDEN) { > + av_log(ctx, AV_LOG_ERROR, "Forbidden bit set in AV1 OBU header > (0x%02x)\n", obu_hdr); > + return; > + } > + > + if (obu_hdr & AV1F_OBU_EXTENSION_FLAG) { > + if (!frame_size) { > + av_log(ctx, AV_LOG_ERROR, "Out of data for AV1 OBU header > extension byte\n"); > + return; > + } > + ext_byte = *obu_ptr++; > + frame_size--; > + } > + > + if (obu_hdr & AV1F_OBU_HAS_SIZE_FIELD) { > + obu_hdr &= ~AV1F_OBU_HAS_SIZE_FIELD; // remove size field > + // read out explicit OBU size > + num_lebs = parse_leb(ctx, obu_ptr, frame_size, &obu_size); > + if (!num_lebs) { > + return; > + } > + obu_ptr += num_lebs; > + frame_size -= num_lebs; > + } else { > + av_log(ctx, AV_LOG_ERROR, "Cannot handle AV1 OBUs without size > fields\n"); > + return; > + } > + > + if ((long) obu_size > frame_size) { > + av_log(ctx, AV_LOG_ERROR, "AV1 OBU size %d larger than remaining > frame size %d\n", obu_size, frame_size); > + return; > + } > + > + if (obu_size > 0xfffffffd) { > + av_log(ctx, AV_LOG_ERROR, "AV1 OBU size 0x%x might overflow > (attack?)\n", obu_size); > + return; > + } > + > + frame_size -= obu_size; > + > + if ((obu_type == AV1_OBU_TEMPORAL_DELIMITER) || > + (obu_type == AV1_OBU_TILE_LIST)) { > + // ignore and remove according to spec > + obu_ptr += obu_size; > + continue; > + } I believe you also want to skip (drop) AV1_OBU_PADDING OBUs here, in addition to the temporal delimiter and tile list OBUs.
> + > + /* if the last OBU had a temporal or spatial ID, they need to match > to current; > + * otherwise start new packet */ > + if ((last_obu_ext >= 0) && (curr_obu_ext != last_obu_ext)) { > + start_new_packet = 1; > + } > + > +flush_last_packet: > + last_obu_ptr = curr_obu_ptr; > + last_elem_size = curr_elem_size; > + last_obu_hdr = curr_obu_hdr; > + last_obu_ext = curr_obu_ext; > + > + curr_obu_ptr = obu_ptr; // behind header > + curr_elem_size = obu_size + 1 + ((ext_byte >= 0) ? 1 : 0); > + curr_obu_hdr = obu_hdr; > + curr_obu_ext = ext_byte; > + > + obu_ptr += obu_size; > + > + if (last_obu_ptr) { > + unsigned int first_elem_with_size = last_elem_size + > calc_leb_size(last_elem_size); > + // check if last packet fits completely and has reasonable space > for > + // at least a fragment of the next > + if (!last_packet_of_frame && (first_elem_with_size + 10 < > rem_pkt_size)) { > + num_lebs = write_leb(pkt_ptr, last_elem_size); > + pkt_ptr += num_lebs; > + rem_pkt_size -= num_lebs; > + } else { > + if ((num_obus >= 3) && (last_packet_of_frame || > (first_elem_with_size <= rem_pkt_size))) { > + // last fits with forced size, but nothing else > + num_lebs = write_leb(pkt_ptr, last_elem_size); > + pkt_ptr += num_lebs; > + rem_pkt_size -= num_lebs; > + } > + // force new packet > + start_new_packet = 1; > + } > + > + // write header and optional extension byte (if not a continued > fragment) > + if (last_obu_hdr >= 0) { > + *pkt_ptr++ = last_obu_hdr; > + last_elem_size--; > + rem_pkt_size--; > + if (last_obu_ext >= 0) { > + *pkt_ptr++ = last_obu_ext; > + last_elem_size--; > + rem_pkt_size--; > + } > + } > + // copy payload > + memcpy(pkt_ptr, last_obu_ptr, last_elem_size); > + pkt_ptr += last_elem_size; > + rem_pkt_size -= last_elem_size; > + num_obus++; > + } > + > + if (start_new_packet || last_packet_of_frame) { > + if (num_obus < 4) { > + aggr_hdr |= num_obus << AV1S_AGGR_HDR_NUM_OBUS; > + } > + rtp_ctx->buf[0] = aggr_hdr; > + av_log(ctx, AV_LOG_TRACE, 
"Sending NON-FRAG packet %ld/%d, %d > OBUs\n", > + pkt_ptr - rtp_ctx->buf, rtp_ctx->max_payload_size, > num_obus); > + av_hex_dump_log(ctx, AV_LOG_TRACE, rtp_ctx->buf, FFMIN(pkt_ptr - > rtp_ctx->buf, 128)); > + > + ff_rtp_send_data(ctx, rtp_ctx->buf, pkt_ptr - rtp_ctx->buf, > last_packet_of_frame); > + > + rem_pkt_size = rtp_ctx->max_payload_size - 1; > + pkt_ptr = rtp_ctx->buf + 1; > + aggr_hdr = 0; > + num_obus = 0; > + } > + > + if (last_packet_of_frame) { > + break; > + } > + > + // check if element needs to be fragmented, otherwise we will deal > with > + // it in the next iteration > + if ((curr_elem_size > rem_pkt_size) || > + ((num_obus >= 3) && (curr_elem_size + > calc_leb_size(curr_elem_size)) > rem_pkt_size)) { > + uint32_t frag_size = rem_pkt_size; > + > + // if there are going more than 3 OBU elements, we are obliged to > + // have the length field for the last > + if (num_obus >= 3) { > + // that's an upper limit of LEBs > + num_lebs = calc_leb_size(rem_pkt_size - 1); > + frag_size -= num_lebs; > + > + // write a fixed number of LEBs, in case the frag_size could > + // now be specified with one less byte > + write_leb_n(pkt_ptr, frag_size, num_lebs); > + pkt_ptr += num_lebs; > + rem_pkt_size -= num_lebs; > + } > + > + // write header and optional extension byte > + *pkt_ptr++ = curr_obu_hdr; > + curr_elem_size--; > + rem_pkt_size--; > + if (curr_obu_ext >= 0) { > + *pkt_ptr++ = curr_obu_ext; > + curr_elem_size--; > + rem_pkt_size--; > + } > + > + // disable header writing for final fragment > + curr_obu_hdr = -1; > + curr_obu_ext = -1; > + > + // send more full packet sized fragments > + do { > + // copy payload > + memcpy(pkt_ptr, curr_obu_ptr, rem_pkt_size); > + pkt_ptr += rem_pkt_size; > + curr_obu_ptr += rem_pkt_size; > + curr_elem_size -= rem_pkt_size; > + num_obus++; > + > + aggr_hdr |= AV1F_AGGR_HDR_LAST_FRAG; > + if (num_obus < 4) { > + aggr_hdr |= num_obus << AV1S_AGGR_HDR_NUM_OBUS; > + } > + rtp_ctx->buf[0] = aggr_hdr; > + > + av_log(ctx, 
AV_LOG_TRACE, "Sending FRAG packet %ld/%d, %d > OBUs\n", > + pkt_ptr - rtp_ctx->buf, rtp_ctx->max_payload_size, > num_obus); > + av_hex_dump_log(ctx, AV_LOG_TRACE, rtp_ctx->buf, > FFMIN(pkt_ptr - rtp_ctx->buf, 128)); Again, this trace log and hex dump for every fragment packet may be too frequent/verbose. > + > + ff_rtp_send_data(ctx, rtp_ctx->buf, pkt_ptr - rtp_ctx->buf, > 0); > + rem_pkt_size = rtp_ctx->max_payload_size - 1; > + pkt_ptr = rtp_ctx->buf + 1; > + > + aggr_hdr = AV1F_AGGR_HDR_FRAG_CONT; > + num_obus = 0; > + } while (curr_elem_size > rem_pkt_size); > + start_new_packet = 0; > + } > + > + if (!frame_size) { > + // we're done, flush the last packet, set RTP marker bit > + last_packet_of_frame = 1; > + goto flush_last_packet; > + } > + } > +} > diff --git a/libavformat/sdp.c b/libavformat/sdp.c > index a9e964bae1..2738bb2056 100644 > --- a/libavformat/sdp.c > +++ b/libavformat/sdp.c > @@ -31,6 +31,7 @@ > #include "libavcodec/mpeg4audio.h" > #include "avformat.h" > #include "internal.h" > +#include "av1.h" > #include "avc.h" > #include "hevc.h" > #include "nal.h" > @@ -155,6 +156,26 @@ static int sdp_get_address(char *dest_addr, int size, > int *ttl, const char *url) > return port; > } > > +static int extradata2psets_av1(AVFormatContext *s, const AVCodecParameters > *par, > + char **out) > +{ > + char *psets; > + AV1SequenceParameters seq; > + > + if (ff_av1_parse_seq_header(&seq, par->extradata, par->extradata_size) < > 0) > + return AVERROR_INVALIDDATA; > + > + psets = av_mallocz(64); > + if (!psets) { > + av_log(s, AV_LOG_ERROR, "Cannot allocate memory for the parameter > sets.\n"); > + return AVERROR(ENOMEM); > + } > + av_strlcatf(psets, 64, "profile=%u;level-idx=%u;tier=%u", > + seq.profile, seq.level, seq.tier); > + *out = psets; > + return 0; > +} > + > #define MAX_PSET_SIZE 1024 > static int extradata2psets(AVFormatContext *s, const AVCodecParameters *par, > char **out) > @@ -522,6 +543,15 @@ static int sdp_write_media_attributes(char *buff, int > size, const AVStream *st, > int ret = 0; > >
switch (p->codec_id) { > + case AV_CODEC_ID_AV1: > + av_strlcatf(buff, size, "a=rtpmap:%d AV1/90000\r\n", payload_type); > + if (p->extradata_size) { > + ret = extradata2psets_av1(fmt, p, &config); > + if (ret < 0) > + return ret; > + av_strlcatf(buff, size, "a=fmtp:%d %s\r\n", payload_type, > config); > + } > + break; > case AV_CODEC_ID_DIRAC: > av_strlcatf(buff, size, "a=rtpmap:%d VC2/90000\r\n", payload_type); > break; > -- > 2.39.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".