Re: [FFmpeg-devel] [PATCH] avformat/id3v2: Support null-separated multi-value properties
> -Original Message- > From: softworkz > Sent: Sonntag, 2. März 2025 00:57 > To: ffmpeg-devel@ffmpeg.org > Cc: softworkz ; softworkz > Subject: [PATCH] avformat/id3v2: Support null-separated multi-value > properties > > From: softworkz > > Fixes Trac ticket https://trac.ffmpeg.org/ticket/6949 > > Signed-off-by: softworkz > --- > avformat/id3v2: Support null-separated multi-value properties > > Fixes Trac ticket https://trac.ffmpeg.org/ticket/6949 > > Published-As: https://github.com/ffstaging/FFmpeg/releases/tag/pr- > ffstaging-54%2Fsoftworkz%2Fsubmit_id3v2-v1 > Fetch-It-Via: git fetch https://github.com/ffstaging/FFmpeg pr- > ffstaging-54/softworkz/submit_id3v2-v1 > Pull-Request: https://github.com/ffstaging/FFmpeg/pull/54 > > libavformat/id3v2.c | 65 - > 1 file changed, 41 insertions(+), 24 deletions(-) > > diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c > index 29ee59e1f4..cebb4acd75 100644 > --- a/libavformat/id3v2.c > +++ b/libavformat/id3v2.c > @@ -327,39 +327,55 @@ static void read_ttag(AVFormatContext *s, > AVIOContext *pb, int taglen, >AVDictionary **metadata, const char *key) > { > uint8_t *dst; > -int encoding, dict_flags = AV_DICT_DONT_OVERWRITE | > AV_DICT_DONT_STRDUP_VAL; > +int encoding, nb_values = 0; > unsigned genre; > +AVDictionaryEntry *tag = NULL; > > if (taglen < 1) > return; > > +tag = av_dict_get(*metadata, key, NULL, 0); > +if (tag) > +return; > + > encoding = avio_r8(pb); > taglen--; /* account for encoding type byte */ > > -if (decode_str(s, pb, encoding, &dst, &taglen) < 0) { > -av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", > key); > -return; > -} > +/* Read all null-terminated values */ > +while (taglen > 0) { > +int n = 0, dict_flags = AV_DICT_APPEND | > AV_DICT_DONT_STRDUP_VAL; > > -if (!(strcmp(key, "TCON") && strcmp(key, "TCO")) > && > -(sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) > == 1) && > -genre <= ID3v1_GENRE_MAX) { > -av_freep(&dst); > -dst = av_strdup(ff_id3v1_genre_str[genre]); > -} else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) { > -/* dst now contains the key, need to get value */ > -key = dst; > if (decode_str(s, pb, encoding, &dst, &taglen) < 0) { > av_log(s, AV_LOG_ERROR, "Error reading frame %s, > skipped\n", key); > -av_freep(&key); > return; > } > -dict_flags |= AV_DICT_DONT_STRDUP_KEY; > -} else if (!*dst) > -av_freep(&dst); > > -if (dst) > -av_dict_set(metadata, key, dst, dict_flags); > +if (!(strcmp(key, "TCON") && strcmp(key, "TCO")) && > +(sscanf(dst, "(%d)", &genre) == 1 || (sscanf(dst, "%d%n", > &genre, &n) == 1 && n == strlen(dst))) && > +genre <= ID3v1_GENRE_MAX) { > +av_freep(&dst); > +dst = av_strdup(ff_id3v1_genre_str[genre]); > +} else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) { > +/* dst now contains the key, need to get value */ > +key = dst; > +if (decode_str(s, pb, encoding, &dst, &taglen) < 0) { > +av_log(s, AV_LOG_ERROR, "Error reading frame %s, > skipped\n", key); > +av_freep(&key); > +return; > +} > +} else if (!*dst) { > +av_freep(&dst); > +return; > +} > + > +if (dst) { > +if (nb_values > 0) > +av_dict_set(metadata, key, ";", dict_flags & > ~AV_DICT_DONT_STRDUP_VAL); > + > +av_dict_set(metadata, key, dst, dict_flags); > +nb_values++; > +} > +} > } > > static void read_uslt(AVFormatContext *s, AVIOContext *pb, int taglen, > @@ -372,7 +388,7 @@ static void read_uslt(AVFormatContext *s, > AVIOContext *pb, int taglen, > int encoding; > int ok = 0; > > -if (taglen < 4) > +if (taglen < 1) > goto error; > > encoding = avio_r8(pb); > @@ -383,10 +399,10 @@ static void read_uslt(AVFormatContext *s, > AVIOContext *pb, int taglen, > lang[3] = '\0'; > taglen -= 3; > > -if (decode_str(s, pb, encoding, &descriptor, &taglen) < 0 || taglen > < 0) > +if (decode_str(s, pb, encoding, &descriptor, &taglen) < 0) > goto error; > > -if (decode_str(s, pb, encoding, &text, &taglen) < 0 || taglen < 0) > +if (decode_str(s, pb, encoding, &text, &taglen) < 0) > goto error; > > // FFmpeg does not support hierarchical metadata, so concatenate > the keys. > @@ -1003,7 +1019,8 @@ static void id3v2_parse(AVIOContext *pb, > AVDictionary **metadata, > t++; > } > > -ffio_init_read_context(&pb_local, buffer, b - buffer); > +ffio_init_context(&pb_
[FFmpeg-devel] [PATCH] avformat/dashenc: add hevc codec attributes parse
fix ticket: 11316 add set_hevc_codec_str function refer to hlsenc.c but do some necessary changes Signed-off-by: Jack Lau --- libavformat/dashenc.c | 81 +++ 1 file changed, 81 insertions(+) diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c index d4a6fe0304..04c5b562d5 100644 --- a/libavformat/dashenc.c +++ b/libavformat/dashenc.c @@ -57,6 +57,7 @@ #include "url.h" #include "vpcc.h" #include "dash.h" +#include "nal.h" typedef enum { SEGMENT_TYPE_AUTO = 0, @@ -344,6 +345,84 @@ static void set_vp9_codec_str(AVFormatContext *s, AVCodecParameters *par, return; } +static void set_hevc_codec_str(AVCodecParameters *par, char *str, int size) { +uint8_t *data = par->extradata; +int profile = AV_PROFILE_UNKNOWN; +uint32_t profile_compatibility = AV_PROFILE_UNKNOWN; +char tier = 0; +int level = AV_LEVEL_UNKNOWN; +char constraints[8] = ""; + +if (par->profile != AV_PROFILE_UNKNOWN) +profile = par->profile; +if (par->level != AV_LEVEL_UNKNOWN) +level = par->level; + +/* check the boundary of data which from current position is small than extradata_size */ +while (data && (data - par->extradata + 19) < par->extradata_size) { +/* get HEVC SPS NAL and seek to profile_tier_level */ +if (!(data[0] | data[1] | data[2]) && data[3] == 1 && ((data[4] & 0x7E) == 0x42)) { +uint8_t *rbsp_buf; +int remain_size = 0; +int rbsp_size = 0; +uint32_t profile_compatibility_flags = 0; +uint8_t high_nibble = 0; +/* skip start code + nalu header */ +data += 6; +/* process by reference General NAL unit syntax */ +remain_size = par->extradata_size - (data - par->extradata); +rbsp_buf = ff_nal_unit_extract_rbsp(data, remain_size, &rbsp_size, 0); +if (!rbsp_buf) +return; +if (rbsp_size < 13) { +av_freep(&rbsp_buf); +break; +} +/* skip sps_video_parameter_set_id u(4), +* sps_max_sub_layers_minus1u(3), +* and sps_temporal_id_nesting_flag u(1) +* +* TIER represents the general_tier_flag, with 'L' indicating the flag is 0, +* and 'H' indicating the flag is 1 +*/ +tier = (rbsp_buf[1] & 0x20) == 0 ? 'L' : 'H'; +profile = rbsp_buf[1] & 0x1f; +/* PROFILE_COMPATIBILITY is general_profile_compatibility_flags, but in reverse bit order, +* in a hexadecimal representation (leading zeroes may be omitted). +*/ +profile_compatibility_flags = AV_RB32(rbsp_buf + 2); +/* revise these bits to get the profile compatibility value */ +profile_compatibility_flags = ((profile_compatibility_flags & 0xU) << 1) | ((profile_compatibility_flags >> 1) & 0xU); +profile_compatibility_flags = ((profile_compatibility_flags & 0xU) << 2) | ((profile_compatibility_flags >> 2) & 0xU); +profile_compatibility_flags = ((profile_compatibility_flags & 0x0F0F0F0FU) << 4) | ((profile_compatibility_flags >> 4) & 0x0F0F0F0FU); +profile_compatibility_flags = ((profile_compatibility_flags & 0x00FF00FFU) << 8) | ((profile_compatibility_flags >> 8) & 0x00FF00FFU); +profile_compatibility = (profile_compatibility_flags << 16) | (profile_compatibility_flags >> 16); +/* skip 8 + 8 + 32 +* CONSTRAINTS is a hexadecimal representation of the general_constraint_indicator_flags. +* each byte is separated by a '.', and trailing zero bytes may be omitted. +* drop the trailing zero bytes refer to ISO/IEC14496-15. +*/ +high_nibble = rbsp_buf[7] >> 4; +snprintf(constraints, sizeof(constraints), +high_nibble ? "%02x.%x" : "%02x", +rbsp_buf[6], high_nibble); +/* skip 8 + 8 + 32 + 4 + 43 + 1 bit */ +level = rbsp_buf[12]; +av_freep(&rbsp_buf); +break; +} +data++; +} +if (profile != AV_PROFILE_UNKNOWN && +profile_compatibility != AV_PROFILE_UNKNOWN && +tier != 0 && +level != AV_LEVEL_UNKNOWN && +constraints[0] != '\0') { +av_strlcatf(str, size, ".%d.%x.%c%d.%s", profile, profile_compatibility, tier, level, constraints); +} +return; +} + static void set_codec_str(AVFormatContext *s, AVCodecParameters *par, AVRational *frame_rate, char *str, int size) { @@ -422,6 +501,8 @@ static void set_codec_str(AVFormatContext *s, AVCodecParameters *par, av_strlcatf(str, size, ".%02x%02x%02x", extradata[1], extradata[2], extradata[3]); av_free(tmpbuf); +} else if (!strcmp(str, "
Re: [FFmpeg-devel] [PATCH] ffbuild: read library linker objects from a file
On Wed, 12 Mar 2025, Gyan Doshi wrote: On 2025-03-12 01:29 pm, Martin Storsjö wrote: On Wed, 12 Mar 2025, Gyan Doshi wrote: The linker command can exceed the maximum argument limit on MinGW, especially for libavcodec. The objects list is now stored in a file and passed to the linker. --- ffbuild/library.mak | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ffbuild/library.mak b/ffbuild/library.mak index 793e9d41fa..781b018e00 100644 --- a/ffbuild/library.mak +++ b/ffbuild/library.mak @@ -66,8 +66,10 @@ $(SUBDIR)$(SLIBNAME): $(SUBDIR)$(SLIBNAME_WITH_MAJOR) $(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SHLIBOBJS) $(SLIBOBJS) $(SUBDIR)lib$(NAME).ver $(SLIB_CREATE_DEF_CMD) - $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) $$(filter %.o,$$^) $(FFEXTRALIBS) + $(Q)echo $$(filter %.o,$$^) > $$@.objs + $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) @$$@.objs $(FFEXTRALIBS) $(SLIB_EXTRA_CMD) + -$(RM) $$@.objs Don't we need do the same for static libraries too? On first glance, this looks quite reasonable... However, the limit that is an issue is the length of a command line. The .objs file is written with an "echo" command - doesn't that fundamentally also hit the same limit (just a little bit later, as there are fewer command line flags in this command)? Or do we assume that make executes it with a shell, where the shell handles "echo" as a shell builtin so that it doesn't actually spawn a subprocess for this? Can you test it, if you could extend the list of object files to the point where the .objs file is clearly over 32 KB, and verify that it did include all the files you intended? The specific error I got when building a shared build of 7.1.1 (with ~90 external libs) was /bin/sh: line 1: /mingw64/bin/ccache: Argument list too long Got same error without ccache. The static build of the same config succeeded without any patching. The .objs file generated for libavcodec shared build is 29KB. How do I inflate the size to above 32K? I've enabled pretty much everything I can. The simplest would probably be to add a bunch of empty .c files (with long file names) in libavcodec and hook them up in the makefile. We won't notice if they are missed when linking of course, but if we pass the command line length limit, we should still notice it in one way or another. // Martin ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH v2 2/2] avformat/webvttdec: Add webvtt extension and MIME type
> -Original Message- > From: ffmpeg-devel On Behalf Of > Michael Niedermayer > Sent: Dienstag, 25. Februar 2025 02:04 > To: FFmpeg development discussions and patches > Subject: Re: [FFmpeg-devel] [PATCH v2 2/2] avformat/webvttdec: Add > webvtt extension and MIME type > > On Fri, Feb 21, 2025 at 03:13:22PM +, softworkz wrote: > > From: softworkz > > > > The webvtt extension is sometimes used in HLS playlists. > > > > Signed-off-by: softworkz > > --- > > libavformat/webvttdec.c | 3 ++- > > 1 file changed, 2 insertions(+), 1 deletion(-) > > > > diff --git a/libavformat/webvttdec.c b/libavformat/webvttdec.c > > index 6e60348764..6feda1585e 100644 > > --- a/libavformat/webvttdec.c > > +++ b/libavformat/webvttdec.c > > @@ -216,7 +216,8 @@ static const AVClass webvtt_demuxer_class = { > > const FFInputFormat ff_webvtt_demuxer = { > > .p.name = "webvtt", > > .p.long_name= NULL_IF_CONFIG_SMALL("WebVTT subtitle"), > > -.p.extensions = "vtt", > > +.p.mime_type= "text/vtt", > > +.p.extensions = "vtt,webvtt", > > .p.priv_class = &webvtt_demuxer_class, > > .priv_data_size = sizeof(WebVTTContext), > > .flags_internal = FF_INFMT_FLAG_INIT_CLEANUP, > > should be ok > maybe needs backporting > > thx > > [...] > -- > Michael Are there any more comments or objections from anybody? After merge I'll follow up with a patch to support mpegts time mapping via X-TIMESTAMP-MAP in the WebVTT headers. Thanks sw ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] ffbuild: read library linker objects from a file
On 2025-03-12 01:29 pm, Martin Storsjö wrote: On Wed, 12 Mar 2025, Gyan Doshi wrote: The linker command can exceed the maximum argument limit on MinGW, especially for libavcodec. The objects list is now stored in a file and passed to the linker. --- ffbuild/library.mak | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ffbuild/library.mak b/ffbuild/library.mak index 793e9d41fa..781b018e00 100644 --- a/ffbuild/library.mak +++ b/ffbuild/library.mak @@ -66,8 +66,10 @@ $(SUBDIR)$(SLIBNAME): $(SUBDIR)$(SLIBNAME_WITH_MAJOR) $(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SHLIBOBJS) $(SLIBOBJS) $(SUBDIR)lib$(NAME).ver $(SLIB_CREATE_DEF_CMD) - $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) $$(filter %.o,$$^) $(FFEXTRALIBS) + $(Q)echo $$(filter %.o,$$^) > $$@.objs + $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) @$$@.objs $(FFEXTRALIBS) $(SLIB_EXTRA_CMD) + -$(RM) $$@.objs Don't we need do the same for static libraries too? On first glance, this looks quite reasonable... However, the limit that is an issue is the length of a command line. The .objs file is written with an "echo" command - doesn't that fundamentally also hit the same limit (just a little bit later, as there are fewer command line flags in this command)? Or do we assume that make executes it with a shell, where the shell handles "echo" as a shell builtin so that it doesn't actually spawn a subprocess for this? Can you test it, if you could extend the list of object files to the point where the .objs file is clearly over 32 KB, and verify that it did include all the files you intended? The specific error I got when building a shared build of 7.1.1 (with ~90 external libs) was /bin/sh: line 1: /mingw64/bin/ccache: Argument list too long Got same error without ccache. The static build of the same config succeeded without any patching. The .objs file generated for libavcodec shared build is 29KB. How do I inflate the size to above 32K? I've enabled pretty much everything I can. Secondly, less fundamental - even if we try to remove the .objs file here at the same time it's quite possible for it to be left behind (e.g. if the linking command fails) - so it would be good to make sure that it is removed on "make clean" too. Plus we probably should include it in .gitignore, if someone does in-tree builds. Will add both. Regards, Gyan ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] ffbuild: read library linker objects from a file
> On Mar 12, 2025, at 16:34, Gyan Doshi wrote: > > > > On 2025-03-12 01:29 pm, Martin Storsjö wrote: >> On Wed, 12 Mar 2025, Gyan Doshi wrote: >> >>> The linker command can exceed the maximum argument limit on MinGW, >>> especially for libavcodec. >>> >>> The objects list is now stored in a file and passed to the linker. >>> --- >>> ffbuild/library.mak | 4 +++- >>> 1 file changed, 3 insertions(+), 1 deletion(-) >>> >>> diff --git a/ffbuild/library.mak b/ffbuild/library.mak >>> index 793e9d41fa..781b018e00 100644 >>> --- a/ffbuild/library.mak >>> +++ b/ffbuild/library.mak >>> @@ -66,8 +66,10 @@ $(SUBDIR)$(SLIBNAME): $(SUBDIR)$(SLIBNAME_WITH_MAJOR) >>> >>> $(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SHLIBOBJS) $(SLIBOBJS) >>> $(SUBDIR)lib$(NAME).ver >>> $(SLIB_CREATE_DEF_CMD) >>> -$$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) $$(filter %.o,$$^) >>> $(FFEXTRALIBS) >>> +$(Q)echo $$(filter %.o,$$^) > $$@.objs >>> +$$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) @$$@.objs >>> $(FFEXTRALIBS) >>> $(SLIB_EXTRA_CMD) >>> +-$(RM) $$@.objs >> >> Don't we need do the same for static libraries too? >> >> On first glance, this looks quite reasonable... However, the limit that is >> an issue is the length of a command line. The .objs file is written with an >> "echo" command - doesn't that fundamentally also hit the same limit (just a >> little bit later, as there are fewer command line flags in this command)? >> >> Or do we assume that make executes it with a shell, where the shell handles >> "echo" as a shell builtin so that it doesn't actually spawn a subprocess for >> this? Can you test it, if you could extend the list of object files to the >> point where the .objs file is clearly over 32 KB, and verify that it did >> include all the files you intended? > > The specific error I got when building a shared build of 7.1.1 (with ~90 > external libs) was > > /bin/sh: line 1: /mingw64/bin/ccache: Argument list too long I have got the same error from user’s report, but I cannot reproduce the error by myself. FYI, there is a meson port of ffmpeg in gstreamer. It’s about five times faster than configure+make on Windows. Maybe not a ready for build and release binary, but can save a lot of time when developing and test. https://gitlab.freedesktop.org/gstreamer/meson-ports/ffmpeg > > Got same error without ccache. > > The static build of the same config succeeded without any patching. > The .objs file generated for libavcodec shared build is 29KB. > > How do I inflate the size to above 32K? I've enabled pretty much everything I > can. > > >> Secondly, less fundamental - even if we try to remove the .objs file here at >> the same time it's quite possible for it to be left behind (e.g. if the >> linking command fails) - so it would be good to make sure that it is removed >> on "make clean" too. >> >> Plus we probably should include it in .gitignore, if someone does in-tree >> builds. > > Will add both. > > Regards, > Gyan > > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [RFC] New swscale internal design prototype
On Wed, 12 Mar 2025 09:15:22 +0200 Martin Storsjö wrote: > On Wed, 12 Mar 2025, Niklas Haas wrote: > > > On Sun, 09 Mar 2025 20:45:23 +0100 Niklas Haas wrote: > >> On Sun, 09 Mar 2025 18:11:54 +0200 Martin Storsjö wrote: > >> > On Sat, 8 Mar 2025, Niklas Haas wrote: > >> > > >> > > What are the thoughts on the float-first approach? > >> > > >> > In general, for modern architectures, relying on floats probably is > >> > reasonable. (On architectures that aren't of quite as widespread > >> > interest, > >> > it might not be so clear cut though.) > >> > > >> > However with the benchmark example you provided a couple of weeks ago, we > >> > concluded that even on x86 on modern HW, floats were faster than int16 > >> > only in one case: When using Clang, not GCC, and when compiling with > >> > -mavx2, not without it. In all the other cases, int16 was faster than > >> > float. > >> > >> Hi Martin, > >> > >> I should preface that this particular benchmark was a very specific test > >> for > >> floating point *filtering*, which is considerably more punishing than the > >> conversion pipeline I have implemented here, and I think it's partly the > >> fault of compilers generating very unoptimal filtering code. > >> > >> I think it would be better to re-assess using the current prototype on > >> actual > >> hardware. I threw up a quick NEON test branch: (untested, should hopefully > >> work) > >> https://github.com/haasn/FFmpeg/commits/swscale3-neon > >> > >> # adjust the benchmark iters count as needed based on the HW perf > >> make libswscale/tests/swscale && libswscale/tests/swscale -unscaled 1 > >> -bench 50 > >> > >> If this differs significantly from the ~1.8x speedup I measure on x86, I > >> will be far more concerned about the new approach. > > Sorry, I haven't had time to try this out myself yet... No worries. I think I have gathered enough performance figures myself to come to the conclusion that this approach won't work unmodified - not because of the usage of floats so much as the fact that the load/store overhead is sufficiently expensive in very simple scenarios to the point where it outweighs the benefits. I think my plan for now is the following: 1. Delete all of the "optimized" variants of the C templates, and keep only the general purpose base case merely as a fallback / reference code. 2. Instead, make the architectural split at a higher level; and allow arch- specific implementations to choose their own preferred chunk size, or even do something wildly different like runtime code generation or custom calling conventions. 3. Merge the new code for now guarded under an explicit opt in flag so we can continue to develop it alongside the existing approach until arch-specific optimized variants are available and sufficiently fast in _all_ cases. > > > I gave it a try. So, the result of a naive/blind run on a Cortex-X1 using > > clang > > version 20.0.0 (from the latest Android NDK v29) is: > > > > Overall speedup=1.688x faster, min=0.141x max=45.898x > > > > This has quite a lot more significant speed regressions compared to x86 > > though. > > > > In particular, clang/LLVM refuses to vectorize packed reads of 2 or 3 > > elements, > > so any sort of operation involving rgb24 or bgr24 suffers horribly: > > So, if the performance of this relies on compiler autovectorization, > what's the plan wrt GCC? We blanket disable autovectorization when > compiling with GCC - see fd6dbc53855fbfc9a782095d0ffe11dd3a98905f for when > it was disabled last time. Building and running fate with > autovectorization in GCC does succeed at least on modern GCC on x86_64, > but it's of course possible that it still can cause issues in various more > tricky configurations. See https://github.com/haasn/FFmpeg/blob/swscale3/libswscale/ops_internal.h#L28 > > // Martin > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2] ffbuild: read library linker objects from a file
The linker command can exceed the maximum argument limit on MinGW, especially for libavcodec. The objects list is now stored in a file and passed to the linker. --- v2: static linking also switched to use a file for object list objs added to clean target and to gitignore .gitignore | 1 + ffbuild/common.mak | 2 +- ffbuild/library.mak | 8 ++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 9cfc78b414..430abaf91b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ *.a *.o +*.objs *.o.* *.d *.def diff --git a/ffbuild/common.mak b/ffbuild/common.mak index 023adb8567..ca45a0f368 100644 --- a/ffbuild/common.mak +++ b/ffbuild/common.mak @@ -214,7 +214,7 @@ $(TOOLOBJS): | tools OUTDIRS := $(OUTDIRS) $(dir $(OBJS) $(HOBJS) $(HOSTOBJS) $(SLIBOBJS) $(SHLIBOBJS) $(STLIBOBJS) $(TESTOBJS)) -CLEANSUFFIXES = *.d *.gcda *.gcno *.h.c *.ho *.map *.o *.pc *.ptx *.ptx.gz *.ptx.c *.ver *.version *$(DEFAULT_X86ASMD).asm *~ *.ilk *.pdb +CLEANSUFFIXES = *.d *.gcda *.gcno *.h.c *.ho *.map *.o *.objs *.pc *.ptx *.ptx.gz *.ptx.c *.ver *.version *$(DEFAULT_X86ASMD).asm *~ *.ilk *.pdb LIBSUFFIXES = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a define RULES diff --git a/ffbuild/library.mak b/ffbuild/library.mak index 793e9d41fa..53f739123a 100644 --- a/ffbuild/library.mak +++ b/ffbuild/library.mak @@ -35,8 +35,10 @@ OBJS += $(SHLIBOBJS) endif $(SUBDIR)$(LIBNAME): $(OBJS) $(STLIBOBJS) $(RM) $@ - $(AR) $(ARFLAGS) $(AR_O) $^ + $(Q)echo $$^ > $$@.objs + $(AR) $(ARFLAGS) $(AR_O) @$$@.objs $(RANLIB) $@ + -$(RM) $$@.objs install-headers: install-lib$(NAME)-headers install-lib$(NAME)-pkgconfig @@ -66,8 +68,10 @@ $(SUBDIR)$(SLIBNAME): $(SUBDIR)$(SLIBNAME_WITH_MAJOR) $(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SHLIBOBJS) $(SLIBOBJS) $(SUBDIR)lib$(NAME).ver $(SLIB_CREATE_DEF_CMD) - $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) $$(filter %.o,$$^) $(FFEXTRALIBS) + $(Q)echo $$(filter %.o,$$^) > $$@.objs + $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) @$$@.objs $(FFEXTRALIBS) $(SLIB_EXTRA_CMD) + -$(RM) $$@.objs ifdef SUBDIR $(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(DEP_LIBS) -- 2.46.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] avcodec/vlc: Reduce debug logging
From: softworkz Signed-off-by: softworkz --- avcodec/vlc: Reduce debug logging It made it hardly possible to enable debug logging for viewing other log lines due to the excessive output created by this. Published-As: https://github.com/ffstaging/FFmpeg/releases/tag/pr-ffstaging-60%2Fsoftworkz%2Fsubmit_vlclogging-v1 Fetch-It-Via: git fetch https://github.com/ffstaging/FFmpeg pr-ffstaging-60/softworkz/submit_vlclogging-v1 Pull-Request: https://github.com/ffstaging/FFmpeg/pull/60 libavcodec/vlc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavcodec/vlc.c b/libavcodec/vlc.c index f46ecbb55e..c49c801181 100644 --- a/libavcodec/vlc.c +++ b/libavcodec/vlc.c @@ -155,7 +155,7 @@ static int build_table(VLC *vlc, int table_nb_bits, int nb_codes, int n = codes[i].bits; uint32_t code = codes[i].code; intsymbol = codes[i].symbol; -ff_dlog(NULL, "i=%d n=%d code=0x%"PRIx32"\n", i, n, code); +ff_tlog(NULL, "i=%d n=%d code=0x%"PRIx32"\n", i, n, code); if (n <= table_nb_bits) { /* no need to add another table */ int j = code >> (32 - table_nb_bits); @@ -169,7 +169,7 @@ static int build_table(VLC *vlc, int table_nb_bits, int nb_codes, for (int k = 0; k < nb; k++) { int bits = table[j].len; int oldsym = table[j].sym; -ff_dlog(NULL, "%4x: code=%d n=%d\n", j, i, n); +ff_tlog(NULL, "%4x: code=%d n=%d\n", j, i, n); if ((bits || oldsym) && (bits != n || oldsym != symbol)) { av_log(NULL, AV_LOG_ERROR, "incorrect codes\n"); return AVERROR_INVALIDDATA; base-commit: 1bce40cb73fffd9d1fb6e118d71ac8999cded808 -- ffmpeg-codebot ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] fftools/ffmpeg_opt: Remove unused alt_bsf
On 3/12/2025 10:25 AM, Andreas Rheinhardt wrote: From 3809f60cd976de71bd409bc24b9bd11b43aacee4 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt Date: Wed, 12 Mar 2025 14:23:51 +0100 Subject: [PATCH] fftools/ffmpeg_opt: Remove unused alt_bsf Forgotten in 6325aede08d5c7086b3798cb7041299e1d07f93a. Signed-off-by: Andreas Rheinhardt --- fftools/ffmpeg_opt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c index 27a9fc9e42..1f5e6050c8 100644 --- a/fftools/ffmpeg_opt.c +++ b/fftools/ffmpeg_opt.c @@ -1507,7 +1507,6 @@ static int opt_adrift_threshold(void *optctx, const char *opt, const char *arg) } #endif -static const char *const alt_bsf[]= { "absf", "vbsf", NULL }; static const char *const alt_channel_layout[] = { "ch_layout", NULL}; static const char *const alt_codec[] = { "c", "acodec", "vcodec", "scodec", "dcodec", NULL }; static const char *const alt_filter[] = { "af", "vf", NULL }; -- 2.45.2 LGTM OpenPGP_signature.asc Description: OpenPGP digital signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2] swscale/utils: split off format code into new file
From: Niklas Haas utils.c is getting quite crowded, and I need a new place to dump a lot of format handling code for the ongoing rewrite. Rather than bloating this file even more, start splitting format handling helpers off into a new file. This also renames the existing utils.h header, which didn't really contain anything except the SwsFormat definition anyway (the prototypes for what should have been in utils.h are all still in the legacy swscale_internal.h). --- libswscale/Makefile | 1 + libswscale/cms.c | 2 +- libswscale/cms.h | 2 +- libswscale/csputils.c| 2 +- libswscale/format.c | 582 +++ libswscale/{utils.h => format.h} | 6 +- libswscale/graph.c | 2 +- libswscale/graph.h | 2 +- libswscale/lut3d.h | 2 +- libswscale/utils.c | 559 - 10 files changed, 592 insertions(+), 568 deletions(-) create mode 100644 libswscale/format.c rename libswscale/{utils.h => format.h} (98%) diff --git a/libswscale/Makefile b/libswscale/Makefile index 267952d870..4cf4bb2602 100644 --- a/libswscale/Makefile +++ b/libswscale/Makefile @@ -10,6 +10,7 @@ OBJS = alphablend.o \ csputils.o \ hscale.o \ hscale_fast_bilinear.o \ + format.o \ gamma.o \ graph.o \ half2float.o \ diff --git a/libswscale/cms.c b/libswscale/cms.c index 0a2e8bafda..a23d7a9772 100644 --- a/libswscale/cms.c +++ b/libswscale/cms.c @@ -29,7 +29,7 @@ #include "cms.h" #include "csputils.h" #include "libswscale/swscale.h" -#include "utils.h" +#include "format.h" bool ff_sws_color_map_noop(const SwsColorMap *map) { diff --git a/libswscale/cms.h b/libswscale/cms.h index 0c730e520d..5323a27260 100644 --- a/libswscale/cms.h +++ b/libswscale/cms.h @@ -27,7 +27,7 @@ #include "csputils.h" #include "swscale.h" -#include "utils.h" +#include "format.h" /* Minimum, maximum, and default knee point for perceptual tone mapping [0,1] */ #define PERCEPTUAL_KNEE_MIN 0.10f diff --git a/libswscale/csputils.c b/libswscale/csputils.c index 9546b26fe1..728871d293 100644 --- a/libswscale/csputils.c +++ b/libswscale/csputils.c @@ -21,7 +21,7 @@ #include "libavutil/csp.h" #include "csputils.h" -#include "utils.h" +#include "format.h" void ff_sws_matrix3x3_mul(SwsMatrix3x3 *a, const SwsMatrix3x3 *b) { diff --git a/libswscale/format.c b/libswscale/format.c new file mode 100644 index 00..958577796c --- /dev/null +++ b/libswscale/format.c @@ -0,0 +1,582 @@ +/* + * Copyright (C) 2024 Niklas Haas + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/avassert.h" +#include "libavutil/hdr_dynamic_metadata.h" +#include "libavutil/mastering_display_metadata.h" + +#include "format.h" + +typedef struct FormatEntry { +uint8_t is_supported_in :1; +uint8_t is_supported_out:1; +uint8_t is_supported_endianness :1; +} FormatEntry; + +/* Format support table for legacy swscale */ +static const FormatEntry format_entries[] = { +[AV_PIX_FMT_YUV420P]= { 1, 1 }, +[AV_PIX_FMT_YUYV422]= { 1, 1 }, +[AV_PIX_FMT_RGB24] = { 1, 1 }, +[AV_PIX_FMT_BGR24] = { 1, 1 }, +[AV_PIX_FMT_YUV422P]= { 1, 1 }, +[AV_PIX_FMT_YUV444P]= { 1, 1 }, +[AV_PIX_FMT_YUV410P]= { 1, 1 }, +[AV_PIX_FMT_YUV411P]= { 1, 1 }, +[AV_PIX_FMT_GRAY8] = { 1, 1 }, +[AV_PIX_FMT_MONOWHITE] = { 1, 1 }, +[AV_PIX_FMT_MONOBLACK] = { 1, 1 }, +[AV_PIX_FMT_PAL8] = { 1, 0 }, +[AV_PIX_FMT_YUVJ420P] = { 1, 1 }, +[AV_PIX_FMT_YUVJ411P] = { 1, 1 }, +[AV_PIX_FMT_YUVJ422P] = { 1, 1 }, +[AV_PIX_FMT_YUVJ444P] = { 1, 1 }, +[AV_PIX_FMT_YVYU422]= { 1, 1 }, +[AV_PIX_FMT_UYVY422]= { 1, 1 }, +[AV_PIX_FMT_UYYVYY411] = { 0,
Re: [FFmpeg-devel] [PATCH] ffbuild: read library linker objects from a file
On Wed, 12 Mar 2025, Gyan Doshi wrote: On 2025-03-12 03:12 pm, Martin Storsjö wrote: On Wed, 12 Mar 2025, Gyan Doshi wrote: On 2025-03-12 01:29 pm, Martin Storsjö wrote: On Wed, 12 Mar 2025, Gyan Doshi wrote: The linker command can exceed the maximum argument limit on MinGW, especially for libavcodec. The objects list is now stored in a file and passed to the linker. --- ffbuild/library.mak | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ffbuild/library.mak b/ffbuild/library.mak index 793e9d41fa..781b018e00 100644 --- a/ffbuild/library.mak +++ b/ffbuild/library.mak @@ -66,8 +66,10 @@ $(SUBDIR)$(SLIBNAME): $(SUBDIR)$(SLIBNAME_WITH_MAJOR) $(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SHLIBOBJS) $(SLIBOBJS) $(SUBDIR)lib$(NAME).ver $(SLIB_CREATE_DEF_CMD) - $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) $$(filter %.o,$$^) $(FFEXTRALIBS) + $(Q)echo $$(filter %.o,$$^) > $$@.objs + $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) @$$@.objs $(FFEXTRALIBS) $(SLIB_EXTRA_CMD) + -$(RM) $$@.objs Don't we need do the same for static libraries too? On first glance, this looks quite reasonable... However, the limit that is an issue is the length of a command line. The .objs file is written with an "echo" command - doesn't that fundamentally also hit the same limit (just a little bit later, as there are fewer command line flags in this command)? Or do we assume that make executes it with a shell, where the shell handles "echo" as a shell builtin so that it doesn't actually spawn a subprocess for this? Can you test it, if you could extend the list of object files to the point where the .objs file is clearly over 32 KB, and verify that it did include all the files you intended? The specific error I got when building a shared build of 7.1.1 (with ~90 external libs) was /bin/sh: line 1: /mingw64/bin/ccache: Argument list too long Got same error without ccache. The static build of the same config succeeded without any patching. The .objs file generated for libavcodec shared build is 29KB. How do I inflate the size to above 32K? I've enabled pretty much everything I can. The simplest would probably be to add a bunch of empty .c files (with long file names) in libavcodec and hook them up in the makefile. We won't notice if they are missed when linking of course, but if we pass the command line length limit, we should still notice it in one way or another. I did something simpler. I just duplicated the arg on the echo line. The build was successful. Each objs file was twice as big, same for the generated DLL. Does that answer your queries? Yes, thanks. Btw, just to be clear - are you running this in msys2? If you're running mingw build tools via WSL, there wouldn't be any issue when running the shell commands on the linux side, but it's specifically with the msys2 make where it's interesting to know which limitations make and the shell runs into or avoids. // Martin ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context
On 10/03/2025 18:42, Lynne wrote: On 10/03/2025 04:14, Andreas Rheinhardt wrote: Lynne: --- libavcodec/ffv1.h | 3 +++ libavcodec/ffv1dec.c | 19 +-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h index 8c0e71284d..860a5c14b1 100644 --- a/libavcodec/ffv1.h +++ b/libavcodec/ffv1.h @@ -174,6 +174,9 @@ typedef struct FFV1Context { * NOT shared between frame threads. */ uint8_t frame_damaged; + + /* Reference to the current packet */ + AVPacket *pkt_ref; } FFV1Context; int ff_ffv1_common_init(AVCodecContext *avctx, FFV1Context *s); diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c index eaa21eebdf..6396f22f79 100644 --- a/libavcodec/ffv1dec.c +++ b/libavcodec/ffv1dec.c @@ -469,6 +469,10 @@ static av_cold int decode_init(AVCodecContext *avctx) f->pix_fmt = AV_PIX_FMT_NONE; f->configured_pix_fmt = AV_PIX_FMT_NONE; + f->pkt_ref = av_packet_alloc(); + if (!f->pkt_ref) + return AVERROR(ENOMEM); + if ((ret = ff_ffv1_common_init(avctx, f)) < 0) return ret; @@ -701,6 +705,10 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, /* Start */ if (hwaccel) { + ret = av_packet_ref(f->pkt_ref, avpkt); + if (ret < 0) + return ret; + ret = hwaccel->start_frame(avctx, avpkt->data, avpkt->size); if (ret < 0) return ret; @@ -720,15 +728,21 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, uint32_t len; ret = find_next_slice(avctx, avpkt->data, buf_end, i, &pos, &len); - if (ret < 0) + if (ret < 0) { + av_packet_unref(f->pkt_ref); return ret; + } buf_end -= len; ret = hwaccel->decode_slice(avctx, pos, len); - if (ret < 0) + if (ret < 0) { + av_packet_unref(f->pkt_ref); return ret; + } } + + av_packet_unref(f->pkt_ref); } else { ret = decode_slices(avctx, c, avpkt); if (ret < 0) @@ -827,6 +841,7 @@ static av_cold int ffv1_decode_close(AVCodecContext *avctx) ff_progress_frame_unref(&s->last_picture); av_refstruct_unref(&s->hwaccel_last_picture_private); + av_packet_free(&s->pkt_ref); ff_ffv1_close(s); return 0; Why not simply use a const AVPacket*? No reason. Fixed locally. Thanks. *reverted this change. We need to ref the packet, since we map its memory and let the GPU use it directly without copying the contents. 6k16bit content at 24fps is typically around 2Gbps when compressed, so avoiding copies is important. ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] ffbuild: read library linker objects from a file
The linker command can exceed the maximum argument limit on MinGW, especially for libavcodec. The objects list is now stored in a file and passed to the linker. --- ffbuild/library.mak | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ffbuild/library.mak b/ffbuild/library.mak index 793e9d41fa..781b018e00 100644 --- a/ffbuild/library.mak +++ b/ffbuild/library.mak @@ -66,8 +66,10 @@ $(SUBDIR)$(SLIBNAME): $(SUBDIR)$(SLIBNAME_WITH_MAJOR) $(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SHLIBOBJS) $(SLIBOBJS) $(SUBDIR)lib$(NAME).ver $(SLIB_CREATE_DEF_CMD) - $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) $$(filter %.o,$$^) $(FFEXTRALIBS) + $(Q)echo $$(filter %.o,$$^) > $$@.objs + $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) @$$@.objs $(FFEXTRALIBS) $(SLIB_EXTRA_CMD) + -$(RM) $$@.objs ifdef SUBDIR $(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(DEP_LIBS) -- 2.46.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2 FFmpeg 20/20] doc/filters.texi: avgclass documentation
Signed-off-by: MaximilianKaindl --- doc/filters.texi | 64 1 file changed, 64 insertions(+) diff --git a/doc/filters.texi b/doc/filters.texi index a7046e0f4e..340ce39e2a 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -30776,6 +30776,70 @@ bench=start,selectivecolor=reds=-.2 .12 -.49,bench=stop @end example @end itemize +@section avgclass + +Average classification probabilities across multiple frames for both audio and video streams. + +This filter analyzes classification data from frame side data (bounding boxes) and calculates average confidence scores for each label. The filter processes classification metadata from the @code{dnn_classify} filter or other sources that generate AVDetectionBBox side data, computing averages over the entire stream. + +At the end of the stream (or when manually triggered), the filter outputs the average probability for each detected class, both to console logs and optionally to a CSV file. + +@table @option +@item output_file +Path to a CSV output file where average classification results will be written. If not specified, results are only printed to log output. + +@item v +Specify the number of video streams (default: 1). + +@item a +Specify the number of audio streams (default: 0). +@end table + +This filter supports the following commands: + +@table @option +@item writeinfo +Immediately write current average classification results to the log and output file (if specified) without waiting for the stream to end. + +@item flush +Force the filter to write results and flush all its internal state. +@end table + +@subsection Examples + +Process a video with object detection and classification, then calculate average classification probabilities: +@example +ffmpeg -i input.mp4 -vf "dnn_detect=model=detection.xml:input=data:output=detection_out:confidence=0.5,dnn_classify=model=classification.pt:dnn_backend=torch:tokenizer=tokenizer.json:labels=labels.txt,avgclass=output_file=results.csv" -f null - +@end example + +Process both audio and video classification: +@example +ffmpeg -i input.mkv -filter_complex "[0:v]dnn_classify[v0]; [0:a]aformat=sample_fmts=fltp,dnn_classify=dnn_backend=torch:model=clap_model.pt:is_audio=1:tokenizer=tokenizer.json:labels=audio_labels.txt[a0]; [v0][a0]avgclass=v=1:a=1:output_file=av_results.csv" -f null - +@end example + +@subsection Output Format + +When the filter completes processing (or when the @code{writeinfo} command is sent), it outputs classification results in this format: + +@example +Classification averages: +Stream #0: + Label: cat: Average probability 0.8765, Appeared 120 times + Label: dog: Average probability 0.3421, Appeared 42 times +Stream #1: + Label: music: Average probability 0.9823, Appeared 315 times + Label: speech: Average probability 0.1245, Appeared 15 times +@end example + +If an output file is specified, the same data is written in CSV format: +@example +stream_id,label,avg_probability,count +0,cat,0.8765,120 +0,dog,0.3421,42 +1,music,0.9823,315 +1,speech,0.1245,15 +@end example + @section concat Concatenate audio and video streams, joining them together one after the -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] avformat/fifo: Check for keyframe video type before stop dropping
The current behavior when using restart_with_keyframe is that it will recover if it also encounters any audio packet, as they are flagged as a keyframe. The expectation is that packets are dropped until the next _video_ keyframe. Fix that by checking the packet stream codec type, only letting it recover when it is a video one. Fixes ticket: #11467 Signed-off-by: Arthur Grillo --- Cc: lingjiujia...@gmail.com --- libavformat/fifo.c | 11 +-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/libavformat/fifo.c b/libavformat/fifo.c index 23e4149ad606b201aade77e28057afe8e5fe2b71..1ebfa1f38f3affec2d81b5a2bb9d69190a5aa63f 100644 --- a/libavformat/fifo.c +++ b/libavformat/fifo.c @@ -185,14 +185,21 @@ static int fifo_thread_write_packet(FifoThreadContext *ctx, AVPacket *pkt) AVRational src_tb, dst_tb; int ret, s_idx; int64_t orig_pts, orig_dts, orig_duration; +enum AVMediaType stream_codec_type = avf->streams[pkt->stream_index]->codecpar->codec_type; if (fifo->timeshift && pkt->dts != AV_NOPTS_VALUE) atomic_fetch_sub_explicit(&fifo->queue_duration, next_duration(avf, pkt, &ctx->last_received_dts), memory_order_relaxed); if (ctx->drop_until_keyframe) { if (pkt->flags & AV_PKT_FLAG_KEY) { -ctx->drop_until_keyframe = 0; -av_log(avf, AV_LOG_VERBOSE, "Keyframe received, recovering...\n"); +if (stream_codec_type == AVMEDIA_TYPE_VIDEO) { +ctx->drop_until_keyframe = 0; +av_log(avf, AV_LOG_VERBOSE, "Video keyframe received, recovering...\n"); +} else { +av_log(avf, AV_LOG_VERBOSE, "Dropping non-video keyframe\n"); +av_packet_unref(pkt); +return 0; +} } else { av_log(avf, AV_LOG_VERBOSE, "Dropping non-keyframe packet\n"); av_packet_unref(pkt); --- base-commit: 3165fe5ecf4917d9f7d7f27d4fa3af31bc604515 change-id: 20250312-video-only-kf-recover-18816009f240 Best regards, -- Arthur Grillo ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 2/2] avutil/hwcontext: Add item_name function for AVHWDeviceContext
From: softworkz Signed-off-by: softworkz --- libavutil/hwcontext.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c index 276dc9cee6..3111a44651 100644 --- a/libavutil/hwcontext.c +++ b/libavutil/hwcontext.c @@ -137,9 +137,15 @@ enum AVHWDeviceType av_hwdevice_iterate_types(enum AVHWDeviceType prev) return set ? next : AV_HWDEVICE_TYPE_NONE; } +static const char *hwdevice_ctx_get_name(void *ptr) +{ +FFHWDeviceContext *ctx = ptr; +return ctx->hw_type->name; +} + static const AVClass hwdevice_ctx_class = { .class_name = "AVHWDeviceContext", -.item_name = av_default_item_name, +.item_name = hwdevice_ctx_get_name, .category = AV_CLASS_CATEGORY_HWDEVICE, .version= LIBAVUTIL_VERSION_INT, }; -- ffmpeg-codebot ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 1/2] avcodec/vc2enc: Use LUT to assemble interleaved golomb, code
On 12/03/2025 06:27, Andreas Rheinhardt wrote: Lynne: On 12/03/2025 04:10, Andreas Rheinhardt wrote: Patches attached. - Andreas First patch is wild, its surprising no one considered inverting the way decoder parses codes for an encoder yet. I didn't even look at the decoder. (It is actually surprising that it took until 512e597932dfe05cf5665192efbe2c93c2e36af2 for the original code to be improved.) Rather than ORing and using put_bits63, I think it would make more sense to write out each chunk using put_bits sequentially. It might be possible to reverse the lookups such that you get the MSBs first so you wouldn't need to reverse them out of place in a small array. But either way, LGTM. Feel free to explore this in a follow-up. I don't think that writing them sequentially will improve anything: In order to be able to use a LUT, I would have to shift the bits starting with the MSBs into position; and then there would be the internal shifts and checks inside put_bits(). Apart from that: put_bits63() is the same as put_bits() when BUF_BITS is 64 (see ede2b391cc516f4f93621f6a214b3410b231f582). Second patch seems a bit pointless. It's just one single call you're uninlining? Chasing to save a few extra bytes of binary surely don't deserve having a wrapper function for uninlining. I am uninlining all calls besides the hot one. 31 callsites. For GCC, this reduced codesize 2c36 to 25b1 (15% saved), for clang from 4b08 to 3338 (32% saved). Oh, it was late and I didn't read carefully. Both patches LGTM. ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] FFmpeg 4.3.9 and 3.4.14
On Mon, Mar 03, 2025 at 02:10:49AM +0100, Michael Niedermayer wrote: > Hi all > > As ive already backported and somewhat tested release/4.3 i intend to > make the next point release from that and after that next is > 3.4 (because its the supported branch that is longest without a release) > > like always, please backport things if you want something backported > please test, if you want something tested > and tell me if theres anything i need to know (like waiting for something) 4.3.9 released, 3.4.14 is next [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB z(9) = an object that transcends all computable functions describable in finite terms. - ChatGPT in 2024 signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] fftools/ffmpeg_opt: Remove unused alt_bsf
Patch attached. - Andreas From 3809f60cd976de71bd409bc24b9bd11b43aacee4 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt Date: Wed, 12 Mar 2025 14:23:51 +0100 Subject: [PATCH] fftools/ffmpeg_opt: Remove unused alt_bsf Forgotten in 6325aede08d5c7086b3798cb7041299e1d07f93a. Signed-off-by: Andreas Rheinhardt --- fftools/ffmpeg_opt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c index 27a9fc9e42..1f5e6050c8 100644 --- a/fftools/ffmpeg_opt.c +++ b/fftools/ffmpeg_opt.c @@ -1507,7 +1507,6 @@ static int opt_adrift_threshold(void *optctx, const char *opt, const char *arg) } #endif -static const char *const alt_bsf[]= { "absf", "vbsf", NULL }; static const char *const alt_channel_layout[] = { "ch_layout", NULL}; static const char *const alt_codec[] = { "c", "acodec", "vcodec", "scodec", "dcodec", NULL }; static const char *const alt_filter[] = { "af", "vf", NULL }; -- 2.45.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] ffbuild: read library linker objects from a file
On 2025-03-12 05:52 pm, Martin Storsjö wrote: On Wed, 12 Mar 2025, Gyan Doshi wrote: On 2025-03-12 03:12 pm, Martin Storsjö wrote: On Wed, 12 Mar 2025, Gyan Doshi wrote: On 2025-03-12 01:29 pm, Martin Storsjö wrote: On Wed, 12 Mar 2025, Gyan Doshi wrote: The linker command can exceed the maximum argument limit on MinGW, especially for libavcodec. The objects list is now stored in a file and passed to the linker. --- ffbuild/library.mak | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ffbuild/library.mak b/ffbuild/library.mak index 793e9d41fa..781b018e00 100644 --- a/ffbuild/library.mak +++ b/ffbuild/library.mak @@ -66,8 +66,10 @@ $(SUBDIR)$(SLIBNAME): $(SUBDIR)$(SLIBNAME_WITH_MAJOR) $(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SHLIBOBJS) $(SLIBOBJS) $(SUBDIR)lib$(NAME).ver $(SLIB_CREATE_DEF_CMD) - $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) $$(filter %.o,$$^) $(FFEXTRALIBS) + $(Q)echo $$(filter %.o,$$^) > $$@.objs + $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) @$$@.objs $(FFEXTRALIBS) $(SLIB_EXTRA_CMD) + -$(RM) $$@.objs Don't we need do the same for static libraries too? On first glance, this looks quite reasonable... However, the limit that is an issue is the length of a command line. The .objs file is written with an "echo" command - doesn't that fundamentally also hit the same limit (just a little bit later, as there are fewer command line flags in this command)? Or do we assume that make executes it with a shell, where the shell handles "echo" as a shell builtin so that it doesn't actually spawn a subprocess for this? Can you test it, if you could extend the list of object files to the point where the .objs file is clearly over 32 KB, and verify that it did include all the files you intended? The specific error I got when building a shared build of 7.1.1 (with ~90 external libs) was /bin/sh: line 1: /mingw64/bin/ccache: Argument list too long Got same error without ccache. The static build of the same config succeeded without any patching. The .objs file generated for libavcodec shared build is 29KB. How do I inflate the size to above 32K? I've enabled pretty much everything I can. The simplest would probably be to add a bunch of empty .c files (with long file names) in libavcodec and hook them up in the makefile. We won't notice if they are missed when linking of course, but if we pass the command line length limit, we should still notice it in one way or another. I did something simpler. I just duplicated the arg on the echo line. The build was successful. Each objs file was twice as big, same for the generated DLL. Does that answer your queries? Yes, thanks. Btw, just to be clear - are you running this in msys2? If you're running mingw build tools via WSL, there wouldn't be any issue when running the shell commands on the linux side, but it's specifically with the msys2 make where it's interesting to know which limitations make and the shell runs into or avoids. Yes, via msys2. This is what `xargs --show-limits` prints in the same shell, POSIX upper limit on argument length (this system): 25382 POSIX smallest allowable upper limit on argument length (all systems): 4096 Maximum length of command we could actually use: 20812 Size of command buffer we are actually using: 25382 Regards, Gyan ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2 FFmpeg 21/20] libavfilter: classify fix category post_processing with very low temperature
Patch attached. I hope this correctly links to my series. Signed-off-by: MaximilianKaindl 0021-libavfilter-classify-fix-category-post_processing-wi.patch Description: Binary data ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] nvdec/vc1: add marker insertion logic
Bump Le 04/03/2025 à 18:17, averne a écrit : > > > --- > Insert the relevant marker into the bitstream on > slice submission. > This is analogous to the logic found in DXVA and > D3D hwaccels. > > Fixes decoding of various VC-1 streams, eg.: > https://drive.google.com/file/d/1WJyiRhcdU4FHTW3sVMitS7UdrZM1NBy-/view?usp=sharing > > This was investigated using my nvdec tracing tool: > https://github.com/averne/NvdecTrace > > Signed-off-by: averne > --- > libavcodec/nvdec_vc1.c | 50 > -- > 1 file changed, 48 insertions(+), 2 deletions(-) > > diff --git a/libavcodec/nvdec_vc1.c b/libavcodec/nvdec_vc1.c > index > fbfba1ecb43421573ef8fea1e37a2425c272edc9..2726574a26583b0cfc28bdec5595c15bdc465ff8 > 100644 > --- a/libavcodec/nvdec_vc1.c > +++ b/libavcodec/nvdec_vc1.c > @@ -22,6 +22,7 @@ > > #include "config_components.h" > > +#include "libavutil/mem.h" > #include "avcodec.h" > #include "hwaccel_internal.h" > #include "internal.h" > @@ -107,6 +108,51 @@ static int nvdec_vc1_start_frame(AVCodecContext *avctx, > const uint8_t *buffer, u > return 0; > } > > +static int nvdec_vc1_decode_slice(AVCodecContext *avctx, const uint8_t > *buffer, > + uint32_t size) > +{ > +NVDECContext *ctx = avctx->internal->hwaccel_priv_data; > +const VC1Context *v = avctx->priv_data; > +uint32_t marker; > +int marker_size; > +void *tmp; > + > +if (avctx->codec_id != AV_CODEC_ID_VC1) { > +marker_size = 0; > +} else { > +if (ctx->bitstream_len) > +marker = VC1_CODE_SLICE; > +else if (v->profile == PROFILE_ADVANCED && v->fcm == ILACE_FIELD && > v->second_field) > +marker = VC1_CODE_FIELD; > +else > +marker = VC1_CODE_FRAME; > + > +marker_size = (size >= sizeof(marker) && AV_RB32(buffer) != marker) > ? sizeof(marker) : 0; > +} > + > +tmp = av_fast_realloc(ctx->bitstream_internal, &ctx->bitstream_allocated, > + ctx->bitstream_len + size + marker_size); > +if (!tmp) > +return AVERROR(ENOMEM); > +ctx->bitstream = ctx->bitstream_internal = tmp; > + > +tmp = av_fast_realloc(ctx->slice_offsets, &ctx->slice_offsets_allocated, > + (ctx->nb_slices + 1) * > sizeof(*ctx->slice_offsets)); > +if (!tmp) > +return AVERROR(ENOMEM); > +ctx->slice_offsets = tmp; > + > +if (marker_size) > +AV_WB32(ctx->bitstream_internal + ctx->bitstream_len, marker); > + > +memcpy(ctx->bitstream_internal + ctx->bitstream_len + marker_size, > buffer, size); > +ctx->slice_offsets[ctx->nb_slices] = ctx->bitstream_len; > +ctx->bitstream_len += size + marker_size; > +ctx->nb_slices++; > + > +return 0; > +} > + > static int nvdec_vc1_frame_params(AVCodecContext *avctx, >AVBufferRef *hw_frames_ctx) > { > @@ -121,7 +167,7 @@ const FFHWAccel ff_vc1_nvdec_hwaccel = { > .p.pix_fmt= AV_PIX_FMT_CUDA, > .start_frame = nvdec_vc1_start_frame, > .end_frame= ff_nvdec_simple_end_frame, > -.decode_slice = ff_nvdec_simple_decode_slice, > +.decode_slice = nvdec_vc1_decode_slice, > .frame_params = nvdec_vc1_frame_params, > .init = ff_nvdec_decode_init, > .uninit = ff_nvdec_decode_uninit, > @@ -136,7 +182,7 @@ const FFHWAccel ff_wmv3_nvdec_hwaccel = { > .p.pix_fmt= AV_PIX_FMT_CUDA, > .start_frame = nvdec_vc1_start_frame, > .end_frame= ff_nvdec_simple_end_frame, > -.decode_slice = ff_nvdec_simple_decode_slice, > +.decode_slice = nvdec_vc1_decode_slice, > .frame_params = nvdec_vc1_frame_params, > .init = ff_nvdec_decode_init, > .uninit = ff_nvdec_decode_uninit, > > --- > base-commit: f76195ff656d6bea68feee783160652e2b3e3d60 > change-id: 20250304-nvdec-vc1-marker2-53d6bd30ee99 > > Best regards, ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] avcodec/aom_film_grain: Cast const away to suppress compiler, warning
Patch attached. - Andreas From fa46d708680d10dd2f9b80365eafbeeda6cc Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt Date: Wed, 12 Mar 2025 18:59:05 +0100 Subject: [PATCH] avcodec/aom_film_grain: Cast const away to suppress compiler warning av_frame_side_data_add() typically takes ownership of the provided AVBufferRef reference and therefore uses a parameter of type AVBufferRef**; yet with the AV_FRAME_SIDE_DATA_FLAG_NEW_REF, it creates new references instead, without touching the given reference. Therefore it is safe to cast const away. Signed-off-by: Andreas Rheinhardt --- libavcodec/aom_film_grain.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libavcodec/aom_film_grain.c b/libavcodec/aom_film_grain.c index d5ea75f61c..0f24a2bcf8 100644 --- a/libavcodec/aom_film_grain.c +++ b/libavcodec/aom_film_grain.c @@ -366,7 +366,8 @@ int ff_aom_attach_film_grain_sets(const AVFilmGrainAFGS1Params *s, AVFrame *fram continue; if (!av_frame_side_data_add(&frame->side_data, &frame->nb_side_data, -AV_FRAME_DATA_FILM_GRAIN_PARAMS, &s->sets[i], +AV_FRAME_DATA_FILM_GRAIN_PARAMS, +(AVBufferRef**)&s->sets[i], AV_FRAME_SIDE_DATA_FLAG_NEW_REF)) return AVERROR(ENOMEM); } -- 2.45.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [RFC] New swscale internal design prototype
On Wed, 12 Mar 2025, Niklas Haas wrote: On Sun, 09 Mar 2025 20:45:23 +0100 Niklas Haas wrote: On Sun, 09 Mar 2025 18:11:54 +0200 Martin Storsjö wrote: > On Sat, 8 Mar 2025, Niklas Haas wrote: > > > What are the thoughts on the float-first approach? > > In general, for modern architectures, relying on floats probably is > reasonable. (On architectures that aren't of quite as widespread interest, > it might not be so clear cut though.) > > However with the benchmark example you provided a couple of weeks ago, we > concluded that even on x86 on modern HW, floats were faster than int16 > only in one case: When using Clang, not GCC, and when compiling with > -mavx2, not without it. In all the other cases, int16 was faster than > float. Hi Martin, I should preface that this particular benchmark was a very specific test for floating point *filtering*, which is considerably more punishing than the conversion pipeline I have implemented here, and I think it's partly the fault of compilers generating very unoptimal filtering code. I think it would be better to re-assess using the current prototype on actual hardware. I threw up a quick NEON test branch: (untested, should hopefully work) https://github.com/haasn/FFmpeg/commits/swscale3-neon # adjust the benchmark iters count as needed based on the HW perf make libswscale/tests/swscale && libswscale/tests/swscale -unscaled 1 -bench 50 If this differs significantly from the ~1.8x speedup I measure on x86, I will be far more concerned about the new approach. Sorry, I haven't had time to try this out myself yet... I gave it a try. So, the result of a naive/blind run on a Cortex-X1 using clang version 20.0.0 (from the latest Android NDK v29) is: Overall speedup=1.688x faster, min=0.141x max=45.898x This has quite a lot more significant speed regressions compared to x86 though. In particular, clang/LLVM refuses to vectorize packed reads of 2 or 3 elements, so any sort of operation involving rgb24 or bgr24 suffers horribly: So, if the performance of this relies on compiler autovectorization, what's the plan wrt GCC? We blanket disable autovectorization when compiling with GCC - see fd6dbc53855fbfc9a782095d0ffe11dd3a98905f for when it was disabled last time. Building and running fate with autovectorization in GCC does succeed at least on modern GCC on x86_64, but it's of course possible that it still can cause issues in various more tricky configurations. // Martin ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH v4] Mark C globals with small code model
Martin Storsjö: > On Wed, 12 Mar 2025, Andreas Rheinhardt wrote: > >> Pranav Kant via ffmpeg-devel: >>> By default, all globals in C/C++ compiled by clang are allocated >>> in non-large data sections. See [1] for background on code models. >>> For PIC (Position independent code), this is fine as long as binary is >>> small but as binary size increases, users maybe want to use medium/large >>> code models (-mcmodel=medium) which moves data in to large sections. >>> As data in these large sections cannot be accessed using PIC code >>> anymore (as it may be too far away), compiler ends up using a different >>> instruction sequence when building C/C++ code -- using GOT to access >>> these globals (which can be relaxed by linker at link time if binary >>> ends up being smaller). However, assembly files continue to access these >>> globals defined in C/C++ files using older (and invalid instruction >>> sequence). So, we mark all such globals with an attribute that forces >>> them to be allocated in small sections allowing them to validly be >>> accessed from the assembly code. >>> >>> This patch should not have any affect on builds that use small code >>> model, which is the default mode. >>> >>> [1] https://eli.thegreenplace.net/2012/01/03/understanding-the-x64- >>> code-models >>> >>> Signed-off-by: Pranav Kant >>> --- >>> libavcodec/ac3dsp.c | 2 ++ >>> libavcodec/cabac.c | 2 ++ >>> libavcodec/x86/constants.c | 8 >>> libavutil/attributes.h | 6 ++ >>> libavutil/attributes_internal.h | 16 >>> 5 files changed, 34 insertions(+) >>> >>> diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c >>> index 730fa70fff..d16b6c24c3 100644 >>> --- a/libavcodec/ac3dsp.c >>> +++ b/libavcodec/ac3dsp.c >>> @@ -25,6 +25,7 @@ >>> >>> #include "config.h" >>> #include "libavutil/attributes.h" >>> +#include "libavutil/attributes_internal.h" >>> #include "libavutil/common.h" >>> #include "libavutil/intmath.h" >>> #include "libavutil/mem_internal.h" >>> @@ -104,6 +105,7 @@ static void ac3_update_bap_counts_c(uint16_t >>> mant_cnt[16], uint8_t *bap, >>> mant_cnt[bap[len]]++; >>> } >>> >>> +attribute_mcmodel_small >>> DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = { >> >> Shouldn't stuff like this be applied to the declaration so that C code >> can also take advantage of the knowledge that this object will be placed >> in the small code section? >> >>> 0, 0, 0, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16 >>> }; >>> diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c >>> index 7d41cd2ae6..b8c6db29a2 100644 >>> --- a/libavcodec/cabac.c >>> +++ b/libavcodec/cabac.c >>> @@ -24,11 +24,13 @@ >>> * Context Adaptive Binary Arithmetic Coder. >>> */ >>> >>> +#include "libavutil/attributes_internal.h" >>> #include "libavutil/error.h" >>> #include "libavutil/mem_internal.h" >>> >>> #include "cabac.h" >>> >>> +attribute_mcmodel_small >>> DECLARE_ASM_ALIGNED(1, const uint8_t, ff_h264_cabac_tables)[512 + >>> 4*2*64 + 4*64 + 63] = { >> >> Your commit message ("However, assembly files continue to access") >> speaks only of assembly files, i.e. external asm. Yet this here is only >> used by inline ASM. Looking through the code the reason for this is that >> I thought that specifying the memory model is only necessary for stuff >> used by external asm, yet ff_h264_cabac_tables does not seem to be used >> by external ASM at all, only inline ASM. If I see this correctly, the >> reason for this is that LOCAL_MANGLE (and therefore MANGLE) uses rip >> addressing on x64 when configure sets the RIP define. But this means >> that the set of files needing attribute_mcmodel_small is a superset of >> the files currently using DECLARE_ASM_ALIGNED. This means that one would >> only need two macros for the variables accessed by ASM: One for only >> external ASM and one for inline ASM (and potentially external ASM) >> instead of adding attribute_mcmodel_small at various places in the >> codebase. >> >>> 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5, >>> 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, >>> diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c >>> index bc7f2b17b8..347b7dd1d3 100644 >>> --- a/libavcodec/x86/constants.c >>> +++ b/libavcodec/x86/constants.c >>> @@ -18,17 +18,21 @@ >>> * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA >>> 02110-1301 USA >>> */ >>> >>> +#include "libavutil/attributes_internal.h" >>> #include "libavutil/mem_internal.h" >>> #include "libavutil/x86/asm.h" // for xmm_reg >>> #include "constants.h" >>> >>> +attribute_mcmodel_small >>> DECLARE_ALIGNED(32, const ymm_reg, ff_pw_1) = { >> 0x0001000100010001ULL, 0x0001000100010001ULL,> >> 0x0001000100010001ULL, 0x0001000100010001ULL }; >>> DECLARE_ALIGNED(32, const ymm_reg, ff_pw_2) = >>> { 0x0002000200020002ULL, 0x0002000200020002ULL, >>> >>> 0x0002000
Re: [FFmpeg-devel] [PATCH v4] Mark C globals with small code model
On Wed, 12 Mar 2025, Andreas Rheinhardt wrote: Pranav Kant via ffmpeg-devel: By default, all globals in C/C++ compiled by clang are allocated in non-large data sections. See [1] for background on code models. For PIC (Position independent code), this is fine as long as binary is small but as binary size increases, users maybe want to use medium/large code models (-mcmodel=medium) which moves data in to large sections. As data in these large sections cannot be accessed using PIC code anymore (as it may be too far away), compiler ends up using a different instruction sequence when building C/C++ code -- using GOT to access these globals (which can be relaxed by linker at link time if binary ends up being smaller). However, assembly files continue to access these globals defined in C/C++ files using older (and invalid instruction sequence). So, we mark all such globals with an attribute that forces them to be allocated in small sections allowing them to validly be accessed from the assembly code. This patch should not have any affect on builds that use small code model, which is the default mode. [1] https://eli.thegreenplace.net/2012/01/03/understanding-the-x64-code-models Signed-off-by: Pranav Kant --- libavcodec/ac3dsp.c | 2 ++ libavcodec/cabac.c | 2 ++ libavcodec/x86/constants.c | 8 libavutil/attributes.h | 6 ++ libavutil/attributes_internal.h | 16 5 files changed, 34 insertions(+) diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c index 730fa70fff..d16b6c24c3 100644 --- a/libavcodec/ac3dsp.c +++ b/libavcodec/ac3dsp.c @@ -25,6 +25,7 @@ #include "config.h" #include "libavutil/attributes.h" +#include "libavutil/attributes_internal.h" #include "libavutil/common.h" #include "libavutil/intmath.h" #include "libavutil/mem_internal.h" @@ -104,6 +105,7 @@ static void ac3_update_bap_counts_c(uint16_t mant_cnt[16], uint8_t *bap, mant_cnt[bap[len]]++; } +attribute_mcmodel_small DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = { Shouldn't stuff like this be applied to the declaration so that C code can also take advantage of the knowledge that this object will be placed in the small code section? 0, 0, 0, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16 }; diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c index 7d41cd2ae6..b8c6db29a2 100644 --- a/libavcodec/cabac.c +++ b/libavcodec/cabac.c @@ -24,11 +24,13 @@ * Context Adaptive Binary Arithmetic Coder. */ +#include "libavutil/attributes_internal.h" #include "libavutil/error.h" #include "libavutil/mem_internal.h" #include "cabac.h" +attribute_mcmodel_small DECLARE_ASM_ALIGNED(1, const uint8_t, ff_h264_cabac_tables)[512 + 4*2*64 + 4*64 + 63] = { Your commit message ("However, assembly files continue to access") speaks only of assembly files, i.e. external asm. Yet this here is only used by inline ASM. Looking through the code the reason for this is that I thought that specifying the memory model is only necessary for stuff used by external asm, yet ff_h264_cabac_tables does not seem to be used by external ASM at all, only inline ASM. If I see this correctly, the reason for this is that LOCAL_MANGLE (and therefore MANGLE) uses rip addressing on x64 when configure sets the RIP define. But this means that the set of files needing attribute_mcmodel_small is a superset of the files currently using DECLARE_ASM_ALIGNED. This means that one would only need two macros for the variables accessed by ASM: One for only external ASM and one for inline ASM (and potentially external ASM) instead of adding attribute_mcmodel_small at various places in the codebase. 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5, 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c index bc7f2b17b8..347b7dd1d3 100644 --- a/libavcodec/x86/constants.c +++ b/libavcodec/x86/constants.c @@ -18,17 +18,21 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "libavutil/attributes_internal.h" #include "libavutil/mem_internal.h" #include "libavutil/x86/asm.h" // for xmm_reg #include "constants.h" +attribute_mcmodel_small DECLARE_ALIGNED(32, const ymm_reg, ff_pw_1)= { 0x0001000100010001ULL, 0x0001000100010001ULL,> 0x0001000100010001ULL, 0x0001000100010001ULL }; DECLARE_ALIGNED(32, const ymm_reg, ff_pw_2)= { 0x0002000200020002ULL, 0x0002000200020002ULL, 0x0002000200020002ULL, 0x0002000200020002ULL }; DECLARE_ASM_ALIGNED(16, const xmm_reg, ff_pw_3)= { 0x0003000300030003ULL, 0x0003000300030003ULL }; +attribute_mcmodel_small DECLARE_ASM_ALIGNED(32, const ymm_reg, ff_pw_4)= { 0x0004000400040004ULL, 0x0004000400040004ULL, 0x0004000400040004ULL, 0x0004000400040004ULL }; +attribute_mcmodel_small DECLARE_ASM_ALIGNED(16,
Re: [FFmpeg-devel] [PATCH v4] Mark C globals with small code model
On Wed, 12 Mar 2025, Andreas Rheinhardt wrote: Martin Storsjö: On Wed, 12 Mar 2025, Andreas Rheinhardt wrote: Pranav Kant via ffmpeg-devel: + * all globals in a data section that's unreachable with PC relative instructions + * (small code model instruction sequence). We mark all such globals with this + * attribute_mcmodel_small to ensure assembly accessible globals continue to be + * allocated in sections reachable from PC relative instructions. + */ +#if ARCH_X86_64 && defined(__ELF__) && AV_HAS_ATTRIBUTE(model) You make this ARCH_X86_64 only, yet the concept of code models also exists for other arches, e.g. AArch64. I presume that assembly for other arches presumes that we are not using the large code model for accesses, so that the same issue can happen with them. Then we have two options: FWIW, in e4ac156b7c47725327dff78bb83f5eecbaee3add we added attribute_visibility_hidden to a bunch of symbols that are accessed from aarch64 assembly, in a similar fashion. While the effects are different, I wonder if we should abstract these two down to a more generic attribute for any symbol accessed directly from any assembly. As I said above: We would only need two macros: One for any object accessed from inline assembly and one for objects accessed by external assembly (and not inline assembly). The latter would be hidden visibility+small code model, the former would also have av_used. Of course, both would allow to specify alignment. But naming is hard. How about DECLARE_ASM_VAR and DECLARE_INLINE_ASM_VAR? That sounds ok to me. // Martin ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [RFC] New swscale internal design prototype
On Wed, 12 Mar 2025 02:58:52 +0200 Rémi Denis-Courmont wrote: > > > Le 10 mars 2025 15:14:46 GMT+02:00, Niklas Haas a écrit : > >On Sun, 09 Mar 2025 17:57:48 -0700 Rémi Denis-Courmont > >wrote: > >> > >> > >> Le 9 mars 2025 12:57:47 GMT-07:00, Niklas Haas a écrit : > >> >On Sun, 09 Mar 2025 11:18:04 -0700 Rémi Denis-Courmont > >> >wrote: > >> >> Hi, > >> >> > >> >> Le 8 mars 2025 14:53:42 GMT-08:00, Niklas Haas a > >> >> écrit : > >> >> >https://github.com/haasn/FFmpeg/blob/swscale3/doc/swscale-v2.txt > >> >> > >> >> >I have spent the past week or so ironing > >> >> >I wanted to post it here to gather some feedback on the approach. > >> >> >Where does > >> >> >it fall on the "madness" scale? Is the new operations and optimizer > >> >> >design > >> >> >comprehensible? Am I trying too hard to reinvent compilers? Are there > >> >> >any > >> >> >platforms where the high number of function calls per frame would be > >> >> >probitively expensive? What are the thoughts on the float-first > >> >> >approach? See > >> >> >also the list of limitations and improvement ideas at the bottom of my > >> >> >design > >> >> >document. > >> >> > >> >> Using floats internally may be fine if there's (almost) never any > >> >> spillage, but that necessarily implies custom calling conventions. And > >> >> won't work with as many as 32 pixels. On RVV 128-bit, you'd have only 4 > >> >> vectors. On Arm NEON, it would be even worse as scalars/constants need > >> >> to be stored in vectors as well. > >> > > >> >I think that a custom calling convention is not as unreasonable as it may > >> >sound, > >> >and will actually be easier to implement than the standard calling > >> >convention > >> >since functions will not have to deal with pixel load/store, nor will > >> >there be > >> >any need for "fused" versions of operations (whose only purpose is to > >> >avoid > >> >the roundtrip through L1). > >> > > >> >The pixel chunk size is easily changed; it is a compile time constant and > >> >there > >> >are no strict requirements on it. If RISC-V (or any other platform) > >> >struggles > >> >with storing 32 floats in vector registers, we could go down to 16 (or > >> >even 8); > >> >the number 32 was merely chosen by benchmarking and not through any > >> >careful > >> >design consideration. > >> > >> It can't be a compile time constant on RVV nor (if it's ever introduced) > >> SVE because they are scalable. I doubt that a compile-time constant will > >> work well across all variants of x86 as well, but not that I'd know. > > > >It's my understanding that on existing RVV implementations, the number of > >cycles needed to execute an m4/m2 operation is roughly 4x/2x the cost of > >an equivalent m1 operation. > > But that's exactly the problem! We want to use the *same* group multipler > regardless of the vector length to obtain roughly optimal bandwidth. That > means the number of elements will be proportional to the vector length. The > multiplier depends on the element size and perhaps the register pressure of a > given chunk processing, not the vector length. > > And with SVE2, it'll most probably work optimally with a 2x unroll (like NEON > typically). This is more or less equivalent to RVV m2, and will also lead to > a chunk size proportional to the hardware vector length. > > If you calculate the chunk size based in the worst 128-bit case, then it'll > work on 256-bit but at only 50% of the possible speed, because half the CPU > time will be wasted working on tail or masked elements. Okay, I came up with slightly different approach that will scale to arbitrary chunk sizes, permit custom calling conventions or more exotic approaches, solve some other issues with the current prototype (such as the expensive swizzle operations), and also lower the per-call overhead. I will post a refactored branch whenever it's done. In the meantime, let's put this RFC on hold, I think I've gathered enough feedback. > > >If this continues to be the case, the underlying VLEN of the implementation > >should not matter much, even with a compile time constant chunk size, as long > >as it does not greatly exceed 512. > > No clue how you come to that conclusion. The maths don't add up here. > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 1/1] avcodec/pcm: fix build warning by replacing deprecated method with avcodec_get_supported_config.
Zhao Zhili: > > >> On Mar 11, 2025, at 17:25, joney...@gmail.com wrote: >> >> From: Jingwei Yao >> >> Signed-off-by: Jingwei Yao >> --- >> libavcodec/pcm.c | 12 ++-- >> 1 file changed, 10 insertions(+), 2 deletions(-) >> >> diff --git a/libavcodec/pcm.c b/libavcodec/pcm.c >> index a23293dca2..e04ca78101 100644 >> --- a/libavcodec/pcm.c >> +++ b/libavcodec/pcm.c >> @@ -252,8 +252,10 @@ typedef struct PCMDecode { >> static av_cold int pcm_decode_init(AVCodecContext *avctx) >> { >> PCMDecode *s = avctx->priv_data; >> +const enum AVSampleFormat *sample_fmts; >> AVFloatDSPContext *fdsp; >> -int i; >> +int num_sample_fmts; >> +int i, ret; >> >> switch (avctx->codec_id) { >> case AV_CODEC_ID_PCM_ALAW: >> @@ -284,7 +286,13 @@ static av_cold int pcm_decode_init(AVCodecContext >> *avctx) >> break; >> } >> >> -avctx->sample_fmt = avctx->codec->sample_fmts[0]; >> +ret = avcodec_get_supported_config(avctx, NULL, >> AV_CODEC_CONFIG_SAMPLE_FORMAT, >> + 0, (const void**)&sample_fmts, >> &num_sample_fmts); >> +if (ret < 0) >> +return ret; >> + >> +if (sample_fmts) >> +avctx->sample_fmt = sample_fmts[0]; > > How about just disable the warning with FF_DISABLE_DEPRECATION_WARNINGS? > > sample_fmts should be moved to FFCodec finally. So we can fix it finally by > > avctx->sample_fmt = ffcodec(avctx->codec)->sample_fmts[0]; Actually, sample_fmts and the other stuff should be encoder-only. I therefore just sent a patchset that avoids using it here (by the decoders). > > It doesn’t worth the trouble and complexity to get some information which is > only an array > in current translation unit. > ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context
Lynne: > On 10/03/2025 18:42, Lynne wrote: >> On 10/03/2025 04:14, Andreas Rheinhardt wrote: >>> Lynne: --- libavcodec/ffv1.h | 3 +++ libavcodec/ffv1dec.c | 19 +-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h index 8c0e71284d..860a5c14b1 100644 --- a/libavcodec/ffv1.h +++ b/libavcodec/ffv1.h @@ -174,6 +174,9 @@ typedef struct FFV1Context { * NOT shared between frame threads. */ uint8_t frame_damaged; + + /* Reference to the current packet */ + AVPacket *pkt_ref; } FFV1Context; int ff_ffv1_common_init(AVCodecContext *avctx, FFV1Context *s); diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c index eaa21eebdf..6396f22f79 100644 --- a/libavcodec/ffv1dec.c +++ b/libavcodec/ffv1dec.c @@ -469,6 +469,10 @@ static av_cold int decode_init(AVCodecContext *avctx) f->pix_fmt = AV_PIX_FMT_NONE; f->configured_pix_fmt = AV_PIX_FMT_NONE; + f->pkt_ref = av_packet_alloc(); + if (!f->pkt_ref) + return AVERROR(ENOMEM); + if ((ret = ff_ffv1_common_init(avctx, f)) < 0) return ret; @@ -701,6 +705,10 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, /* Start */ if (hwaccel) { + ret = av_packet_ref(f->pkt_ref, avpkt); + if (ret < 0) + return ret; + ret = hwaccel->start_frame(avctx, avpkt->data, avpkt->size); if (ret < 0) return ret; @@ -720,15 +728,21 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, uint32_t len; ret = find_next_slice(avctx, avpkt->data, buf_end, i, &pos, &len); - if (ret < 0) + if (ret < 0) { + av_packet_unref(f->pkt_ref); return ret; + } buf_end -= len; ret = hwaccel->decode_slice(avctx, pos, len); - if (ret < 0) + if (ret < 0) { + av_packet_unref(f->pkt_ref); return ret; + } } + + av_packet_unref(f->pkt_ref); } else { ret = decode_slices(avctx, c, avpkt); if (ret < 0) @@ -827,6 +841,7 @@ static av_cold int ffv1_decode_close(AVCodecContext *avctx) ff_progress_frame_unref(&s->last_picture); av_refstruct_unref(&s->hwaccel_last_picture_private); + av_packet_free(&s->pkt_ref); ff_ffv1_close(s); return 0; >>> >>> Why not simply use a const AVPacket*? >> >> No reason. Fixed locally. >> Thanks. > > *reverted this change. > We need to ref the packet, since we map its memory and let the GPU use > it directly without copying the contents. 6k16bit content at 24fps is > typically around 2Gbps when compressed, so avoiding copies is important. How long does the hwaccel need this data? - Andreas ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/9] avcodec/msmpeg4enc: Inline constant
Patches attached. - Andreas From b3a259cf6bf825a34d9405a53e923883def1 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt Date: Fri, 7 Mar 2025 15:42:18 +0100 Subject: [PATCH 1/9] avcodec/msmpeg4enc: Inline constant Signed-off-by: Andreas Rheinhardt --- libavcodec/msmpeg4enc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libavcodec/msmpeg4enc.c b/libavcodec/msmpeg4enc.c index 2a9e16975f..8310e0a578 100644 --- a/libavcodec/msmpeg4enc.c +++ b/libavcodec/msmpeg4enc.c @@ -354,9 +354,8 @@ static void msmpeg4v2_encode_motion(MpegEncContext * s, int val) int range, bit_size, sign, code, bits; if (val == 0) { -/* zero vector */ -code = 0; -put_bits(&s->pb, ff_mvtab[code][1], ff_mvtab[code][0]); +/* zero vector; corresponds to ff_mvtab[0] */ +put_bits(&s->pb, 1, 0x1); } else { bit_size = s->f_code - 1; range = 1 << bit_size; -- 2.45.2 From 78b6d8190d4dfbfefc80cc663a883e4e7ee7e805 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt Date: Fri, 7 Mar 2025 01:51:49 +0100 Subject: [PATCH 2/9] avcodec/mpegvideo_dec: Mark init, flush, close functions as av_cold Signed-off-by: Andreas Rheinhardt --- libavcodec/mpegvideo_dec.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libavcodec/mpegvideo_dec.c b/libavcodec/mpegvideo_dec.c index 532d8cf5c1..2856dbfbd6 100644 --- a/libavcodec/mpegvideo_dec.c +++ b/libavcodec/mpegvideo_dec.c @@ -43,7 +43,7 @@ #include "threadprogress.h" #include "wmv2dec.h" -int ff_mpv_decode_init(MpegEncContext *s, AVCodecContext *avctx) +av_cold int ff_mpv_decode_init(MpegEncContext *s, AVCodecContext *avctx) { enum ThreadingStatus thread_status; @@ -141,7 +141,7 @@ int ff_mpeg_update_thread_context(AVCodecContext *dst, return 0; } -int ff_mpv_decode_close(AVCodecContext *avctx) +av_cold int ff_mpv_decode_close(AVCodecContext *avctx) { MpegEncContext *s = avctx->priv_data; @@ -150,7 +150,7 @@ int ff_mpv_decode_close(AVCodecContext *avctx) return 0; } -int ff_mpv_common_frame_size_change(MpegEncContext *s) +av_cold int ff_mpv_common_frame_size_change(MpegEncContext *s) { int err = 0; @@ -427,7 +427,7 @@ void ff_mpeg_draw_horiz_band(MpegEncContext *s, int y, int h) s->first_field, s->low_delay); } -void ff_mpeg_flush(AVCodecContext *avctx) +av_cold void ff_mpeg_flush(AVCodecContext *avctx) { MpegEncContext *const s = avctx->priv_data; -- 2.45.2 From 5956d3424e2eb7c2419dcbf026cb28eee0d880fd Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt Date: Fri, 7 Mar 2025 02:06:40 +0100 Subject: [PATCH 3/9] avcodec/vc1_block: Stop setting write-only block_last_index It is only used by the mpegvideo unquantize functions which this decoder does not use at all. Signed-off-by: Andreas Rheinhardt --- libavcodec/vc1_block.c | 32 +--- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/libavcodec/vc1_block.c b/libavcodec/vc1_block.c index 8babbde38c..26adfbca1d 100644 --- a/libavcodec/vc1_block.c +++ b/libavcodec/vc1_block.c @@ -585,7 +585,6 @@ static int vc1_decode_i_block(VC1Context *v, int16_t block[64], int n, GetBitContext *gb = &v->s.gb; MpegEncContext *s = &v->s; int dc_pred_dir = 0; /* Direction of the DC prediction used */ -int i; int16_t *dc_val; int16_t *ac_val, *ac_val2; int dcdiff, scale; @@ -622,7 +621,6 @@ static int vc1_decode_i_block(VC1Context *v, int16_t block[64], int n, scale = v->pq * 2 + v->halfpq; //AC Decoding -i = !!coded; if (coded) { int last = 0, skip, value; @@ -637,14 +635,14 @@ static int vc1_decode_i_block(VC1Context *v, int16_t block[64], int n, } else zz_table = v->zz_8x8[1]; -while (!last) { +for (int i = 1; !last; ++i) { int ret = vc1_decode_ac_coeff(v, &last, &skip, &value, codingset); if (ret < 0) return ret; i += skip; if (i > 63) break; -block[zz_table[i++]] = value; +block[zz_table[i]] = value; } /* apply AC prediction if needed */ @@ -696,8 +694,6 @@ static int vc1_decode_i_block(VC1Context *v, int16_t block[64], int n, } } } -if (s->ac_pred) i = 63; -s->block_last_index[n] = i; return 0; } @@ -716,7 +712,6 @@ static int vc1_decode_i_block_adv(VC1Context *v, int16_t block[64], int n, GetBitContext *gb = &v->s.gb; MpegEncContext *s = &v->s; int dc_pred_dir = 0; /* Direction of the DC prediction used */ -int i; int16_t *dc_val = NULL; int16_t *ac_val, *ac_val2; int dcdiff; @@ -778,7 +773,6 @@ static int vc1_decode_i_block_adv(VC1Context *v, int16_t block[64], int n, } //AC Decoding -i = 1; if (coded) { int last = 0, skip, value; @@ -801,14 +795,14 @@ static int vc1_decode_i_bloc
Re: [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context
Lynne: > > > On 13/03/2025 02:24, Andreas Rheinhardt wrote: >> Lynne: >>> On 10/03/2025 18:42, Lynne wrote: On 10/03/2025 04:14, Andreas Rheinhardt wrote: > Lynne: >> --- >> libavcodec/ffv1.h | 3 +++ >> libavcodec/ffv1dec.c | 19 +-- >> 2 files changed, 20 insertions(+), 2 deletions(-) >> >> diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h >> index 8c0e71284d..860a5c14b1 100644 >> --- a/libavcodec/ffv1.h >> +++ b/libavcodec/ffv1.h >> @@ -174,6 +174,9 @@ typedef struct FFV1Context { >> * NOT shared between frame threads. >> */ >> uint8_t frame_damaged; >> + >> + /* Reference to the current packet */ >> + AVPacket *pkt_ref; >> } FFV1Context; >> int ff_ffv1_common_init(AVCodecContext *avctx, FFV1Context *s); >> diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c >> index eaa21eebdf..6396f22f79 100644 >> --- a/libavcodec/ffv1dec.c >> +++ b/libavcodec/ffv1dec.c >> @@ -469,6 +469,10 @@ static av_cold int decode_init(AVCodecContext >> *avctx) >> f->pix_fmt = AV_PIX_FMT_NONE; >> f->configured_pix_fmt = AV_PIX_FMT_NONE; >> + f->pkt_ref = av_packet_alloc(); >> + if (!f->pkt_ref) >> + return AVERROR(ENOMEM); >> + >> if ((ret = ff_ffv1_common_init(avctx, f)) < 0) >> return ret; >> @@ -701,6 +705,10 @@ static int decode_frame(AVCodecContext *avctx, >> AVFrame *rframe, >> /* Start */ >> if (hwaccel) { >> + ret = av_packet_ref(f->pkt_ref, avpkt); >> + if (ret < 0) >> + return ret; >> + >> ret = hwaccel->start_frame(avctx, avpkt->data, avpkt- >> >size); >> if (ret < 0) >> return ret; >> @@ -720,15 +728,21 @@ static int decode_frame(AVCodecContext *avctx, >> AVFrame *rframe, >> uint32_t len; >> ret = find_next_slice(avctx, avpkt->data, buf_end, i, >> &pos, &len); >> - if (ret < 0) >> + if (ret < 0) { >> + av_packet_unref(f->pkt_ref); >> return ret; >> + } >> buf_end -= len; >> ret = hwaccel->decode_slice(avctx, pos, len); >> - if (ret < 0) >> + if (ret < 0) { >> + av_packet_unref(f->pkt_ref); >> return ret; >> + } >> } >> + >> + av_packet_unref(f->pkt_ref); >> } else { >> ret = decode_slices(avctx, c, avpkt); >> if (ret < 0) >> @@ -827,6 +841,7 @@ static av_cold int >> ffv1_decode_close(AVCodecContext *avctx) >> ff_progress_frame_unref(&s->last_picture); >> av_refstruct_unref(&s->hwaccel_last_picture_private); >> + av_packet_free(&s->pkt_ref); >> ff_ffv1_close(s); >> return 0; > > Why not simply use a const AVPacket*? No reason. Fixed locally. Thanks. >>> >>> *reverted this change. >>> We need to ref the packet, since we map its memory and let the GPU use >>> it directly without copying the contents. 6k16bit content at 24fps is >>> typically around 2Gbps when compressed, so avoiding copies is important. >> >> How long does the hwaccel need this data? > > Until the frame has been asynchronously decoded. We give an output frame > with a semaphore that receivers need to wait on to determine when that is. > > On the decoder-side, the hardware has a fixed number of queues where > submissions can be sent to asynchronously. We treat it as a ring buffer > and keep a reference to all resources our side for each submission, > until we need to reuse the slot, at which point we wait on the frame > decoding to complete (which it usually has), and we release all > resources used. > > Output frames also have a bit of state that has to be freed once the > frame is marked (unreferenced) by the decoder as no longer being needed > as a reference, this is done in the FFHWAccel.free_frame_priv callback. > There, we have to wait for the last internal use of the frame to be > finished (done via the vp->wait_semaphores() call in vulkan_decode.c). > > This is valid for both ASIC hardware decoders and a compute shader based > implementation, since the two share the same code, except for decode > submissions. 1. If you need a reference to the packet's data, then reference AVPacket.buf, not the whole AVPacket. This avoids allocating a spare AVPacket as well as copying side data. 2. It sounds very wrong and fragile that the decoder has to keep a reference because the hwaccel might need it. There may be future hwaccels that don't need such a reference etc. It seems better to extend e.g. the start_frame callback and pass a r
[FFmpeg-devel] [PATCH 1/7] avcodec/pcm: Remove always-false check
Patches attached. - Andreas From d07c1ad85a60395eb20501a59ff35de1d60f029b Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt Date: Tue, 11 Mar 2025 17:26:43 +0100 Subject: [PATCH 1/7] avcodec/pcm: Remove always-false check All codecs here have a valid sample size at this point. This check has (presumably) been added for DVD PCM, but even for them the check was always-true after 381e195b46d080aee1d9b05ef2b6b140e9463519 (and the DVD code was later moved out altogether). So just remove this check and the leftover DVD comment. Signed-off-by: Andreas Rheinhardt --- libavcodec/pcm.c | 6 -- 1 file changed, 6 deletions(-) diff --git a/libavcodec/pcm.c b/libavcodec/pcm.c index a23293dca2..5d8dcb8ff0 100644 --- a/libavcodec/pcm.c +++ b/libavcodec/pcm.c @@ -334,7 +334,6 @@ static int pcm_decode_frame(AVCodecContext *avctx, AVFrame *frame, sample_size = av_get_bits_per_sample(avctx->codec_id) / 8; -/* av_get_bits_per_sample returns 0 for AV_CODEC_ID_PCM_DVD */ samples_per_block = 1; if (avctx->codec_id == AV_CODEC_ID_PCM_LXF) { /* we process 40-bit blocks per channel for LXF */ @@ -342,11 +341,6 @@ static int pcm_decode_frame(AVCodecContext *avctx, AVFrame *frame, sample_size = 5; } -if (sample_size == 0) { -av_log(avctx, AV_LOG_ERROR, "Invalid sample_size\n"); -return AVERROR(EINVAL); -} - if (channels == 0) { av_log(avctx, AV_LOG_ERROR, "Invalid number of channels\n"); return AVERROR(EINVAL); -- 2.45.2 From 8d65f524e5d5c5b262e5d9bac6c1c319f72d5a24 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt Date: Tue, 11 Mar 2025 17:53:06 +0100 Subject: [PATCH 2/7] avcodec/pcm: Cache sample_size value No need to go through two switches per packet. Signed-off-by: Andreas Rheinhardt --- libavcodec/pcm.c | 16 ++-- 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/libavcodec/pcm.c b/libavcodec/pcm.c index 5d8dcb8ff0..620acf0f46 100644 --- a/libavcodec/pcm.c +++ b/libavcodec/pcm.c @@ -243,6 +243,7 @@ static int pcm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, } typedef struct PCMDecode { +int sample_size; short table[256]; void (*vector_fmul_scalar)(float *dst, const float *src, float mul, int len); @@ -286,8 +287,14 @@ static av_cold int pcm_decode_init(AVCodecContext *avctx) avctx->sample_fmt = avctx->codec->sample_fmts[0]; -if (avctx->sample_fmt == AV_SAMPLE_FMT_S32) -avctx->bits_per_raw_sample = av_get_bits_per_sample(avctx->codec_id); +if (avctx->codec_id != AV_CODEC_ID_PCM_LXF) { +int bits_per_sample = av_get_exact_bits_per_sample(avctx->codec_id); +if (avctx->sample_fmt == AV_SAMPLE_FMT_S32) +avctx->bits_per_raw_sample = bits_per_sample; +s->sample_size = bits_per_sample / 8; +} else { +s->sample_size = 5; +} return 0; } @@ -328,17 +335,14 @@ static int pcm_decode_frame(AVCodecContext *avctx, AVFrame *frame, int buf_size = avpkt->size; PCMDecode *s = avctx->priv_data; int channels = avctx->ch_layout.nb_channels; -int sample_size, c, n, ret, samples_per_block; +int sample_size = s->sample_size, c, n, ret, samples_per_block; uint8_t *samples; int32_t *dst_int32_t; -sample_size = av_get_bits_per_sample(avctx->codec_id) / 8; - samples_per_block = 1; if (avctx->codec_id == AV_CODEC_ID_PCM_LXF) { /* we process 40-bit blocks per channel for LXF */ samples_per_block = 2; -sample_size = 5; } if (channels == 0) { -- 2.45.2 From c221a9f2f31c8fcb65e478e8d71b127d9466f3d3 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt Date: Thu, 13 Mar 2025 04:37:12 +0100 Subject: [PATCH 3/7] avcodec/pcm: Remove duplication from FFCodec define macros Signed-off-by: Andreas Rheinhardt --- libavcodec/pcm.c | 78 +--- 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/libavcodec/pcm.c b/libavcodec/pcm.c index 620acf0f46..6c1feecca3 100644 --- a/libavcodec/pcm.c +++ b/libavcodec/pcm.c @@ -550,7 +550,7 @@ const FFCodec ff_ ## name_ ## _encoder = { \ .p.name = #name_, \ CODEC_LONG_NAME(long_name_),\ .p.type = AVMEDIA_TYPE_AUDIO, \ -.p.id = AV_CODEC_ID_ ## id_,\ +.p.id = id_,\ .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_VARIABLE_FRAME_SIZE | \ AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,\ .init = pcm_encode_init,\ @@ -563,7 +563,8 @@ const FFCodec ff_ ## name_ ## _encoder = { \ #d