Re: [FFmpeg-devel] [PATCH] avformat/id3v2: Support null-separated multi-value properties

2025-03-12 Thread Soft Works


> -Original Message-
> From: softworkz 
> Sent: Sonntag, 2. März 2025 00:57
> To: ffmpeg-devel@ffmpeg.org
> Cc: softworkz ; softworkz 
> Subject: [PATCH] avformat/id3v2: Support null-separated multi-value
> properties
> 
> From: softworkz 
> 
> Fixes Trac ticket https://trac.ffmpeg.org/ticket/6949
> 
> Signed-off-by: softworkz 
> ---
> avformat/id3v2: Support null-separated multi-value properties
> 
> Fixes Trac ticket https://trac.ffmpeg.org/ticket/6949
> 
> Published-As: https://github.com/ffstaging/FFmpeg/releases/tag/pr-
> ffstaging-54%2Fsoftworkz%2Fsubmit_id3v2-v1
> Fetch-It-Via: git fetch https://github.com/ffstaging/FFmpeg pr-
> ffstaging-54/softworkz/submit_id3v2-v1
> Pull-Request: https://github.com/ffstaging/FFmpeg/pull/54
> 
>  libavformat/id3v2.c | 65 -
>  1 file changed, 41 insertions(+), 24 deletions(-)
> 
> diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
> index 29ee59e1f4..cebb4acd75 100644
> --- a/libavformat/id3v2.c
> +++ b/libavformat/id3v2.c
> @@ -327,39 +327,55 @@ static void read_ttag(AVFormatContext *s,
> AVIOContext *pb, int taglen,
>AVDictionary **metadata, const char *key)
>  {
>  uint8_t *dst;
> -int encoding, dict_flags = AV_DICT_DONT_OVERWRITE |
> AV_DICT_DONT_STRDUP_VAL;
> +int encoding, nb_values = 0;
>  unsigned genre;
> +AVDictionaryEntry *tag = NULL;
> 
>  if (taglen < 1)
>  return;
> 
> +tag = av_dict_get(*metadata, key, NULL, 0);
> +if (tag)
> +return;
> +
>  encoding = avio_r8(pb);
>  taglen--; /* account for encoding type byte */
> 
> -if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
> -av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n",
> key);
> -return;
> -}
> +/* Read all null-terminated values */
> +while (taglen > 0) {
> +int n = 0, dict_flags = AV_DICT_APPEND |
> AV_DICT_DONT_STRDUP_VAL;
> 
> -if (!(strcmp(key, "TCON") && strcmp(key, "TCO"))
> &&
> -(sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre)
> == 1) &&
> -genre <= ID3v1_GENRE_MAX) {
> -av_freep(&dst);
> -dst = av_strdup(ff_id3v1_genre_str[genre]);
> -} else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) {
> -/* dst now contains the key, need to get value */
> -key = dst;
>  if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
>  av_log(s, AV_LOG_ERROR, "Error reading frame %s,
> skipped\n", key);
> -av_freep(&key);
>  return;
>  }
> -dict_flags |= AV_DICT_DONT_STRDUP_KEY;
> -} else if (!*dst)
> -av_freep(&dst);
> 
> -if (dst)
> -av_dict_set(metadata, key, dst, dict_flags);
> +if (!(strcmp(key, "TCON") && strcmp(key, "TCO")) &&
> +(sscanf(dst, "(%d)", &genre) == 1 || (sscanf(dst, "%d%n",
> &genre, &n) == 1 && n == strlen(dst))) &&
> +genre <= ID3v1_GENRE_MAX) {
> +av_freep(&dst);
> +dst = av_strdup(ff_id3v1_genre_str[genre]);
> +} else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) {
> +/* dst now contains the key, need to get value */
> +key = dst;
> +if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
> +av_log(s, AV_LOG_ERROR, "Error reading frame %s,
> skipped\n", key);
> +av_freep(&key);
> +return;
> +}
> +} else if (!*dst) {
> +av_freep(&dst);
> +return;
> +}
> +
> +if (dst) {
> +if (nb_values > 0)
> +av_dict_set(metadata, key, ";", dict_flags &
> ~AV_DICT_DONT_STRDUP_VAL);
> +
> +av_dict_set(metadata, key, dst, dict_flags);
> +nb_values++;
> +}
> +}
>  }
> 
>  static void read_uslt(AVFormatContext *s, AVIOContext *pb, int taglen,
> @@ -372,7 +388,7 @@ static void read_uslt(AVFormatContext *s,
> AVIOContext *pb, int taglen,
>  int encoding;
>  int ok = 0;
> 
> -if (taglen < 4)
> +if (taglen < 1)
>  goto error;
> 
>  encoding = avio_r8(pb);
> @@ -383,10 +399,10 @@ static void read_uslt(AVFormatContext *s,
> AVIOContext *pb, int taglen,
>  lang[3] = '\0';
>  taglen -= 3;
> 
> -if (decode_str(s, pb, encoding, &descriptor, &taglen) < 0 || taglen
> < 0)
> +if (decode_str(s, pb, encoding, &descriptor, &taglen) < 0)
>  goto error;
> 
> -if (decode_str(s, pb, encoding, &text, &taglen) < 0 || taglen < 0)
> +if (decode_str(s, pb, encoding, &text, &taglen) < 0)
>  goto error;
> 
>  // FFmpeg does not support hierarchical metadata, so concatenate
> the keys.
> @@ -1003,7 +1019,8 @@ static void id3v2_parse(AVIOContext *pb,
> AVDictionary **metadata,
>  t++;
>  }
> 
> -ffio_init_read_context(&pb_local, buffer, b - buffer);
> +ffio_init_context(&pb_

[FFmpeg-devel] [PATCH] avformat/dashenc: add hevc codec attributes parse

2025-03-12 Thread Jack Lau via ffmpeg-devel
fix ticket: 11316
add set_hevc_codec_str function refer to hlsenc.c but do some necessary changes
Signed-off-by: Jack Lau 
---
 libavformat/dashenc.c | 81 +++
 1 file changed, 81 insertions(+)

diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index d4a6fe0304..04c5b562d5 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -57,6 +57,7 @@
 #include "url.h"
 #include "vpcc.h"
 #include "dash.h"
+#include "nal.h"
 
 typedef enum {
 SEGMENT_TYPE_AUTO = 0,
@@ -344,6 +345,84 @@ static void set_vp9_codec_str(AVFormatContext *s, 
AVCodecParameters *par,
 return;
 }
 
+static void set_hevc_codec_str(AVCodecParameters *par, char *str, int size) {
+uint8_t *data = par->extradata;
+int profile = AV_PROFILE_UNKNOWN;
+uint32_t profile_compatibility = AV_PROFILE_UNKNOWN; 
+char tier = 0;
+int level = AV_LEVEL_UNKNOWN;
+char constraints[8] = "";
+
+if (par->profile != AV_PROFILE_UNKNOWN)
+profile = par->profile;
+if (par->level != AV_LEVEL_UNKNOWN)
+level = par->level;
+
+/* check the boundary of data which from current position is small than 
extradata_size */
+while (data && (data - par->extradata + 19) < par->extradata_size) {
+/* get HEVC SPS NAL and seek to profile_tier_level */
+if (!(data[0] | data[1] | data[2]) && data[3] == 1 && ((data[4] & 
0x7E) == 0x42)) {
+uint8_t *rbsp_buf;
+int remain_size = 0;
+int rbsp_size = 0;
+uint32_t profile_compatibility_flags = 0;
+uint8_t high_nibble = 0;
+/* skip start code + nalu header */
+data += 6;
+/* process by reference General NAL unit syntax */
+remain_size = par->extradata_size - (data - par->extradata);
+rbsp_buf = ff_nal_unit_extract_rbsp(data, remain_size, &rbsp_size, 
0);
+if (!rbsp_buf)
+return;
+if (rbsp_size < 13) {
+av_freep(&rbsp_buf);
+break;
+}
+/* skip sps_video_parameter_set_id   u(4),
+*  sps_max_sub_layers_minus1u(3),
+*  and sps_temporal_id_nesting_flag u(1) 
+* 
+* TIER represents the general_tier_flag, with 'L' indicating the 
flag is 0,
+* and 'H' indicating the flag is 1
+*/
+tier = (rbsp_buf[1] & 0x20) == 0 ? 'L' : 'H';
+profile = rbsp_buf[1] & 0x1f;
+/* PROFILE_COMPATIBILITY is general_profile_compatibility_flags, 
but in reverse bit order,
+* in a hexadecimal representation (leading zeroes may be omitted).
+*/
+profile_compatibility_flags = AV_RB32(rbsp_buf + 2);
+/* revise these bits to get the profile compatibility value */
+profile_compatibility_flags = ((profile_compatibility_flags & 
0xU) << 1) | ((profile_compatibility_flags >> 1) & 0xU);
+profile_compatibility_flags = ((profile_compatibility_flags & 
0xU) << 2) | ((profile_compatibility_flags >> 2) & 0xU);
+profile_compatibility_flags = ((profile_compatibility_flags & 
0x0F0F0F0FU) << 4) | ((profile_compatibility_flags >> 4) & 0x0F0F0F0FU);
+profile_compatibility_flags = ((profile_compatibility_flags & 
0x00FF00FFU) << 8) | ((profile_compatibility_flags >> 8) & 0x00FF00FFU);
+profile_compatibility = (profile_compatibility_flags << 16) | 
(profile_compatibility_flags >> 16);
+/* skip 8 + 8 + 32
+* CONSTRAINTS is a hexadecimal representation of the 
general_constraint_indicator_flags. 
+* each byte is separated by a '.', and trailing zero bytes may be 
omitted.
+* drop the trailing zero bytes refer to ISO/IEC14496-15.
+*/
+high_nibble = rbsp_buf[7] >> 4;
+snprintf(constraints, sizeof(constraints),
+high_nibble ? "%02x.%x" : "%02x",
+rbsp_buf[6], high_nibble);
+/* skip 8 + 8 + 32 + 4 + 43 + 1 bit */
+level = rbsp_buf[12];
+av_freep(&rbsp_buf);
+break;
+}
+data++;
+}
+if (profile != AV_PROFILE_UNKNOWN &&
+profile_compatibility != AV_PROFILE_UNKNOWN &&
+tier != 0 &&
+level != AV_LEVEL_UNKNOWN &&
+constraints[0] != '\0') {
+av_strlcatf(str, size, ".%d.%x.%c%d.%s", profile, 
profile_compatibility, tier, level, constraints);
+}
+return;
+}
+
 static void set_codec_str(AVFormatContext *s, AVCodecParameters *par,
   AVRational *frame_rate, char *str, int size)
 {
@@ -422,6 +501,8 @@ static void set_codec_str(AVFormatContext *s, 
AVCodecParameters *par,
 av_strlcatf(str, size, ".%02x%02x%02x",
 extradata[1], extradata[2], extradata[3]);
 av_free(tmpbuf);
+} else if (!strcmp(str, "

Re: [FFmpeg-devel] [PATCH] ffbuild: read library linker objects from a file

2025-03-12 Thread Martin Storsjö

On Wed, 12 Mar 2025, Gyan Doshi wrote:


On 2025-03-12 01:29 pm, Martin Storsjö wrote:

On Wed, 12 Mar 2025, Gyan Doshi wrote:


The linker command can exceed the maximum argument limit on MinGW,
especially for libavcodec.

The objects list is now stored in a file and passed to the linker.
---
ffbuild/library.mak | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ffbuild/library.mak b/ffbuild/library.mak
index 793e9d41fa..781b018e00 100644
--- a/ffbuild/library.mak
+++ b/ffbuild/library.mak
@@ -66,8 +66,10 @@ $(SUBDIR)$(SLIBNAME): $(SUBDIR)$(SLIBNAME_WITH_MAJOR)

$(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SHLIBOBJS) $(SLIBOBJS) 
$(SUBDIR)lib$(NAME).ver

$(SLIB_CREATE_DEF_CMD)
-    $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) $$(filter 
%.o,$$^) $(FFEXTRALIBS)

+    $(Q)echo $$(filter %.o,$$^) > $$@.objs
+    $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) @$$@.objs 
$(FFEXTRALIBS)

$(SLIB_EXTRA_CMD)
+    -$(RM) $$@.objs


Don't we need do the same for static libraries too?

On first glance, this looks quite reasonable... However, the limit 
that is an issue is the length of a command line. The .objs file is 
written with an "echo" command - doesn't that fundamentally also hit 
the same limit (just a little bit later, as there are fewer command 
line flags in this command)?


Or do we assume that make executes it with a shell, where the shell 
handles "echo" as a shell builtin so that it doesn't actually spawn a 
subprocess for this? Can you test it, if you could extend the list of 
object files to the point where the .objs file is clearly over 32 KB, 
and verify that it did include all the files you intended?


The specific error I got when building a shared build of 7.1.1 (with ~90 
external libs) was


/bin/sh: line 1: /mingw64/bin/ccache: Argument list too long

Got same error without ccache.

The static build of the same config succeeded without any patching.
The .objs file generated for libavcodec shared build is 29KB.

How do I inflate the size to above 32K? I've enabled pretty much 
everything I can.


The simplest would probably be to add a bunch of empty .c files (with long 
file names) in libavcodec and hook them up in the makefile. We won't 
notice if they are missed when linking of course, but if we pass the 
command line length limit, we should still notice it in one way or 
another.


// Martin
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v2 2/2] avformat/webvttdec: Add webvtt extension and MIME type

2025-03-12 Thread Soft Works



> -Original Message-
> From: ffmpeg-devel  On Behalf Of
> Michael Niedermayer
> Sent: Dienstag, 25. Februar 2025 02:04
> To: FFmpeg development discussions and patches 
> Subject: Re: [FFmpeg-devel] [PATCH v2 2/2] avformat/webvttdec: Add
> webvtt extension and MIME type
> 
> On Fri, Feb 21, 2025 at 03:13:22PM +, softworkz wrote:
> > From: softworkz 
> >
> > The webvtt extension is sometimes used in HLS playlists.
> >
> > Signed-off-by: softworkz 
> > ---
> >  libavformat/webvttdec.c | 3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/libavformat/webvttdec.c b/libavformat/webvttdec.c
> > index 6e60348764..6feda1585e 100644
> > --- a/libavformat/webvttdec.c
> > +++ b/libavformat/webvttdec.c
> > @@ -216,7 +216,8 @@ static const AVClass webvtt_demuxer_class = {
> >  const FFInputFormat ff_webvtt_demuxer = {
> >  .p.name = "webvtt",
> >  .p.long_name= NULL_IF_CONFIG_SMALL("WebVTT subtitle"),
> > -.p.extensions   = "vtt",
> > +.p.mime_type= "text/vtt",
> > +.p.extensions   = "vtt,webvtt",
> >  .p.priv_class   = &webvtt_demuxer_class,
> >  .priv_data_size = sizeof(WebVTTContext),
> >  .flags_internal = FF_INFMT_FLAG_INIT_CLEANUP,
> 
> should be ok
> maybe needs backporting
> 
> thx
> 
> [...]
> --
> Michael


Are there any more comments or objections from anybody?


After merge I'll follow up with a patch to support mpegts time mapping via 
X-TIMESTAMP-MAP in the  WebVTT headers.


Thanks
sw
 


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] ffbuild: read library linker objects from a file

2025-03-12 Thread Gyan Doshi



On 2025-03-12 01:29 pm, Martin Storsjö wrote:

On Wed, 12 Mar 2025, Gyan Doshi wrote:


The linker command can exceed the maximum argument limit on MinGW,
especially for libavcodec.

The objects list is now stored in a file and passed to the linker.
---
ffbuild/library.mak | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ffbuild/library.mak b/ffbuild/library.mak
index 793e9d41fa..781b018e00 100644
--- a/ffbuild/library.mak
+++ b/ffbuild/library.mak
@@ -66,8 +66,10 @@ $(SUBDIR)$(SLIBNAME): $(SUBDIR)$(SLIBNAME_WITH_MAJOR)

$(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SHLIBOBJS) $(SLIBOBJS) 
$(SUBDIR)lib$(NAME).ver

$(SLIB_CREATE_DEF_CMD)
-    $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) $$(filter 
%.o,$$^) $(FFEXTRALIBS)

+    $(Q)echo $$(filter %.o,$$^) > $$@.objs
+    $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) @$$@.objs 
$(FFEXTRALIBS)

$(SLIB_EXTRA_CMD)
+    -$(RM) $$@.objs


Don't we need do the same for static libraries too?

On first glance, this looks quite reasonable... However, the limit 
that is an issue is the length of a command line. The .objs file is 
written with an "echo" command - doesn't that fundamentally also hit 
the same limit (just a little bit later, as there are fewer command 
line flags in this command)?


Or do we assume that make executes it with a shell, where the shell 
handles "echo" as a shell builtin so that it doesn't actually spawn a 
subprocess for this? Can you test it, if you could extend the list of 
object files to the point where the .objs file is clearly over 32 KB, 
and verify that it did include all the files you intended?


The specific error I got when building a shared build of 7.1.1 (with ~90 
external libs) was


/bin/sh: line 1: /mingw64/bin/ccache: Argument list too long

Got same error without ccache.

The static build of the same config succeeded without any patching.
The .objs file generated for libavcodec shared build is 29KB.

How do I inflate the size to above 32K? I've enabled pretty much 
everything I can.



Secondly, less fundamental - even if we try to remove the .objs file 
here at the same time it's quite possible for it to be left behind 
(e.g. if the linking command fails) - so it would be good to make sure 
that it is removed on "make clean" too.


Plus we probably should include it in .gitignore, if someone does 
in-tree builds.


Will add both.

Regards,
Gyan

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] ffbuild: read library linker objects from a file

2025-03-12 Thread Zhao Zhili


> On Mar 12, 2025, at 16:34, Gyan Doshi  wrote:
> 
> 
> 
> On 2025-03-12 01:29 pm, Martin Storsjö wrote:
>> On Wed, 12 Mar 2025, Gyan Doshi wrote:
>> 
>>> The linker command can exceed the maximum argument limit on MinGW,
>>> especially for libavcodec.
>>> 
>>> The objects list is now stored in a file and passed to the linker.
>>> ---
>>> ffbuild/library.mak | 4 +++-
>>> 1 file changed, 3 insertions(+), 1 deletion(-)
>>> 
>>> diff --git a/ffbuild/library.mak b/ffbuild/library.mak
>>> index 793e9d41fa..781b018e00 100644
>>> --- a/ffbuild/library.mak
>>> +++ b/ffbuild/library.mak
>>> @@ -66,8 +66,10 @@ $(SUBDIR)$(SLIBNAME): $(SUBDIR)$(SLIBNAME_WITH_MAJOR)
>>> 
>>> $(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SHLIBOBJS) $(SLIBOBJS) 
>>> $(SUBDIR)lib$(NAME).ver
>>> $(SLIB_CREATE_DEF_CMD)
>>> -$$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) $$(filter %.o,$$^) 
>>> $(FFEXTRALIBS)
>>> +$(Q)echo $$(filter %.o,$$^) > $$@.objs
>>> +$$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) @$$@.objs 
>>> $(FFEXTRALIBS)
>>> $(SLIB_EXTRA_CMD)
>>> +-$(RM) $$@.objs
>> 
>> Don't we need do the same for static libraries too?
>> 
>> On first glance, this looks quite reasonable... However, the limit that is 
>> an issue is the length of a command line. The .objs file is written with an 
>> "echo" command - doesn't that fundamentally also hit the same limit (just a 
>> little bit later, as there are fewer command line flags in this command)?
>> 
>> Or do we assume that make executes it with a shell, where the shell handles 
>> "echo" as a shell builtin so that it doesn't actually spawn a subprocess for 
>> this? Can you test it, if you could extend the list of object files to the 
>> point where the .objs file is clearly over 32 KB, and verify that it did 
>> include all the files you intended?
> 
> The specific error I got when building a shared build of 7.1.1 (with ~90 
> external libs) was
> 
> /bin/sh: line 1: /mingw64/bin/ccache: Argument list too long

I have got the same error from user’s report, but I cannot reproduce the error 
by myself.

FYI, there is a meson port of ffmpeg in gstreamer. It’s about five times faster 
than configure+make on Windows. Maybe not a ready for build and release binary, 
but can save a lot of time when developing and test.

https://gitlab.freedesktop.org/gstreamer/meson-ports/ffmpeg

> 
> Got same error without ccache.
> 
> The static build of the same config succeeded without any patching.
> The .objs file generated for libavcodec shared build is 29KB.
> 
> How do I inflate the size to above 32K? I've enabled pretty much everything I 
> can.
> 
> 
>> Secondly, less fundamental - even if we try to remove the .objs file here at 
>> the same time it's quite possible for it to be left behind (e.g. if the 
>> linking command fails) - so it would be good to make sure that it is removed 
>> on "make clean" too.
>> 
>> Plus we probably should include it in .gitignore, if someone does in-tree 
>> builds.
> 
> Will add both.
> 
> Regards,
> Gyan
> 
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [RFC] New swscale internal design prototype

2025-03-12 Thread Niklas Haas
On Wed, 12 Mar 2025 09:15:22 +0200 Martin Storsjö  wrote:
> On Wed, 12 Mar 2025, Niklas Haas wrote:
>
> > On Sun, 09 Mar 2025 20:45:23 +0100 Niklas Haas  wrote:
> >> On Sun, 09 Mar 2025 18:11:54 +0200 Martin Storsjö  wrote:
> >> > On Sat, 8 Mar 2025, Niklas Haas wrote:
> >> >
> >> > > What are the thoughts on the float-first approach?
> >> >
> >> > In general, for modern architectures, relying on floats probably is
> >> > reasonable. (On architectures that aren't of quite as widespread 
> >> > interest,
> >> > it might not be so clear cut though.)
> >> >
> >> > However with the benchmark example you provided a couple of weeks ago, we
> >> > concluded that even on x86 on modern HW, floats were faster than int16
> >> > only in one case: When using Clang, not GCC, and when compiling with
> >> > -mavx2, not without it. In all the other cases, int16 was faster than
> >> > float.
> >>
> >> Hi Martin,
> >>
> >> I should preface that this particular benchmark was a very specific test 
> >> for
> >> floating point *filtering*, which is considerably more punishing than the
> >> conversion pipeline I have implemented here, and I think it's partly the
> >> fault of compilers generating very unoptimal filtering code.
> >>
> >> I think it would be better to re-assess using the current prototype on 
> >> actual
> >> hardware. I threw up a quick NEON test branch: (untested, should hopefully 
> >> work)
> >> https://github.com/haasn/FFmpeg/commits/swscale3-neon
> >>
> >> # adjust the benchmark iters count as needed based on the HW perf
> >> make libswscale/tests/swscale && libswscale/tests/swscale -unscaled 1 
> >> -bench 50
> >>
> >> If this differs significantly from the ~1.8x speedup I measure on x86, I
> >> will be far more concerned about the new approach.
>
> Sorry, I haven't had time to try this out myself yet...

No worries. I think I have gathered enough performance figures myself to come
to the conclusion that this approach won't work unmodified - not because of the
usage of floats so much as the fact that the load/store overhead is
sufficiently expensive in very simple scenarios to the point where it outweighs
the benefits.

I think my plan for now is the following:

1. Delete all of the "optimized" variants of the C templates, and keep only the
   general purpose base case merely as a fallback / reference code.

2. Instead, make the architectural split at a higher level; and allow arch-
   specific implementations to choose their own preferred chunk size, or even
   do something wildly different like runtime code generation or custom calling
   conventions.

3. Merge the new code for now guarded under an explicit opt in flag so we can
   continue to develop it alongside the existing approach until arch-specific
   optimized variants are available and sufficiently fast in _all_ cases.

>
> > I gave it a try. So, the result of a naive/blind run on a Cortex-X1 using 
> > clang
> > version 20.0.0 (from the latest Android NDK v29) is:
> >
> > Overall speedup=1.688x faster, min=0.141x max=45.898x
> >
> > This has quite a lot more significant speed regressions compared to x86 
> > though.
> >
> > In particular, clang/LLVM refuses to vectorize packed reads of 2 or 3 
> > elements,
> > so any sort of operation involving rgb24 or bgr24 suffers horribly:
>
> So, if the performance of this relies on compiler autovectorization,
> what's the plan wrt GCC? We blanket disable autovectorization when
> compiling with GCC - see fd6dbc53855fbfc9a782095d0ffe11dd3a98905f for when
> it was disabled last time. Building and running fate with
> autovectorization in GCC does succeed at least on modern GCC on x86_64,
> but it's of course possible that it still can cause issues in various more
> tricky configurations.

See https://github.com/haasn/FFmpeg/blob/swscale3/libswscale/ops_internal.h#L28

>
> // Martin
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v2] ffbuild: read library linker objects from a file

2025-03-12 Thread Gyan Doshi
The linker command can exceed the maximum argument limit on MinGW,
especially for libavcodec.

The objects list is now stored in a file and passed to the linker.
---
v2:
   static linking also switched to use a file for object list
   objs added to clean target and to gitignore

 .gitignore  | 1 +
 ffbuild/common.mak  | 2 +-
 ffbuild/library.mak | 8 ++--
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index 9cfc78b414..430abaf91b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 *.a
 *.o
+*.objs
 *.o.*
 *.d
 *.def
diff --git a/ffbuild/common.mak b/ffbuild/common.mak
index 023adb8567..ca45a0f368 100644
--- a/ffbuild/common.mak
+++ b/ffbuild/common.mak
@@ -214,7 +214,7 @@ $(TOOLOBJS): | tools
 
 OUTDIRS := $(OUTDIRS) $(dir $(OBJS) $(HOBJS) $(HOSTOBJS) $(SLIBOBJS) 
$(SHLIBOBJS) $(STLIBOBJS) $(TESTOBJS))
 
-CLEANSUFFIXES = *.d *.gcda *.gcno *.h.c *.ho *.map *.o *.pc *.ptx *.ptx.gz 
*.ptx.c *.ver *.version *$(DEFAULT_X86ASMD).asm *~ *.ilk *.pdb
+CLEANSUFFIXES = *.d *.gcda *.gcno *.h.c *.ho *.map *.o *.objs *.pc *.ptx 
*.ptx.gz *.ptx.c *.ver *.version *$(DEFAULT_X86ASMD).asm *~ *.ilk *.pdb
 LIBSUFFIXES   = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a
 
 define RULES
diff --git a/ffbuild/library.mak b/ffbuild/library.mak
index 793e9d41fa..53f739123a 100644
--- a/ffbuild/library.mak
+++ b/ffbuild/library.mak
@@ -35,8 +35,10 @@ OBJS += $(SHLIBOBJS)
 endif
 $(SUBDIR)$(LIBNAME): $(OBJS) $(STLIBOBJS)
$(RM) $@
-   $(AR) $(ARFLAGS) $(AR_O) $^
+   $(Q)echo $$^ > $$@.objs
+   $(AR) $(ARFLAGS) $(AR_O) @$$@.objs
$(RANLIB) $@
+   -$(RM) $$@.objs
 
 install-headers: install-lib$(NAME)-headers install-lib$(NAME)-pkgconfig
 
@@ -66,8 +68,10 @@ $(SUBDIR)$(SLIBNAME): $(SUBDIR)$(SLIBNAME_WITH_MAJOR)
 
 $(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SHLIBOBJS) $(SLIBOBJS) 
$(SUBDIR)lib$(NAME).ver
$(SLIB_CREATE_DEF_CMD)
-   $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) $$(filter %.o,$$^) 
$(FFEXTRALIBS)
+   $(Q)echo $$(filter %.o,$$^) > $$@.objs
+   $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) @$$@.objs 
$(FFEXTRALIBS)
$(SLIB_EXTRA_CMD)
+   -$(RM) $$@.objs
 
 ifdef SUBDIR
 $(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(DEP_LIBS)
-- 
2.46.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] avcodec/vlc: Reduce debug logging

2025-03-12 Thread softworkz
From: softworkz 

Signed-off-by: softworkz 
---
avcodec/vlc: Reduce debug logging

It made it hardly possible to enable debug logging for viewing other log
lines due to the excessive output created by this.

Published-As: 
https://github.com/ffstaging/FFmpeg/releases/tag/pr-ffstaging-60%2Fsoftworkz%2Fsubmit_vlclogging-v1
Fetch-It-Via: git fetch https://github.com/ffstaging/FFmpeg 
pr-ffstaging-60/softworkz/submit_vlclogging-v1
Pull-Request: https://github.com/ffstaging/FFmpeg/pull/60

 libavcodec/vlc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/vlc.c b/libavcodec/vlc.c
index f46ecbb55e..c49c801181 100644
--- a/libavcodec/vlc.c
+++ b/libavcodec/vlc.c
@@ -155,7 +155,7 @@ static int build_table(VLC *vlc, int table_nb_bits, int 
nb_codes,
 int n = codes[i].bits;
 uint32_t code = codes[i].code;
 intsymbol = codes[i].symbol;
-ff_dlog(NULL, "i=%d n=%d code=0x%"PRIx32"\n", i, n, code);
+ff_tlog(NULL, "i=%d n=%d code=0x%"PRIx32"\n", i, n, code);
 if (n <= table_nb_bits) {
 /* no need to add another table */
 int   j = code >> (32 - table_nb_bits);
@@ -169,7 +169,7 @@ static int build_table(VLC *vlc, int table_nb_bits, int 
nb_codes,
 for (int k = 0; k < nb; k++) {
 int   bits = table[j].len;
 int oldsym = table[j].sym;
-ff_dlog(NULL, "%4x: code=%d n=%d\n", j, i, n);
+ff_tlog(NULL, "%4x: code=%d n=%d\n", j, i, n);
 if ((bits || oldsym) && (bits != n || oldsym != symbol)) {
 av_log(NULL, AV_LOG_ERROR, "incorrect codes\n");
 return AVERROR_INVALIDDATA;

base-commit: 1bce40cb73fffd9d1fb6e118d71ac8999cded808
-- 
ffmpeg-codebot
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] fftools/ffmpeg_opt: Remove unused alt_bsf

2025-03-12 Thread James Almer

On 3/12/2025 10:25 AM, Andreas Rheinhardt wrote:

From 3809f60cd976de71bd409bc24b9bd11b43aacee4 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt 
Date: Wed, 12 Mar 2025 14:23:51 +0100
Subject: [PATCH] fftools/ffmpeg_opt: Remove unused alt_bsf

Forgotten in 6325aede08d5c7086b3798cb7041299e1d07f93a.

Signed-off-by: Andreas Rheinhardt 
---
 fftools/ffmpeg_opt.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
index 27a9fc9e42..1f5e6050c8 100644
--- a/fftools/ffmpeg_opt.c
+++ b/fftools/ffmpeg_opt.c
@@ -1507,7 +1507,6 @@ static int opt_adrift_threshold(void *optctx, const char 
*opt, const char *arg)
 }
 #endif
 
-static const char *const alt_bsf[]= { "absf", "vbsf", NULL };

 static const char *const alt_channel_layout[] = { "ch_layout", NULL};
 static const char *const alt_codec[]  = { "c", "acodec", "vcodec", "scodec", 
"dcodec", NULL };
 static const char *const alt_filter[] = { "af", "vf", NULL };
--
2.45.2



LGTM



OpenPGP_signature.asc
Description: OpenPGP digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v2] swscale/utils: split off format code into new file

2025-03-12 Thread Niklas Haas
From: Niklas Haas 

utils.c is getting quite crowded, and I need a new place to dump a lot of
format handling code for the ongoing rewrite. Rather than bloating this file
even more, start splitting format handling helpers off into a new file.

This also renames the existing utils.h header, which didn't really contain
anything except the SwsFormat definition anyway (the prototypes for what should
have been in utils.h are all still in the legacy swscale_internal.h).
---
 libswscale/Makefile  |   1 +
 libswscale/cms.c |   2 +-
 libswscale/cms.h |   2 +-
 libswscale/csputils.c|   2 +-
 libswscale/format.c  | 582 +++
 libswscale/{utils.h => format.h} |   6 +-
 libswscale/graph.c   |   2 +-
 libswscale/graph.h   |   2 +-
 libswscale/lut3d.h   |   2 +-
 libswscale/utils.c   | 559 -
 10 files changed, 592 insertions(+), 568 deletions(-)
 create mode 100644 libswscale/format.c
 rename libswscale/{utils.h => format.h} (98%)

diff --git a/libswscale/Makefile b/libswscale/Makefile
index 267952d870..4cf4bb2602 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -10,6 +10,7 @@ OBJS = alphablend.o \
csputils.o   \
hscale.o \
hscale_fast_bilinear.o   \
+   format.o \
gamma.o  \
graph.o  \
half2float.o \
diff --git a/libswscale/cms.c b/libswscale/cms.c
index 0a2e8bafda..a23d7a9772 100644
--- a/libswscale/cms.c
+++ b/libswscale/cms.c
@@ -29,7 +29,7 @@
 #include "cms.h"
 #include "csputils.h"
 #include "libswscale/swscale.h"
-#include "utils.h"
+#include "format.h"
 
 bool ff_sws_color_map_noop(const SwsColorMap *map)
 {
diff --git a/libswscale/cms.h b/libswscale/cms.h
index 0c730e520d..5323a27260 100644
--- a/libswscale/cms.h
+++ b/libswscale/cms.h
@@ -27,7 +27,7 @@
 
 #include "csputils.h"
 #include "swscale.h"
-#include "utils.h"
+#include "format.h"
 
 /* Minimum, maximum, and default knee point for perceptual tone mapping [0,1] 
*/
 #define PERCEPTUAL_KNEE_MIN 0.10f
diff --git a/libswscale/csputils.c b/libswscale/csputils.c
index 9546b26fe1..728871d293 100644
--- a/libswscale/csputils.c
+++ b/libswscale/csputils.c
@@ -21,7 +21,7 @@
 #include "libavutil/csp.h"
 
 #include "csputils.h"
-#include "utils.h"
+#include "format.h"
 
 void ff_sws_matrix3x3_mul(SwsMatrix3x3 *a, const SwsMatrix3x3 *b)
 {
diff --git a/libswscale/format.c b/libswscale/format.c
new file mode 100644
index 00..958577796c
--- /dev/null
+++ b/libswscale/format.c
@@ -0,0 +1,582 @@
+/*
+ * Copyright (C) 2024 Niklas Haas
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/hdr_dynamic_metadata.h"
+#include "libavutil/mastering_display_metadata.h"
+
+#include "format.h"
+
+typedef struct FormatEntry {
+uint8_t is_supported_in :1;
+uint8_t is_supported_out:1;
+uint8_t is_supported_endianness :1;
+} FormatEntry;
+
+/* Format support table for legacy swscale */
+static const FormatEntry format_entries[] = {
+[AV_PIX_FMT_YUV420P]= { 1, 1 },
+[AV_PIX_FMT_YUYV422]= { 1, 1 },
+[AV_PIX_FMT_RGB24]  = { 1, 1 },
+[AV_PIX_FMT_BGR24]  = { 1, 1 },
+[AV_PIX_FMT_YUV422P]= { 1, 1 },
+[AV_PIX_FMT_YUV444P]= { 1, 1 },
+[AV_PIX_FMT_YUV410P]= { 1, 1 },
+[AV_PIX_FMT_YUV411P]= { 1, 1 },
+[AV_PIX_FMT_GRAY8]  = { 1, 1 },
+[AV_PIX_FMT_MONOWHITE]  = { 1, 1 },
+[AV_PIX_FMT_MONOBLACK]  = { 1, 1 },
+[AV_PIX_FMT_PAL8]   = { 1, 0 },
+[AV_PIX_FMT_YUVJ420P]   = { 1, 1 },
+[AV_PIX_FMT_YUVJ411P]   = { 1, 1 },
+[AV_PIX_FMT_YUVJ422P]   = { 1, 1 },
+[AV_PIX_FMT_YUVJ444P]   = { 1, 1 },
+[AV_PIX_FMT_YVYU422]= { 1, 1 },
+[AV_PIX_FMT_UYVY422]= { 1, 1 },
+[AV_PIX_FMT_UYYVYY411]  = { 0, 

Re: [FFmpeg-devel] [PATCH] ffbuild: read library linker objects from a file

2025-03-12 Thread Martin Storsjö

On Wed, 12 Mar 2025, Gyan Doshi wrote:


On 2025-03-12 03:12 pm, Martin Storsjö wrote:

On Wed, 12 Mar 2025, Gyan Doshi wrote:


On 2025-03-12 01:29 pm, Martin Storsjö wrote:

On Wed, 12 Mar 2025, Gyan Doshi wrote:


The linker command can exceed the maximum argument limit on MinGW,
especially for libavcodec.

The objects list is now stored in a file and passed to the linker.
---
ffbuild/library.mak | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ffbuild/library.mak b/ffbuild/library.mak
index 793e9d41fa..781b018e00 100644
--- a/ffbuild/library.mak
+++ b/ffbuild/library.mak
@@ -66,8 +66,10 @@ $(SUBDIR)$(SLIBNAME): 
$(SUBDIR)$(SLIBNAME_WITH_MAJOR)


$(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SHLIBOBJS) $(SLIBOBJS) 
$(SUBDIR)lib$(NAME).ver

$(SLIB_CREATE_DEF_CMD)
-    $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) $$(filter 
%.o,$$^) $(FFEXTRALIBS)

+    $(Q)echo $$(filter %.o,$$^) > $$@.objs
+    $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) @$$@.objs 
$(FFEXTRALIBS)

$(SLIB_EXTRA_CMD)
+    -$(RM) $$@.objs


Don't we need do the same for static libraries too?

On first glance, this looks quite reasonable... However, the limit 
that is an issue is the length of a command line. The .objs file is 
written with an "echo" command - doesn't that fundamentally also hit 
the same limit (just a little bit later, as there are fewer command 
line flags in this command)?


Or do we assume that make executes it with a shell, where the shell 
handles "echo" as a shell builtin so that it doesn't actually spawn 
a subprocess for this? Can you test it, if you could extend the list 
of object files to the point where the .objs file is clearly over 32 
KB, and verify that it did include all the files you intended?


The specific error I got when building a shared build of 7.1.1 (with 
~90 external libs) was


/bin/sh: line 1: /mingw64/bin/ccache: Argument list too long

Got same error without ccache.

The static build of the same config succeeded without any patching.
The .objs file generated for libavcodec shared build is 29KB.

How do I inflate the size to above 32K? I've enabled pretty much 
everything I can.


The simplest would probably be to add a bunch of empty .c files (with 
long file names) in libavcodec and hook them up in the makefile. We 
won't notice if they are missed when linking of course, but if we pass 
the command line length limit, we should still notice it in one way or 
another.


I did something simpler. I just duplicated the arg on the echo line. The 
build was successful. Each objs file was twice as big, same for the 
generated DLL.

Does that answer your queries?


Yes, thanks.

Btw, just to be clear - are you running this in msys2? If you're running 
mingw build tools via WSL, there wouldn't be any issue when running the 
shell commands on the linux side, but it's specifically with the msys2 
make where it's interesting to know which limitations make and the shell 
runs into or avoids.


// Martin
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context

2025-03-12 Thread Lynne

On 10/03/2025 18:42, Lynne wrote:

On 10/03/2025 04:14, Andreas Rheinhardt wrote:

Lynne:

---
  libavcodec/ffv1.h    |  3 +++
  libavcodec/ffv1dec.c | 19 +--
  2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
index 8c0e71284d..860a5c14b1 100644
--- a/libavcodec/ffv1.h
+++ b/libavcodec/ffv1.h
@@ -174,6 +174,9 @@ typedef struct FFV1Context {
   * NOT shared between frame threads.
   */
  uint8_t   frame_damaged;
+
+    /* Reference to the current packet */
+    AVPacket *pkt_ref;
  } FFV1Context;
  int ff_ffv1_common_init(AVCodecContext *avctx, FFV1Context *s);
diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index eaa21eebdf..6396f22f79 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -469,6 +469,10 @@ static av_cold int decode_init(AVCodecContext 
*avctx)

  f->pix_fmt = AV_PIX_FMT_NONE;
  f->configured_pix_fmt = AV_PIX_FMT_NONE;
+    f->pkt_ref = av_packet_alloc();
+    if (!f->pkt_ref)
+    return AVERROR(ENOMEM);
+
  if ((ret = ff_ffv1_common_init(avctx, f)) < 0)
  return ret;
@@ -701,6 +705,10 @@ static int decode_frame(AVCodecContext *avctx, 
AVFrame *rframe,

  /* Start */
  if (hwaccel) {
+    ret = av_packet_ref(f->pkt_ref, avpkt);
+    if (ret < 0)
+    return ret;
+
  ret = hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
  if (ret < 0)
  return ret;
@@ -720,15 +728,21 @@ static int decode_frame(AVCodecContext *avctx, 
AVFrame *rframe,

  uint32_t len;
  ret = find_next_slice(avctx, avpkt->data, buf_end, i,
    &pos, &len);
-    if (ret < 0)
+    if (ret < 0) {
+    av_packet_unref(f->pkt_ref);
  return ret;
+    }
  buf_end -= len;
  ret = hwaccel->decode_slice(avctx, pos, len);
-    if (ret < 0)
+    if (ret < 0) {
+    av_packet_unref(f->pkt_ref);
  return ret;
+    }
  }
+
+    av_packet_unref(f->pkt_ref);
  } else {
  ret = decode_slices(avctx, c, avpkt);
  if (ret < 0)
@@ -827,6 +841,7 @@ static av_cold int 
ffv1_decode_close(AVCodecContext *avctx)

  ff_progress_frame_unref(&s->last_picture);
  av_refstruct_unref(&s->hwaccel_last_picture_private);
+    av_packet_free(&s->pkt_ref);
  ff_ffv1_close(s);
  return 0;


Why not simply use a const AVPacket*?


No reason. Fixed locally.
Thanks.


*reverted this change.
We need to ref the packet, since we map its memory and let the GPU use 
it directly without copying the contents. 6k16bit content at 24fps is 
typically around 2Gbps when compressed, so avoiding copies is important.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] ffbuild: read library linker objects from a file

2025-03-12 Thread Gyan Doshi
The linker command can exceed the maximum argument limit on MinGW,
especially for libavcodec.

The objects list is now stored in a file and passed to the linker.
---
 ffbuild/library.mak | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ffbuild/library.mak b/ffbuild/library.mak
index 793e9d41fa..781b018e00 100644
--- a/ffbuild/library.mak
+++ b/ffbuild/library.mak
@@ -66,8 +66,10 @@ $(SUBDIR)$(SLIBNAME): $(SUBDIR)$(SLIBNAME_WITH_MAJOR)
 
 $(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SHLIBOBJS) $(SLIBOBJS) 
$(SUBDIR)lib$(NAME).ver
$(SLIB_CREATE_DEF_CMD)
-   $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) $$(filter %.o,$$^) 
$(FFEXTRALIBS)
+   $(Q)echo $$(filter %.o,$$^) > $$@.objs
+   $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) @$$@.objs 
$(FFEXTRALIBS)
$(SLIB_EXTRA_CMD)
+   -$(RM) $$@.objs
 
 ifdef SUBDIR
 $(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(DEP_LIBS)
-- 
2.46.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v2 FFmpeg 20/20] doc/filters.texi: avgclass documentation

2025-03-12 Thread m.kaindl0208
Signed-off-by: MaximilianKaindl 
---
 doc/filters.texi | 64 
 1 file changed, 64 insertions(+)

diff --git a/doc/filters.texi b/doc/filters.texi
index a7046e0f4e..340ce39e2a 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -30776,6 +30776,70 @@ bench=start,selectivecolor=reds=-.2 .12 -.49,bench=stop
 @end example
 @end itemize

+@section avgclass
+
+Average classification probabilities across multiple frames for both audio and 
video streams.
+
+This filter analyzes classification data from frame side data (bounding boxes) 
and calculates average confidence scores for each label. The filter processes 
classification metadata from the @code{dnn_classify} filter or other sources 
that generate AVDetectionBBox side data, computing averages over the entire 
stream.
+
+At the end of the stream (or when manually triggered), the filter outputs the 
average probability for each detected class, both to console logs and 
optionally to a CSV file.
+
+@table @option
+@item output_file
+Path to a CSV output file where average classification results will be 
written. If not specified, results are only printed to log output.
+
+@item v
+Specify the number of video streams (default: 1).
+
+@item a
+Specify the number of audio streams (default: 0).
+@end table
+
+This filter supports the following commands:
+
+@table @option
+@item writeinfo
+Immediately write current average classification results to the log and output 
file (if specified) without waiting for the stream to end.
+
+@item flush
+Force the filter to write results and flush all its internal state.
+@end table
+
+@subsection Examples
+
+Process a video with object detection and classification, then calculate 
average classification probabilities:
+@example
+ffmpeg -i input.mp4 -vf 
"dnn_detect=model=detection.xml:input=data:output=detection_out:confidence=0.5,dnn_classify=model=classification.pt:dnn_backend=torch:tokenizer=tokenizer.json:labels=labels.txt,avgclass=output_file=results.csv"
 -f null -
+@end example
+
+Process both audio and video classification:
+@example
+ffmpeg -i input.mkv -filter_complex "[0:v]dnn_classify[v0]; 
[0:a]aformat=sample_fmts=fltp,dnn_classify=dnn_backend=torch:model=clap_model.pt:is_audio=1:tokenizer=tokenizer.json:labels=audio_labels.txt[a0];
 [v0][a0]avgclass=v=1:a=1:output_file=av_results.csv" -f null -
+@end example
+
+@subsection Output Format
+
+When the filter completes processing (or when the @code{writeinfo} command is 
sent), it outputs classification results in this format:
+
+@example
+Classification averages:
+Stream #0:
+  Label: cat: Average probability 0.8765, Appeared 120 times
+  Label: dog: Average probability 0.3421, Appeared 42 times
+Stream #1:
+  Label: music: Average probability 0.9823, Appeared 315 times
+  Label: speech: Average probability 0.1245, Appeared 15 times
+@end example
+
+If an output file is specified, the same data is written in CSV format:
+@example
+stream_id,label,avg_probability,count
+0,cat,0.8765,120
+0,dog,0.3421,42
+1,music,0.9823,315
+1,speech,0.1245,15
+@end example
+
 @section concat

 Concatenate audio and video streams, joining them together one after the
--
2.34.1


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] avformat/fifo: Check for keyframe video type before stop dropping

2025-03-12 Thread Arthur Grillo
The current behavior when using restart_with_keyframe is that it will
recover if it also encounters any audio packet, as they are flagged as a
keyframe.

The expectation is that packets are dropped until the next _video_
keyframe.

Fix that by checking the packet stream codec type, only letting it
recover when it is a video one.

Fixes ticket: #11467

Signed-off-by: Arthur Grillo 
---
Cc: lingjiujia...@gmail.com
---
 libavformat/fifo.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/libavformat/fifo.c b/libavformat/fifo.c
index 
23e4149ad606b201aade77e28057afe8e5fe2b71..1ebfa1f38f3affec2d81b5a2bb9d69190a5aa63f
 100644
--- a/libavformat/fifo.c
+++ b/libavformat/fifo.c
@@ -185,14 +185,21 @@ static int fifo_thread_write_packet(FifoThreadContext 
*ctx, AVPacket *pkt)
 AVRational src_tb, dst_tb;
 int ret, s_idx;
 int64_t orig_pts, orig_dts, orig_duration;
+enum AVMediaType stream_codec_type = 
avf->streams[pkt->stream_index]->codecpar->codec_type;
 
 if (fifo->timeshift && pkt->dts != AV_NOPTS_VALUE)
 atomic_fetch_sub_explicit(&fifo->queue_duration, next_duration(avf, 
pkt, &ctx->last_received_dts), memory_order_relaxed);
 
 if (ctx->drop_until_keyframe) {
 if (pkt->flags & AV_PKT_FLAG_KEY) {
-ctx->drop_until_keyframe = 0;
-av_log(avf, AV_LOG_VERBOSE, "Keyframe received, recovering...\n");
+if (stream_codec_type == AVMEDIA_TYPE_VIDEO) {
+ctx->drop_until_keyframe = 0;
+av_log(avf, AV_LOG_VERBOSE, "Video keyframe received, 
recovering...\n");
+} else {
+av_log(avf, AV_LOG_VERBOSE, "Dropping non-video keyframe\n");
+av_packet_unref(pkt);
+return 0;
+}
 } else {
 av_log(avf, AV_LOG_VERBOSE, "Dropping non-keyframe packet\n");
 av_packet_unref(pkt);

---
base-commit: 3165fe5ecf4917d9f7d7f27d4fa3af31bc604515
change-id: 20250312-video-only-kf-recover-18816009f240

Best regards,
-- 
Arthur Grillo 

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 2/2] avutil/hwcontext: Add item_name function for AVHWDeviceContext

2025-03-12 Thread softworkz
From: softworkz 

Signed-off-by: softworkz 
---
 libavutil/hwcontext.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c
index 276dc9cee6..3111a44651 100644
--- a/libavutil/hwcontext.c
+++ b/libavutil/hwcontext.c
@@ -137,9 +137,15 @@ enum AVHWDeviceType av_hwdevice_iterate_types(enum 
AVHWDeviceType prev)
 return set ? next : AV_HWDEVICE_TYPE_NONE;
 }
 
+static const char *hwdevice_ctx_get_name(void *ptr)
+{
+FFHWDeviceContext *ctx = ptr;
+return ctx->hw_type->name;
+}
+
 static const AVClass hwdevice_ctx_class = {
 .class_name = "AVHWDeviceContext",
-.item_name  = av_default_item_name,
+.item_name  = hwdevice_ctx_get_name,
 .category   = AV_CLASS_CATEGORY_HWDEVICE,
 .version= LIBAVUTIL_VERSION_INT,
 };
-- 
ffmpeg-codebot
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 1/2] avcodec/vc2enc: Use LUT to assemble interleaved golomb, code

2025-03-12 Thread Lynne

On 12/03/2025 06:27, Andreas Rheinhardt wrote:

Lynne:

On 12/03/2025 04:10, Andreas Rheinhardt wrote:

Patches attached.

- Andreas


First patch is wild, its surprising no one considered inverting the way
decoder parses codes for an encoder yet.


I didn't even look at the decoder.
(It is actually surprising that it took until
512e597932dfe05cf5665192efbe2c93c2e36af2 for the original code to be
improved.)


Rather than ORing and using put_bits63, I think it would make more sense
to write out each chunk using put_bits sequentially. It might be
possible to reverse the lookups such that you get the MSBs first so you
wouldn't need to reverse them out of place in a small array.
But either way, LGTM. Feel free to explore this in a follow-up.


I don't think that writing them sequentially will improve anything: In
order to be able to use a LUT, I would have to shift the bits starting
with the MSBs into position; and then there would be the internal shifts
and checks inside put_bits().
Apart from that: put_bits63() is the same as put_bits() when BUF_BITS is
64 (see ede2b391cc516f4f93621f6a214b3410b231f582).



Second patch seems a bit pointless. It's just one single call you're
uninlining? Chasing to save a few extra bytes of binary surely don't
deserve having a wrapper function for uninlining.



I am uninlining all calls besides the hot one. 31 callsites.
For GCC, this reduced codesize 2c36 to 25b1 (15% saved), for clang from
4b08 to 3338 (32% saved).


Oh, it was late and I didn't read carefully.
Both patches LGTM.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] FFmpeg 4.3.9 and 3.4.14

2025-03-12 Thread Michael Niedermayer
On Mon, Mar 03, 2025 at 02:10:49AM +0100, Michael Niedermayer wrote:
> Hi all
> 
> As ive already backported and somewhat tested release/4.3 i intend to
> make the next point release from that and after that next is
> 3.4 (because its the supported branch that is longest without a release)
> 
> like always, please backport things if you want something backported
> please test, if you want something tested
> and tell me if theres anything i need to know (like waiting for something)

4.3.9 released, 3.4.14 is next

[...]

-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

z(9) = an object that transcends all computable functions describable
in finite terms. - ChatGPT in 2024


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] fftools/ffmpeg_opt: Remove unused alt_bsf

2025-03-12 Thread Andreas Rheinhardt
Patch attached.

- Andreas
From 3809f60cd976de71bd409bc24b9bd11b43aacee4 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt 
Date: Wed, 12 Mar 2025 14:23:51 +0100
Subject: [PATCH] fftools/ffmpeg_opt: Remove unused alt_bsf

Forgotten in 6325aede08d5c7086b3798cb7041299e1d07f93a.

Signed-off-by: Andreas Rheinhardt 
---
 fftools/ffmpeg_opt.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
index 27a9fc9e42..1f5e6050c8 100644
--- a/fftools/ffmpeg_opt.c
+++ b/fftools/ffmpeg_opt.c
@@ -1507,7 +1507,6 @@ static int opt_adrift_threshold(void *optctx, const char *opt, const char *arg)
 }
 #endif
 
-static const char *const alt_bsf[]= { "absf", "vbsf", NULL };
 static const char *const alt_channel_layout[] = { "ch_layout", NULL};
 static const char *const alt_codec[]  = { "c", "acodec", "vcodec", "scodec", "dcodec", NULL };
 static const char *const alt_filter[] = { "af", "vf", NULL };
-- 
2.45.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] ffbuild: read library linker objects from a file

2025-03-12 Thread Gyan Doshi



On 2025-03-12 05:52 pm, Martin Storsjö wrote:

On Wed, 12 Mar 2025, Gyan Doshi wrote:


On 2025-03-12 03:12 pm, Martin Storsjö wrote:

On Wed, 12 Mar 2025, Gyan Doshi wrote:


On 2025-03-12 01:29 pm, Martin Storsjö wrote:

On Wed, 12 Mar 2025, Gyan Doshi wrote:


The linker command can exceed the maximum argument limit on MinGW,
especially for libavcodec.

The objects list is now stored in a file and passed to the linker.
---
ffbuild/library.mak | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ffbuild/library.mak b/ffbuild/library.mak
index 793e9d41fa..781b018e00 100644
--- a/ffbuild/library.mak
+++ b/ffbuild/library.mak
@@ -66,8 +66,10 @@ $(SUBDIR)$(SLIBNAME): 
$(SUBDIR)$(SLIBNAME_WITH_MAJOR)


$(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SHLIBOBJS) $(SLIBOBJS) 
$(SUBDIR)lib$(NAME).ver

$(SLIB_CREATE_DEF_CMD)
-    $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) $$(filter 
%.o,$$^) $(FFEXTRALIBS)

+    $(Q)echo $$(filter %.o,$$^) > $$@.objs
+    $$(LD) $(SHFLAGS) $(LDFLAGS) $(LDSOFLAGS) $$(LD_O) @$$@.objs 
$(FFEXTRALIBS)

$(SLIB_EXTRA_CMD)
+    -$(RM) $$@.objs


Don't we need do the same for static libraries too?

On first glance, this looks quite reasonable... However, the limit 
that is an issue is the length of a command line. The .objs file 
is written with an "echo" command - doesn't that fundamentally 
also hit the same limit (just a little bit later, as there are 
fewer command line flags in this command)?


Or do we assume that make executes it with a shell, where the 
shell handles "echo" as a shell builtin so that it doesn't 
actually spawn a subprocess for this? Can you test it, if you 
could extend the list of object files to the point where the .objs 
file is clearly over 32 KB, and verify that it did include all the 
files you intended?


The specific error I got when building a shared build of 7.1.1 
(with ~90 external libs) was


/bin/sh: line 1: /mingw64/bin/ccache: Argument list too long

Got same error without ccache.

The static build of the same config succeeded without any patching.
The .objs file generated for libavcodec shared build is 29KB.

How do I inflate the size to above 32K? I've enabled pretty much 
everything I can.


The simplest would probably be to add a bunch of empty .c files 
(with long file names) in libavcodec and hook them up in the 
makefile. We won't notice if they are missed when linking of course, 
but if we pass the command line length limit, we should still notice 
it in one way or another.


I did something simpler. I just duplicated the arg on the echo line. 
The build was successful. Each objs file was twice as big, same for 
the generated DLL.

Does that answer your queries?


Yes, thanks.

Btw, just to be clear - are you running this in msys2? If you're 
running mingw build tools via WSL, there wouldn't be any issue when 
running the shell commands on the linux side, but it's specifically 
with the msys2 make where it's interesting to know which limitations 
make and the shell runs into or avoids.


Yes, via msys2.

This is what `xargs --show-limits` prints in the same shell,

POSIX upper limit on argument length (this system): 25382
POSIX smallest allowable upper limit on argument length (all systems): 4096
Maximum length of command we could actually use: 20812
Size of command buffer we are actually using: 25382

Regards,
Gyan

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v2 FFmpeg 21/20] libavfilter: classify fix category post_processing with very low temperature

2025-03-12 Thread m.kaindl0208
Patch attached.

I hope this correctly links to my series.

Signed-off-by: MaximilianKaindl 


0021-libavfilter-classify-fix-category-post_processing-wi.patch
Description: Binary data
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] nvdec/vc1: add marker insertion logic

2025-03-12 Thread averne
Bump

Le 04/03/2025 à 18:17, averne a écrit :
> 
> 
> ---
> Insert the relevant marker into the bitstream on
> slice submission.
> This is analogous to the logic found in DXVA and
> D3D hwaccels.
> 
> Fixes decoding of various VC-1 streams, eg.:
> https://drive.google.com/file/d/1WJyiRhcdU4FHTW3sVMitS7UdrZM1NBy-/view?usp=sharing
> 
> This was investigated using my nvdec tracing tool:
> https://github.com/averne/NvdecTrace
> 
> Signed-off-by: averne 
> ---
>  libavcodec/nvdec_vc1.c | 50 
> --
>  1 file changed, 48 insertions(+), 2 deletions(-)
> 
> diff --git a/libavcodec/nvdec_vc1.c b/libavcodec/nvdec_vc1.c
> index 
> fbfba1ecb43421573ef8fea1e37a2425c272edc9..2726574a26583b0cfc28bdec5595c15bdc465ff8
>  100644
> --- a/libavcodec/nvdec_vc1.c
> +++ b/libavcodec/nvdec_vc1.c
> @@ -22,6 +22,7 @@
>  
>  #include "config_components.h"
>  
> +#include "libavutil/mem.h"
>  #include "avcodec.h"
>  #include "hwaccel_internal.h"
>  #include "internal.h"
> @@ -107,6 +108,51 @@ static int nvdec_vc1_start_frame(AVCodecContext *avctx, 
> const uint8_t *buffer, u
>  return 0;
>  }
>  
> +static int nvdec_vc1_decode_slice(AVCodecContext *avctx, const uint8_t 
> *buffer,
> +  uint32_t size)
> +{
> +NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
> +const VC1Context *v = avctx->priv_data;
> +uint32_t marker;
> +int marker_size;
> +void *tmp;
> +
> +if (avctx->codec_id != AV_CODEC_ID_VC1) {
> +marker_size = 0;
> +} else {
> +if (ctx->bitstream_len)
> +marker = VC1_CODE_SLICE;
> +else if (v->profile == PROFILE_ADVANCED && v->fcm == ILACE_FIELD && 
> v->second_field)
> +marker = VC1_CODE_FIELD;
> +else
> +marker = VC1_CODE_FRAME;
> +
> +marker_size = (size >= sizeof(marker) && AV_RB32(buffer) != marker) 
> ? sizeof(marker) : 0;
> +}
> +
> +tmp = av_fast_realloc(ctx->bitstream_internal, &ctx->bitstream_allocated,
> +  ctx->bitstream_len + size + marker_size);
> +if (!tmp)
> +return AVERROR(ENOMEM);
> +ctx->bitstream = ctx->bitstream_internal = tmp;
> +
> +tmp = av_fast_realloc(ctx->slice_offsets, &ctx->slice_offsets_allocated,
> +  (ctx->nb_slices + 1) * 
> sizeof(*ctx->slice_offsets));
> +if (!tmp)
> +return AVERROR(ENOMEM);
> +ctx->slice_offsets = tmp;
> +
> +if (marker_size)
> +AV_WB32(ctx->bitstream_internal + ctx->bitstream_len, marker);
> +
> +memcpy(ctx->bitstream_internal + ctx->bitstream_len + marker_size, 
> buffer, size);
> +ctx->slice_offsets[ctx->nb_slices] = ctx->bitstream_len;
> +ctx->bitstream_len += size + marker_size;
> +ctx->nb_slices++;
> +
> +return 0;
> +}
> +
>  static int nvdec_vc1_frame_params(AVCodecContext *avctx,
>AVBufferRef *hw_frames_ctx)
>  {
> @@ -121,7 +167,7 @@ const FFHWAccel ff_vc1_nvdec_hwaccel = {
>  .p.pix_fmt= AV_PIX_FMT_CUDA,
>  .start_frame  = nvdec_vc1_start_frame,
>  .end_frame= ff_nvdec_simple_end_frame,
> -.decode_slice = ff_nvdec_simple_decode_slice,
> +.decode_slice = nvdec_vc1_decode_slice,
>  .frame_params = nvdec_vc1_frame_params,
>  .init = ff_nvdec_decode_init,
>  .uninit   = ff_nvdec_decode_uninit,
> @@ -136,7 +182,7 @@ const FFHWAccel ff_wmv3_nvdec_hwaccel = {
>  .p.pix_fmt= AV_PIX_FMT_CUDA,
>  .start_frame  = nvdec_vc1_start_frame,
>  .end_frame= ff_nvdec_simple_end_frame,
> -.decode_slice = ff_nvdec_simple_decode_slice,
> +.decode_slice = nvdec_vc1_decode_slice,
>  .frame_params = nvdec_vc1_frame_params,
>  .init = ff_nvdec_decode_init,
>  .uninit   = ff_nvdec_decode_uninit,
> 
> ---
> base-commit: f76195ff656d6bea68feee783160652e2b3e3d60
> change-id: 20250304-nvdec-vc1-marker2-53d6bd30ee99
> 
> Best regards,

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] avcodec/aom_film_grain: Cast const away to suppress compiler, warning

2025-03-12 Thread Andreas Rheinhardt
Patch attached.

- Andreas
From fa46d708680d10dd2f9b80365eafbeeda6cc Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt 
Date: Wed, 12 Mar 2025 18:59:05 +0100
Subject: [PATCH] avcodec/aom_film_grain: Cast const away to suppress compiler
 warning

av_frame_side_data_add() typically takes ownership of
the provided AVBufferRef reference and therefore
uses a parameter of type AVBufferRef**; yet with
the AV_FRAME_SIDE_DATA_FLAG_NEW_REF, it creates
new references instead, without touching the given
reference. Therefore it is safe to cast const away.

Signed-off-by: Andreas Rheinhardt 
---
 libavcodec/aom_film_grain.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/aom_film_grain.c b/libavcodec/aom_film_grain.c
index d5ea75f61c..0f24a2bcf8 100644
--- a/libavcodec/aom_film_grain.c
+++ b/libavcodec/aom_film_grain.c
@@ -366,7 +366,8 @@ int ff_aom_attach_film_grain_sets(const AVFilmGrainAFGS1Params *s, AVFrame *fram
 continue;
 
 if (!av_frame_side_data_add(&frame->side_data, &frame->nb_side_data,
-AV_FRAME_DATA_FILM_GRAIN_PARAMS, &s->sets[i],
+AV_FRAME_DATA_FILM_GRAIN_PARAMS,
+(AVBufferRef**)&s->sets[i],
 AV_FRAME_SIDE_DATA_FLAG_NEW_REF))
 return AVERROR(ENOMEM);
 }
-- 
2.45.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [RFC] New swscale internal design prototype

2025-03-12 Thread Martin Storsjö

On Wed, 12 Mar 2025, Niklas Haas wrote:


On Sun, 09 Mar 2025 20:45:23 +0100 Niklas Haas  wrote:

On Sun, 09 Mar 2025 18:11:54 +0200 Martin Storsjö  wrote:
> On Sat, 8 Mar 2025, Niklas Haas wrote:
> 
> > What are the thoughts on the float-first approach?
> 
> In general, for modern architectures, relying on floats probably is 
> reasonable. (On architectures that aren't of quite as widespread interest, 
> it might not be so clear cut though.)
> 
> However with the benchmark example you provided a couple of weeks ago, we 
> concluded that even on x86 on modern HW, floats were faster than int16 
> only in one case: When using Clang, not GCC, and when compiling with 
> -mavx2, not without it. In all the other cases, int16 was faster than 
> float.


Hi Martin,

I should preface that this particular benchmark was a very specific test for
floating point *filtering*, which is considerably more punishing than the
conversion pipeline I have implemented here, and I think it's partly the
fault of compilers generating very unoptimal filtering code.

I think it would be better to re-assess using the current prototype on actual
hardware. I threw up a quick NEON test branch: (untested, should hopefully work)
https://github.com/haasn/FFmpeg/commits/swscale3-neon

# adjust the benchmark iters count as needed based on the HW perf
make libswscale/tests/swscale && libswscale/tests/swscale -unscaled 1 -bench 50

If this differs significantly from the ~1.8x speedup I measure on x86, I
will be far more concerned about the new approach.


Sorry, I haven't had time to try this out myself yet...


I gave it a try. So, the result of a naive/blind run on a Cortex-X1 using clang
version 20.0.0 (from the latest Android NDK v29) is:

Overall speedup=1.688x faster, min=0.141x max=45.898x

This has quite a lot more significant speed regressions compared to x86 though.

In particular, clang/LLVM refuses to vectorize packed reads of 2 or 3 elements,
so any sort of operation involving rgb24 or bgr24 suffers horribly:


So, if the performance of this relies on compiler autovectorization, 
what's the plan wrt GCC? We blanket disable autovectorization when 
compiling with GCC - see fd6dbc53855fbfc9a782095d0ffe11dd3a98905f for when 
it was disabled last time. Building and running fate with 
autovectorization in GCC does succeed at least on modern GCC on x86_64, 
but it's of course possible that it still can cause issues in various more 
tricky configurations.


// Martin
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v4] Mark C globals with small code model

2025-03-12 Thread Andreas Rheinhardt
Martin Storsjö:
> On Wed, 12 Mar 2025, Andreas Rheinhardt wrote:
> 
>> Pranav Kant via ffmpeg-devel:
>>> By default, all globals in C/C++ compiled by clang are allocated
>>> in non-large data sections. See [1] for background on code models.
>>> For PIC (Position independent code), this is fine as long as binary is
>>> small but as binary size increases, users maybe want to use medium/large
>>> code models (-mcmodel=medium) which moves data in to large sections.
>>> As data in these large sections cannot be accessed using PIC code
>>> anymore (as it may be too far away), compiler ends up using a different
>>> instruction sequence when building C/C++ code -- using GOT to access
>>> these globals (which can be relaxed by linker at link time if binary
>>> ends up being smaller). However, assembly files continue to access these
>>> globals defined in C/C++ files using older (and invalid instruction
>>> sequence). So, we mark all such globals with an attribute that forces
>>> them to be allocated in small sections allowing them to validly be
>>> accessed from the assembly code.
>>>
>>> This patch should not have any affect on builds that use small code
>>> model, which is the default mode.
>>>
>>> [1] https://eli.thegreenplace.net/2012/01/03/understanding-the-x64-
>>> code-models
>>>
>>> Signed-off-by: Pranav Kant 
>>> ---
>>>  libavcodec/ac3dsp.c |  2 ++
>>>  libavcodec/cabac.c  |  2 ++
>>>  libavcodec/x86/constants.c  |  8 
>>>  libavutil/attributes.h  |  6 ++
>>>  libavutil/attributes_internal.h | 16 
>>>  5 files changed, 34 insertions(+)
>>>
>>> diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
>>> index 730fa70fff..d16b6c24c3 100644
>>> --- a/libavcodec/ac3dsp.c
>>> +++ b/libavcodec/ac3dsp.c
>>> @@ -25,6 +25,7 @@
>>>
>>>  #include "config.h"
>>>  #include "libavutil/attributes.h"
>>> +#include "libavutil/attributes_internal.h"
>>>  #include "libavutil/common.h"
>>>  #include "libavutil/intmath.h"
>>>  #include "libavutil/mem_internal.h"
>>> @@ -104,6 +105,7 @@ static void ac3_update_bap_counts_c(uint16_t
>>> mant_cnt[16], uint8_t *bap,
>>>  mant_cnt[bap[len]]++;
>>>  }
>>>
>>> +attribute_mcmodel_small
>>>  DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = {
>>
>> Shouldn't stuff like this be applied to the declaration so that C code
>> can also take advantage of the knowledge that this object will be placed
>> in the small code section?
>>
>>>  0,  0,  0,  3,  0,  4,  5,  6,  7,  8,  9, 10, 11, 12, 14, 16
>>>  };
>>> diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c
>>> index 7d41cd2ae6..b8c6db29a2 100644
>>> --- a/libavcodec/cabac.c
>>> +++ b/libavcodec/cabac.c
>>> @@ -24,11 +24,13 @@
>>>   * Context Adaptive Binary Arithmetic Coder.
>>>   */
>>>
>>> +#include "libavutil/attributes_internal.h"
>>>  #include "libavutil/error.h"
>>>  #include "libavutil/mem_internal.h"
>>>
>>>  #include "cabac.h"
>>>
>>> +attribute_mcmodel_small
>>>  DECLARE_ASM_ALIGNED(1, const uint8_t, ff_h264_cabac_tables)[512 +
>>> 4*2*64 + 4*64 + 63] = {
>>
>> Your commit message ("However, assembly files continue to access")
>> speaks only of assembly files, i.e. external asm. Yet this here is only
>> used by inline ASM. Looking through the code the reason for this is that
>> I thought that specifying the memory model is only necessary for stuff
>> used by external asm, yet ff_h264_cabac_tables does not seem to be used
>> by external ASM at all, only inline ASM. If I see this correctly, the
>> reason for this is that LOCAL_MANGLE (and therefore MANGLE) uses rip
>> addressing on x64 when configure sets the RIP define. But this means
>> that the set of files needing attribute_mcmodel_small is a superset of
>> the files currently using DECLARE_ASM_ALIGNED. This means that one would
>> only need two macros for the variables accessed by ASM: One for only
>> external ASM and one for inline ASM (and potentially external ASM)
>> instead of adding attribute_mcmodel_small at various places in the
>> codebase.
>>
>>>  9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
>>>  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
>>> diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
>>> index bc7f2b17b8..347b7dd1d3 100644
>>> --- a/libavcodec/x86/constants.c
>>> +++ b/libavcodec/x86/constants.c
>>> @@ -18,17 +18,21 @@
>>>   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
>>> 02110-1301 USA
>>>   */
>>>
>>> +#include "libavutil/attributes_internal.h"
>>>  #include "libavutil/mem_internal.h"
>>>  #include "libavutil/x86/asm.h" // for xmm_reg
>>>  #include "constants.h"
>>>
>>> +attribute_mcmodel_small
>>>  DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1)    = {
>> 0x0001000100010001ULL, 0x0001000100010001ULL,>
>>   0x0001000100010001ULL, 0x0001000100010001ULL };
>>>  DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_2)    =
>>> { 0x0002000200020002ULL, 0x0002000200020002ULL,
>>> 
>>> 0x0002000

Re: [FFmpeg-devel] [PATCH v4] Mark C globals with small code model

2025-03-12 Thread Martin Storsjö

On Wed, 12 Mar 2025, Andreas Rheinhardt wrote:


Pranav Kant via ffmpeg-devel:

By default, all globals in C/C++ compiled by clang are allocated
in non-large data sections. See [1] for background on code models.
For PIC (Position independent code), this is fine as long as binary is
small but as binary size increases, users maybe want to use medium/large
code models (-mcmodel=medium) which moves data in to large sections.
As data in these large sections cannot be accessed using PIC code
anymore (as it may be too far away), compiler ends up using a different
instruction sequence when building C/C++ code -- using GOT to access
these globals (which can be relaxed by linker at link time if binary
ends up being smaller). However, assembly files continue to access these
globals defined in C/C++ files using older (and invalid instruction
sequence). So, we mark all such globals with an attribute that forces
them to be allocated in small sections allowing them to validly be
accessed from the assembly code.

This patch should not have any affect on builds that use small code
model, which is the default mode.

[1] https://eli.thegreenplace.net/2012/01/03/understanding-the-x64-code-models

Signed-off-by: Pranav Kant 
---
 libavcodec/ac3dsp.c |  2 ++
 libavcodec/cabac.c  |  2 ++
 libavcodec/x86/constants.c  |  8 
 libavutil/attributes.h  |  6 ++
 libavutil/attributes_internal.h | 16 
 5 files changed, 34 insertions(+)

diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
index 730fa70fff..d16b6c24c3 100644
--- a/libavcodec/ac3dsp.c
+++ b/libavcodec/ac3dsp.c
@@ -25,6 +25,7 @@

 #include "config.h"
 #include "libavutil/attributes.h"
+#include "libavutil/attributes_internal.h"
 #include "libavutil/common.h"
 #include "libavutil/intmath.h"
 #include "libavutil/mem_internal.h"
@@ -104,6 +105,7 @@ static void ac3_update_bap_counts_c(uint16_t mant_cnt[16], 
uint8_t *bap,
 mant_cnt[bap[len]]++;
 }

+attribute_mcmodel_small
 DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = {


Shouldn't stuff like this be applied to the declaration so that C code
can also take advantage of the knowledge that this object will be placed
in the small code section?


 0,  0,  0,  3,  0,  4,  5,  6,  7,  8,  9, 10, 11, 12, 14, 16
 };
diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c
index 7d41cd2ae6..b8c6db29a2 100644
--- a/libavcodec/cabac.c
+++ b/libavcodec/cabac.c
@@ -24,11 +24,13 @@
  * Context Adaptive Binary Arithmetic Coder.
  */

+#include "libavutil/attributes_internal.h"
 #include "libavutil/error.h"
 #include "libavutil/mem_internal.h"

 #include "cabac.h"

+attribute_mcmodel_small
 DECLARE_ASM_ALIGNED(1, const uint8_t, ff_h264_cabac_tables)[512 + 4*2*64 + 
4*64 + 63] = {


Your commit message ("However, assembly files continue to access")
speaks only of assembly files, i.e. external asm. Yet this here is only
used by inline ASM. Looking through the code the reason for this is that
I thought that specifying the memory model is only necessary for stuff
used by external asm, yet ff_h264_cabac_tables does not seem to be used
by external ASM at all, only inline ASM. If I see this correctly, the
reason for this is that LOCAL_MANGLE (and therefore MANGLE) uses rip
addressing on x64 when configure sets the RIP define. But this means
that the set of files needing attribute_mcmodel_small is a superset of
the files currently using DECLARE_ASM_ALIGNED. This means that one would
only need two macros for the variables accessed by ASM: One for only
external ASM and one for inline ASM (and potentially external ASM)
instead of adding attribute_mcmodel_small at various places in the codebase.


 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
index bc7f2b17b8..347b7dd1d3 100644
--- a/libavcodec/x86/constants.c
+++ b/libavcodec/x86/constants.c
@@ -18,17 +18,21 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */

+#include "libavutil/attributes_internal.h"
 #include "libavutil/mem_internal.h"
 #include "libavutil/x86/asm.h" // for xmm_reg
 #include "constants.h"

+attribute_mcmodel_small
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1)= {

0x0001000100010001ULL, 0x0001000100010001ULL,>
  0x0001000100010001ULL, 0x0001000100010001ULL };

 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_2)= { 0x0002000200020002ULL, 
0x0002000200020002ULL,
 0x0002000200020002ULL, 
0x0002000200020002ULL };
 DECLARE_ASM_ALIGNED(16, const xmm_reg,  ff_pw_3)= { 0x0003000300030003ULL, 
0x0003000300030003ULL };
+attribute_mcmodel_small
 DECLARE_ASM_ALIGNED(32, const ymm_reg,  ff_pw_4)= { 0x0004000400040004ULL, 
0x0004000400040004ULL,
 0x0004000400040004ULL, 
0x0004000400040004ULL };
+attribute_mcmodel_small
 DECLARE_ASM_ALIGNED(16,

Re: [FFmpeg-devel] [PATCH v4] Mark C globals with small code model

2025-03-12 Thread Martin Storsjö

On Wed, 12 Mar 2025, Andreas Rheinhardt wrote:


Martin Storsjö:

On Wed, 12 Mar 2025, Andreas Rheinhardt wrote:


Pranav Kant via ffmpeg-devel:

+ * all globals in a data section that's unreachable with PC relative
instructions
+ * (small code model instruction sequence). We mark all such globals
with this
+ * attribute_mcmodel_small to ensure assembly accessible globals
continue to be
+ * allocated in sections reachable from PC relative instructions.
+ */
+#if ARCH_X86_64 && defined(__ELF__) && AV_HAS_ATTRIBUTE(model)


You make this ARCH_X86_64 only, yet the concept of code models also
exists for other arches, e.g. AArch64. I presume that assembly for other
arches presumes that we are not using the large code model for accesses,
so that the same issue can happen with them. Then we have two options:


FWIW, in e4ac156b7c47725327dff78bb83f5eecbaee3add we added
attribute_visibility_hidden to a bunch of symbols that are accessed from
aarch64 assembly, in a similar fashion. While the effects are different,
I wonder if we should abstract these two down to a more generic
attribute for any symbol accessed directly from any assembly.



As I said above: We would only need two macros: One for any object
accessed from inline assembly and one for objects accessed by external
assembly (and not inline assembly). The latter would be hidden
visibility+small code model, the former would also have av_used. Of
course, both would allow to specify alignment.

But naming is hard. How about DECLARE_ASM_VAR and DECLARE_INLINE_ASM_VAR?


That sounds ok to me.

// Martin
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [RFC] New swscale internal design prototype

2025-03-12 Thread Niklas Haas
On Wed, 12 Mar 2025 02:58:52 +0200 Rémi Denis-Courmont  wrote:
> 
> 
> Le 10 mars 2025 15:14:46 GMT+02:00, Niklas Haas  a écrit :
> >On Sun, 09 Mar 2025 17:57:48 -0700 Rémi Denis-Courmont  
> >wrote:
> >>
> >>
> >> Le 9 mars 2025 12:57:47 GMT-07:00, Niklas Haas  a écrit :
> >> >On Sun, 09 Mar 2025 11:18:04 -0700 Rémi Denis-Courmont  
> >> >wrote:
> >> >> Hi,
> >> >>
> >> >> Le 8 mars 2025 14:53:42 GMT-08:00, Niklas Haas  a 
> >> >> écrit :
> >> >> >https://github.com/haasn/FFmpeg/blob/swscale3/doc/swscale-v2.txt
> >> >>
> >> >> >I have spent the past week or so ironing
> >> >> >I wanted to post it here to gather some feedback on the approach. 
> >> >> >Where does
> >> >> >it fall on the "madness" scale? Is the new operations and optimizer 
> >> >> >design
> >> >> >comprehensible? Am I trying too hard to reinvent compilers? Are there 
> >> >> >any
> >> >> >platforms where the high number of function calls per frame would be
> >> >> >probitively expensive? What are the thoughts on the float-first 
> >> >> >approach? See
> >> >> >also the list of limitations and improvement ideas at the bottom of my 
> >> >> >design
> >> >> >document.
> >> >>
> >> >> Using floats internally may be fine if there's (almost) never any 
> >> >> spillage, but that necessarily implies custom calling conventions. And 
> >> >> won't work with as many as 32 pixels. On RVV 128-bit, you'd have only 4 
> >> >> vectors. On Arm NEON, it would be even worse as scalars/constants need 
> >> >> to be stored in vectors as well.
> >> >
> >> >I think that a custom calling convention is not as unreasonable as it may 
> >> >sound,
> >> >and will actually be easier to implement than the standard calling 
> >> >convention
> >> >since functions will not have to deal with pixel load/store, nor will 
> >> >there be
> >> >any need for "fused" versions of operations (whose only purpose is to 
> >> >avoid
> >> >the roundtrip through L1).
> >> >
> >> >The pixel chunk size is easily changed; it is a compile time constant and 
> >> >there
> >> >are no strict requirements on it. If RISC-V (or any other platform) 
> >> >struggles
> >> >with storing 32 floats in vector registers, we could go down to 16 (or 
> >> >even 8);
> >> >the number 32 was merely chosen by benchmarking and not through any 
> >> >careful
> >> >design consideration.
> >>
> >> It can't be a compile time constant on RVV nor (if it's ever introduced) 
> >> SVE because they are scalable. I doubt that a compile-time constant will 
> >> work well across all variants of x86 as well, but not that I'd know.
> >
> >It's my understanding that on existing RVV implementations, the number of
> >cycles needed to execute an m4/m2 operation is roughly 4x/2x the cost of
> >an equivalent m1 operation.
> 
> But that's exactly the problem! We want to use the *same* group multipler 
> regardless of the vector length to obtain roughly optimal bandwidth. That 
> means the number of elements will be proportional to the vector length. The 
> multiplier depends on the element size and perhaps the register pressure of a 
> given chunk processing, not the vector length.
> 
> And with SVE2, it'll most probably work optimally with a 2x unroll (like NEON 
> typically). This is more or less equivalent to RVV m2, and will also lead to 
> a chunk size proportional to the hardware vector length. 
> 
> If you calculate the chunk size based in the worst 128-bit case, then it'll 
> work on 256-bit but at only 50% of the possible speed, because half the CPU 
> time will be wasted working on tail or masked elements.

Okay, I came up with slightly different approach that will scale to arbitrary
chunk sizes, permit custom calling conventions or more exotic approaches, solve
some other issues with the current prototype (such as the expensive swizzle
operations), and also lower the per-call overhead.

I will post a refactored branch whenever it's done. In the meantime, let's put
this RFC on hold, I think I've gathered enough feedback.

> 
> >If this continues to be the case, the underlying VLEN of the implementation
> >should not matter much, even with a compile time constant chunk size, as long
> >as it does not greatly exceed 512.
> 
> No clue how you come to that conclusion. The maths don't add up here.
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 1/1] avcodec/pcm: fix build warning by replacing deprecated method with avcodec_get_supported_config.

2025-03-12 Thread Andreas Rheinhardt
Zhao Zhili:
> 
> 
>> On Mar 11, 2025, at 17:25, joney...@gmail.com wrote:
>>
>> From: Jingwei Yao 
>>
>> Signed-off-by: Jingwei Yao 
>> ---
>> libavcodec/pcm.c | 12 ++--
>> 1 file changed, 10 insertions(+), 2 deletions(-)
>>
>> diff --git a/libavcodec/pcm.c b/libavcodec/pcm.c
>> index a23293dca2..e04ca78101 100644
>> --- a/libavcodec/pcm.c
>> +++ b/libavcodec/pcm.c
>> @@ -252,8 +252,10 @@ typedef struct PCMDecode {
>> static av_cold int pcm_decode_init(AVCodecContext *avctx)
>> {
>> PCMDecode *s = avctx->priv_data;
>> +const enum AVSampleFormat *sample_fmts;
>> AVFloatDSPContext *fdsp;
>> -int i;
>> +int num_sample_fmts;
>> +int i, ret;
>>
>> switch (avctx->codec_id) {
>> case AV_CODEC_ID_PCM_ALAW:
>> @@ -284,7 +286,13 @@ static av_cold int pcm_decode_init(AVCodecContext 
>> *avctx)
>> break;
>> }
>>
>> -avctx->sample_fmt = avctx->codec->sample_fmts[0];
>> +ret = avcodec_get_supported_config(avctx, NULL, 
>> AV_CODEC_CONFIG_SAMPLE_FORMAT,
>> +   0, (const void**)&sample_fmts, 
>> &num_sample_fmts);
>> +if (ret < 0)
>> +return ret;
>> +
>> +if (sample_fmts)
>> +avctx->sample_fmt = sample_fmts[0];
> 
> How about just disable the warning with FF_DISABLE_DEPRECATION_WARNINGS?
> 
> sample_fmts should be moved to FFCodec finally. So we can fix it finally by
> 
> avctx->sample_fmt = ffcodec(avctx->codec)->sample_fmts[0];

Actually, sample_fmts and the other stuff should be encoder-only. I
therefore just sent a patchset that avoids using it here (by the decoders).

> 
> It doesn’t worth the trouble and complexity to get some information which is 
> only an array
> in current translation unit.
> 
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context

2025-03-12 Thread Andreas Rheinhardt
Lynne:
> On 10/03/2025 18:42, Lynne wrote:
>> On 10/03/2025 04:14, Andreas Rheinhardt wrote:
>>> Lynne:
 ---
   libavcodec/ffv1.h    |  3 +++
   libavcodec/ffv1dec.c | 19 +--
   2 files changed, 20 insertions(+), 2 deletions(-)

 diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
 index 8c0e71284d..860a5c14b1 100644
 --- a/libavcodec/ffv1.h
 +++ b/libavcodec/ffv1.h
 @@ -174,6 +174,9 @@ typedef struct FFV1Context {
    * NOT shared between frame threads.
    */
   uint8_t   frame_damaged;
 +
 +    /* Reference to the current packet */
 +    AVPacket *pkt_ref;
   } FFV1Context;
   int ff_ffv1_common_init(AVCodecContext *avctx, FFV1Context *s);
 diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
 index eaa21eebdf..6396f22f79 100644
 --- a/libavcodec/ffv1dec.c
 +++ b/libavcodec/ffv1dec.c
 @@ -469,6 +469,10 @@ static av_cold int decode_init(AVCodecContext
 *avctx)
   f->pix_fmt = AV_PIX_FMT_NONE;
   f->configured_pix_fmt = AV_PIX_FMT_NONE;
 +    f->pkt_ref = av_packet_alloc();
 +    if (!f->pkt_ref)
 +    return AVERROR(ENOMEM);
 +
   if ((ret = ff_ffv1_common_init(avctx, f)) < 0)
   return ret;
 @@ -701,6 +705,10 @@ static int decode_frame(AVCodecContext *avctx,
 AVFrame *rframe,
   /* Start */
   if (hwaccel) {
 +    ret = av_packet_ref(f->pkt_ref, avpkt);
 +    if (ret < 0)
 +    return ret;
 +
   ret = hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
   if (ret < 0)
   return ret;
 @@ -720,15 +728,21 @@ static int decode_frame(AVCodecContext *avctx,
 AVFrame *rframe,
   uint32_t len;
   ret = find_next_slice(avctx, avpkt->data, buf_end, i,
     &pos, &len);
 -    if (ret < 0)
 +    if (ret < 0) {
 +    av_packet_unref(f->pkt_ref);
   return ret;
 +    }
   buf_end -= len;
   ret = hwaccel->decode_slice(avctx, pos, len);
 -    if (ret < 0)
 +    if (ret < 0) {
 +    av_packet_unref(f->pkt_ref);
   return ret;
 +    }
   }
 +
 +    av_packet_unref(f->pkt_ref);
   } else {
   ret = decode_slices(avctx, c, avpkt);
   if (ret < 0)
 @@ -827,6 +841,7 @@ static av_cold int
 ffv1_decode_close(AVCodecContext *avctx)
   ff_progress_frame_unref(&s->last_picture);
   av_refstruct_unref(&s->hwaccel_last_picture_private);
 +    av_packet_free(&s->pkt_ref);
   ff_ffv1_close(s);
   return 0;
>>>
>>> Why not simply use a const AVPacket*?
>>
>> No reason. Fixed locally.
>> Thanks.
> 
> *reverted this change.
> We need to ref the packet, since we map its memory and let the GPU use
> it directly without copying the contents. 6k16bit content at 24fps is
> typically around 2Gbps when compressed, so avoiding copies is important.

How long does the hwaccel need this data?

- Andreas

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 1/9] avcodec/msmpeg4enc: Inline constant

2025-03-12 Thread Andreas Rheinhardt
Patches attached.

- Andreas
From b3a259cf6bf825a34d9405a53e923883def1 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt 
Date: Fri, 7 Mar 2025 15:42:18 +0100
Subject: [PATCH 1/9] avcodec/msmpeg4enc: Inline constant

Signed-off-by: Andreas Rheinhardt 
---
 libavcodec/msmpeg4enc.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/libavcodec/msmpeg4enc.c b/libavcodec/msmpeg4enc.c
index 2a9e16975f..8310e0a578 100644
--- a/libavcodec/msmpeg4enc.c
+++ b/libavcodec/msmpeg4enc.c
@@ -354,9 +354,8 @@ static void msmpeg4v2_encode_motion(MpegEncContext * s, int val)
 int range, bit_size, sign, code, bits;
 
 if (val == 0) {
-/* zero vector */
-code = 0;
-put_bits(&s->pb, ff_mvtab[code][1], ff_mvtab[code][0]);
+/* zero vector; corresponds to ff_mvtab[0] */
+put_bits(&s->pb, 1, 0x1);
 } else {
 bit_size = s->f_code - 1;
 range = 1 << bit_size;
-- 
2.45.2

From 78b6d8190d4dfbfefc80cc663a883e4e7ee7e805 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt 
Date: Fri, 7 Mar 2025 01:51:49 +0100
Subject: [PATCH 2/9] avcodec/mpegvideo_dec: Mark init, flush, close functions
 as av_cold

Signed-off-by: Andreas Rheinhardt 
---
 libavcodec/mpegvideo_dec.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavcodec/mpegvideo_dec.c b/libavcodec/mpegvideo_dec.c
index 532d8cf5c1..2856dbfbd6 100644
--- a/libavcodec/mpegvideo_dec.c
+++ b/libavcodec/mpegvideo_dec.c
@@ -43,7 +43,7 @@
 #include "threadprogress.h"
 #include "wmv2dec.h"
 
-int ff_mpv_decode_init(MpegEncContext *s, AVCodecContext *avctx)
+av_cold int ff_mpv_decode_init(MpegEncContext *s, AVCodecContext *avctx)
 {
 enum ThreadingStatus thread_status;
 
@@ -141,7 +141,7 @@ int ff_mpeg_update_thread_context(AVCodecContext *dst,
 return 0;
 }
 
-int ff_mpv_decode_close(AVCodecContext *avctx)
+av_cold int ff_mpv_decode_close(AVCodecContext *avctx)
 {
 MpegEncContext *s = avctx->priv_data;
 
@@ -150,7 +150,7 @@ int ff_mpv_decode_close(AVCodecContext *avctx)
 return 0;
 }
 
-int ff_mpv_common_frame_size_change(MpegEncContext *s)
+av_cold int ff_mpv_common_frame_size_change(MpegEncContext *s)
 {
 int err = 0;
 
@@ -427,7 +427,7 @@ void ff_mpeg_draw_horiz_band(MpegEncContext *s, int y, int h)
s->first_field, s->low_delay);
 }
 
-void ff_mpeg_flush(AVCodecContext *avctx)
+av_cold void ff_mpeg_flush(AVCodecContext *avctx)
 {
 MpegEncContext *const s = avctx->priv_data;
 
-- 
2.45.2

From 5956d3424e2eb7c2419dcbf026cb28eee0d880fd Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt 
Date: Fri, 7 Mar 2025 02:06:40 +0100
Subject: [PATCH 3/9] avcodec/vc1_block: Stop setting write-only
 block_last_index

It is only used by the mpegvideo unquantize functions which
this decoder does not use at all.

Signed-off-by: Andreas Rheinhardt 
---
 libavcodec/vc1_block.c | 32 +---
 1 file changed, 9 insertions(+), 23 deletions(-)

diff --git a/libavcodec/vc1_block.c b/libavcodec/vc1_block.c
index 8babbde38c..26adfbca1d 100644
--- a/libavcodec/vc1_block.c
+++ b/libavcodec/vc1_block.c
@@ -585,7 +585,6 @@ static int vc1_decode_i_block(VC1Context *v, int16_t block[64], int n,
 GetBitContext *gb = &v->s.gb;
 MpegEncContext *s = &v->s;
 int dc_pred_dir = 0; /* Direction of the DC prediction used */
-int i;
 int16_t *dc_val;
 int16_t *ac_val, *ac_val2;
 int dcdiff, scale;
@@ -622,7 +621,6 @@ static int vc1_decode_i_block(VC1Context *v, int16_t block[64], int n,
 scale = v->pq * 2 + v->halfpq;
 
 //AC Decoding
-i = !!coded;
 
 if (coded) {
 int last = 0, skip, value;
@@ -637,14 +635,14 @@ static int vc1_decode_i_block(VC1Context *v, int16_t block[64], int n,
 } else
 zz_table = v->zz_8x8[1];
 
-while (!last) {
+for (int i = 1; !last; ++i) {
 int ret = vc1_decode_ac_coeff(v, &last, &skip, &value, codingset);
 if (ret < 0)
 return ret;
 i += skip;
 if (i > 63)
 break;
-block[zz_table[i++]] = value;
+block[zz_table[i]] = value;
 }
 
 /* apply AC prediction if needed */
@@ -696,8 +694,6 @@ static int vc1_decode_i_block(VC1Context *v, int16_t block[64], int n,
 }
 }
 }
-if (s->ac_pred) i = 63;
-s->block_last_index[n] = i;
 
 return 0;
 }
@@ -716,7 +712,6 @@ static int vc1_decode_i_block_adv(VC1Context *v, int16_t block[64], int n,
 GetBitContext *gb = &v->s.gb;
 MpegEncContext *s = &v->s;
 int dc_pred_dir = 0; /* Direction of the DC prediction used */
-int i;
 int16_t *dc_val = NULL;
 int16_t *ac_val, *ac_val2;
 int dcdiff;
@@ -778,7 +773,6 @@ static int vc1_decode_i_block_adv(VC1Context *v, int16_t block[64], int n,
 }
 
 //AC Decoding
-i = 1;
 
 if (coded) {
 int last = 0, skip, value;
@@ -801,14 +795,14 @@ static int vc1_decode_i_bloc

Re: [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context

2025-03-12 Thread Andreas Rheinhardt
Lynne:
> 
> 
> On 13/03/2025 02:24, Andreas Rheinhardt wrote:
>> Lynne:
>>> On 10/03/2025 18:42, Lynne wrote:
 On 10/03/2025 04:14, Andreas Rheinhardt wrote:
> Lynne:
>> ---
>>    libavcodec/ffv1.h    |  3 +++
>>    libavcodec/ffv1dec.c | 19 +--
>>    2 files changed, 20 insertions(+), 2 deletions(-)
>>
>> diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
>> index 8c0e71284d..860a5c14b1 100644
>> --- a/libavcodec/ffv1.h
>> +++ b/libavcodec/ffv1.h
>> @@ -174,6 +174,9 @@ typedef struct FFV1Context {
>>     * NOT shared between frame threads.
>>     */
>>    uint8_t   frame_damaged;
>> +
>> +    /* Reference to the current packet */
>> +    AVPacket *pkt_ref;
>>    } FFV1Context;
>>    int ff_ffv1_common_init(AVCodecContext *avctx, FFV1Context *s);
>> diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
>> index eaa21eebdf..6396f22f79 100644
>> --- a/libavcodec/ffv1dec.c
>> +++ b/libavcodec/ffv1dec.c
>> @@ -469,6 +469,10 @@ static av_cold int decode_init(AVCodecContext
>> *avctx)
>>    f->pix_fmt = AV_PIX_FMT_NONE;
>>    f->configured_pix_fmt = AV_PIX_FMT_NONE;
>> +    f->pkt_ref = av_packet_alloc();
>> +    if (!f->pkt_ref)
>> +    return AVERROR(ENOMEM);
>> +
>>    if ((ret = ff_ffv1_common_init(avctx, f)) < 0)
>>    return ret;
>> @@ -701,6 +705,10 @@ static int decode_frame(AVCodecContext *avctx,
>> AVFrame *rframe,
>>    /* Start */
>>    if (hwaccel) {
>> +    ret = av_packet_ref(f->pkt_ref, avpkt);
>> +    if (ret < 0)
>> +    return ret;
>> +
>>    ret = hwaccel->start_frame(avctx, avpkt->data, avpkt-
>> >size);
>>    if (ret < 0)
>>    return ret;
>> @@ -720,15 +728,21 @@ static int decode_frame(AVCodecContext *avctx,
>> AVFrame *rframe,
>>    uint32_t len;
>>    ret = find_next_slice(avctx, avpkt->data, buf_end, i,
>>  &pos, &len);
>> -    if (ret < 0)
>> +    if (ret < 0) {
>> +    av_packet_unref(f->pkt_ref);
>>    return ret;
>> +    }
>>    buf_end -= len;
>>    ret = hwaccel->decode_slice(avctx, pos, len);
>> -    if (ret < 0)
>> +    if (ret < 0) {
>> +    av_packet_unref(f->pkt_ref);
>>    return ret;
>> +    }
>>    }
>> +
>> +    av_packet_unref(f->pkt_ref);
>>    } else {
>>    ret = decode_slices(avctx, c, avpkt);
>>    if (ret < 0)
>> @@ -827,6 +841,7 @@ static av_cold int
>> ffv1_decode_close(AVCodecContext *avctx)
>>    ff_progress_frame_unref(&s->last_picture);
>>    av_refstruct_unref(&s->hwaccel_last_picture_private);
>> +    av_packet_free(&s->pkt_ref);
>>    ff_ffv1_close(s);
>>    return 0;
>
> Why not simply use a const AVPacket*?

 No reason. Fixed locally.
 Thanks.
>>>
>>> *reverted this change.
>>> We need to ref the packet, since we map its memory and let the GPU use
>>> it directly without copying the contents. 6k16bit content at 24fps is
>>> typically around 2Gbps when compressed, so avoiding copies is important.
>>
>> How long does the hwaccel need this data?
> 
> Until the frame has been asynchronously decoded. We give an output frame
> with a semaphore that receivers need to wait on to determine when that is.
> 
> On the decoder-side, the hardware has a fixed number of queues where
> submissions can be sent to asynchronously. We treat it as a ring buffer
> and keep a reference to all resources our side for each submission,
> until we need to reuse the slot, at which point we wait on the frame
> decoding to complete (which it usually has), and we release all
> resources used.
> 
> Output frames also have a bit of state that has to be freed once the
> frame is marked (unreferenced) by the decoder as no longer being needed
> as a reference, this is done in the FFHWAccel.free_frame_priv callback.
> There, we have to wait for the last internal use of the frame to be
> finished (done via the vp->wait_semaphores() call in vulkan_decode.c).
> 
> This is valid for both ASIC hardware decoders and a compute shader based
> implementation, since the two share the same code, except for decode
> submissions.

1. If you need a reference to the packet's data, then reference
AVPacket.buf, not the whole AVPacket. This avoids allocating a spare
AVPacket as well as copying side data.
2. It sounds very wrong and fragile that the decoder has to keep a
reference because the hwaccel might need it. There may be future
hwaccels that don't need such a reference etc. It seems better to extend
e.g. the start_frame callback and pass a r

[FFmpeg-devel] [PATCH 1/7] avcodec/pcm: Remove always-false check

2025-03-12 Thread Andreas Rheinhardt
Patches attached.

- Andreas
From d07c1ad85a60395eb20501a59ff35de1d60f029b Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt 
Date: Tue, 11 Mar 2025 17:26:43 +0100
Subject: [PATCH 1/7] avcodec/pcm: Remove always-false check

All codecs here have a valid sample size at this point.
This check has (presumably) been added for DVD PCM,
but even for them the check was always-true after
381e195b46d080aee1d9b05ef2b6b140e9463519 (and the DVD
code was later moved out altogether). So just remove
this check and the leftover DVD comment.

Signed-off-by: Andreas Rheinhardt 
---
 libavcodec/pcm.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/libavcodec/pcm.c b/libavcodec/pcm.c
index a23293dca2..5d8dcb8ff0 100644
--- a/libavcodec/pcm.c
+++ b/libavcodec/pcm.c
@@ -334,7 +334,6 @@ static int pcm_decode_frame(AVCodecContext *avctx, AVFrame *frame,
 
 sample_size = av_get_bits_per_sample(avctx->codec_id) / 8;
 
-/* av_get_bits_per_sample returns 0 for AV_CODEC_ID_PCM_DVD */
 samples_per_block = 1;
 if (avctx->codec_id == AV_CODEC_ID_PCM_LXF) {
 /* we process 40-bit blocks per channel for LXF */
@@ -342,11 +341,6 @@ static int pcm_decode_frame(AVCodecContext *avctx, AVFrame *frame,
 sample_size   = 5;
 }
 
-if (sample_size == 0) {
-av_log(avctx, AV_LOG_ERROR, "Invalid sample_size\n");
-return AVERROR(EINVAL);
-}
-
 if (channels == 0) {
 av_log(avctx, AV_LOG_ERROR, "Invalid number of channels\n");
 return AVERROR(EINVAL);
-- 
2.45.2

From 8d65f524e5d5c5b262e5d9bac6c1c319f72d5a24 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt 
Date: Tue, 11 Mar 2025 17:53:06 +0100
Subject: [PATCH 2/7] avcodec/pcm: Cache sample_size value

No need to go through two switches per packet.

Signed-off-by: Andreas Rheinhardt 
---
 libavcodec/pcm.c | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/libavcodec/pcm.c b/libavcodec/pcm.c
index 5d8dcb8ff0..620acf0f46 100644
--- a/libavcodec/pcm.c
+++ b/libavcodec/pcm.c
@@ -243,6 +243,7 @@ static int pcm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 }
 
 typedef struct PCMDecode {
+int sample_size;
 short   table[256];
 void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
int len);
@@ -286,8 +287,14 @@ static av_cold int pcm_decode_init(AVCodecContext *avctx)
 
 avctx->sample_fmt = avctx->codec->sample_fmts[0];
 
-if (avctx->sample_fmt == AV_SAMPLE_FMT_S32)
-avctx->bits_per_raw_sample = av_get_bits_per_sample(avctx->codec_id);
+if (avctx->codec_id != AV_CODEC_ID_PCM_LXF) {
+int bits_per_sample = av_get_exact_bits_per_sample(avctx->codec_id);
+if (avctx->sample_fmt == AV_SAMPLE_FMT_S32)
+avctx->bits_per_raw_sample = bits_per_sample;
+s->sample_size = bits_per_sample / 8;
+} else {
+s->sample_size = 5;
+}
 
 return 0;
 }
@@ -328,17 +335,14 @@ static int pcm_decode_frame(AVCodecContext *avctx, AVFrame *frame,
 int buf_size   = avpkt->size;
 PCMDecode *s   = avctx->priv_data;
 int channels   = avctx->ch_layout.nb_channels;
-int sample_size, c, n, ret, samples_per_block;
+int sample_size = s->sample_size, c, n, ret, samples_per_block;
 uint8_t *samples;
 int32_t *dst_int32_t;
 
-sample_size = av_get_bits_per_sample(avctx->codec_id) / 8;
-
 samples_per_block = 1;
 if (avctx->codec_id == AV_CODEC_ID_PCM_LXF) {
 /* we process 40-bit blocks per channel for LXF */
 samples_per_block = 2;
-sample_size   = 5;
 }
 
 if (channels == 0) {
-- 
2.45.2

From c221a9f2f31c8fcb65e478e8d71b127d9466f3d3 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt 
Date: Thu, 13 Mar 2025 04:37:12 +0100
Subject: [PATCH 3/7] avcodec/pcm: Remove duplication from FFCodec define
 macros

Signed-off-by: Andreas Rheinhardt 
---
 libavcodec/pcm.c | 78 +---
 1 file changed, 41 insertions(+), 37 deletions(-)

diff --git a/libavcodec/pcm.c b/libavcodec/pcm.c
index 620acf0f46..6c1feecca3 100644
--- a/libavcodec/pcm.c
+++ b/libavcodec/pcm.c
@@ -550,7 +550,7 @@ const FFCodec ff_ ## name_ ## _encoder = {  \
 .p.name   = #name_, \
 CODEC_LONG_NAME(long_name_),\
 .p.type   = AVMEDIA_TYPE_AUDIO, \
-.p.id = AV_CODEC_ID_ ## id_,\
+.p.id = id_,\
 .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_VARIABLE_FRAME_SIZE | \
   AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,\
 .init = pcm_encode_init,\
@@ -563,7 +563,8 @@ const FFCodec ff_ ## name_ ## _encoder = {  \
 #d