[FFmpeg-devel] [PATCH] avformat: add Limitless Audio Format demuxer

2022-09-12 Thread Paul B Mahol
Patch attached.
From d867b825507b5f38a051dd0ccf4612b7570a2088 Mon Sep 17 00:00:00 2001
From: Paul B Mahol 
Date: Sun, 11 Sep 2022 20:10:27 +0200
Subject: [PATCH] avformat: add LAF demuxer

Signed-off-by: Paul B Mahol 
---
 libavformat/Makefile |   1 +
 libavformat/allformats.c |   1 +
 libavformat/lafdec.c | 253 +++
 3 files changed, 255 insertions(+)
 create mode 100644 libavformat/lafdec.c

diff --git a/libavformat/Makefile b/libavformat/Makefile
index 5cdcda3239..19a4ba2a8f 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -319,6 +319,7 @@ OBJS-$(CONFIG_JV_DEMUXER)+= jvdec.o
 OBJS-$(CONFIG_KUX_DEMUXER)   += flvdec.o
 OBJS-$(CONFIG_KVAG_DEMUXER)  += kvag.o
 OBJS-$(CONFIG_KVAG_MUXER)+= kvag.o rawenc.o
+OBJS-$(CONFIG_LAF_DEMUXER)   += lafdec.o
 OBJS-$(CONFIG_LATM_MUXER)+= latmenc.o rawenc.o
 OBJS-$(CONFIG_LMLM4_DEMUXER) += lmlm4.o
 OBJS-$(CONFIG_LOAS_DEMUXER)  += loasdec.o rawdec.o
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index cebd5e0c67..a545b5ff45 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -236,6 +236,7 @@ extern const AVInputFormat  ff_jv_demuxer;
 extern const AVInputFormat  ff_kux_demuxer;
 extern const AVInputFormat  ff_kvag_demuxer;
 extern const AVOutputFormat ff_kvag_muxer;
+extern const AVInputFormat  ff_laf_demuxer;
 extern const AVOutputFormat ff_latm_muxer;
 extern const AVInputFormat  ff_lmlm4_demuxer;
 extern const AVInputFormat  ff_loas_demuxer;
diff --git a/libavformat/lafdec.c b/libavformat/lafdec.c
new file mode 100644
index 00..35bce2b327
--- /dev/null
+++ b/libavformat/lafdec.c
@@ -0,0 +1,253 @@
+/*
+ * Limitless Audio Format demuxer
+ * Copyright (c) 2022 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "avformat.h"
+#include "internal.h"
+
+typedef struct StreamParams {
+float horizontal;
+float vertical;
+int lfe;
+AVChannelLayout layout;
+} StreamParams;
+
+typedef struct LAFContext {
+uint8_t *data;
+unsigned nb_stored;
+unsigned stored_index;
+unsigned index;
+unsigned bpp;
+
+StreamParams p[1024];
+} LAFContext;
+
+typedef struct LAFStream {
+unsigned stored;
+} LAFStream;
+
+static int laf_probe(const AVProbeData *p)
+{
+if (memcmp(p->buf, "LIMITLESS", 9))
+return 0;
+if (memcmp(p->buf + 9, "HEAD", 4))
+return 0;
+return AVPROBE_SCORE_MAX;
+}
+
+static int laf_read_header(AVFormatContext *ctx)
+{
+LAFContext *s = ctx->priv_data;
+AVIOContext *pb = ctx->pb;
+unsigned st_count, mode;
+unsigned sample_rate;
+int64_t duration;
+int codec_id;
+int quality;
+int bpp;
+
+avio_skip(pb, 9);
+if (avio_rb32(pb) != MKBETAG('H','E','A','D'))
+return AVERROR_INVALIDDATA;
+
+quality = avio_r8(pb);
+if (quality > 3)
+return AVERROR_INVALIDDATA;
+mode = avio_r8(pb);
+if (mode > 1)
+return AVERROR_INVALIDDATA;
+st_count = avio_rl32(pb);
+if (st_count == 0 || st_count > 1024)
+return AVERROR_INVALIDDATA;
+
+for (int i = 0; i < st_count; i++) {
+StreamParams *stp = &s->p[i];
+
+stp->vertical = av_int2float(avio_rl32(pb));
+stp->horizontal = av_int2float(avio_rl32(pb));
+stp->lfe = avio_r8(pb);
+if (stp->lfe) {
+stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, (AV_CH_LOW_FREQUENCY));
+} else if (stp->vertical == 0.f &&
+   stp->horizontal == 0.f) {
+stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, (AV_CH_FRONT_CENTER));
+} else if (stp->vertical == 0.f &&
+   stp->horizontal == -30.f) {
+stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, (AV_CH_FRONT_LEFT));
+} else if (stp->vertical == 0.f &&
+   stp->horizontal == 30.f) {
+stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, (AV_CH_FRONT_RIGHT));
+} else if (stp->vertical == 0.f &&
+   stp->horizontal == -110.f) {
+stp->layout = (AVChannelLayout)AV_CHANNEL_LA

Re: [FFmpeg-devel] [PATCH] avformat: add Limitless Audio Format demuxer

2022-09-12 Thread Andreas Rheinhardt
Paul B Mahol:
> +static int laf_read_header(AVFormatContext *ctx)
> +{
> +LAFContext *s = ctx->priv_data;
> +AVIOContext *pb = ctx->pb;
> +unsigned st_count, mode;
> +unsigned sample_rate;
> +int64_t duration;
> +int codec_id;
> +int quality;
> +int bpp;
> +
> +avio_skip(pb, 9);
> +if (avio_rb32(pb) != MKBETAG('H','E','A','D'))
> +return AVERROR_INVALIDDATA;
> +
> +quality = avio_r8(pb);
> +if (quality > 3)
> +return AVERROR_INVALIDDATA;
> +mode = avio_r8(pb);
> +if (mode > 1)
> +return AVERROR_INVALIDDATA;
> +st_count = avio_rl32(pb);
> +if (st_count == 0 || st_count > 1024)

I don't know whether the limit of 1024 is arbitrary or something from
some spec. If it is the latter, you should use a #define for it and also
for the size of the StreamParams array in the ctx. If it is the former,
you might just use FF_ARRAY_ELEMS(s->p) instead of 1024 here. Or a
define, as you prefer.

> +return AVERROR_INVALIDDATA;
> +
> +for (int i = 0; i < st_count; i++) {
> +StreamParams *stp = &s->p[i];
> +
> +stp->vertical = av_int2float(avio_rl32(pb));
> +stp->horizontal = av_int2float(avio_rl32(pb));
> +stp->lfe = avio_r8(pb);
> +if (stp->lfe) {
> +stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, 
> (AV_CH_LOW_FREQUENCY));
> +} else if (stp->vertical == 0.f &&
> +   stp->horizontal == 0.f) {
> +stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, 
> (AV_CH_FRONT_CENTER));
> +} else if (stp->vertical == 0.f &&
> +   stp->horizontal == -30.f) {
> +stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, 
> (AV_CH_FRONT_LEFT));
> +} else if (stp->vertical == 0.f &&
> +   stp->horizontal == 30.f) {
> +stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, 
> (AV_CH_FRONT_RIGHT));
> +} else if (stp->vertical == 0.f &&
> +   stp->horizontal == -110.f) {
> +stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, 
> (AV_CH_SIDE_LEFT));
> +} else if (stp->vertical == 0.f &&
> +   stp->horizontal == 110.f) {
> +stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, 
> (AV_CH_SIDE_RIGHT));
> +} else {
> +stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
> +}
> +}
> +
> +sample_rate = avio_rl32(pb);
> +duration = avio_rl64(pb) / st_count;
> +switch (quality) {
> +case 0:
> +codec_id = AV_CODEC_ID_PCM_U8;
> +bpp = 1;
> +break;
> +case 1:
> +codec_id = AV_CODEC_ID_PCM_S16LE;
> +bpp = 2;
> +break;
> +case 2:
> +codec_id = AV_CODEC_ID_PCM_F32LE;
> +bpp = 4;
> +break;
> +case 3:
> +codec_id = AV_CODEC_ID_PCM_S24LE;
> +bpp = 3;
> +break;
> +}
> +
> +s->index = 0;
> +s->stored_index = 0;
> +s->bpp = bpp;
> +s->data = av_mallocz(st_count * sample_rate * bpp);

sample_rate is read via avio_rl32() and therefore the multiplication on
the right can overflow (it's performed in 32bits, so this can happen
even on 64bit systems). Maybe use av_calloc(sample_rate, st_count *
bpp). But you also need to ensure that sample_rate actually fits into an
int and that st_count * sample_rate * bpp performed in the avio_read()
below also fits into an int, so you should probably just ensure this here.

> +if (!s->data)
> +return AVERROR(ENOMEM);
> +
> +for (int st = 0; st < st_count; st++) {
> +StreamParams *stp = &s->p[st];
> +LAFStream *lafst;
> +AVCodecParameters *par;
> +AVStream *st = avformat_new_stream(ctx, NULL);
> +if (!st)
> +return AVERROR(ENOMEM);
> +
> +par = st->codecpar;
> +par->codec_id = codec_id;
> +par->codec_type = AVMEDIA_TYPE_AUDIO;
> +par->ch_layout.nb_channels = 1;
> +par->ch_layout = stp->layout;
> +par->sample_rate = sample_rate;
> +st->duration = duration;
> +st->priv_data = lafst = av_mallocz(sizeof(LAFStream));

lafst is set-but-unused. And given that you are already imposing a
hardcoded limit on the number of streams you could just add an array of
1024 uint8_t to your context.

> +if (!st->priv_data)
> +return AVERROR(ENOMEM);
> +
> +avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
> +}
> +
> +return 0;
> +}
> +
> +static int laf_read_packet(AVFormatContext *ctx, AVPacket *pkt)
> +{
> +AVIOContext *pb = ctx->pb;
> +LAFContext *s = ctx->priv_data;
> +AVStream *st = ctx->streams[0];
> +LAFStream *lafst = st->priv_data;
> +const int bpp = s->bpp;
> +int header_len = (ctx->nb_streams / 8) + !!(ctx->nb_streams & 7);

(ctx->nb_streams + 7) / 8.

> +int64_t pos;
> +int ret;
> +
> +again:
> +if (avio_feof(pb))
> +

Re: [FFmpeg-devel] [PATCH v2] avcodec/arm/sbcenc: avoid callee preserved vfp registers

2022-09-12 Thread Martin Storsjö

On Sun, 25 Aug 2019, James Cowgill wrote:


When compiling FFmpeg with GCC-9, some very random segfaults were
observed in code which had previously called down into the SBC encoder
NEON assembly routines. This was caused by these functions clobbering
some of the vfp callee saved registers (d8 - d15 aka q4 - q7). GCC was
using these registers to save local variables, but after these
functions returned, they would contain garbage.

Fix by reallocating the registers in the two affected functions in
the following way:
ff_sbc_analyze_4_neon: q2-q5 => q8-q11, then q1-q4 => q8-q11
ff_sbc_analyze_8_neon: q2-q9 => q8-q15

The reason for using these replacements is to keep closely related
sets of registers consecutively numbered which hopefully makes the
code easier to follow. Since this commit only reallocates
registers, it should have no performance impact.

Signed-off-by: James Cowgill 
---

On 29/07/2019 19:59, Reimar Döffinger wrote:

Seems sensible to me, though extra points if you or someone has numbers on 
performance impact.
To know whether it would be worthwhile to check if it can be optimized...


Sorry for the long delay - been on various holidays.


Sorry for the even longer response ;-) I happened to run into this patch 
downstream, and noticed that it does look reasonable, but apparently the 
second round of the patch was missed back then in 2019.


Our current code is indeed broken and wrong - if we would have had 
checkasm tests for it, this issue would have been caught long ago.



I did a few tests on my original patch and overall it was about 2%
slower than before. In any case I think this new patch is a better
solution (although the diff is a lot larger). We don't actually need
that many registers in either of these functions, so instead of
pushing the clobbered callee saved registers, we can reallocate all
the registers to avoid them in the first place. This way there is no
performance impact.

I couldn't find any tests for this encoder, but I have tested a few
audio samples with it and verified the output is identical to what it
was before (and with what I get on x86).


Thanks a lot for doing that! Indeed that's the best we can do since we 
don't have tests for it.


I'll go ahead and push this patch soon.

// Martin
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] avcodec/bonk: Actually clip when using av_clip()

2022-09-12 Thread Andreas Rheinhardt
Also fixes a "statement with no effect [-Wunused-value]"
warning from GCC.

Signed-off-by: Andreas Rheinhardt 
---
 libavcodec/bonk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/bonk.c b/libavcodec/bonk.c
index f3d797d588..409694f710 100644
--- a/libavcodec/bonk.c
+++ b/libavcodec/bonk.c
@@ -280,7 +280,7 @@ static int predictor_calc_error(int *k, int *state, int 
order, int error)
 }
 
 // don't drift too far, to avoid overflows
-av_clip(x, -(SAMPLE_FACTOR << 16), SAMPLE_FACTOR << 16);
+x = av_clip(x, -(SAMPLE_FACTOR << 16), SAMPLE_FACTOR << 16);
 
 state[0] = x;
 
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] avcodec/bonk: Actually clip when using av_clip()

2022-09-12 Thread James Almer

On 9/12/2022 9:20 AM, Andreas Rheinhardt wrote:

Also fixes a "statement with no effect [-Wunused-value]"
warning from GCC.

Signed-off-by: Andreas Rheinhardt 
---
  libavcodec/bonk.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/bonk.c b/libavcodec/bonk.c
index f3d797d588..409694f710 100644
--- a/libavcodec/bonk.c
+++ b/libavcodec/bonk.c
@@ -280,7 +280,7 @@ static int predictor_calc_error(int *k, int *state, int 
order, int error)
  }
  
  // don't drift too far, to avoid overflows

-av_clip(x, -(SAMPLE_FACTOR << 16), SAMPLE_FACTOR << 16);
+x = av_clip(x, -(SAMPLE_FACTOR << 16), SAMPLE_FACTOR << 16);
  
  state[0] = x;


LGTM, but this decoder needs a test to ensure it's actually doing the 
right thing.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] fftools/opt_common: check the return value of av_hwdevice_get_type_name before printing it

2022-09-12 Thread James Almer
It may be NULL, as is the case for D3D11VA_VLD.

Running "ffmpeg -h decoder=h264" on a Windows build

Before:
Decoder h264 [H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10]:
Supported hardware devices: dxva2 (null) d3d11va cuda

After:
Decoder h264 [H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10]:
Supported hardware devices: dxva2 d3d11va cuda

Signed-off-by: James Almer 
---
 fftools/opt_common.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fftools/opt_common.c b/fftools/opt_common.c
index 7cd8b1c66e..8a06df82df 100644
--- a/fftools/opt_common.c
+++ b/fftools/opt_common.c
@@ -335,9 +335,12 @@ static void print_codec(const AVCodec *c)
 printf("Supported hardware devices: ");
 for (int i = 0;; i++) {
 const AVCodecHWConfig *config = avcodec_get_hw_config(c, i);
+const char *name;
 if (!config)
 break;
-printf("%s ", av_hwdevice_get_type_name(config->device_type));
+name = av_hwdevice_get_type_name(config->device_type);
+if (name)
+printf("%s ", name);
 }
 printf("\n");
 }
-- 
2.37.3

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] Bonk, Bonk

2022-09-12 Thread Martijn van Beurden
Op wo 7 sep. 2022 om 19:59 schreef Paul B Mahol :

> Patches attached.
>
> Could decoder be made faster?
>
>
Haven't reviewed, but great to have another codec added. Concerning the
speed, as far as I know Bonk is slow to decode. Its website states that it
is slow: http://www.logarithmic.net/pfh/bonk Also, an old comparison that
included it (I haven't seen any more recent one) ranked it the slowest
decoding codec:
https://web.archive.org/web/20080225210007/http://flac.sourceforge.net/comparison.html

Although OptimFROG and the higher MP4ALS preset are probably much
slower/more CPU intensive to decode nowadays.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 1/2] fate/id3v2: Add test for reading and writing UTF-16 BOM tags

2022-09-12 Thread Andreas Rheinhardt
Andreas Rheinhardt:
> Signed-off-by: Andreas Rheinhardt 
> ---
>  tests/fate/id3v2.mak   |  5 
>  tests/ref/fate/id3v2-utf16-bom | 42 ++
>  2 files changed, 47 insertions(+)
>  create mode 100644 tests/ref/fate/id3v2-utf16-bom
> 
> diff --git a/tests/fate/id3v2.mak b/tests/fate/id3v2.mak
> index 4dca681e38..7ad4d877a4 100644
> --- a/tests/fate/id3v2.mak
> +++ b/tests/fate/id3v2.mak
> @@ -7,6 +7,11 @@ fate-id3v2-priv-remux: CMD = transcode mp3 
> $(TARGET_SAMPLES)/id3v2/id3v2_priv.mp
>  FATE_ID3V2_FFMPEG_FFPROBE-$(call REMUX, AIFF, WAV_DEMUXER) += 
> fate-id3v2-chapters
>  fate-id3v2-chapters: CMD = transcode wav 
> $(TARGET_SAMPLES)/wav/200828-005.wav aiff "-c copy -metadata:c:0 
> description=foo -metadata:c:0 date=2021 -metadata:c copyright=none 
> -metadata:c:1 genre=nonsense -write_id3v2 1" "-c copy -t 0.05" "-show_entries 
> format_tags:chapters"
>  
> +# Tests reading and writing UTF-16 BOM strings; also tests
> +# the AIFF muxer's and demuxer's ability to preserve channel layouts.
> +FATE_ID3V2_FFMPEG_FFPROBE-$(call REMUX, AIFF, WAV_DEMUXER FLAC_DEMUXER 
> PCM_S16LE_DECODER MJPEG_DECODER ARESAMPLE_FILTER CHANNELMAP_FILTER 
> PCM_S24BE_ENCODER) += fate-id3v2-utf16-bom
> +fate-id3v2-utf16-bom: CMD = transcode wav 
> $(TARGET_SAMPLES)/audio-reference/yo.raw-short.wav aiff "-map 0:a -map 1:v 
> -af aresample,channelmap=channel_layout=hexagonal,aresample -c:a pcm_s24be 
> -c:v copy -write_id3v2 1 -id3v2_version 3 -map_metadata:g:0 1:g 
> -map_metadata:s:v 1:g" "-c copy -t 0.05" "-show_entries 
> stream=channel_layout:stream_tags:format_tags" "-i 
> $(TARGET_SAMPLES)/cover_art/cover_art.flac"
> +
>  FATE_SAMPLES_FFPROBE+= $(FATE_ID3V2_FFPROBE-yes)
>  FATE_SAMPLES_FFMPEG_FFPROBE += $(FATE_ID3V2_FFMPEG_FFPROBE-yes)
>  fate-id3v2: $(FATE_ID3V2_FFPROBE-yes) $(FATE_ID3V2_FFMPEG_FFPROBE-yes)
> diff --git a/tests/ref/fate/id3v2-utf16-bom b/tests/ref/fate/id3v2-utf16-bom
> new file mode 100644
> index 00..dd2566de2b
> --- /dev/null
> +++ b/tests/ref/fate/id3v2-utf16-bom
> @@ -0,0 +1,42 @@
> +9b8bfdf87a8d3d089819ef9f6f264ec4 *tests/data/fate/id3v2-utf16-bom.aiff
> +885482 tests/data/fate/id3v2-utf16-bom.aiff
> +#tb 0: 1/9
> +#media_type 0: video
> +#codec_id 0: mjpeg
> +#dimensions 0: 350x350
> +#sar 0: 1/1
> +#tb 1: 1/48000
> +#media_type 1: audio
> +#codec_id 1: pcm_s24be
> +#sample_rate 1: 48000
> +#channel_layout_name 1: hexagonal
> +0,  0,  0,0,19650, 0xd5662610
> +1,  0,  0,  227, 4086, 0x
> +1,227,227,  227, 4086, 0x
> +1,454,454,  227, 4086, 0x
> +1,681,681,  227, 4086, 0x667b2643
> +1,908,908,  227, 4086, 0x9a09957d
> +1,   1135,   1135,  227, 4086, 0x763e27c5
> +1,   1362,   1362,  227, 4086, 0x2a47f536
> +1,   1589,   1589,  227, 4086, 0xed32e5f2
> +1,   1816,   1816,  227, 4086, 0x2e96c720
> +1,   2043,   2043,  227, 4086, 0x84c5b5f0
> +1,   2270,   2270,  227, 4086, 0xe3dfeefc
> +[STREAM]
> +channel_layout=hexagonal
> +[/STREAM]
> +[STREAM]
> +TAG:title=Дороги
> +TAG:comment=Other
> +[/STREAM]
> +[FORMAT]
> +TAG:artist=Мельница
> +TAG:RATING=0
> +TAG:album=Ангелофрения
> +TAG:title=Дороги
> +TAG:tracktotal=11
> +TAG:totaltracks=11
> +TAG:genre=Folk
> +TAG:track=2
> +TAG:date=2012
> +[/FORMAT]

Will apply this patchset tonight unless there are objections.

- Andreas
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] avformat/bonk: Don't set data_offset to what it would be set to anyway

2022-09-12 Thread Andreas Rheinhardt
Signed-off-by: Andreas Rheinhardt 
---
 libavformat/bonk.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavformat/bonk.c b/libavformat/bonk.c
index fc400979b3..0fff0b5bda 100644
--- a/libavformat/bonk.c
+++ b/libavformat/bonk.c
@@ -79,7 +79,6 @@ static int bonk_read_header(AVFormatContext *s)
 return AVERROR_INVALIDDATA;
 st->duration  = AV_RL32(st->codecpar->extradata + 1) / 
st->codecpar->ch_layout.nb_channels;
 avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
-ffformatcontext(s)->data_offset = avio_tell(s->pb);
 
 return 0;
 }
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] avcodec/x86/Makefile: Don't build empty files

2022-09-12 Thread Andreas Rheinhardt
Should fix ticket #9909, fixing a regression since
bfb28b5ce89f3e950214b67ea95b45e3355c2caf.

Thanks to Carl Eugen Hoyos for analyzing the issue.

Signed-off-by: Andreas Rheinhardt 
---
This would be my solution. What do you think of it?

 libavcodec/x86/Makefile | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 4e448623af..41ca864849 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -140,8 +140,11 @@ X86ASM-OBJS-$(CONFIG_QPELDSP)  += x86/qpeldsp.o
 \
 X86ASM-OBJS-$(CONFIG_RV34DSP)  += x86/rv34dsp.o
 X86ASM-OBJS-$(CONFIG_VC1DSP)   += x86/vc1dsp_loopfilter.o   \
   x86/vc1dsp_mc.o
-X86ASM-OBJS-$(CONFIG_IDCTDSP)  += x86/simple_idct10.o   \
-  x86/simple_idct.o
+ifdef ARCH_X86_64
+X86ASM-OBJS-$(CONFIG_IDCTDSP)  += x86/simple_idct10.o
+else
+X86ASM-OBJS-$(CONFIG_IDCTDSP)  += x86/simple_idct.o
+endif
 X86ASM-OBJS-$(CONFIG_VIDEODSP) += x86/videodsp.o
 X86ASM-OBJS-$(CONFIG_VP3DSP)   += x86/vp3dsp.o
 X86ASM-OBJS-$(CONFIG_VP8DSP)   += x86/vp8dsp.o  \
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] avutil/x86/float_dsp: add fma3 for scalarproduct

2022-09-12 Thread Paul B Mahol
Patch attached.
From f7c47b8eefa1c06a74d17f13b4e9010785dc6430 Mon Sep 17 00:00:00 2001
From: Paul B Mahol 
Date: Wed, 20 Jan 2021 16:58:31 +0100
Subject: [PATCH] avutil/x86/float_dsp: add fma3 for scalarproduct

Signed-off-by: Paul B Mahol 
---
 libavutil/x86/float_dsp.asm| 127 +
 libavutil/x86/float_dsp_init.c |   2 +
 2 files changed, 129 insertions(+)

diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index cca4d019c7..8f8e6dddf5 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -440,6 +440,133 @@ cglobal scalarproduct_float, 3,3,2, v1, v2, offset
 %endif
 RET
 
+INIT_YMM fma3
+cglobal scalarproduct_float, 3,5,8, v1, v2, size, len, offset
+xor   offsetq, offsetq
+xorps  m0, m0
+shl sized, 2
+mov  lenq, sizeq
+cmp  lenq, 32
+jl   .l16
+cmp  lenq, 64
+jl   .l32
+xorpsm1, m1
+cmp  lenq, 128
+jl   .l64
+andlenq, ~127
+xorpsm2, m2
+xorpsm3, m3
+.loop128:
+movups   m4, [v1q+offsetq]
+movups   m5, [v1q+offsetq + 32]
+movups   m6, [v1q+offsetq + 64]
+movups   m7, [v1q+offsetq + 96]
+fmaddps  m0, m4, [v2q+offsetq ], m0
+fmaddps  m1, m5, [v2q+offsetq + 32], m1
+fmaddps  m2, m6, [v2q+offsetq + 64], m2
+fmaddps  m3, m7, [v2q+offsetq + 96], m3
+add   offsetq, 128
+cmp   offsetq, lenq
+jl .loop128
+addpsm0, m2
+addpsm1, m3
+mov  lenq, sizeq
+and  lenq, 127
+cmp  lenq, 64
+jge .l64
+addpsm0, m1
+cmp  lenq, 32
+jge .l32
+vextractf128 xmm2, m0, 1
+addpsxmm0, xmm2
+cmp  lenq, 16
+jge .l16
+movhlps  xmm1, xmm0
+addpsxmm0, xmm1
+movssxmm1, xmm0
+shufps   xmm0, xmm0, 1
+addssxmm0, xmm1
+%if ARCH_X86_64 == 0
+movss r0m, xm0
+fld dword r0m
+%endif
+RET
+.l64:
+andlenq, ~63
+addlenq, offsetq
+.loop64:
+movups   m4, [v1q+offsetq]
+movups   m5, [v1q+offsetq + 32]
+fmaddps  m0, m4, [v2q+offsetq], m0
+fmaddps  m1, m5, [v2q+offsetq + 32], m1
+add   offsetq, 64
+cmp   offsetq, lenq
+jl .loop64
+addpsm0, m1
+mov  lenq, sizeq
+and  lenq, 63
+cmp  lenq, 32
+jge .l32
+vextractf128 xmm2, m0, 1
+addpsxmm0, xmm2
+cmp  lenq, 16
+jge .l16
+movhlps  xmm1, xmm0
+addpsxmm0, xmm1
+movssxmm1, xmm0
+shufps   xmm0, xmm0, 1
+addssxmm0, xmm1
+%if ARCH_X86_64 == 0
+movss r0m, xm0
+fld dword r0m
+%endif
+RET
+.l32:
+andlenq, ~31
+addlenq, offsetq
+.loop32:
+movups   m4, [v1q+offsetq]
+fmaddps  m0, m4, [v2q+offsetq], m0
+add   offsetq, 32
+cmp   offsetq, lenq
+jl .loop32
+vextractf128 xmm2, m0, 1
+addpsxmm0, xmm2
+mov  lenq, sizeq
+and  lenq, 31
+cmp  lenq, 16
+jge .l16
+movhlps  xmm1, xmm0
+addpsxmm0, xmm1
+movssxmm1, xmm0
+shufps   xmm0, xmm0, 1
+addssxmm0, xmm1
+%if ARCH_X86_64 == 0
+movss r0m, xm0
+fld dword r0m
+%endif
+RET
+.l16:
+andlenq, ~15
+addlenq, offsetq
+.loop16:
+movaps   xmm1, [v1q+offsetq]
+mulpsxmm1, [v2q+offsetq]
+addpsxmm0, xmm1
+add   offsetq, 16
+cmp   offsetq, lenq
+jl .loop16
+movhlps  xmm1, xmm0
+addpsxmm0, xmm1
+movssxmm1, xmm0
+shufps   xmm0, xmm0, 1
+addssxmm0, xmm1
+%if ARCH_X86_64 == 0
+movss r0m, xm0
+fld dword r0m
+%endif
+RET
+
 ;-
 ; void ff_butterflies_float(float *src0, float *src1, int len);
 ;-
diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
index ad17bc2044..ad6b506259 100644
--- a/libavutil/x86/float_dsp_init.c
+++ b/libavutil/x86/float_dsp_init.c
@@ -74,6 +74,7 @@ void ff_vector_fmul_reverse_avx2(float *dst, const float *src0,
  const float *src1, int len);
 
 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
+float ff_scalarproduct_float_fma3(const float *v1, const float *v2, int order);
 
 void ff_butterflies_float_sse(float *av_restrict src0, float *av_restrict src1, int len);
 
@@ -112,5 +113,6 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
 fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_fma3;
 fdsp->vector_fmul_add= ff_vector_fmul_add_fma3;
 fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_fma3;
+fdsp->scalarproduct_float = ff_scalarproduct_float_fma3;
 }
 }
-- 
2.37.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 01/18] doc: reference the RISC-V specification

2022-09-12 Thread remi
From: Rémi Denis-Courmont 

---
 doc/optimization.txt | 5 +
 1 file changed, 5 insertions(+)

diff --git a/doc/optimization.txt b/doc/optimization.txt
index 974e2f9af2..3ed29fe38c 100644
--- a/doc/optimization.txt
+++ b/doc/optimization.txt
@@ -267,6 +267,11 @@ CELL/SPU:
 
http://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/30B3520C93F437AB87257060006FFE5E/$file/Language_Extensions_for_CBEA_2.4.pdf
 
http://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/9F820A5FFA3ECE8C8725716A0062585F/$file/CBE_Handbook_v1.1_24APR2007_pub.pdf
 
+RISC-V-specific:
+
+The RISC-V Instruction Set Manual, Volume 1, Unprivileged ISA:
+https://riscv.org/technical/specifications/
+
 GCC asm links:
 --
 official doc but quite ugly
-- 
2.37.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 02/18] lavu/riscv: AV_READ_TIME cycle counter

2022-09-12 Thread remi
From: Rémi Denis-Courmont 

This uses the architected RISC-V 64-bit cycle counter from the
RISC-V unprivileged instruction set.

In 64-bit and 128-bit, this is a straightforward CSR read.
In 32-bit mode, the 64-bit value is exposed as two CSRs, which
cannot be read atomically, so a loop is necessary to detect and fix up
the race condition where the bottom half wraps exactly between the two
reads.
---
 libavutil/riscv/timer.h | 53 +
 libavutil/timer.h   |  2 ++
 2 files changed, 55 insertions(+)
 create mode 100644 libavutil/riscv/timer.h

diff --git a/libavutil/riscv/timer.h b/libavutil/riscv/timer.h
new file mode 100644
index 00..a34157a566
--- /dev/null
+++ b/libavutil/riscv/timer.h
@@ -0,0 +1,53 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_RISCV_TIMER_H
+#define AVUTIL_RISCV_TIMER_H
+
+#include "config.h"
+
+#if HAVE_INLINE_ASM
+#include 
+
+static inline uint64_t rdcycle64(void)
+{
+#if (__riscv_xlen >= 64)
+uintptr_t cycles;
+
+__asm__ volatile ("rdcycle %0" : "=r"(cycles));
+
+#else
+uint64_t cycles;
+uint32_t hi, lo, check;
+
+__asm__ volatile (
+"1: rdcycleh %0\n"
+"   rdcycle  %1\n"
+"   rdcycleh %2\n"
+"   bne %0, %2, 1b\n" : "=r" (hi), "=r" (lo), "=r" (check));
+
+cycles = (((uint64_t)hi) << 32) | lo;
+
+#endif
+return cycles;
+}
+
+#define AV_READ_TIME rdcycle64
+
+#endif
+#endif /* AVUTIL_RISCV_TIMER_H */
diff --git a/libavutil/timer.h b/libavutil/timer.h
index 48e576739f..d3db5a27ef 100644
--- a/libavutil/timer.h
+++ b/libavutil/timer.h
@@ -57,6 +57,8 @@
 #   include "arm/timer.h"
 #elif ARCH_PPC
 #   include "ppc/timer.h"
+#elif ARCH_RISCV
+#   include "riscv/timer.h"
 #elif ARCH_X86
 #   include "x86/timer.h"
 #endif
-- 
2.37.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 03/18] configure/riscv: detect fast CLZ

2022-09-12 Thread remi
From: Rémi Denis-Courmont 

RISC-V defines the CLZ instruction as part of the ratified Zbb subset
of the (not yet ratified) bit manipulation extension (B). We can detect
it from the __riscv_zbb predefined constant. At least GCC 12 already
supports this correctly.

Note that the macro will be non-zero if supported, zero if enabled
in the compiler flags (e.g. -march=rv64gzbb) but not known to the
compiler, and undefined otherwise.
---
 configure | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/configure b/configure
index 9e51abd0d3..b7dc1d8656 100755
--- a/configure
+++ b/configure
@@ -5334,6 +5334,12 @@ elif enabled ppc; then
 ;;
 esac
 
+elif enabled riscv; then
+
+if test_cpp_condition stddef.h "__riscv_zbb"; then
+enable fast_clz
+fi
+
 elif enabled sparc; then
 
 case $cpu in
-- 
2.37.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 04/18] lavu/riscv: byte-swap operations

2022-09-12 Thread remi
From: Rémi Denis-Courmont 

If the target supports the Basic bit-manipulation (Zbb) extension, then
the REV8 instruction is available to reverse byte order.

Note that this instruction only exists at the "XLEN" register size,
so we need to right shift the result down to the data width.

If Zbb is not supported, then this patchset does nothing. Support for
run-time detection is left for the future. Currently, there are no
bits in auxv/ELF HWCAP for Z-extensions, so there are no clean ways to
do this.
---
 libavutil/bswap.h   |  2 ++
 libavutil/riscv/bswap.h | 74 +
 2 files changed, 76 insertions(+)
 create mode 100644 libavutil/riscv/bswap.h

diff --git a/libavutil/bswap.h b/libavutil/bswap.h
index 91cb79538d..4840ab433f 100644
--- a/libavutil/bswap.h
+++ b/libavutil/bswap.h
@@ -40,6 +40,8 @@
 #   include "arm/bswap.h"
 #elif ARCH_AVR32
 #   include "avr32/bswap.h"
+#elif ARCH_RISCV
+#   include "riscv/bswap.h"
 #elif ARCH_SH4
 #   include "sh4/bswap.h"
 #elif ARCH_X86
diff --git a/libavutil/riscv/bswap.h b/libavutil/riscv/bswap.h
new file mode 100644
index 00..de1429c0f7
--- /dev/null
+++ b/libavutil/riscv/bswap.h
@@ -0,0 +1,74 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_RISCV_BSWAP_H
+#define AVUTIL_RISCV_BSWAP_H
+
+#include <stdint.h>
+#include "config.h"
+#include "libavutil/attributes.h"
+
+#if defined (__riscv_zbb) && (__riscv_zbb > 0) && HAVE_INLINE_ASM
+
+static av_always_inline av_const uintptr_t av_bswap_xlen(uintptr_t x)
+{
+uintptr_t y;
+/* Zbb REV8 reverses the byte order of a whole XLEN-wide register. */
+__asm__("rev8 %0, %1" : "=r" (y) : "r" (x));
+return y;
+}
+
+#define av_bswap16 av_bswap16
+/* 16-bit swap: reverse the full register, then shift the two swapped bytes down from the top. */
+static av_always_inline av_const uint_fast16_t av_bswap16(uint_fast16_t x)
+{
+return av_bswap_xlen(x) >> (__riscv_xlen - 16);
+}
+
+#if (__riscv_xlen == 32)
+#define av_bswap32 av_bswap_xlen
+#define av_bswap64 av_bswap64
+/* 32-bit registers: swap each 32-bit half separately and exchange the two halves. */
+static av_always_inline av_const uint64_t av_bswap64(uint64_t x)
+{
+return (((uint64_t)av_bswap32(x)) << 32) | av_bswap32(x >> 32);
+}
+
+#else
+#define av_bswap32 av_bswap32
+/* Wider registers: reverse the full register, then shift the swapped 32 bits down. */
+static av_always_inline av_const uint_fast32_t av_bswap32(uint_fast32_t x)
+{
+return av_bswap_xlen(x) >> (__riscv_xlen - 32);
+}
+
+#if (__riscv_xlen == 64)
+#define av_bswap64 av_bswap_xlen
+
+#else
+#define av_bswap64 av_bswap64
+/* Registers wider than 64 bits: same approach, shifting the swapped 64 bits down. */
+static av_always_inline av_const uint_fast64_t av_bswap64(uint_fast64_t x)
+{
+return av_bswap_xlen(x) >> (__riscv_xlen - 64);
+}
+
+#endif /* __riscv_xlen > 64 */
+#endif /* __riscv_xlen > 32 */
+#endif /* __riscv_zbb */
+#endif /* AVUTIL_RISCV_BSWAP_H */
-- 
2.37.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 05/18] lavu/riscv: add optimisations

2022-09-12 Thread remi
From: Rémi Denis-Courmont 

This provides some micro-optimisations for signed integer clipping, and
support for bit weight with the Zbb extension.
---
 libavutil/intmath.h   |   5 +-
 libavutil/riscv/intmath.h | 103 ++
 2 files changed, 106 insertions(+), 2 deletions(-)
 create mode 100644 libavutil/riscv/intmath.h

diff --git a/libavutil/intmath.h b/libavutil/intmath.h
index 9573109e9d..c54d23b7bf 100644
--- a/libavutil/intmath.h
+++ b/libavutil/intmath.h
@@ -28,8 +28,9 @@
 
 #if ARCH_ARM
 #   include "arm/intmath.h"
-#endif
-#if ARCH_X86
+#elif ARCH_RISCV
+#   include "riscv/intmath.h"
+#elif ARCH_X86
 #   include "x86/intmath.h"
 #endif
 
diff --git a/libavutil/riscv/intmath.h b/libavutil/riscv/intmath.h
new file mode 100644
index 00..78f7ba930a
--- /dev/null
+++ b/libavutil/riscv/intmath.h
@@ -0,0 +1,103 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_RISCV_INTMATH_H
+#define AVUTIL_RISCV_INTMATH_H
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+
+/*
+ * The compiler is forced to sign-extend the result anyhow, so it is faster to
+ * compute it explicitly and use it.
+ */
+#define av_clip_int8 av_clip_int8_rvi
+static av_always_inline av_const int8_t av_clip_int8_rvi(int a)
+{
+    /* Keep the low 8 bits and read them back as a signed byte. */
+    const union { uint8_t u; int8_t s; } low = { .u = a };
+
+    /* If that round trip changed the value, saturate to 0x7F or ~0x7F by sign. */
+    return (a == low.s) ? a : ((a >> 31) ^ 0x7F);
+}
+
+#define av_clip_int16 av_clip_int16_rvi
+static av_always_inline av_const int16_t av_clip_int16_rvi(int a)
+{
+    union { uint16_t u; int16_t s; } u = { .u = a }; /* low 16 bits, re-read as signed */
+    /* If truncation changed the value, saturate to 0x7FFF or ~0x7FFF by sign. */
+    if (a != u.s)
+        a = ((a >> 31) ^ 0x7FFF);
+    return a;
+}
+
+#define av_clipl_int32 av_clipl_int32_rvi
+static av_always_inline av_const int32_t av_clipl_int32_rvi(int64_t a)
+{
+    union { uint32_t u; int32_t s; } u = { .u = a }; /* low 32 bits, re-read as signed */
+    /* If truncation changed the value, saturate to 0x7FFFFFFF or ~0x7FFFFFFF by sign. */
+    if (a != u.s)
+        a = ((a >> 63) ^ 0x7FFFFFFF);
+    return a;
+}
+
+#define av_clip_intp2 av_clip_intp2_rvi
+static av_always_inline av_const int av_clip_intp2_rvi(int a, int p)
+{
+    const int shift = 31 - p; /* keep p + 1 bits: p magnitude bits plus the sign */
+    int b = ((int)(((unsigned)a) << shift)) >> shift; /* sign-extend without signed overflow */
+    /* Clip to [-(1 << p), (1 << p) - 1]: saturate by sign when a did not fit. */
+    if (a != b)
+        b = (a >> 31) ^ ((1 << p) - 1);
+    return b;
+}
+
+#if defined (__riscv_zbb) && (__riscv_zbb > 0) && HAVE_INLINE_ASM
+
+#define av_popcount av_popcount_rvb
+static av_always_inline av_const int av_popcount_rvb(uint32_t x)
+{
+int ret;
+/* Bit weight of a 32-bit value: CPOPW when registers are 64 bits or wider, plain CPOP otherwise. */
+#if (__riscv_xlen >= 64)
+__asm__ ("cpopw %0, %1\n" : "=r" (ret) : "r" (x));
+#else
+__asm__ ("cpop %0, %1\n" : "=r" (ret) : "r" (x));
+#endif
+return ret;
+}
+
+#if (__riscv_xlen >= 64)
+#define av_popcount64 av_popcount64_rvb
+static av_always_inline av_const int av_popcount64_rvb(uint64_t x)
+{
+int ret;
+/* Bit weight of a 64-bit value; this function is only compiled when XLEN is at least 64. */
+#if (__riscv_xlen >= 128)
+__asm__ ("cpopd %0, %1\n" : "=r" (ret) : "r" (x));
+#else
+__asm__ ("cpop %0, %1\n" : "=r" (ret) : "r" (x));
+#endif
+return ret;
+}
+#endif /* __riscv_xlen >= 64 */
+#endif /* __riscv_zbb */
+
+#endif /* AVUTIL_RISCV_INTMATH_H */
-- 
2.37.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 07/18] lavu/riscv: initial common header for assembler macros

2022-09-12 Thread remi
From: Rémi Denis-Courmont 

---
 libavutil/riscv/asm.S | 74 +++
 1 file changed, 74 insertions(+)
 create mode 100644 libavutil/riscv/asm.S

diff --git a/libavutil/riscv/asm.S b/libavutil/riscv/asm.S
new file mode 100644
index 00..7623c161cf
--- /dev/null
+++ b/libavutil/riscv/asm.S
@@ -0,0 +1,74 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#if defined (__riscv_float_abi_soft) /* soft-float ABI: NOHWF/NOHWD expand to nothing, HWF/HWD to a hash */
+#define NOHWF
+#define NOHWD
+#define HWF   #
+#define HWD   #
+#elif defined (__riscv_float_abi_single) /* single-float ABI: NOHWD and HWF expand to nothing */
+#define NOHWF #
+#define NOHWD
+#define HWF
+#define HWD   #
+#else /* otherwise: HWF and HWD expand to nothing, NOHWF/NOHWD to a hash */
+#define NOHWF #
+#define NOHWD #
+#define HWF
+#define HWD
+#endif
+
+.macro func sym, ext=
+.text
+.align 2
+/* Save the assembler options and, if an extension name was given, enable it for this function. */
+.option push
+.ifnb \ext
+.option arch, +\ext
+.endif
+/* Emit the entry label as a global, hidden symbol of function type. */
+.global \sym
+.hidden \sym
+.type   \sym, %function
+\sym:
+/* endfunc is defined per function: it sets the symbol size, undoes the state above and removes itself. */
+.macro endfunc
+.size   \sym, . - \sym
+.option pop
+.previous
+.purgem endfunc
+.endm
+.endm
+
+.macro const sym, align=3, relocate=0
+.if \relocate /* non-zero relocate selects .data.rel.ro, otherwise .rodata */
+.pushsection .data.rel.ro
+.else
+.pushsection .rodata
+.endif
+.align \align
+\sym:
+/* endconst is defined per constant: it sets the symbol size, pops the section and removes itself. */
+.macro endconst
+.size  \sym, . - \sym
+.popsection
+.purgem endconst
+.endm
+.endm
-- 
2.37.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 06/18] configure: probe RISC-V Vector extension

2022-09-12 Thread remi
From: Rémi Denis-Courmont 

---
 Makefile |  2 +-
 configure| 15 +++
 ffbuild/arch.mak |  2 ++
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 61f79e27ae..1fb742f390 100644
--- a/Makefile
+++ b/Makefile
@@ -91,7 +91,7 @@ ffbuild/.config: $(CONFIGURABLE_COMPONENTS)
 SUBDIR_VARS := CLEANFILES FFLIBS HOSTPROGS TESTPROGS TOOLS   \
HEADERS ARCH_HEADERS BUILT_HEADERS SKIPHEADERS\
ARMV5TE-OBJS ARMV6-OBJS ARMV8-OBJS VFP-OBJS NEON-OBJS \
-   ALTIVEC-OBJS VSX-OBJS MMX-OBJS X86ASM-OBJS\
+   ALTIVEC-OBJS VSX-OBJS RVV-OBJS MMX-OBJS X86ASM-OBJS   \
MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSP-OBJS MSA-OBJS \
MMI-OBJS LSX-OBJS LASX-OBJS OBJS SLIBOBJS SHLIBOBJS   \
STLIBOBJS HOSTOBJS TESTOBJS
diff --git a/configure b/configure
index b7dc1d8656..c5f20cc323 100755
--- a/configure
+++ b/configure
@@ -462,6 +462,7 @@ Optimization options (experts only):
   --disable-mmidisable Loongson MMI optimizations
   --disable-lsxdisable Loongson LSX optimizations
   --disable-lasx   disable Loongson LASX optimizations
+  --disable-rvvdisable RISC-V Vector optimizations
   --disable-fast-unaligned consider unaligned accesses slow
 
 Developer options (useful when working on FFmpeg itself):
@@ -2126,6 +2127,10 @@ ARCH_EXT_LIST_PPC="
 vsx
 "
 
+ARCH_EXT_LIST_RISCV="
+rvv
+"
+
 ARCH_EXT_LIST_X86="
 $ARCH_EXT_LIST_X86_SIMD
 cpunop
@@ -2135,6 +2140,7 @@ ARCH_EXT_LIST_X86="
 ARCH_EXT_LIST="
 $ARCH_EXT_LIST_ARM
 $ARCH_EXT_LIST_PPC
+$ARCH_EXT_LIST_RISCV
 $ARCH_EXT_LIST_X86
 $ARCH_EXT_LIST_MIPS
 $ARCH_EXT_LIST_LOONGSON
@@ -2642,6 +2648,8 @@ ppc4xx_deps="ppc"
 vsx_deps="altivec"
 power8_deps="vsx"
 
+rvv_deps="riscv"
+
 loongson2_deps="mips"
 loongson3_deps="mips"
 mmi_deps_any="loongson2 loongson3"
@@ -6110,6 +6118,10 @@ elif enabled ppc; then
 check_cpp_condition power8 "altivec.h" "defined(_ARCH_PWR8)"
 fi
 
+elif enabled riscv; then
+    # Keep rvv enabled only if the assembler accepts the vector arch option and a vector instruction.
+    enabled rvv && check_inline_asm rvv '".option arch, +v\nvsetivli zero, 0, e8, m1, ta, ma"'
+
 elif enabled x86; then
 
 check_builtin rdtscintrin.h   "__rdtsc()"
@@ -7596,6 +7608,9 @@ if enabled loongarch; then
 echo "LSX enabled   ${lsx-no}"
 echo "LASX enabled  ${lasx-no}"
 fi
+if enabled riscv; then # summary line for the optional vector extension
+    echo "RISC-V Vector enabled     ${rvv-no}"
+fi
 echo "debug symbols ${debug-no}"
 echo "strip symbols ${stripping-no}"
 echo "optimize for size ${small-no}"
diff --git a/ffbuild/arch.mak b/ffbuild/arch.mak
index 997e31e85e..39d76ee152 100644
--- a/ffbuild/arch.mak
+++ b/ffbuild/arch.mak
@@ -15,5 +15,7 @@ OBJS-$(HAVE_LASX)  += $(LASX-OBJS)   $(LASX-OBJS-yes)
 OBJS-$(HAVE_ALTIVEC) += $(ALTIVEC-OBJS) $(ALTIVEC-OBJS-yes)
 OBJS-$(HAVE_VSX) += $(VSX-OBJS) $(VSX-OBJS-yes)
 
+OBJS-$(HAVE_RVV) += $(RVV-OBJS) $(RVV-OBJS-yes)
+
 OBJS-$(HAVE_MMX) += $(MMX-OBJS) $(MMX-OBJS-yes)
 OBJS-$(HAVE_X86ASM)  += $(X86ASM-OBJS)  $(X86ASM-OBJS-yes)
-- 
2.37.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 09/18] checkasm: register the RISC-V V subsets

2022-09-12 Thread remi
From: Rémi Denis-Courmont 

---
 tests/checkasm/checkasm.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index e56fd3850e..a5d0503811 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -226,6 +226,11 @@ static const struct {
 { "ALTIVEC",  "altivec",  AV_CPU_FLAG_ALTIVEC },
 { "VSX",  "vsx",  AV_CPU_FLAG_VSX },
 { "POWER8",   "power8",   AV_CPU_FLAG_POWER8 },
+#elif ARCH_RISCV
+{ "Zve32x",   "zve32x",   AV_CPU_FLAG_ZVE32X },
+{ "Zve32f",   "zve32f",   AV_CPU_FLAG_ZVE32F },
+{ "Zve64x",   "zve64x",   AV_CPU_FLAG_ZVE64X },
+{ "Zve64d",   "zve64d",   AV_CPU_FLAG_ZVE64D },
 #elif ARCH_MIPS
 { "MMI",  "mmi",  AV_CPU_FLAG_MMI },
 { "MSA",  "msa",  AV_CPU_FLAG_MSA },
-- 
2.37.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 08/18] lavu/riscv: add CPU flags for the RISC-V Vector extension

2022-09-12 Thread remi
From: Rémi Denis-Courmont 

RVV defines a total of 12 different extensions, including:

- 5 different instruction subsets:
  - Zve32x: 8-, 16- and 32-bit integers,
  - Zve32f: Zve32x plus single precision floats,
  - Zve64x: Zve32x plus 64-bit integers,
  - Zve64f: Zve32f plus Zve64x,
  - Zve64d: Zve64f plus double precision floats.

- 6 different vector lengths:
  - Zvl32b (embedded only),
  - Zvl64b (embedded only),
  - Zvl128b,
  - Zvl256b,
  - Zvl512b,
  - Zvl1024b,

- and the V extension proper: equivalent to Zve64d and Zvl128b.

In total, there are 6 different possible sets of supported instructions
(including the empty set), but for convenience we allocate one bit for
each type set: up-to-32-bit ints (ZVE32X), floats (ZVE32F),
64-bit ints (ZVE64X) and doubles (ZVE64D).

When the vector size is needed, it can be retrieved by reading the
unprivileged read-only vlenb CSR. This should probably be a separate
helper macro if needed at a later point.
---
 libavutil/cpu.c  | 15 +++
 libavutil/cpu.h  |  6 +
 libavutil/cpu_internal.h |  1 +
 libavutil/riscv/Makefile |  1 +
 libavutil/riscv/cpu.c| 57 
 5 files changed, 80 insertions(+)
 create mode 100644 libavutil/riscv/Makefile
 create mode 100644 libavutil/riscv/cpu.c

diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 0035e927a5..89d2fb6f56 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -62,6 +62,8 @@ static int get_cpu_flags(void)
 return ff_get_cpu_flags_arm();
 #elif ARCH_PPC
 return ff_get_cpu_flags_ppc();
+#elif ARCH_RISCV
+return ff_get_cpu_flags_riscv();
 #elif ARCH_X86
 return ff_get_cpu_flags_x86();
 #elif ARCH_LOONGARCH
@@ -178,6 +180,19 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
 #elif ARCH_LOONGARCH
 { "lsx",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LSX 
 },.unit = "flags" },
 { "lasx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LASX
 },.unit = "flags" },
+#elif ARCH_RISCV /* each *_M mask is a subset flag plus the flags of every subset it builds on */
+#define AV_CPU_FLAG_ZVE32X_M (AV_CPU_FLAG_ZVE32X)
+#define AV_CPU_FLAG_ZVE32F_M (AV_CPU_FLAG_ZVE32X_M | AV_CPU_FLAG_ZVE32F)
+#define AV_CPU_FLAG_ZVE64X_M (AV_CPU_FLAG_ZVE32X_M | AV_CPU_FLAG_ZVE64X)
+#define AV_CPU_FLAG_ZVE64F_M (AV_CPU_FLAG_ZVE32F_M | AV_CPU_FLAG_ZVE64X)
+#define AV_CPU_FLAG_ZVE64D_M (AV_CPU_FLAG_ZVE64F_M | AV_CPU_FLAG_ZVE64D)
+#define AV_CPU_FLAG_VECTORS  AV_CPU_FLAG_ZVE64D_M /* "vectors" selects every vector flag */
+        { "vectors",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VECTORS  },    .unit = "flags" },
+        { "zve32x",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE32X   },    .unit = "flags" },
+        { "zve32f",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE32F_M },    .unit = "flags" },
+        { "zve64x",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE64X_M },    .unit = "flags" },
+        { "zve64f",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE64F_M },    .unit = "flags" },
+        { "zve64d",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE64D_M },    .unit = "flags" },
 #endif
 { NULL },
 };
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 9711e574c5..44836e50d6 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -78,6 +78,12 @@
 #define AV_CPU_FLAG_LSX  (1 << 0)
 #define AV_CPU_FLAG_LASX (1 << 1)
 
+// RISC-V Vector extension
+#define AV_CPU_FLAG_ZVE32X   (1 << 0) /* 8-, 16-, 32-bit integers */
+#define AV_CPU_FLAG_ZVE32F   (1 << 1) /* single precision scalars */
+#define AV_CPU_FLAG_ZVE64X   (1 << 2) /* 64-bit integers */
+#define AV_CPU_FLAG_ZVE64D   (1 << 3) /* double precision scalars */
+
 /**
  * Return the flags which specify extensions supported by the CPU.
  * The returned value is affected by av_force_cpu_flags() if that was used
diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h
index 650d47fc96..634f28bac4 100644
--- a/libavutil/cpu_internal.h
+++ b/libavutil/cpu_internal.h
@@ -48,6 +48,7 @@ int ff_get_cpu_flags_mips(void);
 int ff_get_cpu_flags_aarch64(void);
 int ff_get_cpu_flags_arm(void);
 int ff_get_cpu_flags_ppc(void);
+int ff_get_cpu_flags_riscv(void);
 int ff_get_cpu_flags_x86(void);
 int ff_get_cpu_flags_loongarch(void);
 
diff --git a/libavutil/riscv/Makefile b/libavutil/riscv/Makefile
new file mode 100644
index 00..1f818043dc
--- /dev/null
+++ b/libavutil/riscv/Makefile
@@ -0,0 +1 @@
+OBJS += riscv/cpu.o
diff --git a/libavutil/riscv/cpu.c b/libavutil/riscv/cpu.c
new file mode 100644
index 00..9e4cce5e8b
--- /dev/null
+++ b/libavutil/riscv/cpu.c
@@ -0,0 +1,57 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; witho

[FFmpeg-devel] [PATCH 11/18] lavu/riscv: float vector-vector multiplication with RVV

2022-09-12 Thread remi
From: Rémi Denis-Courmont 

---
 libavutil/riscv/float_dsp_init.c |  9 -
 libavutil/riscv/float_dsp_rvv.S  | 34 
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index f1d3d52877..903da4eeda 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -23,9 +23,13 @@
 #include "libavutil/cpu.h"
 #include "libavutil/float_dsp.h"
 
+void ff_vector_fmul_rvv(float *dst, const float *src0, const float *src1,
+ int len);
 void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
 int len);
 
+void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1,
+ int len);
 void ff_vector_dmul_scalar_rvv(double *dst, const double *src, double mul,
 int len);
 
@@ -35,10 +39,13 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext 
*fdsp)
 int flags = av_get_cpu_flags();
 
 if (flags & AV_CPU_FLAG_ZVE32F) {
+fdsp->vector_fmul = ff_vector_fmul_rvv;
 fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
 
-if (flags & AV_CPU_FLAG_ZVE64D)
+if (flags & AV_CPU_FLAG_ZVE64D) {
+fdsp->vector_dmul = ff_vector_dmul_rvv;
 fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_rvv;
+}
 }
 #endif
 }
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index 365e00190c..65c3a77b01 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -19,6 +19,23 @@
 #include "config.h"
 #include "asm.S"
 
+// (a0) = (a1) * (a2) [0..a3-1]
+func ff_vector_fmul_rvv, zve32f
+1:  vsetvli  t0, a3, e32, m8, ta, ma
+slli t1, t0, 2 // t1 = t0 * 4: bytes covered by t0 floats
+vle32.v  v16, (a1)
+add  a1, a1, t1
+vle32.v  v24, (a2)
+add  a2, a2, t1
+vfmul.vv v16, v16, v24 // element-wise product
+sub  a3, a3, t0 // elements still to process
+vse32.v  v16, (a0)
+add  a0, a0, t1
+bnez a3, 1b
+
+ret
+endfunc
+
 // (a0) = (a1) * fa0 [0..a2-1]
 func ff_vector_fmul_scalar_rvv, zve32f
 NOHWF   fmv.w.x  fa0, a2
@@ -37,6 +54,23 @@ NOHWF   mv   a2, a3
 ret
 endfunc
 
+// (a0) = (a1) * (a2) [0..a3-1]
+func ff_vector_dmul_rvv, zve64d
+1:  vsetvli  t0, a3, e64, m8, ta, ma
+slli t1, t0, 3 // t1 = t0 * 8: bytes covered by t0 doubles
+vle64.v  v16, (a1)
+add  a1, a1, t1
+vle64.v  v24, (a2)
+add  a2, a2, t1
+vfmul.vv v16, v16, v24 // element-wise product
+sub  a3, a3, t0 // elements still to process
+vse64.v  v16, (a0)
+add  a0, a0, t1
+bnez a3, 1b
+
+ret
+endfunc
+
 // (a0) = (a1) * fa0 [0..a2-1]
 func ff_vector_dmul_scalar_rvv, zve64d
 NOHWD   fmv.d.x  fa0, a2
-- 
2.37.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 10/18] lavu/riscv: float vector-scalar multiplication with RVV

2022-09-12 Thread remi
From: Rémi Denis-Courmont 

This is based on existing code from the VLC git tree with two minor
changes to account for the different function prototypes.
---
 libavutil/float_dsp.c|  2 ++
 libavutil/float_dsp.h|  1 +
 libavutil/riscv/Makefile |  4 ++-
 libavutil/riscv/float_dsp_init.c | 44 +
 libavutil/riscv/float_dsp_rvv.S  | 56 
 5 files changed, 106 insertions(+), 1 deletion(-)
 create mode 100644 libavutil/riscv/float_dsp_init.c
 create mode 100644 libavutil/riscv/float_dsp_rvv.S

diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
index 8676c8b0f8..742dd679d2 100644
--- a/libavutil/float_dsp.c
+++ b/libavutil/float_dsp.c
@@ -156,6 +156,8 @@ av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int 
bit_exact)
 ff_float_dsp_init_arm(fdsp);
 #elif ARCH_PPC
 ff_float_dsp_init_ppc(fdsp, bit_exact);
+#elif ARCH_RISCV
+ff_float_dsp_init_riscv(fdsp);
 #elif ARCH_X86
 ff_float_dsp_init_x86(fdsp);
 #elif ARCH_MIPS
diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
index 9c664592bd..7cad9fc622 100644
--- a/libavutil/float_dsp.h
+++ b/libavutil/float_dsp.h
@@ -205,6 +205,7 @@ float avpriv_scalarproduct_float_c(const float *v1, const 
float *v2, int len);
 void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp);
 void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp);
 void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict);
+void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp);
 void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp);
 void ff_float_dsp_init_mips(AVFloatDSPContext *fdsp);
 
diff --git a/libavutil/riscv/Makefile b/libavutil/riscv/Makefile
index 1f818043dc..89a8d0d990 100644
--- a/libavutil/riscv/Makefile
+++ b/libavutil/riscv/Makefile
@@ -1 +1,3 @@
-OBJS += riscv/cpu.o
+OBJS += riscv/float_dsp_init.o \
+riscv/cpu.o
+RVV-OBJS += riscv/float_dsp_rvv.o
diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
new file mode 100644
index 00..f1d3d52877
--- /dev/null
+++ b/libavutil/riscv/float_dsp_init.c
@@ -0,0 +1,44 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/float_dsp.h"
+
+void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
+int len);
+
+void ff_vector_dmul_scalar_rvv(double *dst, const double *src, double mul,
+int len);
+
+av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
+{
+#if HAVE_RVV /* install each RVV routine only when the run-time CPU flags report the subset it needs */
+int flags = av_get_cpu_flags();
+
+if (flags & AV_CPU_FLAG_ZVE32F) {
+fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
+
+if (flags & AV_CPU_FLAG_ZVE64D)
+fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_rvv;
+}
+#endif
+}
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
new file mode 100644
index 00..365e00190c
--- /dev/null
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -0,0 +1,56 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "asm.S"
+
+// (a0) = (a1) * fa0 [0..a2-1]
+func ff_vector_fmul_scalar_rvv, zve32f
+NOHWF   fmv.w.x  fa0, a2
+NOHWF   mv   a2, a3
+
+1:  vsetvli  t0, a2, e32, m8, ta, ma
+slli t1, t0, 2
+vle32.v  v16, (a1)
+add  a1, a1, t1
+vfmul.vf v16, v16, fa0
+sub  a2, a2, t0
+vse32.v  v16, (a0)
+a

[FFmpeg-devel] [PATCH 12/18] lavu/riscv: float vector multiply-accumulate with RVV

2022-09-12 Thread remi
From: Rémi Denis-Courmont 

---
 libavutil/riscv/float_dsp_init.c |  6 +
 libavutil/riscv/float_dsp_rvv.S  | 38 
 2 files changed, 44 insertions(+)

diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index 903da4eeda..1381eadab6 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -25,11 +25,15 @@
 
 void ff_vector_fmul_rvv(float *dst, const float *src0, const float *src1,
  int len);
+void ff_vector_fmac_scalar_rvv(float *dst, const float *src, float mul,
+int len);
 void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
 int len);
 
 void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1,
  int len);
+void ff_vector_dmac_scalar_rvv(double *dst, const double *src, double mul,
+int len);
 void ff_vector_dmul_scalar_rvv(double *dst, const double *src, double mul,
 int len);
 
@@ -40,10 +44,12 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext 
*fdsp)
 
 if (flags & AV_CPU_FLAG_ZVE32F) {
 fdsp->vector_fmul = ff_vector_fmul_rvv;
+fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv;
 fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
 
 if (flags & AV_CPU_FLAG_ZVE64D) {
 fdsp->vector_dmul = ff_vector_dmul_rvv;
+fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_rvv;
 fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_rvv;
 }
 }
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index 65c3a77b01..5a7d92abd6 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -36,6 +36,25 @@ func ff_vector_fmul_rvv, zve32f
 ret
 endfunc
 
+// (a0) += (a1) * fa0 [0..a2-1]
+func ff_vector_fmac_scalar_rvv, zve32f
+NOHWF   fmv.w.x   fa0, a2
+NOHWF   mv        a2, a3
+
+1:      vsetvli   t0, a2, e32, m8, ta, ma
+        slli      t1, t0, 2              // t1 = t0 * 4: bytes covered by t0 floats
+        vle32.v   v24, (a1)
+        add       a1, a1, t1
+        vle32.v   v16, (a0)
+        vfmacc.vf v16, fa0, v24          // v16 += fa0 * v24
+        sub       a2, a2, t0             // elements still to process
+        vse32.v   v16, (a0)
+        add       a0, a0, t1
+        bnez      a2, 1b
+
+        ret
+endfunc
+
 // (a0) = (a1) * fa0 [0..a2-1]
 func ff_vector_fmul_scalar_rvv, zve32f
 NOHWF   fmv.w.x  fa0, a2
@@ -71,6 +90,25 @@ func ff_vector_dmul_rvv, zve64d
 ret
 endfunc
 
+// (a0) += (a1) * fa0 [0..a2-1]
+func ff_vector_dmac_scalar_rvv, zve64d
+NOHWD   fmv.d.x   fa0, a2
+NOHWD   mv        a2, a3
+
+1:      vsetvli   t0, a2, e64, m8, ta, ma
+        slli      t1, t0, 3              // t1 = t0 * 8: bytes covered by t0 doubles
+        vle64.v   v24, (a1)
+        add       a1, a1, t1
+        vle64.v   v16, (a0)
+        vfmacc.vf v16, fa0, v24          // v16 += fa0 * v24
+        sub       a2, a2, t0             // elements still to process
+        vse64.v   v16, (a0)
+        add       a0, a0, t1
+        bnez      a2, 1b
+
+        ret
+endfunc
+
 // (a0) = (a1) * fa0 [0..a2-1]
 func ff_vector_dmul_scalar_rvv, zve64d
 NOHWD   fmv.d.x  fa0, a2
-- 
2.37.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 18/18] lavu/riscv: fixed vector sum-and-difference with RVV

2022-09-12 Thread remi
From: Rémi Denis-Courmont 

---
 libavutil/fixed_dsp.c|  4 +++-
 libavutil/fixed_dsp.h|  1 +
 libavutil/riscv/Makefile |  4 +++-
 libavutil/riscv/fixed_dsp_init.c | 36 ++
 libavutil/riscv/fixed_dsp_rvv.S  | 38 
 5 files changed, 81 insertions(+), 2 deletions(-)
 create mode 100644 libavutil/riscv/fixed_dsp_init.c
 create mode 100644 libavutil/riscv/fixed_dsp_rvv.S

diff --git a/libavutil/fixed_dsp.c b/libavutil/fixed_dsp.c
index 154f3bc2d3..bc847949dc 100644
--- a/libavutil/fixed_dsp.c
+++ b/libavutil/fixed_dsp.c
@@ -162,7 +162,9 @@ AVFixedDSPContext * avpriv_alloc_fixed_dsp(int bit_exact)
 fdsp->butterflies_fixed = butterflies_fixed_c;
 fdsp->scalarproduct_fixed = scalarproduct_fixed_c;
 
-#if ARCH_X86
+#if ARCH_RISCV
+ff_fixed_dsp_init_riscv(fdsp);
+#elif ARCH_X86
 ff_fixed_dsp_init_x86(fdsp);
 #endif
 
diff --git a/libavutil/fixed_dsp.h b/libavutil/fixed_dsp.h
index fec806ff2d..1217d3a53b 100644
--- a/libavutil/fixed_dsp.h
+++ b/libavutil/fixed_dsp.h
@@ -161,6 +161,7 @@ typedef struct AVFixedDSPContext {
  */
 AVFixedDSPContext * avpriv_alloc_fixed_dsp(int strict);
 
+void ff_fixed_dsp_init_riscv(AVFixedDSPContext *fdsp);
 void ff_fixed_dsp_init_x86(AVFixedDSPContext *fdsp);
 
 /**
diff --git a/libavutil/riscv/Makefile b/libavutil/riscv/Makefile
index 89a8d0d990..1597154ba5 100644
--- a/libavutil/riscv/Makefile
+++ b/libavutil/riscv/Makefile
@@ -1,3 +1,5 @@
 OBJS += riscv/float_dsp_init.o \
+riscv/fixed_dsp_init.o \
 riscv/cpu.o
-RVV-OBJS += riscv/float_dsp_rvv.o
+RVV-OBJS += riscv/float_dsp_rvv.o \
+riscv/fixed_dsp_rvv.o
diff --git a/libavutil/riscv/fixed_dsp_init.c b/libavutil/riscv/fixed_dsp_init.c
new file mode 100644
index 00..fc143fb419
--- /dev/null
+++ b/libavutil/riscv/fixed_dsp_init.c
@@ -0,0 +1,36 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/fixed_dsp.h"
+
+void ff_butterflies_fixed_rvv(int *v1, int *v2, int len);
+
+av_cold void ff_fixed_dsp_init_riscv(AVFixedDSPContext *fdsp)
+{
+#if HAVE_RVV
+    /* The RVV butterflies are installed when the CPU reports the Zve32x flag. */
+    const int has_zve32x = av_get_cpu_flags() & AV_CPU_FLAG_ZVE32X;
+    if (has_zve32x)
+        fdsp->butterflies_fixed = ff_butterflies_fixed_rvv;
+#endif
+}
diff --git a/libavutil/riscv/fixed_dsp_rvv.S b/libavutil/riscv/fixed_dsp_rvv.S
new file mode 100644
index 00..beb1b949f7
--- /dev/null
+++ b/libavutil/riscv/fixed_dsp_rvv.S
@@ -0,0 +1,38 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "asm.S"
+
+// (a0) = (a0) + (a1), (a1) = (a0) - (a1) [0..a2-1]
+func ff_butterflies_fixed_rvv, zve32x
+1:      vsetvli t0, a2, e32, m8, ta, ma // t0 = 32-bit elements handled this pass
+        slli    t1, t0, 2               // t1 = the same count in bytes
+        vle32.v v16, (a0)
+        vle32.v v24, (a1)
+        vadd.vv v0, v16, v24            // sums
+        vsub.vv v8, v16, v24            // differences
+        sub     a2, a2, t0              // elements still to do
+        vse32.v v0, (a0)                // sums overwrite the first array
+        add     a0, a0, t1
+        vse32.v v8, (a1)                // differences overwrite the second array
+        add     a1, a1, t1
+        bnez    a2, 1b
+
+        ret
+endfunc
-- 
2.37.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 17/18] lavu/riscv: float vector dot product with RVV

2022-09-12 Thread remi
From: Rémi Denis-Courmont 

---
 libavutil/riscv/float_dsp_init.c |  2 ++
 libavutil/riscv/float_dsp_rvv.S  | 21 +
 2 files changed, 23 insertions(+)

diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index cf8c995d7c..055cdc7520 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -36,6 +36,7 @@ void ff_vector_fmul_add_rvv(float *dst, const float *src0, 
const float *src1,
 void ff_vector_fmul_reverse_rvv(float *dst, const float *src0,
  const float *src1, int len);
 void ff_butterflies_float_rvv(float *v1, float *v2, int len);
+float ff_scalarproduct_float_rvv(const float *v1, const float *v2, int len);
 
 void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1,
  int len);
@@ -57,6 +58,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
 fdsp->vector_fmul_add = ff_vector_fmul_add_rvv;
 fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_rvv;
 fdsp->butterflies_float = ff_butterflies_float_rvv;
+fdsp->scalarproduct_float = ff_scalarproduct_float_rvv;
 
 if (flags & AV_CPU_FLAG_ZVE64D) {
 fdsp->vector_dmul = ff_vector_dmul_rvv;
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index 65daaa2d27..81bd0e510a 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -167,6 +167,27 @@ func ff_butterflies_float_rvv, zve32f
 ret
 endfunc
 
+// a0 = (a0).(a1) [0..a2-1]
+func ff_scalarproduct_float_rvv, zve32f
+vsetvli  zero, zero, e32, m8, ta, ma // rd = rs1 = zero: sets vtype, keeps the current vl
+vmv.s.x  v8, zero // v8[0] = 0, the running sum. NOTE(review): vmv.s.x writes nothing when vl is 0 - confirm vl > 0 is guaranteed on entry
+
+1:  vsetvli  t0, a2, e32, m8, ta, ma // t0 = floats handled this pass
+slli t1, t0, 2 // t1 = the same count in bytes
+vle32.v  v16, (a0)
+add  a0, a0, t1
+vle32.v  v24, (a1)
+add  a1, a1, t1
+vfmul.vv v16, v16, v24 // element-wise products
+sub  a2, a2, t0 // floats still to do
+vfredusum.vs v8, v16, v8 // v8[0] += sum of the products (unordered reduction)
+bnez a2, 1b
+
+vfmv.f.s fa0, v8 // result = v8[0]
+NOHWF   fmv.x.w  a0, fa0 // copies the result bits to a0; NOHWF is presumably the soft-float-ABI case - see asm.S
+ret
+endfunc
+
 // (a0) = (a1) * (a2) [0..a3-1]
 func ff_vector_dmul_rvv, zve64d
 1:  vsetvli  t0, a3, e64, m8, ta, ma
-- 
2.37.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 13/18] lavu/riscv: float vector multiplication-addition with RVV

2022-09-12 Thread remi
From: Rémi Denis-Courmont 

---
 libavutil/riscv/float_dsp_init.c |  3 +++
 libavutil/riscv/float_dsp_rvv.S  | 19 +++
 2 files changed, 22 insertions(+)

diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index 1381eadab6..9bc1976d04 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -29,6 +29,8 @@ void ff_vector_fmac_scalar_rvv(float *dst, const float *src, 
float mul,
 int len);
 void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
 int len);
+void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1,
+ const float *src2, int len);
 
 void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1,
  int len);
@@ -46,6 +48,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
 fdsp->vector_fmul = ff_vector_fmul_rvv;
 fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv;
 fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
+fdsp->vector_fmul_add = ff_vector_fmul_add_rvv;
 
 if (flags & AV_CPU_FLAG_ZVE64D) {
 fdsp->vector_dmul = ff_vector_dmul_rvv;
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index 5a7d92abd6..efbf12179f 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -73,6 +73,25 @@ NOHWF   mv   a2, a3
 ret
 endfunc
 
+// (a0) = (a1) * (a2) + (a3) [0..a4-1]
+func ff_vector_fmul_add_rvv, zve32f
+1:  vsetvli   t0, a4, e32, m8, ta, ma // t0 = floats handled this pass
+slli  t1, t0, 2 // t1 = the same count in bytes
+vle32.v   v8, (a1)
+add   a1, a1, t1
+vle32.v   v16, (a2)
+add   a2, a2, t1
+vle32.v   v24, (a3)
+add   a3, a3, t1
+vfmadd.vv v8, v16, v24 // v8 = v8 * v16 + v24 (the destination register is a multiplicand)
+sub   a4, a4, t0 // floats still to do
+vse32.v   v8, (a0)
+add   a0, a0, t1
+bnez  a4, 1b
+
+ret
+endfunc
+
 // (a0) = (a1) * (a2) [0..a3-1]
 func ff_vector_dmul_rvv, zve64d
 1:  vsetvli  t0, a3, e64, m8, ta, ma
-- 
2.37.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 16/18] lavu/riscv: float vector windowed overlap/add with RVV

2022-09-12 Thread remi
From: Rémi Denis-Courmont 

---
 libavutil/riscv/float_dsp_init.c |  3 +++
 libavutil/riscv/float_dsp_rvv.S  | 35 
 2 files changed, 38 insertions(+)

diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index ae089d2fdb..cf8c995d7c 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -29,6 +29,8 @@ void ff_vector_fmac_scalar_rvv(float *dst, const float *src, 
float mul,
 int len);
 void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
 int len);
+void ff_vector_fmul_window_rvv(float *dst, const float *src0,
+const float *src1, const float *win, int len);
 void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1,
  const float *src2, int len);
 void ff_vector_fmul_reverse_rvv(float *dst, const float *src0,
@@ -51,6 +53,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
 fdsp->vector_fmul = ff_vector_fmul_rvv;
 fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv;
 fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
+fdsp->vector_fmul_window = ff_vector_fmul_window_rvv;
 fdsp->vector_fmul_add = ff_vector_fmul_add_rvv;
 fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_rvv;
 fdsp->butterflies_float = ff_butterflies_float_rvv;
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index b376392294..65daaa2d27 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -73,6 +73,41 @@ NOHWF   mv   a2, a3
 ret
 endfunc
 
+func ff_vector_fmul_window_rvv, zve32f
+        // a0: dst, a1: src0, a2: src1, a3: window, a4: length
+        addi       t0, a4, -1
+        add        t1, t0, a4           // t1 = 2 * length - 1
+        slli       t0, t0, 2            // byte offset of element length - 1
+        slli       t1, t1, 2            // byte offset of element 2 * length - 1
+        add        a2, a2, t0           // a2 -> src1[length - 1]
+        add        t0, a0, t1           // t0 -> dst[2 * length - 1]
+        add        t3, a3, t1           // t3 -> window[2 * length - 1]
+        li         t1, -4 // byte stride
+
+1:      vsetvli    t2, a4, e32, m4, ta, ma
+        slli       t4, t2, 2
+        vle32.v    v16, (a1)            // s0: src0, ascending
+        add        a1, a1, t4
+        vlse32.v   v20, (a2), t1        // s1: src1, descending
+        sub        a2, a2, t4
+        vle32.v    v24, (a3)            // wi: window, ascending from the start
+        add        a3, a3, t4
+        vlse32.v   v28, (t3), t1        // wj: window, descending from the end
+        sub        t3, t3, t4
+        vfmul.vv   v0, v16, v28
+        sub        a4, a4, t2
+        vfmul.vv   v8, v16, v24
+        vfnmsac.vv v0, v20, v24         // v0 = s0 * wj - s1 * wi
+        vfmacc.vv  v8, v20, v28         // v8 = s0 * wi + s1 * wj
+        vse32.v    v0, (a0)             // stored ascending from dst[0]
+        add        a0, a0, t4
+        vsse32.v   v8, (t0), t1         // stored descending from dst[2 * length - 1]
+        sub        t0, t0, t4
+        bnez       a4, 1b
+
+        ret
+endfunc
+
 // (a0) = (a1) * (a2) + (a3) [0..a4-1]
 func ff_vector_fmul_add_rvv, zve32f
 1:  vsetvli   t0, a4, e32, m8, ta, ma
-- 
2.37.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 14/18] lavu/riscv: float vector sum-and-difference with RVV

2022-09-12 Thread remi
From: Rémi Denis-Courmont 

---
 libavutil/riscv/float_dsp_init.c |  2 ++
 libavutil/riscv/float_dsp_rvv.S  | 18 ++
 2 files changed, 20 insertions(+)

diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index 9bc1976d04..c2b72c3b25 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -31,6 +31,7 @@ void ff_vector_fmul_scalar_rvv(float *dst, const float *src, 
float mul,
 int len);
 void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1,
  const float *src2, int len);
+void ff_butterflies_float_rvv(float *v1, float *v2, int len);
 
 void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1,
  int len);
@@ -49,6 +50,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
 fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv;
 fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
 fdsp->vector_fmul_add = ff_vector_fmul_add_rvv;
+fdsp->butterflies_float = ff_butterflies_float_rvv;
 
 if (flags & AV_CPU_FLAG_ZVE64D) {
 fdsp->vector_dmul = ff_vector_dmul_rvv;
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index efbf12179f..1c3b08b94f 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -92,6 +92,24 @@ func ff_vector_fmul_add_rvv, zve32f
 ret
 endfunc
 
+// (a0) = (a0) + (a1), (a1) = (a0) - (a1) [0..a2-1]
+func ff_butterflies_float_rvv, zve32f
+1:  vsetvli  t0, a2, e32, m8, ta, ma // t0 = floats handled this pass
+slli t1, t0, 2 // t1 = the same count in bytes
+vle32.v  v16, (a0)
+vle32.v  v24, (a1)
+vfadd.vv v0, v16, v24 // sums
+vfsub.vv v8, v16, v24 // differences
+sub  a2, a2, t0 // floats still to do
+vse32.v  v0, (a0) // sums overwrite the first array
+add  a0, a0, t1
+vse32.v  v8, (a1) // differences overwrite the second array
+add  a1, a1, t1
+bnez a2, 1b
+
+ret
+endfunc
+
 // (a0) = (a1) * (a2) [0..a3-1]
 func ff_vector_dmul_rvv, zve64d
 1:  vsetvli  t0, a3, e64, m8, ta, ma
-- 
2.37.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 15/18] lavu/riscv: float reversed vector multiplication with RVV

2022-09-12 Thread remi
From: Rémi Denis-Courmont 

---
 libavutil/riscv/float_dsp_init.c |  3 +++
 libavutil/riscv/float_dsp_rvv.S  | 22 ++
 2 files changed, 25 insertions(+)

diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index c2b72c3b25..ae089d2fdb 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -31,6 +31,8 @@ void ff_vector_fmul_scalar_rvv(float *dst, const float *src, 
float mul,
 int len);
 void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1,
  const float *src2, int len);
+void ff_vector_fmul_reverse_rvv(float *dst, const float *src0,
+ const float *src1, int len);
 void ff_butterflies_float_rvv(float *v1, float *v2, int len);
 
 void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1,
@@ -50,6 +52,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
 fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv;
 fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
 fdsp->vector_fmul_add = ff_vector_fmul_add_rvv;
+fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_rvv;
 fdsp->butterflies_float = ff_butterflies_float_rvv;
 
 if (flags & AV_CPU_FLAG_ZVE64D) {
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index 1c3b08b94f..b376392294 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -92,6 +92,28 @@ func ff_vector_fmul_add_rvv, zve32f
 ret
 endfunc
 
+// (a0) = (a1) * reverse(a2) [0..a3-1]
+func ff_vector_fmul_reverse_rvv, zve32f
+add  t3, a3, -1 // t3 = length - 1
+li   t2, -4 // byte stride: step backwards one float at a time
+slli t3, t3, 2 // ... as a byte offset
+add  a2, a2, t3 // a2 -> last float of the second source
+
+1:  vsetvli  t0, a3, e32, m8, ta, ma // t0 = floats handled this pass
+slli t1, t0, 2 // t1 = the same count in bytes
+vle32.v  v16, (a1) // first source, ascending
+add  a1, a1, t1
+vlse32.v v24, (a2), t2 // second source, descending
+sub  a2, a2, t1
+vfmul.vv v16, v16, v24
+sub  a3, a3, t0 // floats still to do
+vse32.v  v16, (a0)
+add  a0, a0, t1
+bnez a3, 1b
+
+ret
+endfunc
+
 // (a0) = (a0) + (a1), (a1) = (a0) - (a1) [0..a2-1]
 func ff_butterflies_float_rvv, zve32f
 1:  vsetvli  t0, a2, e32, m8, ta, ma
-- 
2.37.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCHv5 0/18] RISC-V cycle counters, Zbb & initial V extension support

2022-09-12 Thread Rémi Denis-Courmont
The following changes since commit 3ce6fa6b6d099dcad43bb0178334441ab72df4cc:

  avformat: add bonk demuxer (2022-09-12 11:35:43 +0200)

are available in the Git repository at:

  git.remlab.net:git/ffmpeg.git rvv

for you to fetch changes up to ceb5ead12aba107292a09a342570f6fd12c9951d:

  lavu/riscv: fixed vector sum-and-difference with RVV (2022-09-12 17:57:07 
+0300)

Change since v4:
- Marked RVV makefile variables as directory-specific to pave the way
  for optimisations elsewhere than libavutil.


Rémi Denis-Courmont (18):
  doc: reference the RISC-V specification
  lavu/riscv: AV_READ_TIME cycle counter
  configure/riscv: detect fast CLZ
  lavu/riscv: byte-swap operations
  lavu/riscv: add  optimisations
  configure: probe RISC-V Vector extension
  lavu/riscv: initial common header for assembler macros
  lavu/riscv: add CPU flags for the RISC-V Vector extension
  checkasm: register the RISC-V V subsets
  lavu/riscv: float vector-scalar multiplication with RVV
  lavu/riscv: float vector-vector multiplication with RVV
  lavu/riscv: float vector multiply-accumulate with RVV
  lavu/riscv: float vector multiplication-addition with RVV
  lavu/riscv: float vector sum-and-difference with RVV
  lavu/riscv: float reversed vector multiplication with RVV
  lavu/riscv: float vector windowed overlap/add with RVV
  lavu/riscv: float vector dot product with RVV
  lavu/riscv: fixed vector sum-and-difference with RVV

 Makefile |   2 +-
 configure|  21 
 doc/optimization.txt |   5 +
 ffbuild/arch.mak |   2 +
 libavutil/bswap.h|   2 +
 libavutil/cpu.c  |  15 +++
 libavutil/cpu.h  |   6 +
 libavutil/cpu_internal.h |   1 +
 libavutil/fixed_dsp.c|   4 +-
 libavutil/fixed_dsp.h|   1 +
 libavutil/float_dsp.c|   2 +
 libavutil/float_dsp.h|   1 +
 libavutil/intmath.h  |   5 +-
 libavutil/riscv/Makefile |   5 +
 libavutil/riscv/asm.S|  74 
 libavutil/riscv/bswap.h  |  74 
 libavutil/riscv/cpu.c|  57 +
 libavutil/riscv/fixed_dsp_init.c |  36 ++
 libavutil/riscv/fixed_dsp_rvv.S  |  38 ++
 libavutil/riscv/float_dsp_init.c |  70 +++
 libavutil/riscv/float_dsp_rvv.S  | 243 +++
 libavutil/riscv/intmath.h| 103 +
 libavutil/riscv/timer.h  |  53 +
 libavutil/timer.h|   2 +
 tests/checkasm/checkasm.c|   5 +
 25 files changed, 823 insertions(+), 4 deletions(-)
 create mode 100644 libavutil/riscv/Makefile
 create mode 100644 libavutil/riscv/asm.S
 create mode 100644 libavutil/riscv/bswap.h
 create mode 100644 libavutil/riscv/cpu.c
 create mode 100644 libavutil/riscv/fixed_dsp_init.c
 create mode 100644 libavutil/riscv/fixed_dsp_rvv.S
 create mode 100644 libavutil/riscv/float_dsp_init.c
 create mode 100644 libavutil/riscv/float_dsp_rvv.S
 create mode 100644 libavutil/riscv/intmath.h
 create mode 100644 libavutil/riscv/timer.h

-- 
雷米‧德尼-库尔蒙
http://www.remlab.net/



___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v2 1/3] avfilter/avfilter: Don't use AVFrame.channel_layout

2022-09-12 Thread Andreas Rheinhardt
Signed-off-by: Andreas Rheinhardt 
---
 libavfilter/avfilter.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c
index 965f5d0f63..bde41637dd 100644
--- a/libavfilter/avfilter.c
+++ b/libavfilter/avfilter.c
@@ -21,6 +21,7 @@
 
 #include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
+#include "libavutil/bprint.h"
 #include "libavutil/buffer.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/common.h"
@@ -45,6 +46,7 @@
 
 static void tlog_ref(void *ctx, AVFrame *ref, int end)
 {
+#ifdef TRACE
 ff_tlog(ctx,
 "ref[%p buf:%p data:%p linesize[%d, %d, %d, %d] pts:%"PRId64" 
pos:%"PRId64,
 ref, ref->buf, ref->data[0],
@@ -61,13 +63,19 @@ static void tlog_ref(void *ctx, AVFrame *ref, int end)
 av_get_picture_type_char(ref->pict_type));
 }
 if (ref->nb_samples) {
-ff_tlog(ctx, " cl:%"PRId64"d n:%d r:%d",
-ref->channel_layout,
+AVBPrint bprint;
+
+av_bprint_init(&bprint, 1, AV_BPRINT_SIZE_UNLIMITED);
+av_channel_layout_describe_bprint(&ref->ch_layout, &bprint);
+ff_tlog(ctx, " cl:%s n:%d r:%d",
+bprint.str,
 ref->nb_samples,
 ref->sample_rate);
+av_bprint_finalize(&bprint, NULL);
 }
 
 ff_tlog(ctx, "]%s", end ? "\n" : "");
+#endif
 }
 
 void ff_command_queue_pop(AVFilterContext *filter)
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 2/3] avfilter/avfilter: #if ff_tlog_link() away when empty

2022-09-12 Thread Andreas Rheinhardt
It is currently calling av_channel_layout_describe()
unnecessarily.

Signed-off-by: Andreas Rheinhardt 
---
 libavfilter/avfilter.c | 2 ++
 libavfilter/internal.h | 4 
 2 files changed, 6 insertions(+)

diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c
index bde41637dd..f34204e650 100644
--- a/libavfilter/avfilter.c
+++ b/libavfilter/avfilter.c
@@ -381,6 +381,7 @@ int avfilter_config_links(AVFilterContext *filter)
 return 0;
 }
 
+#ifdef TRACE
 void ff_tlog_link(void *ctx, AVFilterLink *link, int end)
 {
 if (link->type == AVMEDIA_TYPE_VIDEO) {
@@ -404,6 +405,7 @@ void ff_tlog_link(void *ctx, AVFilterLink *link, int end)
 end ? "\n" : "");
 }
 }
+#endif
 
 int ff_request_frame(AVFilterLink *link)
 {
diff --git a/libavfilter/internal.h b/libavfilter/internal.h
index 0f8da367d0..0128820be0 100644
--- a/libavfilter/internal.h
+++ b/libavfilter/internal.h
@@ -268,7 +268,11 @@ void ff_command_queue_pop(AVFilterContext *filter);
 
 char *ff_get_ref_perms_string(char *buf, size_t buf_size, int perms);
 
+#ifdef TRACE
 void ff_tlog_link(void *ctx, AVFilterLink *link, int end);
+#else
+#define ff_tlog_link(ctx, link, end) do { } while(0)
+#endif
 
 /**
  * Append a new input/output pad to the filter's list of such pads.
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 3/3] avfilter/video: Fix newline in trace output

2022-09-12 Thread Andreas Rheinhardt
Forgotten in 7e350379f87e7f74420b4813170fe808e2313911.

Signed-off-by: Andreas Rheinhardt 
---
Does anyone actually use this? The fact that this went unnoticed
for so long suggests "no".

 libavfilter/video.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavfilter/video.c b/libavfilter/video.c
index e9eb110ff4..7683ef6fd4 100644
--- a/libavfilter/video.c
+++ b/libavfilter/video.c
@@ -102,7 +102,7 @@ AVFrame *ff_get_video_buffer(AVFilterLink *link, int w, int 
h)
 {
 AVFrame *ret = NULL;
 
-FF_TPRINTF_START(NULL, get_video_buffer); ff_tlog_link(NULL, link, 0);
+FF_TPRINTF_START(NULL, get_video_buffer); ff_tlog_link(NULL, link, 1);
 
 if (link->dstpad->get_buffer.video)
 ret = link->dstpad->get_buffer.video(link, w, h);
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] avcodec/tiff: Fix loop detection

2022-09-12 Thread Michael Niedermayer
Fixes regression with tickets/4364/L1004220.DNG

Signed-off-by: Michael Niedermayer 
---
 libavcodec/tiff.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/tiff.c b/libavcodec/tiff.c
index beb427e0074..226050744fc 100644
--- a/libavcodec/tiff.c
+++ b/libavcodec/tiff.c
@@ -1747,7 +1747,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
 int *got_frame, AVPacket *avpkt)
 {
 TiffContext *const s = avctx->priv_data;
-unsigned off, last_off;
+unsigned off, last_off = 0;
 int le, ret, plane, planes;
 int i, j, entries, stride;
 unsigned soff, ssize;
@@ -1812,7 +1812,6 @@ again:
 /** whether we should process this multi-page IFD's next page */
 retry_for_page = s->get_page && s->cur_page + 1 < s->get_page;  // 
get_page is 1-indexed
 
-last_off = off;
 if (retry_for_page) {
 // set offset to the next IFD
 off = ff_tget_long(&s->gb, le);
@@ -1830,6 +1829,7 @@ again:
 avpriv_request_sample(s->avctx, "non increasing IFD offset");
 return AVERROR_INVALIDDATA;
 }
+last_off = off;
 if (off >= UINT_MAX - 14 || avpkt->size < off + 14) {
 av_log(avctx, AV_LOG_ERROR, "IFD offset is greater than image 
size\n");
 return AVERROR_INVALIDDATA;
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] avcodec/x86/audiodsp: add scalarproduct avx2

2022-09-12 Thread Paul B Mahol
Patch attached.

How to get more speed?
From 55eb5a18b4bf029f52f9d9108a750c576ba780ee Mon Sep 17 00:00:00 2001
From: Paul B Mahol 
Date: Mon, 12 Sep 2022 18:53:31 +0200
Subject: [PATCH] avcodec/x86/audiodsp: add scalarproduct avx2

Signed-off-by: Paul B Mahol 
---
 libavcodec/x86/audiodsp.asm| 24 
 libavcodec/x86/audiodsp_init.c |  6 ++
 2 files changed, 30 insertions(+)

diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index b604b0443c..55051f6aa7 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -44,6 +44,30 @@ cglobal scalarproduct_int16, 3,3,3, v1, v2, order
 movd   eax, m2
 RET
 
+INIT_YMM avx2
+cglobal scalarproduct_int16, 3,4,3, v1, v2, order, offset
+    xor     offsetq, offsetq            ; byte offset into both inputs
+    add     orderd, orderd              ; int16 element count -> byte count
+    pxor    m1, m1                      ; accumulator
+    cmp     orderd, 32
+    jl      .l16                        ; under 32 bytes: one 16-byte step instead
+.loop:
+    movu    m0, [v1q + offsetq]
+    pmaddwd m0, [v2q + offsetq]         ; multiply int16 pairs, add adjacent products
+    paddd   m1, m0
+    add     offsetq, mmsize
+    cmp     offsetq, orderq
+    jl      .loop
+    HADDD   m1, m0                      ; presumably sums the dword lanes of m1 (x86util macro)
+    movd    eax, xm1
+    RET
+.l16:
+    movu    xm0, [v1q + offsetq]
+    pmaddwd xm0, [v2q + offsetq]
+    paddd   xm1, xm0
+    HADDD   xm1, xm0
+    movd    eax, xm1
+    RET
 
 ;-
 ; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
diff --git a/libavcodec/x86/audiodsp_init.c b/libavcodec/x86/audiodsp_init.c
index aa5e43e570..77d5948442 100644
--- a/libavcodec/x86/audiodsp_init.c
+++ b/libavcodec/x86/audiodsp_init.c
@@ -24,6 +24,9 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/audiodsp.h"
 
+int32_t ff_scalarproduct_int16_avx2(const int16_t *v1, const int16_t *v2,
+int order);
+
 int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2,
 int order);
 
@@ -53,4 +56,7 @@ av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
 
 if (EXTERNAL_SSE4(cpu_flags))
 c->vector_clip_int32 = ff_vector_clip_int32_sse4;
+
+if (EXTERNAL_AVX2(cpu_flags))
+c->scalarproduct_int16 = ff_scalarproduct_int16_avx2;
 }
-- 
2.37.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] avcodec/x86/audiodsp: add scalarproduct avx2

2022-09-12 Thread James Almer

From 55eb5a18b4bf029f52f9d9108a750c576ba780ee Mon Sep 17 00:00:00 2001
From: Paul B Mahol 
Date: Mon, 12 Sep 2022 18:53:31 +0200
Subject: [PATCH] avcodec/x86/audiodsp: add scalarproduct avx2

Signed-off-by: Paul B Mahol 
---
 libavcodec/x86/audiodsp.asm| 24 
 libavcodec/x86/audiodsp_init.c |  6 ++
 2 files changed, 30 insertions(+)

diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index b604b0443c..55051f6aa7 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -44,6 +44,30 @@ cglobal scalarproduct_int16, 3,3,3, v1, v2, order
 movd   eax, m2
 RET
 
+INIT_YMM avx2

+cglobal scalarproduct_int16, 3,4,3, v1, v2, order, offset
+xor offsetq, offsetq
+add orderd, orderd
+pxorm1, m1
+cmp orderd, 32


This parameter needs to be multiple of 16. What will happen below if 
it's for example 48? Are both buffers padded enough to handle 16 bytes 
of overread?



+jl   .l16
+.loop:
+movum0, [v1q + offsetq]
+pmaddwd m0, [v2q + offsetq]
+paddd   m1, m0
+add offsetq, mmsize
+cmp offsetq, orderq


You should use the neg trick from the sse2 version so you can remove the 
cmp from this loop.



+jl .loop
+HADDD   m1, m0
+movd   eax, xm1
+RET
+.l16:
+movuxm0, [v1q + offsetq]
+pmaddwd xm0, [v2q + offsetq]
+paddd   xm1, xm0
+HADDD  xm1, xm0
+movd   eax, xm1
+RET
 
 ;-

 ; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
diff --git a/libavcodec/x86/audiodsp_init.c b/libavcodec/x86/audiodsp_init.c
index aa5e43e570..77d5948442 100644
--- a/libavcodec/x86/audiodsp_init.c
+++ b/libavcodec/x86/audiodsp_init.c
@@ -24,6 +24,9 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/audiodsp.h"
 
+int32_t ff_scalarproduct_int16_avx2(const int16_t *v1, const int16_t *v2,

+int order);
+
 int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2,
 int order);
 
@@ -53,4 +56,7 @@ av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
 
 if (EXTERNAL_SSE4(cpu_flags))

 c->vector_clip_int32 = ff_vector_clip_int32_sse4;
+
+if (EXTERNAL_AVX2(cpu_flags))
+c->scalarproduct_int16 = ff_scalarproduct_int16_avx2;
 }
--
2.37.2


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] avcodec/x86/audiodsp: add scalarproduct avx2

2022-09-12 Thread James Almer



On 9/12/2022 3:39 PM, James Almer wrote:

From 55eb5a18b4bf029f52f9d9108a750c576ba780ee Mon Sep 17 00:00:00 2001
From: Paul B Mahol 
Date: Mon, 12 Sep 2022 18:53:31 +0200
Subject: [PATCH] avcodec/x86/audiodsp: add scalarproduct avx2

Signed-off-by: Paul B Mahol 
---
 libavcodec/x86/audiodsp.asm    | 24 
 libavcodec/x86/audiodsp_init.c |  6 ++
 2 files changed, 30 insertions(+)

diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index b604b0443c..55051f6aa7 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -44,6 +44,30 @@ cglobal scalarproduct_int16, 3,3,3, v1, v2, order
 movd   eax, m2
 RET

+INIT_YMM avx2
+cglobal scalarproduct_int16, 3,4,3, v1, v2, order, offset
+    xor offsetq, offsetq
+    add orderd, orderd
+    pxor    m1, m1
+    cmp orderd, 32


This parameter needs to be multiple of 16. What will happen below if 
it's for example 48? Are both buffers padded enough to handle 16 bytes 
of overread?


Never mind, these are int16_t* buffers.

You can simplify this as:

INIT_YMM avx2
cglobal scalarproduct_int16, 3,3,3, v1, v2, order
    add     orderd, orderd              ; int16 element count -> byte count
    add     v1q, orderq                 ; point both inputs at their end ...
    add     v2q, orderq
    neg     orderq                      ; ... and index with a negative offset
    pxor    m1, m1                      ; accumulator
.loop:
    movu    m0, [v1q + orderq]
    pmaddwd m0, [v2q + orderq]
    paddd   m1, m0
    add     orderq, mmsize              ; sets the flags jl tests, so no cmp is needed
    jl .loop
    HADDD   m1, m0
    movd    eax, xm1
    RET




+    jl   .l16
+.loop:
+    movu    m0, [v1q + offsetq]
+    pmaddwd m0, [v2q + offsetq]
+    paddd   m1, m0
+    add offsetq, mmsize
+    cmp offsetq, orderq


You should use the neg trick from the sse2 version so you can remove the 
cmp from this loop.



+    jl .loop
+    HADDD   m1, m0
+    movd   eax, xm1
+    RET
+.l16:
+    movu    xm0, [v1q + offsetq]
+    pmaddwd xm0, [v2q + offsetq]
+    paddd   xm1, xm0
+    HADDD  xm1, xm0
+    movd   eax, xm1
+    RET

 ;- 

 ; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t 
min,
diff --git a/libavcodec/x86/audiodsp_init.c 
b/libavcodec/x86/audiodsp_init.c

index aa5e43e570..77d5948442 100644
--- a/libavcodec/x86/audiodsp_init.c
+++ b/libavcodec/x86/audiodsp_init.c
@@ -24,6 +24,9 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/audiodsp.h"

+int32_t ff_scalarproduct_int16_avx2(const int16_t *v1, const int16_t 
*v2,

+    int order);
+
 int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t 
*v2,

 int order);

@@ -53,4 +56,7 @@ av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)

 if (EXTERNAL_SSE4(cpu_flags))
 c->vector_clip_int32 = ff_vector_clip_int32_sse4;
+
+    if (EXTERNAL_AVX2(cpu_flags))
+    c->scalarproduct_int16 = ff_scalarproduct_int16_avx2;
 }
--
2.37.2


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] avformat: add Limitless Audio Format demuxer

2022-09-12 Thread Paul B Mahol
On 9/12/22, Paul B Mahol  wrote:
> Patch attached.
>

Updated patch attached.
From 33efa252db96d9eac7f162f17b22c1cd8b3b1c14 Mon Sep 17 00:00:00 2001
From: Paul B Mahol 
Date: Sun, 11 Sep 2022 20:10:27 +0200
Subject: [PATCH] avformat: add LAF demuxer

Signed-off-by: Paul B Mahol 
---
 libavformat/Makefile |   1 +
 libavformat/allformats.c |   1 +
 libavformat/lafdec.c | 271 +++
 3 files changed, 273 insertions(+)
 create mode 100644 libavformat/lafdec.c

diff --git a/libavformat/Makefile b/libavformat/Makefile
index 5cdcda3239..19a4ba2a8f 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -319,6 +319,7 @@ OBJS-$(CONFIG_JV_DEMUXER)+= jvdec.o
 OBJS-$(CONFIG_KUX_DEMUXER)   += flvdec.o
 OBJS-$(CONFIG_KVAG_DEMUXER)  += kvag.o
 OBJS-$(CONFIG_KVAG_MUXER)+= kvag.o rawenc.o
+OBJS-$(CONFIG_LAF_DEMUXER)   += lafdec.o
 OBJS-$(CONFIG_LATM_MUXER)+= latmenc.o rawenc.o
 OBJS-$(CONFIG_LMLM4_DEMUXER) += lmlm4.o
 OBJS-$(CONFIG_LOAS_DEMUXER)  += loasdec.o rawdec.o
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index cebd5e0c67..a545b5ff45 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -236,6 +236,7 @@ extern const AVInputFormat  ff_jv_demuxer;
 extern const AVInputFormat  ff_kux_demuxer;
 extern const AVInputFormat  ff_kvag_demuxer;
 extern const AVOutputFormat ff_kvag_muxer;
+extern const AVInputFormat  ff_laf_demuxer;
 extern const AVOutputFormat ff_latm_muxer;
 extern const AVInputFormat  ff_lmlm4_demuxer;
 extern const AVInputFormat  ff_loas_demuxer;
diff --git a/libavformat/lafdec.c b/libavformat/lafdec.c
new file mode 100644
index 00..12b0d8540b
--- /dev/null
+++ b/libavformat/lafdec.c
@@ -0,0 +1,271 @@
+/*
+ * Limitless Audio Format demuxer
+ * Copyright (c) 2022 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "avformat.h"
+#include "internal.h"
+
+#define MAX_STREAMS 4096
+
+typedef struct StreamParams {
+AVChannelLayout layout;
+float horizontal;
+float vertical;
+int lfe;
+int stored;
+} StreamParams;
+
+typedef struct LAFContext {
+uint8_t *data;
+unsigned nb_stored;
+unsigned stored_index;
+unsigned index;
+unsigned bpp;
+
+StreamParams p[MAX_STREAMS];
+
+int header_len;
+uint8_t header[(MAX_STREAMS + 7) / 8];
+} LAFContext;
+
+static int laf_probe(const AVProbeData *p)
+{
+if (memcmp(p->buf, "LIMITLESS", 9))
+return 0;
+if (memcmp(p->buf + 9, "HEAD", 4))
+return 0;
+return AVPROBE_SCORE_MAX;
+}
+
+static int laf_read_header(AVFormatContext *ctx)
+{
+LAFContext *s = ctx->priv_data;
+AVIOContext *pb = ctx->pb;
+unsigned st_count, mode;
+unsigned sample_rate;
+int64_t duration;
+int codec_id;
+int quality;
+int bpp;
+
+avio_skip(pb, 9);
+if (avio_rb32(pb) != MKBETAG('H','E','A','D'))
+return AVERROR_INVALIDDATA;
+
+quality = avio_r8(pb);
+if (quality > 3)
+return AVERROR_INVALIDDATA;
+mode = avio_r8(pb);
+if (mode > 1)
+return AVERROR_INVALIDDATA;
+st_count = avio_rl32(pb);
+if (st_count == 0 || st_count > MAX_STREAMS)
+return AVERROR_INVALIDDATA;
+
+for (int i = 0; i < st_count; i++) {
+StreamParams *stp = &s->p[i];
+
+stp->vertical = av_int2float(avio_rl32(pb));
+stp->horizontal = av_int2float(avio_rl32(pb));
+stp->lfe = avio_r8(pb);
+if (stp->lfe) {
+stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, (AV_CH_LOW_FREQUENCY));
+} else if (stp->vertical == 0.f &&
+   stp->horizontal == 0.f) {
+stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, (AV_CH_FRONT_CENTER));
+} else if (stp->vertical == 0.f &&
+   stp->horizontal == -30.f) {
+stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, (AV_CH_FRONT_LEFT));
+} else if (stp->vertical == 0.f &&
+   stp->horizontal == 30.f) {
+stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, (AV_CH_FRONT_RIGHT));
+} else if (stp->ver

Re: [FFmpeg-devel] [PATCH] avcodec/bonk: Actually clip when using av_clip()

2022-09-12 Thread Paul B Mahol
On 9/12/22, Andreas Rheinhardt  wrote:
> Also fixes a "statement with no effect [-Wunused-value]"
> warning from GCC.
>
> Signed-off-by: Andreas Rheinhardt 
> ---
>  libavcodec/bonk.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/libavcodec/bonk.c b/libavcodec/bonk.c
> index f3d797d588..409694f710 100644
> --- a/libavcodec/bonk.c
> +++ b/libavcodec/bonk.c
> @@ -280,7 +280,7 @@ static int predictor_calc_error(int *k, int *state, int
> order, int error)
>  }
>
>  // don't drift too far, to avoid overflows
> -av_clip(x, -(SAMPLE_FACTOR << 16), SAMPLE_FACTOR << 16);
> +x = av_clip(x, -(SAMPLE_FACTOR << 16), SAMPLE_FACTOR << 16);
>
>  state[0] = x;
>

LGTM

> --
> 2.34.1
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] FFmpeg 5.1.2

2022-09-12 Thread Michael Niedermayer
Hi all

Due to more bugfixes, I intend to make 5.1.2 soon (within the next few days).
I do plan to make releases from older, still maintained/used branches
following 5.1.2.

thx

-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Breaking DRM is a little like attempting to break through a door even
though the window is wide open and the only thing in the house is a bunch
of things you don't want and which you would get tomorrow for free anyway


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] swsresample/swresample: abort on invalid layouts

2022-09-12 Thread James Almer

On 9/8/2022 8:00 PM, James Almer wrote:

On 9/8/2022 7:47 PM, Andreas Rheinhardt wrote:

James Almer:
If it's unsupported or invalid, then there's no point trying to 
rebuild it
using a value that may have been derived from the same layout to 
begin with.


Move the checks before the attempts at copying the layout while at it.

Fixes ticket #9908.

Signed-off-by: James Almer 
---
  libswresample/swresample.c | 48 +-
  1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/libswresample/swresample.c b/libswresample/swresample.c
index 6f04d130d3..5884f8d533 100644
--- a/libswresample/swresample.c
+++ b/libswresample/swresample.c
@@ -227,7 +227,7 @@ av_cold int swr_init(struct SwrContext *s){
  s->in_ch_layout.order   = AV_CHANNEL_ORDER_UNSPEC;
  s->in_ch_layout.nb_channels = s->user_in_ch_count;
  }
-    } else
+    } else if (av_channel_layout_check(&s->user_in_chlayout))
  av_channel_layout_copy(&s->in_ch_layout, 
&s->user_in_chlayout);
  if ((s->user_out_ch_count && s->user_out_ch_count != 
s->user_out_chlayout.nb_channels) ||

@@ -240,17 +240,45 @@ av_cold int swr_init(struct SwrContext *s){
  s->out_ch_layout.order   = AV_CHANNEL_ORDER_UNSPEC;
  s->out_ch_layout.nb_channels = s->user_out_ch_count;
  }
-    } else
+    } else if (av_channel_layout_check(&s->user_out_chlayout))
  av_channel_layout_copy(&s->out_ch_layout, 
&s->user_out_chlayout);

  if (!s->out.ch_count && !s->user_out_ch_layout)
  s->out.ch_count  = s->out_ch_layout.nb_channels;
  if (!s-> in.ch_count && !s-> user_in_ch_layout)
  s-> in.ch_count  = s->in_ch_layout.nb_channels;
+
+    if (!(ret = av_channel_layout_check(&s->in_ch_layout)) || 
s->in_ch_layout.nb_channels > SWR_CH_MAX) {

+    if (ret)
+    av_channel_layout_describe(&s->in_ch_layout, l1, 
sizeof(l1));
+    av_log(s, AV_LOG_WARNING, "Input channel layout \"%s\" is 
invalid or unsupported.\n", ret ? l1 : "");

+    return AVERROR(EINVAL);
+    }
+
+    if (!(ret = av_channel_layout_check(&s->out_ch_layout)) || 
s->out_ch_layout.nb_channels > SWR_CH_MAX) {

+    if (ret)
+    av_channel_layout_describe(&s->out_ch_layout, l2, 
sizeof(l2));
+    av_log(s, AV_LOG_WARNING, "Output channel layout \"%s\" is 
invalid or unsupported.\n", ret ? l2 : "");

+    return AVERROR(EINVAL);
+    }
  #else
  s->out.ch_count  = s-> user_out_chlayout.nb_channels;
  s-> in.ch_count  = s->  user_in_chlayout.nb_channels;
+    if (!(ret = av_channel_layout_check(&s->user_in_chlayout)) || 
s->user_in_chlayout.nb_channels > SWR_CH_MAX) {

+    if (ret)
+    av_channel_layout_describe(&s->user_in_chlayout, l1, 
sizeof(l1));
+    av_log(s, AV_LOG_WARNING, "Input channel layout \"%s\" is 
invalid or unsupported.\n", ret ? l1 : "");

+    return AVERROR(EINVAL);
+    }
+
+    if (!(ret = av_channel_layout_check(&s->user_out_chlayout)) || 
s->user_out_chlayout.nb_channels > SWR_CH_MAX) {

+    if (ret)
+    av_channel_layout_describe(&s->user_out_chlayout, l2, 
sizeof(l2));
+    av_log(s, AV_LOG_WARNING, "Output channel layout \"%s\" is 
invalid or unsupported.\n", ret ? l2 : "");


Why are you using  AV_LOG_WARNING when you are erroring out?


+    return AVERROR(EINVAL);
+    }
+
  ret  = av_channel_layout_copy(&s->in_ch_layout, 
&s->user_in_chlayout);
  ret |= av_channel_layout_copy(&s->out_ch_layout, 
&s->user_out_chlayout);

  if (ret < 0)
@@ -261,18 +289,6 @@ av_cold int swr_init(struct SwrContext *s){
  s->dither.method = s->user_dither_method;
-    if (!av_channel_layout_check(&s->in_ch_layout) || 
s->in_ch_layout.nb_channels > SWR_CH_MAX) {

-    av_channel_layout_describe(&s->in_ch_layout, l1, sizeof(l1));
-    av_log(s, AV_LOG_WARNING, "Input channel layout \"%s\" is 
invalid or unsupported.\n", l1);

-    av_channel_layout_uninit(&s->in_ch_layout);
-    }
-
-    if (!av_channel_layout_check(&s->out_ch_layout) || 
s->out_ch_layout.nb_channels > SWR_CH_MAX) {

-    av_channel_layout_describe(&s->out_ch_layout, l2, sizeof(l2));
-    av_log(s, AV_LOG_WARNING, "Output channel layout \"%s\" is 
invalid or unsupported.\n", l2);

-    av_channel_layout_uninit(&s->out_ch_layout);
-    }
-
  switch(s->engine){
  #if CONFIG_LIBSOXR
  case SWR_ENGINE_SOXR: s->resampler = &swri_soxr_resampler; 
break;

@@ -291,9 +307,9 @@ av_cold int swr_init(struct SwrContext *s){
  av_channel_layout_uninit(&s->in_ch_layout);
  }
-    if (!s->in_ch_layout.nb_channels || s->in_ch_layout.order == 
AV_CHANNEL_ORDER_UNSPEC)

+    if (s->in_ch_layout.order == AV_CHANNEL_ORDER_UNSPEC)
  av_channel_layout_default(&s->in_ch_layout, s->used_ch_count);
-    if (!s->out_ch_layout.nb_channels || s->out_ch_layout.order == 
AV_CHANNEL_ORDER_UNSPEC)

+    if (s->out_ch_layout.order == AV_CHANNEL_ORDER_UNSPEC)
  av

Re: [FFmpeg-devel] [PATCH] avformat: add Limitless Audio Format demuxer

2022-09-12 Thread Andreas Rheinhardt
Paul B Mahol:
> +case 3:
> +for (int n = 0; n < st->codecpar->sample_rate; n++)
> +AV_WL24(pkt->data + n * 3, AV_RL24(s->data + n * s->nb_stored * 
> 3 + s->stored_index * 3));
> +break;

Looking at intreadwrite.h shows that we actually have AV_RN24 and AV_WN24.

- Andreas
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] avformat: add Limitless Audio Format demuxer

2022-09-12 Thread Paul B Mahol
On 9/13/22, Andreas Rheinhardt  wrote:
> Paul B Mahol:
>> +case 3:
>> +for (int n = 0; n < st->codecpar->sample_rate; n++)
>> +AV_WL24(pkt->data + n * 3, AV_RL24(s->data + n * s->nb_stored
>> * 3 + s->stored_index * 3));
>> +break;
>
> Looking at intreadwrite.h shows that we actually have AV_RN24 and AV_WN24.

Didn't compile.

>
> - Andreas
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] fate/spdif: Add spdif tests

2022-09-12 Thread Andreas Rheinhardt
Andreas Rheinhardt:
> These tests test both the demuxer as well as the muxer
> wherever possible. It is not always possible due to the fact
> that the muxer supports more codecs than the demuxer.
> 
> The spdif demuxer does currently not set the need_parsing flag.
> If one were to set this to AVSTREAM_PARSE_FULL, the test results
> would change as follows:
> - For spdif-aac-remux, the packets are currently padded to 16bits,
> i.e. if the actual packet size is odd, there is a padding byte.
> The parser splits this byte away into a one byte packet of its own.
> Insanely, these one byte packets get the same duration as normal
> packets, i.e. timing is ruined.
> - The DCA-remux tests get proper duration/timestamps.
> - In the spdif-mp2-remux test the demuxer marks the stream as
> being MP2; the parser sets it to MP3 and this triggers
> the "Codec change in IEC 61937" codepath; this test therefore
> returns only two packets with the parser.
> - For spdif-mp3-remux some bytes end up in different packets:
> Some input packets of this file have an odd length (417B instead
> of 418B like all the other packets) and are padded to 418B.
> Without a parser, all returned packets from the spdif-demuxer
> are 418B. With a parser, the packets that were originally 417B
> are 417B again, but the padding byte has not been discarded,
> but added to the next packet which is now 419B.
> This fixes "Multiple frames in a packet" warning and avoids
> an "Invalid data found when processing input" error when decoding.
> 
> Signed-off-by: Andreas Rheinhardt 
> ---
>  tests/Makefile |1 +
>  tests/fate/spdif.mak   |   44 +
>  tests/ref/fate/spdif-aac-remux |   93 ++
>  tests/ref/fate/spdif-ac3-remux |   63 ++
>  tests/ref/fate/spdif-dca-core-bswap|1 +
>  tests/ref/fate/spdif-dca-core-remux|   14 +
>  tests/ref/fate/spdif-dca-master|1 +
>  tests/ref/fate/spdif-dca-master-core   |1 +
>  tests/ref/fate/spdif-dca-master-core-remux | 1179 
>  tests/ref/fate/spdif-eac3  |1 +
>  tests/ref/fate/spdif-mlp   |1 +
>  tests/ref/fate/spdif-mp2-remux |   49 +
>  tests/ref/fate/spdif-mp3-remux |   47 +
>  tests/ref/fate/spdif-truehd|1 +
>  14 files changed, 1496 insertions(+)
>  create mode 100644 tests/fate/spdif.mak
>  create mode 100644 tests/ref/fate/spdif-aac-remux
>  create mode 100644 tests/ref/fate/spdif-ac3-remux
>  create mode 100644 tests/ref/fate/spdif-dca-core-bswap
>  create mode 100644 tests/ref/fate/spdif-dca-core-remux
>  create mode 100644 tests/ref/fate/spdif-dca-master
>  create mode 100644 tests/ref/fate/spdif-dca-master-core
>  create mode 100644 tests/ref/fate/spdif-dca-master-core-remux
>  create mode 100644 tests/ref/fate/spdif-eac3
>  create mode 100644 tests/ref/fate/spdif-mlp
>  create mode 100644 tests/ref/fate/spdif-mp2-remux
>  create mode 100644 tests/ref/fate/spdif-mp3-remux
>  create mode 100644 tests/ref/fate/spdif-truehd
> 
> diff --git a/tests/Makefile b/tests/Makefile
> index d9c509a415..06494a9cc4 100644
> --- a/tests/Makefile
> +++ b/tests/Makefile
> @@ -231,6 +231,7 @@ include $(SRC_PATH)/tests/fate/real.mak
>  include $(SRC_PATH)/tests/fate/screen.mak
>  include $(SRC_PATH)/tests/fate/segment.mak
>  include $(SRC_PATH)/tests/fate/source.mak
> +include $(SRC_PATH)/tests/fate/spdif.mak
>  include $(SRC_PATH)/tests/fate/speedhq.mak
>  include $(SRC_PATH)/tests/fate/subtitles.mak
>  include $(SRC_PATH)/tests/fate/truehd.mak
> diff --git a/tests/fate/spdif.mak b/tests/fate/spdif.mak
> new file mode 100644
> index 00..093b8138e8
> --- /dev/null
> +++ b/tests/fate/spdif.mak
> @@ -0,0 +1,44 @@
> +# This pads the AAC frames to 16 bit words (the actual size is
> +# still available in the ADTS headers).
> +FATE_SPDIF_REMUX-$(call ALLYES, AAC_DEMUXER AAC_DECODER) += 
> fate-spdif-aac-remux
> +fate-spdif-aac-remux: CMD = transcode aac $(TARGET_SAMPLES)/aac/foo.aac 
> spdif "-c copy" "-c copy"
> +
> +FATE_SPDIF_REMUX-$(call ALLYES, AC3_DEMUXER AC3_DECODER) += 
> fate-spdif-ac3-remux
> +fate-spdif-ac3-remux: CMD = transcode ac3 
> $(TARGET_SAMPLES)/ac3/monsters_inc_5.1_448_small.ac3 spdif "-c copy" "-c copy"
> +
> +FATE_SPDIF_REMUX-$(call ALLYES, DTS_DEMUXER DCA_DECODER) += 
> fate-spdif-dca-core-remux
> +fate-spdif-dca-core-remux: CMD = transcode dts 
> $(TARGET_SAMPLES)/dts/dcadec-suite/core_51_24_48_768_0.dtshd spdif "-c copy" 
> "-c copy"
> +
> +FATE_SPDIF-$(call DEMMUX, DTSHD, SPDIF) += fate-spdif-dca-core-bswap
> +fate-spdif-dca-core-bswap: CMD = md5 -i 
> $(TARGET_SAMPLES)/dts/dcadec-suite/core_51_24_48_768_0.dtshd -c copy 
> -spdif_flags +be -f spdif
> +
> +# Only the core will be transferred, extensions are discarded.
> +FATE_SPDIF_REMUX-$(call ALLYES, DTS_DEMUXER DCA_DECODER) += 
> fate-spdif-dca-master-core-remux
> +fate-spdif-dca-master-core-remux: CMD = transcode dts 
> $(TARGET_SAMP

Re: [FFmpeg-devel] [PATCH 1/1] avcodec/mpegutils: add motion_vec debug mode

2022-09-12 Thread Chema Gonzalez
Hi Paul,

I tried to understand the per-frame side-data (and metadata)
mechanism. Adding my notes here in case they help a future reader.

Metadata and side-data seem like similar mechanisms to add auxiliary
information to each frame. Main difference seems to be that metadata
is simpler (a key/value dictionary), while side-data allows adding any
data struct.

Metadata operation is also simpler: There are filters that generate
metadata (e.g. "signalstats" generates key-values such as
`lavfi.signalstats.YMIN=3`, while "silencedetect" generates
audio-related key-values like `lavfi.silence_start=0`). There are also
2x filters that print metadata ("vf_metadata" and "af_ametadata"). So
e.g. to see what signalstats/silencedetect are generating, you can do:

```
$ ffmpeg -i in.264 -vf signalstats,metadata=mode=print -f null -
...
[Parsed_metadata_1 @ ...] frame:0pts:0   pts_time:0
[Parsed_metadata_1 @ ...] lavfi.signalstats.YMIN=3
...
```

or:
```
$ ffmpeg -y -i in.wav -af "silencedetect=n=-10dB:d=1,ametadata=print"
/tmp/out.wav
...
[Parsed_ametadata_1 @ ...] frame:23   pts:47104   pts_time:0.981333
[Parsed_ametadata_1 @ ...] lavfi.silence_start=0
...
```

Side-data operation is more complicated. There is some side-data
information already generated (e.g. SEI_UNREGISTERED side-data). Some
information requires explicitly asking for it. For example, the
`MOTION_VECTORS` side-data, you need to enable
AV_CODEC_FLAG2_EXPORT_MVS (which means calling ffmpeg/ffplay/ffprobe
with "-flags2 +export_mvs"). The main filter to print side-data
information is showinfo (`vf_showinfo` and `af_ashowinfo`). Now, the
`vf_showinfo` filter only knows how to dump some of the side-data
structs. In particular, it does not know how to dump MOTION_VECTORS
side-data. So, if we add the motion vectors, and then ask showinfo to
print it, we see:

```
$ ffmpeg -hide_banner -flags2 +export_mvs -export_side_data +mvs
-export_side_data +prft -export_side_data +venc_params
-export_side_data +film_grain -i /tmp/in.264 -vf showinfo -f null
/dev/null
...
-- frame 0 is a key frame: We can see SEI_UNREGISTERED and VIDEO_ENC_PARAMS info
[Parsed_showinfo_0 @ 0x308fd40] config in time_base: 1/120, frame_rate: 25/1
[Parsed_showinfo_0 @ 0x308fd40] config out time_base: 0/0, frame_rate: 0/0
[Parsed_showinfo_0 @ 0x308fd40] n:   0 pts:  0 pts_time:0
duration:  48000 duration_time:0.04pos:0 fmt:yuv420p
sar:0/1 s:1920x1080 i:P iskey:1 type:I checksum:F6BBEA9F
plane_checksum:[AFB1432E 63F2F255 2887B50D] mean:[109 119 138]
stdev:[43.0 12.7 13.3]
[Parsed_showinfo_0 @ 0x308fd40]   side data - User Data Unregistered:
[Parsed_showinfo_0 @ 0x308fd40] UUID=47564adc-5c4c-433f-94ef-c5113cd143a8
[Parsed_showinfo_0 @ 0x308fd40] User Data=01ffff0200e4dd42
[Parsed_showinfo_0 @ 0x308fd40]
[Parsed_showinfo_0 @ 0x308fd40]   side data - video encoding
parameters: type 1; qp=26; 8160 blocks;
[Parsed_showinfo_0 @ 0x308fd40] color_range:tv color_space:bt709
color_primaries:bt709 color_trc:bt709
...
-- frame 1 is a P-frame: we can see VIDEO_ENC_PARAMS info, and a
complaint about "side-data type 8" (MOTION_VECTORS)
[Parsed_showinfo_0 @ 0x308fd40] n:   1 pts:  48000 pts_time:0.04
duration:  48000 duration_time:0.04pos:   259304 fmt:yuv420p
sar:0/1 s:1920x1080 i:P iskey:0 type:B checksum:BC4E5C12
plane_checksum:[AEA8857A 34697DA4 805E58E5] mean:[109 119 138]
stdev:[43.0 12.6 13.3]
[Parsed_showinfo_0 @ 0x308fd40]   side data - video encoding
parameters: type 1; qp=26; 8160 blocks;
-- showinfo does not dump MOTION_VECTORS side-data
[Parsed_showinfo_0 @ 0x308fd40]   side data - unknown side data type 8
(547280 bytes)
[Parsed_showinfo_0 @ 0x308fd40]
[Parsed_showinfo_0 @ 0x308fd40] color_range:tv color_space:bt709
color_primaries:bt709 color_trc:bt709
...
```

So the best way right now to see the MVs is to use
`doc/examples/extract_mvs`, which does exactly that:

```
$ make examples -j
...
$ doc/examples/extract_mvs in.264 | head -40 | \
csvcut -C framenum,source,flags |csvlook
| blockw | blockh |  srcx | srcy |  dstx | dsty | motion_x | motion_y
| motion_scale |
| -- | -- | - |  | - |  |  | 
|  |
| 16 | 16 |20 |   26 | 8 |8 |   49 |   72
|4 |
| 16 | 16 |   152 |   15 |   136 |8 |   65 |   28
|4 |
| 16 |  8 |   360 |3 |   360 |4 |1 |   -6
|4 |
| 16 |  8 |   360 |   13 |   360 |   12 |   -1 |4
|4 |
| 16 | 16 |   440 |   10 |   440 |8 |3 |   10
|4 |
|  8 | 16 |   829 |7 |   836 |8 |  -31 |   -6
|4 |
|  8 | 16 |   844 |7 |   844 |8 |   -1 |   -4
|4 |
```

> Yes, it's called codecview. We can help understand how it works if you ask 
> more specific questions, but something like "git grep EXPORT_DATA_MVS 
> ../libavcodec/mpeg*.c" and checking the complement code i

[FFmpeg-devel] [PATCH] doc/examples/extract_mvs: add motion information

2022-09-12 Thread Chema Gonzalez
Note that the motion information includes subpel motion information

This was likely forgotten in 56bdf61baa04c4fd8d165f34499115ce0aa97c43.

Tested:
```
$ make examples -j
...
$ doc/examples/extract_mvs in.264 | head -40 | \
csvcut -C framenum,source,flags |csvlook
| blockw | blockh |  srcx | srcy |  dstx | dsty | motion_x | motion_y | 
motion_scale |
| -- | -- | - |  | - |  |  |  | 
 |
| 16 | 16 |20 |   26 | 8 |8 |   49 |   72 | 
   4 |
| 16 | 16 |   152 |   15 |   136 |8 |   65 |   28 | 
   4 |
| 16 |  8 |   360 |3 |   360 |4 |1 |   -6 | 
   4 |
| 16 |  8 |   360 |   13 |   360 |   12 |   -1 |4 | 
   4 |
| 16 | 16 |   440 |   10 |   440 |8 |3 |   10 | 
   4 |
|  8 | 16 |   829 |7 |   836 |8 |  -31 |   -6 | 
   4 |
|  8 | 16 |   844 |7 |   844 |8 |   -1 |   -4 | 
   4 |
| 16 | 16 | 1,004 |   14 | 1,048 |8 | -177 |   24 | 
   4 |
| 16 | 16 | 1,096 |8 | 1,096 |8 |   -1 |0 | 
   4 |
| 16 |  8 | 1,417 |   24 | 1,416 |4 |7 |   82 | 
   4 |
| 16 |  8 | 1,416 |   13 | 1,416 |   12 |0 |6 | 
   4 |
| 16 |  8 |87 |   20 |88 |   20 |   -7 |0 | 
   4 |
| 16 |  8 |99 |   44 |88 |   28 |   45 |   66 | 
   4 |
...
```

Also:
```
$ make fate -j
...
```
---
 doc/examples/extract_mvs.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/doc/examples/extract_mvs.c b/doc/examples/extract_mvs.c
index cc1311da91..b80ba26bb7 100644
--- a/doc/examples/extract_mvs.c
+++ b/doc/examples/extract_mvs.c
@@ -61,10 +61,11 @@ static int decode_packet(const AVPacket *pkt)
 const AVMotionVector *mvs = (const AVMotionVector *)sd->data;
 for (i = 0; i < sd->size / sizeof(*mvs); i++) {
 const AVMotionVector *mv = &mvs[i];
-printf("%d,%2d,%2d,%2d,%4d,%4d,%4d,%4d,0x%"PRIx64"\n",
+
printf("%d,%2d,%2d,%2d,%4d,%4d,%4d,%4d,0x%"PRIx64",%4d,%4d,%4d\n",
 video_frame_count, mv->source,
 mv->w, mv->h, mv->src_x, mv->src_y,
-mv->dst_x, mv->dst_y, mv->flags);
+mv->dst_x, mv->dst_y, mv->flags,
+mv->motion_x, mv->motion_y, mv->motion_scale);
 }
 }
 av_frame_unref(frame);
@@ -166,7 +167,7 @@ int main(int argc, char **argv)
 goto end;
 }
 
-printf("framenum,source,blockw,blockh,srcx,srcy,dstx,dsty,flags\n");
+
printf("framenum,source,blockw,blockh,srcx,srcy,dstx,dsty,flags,motion_x,motion_y,motion_scale\n");
 
 /* read frames from the file */
 while (av_read_frame(fmt_ctx, pkt) >= 0) {
-- 
2.37.3

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] multithreading hwaccel is broken on 5.1 and master branch

2022-09-12 Thread Wang Bin
Commit cc867f2c09d2b69cee8a0eccd62aff002cbbfe11 breaks hwaccel. The
assertion av_assert0(!p->parent->stash_hwaccel) fails when seeking, which
makes video players unusable. videotoolbox has another crash when starting
to decode. The crash can be reproduced easily in ffmpeg:
./ffmpeg -stream_loop -1 -an -hwaccel vaapi test.mp4 -f null - >/dev/null

Regards
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] avutil/intreadwrite: Always provide AV_[RW]N(24|48)

2022-09-12 Thread Andreas Rheinhardt
Currently, only the AVR32-arch provides some of these (namely
the 24 bit variants), but this should not depend on the arch.

Signed-off-by: Andreas Rheinhardt 
---
When I started writing this patch, I believed that whether intreadwrite.h
provided AV_[RW]N24 depended upon the arch, namely AVR32; but later
I noticed that this issue actually only exists for internal users,
as an API user never got AV_[RW]N24, because the AVR32-specific
header is not public. So I am no longer sure about this patch.

 libavutil/intreadwrite.h | 28 
 1 file changed, 28 insertions(+)

diff --git a/libavutil/intreadwrite.h b/libavutil/intreadwrite.h
index 4c8413a536..8a18233f55 100644
--- a/libavutil/intreadwrite.h
+++ b/libavutil/intreadwrite.h
@@ -510,6 +510,34 @@ union unaligned_16 { uint16_t l; } __attribute__((packed)) 
av_alias;
 } while(0)
 #endif
 
+#if AV_HAVE_BIGENDIAN
+#   ifndef AV_WN24
+#   define AV_WN24(p, v) AV_WB24(p, v)
+#   endif
+#   ifndef AV_RN24
+#   define AV_RN24(p) AV_RB24(p)
+#   endif
+#   ifndef AV_WN48
+#   define AV_WN48(p, v) AV_WB48(p, v)
+#   endif
+#   ifndef AV_RN48
+#   define AV_RN48(p) AV_RB48(p)
+#   endif
+#else
+#   ifndef AV_WN24
+#   define AV_WN24(p, v) AV_WL24(p, v)
+#   endif
+#   ifndef AV_RN24
+#   define AV_RN24(p) AV_RL24(p)
+#   endif
+#   ifndef AV_WN48
+#   define AV_WN48(p, v) AV_WL48(p, v)
+#   endif
+#   ifndef AV_RN48
+#   define AV_RN48(p) AV_RL48(p)
+#   endif
+#endif
+
 /*
  * The AV_[RW]NA macros access naturally aligned data
  * in a type-safe way.
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".