[FFmpeg-devel] [PATCH] avformat: add Limitless Audio Format demuxer
Patch attached. From d867b825507b5f38a051dd0ccf4612b7570a2088 Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Sun, 11 Sep 2022 20:10:27 +0200 Subject: [PATCH] avformat: add LAF demuxer Signed-off-by: Paul B Mahol --- libavformat/Makefile | 1 + libavformat/allformats.c | 1 + libavformat/lafdec.c | 253 +++ 3 files changed, 255 insertions(+) create mode 100644 libavformat/lafdec.c diff --git a/libavformat/Makefile b/libavformat/Makefile index 5cdcda3239..19a4ba2a8f 100644 --- a/libavformat/Makefile +++ b/libavformat/Makefile @@ -319,6 +319,7 @@ OBJS-$(CONFIG_JV_DEMUXER)+= jvdec.o OBJS-$(CONFIG_KUX_DEMUXER) += flvdec.o OBJS-$(CONFIG_KVAG_DEMUXER) += kvag.o OBJS-$(CONFIG_KVAG_MUXER)+= kvag.o rawenc.o +OBJS-$(CONFIG_LAF_DEMUXER) += lafdec.o OBJS-$(CONFIG_LATM_MUXER)+= latmenc.o rawenc.o OBJS-$(CONFIG_LMLM4_DEMUXER) += lmlm4.o OBJS-$(CONFIG_LOAS_DEMUXER) += loasdec.o rawdec.o diff --git a/libavformat/allformats.c b/libavformat/allformats.c index cebd5e0c67..a545b5ff45 100644 --- a/libavformat/allformats.c +++ b/libavformat/allformats.c @@ -236,6 +236,7 @@ extern const AVInputFormat ff_jv_demuxer; extern const AVInputFormat ff_kux_demuxer; extern const AVInputFormat ff_kvag_demuxer; extern const AVOutputFormat ff_kvag_muxer; +extern const AVInputFormat ff_laf_demuxer; extern const AVOutputFormat ff_latm_muxer; extern const AVInputFormat ff_lmlm4_demuxer; extern const AVInputFormat ff_loas_demuxer; diff --git a/libavformat/lafdec.c b/libavformat/lafdec.c new file mode 100644 index 00..35bce2b327 --- /dev/null +++ b/libavformat/lafdec.c @@ -0,0 +1,253 @@ +/* + * Limitless Audio Format demuxer + * Copyright (c) 2022 Paul B Mahol + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/intreadwrite.h" +#include "avformat.h" +#include "internal.h" + +typedef struct StreamParams { +float horizontal; +float vertical; +int lfe; +AVChannelLayout layout; +} StreamParams; + +typedef struct LAFContext { +uint8_t *data; +unsigned nb_stored; +unsigned stored_index; +unsigned index; +unsigned bpp; + +StreamParams p[1024]; +} LAFContext; + +typedef struct LAFStream { +unsigned stored; +} LAFStream; + +static int laf_probe(const AVProbeData *p) +{ +if (memcmp(p->buf, "LIMITLESS", 9)) +return 0; +if (memcmp(p->buf + 9, "HEAD", 4)) +return 0; +return AVPROBE_SCORE_MAX; +} + +static int laf_read_header(AVFormatContext *ctx) +{ +LAFContext *s = ctx->priv_data; +AVIOContext *pb = ctx->pb; +unsigned st_count, mode; +unsigned sample_rate; +int64_t duration; +int codec_id; +int quality; +int bpp; + +avio_skip(pb, 9); +if (avio_rb32(pb) != MKBETAG('H','E','A','D')) +return AVERROR_INVALIDDATA; + +quality = avio_r8(pb); +if (quality > 3) +return AVERROR_INVALIDDATA; +mode = avio_r8(pb); +if (mode > 1) +return AVERROR_INVALIDDATA; +st_count = avio_rl32(pb); +if (st_count == 0 || st_count > 1024) +return AVERROR_INVALIDDATA; + +for (int i = 0; i < st_count; i++) { +StreamParams *stp = &s->p[i]; + +stp->vertical = av_int2float(avio_rl32(pb)); +stp->horizontal = av_int2float(avio_rl32(pb)); +stp->lfe = avio_r8(pb); +if (stp->lfe) { +stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, (AV_CH_LOW_FREQUENCY)); +} else if (stp->vertical == 0.f && + stp->horizontal == 0.f) { 
+stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, (AV_CH_FRONT_CENTER)); +} else if (stp->vertical == 0.f && + stp->horizontal == -30.f) { +stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, (AV_CH_FRONT_LEFT)); +} else if (stp->vertical == 0.f && + stp->horizontal == 30.f) { +stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, (AV_CH_FRONT_RIGHT)); +} else if (stp->vertical == 0.f && + stp->horizontal == -110.f) { +stp->layout = (AVChannelLayout)AV_CHANNEL_LA
Re: [FFmpeg-devel] [PATCH] avformat: add Limitless Audio Format demuxer
Paul B Mahol: > +static int laf_read_header(AVFormatContext *ctx) > +{ > +LAFContext *s = ctx->priv_data; > +AVIOContext *pb = ctx->pb; > +unsigned st_count, mode; > +unsigned sample_rate; > +int64_t duration; > +int codec_id; > +int quality; > +int bpp; > + > +avio_skip(pb, 9); > +if (avio_rb32(pb) != MKBETAG('H','E','A','D')) > +return AVERROR_INVALIDDATA; > + > +quality = avio_r8(pb); > +if (quality > 3) > +return AVERROR_INVALIDDATA; > +mode = avio_r8(pb); > +if (mode > 1) > +return AVERROR_INVALIDDATA; > +st_count = avio_rl32(pb); > +if (st_count == 0 || st_count > 1024) I don't know whether the limit of 1024 is arbitrary or something from some spec. If it is the latter, you should use a #define for it and also for the size of the StreamParams array in the ctx. If it is the former, you might just use FF_ARRAY_ELEMS(s->p) instead of 1024 here. Or a define, as you prefer. > +return AVERROR_INVALIDDATA; > + > +for (int i = 0; i < st_count; i++) { > +StreamParams *stp = &s->p[i]; > + > +stp->vertical = av_int2float(avio_rl32(pb)); > +stp->horizontal = av_int2float(avio_rl32(pb)); > +stp->lfe = avio_r8(pb); > +if (stp->lfe) { > +stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, > (AV_CH_LOW_FREQUENCY)); > +} else if (stp->vertical == 0.f && > + stp->horizontal == 0.f) { > +stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, > (AV_CH_FRONT_CENTER)); > +} else if (stp->vertical == 0.f && > + stp->horizontal == -30.f) { > +stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, > (AV_CH_FRONT_LEFT)); > +} else if (stp->vertical == 0.f && > + stp->horizontal == 30.f) { > +stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, > (AV_CH_FRONT_RIGHT)); > +} else if (stp->vertical == 0.f && > + stp->horizontal == -110.f) { > +stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, > (AV_CH_SIDE_LEFT)); > +} else if (stp->vertical == 0.f && > + stp->horizontal == 110.f) { > +stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, > (AV_CH_SIDE_RIGHT)); > +} 
else { > +stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO; > +} > +} > + > +sample_rate = avio_rl32(pb); > +duration = avio_rl64(pb) / st_count; > +switch (quality) { > +case 0: > +codec_id = AV_CODEC_ID_PCM_U8; > +bpp = 1; > +break; > +case 1: > +codec_id = AV_CODEC_ID_PCM_S16LE; > +bpp = 2; > +break; > +case 2: > +codec_id = AV_CODEC_ID_PCM_F32LE; > +bpp = 4; > +break; > +case 3: > +codec_id = AV_CODEC_ID_PCM_S24LE; > +bpp = 3; > +break; > +} > + > +s->index = 0; > +s->stored_index = 0; > +s->bpp = bpp; > +s->data = av_mallocz(st_count * sample_rate * bpp); sample_rate is read via avio_rl32() and therefore the multiplication on the right can overflow (it's performed in 32bits, so this can happen even on 64bit systems). Maybe use av_calloc(sample_rate, st_count * bpp). But you also need to ensure that sample_rate actually fits into an int and that st_count * sample_rate * bpp performed in the avio_read() below also fits into an int, so you should probably just ensure this here. > +if (!s->data) > +return AVERROR(ENOMEM); > + > +for (int st = 0; st < st_count; st++) { > +StreamParams *stp = &s->p[st]; > +LAFStream *lafst; > +AVCodecParameters *par; > +AVStream *st = avformat_new_stream(ctx, NULL); > +if (!st) > +return AVERROR(ENOMEM); > + > +par = st->codecpar; > +par->codec_id = codec_id; > +par->codec_type = AVMEDIA_TYPE_AUDIO; > +par->ch_layout.nb_channels = 1; > +par->ch_layout = stp->layout; > +par->sample_rate = sample_rate; > +st->duration = duration; > +st->priv_data = lafst = av_mallocz(sizeof(LAFStream)); lafst is set-but-unused. And given that you are already imposing a hardcoded limit on the number of streams you could just add an array of 1024 uint8_t to your context. 
> +if (!st->priv_data) > +return AVERROR(ENOMEM); > + > +avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate); > +} > + > +return 0; > +} > + > +static int laf_read_packet(AVFormatContext *ctx, AVPacket *pkt) > +{ > +AVIOContext *pb = ctx->pb; > +LAFContext *s = ctx->priv_data; > +AVStream *st = ctx->streams[0]; > +LAFStream *lafst = st->priv_data; > +const int bpp = s->bpp; > +int header_len = (ctx->nb_streams / 8) + !!(ctx->nb_streams & 7); (ctx->nb_streams + 7) / 8. > +int64_t pos; > +int ret; > + > +again: > +if (avio_feof(pb)) > +
Re: [FFmpeg-devel] [PATCH v2] avcodec/arm/sbcenc: avoid callee preserved vfp registers
On Sun, 25 Aug 2019, James Cowgill wrote: When compiling FFmpeg with GCC-9, some very random segfaults were observed in code which had previously called down into the SBC encoder NEON assembly routines. This was caused by these functions clobbering some of the vfp callee saved registers (d8 - d15 aka q4 - q7). GCC was using these registers to save local variables, but after these functions returned, they would contain garbage. Fix by reallocating the registers in the two affected functions in the following way: ff_sbc_analyze_4_neon: q2-q5 => q8-q11, then q1-q4 => q8-q11 ff_sbc_analyze_8_neon: q2-q9 => q8-q15 The reason for using these replacements is to keep closely related sets of registers consecutively numbered which hopefully makes the code more easy to follow. Since this commit only reallocates registers, it should have no performance impact. Signed-off-by: James Cowgill --- On 29/07/2019 19:59, Reimar Döffinger wrote: Seems sensible to me, though extra points if you or someone has numbers on performance impact. To know whether it would be worthwhile to check if it can be optimized... Sorry for the long delay - been on various holidays. Sorry for the even longer response ;-) I happened to run into this patch downstream, and noticed that it does look reasonable, but apparently the second round of the patch was missed back then in 2019. Our current code is indeed broken and wrong - if we would have had checkasm tests for it, this issue would have been caught long ago. I did a few tests on my original patch and overall it was about 2% slower than before. In any case I think this new patch is a better solution (although the diff is a lot larger). We don't actually need that many registers in either of these functions, so instead of pushing the clobbered callee saved registers, we can reallocate all the registers to avoid them in the first place. This way there is no performance impact. 
I couldn't find any tests for this encoder, but I have tested a few audio samples with it and verified the output is identical to what it was before (and with what I get on x86). Thanks a lot for doing that! Indeed that's the best we can do since we don't have tests for it. I'll go ahead and push this patch soon. // Martin ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] avcodec/bonk: Actually clip when using av_clip()
Also fixes a "statement with no effect [-Wunused-value]" warning from GCC. Signed-off-by: Andreas Rheinhardt --- libavcodec/bonk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/bonk.c b/libavcodec/bonk.c index f3d797d588..409694f710 100644 --- a/libavcodec/bonk.c +++ b/libavcodec/bonk.c @@ -280,7 +280,7 @@ static int predictor_calc_error(int *k, int *state, int order, int error) } // don't drift too far, to avoid overflows -av_clip(x, -(SAMPLE_FACTOR << 16), SAMPLE_FACTOR << 16); +x = av_clip(x, -(SAMPLE_FACTOR << 16), SAMPLE_FACTOR << 16); state[0] = x; -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] avcodec/bonk: Actually clip when using av_clip()
On 9/12/2022 9:20 AM, Andreas Rheinhardt wrote: Also fixes a "statement with no effect [-Wunused-value]" warning from GCC. Signed-off-by: Andreas Rheinhardt --- libavcodec/bonk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/bonk.c b/libavcodec/bonk.c index f3d797d588..409694f710 100644 --- a/libavcodec/bonk.c +++ b/libavcodec/bonk.c @@ -280,7 +280,7 @@ static int predictor_calc_error(int *k, int *state, int order, int error) } // don't drift too far, to avoid overflows -av_clip(x, -(SAMPLE_FACTOR << 16), SAMPLE_FACTOR << 16); +x = av_clip(x, -(SAMPLE_FACTOR << 16), SAMPLE_FACTOR << 16); state[0] = x; LGTM, but this decoder needs a test to ensure it's actually doing the right thing. ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] fftools/opt_common: check the return value of av_hwdevice_get_type_name before printing it
It may be NULL, as is the case for D3D11VA_VLD. Running "ffmpeg -h decoder=h264" on a Windows build Before: Decoder h264 [H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10]: Supported hardware devices: dxva2 (null) d3d11va cuda After: Decoder h264 [H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10]: Supported hardware devices: dxva2 d3d11va cuda Signed-off-by: James Almer --- fftools/opt_common.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fftools/opt_common.c b/fftools/opt_common.c index 7cd8b1c66e..8a06df82df 100644 --- a/fftools/opt_common.c +++ b/fftools/opt_common.c @@ -335,9 +335,12 @@ static void print_codec(const AVCodec *c) printf("Supported hardware devices: "); for (int i = 0;; i++) { const AVCodecHWConfig *config = avcodec_get_hw_config(c, i); +const char *name; if (!config) break; -printf("%s ", av_hwdevice_get_type_name(config->device_type)); +name = av_hwdevice_get_type_name(config->device_type); +if (name) +printf("%s ", name); } printf("\n"); } -- 2.37.3 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] Bonk, Bonk
Op wo 7 sep. 2022 om 19:59 schreef Paul B Mahol : > Patches attached. > > Could decoder be made faster? > > Haven't reviewed, but great to have another codec added. Concerning the speed, as far as I know Bonk is slow to decode. Its website states that it is slow: http://www.logarithmic.net/pfh/bonk Also, an old comparison that included it (I haven't seen any more recent one) ranked it the slowest decoding codec: https://web.archive.org/web/20080225210007/http://flac.sourceforge.net/comparison.html Although OptimFROG and the higher MP4ALS preset are probably much slower/more CPU intensive to decode nowadays. ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 1/2] fate/id3v2: Add test for reading and writing UTF-16 BOM tags
Andreas Rheinhardt: > Signed-off-by: Andreas Rheinhardt > --- > tests/fate/id3v2.mak | 5 > tests/ref/fate/id3v2-utf16-bom | 42 ++ > 2 files changed, 47 insertions(+) > create mode 100644 tests/ref/fate/id3v2-utf16-bom > > diff --git a/tests/fate/id3v2.mak b/tests/fate/id3v2.mak > index 4dca681e38..7ad4d877a4 100644 > --- a/tests/fate/id3v2.mak > +++ b/tests/fate/id3v2.mak > @@ -7,6 +7,11 @@ fate-id3v2-priv-remux: CMD = transcode mp3 > $(TARGET_SAMPLES)/id3v2/id3v2_priv.mp > FATE_ID3V2_FFMPEG_FFPROBE-$(call REMUX, AIFF, WAV_DEMUXER) += > fate-id3v2-chapters > fate-id3v2-chapters: CMD = transcode wav > $(TARGET_SAMPLES)/wav/200828-005.wav aiff "-c copy -metadata:c:0 > description=foo -metadata:c:0 date=2021 -metadata:c copyright=none > -metadata:c:1 genre=nonsense -write_id3v2 1" "-c copy -t 0.05" "-show_entries > format_tags:chapters" > > +# Tests reading and writing UTF-16 BOM strings; also tests > +# the AIFF muxer's and demuxer's ability to preserve channel layouts. > +FATE_ID3V2_FFMPEG_FFPROBE-$(call REMUX, AIFF, WAV_DEMUXER FLAC_DEMUXER > PCM_S16LE_DECODER MJPEG_DECODER ARESAMPLE_FILTER CHANNELMAP_FILTER > PCM_S24BE_ENCODER) += fate-id3v2-utf16-bom > +fate-id3v2-utf16-bom: CMD = transcode wav > $(TARGET_SAMPLES)/audio-reference/yo.raw-short.wav aiff "-map 0:a -map 1:v > -af aresample,channelmap=channel_layout=hexagonal,aresample -c:a pcm_s24be > -c:v copy -write_id3v2 1 -id3v2_version 3 -map_metadata:g:0 1:g > -map_metadata:s:v 1:g" "-c copy -t 0.05" "-show_entries > stream=channel_layout:stream_tags:format_tags" "-i > $(TARGET_SAMPLES)/cover_art/cover_art.flac" > + > FATE_SAMPLES_FFPROBE+= $(FATE_ID3V2_FFPROBE-yes) > FATE_SAMPLES_FFMPEG_FFPROBE += $(FATE_ID3V2_FFMPEG_FFPROBE-yes) > fate-id3v2: $(FATE_ID3V2_FFPROBE-yes) $(FATE_ID3V2_FFMPEG_FFPROBE-yes) > diff --git a/tests/ref/fate/id3v2-utf16-bom b/tests/ref/fate/id3v2-utf16-bom > new file mode 100644 > index 00..dd2566de2b > --- /dev/null > +++ b/tests/ref/fate/id3v2-utf16-bom > @@ -0,0 +1,42 @@ > 
+9b8bfdf87a8d3d089819ef9f6f264ec4 *tests/data/fate/id3v2-utf16-bom.aiff > +885482 tests/data/fate/id3v2-utf16-bom.aiff > +#tb 0: 1/9 > +#media_type 0: video > +#codec_id 0: mjpeg > +#dimensions 0: 350x350 > +#sar 0: 1/1 > +#tb 1: 1/48000 > +#media_type 1: audio > +#codec_id 1: pcm_s24be > +#sample_rate 1: 48000 > +#channel_layout_name 1: hexagonal > +0, 0, 0,0,19650, 0xd5662610 > +1, 0, 0, 227, 4086, 0x > +1,227,227, 227, 4086, 0x > +1,454,454, 227, 4086, 0x > +1,681,681, 227, 4086, 0x667b2643 > +1,908,908, 227, 4086, 0x9a09957d > +1, 1135, 1135, 227, 4086, 0x763e27c5 > +1, 1362, 1362, 227, 4086, 0x2a47f536 > +1, 1589, 1589, 227, 4086, 0xed32e5f2 > +1, 1816, 1816, 227, 4086, 0x2e96c720 > +1, 2043, 2043, 227, 4086, 0x84c5b5f0 > +1, 2270, 2270, 227, 4086, 0xe3dfeefc > +[STREAM] > +channel_layout=hexagonal > +[/STREAM] > +[STREAM] > +TAG:title=Дороги > +TAG:comment=Other > +[/STREAM] > +[FORMAT] > +TAG:artist=Мельница > +TAG:RATING=0 > +TAG:album=Ангелофрения > +TAG:title=Дороги > +TAG:tracktotal=11 > +TAG:totaltracks=11 > +TAG:genre=Folk > +TAG:track=2 > +TAG:date=2012 > +[/FORMAT] Will apply this patchset tonight unless there are objections. - Andreas ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] avformat/bonk: Don't set data_offset to what it would be set to anyway
Signed-off-by: Andreas Rheinhardt --- libavformat/bonk.c | 1 - 1 file changed, 1 deletion(-) diff --git a/libavformat/bonk.c b/libavformat/bonk.c index fc400979b3..0fff0b5bda 100644 --- a/libavformat/bonk.c +++ b/libavformat/bonk.c @@ -79,7 +79,6 @@ static int bonk_read_header(AVFormatContext *s) return AVERROR_INVALIDDATA; st->duration = AV_RL32(st->codecpar->extradata + 1) / st->codecpar->ch_layout.nb_channels; avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate); -ffformatcontext(s)->data_offset = avio_tell(s->pb); return 0; } -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] avcodec/x86/Makefile: Don't build empty files
Should fix ticket #9909, fixing a regression since bfb28b5ce89f3e950214b67ea95b45e3355c2caf. Thanks to Carl Eugen Hoyos for analyzing the issue. Signed-off-by: Andreas Rheinhardt --- This would be my solution. What do you think of it? libavcodec/x86/Makefile | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 4e448623af..41ca864849 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -140,8 +140,11 @@ X86ASM-OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp.o \ X86ASM-OBJS-$(CONFIG_RV34DSP) += x86/rv34dsp.o X86ASM-OBJS-$(CONFIG_VC1DSP) += x86/vc1dsp_loopfilter.o \ x86/vc1dsp_mc.o -X86ASM-OBJS-$(CONFIG_IDCTDSP) += x86/simple_idct10.o \ - x86/simple_idct.o +ifdef ARCH_X86_64 +X86ASM-OBJS-$(CONFIG_IDCTDSP) += x86/simple_idct10.o +else +X86ASM-OBJS-$(CONFIG_IDCTDSP) += x86/simple_idct.o +endif X86ASM-OBJS-$(CONFIG_VIDEODSP) += x86/videodsp.o X86ASM-OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp.o X86ASM-OBJS-$(CONFIG_VP8DSP) += x86/vp8dsp.o \ -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] avutil/x86/float_dsp: add fma3 for scalarproduct
Patch attached. From f7c47b8eefa1c06a74d17f13b4e9010785dc6430 Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Wed, 20 Jan 2021 16:58:31 +0100 Subject: [PATCH] avutil/x86/float_dsp: add fma3 for scalarproduct Signed-off-by: Paul B Mahol --- libavutil/x86/float_dsp.asm| 127 + libavutil/x86/float_dsp_init.c | 2 + 2 files changed, 129 insertions(+) diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm index cca4d019c7..8f8e6dddf5 100644 --- a/libavutil/x86/float_dsp.asm +++ b/libavutil/x86/float_dsp.asm @@ -440,6 +440,133 @@ cglobal scalarproduct_float, 3,3,2, v1, v2, offset %endif RET +INIT_YMM fma3 +cglobal scalarproduct_float, 3,5,8, v1, v2, size, len, offset +xor offsetq, offsetq +xorps m0, m0 +shl sized, 2 +mov lenq, sizeq +cmp lenq, 32 +jl .l16 +cmp lenq, 64 +jl .l32 +xorpsm1, m1 +cmp lenq, 128 +jl .l64 +andlenq, ~127 +xorpsm2, m2 +xorpsm3, m3 +.loop128: +movups m4, [v1q+offsetq] +movups m5, [v1q+offsetq + 32] +movups m6, [v1q+offsetq + 64] +movups m7, [v1q+offsetq + 96] +fmaddps m0, m4, [v2q+offsetq ], m0 +fmaddps m1, m5, [v2q+offsetq + 32], m1 +fmaddps m2, m6, [v2q+offsetq + 64], m2 +fmaddps m3, m7, [v2q+offsetq + 96], m3 +add offsetq, 128 +cmp offsetq, lenq +jl .loop128 +addpsm0, m2 +addpsm1, m3 +mov lenq, sizeq +and lenq, 127 +cmp lenq, 64 +jge .l64 +addpsm0, m1 +cmp lenq, 32 +jge .l32 +vextractf128 xmm2, m0, 1 +addpsxmm0, xmm2 +cmp lenq, 16 +jge .l16 +movhlps xmm1, xmm0 +addpsxmm0, xmm1 +movssxmm1, xmm0 +shufps xmm0, xmm0, 1 +addssxmm0, xmm1 +%if ARCH_X86_64 == 0 +movss r0m, xm0 +fld dword r0m +%endif +RET +.l64: +andlenq, ~63 +addlenq, offsetq +.loop64: +movups m4, [v1q+offsetq] +movups m5, [v1q+offsetq + 32] +fmaddps m0, m4, [v2q+offsetq], m0 +fmaddps m1, m5, [v2q+offsetq + 32], m1 +add offsetq, 64 +cmp offsetq, lenq +jl .loop64 +addpsm0, m1 +mov lenq, sizeq +and lenq, 63 +cmp lenq, 32 +jge .l32 +vextractf128 xmm2, m0, 1 +addpsxmm0, xmm2 +cmp lenq, 16 +jge .l16 +movhlps xmm1, xmm0 +addpsxmm0, xmm1 +movssxmm1, xmm0 +shufps xmm0, xmm0, 1 
+addssxmm0, xmm1 +%if ARCH_X86_64 == 0 +movss r0m, xm0 +fld dword r0m +%endif +RET +.l32: +andlenq, ~31 +addlenq, offsetq +.loop32: +movups m4, [v1q+offsetq] +fmaddps m0, m4, [v2q+offsetq], m0 +add offsetq, 32 +cmp offsetq, lenq +jl .loop32 +vextractf128 xmm2, m0, 1 +addpsxmm0, xmm2 +mov lenq, sizeq +and lenq, 31 +cmp lenq, 16 +jge .l16 +movhlps xmm1, xmm0 +addpsxmm0, xmm1 +movssxmm1, xmm0 +shufps xmm0, xmm0, 1 +addssxmm0, xmm1 +%if ARCH_X86_64 == 0 +movss r0m, xm0 +fld dword r0m +%endif +RET +.l16: +andlenq, ~15 +addlenq, offsetq +.loop16: +movaps xmm1, [v1q+offsetq] +mulpsxmm1, [v2q+offsetq] +addpsxmm0, xmm1 +add offsetq, 16 +cmp offsetq, lenq +jl .loop16 +movhlps xmm1, xmm0 +addpsxmm0, xmm1 +movssxmm1, xmm0 +shufps xmm0, xmm0, 1 +addssxmm0, xmm1 +%if ARCH_X86_64 == 0 +movss r0m, xm0 +fld dword r0m +%endif +RET + ;- ; void ff_butterflies_float(float *src0, float *src1, int len); ;- diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c index ad17bc2044..ad6b506259 100644 --- a/libavutil/x86/float_dsp_init.c +++ b/libavutil/x86/float_dsp_init.c @@ -74,6 +74,7 @@ void ff_vector_fmul_reverse_avx2(float *dst, const float *src0, const float *src1, int len); float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); +float ff_scalarproduct_float_fma3(const float *v1, const float *v2, int order); void ff_butterflies_float_sse(float *av_restrict src0, float *av_restrict src1, int len); @@ -112,5 +113,6 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp) fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_fma3; fdsp->vector_fmul_add= ff_vector_fmul_add_fma3; fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_fma3; +fdsp->scalarproduct_float = ff_scalarproduct_float_fma3; } } -- 2.37.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "
[FFmpeg-devel] [PATCH 01/18] doc: reference the RISC-V specification
From: Rémi Denis-Courmont --- doc/optimization.txt | 5 + 1 file changed, 5 insertions(+) diff --git a/doc/optimization.txt b/doc/optimization.txt index 974e2f9af2..3ed29fe38c 100644 --- a/doc/optimization.txt +++ b/doc/optimization.txt @@ -267,6 +267,11 @@ CELL/SPU: http://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/30B3520C93F437AB87257060006FFE5E/$file/Language_Extensions_for_CBEA_2.4.pdf http://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/9F820A5FFA3ECE8C8725716A0062585F/$file/CBE_Handbook_v1.1_24APR2007_pub.pdf +RISC-V-specific: + +The RISC-V Instruction Set Manual, Volume 1, Unprivileged ISA: +https://riscv.org/technical/specifications/ + GCC asm links: -- official doc but quite ugly -- 2.37.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 02/18] lavu/riscv: AV_READ_TIME cycle counter
From: Rémi Denis-Courmont This uses the architected RISC-V 64-bit cycle counter from the RISC-V unprivileged instruction set. In 64-bit and 128-bit, this is a straightforward CSR read. In 32-bit mode, the 64-bit value is exposed as two CSRs, which cannot be read atomically, so a loop is necessary to detect and fix up the race condition where the bottom half wraps exactly between the two reads. --- libavutil/riscv/timer.h | 53 + libavutil/timer.h | 2 ++ 2 files changed, 55 insertions(+) create mode 100644 libavutil/riscv/timer.h diff --git a/libavutil/riscv/timer.h b/libavutil/riscv/timer.h new file mode 100644 index 00..a34157a566 --- /dev/null +++ b/libavutil/riscv/timer.h @@ -0,0 +1,53 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_RISCV_TIMER_H +#define AVUTIL_RISCV_TIMER_H + +#include "config.h" + +#if HAVE_INLINE_ASM +#include + +static inline uint64_t rdcycle64(void) +{ +#if (__riscv_xlen >= 64) +uintptr_t cycles; + +__asm__ volatile ("rdcycle %0" : "=r"(cycles)); + +#else +uint64_t cycles; +uint32_t hi, lo, check; + +__asm__ volatile ( +"1: rdcycleh %0\n" +" rdcycle %1\n" +" rdcycleh %2\n" +" bne %0, %2, 1b\n" : "=r" (hi), "=r" (lo), "=r" (check)); + +cycles = (((uint64_t)hi) << 32) | lo; + +#endif +return cycles; +} + +#define AV_READ_TIME rdcycle64 + +#endif +#endif /* AVUTIL_RISCV_TIMER_H */ diff --git a/libavutil/timer.h b/libavutil/timer.h index 48e576739f..d3db5a27ef 100644 --- a/libavutil/timer.h +++ b/libavutil/timer.h @@ -57,6 +57,8 @@ # include "arm/timer.h" #elif ARCH_PPC # include "ppc/timer.h" +#elif ARCH_RISCV +# include "riscv/timer.h" #elif ARCH_X86 # include "x86/timer.h" #endif -- 2.37.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 03/18] configure/riscv: detect fast CLZ
From: Rémi Denis-Courmont RISC-V defines the CLZ instruction as part of the ratified Zbb subset of the (not yet ratified) bit manipulation extension (B). We can detect it from the __riscv_zbb predefined constant. At least GCC 12 already supports this correctly. Note that the macro will be non-zero if supported, zero if enabled in the compiler flags (e.g. -march=rv64gzbb) but not known to the compiler, and undefined otherwise. --- configure | 6 ++ 1 file changed, 6 insertions(+) diff --git a/configure b/configure index 9e51abd0d3..b7dc1d8656 100755 --- a/configure +++ b/configure @@ -5334,6 +5334,12 @@ elif enabled ppc; then ;; esac +elif enabled riscv; then + +if test_cpp_condition stddef.h "__riscv_zbb"; then +enable fast_clz +fi + elif enabled sparc; then case $cpu in -- 2.37.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 04/18] lavu/riscv: byte-swap operations
From: Rémi Denis-Courmont If the target supports the Basic bit-manipulation (Zbb) extension, then the REV8 instruction is available to reverse byte order. Note that this instruction only exists at the "XLEN" register size, so we need to right shift the result down to the data width. If Zbb is not supported, then this patchset does nothing. Support for run-time detection is left for the future. Currently, there are no bits in auxv/ELF HWCAP for Z-extensions, so there are no clean ways to do this. --- libavutil/bswap.h | 2 ++ libavutil/riscv/bswap.h | 74 + 2 files changed, 76 insertions(+) create mode 100644 libavutil/riscv/bswap.h diff --git a/libavutil/bswap.h b/libavutil/bswap.h index 91cb79538d..4840ab433f 100644 --- a/libavutil/bswap.h +++ b/libavutil/bswap.h @@ -40,6 +40,8 @@ # include "arm/bswap.h" #elif ARCH_AVR32 # include "avr32/bswap.h" +#elif ARCH_RISCV +# include "riscv/bswap.h" #elif ARCH_SH4 # include "sh4/bswap.h" #elif ARCH_X86 diff --git a/libavutil/riscv/bswap.h b/libavutil/riscv/bswap.h new file mode 100644 index 00..de1429c0f7 --- /dev/null +++ b/libavutil/riscv/bswap.h @@ -0,0 +1,74 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_RISCV_BSWAP_H +#define AVUTIL_RISCV_BSWAP_H + +#include +#include "config.h" +#include "libavutil/attributes.h" + +#if defined (__riscv_zbb) && (__riscv_zbb > 0) && HAVE_INLINE_ASM + +static av_always_inline av_const uintptr_t av_bswap_xlen(uintptr_t x) +{ +uintptr_t y; + +__asm__("rev8 %0, %1" : "=r" (y) : "r" (x)); +return y; +} + +#define av_bswap16 av_bswap16 + +static av_always_inline av_const uint_fast16_t av_bswap16(uint_fast16_t x) +{ +return av_bswap_xlen(x) >> (__riscv_xlen - 16); +} + +#if (__riscv_xlen == 32) +#define av_bswap32 av_bswap_xlen +#define av_bswap64 av_bswap64 + +static av_always_inline av_const uint64_t av_bswap64(uint64_t x) +{ +return (((uint64_t)av_bswap32(x)) << 32) | av_bswap32(x >> 32); +} + +#else +#define av_bswap32 av_bswap32 + +static av_always_inline av_const uint_fast32_t av_bswap32(uint_fast32_t x) +{ +return av_bswap_xlen(x) >> (__riscv_xlen - 32); +} + +#if (__riscv_xlen == 64) +#define av_bswap64 av_bswap_xlen + +#else +#define av_bswap64 av_bswap64 + +static av_always_inline av_const uint_fast64_t av_bswap64(uint_fast64_t x) +{ +return av_bswap_xlen(x) >> (__riscv_xlen - 64); +} + +#endif /* __riscv_xlen > 64 */ +#endif /* __riscv_xlen > 32 */ +#endif /* __riscv_zbb */ +#endif /* AVUTIL_RISCV_BSWAP_H */ -- 2.37.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 05/18] lavu/riscv: add optimisations
From: Rémi Denis-Courmont This provides some micro-optimisations for signed integer clipping, and support for bit weight with the Zbb extension. --- libavutil/intmath.h | 5 +- libavutil/riscv/intmath.h | 103 ++ 2 files changed, 106 insertions(+), 2 deletions(-) create mode 100644 libavutil/riscv/intmath.h diff --git a/libavutil/intmath.h b/libavutil/intmath.h index 9573109e9d..c54d23b7bf 100644 --- a/libavutil/intmath.h +++ b/libavutil/intmath.h @@ -28,8 +28,9 @@ #if ARCH_ARM # include "arm/intmath.h" -#endif -#if ARCH_X86 +#elif ARCH_RISCV +# include "riscv/intmath.h" +#elif ARCH_X86 # include "x86/intmath.h" #endif diff --git a/libavutil/riscv/intmath.h b/libavutil/riscv/intmath.h new file mode 100644 index 00..78f7ba930a --- /dev/null +++ b/libavutil/riscv/intmath.h @@ -0,0 +1,103 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_RISCV_INTMATH_H +#define AVUTIL_RISCV_INTMATH_H + +#include <stdint.h> + +#include "config.h" +#include "libavutil/attributes.h" + +/* + * The compiler is forced to sign-extend the result anyhow, so it is faster to + * compute it explicitly and use it.
+ */ +#define av_clip_int8 av_clip_int8_rvi +static av_always_inline av_const int8_t av_clip_int8_rvi(int a) +{ +union { uint8_t u; int8_t s; } u = { .u = a }; + +if (a != u.s) +a = ((a >> 31) ^ 0x7F); +return a; +} + +#define av_clip_int16 av_clip_int16_rvi +static av_always_inline av_const int16_t av_clip_int16_rvi(int a) +{ +union { uint16_t u; int16_t s; } u = { .u = a }; + +if (a != u.s) +a = ((a >> 31) ^ 0x7FFF); +return a; +} + +#define av_clipl_int32 av_clipl_int32_rvi +static av_always_inline av_const int32_t av_clipl_int32_rvi(int64_t a) +{ +union { uint32_t u; int32_t s; } u = { .u = a }; + +if (a != u.s) +a = ((a >> 63) ^ 0x7FFFFFFF); +return a; +} + +#define av_clip_intp2 av_clip_intp2_rvi +static av_always_inline av_const int av_clip_intp2_rvi(int a, int p) +{ +const int shift = 32 - p; +int b = (a << shift) >> shift; + +if (a != b) +b = (a >> 31) ^ ((1 << p) - 1); +return b; +} + +#if defined (__riscv_zbb) && (__riscv_zbb > 0) && HAVE_INLINE_ASM + +#define av_popcount av_popcount_rvb +static av_always_inline av_const int av_popcount_rvb(uint32_t x) +{ +int ret; + +#if (__riscv_xlen >= 64) +__asm__ ("cpopw %0, %1\n" : "=r" (ret) : "r" (x)); +#else +__asm__ ("cpop %0, %1\n" : "=r" (ret) : "r" (x)); +#endif +return ret; +} + +#if (__riscv_xlen >= 64) +#define av_popcount64 av_popcount64_rvb +static av_always_inline av_const int av_popcount64_rvb(uint64_t x) +{ +int ret; + +#if (__riscv_xlen >= 128) +__asm__ ("cpopd %0, %1\n" : "=r" (ret) : "r" (x)); +#else +__asm__ ("cpop %0, %1\n" : "=r" (ret) : "r" (x)); +#endif +return ret; +} +#endif /* __riscv_xlen >= 64 */ +#endif /* __riscv_zbb */ + +#endif /* AVUTIL_RISCV_INTMATH_H */ -- 2.37.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 07/18] lavu/riscv: initial common header for assembler macros
From: Rémi Denis-Courmont --- libavutil/riscv/asm.S | 74 +++ 1 file changed, 74 insertions(+) create mode 100644 libavutil/riscv/asm.S diff --git a/libavutil/riscv/asm.S b/libavutil/riscv/asm.S new file mode 100644 index 00..7623c161cf --- /dev/null +++ b/libavutil/riscv/asm.S @@ -0,0 +1,74 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#if defined (__riscv_float_abi_soft) +#define NOHWF +#define NOHWD +#define HWF # +#define HWD # +#elif defined (__riscv_float_abi_single) +#define NOHWF # +#define NOHWD +#define HWF +#define HWD # +#else +#define NOHWF # +#define NOHWD # +#define HWF +#define HWD +#endif + +.macro func sym, ext= +.text +.align 2 + +.option push +.ifnb \ext +.option arch, +\ext +.endif + +.global \sym +.hidden \sym +.type \sym, %function +\sym: + +.macro endfunc +.size \sym, . - \sym +.option pop +.previous +.purgem endfunc +.endm +.endm + +.macro const sym, align=3, relocate=0 +.if \relocate +.pushsection .data.rel.ro +.else +.pushsection .rodata +.endif +.align \align +\sym: + +.macro endconst +.size \sym, . 
- \sym +.popsection +.purgem endconst +.endm +.endm -- 2.37.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 06/18] configure: probe RISC-V Vector extension
From: Rémi Denis-Courmont --- Makefile | 2 +- configure| 15 +++ ffbuild/arch.mak | 2 ++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 61f79e27ae..1fb742f390 100644 --- a/Makefile +++ b/Makefile @@ -91,7 +91,7 @@ ffbuild/.config: $(CONFIGURABLE_COMPONENTS) SUBDIR_VARS := CLEANFILES FFLIBS HOSTPROGS TESTPROGS TOOLS \ HEADERS ARCH_HEADERS BUILT_HEADERS SKIPHEADERS\ ARMV5TE-OBJS ARMV6-OBJS ARMV8-OBJS VFP-OBJS NEON-OBJS \ - ALTIVEC-OBJS VSX-OBJS MMX-OBJS X86ASM-OBJS\ + ALTIVEC-OBJS VSX-OBJS RVV-OBJS MMX-OBJS X86ASM-OBJS \ MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSP-OBJS MSA-OBJS \ MMI-OBJS LSX-OBJS LASX-OBJS OBJS SLIBOBJS SHLIBOBJS \ STLIBOBJS HOSTOBJS TESTOBJS diff --git a/configure b/configure index b7dc1d8656..c5f20cc323 100755 --- a/configure +++ b/configure @@ -462,6 +462,7 @@ Optimization options (experts only): --disable-mmidisable Loongson MMI optimizations --disable-lsxdisable Loongson LSX optimizations --disable-lasx disable Loongson LASX optimizations + --disable-rvvdisable RISC-V Vector optimizations --disable-fast-unaligned consider unaligned accesses slow Developer options (useful when working on FFmpeg itself): @@ -2126,6 +2127,10 @@ ARCH_EXT_LIST_PPC=" vsx " +ARCH_EXT_LIST_RISCV=" +rvv +" + ARCH_EXT_LIST_X86=" $ARCH_EXT_LIST_X86_SIMD cpunop @@ -2135,6 +2140,7 @@ ARCH_EXT_LIST_X86=" ARCH_EXT_LIST=" $ARCH_EXT_LIST_ARM $ARCH_EXT_LIST_PPC +$ARCH_EXT_LIST_RISCV $ARCH_EXT_LIST_X86 $ARCH_EXT_LIST_MIPS $ARCH_EXT_LIST_LOONGSON @@ -2642,6 +2648,8 @@ ppc4xx_deps="ppc" vsx_deps="altivec" power8_deps="vsx" +rvv_deps="riscv" + loongson2_deps="mips" loongson3_deps="mips" mmi_deps_any="loongson2 loongson3" @@ -6110,6 +6118,10 @@ elif enabled ppc; then check_cpp_condition power8 "altivec.h" "defined(_ARCH_PWR8)" fi +elif enabled riscv; then + +enabled rvv && check_inline_asm rvv '".option arch, +v\nvsetivli zero, 0, e8, m1, ta, ma"' + elif enabled x86; then check_builtin rdtscintrin.h "__rdtsc()" @@ -7596,6 +7608,9 @@ if enabled 
loongarch; then echo "LSX enabled ${lsx-no}" echo "LASX enabled ${lasx-no}" fi +if enabled riscv; then +echo "RISC-V Vector enabled ${rvv-no}" +fi echo "debug symbols ${debug-no}" echo "strip symbols ${stripping-no}" echo "optimize for size ${small-no}" diff --git a/ffbuild/arch.mak b/ffbuild/arch.mak index 997e31e85e..39d76ee152 100644 --- a/ffbuild/arch.mak +++ b/ffbuild/arch.mak @@ -15,5 +15,7 @@ OBJS-$(HAVE_LASX) += $(LASX-OBJS) $(LASX-OBJS-yes) OBJS-$(HAVE_ALTIVEC) += $(ALTIVEC-OBJS) $(ALTIVEC-OBJS-yes) OBJS-$(HAVE_VSX) += $(VSX-OBJS) $(VSX-OBJS-yes) +OBJS-$(HAVE_RVV) += $(RVV-OBJS) $(RVV-OBJS-yes) + OBJS-$(HAVE_MMX) += $(MMX-OBJS) $(MMX-OBJS-yes) OBJS-$(HAVE_X86ASM) += $(X86ASM-OBJS) $(X86ASM-OBJS-yes) -- 2.37.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 09/18] checkasm: register the RISC-V V subsets
From: Rémi Denis-Courmont --- tests/checkasm/checkasm.c | 5 + 1 file changed, 5 insertions(+) diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index e56fd3850e..a5d0503811 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -226,6 +226,11 @@ static const struct { { "ALTIVEC", "altivec", AV_CPU_FLAG_ALTIVEC }, { "VSX", "vsx", AV_CPU_FLAG_VSX }, { "POWER8", "power8", AV_CPU_FLAG_POWER8 }, +#elif ARCH_RISCV +{ "Zve32x", "zve32x", AV_CPU_FLAG_ZVE32X }, +{ "Zve32f", "zve32f", AV_CPU_FLAG_ZVE32F }, +{ "Zve64x", "zve64x", AV_CPU_FLAG_ZVE64X }, +{ "Zve64d", "zve64d", AV_CPU_FLAG_ZVE64D }, #elif ARCH_MIPS { "MMI", "mmi", AV_CPU_FLAG_MMI }, { "MSA", "msa", AV_CPU_FLAG_MSA }, -- 2.37.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 08/18] lavu/riscv: add CPU flags for the RISC-V Vector extension
From: Rémi Denis-Courmont RVV defines a total of 12 different extensions, including: - 5 different instruction subsets: - Zve32x: 8-, 16- and 32-bit integers, - Zve32f: Zve32x plus single precision floats, - Zve64x: Zve32x plus 64-bit integers, - Zve64f: Zve32f plus Zve64x, - Zve64d: Zve64f plus double precision floats. - 6 different vector lengths: - Zvl32b (embedded only), - Zvl64b (embedded only), - Zvl128b, - Zvl256b, - Zvl512b, - Zvl1024b, - and the V extension proper: equivalent to Zve64d and Zvl128b. In total, there are 6 different possible sets of supported instructions (including the empty set), but for convenience we allocate one bit for each type set: up-to-32-bit ints (ZVE32X), floats (ZVE32F), 64-bit ints (ZVE64X) and doubles (ZVE64D). When the vector size is needed, it can be retrieved by reading the unprivileged read-only vlenb CSR. This should probably be a separate helper macro if needed at a later point. --- libavutil/cpu.c | 15 +++ libavutil/cpu.h | 6 + libavutil/cpu_internal.h | 1 + libavutil/riscv/Makefile | 1 + libavutil/riscv/cpu.c| 57 5 files changed, 80 insertions(+) create mode 100644 libavutil/riscv/Makefile create mode 100644 libavutil/riscv/cpu.c diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 0035e927a5..89d2fb6f56 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -62,6 +62,8 @@ static int get_cpu_flags(void) return ff_get_cpu_flags_arm(); #elif ARCH_PPC return ff_get_cpu_flags_ppc(); +#elif ARCH_RISCV +return ff_get_cpu_flags_riscv(); #elif ARCH_X86 return ff_get_cpu_flags_x86(); #elif ARCH_LOONGARCH @@ -178,6 +180,19 @@ int av_parse_cpu_caps(unsigned *flags, const char *s) #elif ARCH_LOONGARCH { "lsx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LSX },.unit = "flags" }, { "lasx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LASX },.unit = "flags" }, +#elif ARCH_RISCV +#define AV_CPU_FLAG_ZVE32X_M (AV_CPU_FLAG_ZVE32X) +#define AV_CPU_FLAG_ZVE32F_M (AV_CPU_FLAG_ZVE32X_M | AV_CPU_FLAG_ZVE32F) +#define
AV_CPU_FLAG_ZVE64X_M (AV_CPU_FLAG_ZVE32X_M | AV_CPU_FLAG_ZVE64X) +#define AV_CPU_FLAG_ZVE64F_M (AV_CPU_FLAG_ZVE32F_M | AV_CPU_FLAG_ZVE64X) +#define AV_CPU_FLAG_ZVE64D_M (AV_CPU_FLAG_ZVE64F_M | AV_CPU_FLAG_ZVE64D) +#define AV_CPU_FLAG_VECTORS AV_CPU_FLAG_ZVE64D_M +{ "vectors", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VECTORS },.unit = "flags" }, +{ "zve32x", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE32X },.unit = "flags" }, +{ "zve32f", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE32F_M },.unit = "flags" }, +{ "zve64x", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE64X_M },.unit = "flags" }, +{ "zve64f", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE64F_M },.unit = "flags" }, +{ "zve64d", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE64D_M },.unit = "flags" }, #endif { NULL }, }; diff --git a/libavutil/cpu.h b/libavutil/cpu.h index 9711e574c5..44836e50d6 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -78,6 +78,12 @@ #define AV_CPU_FLAG_LSX (1 << 0) #define AV_CPU_FLAG_LASX (1 << 1) +// RISC-V Vector extension +#define AV_CPU_FLAG_ZVE32X (1 << 0) /* 8-, 16-, 32-bit integers */ +#define AV_CPU_FLAG_ZVE32F (1 << 1) /* single precision scalars */ +#define AV_CPU_FLAG_ZVE64X (1 << 2) /* 64-bit integers */ +#define AV_CPU_FLAG_ZVE64D (1 << 3) /* double precision scalars */ + /** * Return the flags which specify extensions supported by the CPU. 
* The returned value is affected by av_force_cpu_flags() if that was used diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h index 650d47fc96..634f28bac4 100644 --- a/libavutil/cpu_internal.h +++ b/libavutil/cpu_internal.h @@ -48,6 +48,7 @@ int ff_get_cpu_flags_mips(void); int ff_get_cpu_flags_aarch64(void); int ff_get_cpu_flags_arm(void); int ff_get_cpu_flags_ppc(void); +int ff_get_cpu_flags_riscv(void); int ff_get_cpu_flags_x86(void); int ff_get_cpu_flags_loongarch(void); diff --git a/libavutil/riscv/Makefile b/libavutil/riscv/Makefile new file mode 100644 index 00..1f818043dc --- /dev/null +++ b/libavutil/riscv/Makefile @@ -0,0 +1 @@ +OBJS += riscv/cpu.o diff --git a/libavutil/riscv/cpu.c b/libavutil/riscv/cpu.c new file mode 100644 index 00..9e4cce5e8b --- /dev/null +++ b/libavutil/riscv/cpu.c @@ -0,0 +1,57 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; witho
[FFmpeg-devel] [PATCH 11/18] lavu/riscv: float vector-vector multiplication with RVV
From: Rémi Denis-Courmont --- libavutil/riscv/float_dsp_init.c | 9 - libavutil/riscv/float_dsp_rvv.S | 34 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c index f1d3d52877..903da4eeda 100644 --- a/libavutil/riscv/float_dsp_init.c +++ b/libavutil/riscv/float_dsp_init.c @@ -23,9 +23,13 @@ #include "libavutil/cpu.h" #include "libavutil/float_dsp.h" +void ff_vector_fmul_rvv(float *dst, const float *src0, const float *src1, + int len); void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul, int len); +void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1, + int len); void ff_vector_dmul_scalar_rvv(double *dst, const double *src, double mul, int len); @@ -35,10 +39,13 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp) int flags = av_get_cpu_flags(); if (flags & AV_CPU_FLAG_ZVE32F) { +fdsp->vector_fmul = ff_vector_fmul_rvv; fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv; -if (flags & AV_CPU_FLAG_ZVE64D) +if (flags & AV_CPU_FLAG_ZVE64D) { +fdsp->vector_dmul = ff_vector_dmul_rvv; fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_rvv; +} } #endif } diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S index 365e00190c..65c3a77b01 100644 --- a/libavutil/riscv/float_dsp_rvv.S +++ b/libavutil/riscv/float_dsp_rvv.S @@ -19,6 +19,23 @@ #include "config.h" #include "asm.S" +// (a0) = (a1) * (a2) [0..a3-1] +func ff_vector_fmul_rvv, zve32f +1: vsetvli t0, a3, e32, m8, ta, ma +slli t1, t0, 2 +vle32.v v16, (a1) +add a1, a1, t1 +vle32.v v24, (a2) +add a2, a2, t1 +vfmul.vv v16, v16, v24 +sub a3, a3, t0 +vse32.v v16, (a0) +add a0, a0, t1 +bnez a3, 1b + +ret +endfunc + // (a0) = (a1) * fa0 [0..a2-1] func ff_vector_fmul_scalar_rvv, zve32f NOHWF fmv.w.x fa0, a2 @@ -37,6 +54,23 @@ NOHWF mv a2, a3 ret endfunc +// (a0) = (a1) * (a2) [0..a3-1] +func ff_vector_dmul_rvv, zve64d +1: vsetvli t0, a3, e64, m8, ta, ma +slli t1, t0, 3 +vle64.v 
v16, (a1) +add a1, a1, t1 +vle64.v v24, (a2) +add a2, a2, t1 +vfmul.vv v16, v16, v24 +sub a3, a3, t0 +vse64.v v16, (a0) +add a0, a0, t1 +bnez a3, 1b + +ret +endfunc + // (a0) = (a1) * fa0 [0..a2-1] func ff_vector_dmul_scalar_rvv, zve64d NOHWD fmv.d.x fa0, a2 -- 2.37.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 10/18] lavu/riscv: float vector-scalar multiplication with RVV
From: Rémi Denis-Courmont This is based on existing code from the VLC git tree with two minor changes to account for the different function prototypes. --- libavutil/float_dsp.c| 2 ++ libavutil/float_dsp.h| 1 + libavutil/riscv/Makefile | 4 ++- libavutil/riscv/float_dsp_init.c | 44 + libavutil/riscv/float_dsp_rvv.S | 56 5 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 libavutil/riscv/float_dsp_init.c create mode 100644 libavutil/riscv/float_dsp_rvv.S diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c index 8676c8b0f8..742dd679d2 100644 --- a/libavutil/float_dsp.c +++ b/libavutil/float_dsp.c @@ -156,6 +156,8 @@ av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact) ff_float_dsp_init_arm(fdsp); #elif ARCH_PPC ff_float_dsp_init_ppc(fdsp, bit_exact); +#elif ARCH_RISCV +ff_float_dsp_init_riscv(fdsp); #elif ARCH_X86 ff_float_dsp_init_x86(fdsp); #elif ARCH_MIPS diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h index 9c664592bd..7cad9fc622 100644 --- a/libavutil/float_dsp.h +++ b/libavutil/float_dsp.h @@ -205,6 +205,7 @@ float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len); void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp); void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp); void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict); +void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp); void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp); void ff_float_dsp_init_mips(AVFloatDSPContext *fdsp); diff --git a/libavutil/riscv/Makefile b/libavutil/riscv/Makefile index 1f818043dc..89a8d0d990 100644 --- a/libavutil/riscv/Makefile +++ b/libavutil/riscv/Makefile @@ -1 +1,3 @@ -OBJS += riscv/cpu.o +OBJS += riscv/float_dsp_init.o \ +riscv/cpu.o +RVV-OBJS += riscv/float_dsp_rvv.o diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c new file mode 100644 index 00..f1d3d52877 --- /dev/null +++ b/libavutil/riscv/float_dsp_init.c @@ -0,0 +1,44 @@ +/* + * This file is part of 
FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +#include "config.h" +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/float_dsp.h" + +void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul, +int len); + +void ff_vector_dmul_scalar_rvv(double *dst, const double *src, double mul, +int len); + +av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp) +{ +#if HAVE_RVV +int flags = av_get_cpu_flags(); + +if (flags & AV_CPU_FLAG_ZVE32F) { +fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv; + +if (flags & AV_CPU_FLAG_ZVE64D) +fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_rvv; +} +#endif +} diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S new file mode 100644 index 00..365e00190c --- /dev/null +++ b/libavutil/riscv/float_dsp_rvv.S @@ -0,0 +1,56 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "asm.S" + +// (a0) = (a1) * fa0 [0..a2-1] +func ff_vector_fmul_scalar_rvv, zve32f +NOHWF fmv.w.x fa0, a2 +NOHWF mv a2, a3 + +1: vsetvli t0, a2, e32, m8, ta, ma +slli t1, t0, 2 +vle32.v v16, (a1) +add a1, a1, t1 +vfmul.vf v16, v16, fa0 +sub a2, a2, t0 +vse32.v v16, (a0) +a
[FFmpeg-devel] [PATCH 12/18] lavu/riscv: float vector multiply-accumulate with RVV
From: Rémi Denis-Courmont --- libavutil/riscv/float_dsp_init.c | 6 + libavutil/riscv/float_dsp_rvv.S | 38 2 files changed, 44 insertions(+) diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c index 903da4eeda..1381eadab6 100644 --- a/libavutil/riscv/float_dsp_init.c +++ b/libavutil/riscv/float_dsp_init.c @@ -25,11 +25,15 @@ void ff_vector_fmul_rvv(float *dst, const float *src0, const float *src1, int len); +void ff_vector_fmac_scalar_rvv(float *dst, const float *src, float mul, +int len); void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul, int len); void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1, int len); +void ff_vector_dmac_scalar_rvv(double *dst, const double *src, double mul, +int len); void ff_vector_dmul_scalar_rvv(double *dst, const double *src, double mul, int len); @@ -40,10 +44,12 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp) if (flags & AV_CPU_FLAG_ZVE32F) { fdsp->vector_fmul = ff_vector_fmul_rvv; +fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv; fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv; if (flags & AV_CPU_FLAG_ZVE64D) { fdsp->vector_dmul = ff_vector_dmul_rvv; +fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_rvv; fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_rvv; } } diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S index 65c3a77b01..5a7d92abd6 100644 --- a/libavutil/riscv/float_dsp_rvv.S +++ b/libavutil/riscv/float_dsp_rvv.S @@ -36,6 +36,25 @@ func ff_vector_fmul_rvv, zve32f ret endfunc +// (a0) += (a1) * fa0 [0..a2-1] +func ff_vector_fmac_scalar_rvv, zve32f +NOHWF fmv.w.x fa0, a2 +NOHWF mva2, a3 + +1: vsetvli t0, a2, e32, m8, ta, ma +slli t1, t0, 2 +vle32.v v24, (a1) +add a1, a1, t1 +vle32.v v16, (a0) +vfmacc.vf v16, fa0, v24 +sub a2, a2, t0 +vse32.v v16, (a0) +add a0, a0, t1 +bnez a2, 1b + +ret +endfunc + // (a0) = (a1) * fa0 [0..a2-1] func ff_vector_fmul_scalar_rvv, zve32f NOHWF fmv.w.x fa0, a2 @@ -71,6 
+90,25 @@ func ff_vector_dmul_rvv, zve64d ret endfunc +// (a0) += (a1) * fa0 [0..a2-1] +func ff_vector_dmac_scalar_rvv, zve64d +NOHWD fmv.d.x fa0, a2 +NOHWD mva2, a3 + +1: vsetvli t0, a2, e64, m8, ta, ma +slli t1, t0, 3 +vle64.v v24, (a1) +add a1, a1, t1 +vle64.v v16, (a0) +vfmacc.vf v16, fa0, v24 +sub a2, a2, t0 +vse64.v v16, (a0) +add a0, a0, t1 +bnez a2, 1b + +ret +endfunc + // (a0) = (a1) * fa0 [0..a2-1] func ff_vector_dmul_scalar_rvv, zve64d NOHWD fmv.d.x fa0, a2 -- 2.37.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 18/18] lavu/riscv: fixed vector sum-and-difference with RVV
From: Rémi Denis-Courmont --- libavutil/fixed_dsp.c| 4 +++- libavutil/fixed_dsp.h| 1 + libavutil/riscv/Makefile | 4 +++- libavutil/riscv/fixed_dsp_init.c | 36 ++ libavutil/riscv/fixed_dsp_rvv.S | 38 5 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 libavutil/riscv/fixed_dsp_init.c create mode 100644 libavutil/riscv/fixed_dsp_rvv.S diff --git a/libavutil/fixed_dsp.c b/libavutil/fixed_dsp.c index 154f3bc2d3..bc847949dc 100644 --- a/libavutil/fixed_dsp.c +++ b/libavutil/fixed_dsp.c @@ -162,7 +162,9 @@ AVFixedDSPContext * avpriv_alloc_fixed_dsp(int bit_exact) fdsp->butterflies_fixed = butterflies_fixed_c; fdsp->scalarproduct_fixed = scalarproduct_fixed_c; -#if ARCH_X86 +#if ARCH_RISCV +ff_fixed_dsp_init_riscv(fdsp); +#elif ARCH_X86 ff_fixed_dsp_init_x86(fdsp); #endif diff --git a/libavutil/fixed_dsp.h b/libavutil/fixed_dsp.h index fec806ff2d..1217d3a53b 100644 --- a/libavutil/fixed_dsp.h +++ b/libavutil/fixed_dsp.h @@ -161,6 +161,7 @@ typedef struct AVFixedDSPContext { */ AVFixedDSPContext * avpriv_alloc_fixed_dsp(int strict); +void ff_fixed_dsp_init_riscv(AVFixedDSPContext *fdsp); void ff_fixed_dsp_init_x86(AVFixedDSPContext *fdsp); /** diff --git a/libavutil/riscv/Makefile b/libavutil/riscv/Makefile index 89a8d0d990..1597154ba5 100644 --- a/libavutil/riscv/Makefile +++ b/libavutil/riscv/Makefile @@ -1,3 +1,5 @@ OBJS += riscv/float_dsp_init.o \ +riscv/fixed_dsp_init.o \ riscv/cpu.o -RVV-OBJS += riscv/float_dsp_rvv.o +RVV-OBJS += riscv/float_dsp_rvv.o \ +riscv/fixed_dsp_rvv.o diff --git a/libavutil/riscv/fixed_dsp_init.c b/libavutil/riscv/fixed_dsp_init.c new file mode 100644 index 00..fc143fb419 --- /dev/null +++ b/libavutil/riscv/fixed_dsp_init.c @@ -0,0 +1,36 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +#include "config.h" +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/fixed_dsp.h" + +void ff_butterflies_fixed_rvv(int *v1, int *v2, int len); + +av_cold void ff_fixed_dsp_init_riscv(AVFixedDSPContext *fdsp) +{ +#if HAVE_RVV +int flags = av_get_cpu_flags(); + +if (flags & AV_CPU_FLAG_ZVE32X) +fdsp->butterflies_fixed = ff_butterflies_fixed_rvv; +#endif +} diff --git a/libavutil/riscv/fixed_dsp_rvv.S b/libavutil/riscv/fixed_dsp_rvv.S new file mode 100644 index 00..beb1b949f7 --- /dev/null +++ b/libavutil/riscv/fixed_dsp_rvv.S @@ -0,0 +1,38 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "asm.S" + +// (a0) = (a0) + (a1), (a1) = (a0) - (a1) [0..a2-1] +func ff_butterflies_fixed_rvv, zve32x +1: vsetvli t0, a2, e32, m8, ta, ma +sllit1, t0, 2 +vle32.v v16, (a0) +vle32.v v24, (a1) +vadd.vv v0, v16, v24 +vsub.vv v8, v16, v24 +sub a2, a2, t0 +vse32.v v0, (a0) +add a0, a0, t1 +vse32.v v8, (a1) +add a1, a1, t1 +bneza2, 1b + +ret +endfunc -- 2.37.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 17/18] lavu/riscv: float vector dot product with RVV
From: Rémi Denis-Courmont --- libavutil/riscv/float_dsp_init.c | 2 ++ libavutil/riscv/float_dsp_rvv.S | 21 + 2 files changed, 23 insertions(+) diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c index cf8c995d7c..055cdc7520 100644 --- a/libavutil/riscv/float_dsp_init.c +++ b/libavutil/riscv/float_dsp_init.c @@ -36,6 +36,7 @@ void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1, void ff_vector_fmul_reverse_rvv(float *dst, const float *src0, const float *src1, int len); void ff_butterflies_float_rvv(float *v1, float *v2, int len); +float ff_scalarproduct_float_rvv(const float *v1, const float *v2, int len); void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1, int len); @@ -57,6 +58,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp) fdsp->vector_fmul_add = ff_vector_fmul_add_rvv; fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_rvv; fdsp->butterflies_float = ff_butterflies_float_rvv; +fdsp->scalarproduct_float = ff_scalarproduct_float_rvv; if (flags & AV_CPU_FLAG_ZVE64D) { fdsp->vector_dmul = ff_vector_dmul_rvv; diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S index 65daaa2d27..81bd0e510a 100644 --- a/libavutil/riscv/float_dsp_rvv.S +++ b/libavutil/riscv/float_dsp_rvv.S @@ -167,6 +167,27 @@ func ff_butterflies_float_rvv, zve32f ret endfunc +// a0 = (a0).(a1) [0..a2-1] +func ff_scalarproduct_float_rvv, zve32f +vsetvli zero, zero, e32, m8, ta, ma +vmv.s.x v8, zero + +1: vsetvli t0, a2, e32, m8, ta, ma +slli t1, t0, 2 +vle32.v v16, (a0) +add a0, a0, t1 +vle32.v v24, (a1) +add a1, a1, t1 +vfmul.vv v16, v16, v24 +sub a2, a2, t0 +vfredusum.vs v8, v16, v8 +bnez a2, 1b + +vfmv.f.s fa0, v8 +NOHWF fmv.x.w a0, fa0 +ret +endfunc + // (a0) = (a1) * (a2) [0..a3-1] func ff_vector_dmul_rvv, zve64d 1: vsetvli t0, a3, e64, m8, ta, ma -- 2.37.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, 
visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 13/18] lavu/riscv: float vector multiplication-addition with RVV
From: Rémi Denis-Courmont --- libavutil/riscv/float_dsp_init.c | 3 +++ libavutil/riscv/float_dsp_rvv.S | 19 +++ 2 files changed, 22 insertions(+) diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c index 1381eadab6..9bc1976d04 100644 --- a/libavutil/riscv/float_dsp_init.c +++ b/libavutil/riscv/float_dsp_init.c @@ -29,6 +29,8 @@ void ff_vector_fmac_scalar_rvv(float *dst, const float *src, float mul, int len); void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul, int len); +void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1, + const float *src2, int len); void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1, int len); @@ -46,6 +48,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp) fdsp->vector_fmul = ff_vector_fmul_rvv; fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv; fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv; +fdsp->vector_fmul_add = ff_vector_fmul_add_rvv; if (flags & AV_CPU_FLAG_ZVE64D) { fdsp->vector_dmul = ff_vector_dmul_rvv; diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S index 5a7d92abd6..efbf12179f 100644 --- a/libavutil/riscv/float_dsp_rvv.S +++ b/libavutil/riscv/float_dsp_rvv.S @@ -73,6 +73,25 @@ NOHWF mv a2, a3 ret endfunc +// (a0) = (a1) * (a2) + (a3) [0..a4-1] +func ff_vector_fmul_add_rvv, zve32f +1: vsetvli t0, a4, e32, m8, ta, ma +slli t1, t0, 2 +vle32.v v8, (a1) +add a1, a1, t1 +vle32.v v16, (a2) +add a2, a2, t1 +vle32.v v24, (a3) +add a3, a3, t1 +vfmadd.vv v8, v16, v24 +sub a4, a4, t0 +vse32.v v8, (a0) +add a0, a0, t1 +bnez a4, 1b + +ret +endfunc + // (a0) = (a1) * (a2) [0..a3-1] func ff_vector_dmul_rvv, zve64d 1: vsetvli t0, a3, e64, m8, ta, ma -- 2.37.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 16/18] lavu/riscv: float vector windowed overlap/add with RVV
From: Rémi Denis-Courmont --- libavutil/riscv/float_dsp_init.c | 3 +++ libavutil/riscv/float_dsp_rvv.S | 35 2 files changed, 38 insertions(+) diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c index ae089d2fdb..cf8c995d7c 100644 --- a/libavutil/riscv/float_dsp_init.c +++ b/libavutil/riscv/float_dsp_init.c @@ -29,6 +29,8 @@ void ff_vector_fmac_scalar_rvv(float *dst, const float *src, float mul, int len); void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul, int len); +void ff_vector_fmul_window_rvv(float *dst, const float *src0, +const float *src1, const float *win, int len); void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1, const float *src2, int len); void ff_vector_fmul_reverse_rvv(float *dst, const float *src0, @@ -51,6 +53,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp) fdsp->vector_fmul = ff_vector_fmul_rvv; fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv; fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv; +fdsp->vector_fmul_window = ff_vector_fmul_window_rvv; fdsp->vector_fmul_add = ff_vector_fmul_add_rvv; fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_rvv; fdsp->butterflies_float = ff_butterflies_float_rvv; diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S index b376392294..65daaa2d27 100644 --- a/libavutil/riscv/float_dsp_rvv.S +++ b/libavutil/riscv/float_dsp_rvv.S @@ -73,6 +73,41 @@ NOHWF mv a2, a3 ret endfunc +func ff_vector_fmul_window_rvv, zve32f +// a0: dst, a1: src0, a2: src1, a3: window, a4: length +addi t0, a4, -1 +addt1, t0, a4 +slli t0, t0, 2 +slli t1, t1, 2 +adda2, a2, t0 +addt0, a0, t1 +addt3, a3, t1 +li t1, -4 // byte stride + +1: vsetvlit2, a4, e32, m4, ta, ma +slli t4, t2, 2 +vle32.vv16, (a1) +adda1, a1, t4 +vlse32.v v20, (a2), t1 +suba2, a2, t4 +vle32.vv24, (a3) +adda3, a3, t4 +vlse32.v v28, (t3), t1 +subt3, t3, t4 +vfmul.vv v0, v16, v28 +suba4, a4, t2 +vfmul.vv v8, v16, v24 +vfnmsac.vv v0, v20, v24 
+vfmacc.vv v8, v20, v28 +vse32.vv0, (a0) +adda0, a0, t4 +vsse32.v v8, (t0), t1 +subt0, t0, t4 +bnez a4, 1b + +ret +endfunc + // (a0) = (a1) * (a2) + (a3) [0..a4-1] func ff_vector_fmul_add_rvv, zve32f 1: vsetvli t0, a4, e32, m8, ta, ma -- 2.37.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 14/18] lavu/riscv: float vector sum-and-difference with RVV
From: Rémi Denis-Courmont --- libavutil/riscv/float_dsp_init.c | 2 ++ libavutil/riscv/float_dsp_rvv.S | 18 ++ 2 files changed, 20 insertions(+) diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c index 9bc1976d04..c2b72c3b25 100644 --- a/libavutil/riscv/float_dsp_init.c +++ b/libavutil/riscv/float_dsp_init.c @@ -31,6 +31,7 @@ void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul, int len); void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1, const float *src2, int len); +void ff_butterflies_float_rvv(float *v1, float *v2, int len); void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1, int len); @@ -49,6 +50,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp) fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv; fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv; fdsp->vector_fmul_add = ff_vector_fmul_add_rvv; +fdsp->butterflies_float = ff_butterflies_float_rvv; if (flags & AV_CPU_FLAG_ZVE64D) { fdsp->vector_dmul = ff_vector_dmul_rvv; diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S index efbf12179f..1c3b08b94f 100644 --- a/libavutil/riscv/float_dsp_rvv.S +++ b/libavutil/riscv/float_dsp_rvv.S @@ -92,6 +92,24 @@ func ff_vector_fmul_add_rvv, zve32f ret endfunc +// (a0) = (a0) + (a1), (a1) = (a0) - (a1) [0..a2-1] +func ff_butterflies_float_rvv, zve32f +1: vsetvli t0, a2, e32, m8, ta, ma +slli t1, t0, 2 +vle32.v v16, (a0) +vle32.v v24, (a1) +vfadd.vv v0, v16, v24 +vfsub.vv v8, v16, v24 +sub a2, a2, t0 +vse32.v v0, (a0) +add a0, a0, t1 +vse32.v v8, (a1) +add a1, a1, t1 +bnez a2, 1b + +ret +endfunc + // (a0) = (a1) * (a2) [0..a3-1] func ff_vector_dmul_rvv, zve64d 1: vsetvli t0, a3, e64, m8, ta, ma -- 2.37.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 15/18] lavu/riscv: float reversed vector multiplication with RVV
From: Rémi Denis-Courmont --- libavutil/riscv/float_dsp_init.c | 3 +++ libavutil/riscv/float_dsp_rvv.S | 22 ++ 2 files changed, 25 insertions(+) diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c index c2b72c3b25..ae089d2fdb 100644 --- a/libavutil/riscv/float_dsp_init.c +++ b/libavutil/riscv/float_dsp_init.c @@ -31,6 +31,8 @@ void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul, int len); void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1, const float *src2, int len); +void ff_vector_fmul_reverse_rvv(float *dst, const float *src0, + const float *src1, int len); void ff_butterflies_float_rvv(float *v1, float *v2, int len); void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1, @@ -50,6 +52,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp) fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv; fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv; fdsp->vector_fmul_add = ff_vector_fmul_add_rvv; +fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_rvv; fdsp->butterflies_float = ff_butterflies_float_rvv; if (flags & AV_CPU_FLAG_ZVE64D) { diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S index 1c3b08b94f..b376392294 100644 --- a/libavutil/riscv/float_dsp_rvv.S +++ b/libavutil/riscv/float_dsp_rvv.S @@ -92,6 +92,28 @@ func ff_vector_fmul_add_rvv, zve32f ret endfunc +// (a0) = (a1) * reverse(a2) [0..a3-1] +func ff_vector_fmul_reverse_rvv, zve32f +add t3, a3, -1 +li t2, -4 // byte stride +slli t3, t3, 2 +add a2, a2, t3 + +1: vsetvli t0, a3, e32, m8, ta, ma +slli t1, t0, 2 +vle32.v v16, (a1) +add a1, a1, t1 +vlse32.v v24, (a2), t2 +sub a2, a2, t1 +vfmul.vv v16, v16, v24 +sub a3, a3, t0 +vse32.v v16, (a0) +add a0, a0, t1 +bnez a3, 1b + +ret +endfunc + // (a0) = (a0) + (a1), (a1) = (a0) - (a1) [0..a2-1] func ff_butterflies_float_rvv, zve32f 1: vsetvli t0, a2, e32, m8, ta, ma -- 2.37.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org 
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCHv5 0/18] RISC-V cycle counters, Zbb & initial V extension support
The following changes since commit 3ce6fa6b6d099dcad43bb0178334441ab72df4cc: avformat: add bonk demuxer (2022-09-12 11:35:43 +0200) are available in the Git repository at: git.remlab.net:git/ffmpeg.git rvv for you to fetch changes up to ceb5ead12aba107292a09a342570f6fd12c9951d: lavu/riscv: fixed vector sum-and-difference with RVV (2022-09-12 17:57:07 +0300) Change since v4: - Marked RVV makefile variables as directory-specific to pave the way for optimisations elsewhere than libavutil. Rémi Denis-Courmont (18): doc: reference the RISC-V specification lavu/riscv: AV_READ_TIME cycle counter configure/riscv: detect fast CLZ lavu/riscv: byte-swap operations lavu/riscv: add optimisations configure: probe RISC-V Vector extension lavu/riscv: initial common header for assembler macros lavu/riscv: add CPU flags for the RISC-V Vector extension checkasm: register the RISC-V V subsets lavu/riscv: float vector-scalar multiplication with RVV lavu/riscv: float vector-vector multiplication with RVV lavu/riscv: float vector multiply-accumulate with RVV lavu/riscv: float vector multiplication-addition with RVV lavu/riscv: float vector sum-and-difference with RVV lavu/riscv: float reversed vector multiplication with RVV lavu/riscv: float vector windowed overlap/add with RVV lavu/riscv: float vector dot product with RVV lavu/riscv: fixed vector sum-and-difference with RVV Makefile | 2 +- configure| 21 doc/optimization.txt | 5 + ffbuild/arch.mak | 2 + libavutil/bswap.h| 2 + libavutil/cpu.c | 15 +++ libavutil/cpu.h | 6 + libavutil/cpu_internal.h | 1 + libavutil/fixed_dsp.c| 4 +- libavutil/fixed_dsp.h| 1 + libavutil/float_dsp.c| 2 + libavutil/float_dsp.h| 1 + libavutil/intmath.h | 5 +- libavutil/riscv/Makefile | 5 + libavutil/riscv/asm.S| 74 libavutil/riscv/bswap.h | 74 libavutil/riscv/cpu.c| 57 + libavutil/riscv/fixed_dsp_init.c | 36 ++ libavutil/riscv/fixed_dsp_rvv.S | 38 ++ libavutil/riscv/float_dsp_init.c | 70 +++ libavutil/riscv/float_dsp_rvv.S | 243 +++ libavutil/riscv/intmath.h| 
103 + libavutil/riscv/timer.h | 53 + libavutil/timer.h| 2 + tests/checkasm/checkasm.c| 5 + 25 files changed, 823 insertions(+), 4 deletions(-) create mode 100644 libavutil/riscv/Makefile create mode 100644 libavutil/riscv/asm.S create mode 100644 libavutil/riscv/bswap.h create mode 100644 libavutil/riscv/cpu.c create mode 100644 libavutil/riscv/fixed_dsp_init.c create mode 100644 libavutil/riscv/fixed_dsp_rvv.S create mode 100644 libavutil/riscv/float_dsp_init.c create mode 100644 libavutil/riscv/float_dsp_rvv.S create mode 100644 libavutil/riscv/intmath.h create mode 100644 libavutil/riscv/timer.h -- 雷米‧德尼-库尔蒙 http://www.remlab.net/ ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2 1/3] avfilter/avfilter: Don't use AVFrame.channel_layout
Signed-off-by: Andreas Rheinhardt --- libavfilter/avfilter.c | 12 ++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c index 965f5d0f63..bde41637dd 100644 --- a/libavfilter/avfilter.c +++ b/libavfilter/avfilter.c @@ -21,6 +21,7 @@ #include "libavutil/avassert.h" #include "libavutil/avstring.h" +#include "libavutil/bprint.h" #include "libavutil/buffer.h" #include "libavutil/channel_layout.h" #include "libavutil/common.h" @@ -45,6 +46,7 @@ static void tlog_ref(void *ctx, AVFrame *ref, int end) { +#ifdef TRACE ff_tlog(ctx, "ref[%p buf:%p data:%p linesize[%d, %d, %d, %d] pts:%"PRId64" pos:%"PRId64, ref, ref->buf, ref->data[0], @@ -61,13 +63,19 @@ static void tlog_ref(void *ctx, AVFrame *ref, int end) av_get_picture_type_char(ref->pict_type)); } if (ref->nb_samples) { -ff_tlog(ctx, " cl:%"PRId64"d n:%d r:%d", -ref->channel_layout, +AVBPrint bprint; + +av_bprint_init(&bprint, 1, AV_BPRINT_SIZE_UNLIMITED); +av_channel_layout_describe_bprint(&ref->ch_layout, &bprint); +ff_tlog(ctx, " cl:%s n:%d r:%d", +bprint.str, ref->nb_samples, ref->sample_rate); +av_bprint_finalize(&bprint, NULL); } ff_tlog(ctx, "]%s", end ? "\n" : ""); +#endif } void ff_command_queue_pop(AVFilterContext *filter) -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 2/3] avfilter/avfilter: #if ff_tlog_link() away when empty
It is currently calling av_channel_layout_describe() unnecessarily. Signed-off-by: Andreas Rheinhardt --- libavfilter/avfilter.c | 2 ++ libavfilter/internal.h | 4 2 files changed, 6 insertions(+) diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c index bde41637dd..f34204e650 100644 --- a/libavfilter/avfilter.c +++ b/libavfilter/avfilter.c @@ -381,6 +381,7 @@ int avfilter_config_links(AVFilterContext *filter) return 0; } +#ifdef TRACE void ff_tlog_link(void *ctx, AVFilterLink *link, int end) { if (link->type == AVMEDIA_TYPE_VIDEO) { @@ -404,6 +405,7 @@ void ff_tlog_link(void *ctx, AVFilterLink *link, int end) end ? "\n" : ""); } } +#endif int ff_request_frame(AVFilterLink *link) { diff --git a/libavfilter/internal.h b/libavfilter/internal.h index 0f8da367d0..0128820be0 100644 --- a/libavfilter/internal.h +++ b/libavfilter/internal.h @@ -268,7 +268,11 @@ void ff_command_queue_pop(AVFilterContext *filter); char *ff_get_ref_perms_string(char *buf, size_t buf_size, int perms); +#ifdef TRACE void ff_tlog_link(void *ctx, AVFilterLink *link, int end); +#else +#define ff_tlog_link(ctx, link, end) do { } while(0) +#endif /** * Append a new input/output pad to the filter's list of such pads. -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 3/3] avfilter/video: Fix newline in trace output
Forgotten in 7e350379f87e7f74420b4813170fe808e2313911. Signed-off-by: Andreas Rheinhardt --- Does anyone actually use this? The fact that this went unnoticed for so long suggests "no". libavfilter/video.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavfilter/video.c b/libavfilter/video.c index e9eb110ff4..7683ef6fd4 100644 --- a/libavfilter/video.c +++ b/libavfilter/video.c @@ -102,7 +102,7 @@ AVFrame *ff_get_video_buffer(AVFilterLink *link, int w, int h) { AVFrame *ret = NULL; -FF_TPRINTF_START(NULL, get_video_buffer); ff_tlog_link(NULL, link, 0); +FF_TPRINTF_START(NULL, get_video_buffer); ff_tlog_link(NULL, link, 1); if (link->dstpad->get_buffer.video) ret = link->dstpad->get_buffer.video(link, w, h); -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] avcodec/tiff: Fix loop detection
Fixes regression with tickets/4364/L1004220.DNG Signed-off-by: Michael Niedermayer --- libavcodec/tiff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavcodec/tiff.c b/libavcodec/tiff.c index beb427e0074..226050744fc 100644 --- a/libavcodec/tiff.c +++ b/libavcodec/tiff.c @@ -1747,7 +1747,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p, int *got_frame, AVPacket *avpkt) { TiffContext *const s = avctx->priv_data; -unsigned off, last_off; +unsigned off, last_off = 0; int le, ret, plane, planes; int i, j, entries, stride; unsigned soff, ssize; @@ -1812,7 +1812,6 @@ again: /** whether we should process this multi-page IFD's next page */ retry_for_page = s->get_page && s->cur_page + 1 < s->get_page; // get_page is 1-indexed -last_off = off; if (retry_for_page) { // set offset to the next IFD off = ff_tget_long(&s->gb, le); @@ -1830,6 +1829,7 @@ again: avpriv_request_sample(s->avctx, "non increasing IFD offset"); return AVERROR_INVALIDDATA; } +last_off = off; if (off >= UINT_MAX - 14 || avpkt->size < off + 14) { av_log(avctx, AV_LOG_ERROR, "IFD offset is greater than image size\n"); return AVERROR_INVALIDDATA; -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] avcodec/x86/audiodsp: add scalarproduct avx2
Patch attached. How to get more speed? From 55eb5a18b4bf029f52f9d9108a750c576ba780ee Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Mon, 12 Sep 2022 18:53:31 +0200 Subject: [PATCH] avcodec/x86/audiodsp: add scalarproduct avx2 Signed-off-by: Paul B Mahol --- libavcodec/x86/audiodsp.asm| 24 libavcodec/x86/audiodsp_init.c | 6 ++ 2 files changed, 30 insertions(+) diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm index b604b0443c..55051f6aa7 100644 --- a/libavcodec/x86/audiodsp.asm +++ b/libavcodec/x86/audiodsp.asm @@ -44,6 +44,30 @@ cglobal scalarproduct_int16, 3,3,3, v1, v2, order movd eax, m2 RET +INIT_YMM avx2 +cglobal scalarproduct_int16, 3,4,3, v1, v2, order, offset +xor offsetq, offsetq +add orderd, orderd +pxorm1, m1 +cmp orderd, 32 +jl .l16 +.loop: +movum0, [v1q + offsetq] +pmaddwd m0, [v2q + offsetq] +paddd m1, m0 +add offsetq, mmsize +cmp offsetq, orderq +jl .loop +HADDD m1, m0 +movd eax, xm1 +RET +.l16: +movuxm0, [v1q + offsetq] +pmaddwd xm0, [v2q + offsetq] +paddd xm1, xm0 +HADDD xm1, xm0 +movd eax, xm1 +RET ;- ; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min, diff --git a/libavcodec/x86/audiodsp_init.c b/libavcodec/x86/audiodsp_init.c index aa5e43e570..77d5948442 100644 --- a/libavcodec/x86/audiodsp_init.c +++ b/libavcodec/x86/audiodsp_init.c @@ -24,6 +24,9 @@ #include "libavutil/x86/cpu.h" #include "libavcodec/audiodsp.h" +int32_t ff_scalarproduct_int16_avx2(const int16_t *v1, const int16_t *v2, +int order); + int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, int order); @@ -53,4 +56,7 @@ av_cold void ff_audiodsp_init_x86(AudioDSPContext *c) if (EXTERNAL_SSE4(cpu_flags)) c->vector_clip_int32 = ff_vector_clip_int32_sse4; + +if (EXTERNAL_AVX2(cpu_flags)) +c->scalarproduct_int16 = ff_scalarproduct_int16_avx2; } -- 2.37.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email 
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] avcodec/x86/audiodsp: add scalarproduct avx2
From 55eb5a18b4bf029f52f9d9108a750c576ba780ee Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Mon, 12 Sep 2022 18:53:31 +0200 Subject: [PATCH] avcodec/x86/audiodsp: add scalarproduct avx2 Signed-off-by: Paul B Mahol --- libavcodec/x86/audiodsp.asm| 24 libavcodec/x86/audiodsp_init.c | 6 ++ 2 files changed, 30 insertions(+) diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm index b604b0443c..55051f6aa7 100644 --- a/libavcodec/x86/audiodsp.asm +++ b/libavcodec/x86/audiodsp.asm @@ -44,6 +44,30 @@ cglobal scalarproduct_int16, 3,3,3, v1, v2, order movd eax, m2 RET +INIT_YMM avx2 +cglobal scalarproduct_int16, 3,4,3, v1, v2, order, offset +xor offsetq, offsetq +add orderd, orderd +pxorm1, m1 +cmp orderd, 32 This parameter needs to be multiple of 16. What will happen below if it's for example 48? Are both buffers padded enough to handle 16 bytes of overread? +jl .l16 +.loop: +movum0, [v1q + offsetq] +pmaddwd m0, [v2q + offsetq] +paddd m1, m0 +add offsetq, mmsize +cmp offsetq, orderq You should use the neg trick from the sse2 version so you can remove the cmp from this loop. 
+jl .loop +HADDD m1, m0 +movd eax, xm1 +RET +.l16: +movuxm0, [v1q + offsetq] +pmaddwd xm0, [v2q + offsetq] +paddd xm1, xm0 +HADDD xm1, xm0 +movd eax, xm1 +RET ;- ; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min, diff --git a/libavcodec/x86/audiodsp_init.c b/libavcodec/x86/audiodsp_init.c index aa5e43e570..77d5948442 100644 --- a/libavcodec/x86/audiodsp_init.c +++ b/libavcodec/x86/audiodsp_init.c @@ -24,6 +24,9 @@ #include "libavutil/x86/cpu.h" #include "libavcodec/audiodsp.h" +int32_t ff_scalarproduct_int16_avx2(const int16_t *v1, const int16_t *v2, +int order); + int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, int order); @@ -53,4 +56,7 @@ av_cold void ff_audiodsp_init_x86(AudioDSPContext *c) if (EXTERNAL_SSE4(cpu_flags)) c->vector_clip_int32 = ff_vector_clip_int32_sse4; + +if (EXTERNAL_AVX2(cpu_flags)) +c->scalarproduct_int16 = ff_scalarproduct_int16_avx2; } -- 2.37.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] avcodec/x86/audiodsp: add scalarproduct avx2
On 9/12/2022 3:39 PM, James Almer wrote: From 55eb5a18b4bf029f52f9d9108a750c576ba780ee Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Mon, 12 Sep 2022 18:53:31 +0200 Subject: [PATCH] avcodec/x86/audiodsp: add scalarproduct avx2 Signed-off-by: Paul B Mahol --- libavcodec/x86/audiodsp.asm | 24 libavcodec/x86/audiodsp_init.c | 6 ++ 2 files changed, 30 insertions(+) diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm index b604b0443c..55051f6aa7 100644 --- a/libavcodec/x86/audiodsp.asm +++ b/libavcodec/x86/audiodsp.asm @@ -44,6 +44,30 @@ cglobal scalarproduct_int16, 3,3,3, v1, v2, order movd eax, m2 RET +INIT_YMM avx2 +cglobal scalarproduct_int16, 3,4,3, v1, v2, order, offset + xor offsetq, offsetq + add orderd, orderd + pxor m1, m1 + cmp orderd, 32 This parameter needs to be multiple of 16. What will happen below if it's for example 48? Are both buffers padded enough to handle 16 bytes of overread? Nevermind, it's int16_t* buffers. You can simplify this as: INIT_YMM avx2 cglobal scalarproduct_int16, 3,3,3, v1, v2, order add orderd, orderd add v1q, orderq add v2q, orderq neg orderq pxorm1, m1 .loop: movum0, [v1q + orderq] pmaddwd m0, [v2q + orderq] paddd m1, m0 add orderq, mmsize jl .loop HADDD m1, m0 movd eax, xm1 RET + jl .l16 +.loop: + movu m0, [v1q + offsetq] + pmaddwd m0, [v2q + offsetq] + paddd m1, m0 + add offsetq, mmsize + cmp offsetq, orderq You should use the neg trick from the sse2 version so you can remove the cmp from this loop. 
+ jl .loop + HADDD m1, m0 + movd eax, xm1 + RET +.l16: + movu xm0, [v1q + offsetq] + pmaddwd xm0, [v2q + offsetq] + paddd xm1, xm0 + HADDD xm1, xm0 + movd eax, xm1 + RET ;- ; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min, diff --git a/libavcodec/x86/audiodsp_init.c b/libavcodec/x86/audiodsp_init.c index aa5e43e570..77d5948442 100644 --- a/libavcodec/x86/audiodsp_init.c +++ b/libavcodec/x86/audiodsp_init.c @@ -24,6 +24,9 @@ #include "libavutil/x86/cpu.h" #include "libavcodec/audiodsp.h" +int32_t ff_scalarproduct_int16_avx2(const int16_t *v1, const int16_t *v2, + int order); + int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, int order); @@ -53,4 +56,7 @@ av_cold void ff_audiodsp_init_x86(AudioDSPContext *c) if (EXTERNAL_SSE4(cpu_flags)) c->vector_clip_int32 = ff_vector_clip_int32_sse4; + + if (EXTERNAL_AVX2(cpu_flags)) + c->scalarproduct_int16 = ff_scalarproduct_int16_avx2; } -- 2.37.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] avformat: add Limitless Audio Format demuxer
On 9/12/22, Paul B Mahol wrote: > Patch attached. > Updated patch attached. From 33efa252db96d9eac7f162f17b22c1cd8b3b1c14 Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Sun, 11 Sep 2022 20:10:27 +0200 Subject: [PATCH] avformat: add LAF demuxer Signed-off-by: Paul B Mahol --- libavformat/Makefile | 1 + libavformat/allformats.c | 1 + libavformat/lafdec.c | 271 +++ 3 files changed, 273 insertions(+) create mode 100644 libavformat/lafdec.c diff --git a/libavformat/Makefile b/libavformat/Makefile index 5cdcda3239..19a4ba2a8f 100644 --- a/libavformat/Makefile +++ b/libavformat/Makefile @@ -319,6 +319,7 @@ OBJS-$(CONFIG_JV_DEMUXER)+= jvdec.o OBJS-$(CONFIG_KUX_DEMUXER) += flvdec.o OBJS-$(CONFIG_KVAG_DEMUXER) += kvag.o OBJS-$(CONFIG_KVAG_MUXER)+= kvag.o rawenc.o +OBJS-$(CONFIG_LAF_DEMUXER) += lafdec.o OBJS-$(CONFIG_LATM_MUXER)+= latmenc.o rawenc.o OBJS-$(CONFIG_LMLM4_DEMUXER) += lmlm4.o OBJS-$(CONFIG_LOAS_DEMUXER) += loasdec.o rawdec.o diff --git a/libavformat/allformats.c b/libavformat/allformats.c index cebd5e0c67..a545b5ff45 100644 --- a/libavformat/allformats.c +++ b/libavformat/allformats.c @@ -236,6 +236,7 @@ extern const AVInputFormat ff_jv_demuxer; extern const AVInputFormat ff_kux_demuxer; extern const AVInputFormat ff_kvag_demuxer; extern const AVOutputFormat ff_kvag_muxer; +extern const AVInputFormat ff_laf_demuxer; extern const AVOutputFormat ff_latm_muxer; extern const AVInputFormat ff_lmlm4_demuxer; extern const AVInputFormat ff_loas_demuxer; diff --git a/libavformat/lafdec.c b/libavformat/lafdec.c new file mode 100644 index 00..12b0d8540b --- /dev/null +++ b/libavformat/lafdec.c @@ -0,0 +1,271 @@ +/* + * Limitless Audio Format demuxer + * Copyright (c) 2022 Paul B Mahol + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/intreadwrite.h" +#include "avformat.h" +#include "internal.h" + +#define MAX_STREAMS 4096 + +typedef struct StreamParams { +AVChannelLayout layout; +float horizontal; +float vertical; +int lfe; +int stored; +} StreamParams; + +typedef struct LAFContext { +uint8_t *data; +unsigned nb_stored; +unsigned stored_index; +unsigned index; +unsigned bpp; + +StreamParams p[MAX_STREAMS]; + +int header_len; +uint8_t header[(MAX_STREAMS + 7) / 8]; +} LAFContext; + +static int laf_probe(const AVProbeData *p) +{ +if (memcmp(p->buf, "LIMITLESS", 9)) +return 0; +if (memcmp(p->buf + 9, "HEAD", 4)) +return 0; +return AVPROBE_SCORE_MAX; +} + +static int laf_read_header(AVFormatContext *ctx) +{ +LAFContext *s = ctx->priv_data; +AVIOContext *pb = ctx->pb; +unsigned st_count, mode; +unsigned sample_rate; +int64_t duration; +int codec_id; +int quality; +int bpp; + +avio_skip(pb, 9); +if (avio_rb32(pb) != MKBETAG('H','E','A','D')) +return AVERROR_INVALIDDATA; + +quality = avio_r8(pb); +if (quality > 3) +return AVERROR_INVALIDDATA; +mode = avio_r8(pb); +if (mode > 1) +return AVERROR_INVALIDDATA; +st_count = avio_rl32(pb); +if (st_count == 0 || st_count > MAX_STREAMS) +return AVERROR_INVALIDDATA; + +for (int i = 0; i < st_count; i++) { 
+StreamParams *stp = &s->p[i]; + +stp->vertical = av_int2float(avio_rl32(pb)); +stp->horizontal = av_int2float(avio_rl32(pb)); +stp->lfe = avio_r8(pb); +if (stp->lfe) { +stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, (AV_CH_LOW_FREQUENCY)); +} else if (stp->vertical == 0.f && + stp->horizontal == 0.f) { +stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, (AV_CH_FRONT_CENTER)); +} else if (stp->vertical == 0.f && + stp->horizontal == -30.f) { +stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, (AV_CH_FRONT_LEFT)); +} else if (stp->vertical == 0.f && + stp->horizontal == 30.f) { +stp->layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MASK(1, (AV_CH_FRONT_RIGHT)); +} else if (stp->ver
Re: [FFmpeg-devel] [PATCH] avcodec/bonk: Actually clip when using av_clip()
On 9/12/22, Andreas Rheinhardt wrote: > Also fixes a "statement with no effect [-Wunused-value]" > warning from GCC. > > Signed-off-by: Andreas Rheinhardt > --- > libavcodec/bonk.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/libavcodec/bonk.c b/libavcodec/bonk.c > index f3d797d588..409694f710 100644 > --- a/libavcodec/bonk.c > +++ b/libavcodec/bonk.c > @@ -280,7 +280,7 @@ static int predictor_calc_error(int *k, int *state, int > order, int error) > } > > // don't drift too far, to avoid overflows > -av_clip(x, -(SAMPLE_FACTOR << 16), SAMPLE_FACTOR << 16); > +x = av_clip(x, -(SAMPLE_FACTOR << 16), SAMPLE_FACTOR << 16); > > state[0] = x; > LGTM > -- > 2.34.1 > > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] FFmpeg 5.1.2
Hi all, due to more bugfixes I intend to make 5.1.2 soon (within the next few days). I do plan to make releases from older, still maintained/used branches following 5.1.2. Thanks -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Breaking DRM is a little like attempting to break through a door even though the window is wide open and the only thing in the house is a bunch of things you dont want and which you would get tomorrow for free anyway signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] swsresample/swresample: abort on invalid layouts
On 9/8/2022 8:00 PM, James Almer wrote: On 9/8/2022 7:47 PM, Andreas Rheinhardt wrote: James Almer: If it's unsupported or invalid, then there's no point trying to rebuild it using a value that may have been derived from the same layout to begin with. Move the checks before the attempts at copying the layout while at it. Fixes ticket #9908. Signed-off-by: James Almer --- libswresample/swresample.c | 48 +- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/libswresample/swresample.c b/libswresample/swresample.c index 6f04d130d3..5884f8d533 100644 --- a/libswresample/swresample.c +++ b/libswresample/swresample.c @@ -227,7 +227,7 @@ av_cold int swr_init(struct SwrContext *s){ s->in_ch_layout.order = AV_CHANNEL_ORDER_UNSPEC; s->in_ch_layout.nb_channels = s->user_in_ch_count; } - } else + } else if (av_channel_layout_check(&s->user_in_chlayout)) av_channel_layout_copy(&s->in_ch_layout, &s->user_in_chlayout); if ((s->user_out_ch_count && s->user_out_ch_count != s->user_out_chlayout.nb_channels) || @@ -240,17 +240,45 @@ av_cold int swr_init(struct SwrContext *s){ s->out_ch_layout.order = AV_CHANNEL_ORDER_UNSPEC; s->out_ch_layout.nb_channels = s->user_out_ch_count; } - } else + } else if (av_channel_layout_check(&s->user_out_chlayout)) av_channel_layout_copy(&s->out_ch_layout, &s->user_out_chlayout); if (!s->out.ch_count && !s->user_out_ch_layout) s->out.ch_count = s->out_ch_layout.nb_channels; if (!s-> in.ch_count && !s-> user_in_ch_layout) s-> in.ch_count = s->in_ch_layout.nb_channels; + + if (!(ret = av_channel_layout_check(&s->in_ch_layout)) || s->in_ch_layout.nb_channels > SWR_CH_MAX) { + if (ret) + av_channel_layout_describe(&s->in_ch_layout, l1, sizeof(l1)); + av_log(s, AV_LOG_WARNING, "Input channel layout \"%s\" is invalid or unsupported.\n", ret ? 
l1 : ""); + return AVERROR(EINVAL); + } + + if (!(ret = av_channel_layout_check(&s->out_ch_layout)) || s->out_ch_layout.nb_channels > SWR_CH_MAX) { + if (ret) + av_channel_layout_describe(&s->out_ch_layout, l2, sizeof(l2)); + av_log(s, AV_LOG_WARNING, "Output channel layout \"%s\" is invalid or unsupported.\n", ret ? l2 : ""); + return AVERROR(EINVAL); + } #else s->out.ch_count = s-> user_out_chlayout.nb_channels; s-> in.ch_count = s-> user_in_chlayout.nb_channels; + if (!(ret = av_channel_layout_check(&s->user_in_chlayout)) || s->user_in_chlayout.nb_channels > SWR_CH_MAX) { + if (ret) + av_channel_layout_describe(&s->user_in_chlayout, l1, sizeof(l1)); + av_log(s, AV_LOG_WARNING, "Input channel layout \"%s\" is invalid or unsupported.\n", ret ? l1 : ""); + return AVERROR(EINVAL); + } + + if (!(ret = av_channel_layout_check(&s->user_out_chlayout)) || s->user_out_chlayout.nb_channels > SWR_CH_MAX) { + if (ret) + av_channel_layout_describe(&s->user_out_chlayout, l2, sizeof(l2)); + av_log(s, AV_LOG_WARNING, "Output channel layout \"%s\" is invalid or unsupported.\n", ret ? l2 : ""); Why are you using AV_LOG_WARNING when you are erroring out? 
+ return AVERROR(EINVAL); + } + ret = av_channel_layout_copy(&s->in_ch_layout, &s->user_in_chlayout); ret |= av_channel_layout_copy(&s->out_ch_layout, &s->user_out_chlayout); if (ret < 0) @@ -261,18 +289,6 @@ av_cold int swr_init(struct SwrContext *s){ s->dither.method = s->user_dither_method; - if (!av_channel_layout_check(&s->in_ch_layout) || s->in_ch_layout.nb_channels > SWR_CH_MAX) { - av_channel_layout_describe(&s->in_ch_layout, l1, sizeof(l1)); - av_log(s, AV_LOG_WARNING, "Input channel layout \"%s\" is invalid or unsupported.\n", l1); - av_channel_layout_uninit(&s->in_ch_layout); - } - - if (!av_channel_layout_check(&s->out_ch_layout) || s->out_ch_layout.nb_channels > SWR_CH_MAX) { - av_channel_layout_describe(&s->out_ch_layout, l2, sizeof(l2)); - av_log(s, AV_LOG_WARNING, "Output channel layout \"%s\" is invalid or unsupported.\n", l2); - av_channel_layout_uninit(&s->out_ch_layout); - } - switch(s->engine){ #if CONFIG_LIBSOXR case SWR_ENGINE_SOXR: s->resampler = &swri_soxr_resampler; break; @@ -291,9 +307,9 @@ av_cold int swr_init(struct SwrContext *s){ av_channel_layout_uninit(&s->in_ch_layout); } - if (!s->in_ch_layout.nb_channels || s->in_ch_layout.order == AV_CHANNEL_ORDER_UNSPEC) + if (s->in_ch_layout.order == AV_CHANNEL_ORDER_UNSPEC) av_channel_layout_default(&s->in_ch_layout, s->used_ch_count); - if (!s->out_ch_layout.nb_channels || s->out_ch_layout.order == AV_CHANNEL_ORDER_UNSPEC) + if (s->out_ch_layout.order == AV_CHANNEL_ORDER_UNSPEC) av
Re: [FFmpeg-devel] [PATCH] avformat: add Limitless Audio Format demuxer
Paul B Mahol: > +case 3: > +for (int n = 0; n < st->codecpar->sample_rate; n++) > +AV_WL24(pkt->data + n * 3, AV_RL24(s->data + n * s->nb_stored * > 3 + s->stored_index * 3)); > +break; Looking at intreadwrite.h shows that we actually have AV_RN24 and AV_WN24. - Andreas ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] avformat: add Limitless Audio Format demuxer
On 9/13/22, Andreas Rheinhardt wrote: > Paul B Mahol: >> +case 3: >> +for (int n = 0; n < st->codecpar->sample_rate; n++) >> +AV_WL24(pkt->data + n * 3, AV_RL24(s->data + n * s->nb_stored >> * 3 + s->stored_index * 3)); >> +break; > > Looking at intreadwrite.h shows that we actually have AV_RN24 and AV_WN24. Didn't compile. > > - Andreas > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] fate/spdif: Add spdif tests
Andreas Rheinhardt: > These tests test both the demuxer as well as the muxer > wherever possible. It is not always possible due to the fact > that the muxer supports more codecs than the demuxer. > > The spdif demuxer does currently not set the need_parsing flag. > If one were to set this to AVSTREAM_PARSE_FULL, the test results > would change as follows: > - For spdif-aac-remux, the packets are currently padded to 16bits, > i.e. if the actual packet size is odd, there is a padding byte. > The parser splits this byte away into a one byte packet of its own. > Insanely, these one byte packets get the same duration as normal > packets, i.e. timing is ruined. > - The DCA-remux tests get proper duration/timestamps. > - In the spdif-mp2-remux test the demuxer marks the stream as > being MP2; the parser sets it to MP3 and this triggers > the "Codec change in IEC 61937" codepath; this test therefore > returns only two packets with the parser. > - For spdif-mp3-remux some bytes end up in different packets: > Some input packets of this file have an odd length (417B instead > of 418B like all the other packets) and are padded to 418B. > Without a parser, all returned packets from the spdif-demuxer > are 418B. With a parser, the packets that were originally 417B > are 417B again, but the padding byte has not been discarded, > but added to the next packet which is now 419B. > This fixes "Multiple frames in a packet" warning and avoids > an "Invalid data found when processing input" error when decoding. 
> > Signed-off-by: Andreas Rheinhardt > --- > tests/Makefile |1 + > tests/fate/spdif.mak | 44 + > tests/ref/fate/spdif-aac-remux | 93 ++ > tests/ref/fate/spdif-ac3-remux | 63 ++ > tests/ref/fate/spdif-dca-core-bswap|1 + > tests/ref/fate/spdif-dca-core-remux| 14 + > tests/ref/fate/spdif-dca-master|1 + > tests/ref/fate/spdif-dca-master-core |1 + > tests/ref/fate/spdif-dca-master-core-remux | 1179 > tests/ref/fate/spdif-eac3 |1 + > tests/ref/fate/spdif-mlp |1 + > tests/ref/fate/spdif-mp2-remux | 49 + > tests/ref/fate/spdif-mp3-remux | 47 + > tests/ref/fate/spdif-truehd|1 + > 14 files changed, 1496 insertions(+) > create mode 100644 tests/fate/spdif.mak > create mode 100644 tests/ref/fate/spdif-aac-remux > create mode 100644 tests/ref/fate/spdif-ac3-remux > create mode 100644 tests/ref/fate/spdif-dca-core-bswap > create mode 100644 tests/ref/fate/spdif-dca-core-remux > create mode 100644 tests/ref/fate/spdif-dca-master > create mode 100644 tests/ref/fate/spdif-dca-master-core > create mode 100644 tests/ref/fate/spdif-dca-master-core-remux > create mode 100644 tests/ref/fate/spdif-eac3 > create mode 100644 tests/ref/fate/spdif-mlp > create mode 100644 tests/ref/fate/spdif-mp2-remux > create mode 100644 tests/ref/fate/spdif-mp3-remux > create mode 100644 tests/ref/fate/spdif-truehd > > diff --git a/tests/Makefile b/tests/Makefile > index d9c509a415..06494a9cc4 100644 > --- a/tests/Makefile > +++ b/tests/Makefile > @@ -231,6 +231,7 @@ include $(SRC_PATH)/tests/fate/real.mak > include $(SRC_PATH)/tests/fate/screen.mak > include $(SRC_PATH)/tests/fate/segment.mak > include $(SRC_PATH)/tests/fate/source.mak > +include $(SRC_PATH)/tests/fate/spdif.mak > include $(SRC_PATH)/tests/fate/speedhq.mak > include $(SRC_PATH)/tests/fate/subtitles.mak > include $(SRC_PATH)/tests/fate/truehd.mak > diff --git a/tests/fate/spdif.mak b/tests/fate/spdif.mak > new file mode 100644 > index 00..093b8138e8 > --- /dev/null > +++ b/tests/fate/spdif.mak > @@ -0,0 +1,44 @@ > +# This padds the AAC 
frames to 16 bit words (the actual size is > +# still available in the ADTS headers). > +FATE_SPDIF_REMUX-$(call ALLYES, AAC_DEMUXER AAC_DECODER) += > fate-spdif-aac-remux > +fate-spdif-aac-remux: CMD = transcode aac $(TARGET_SAMPLES)/aac/foo.aac > spdif "-c copy" "-c copy" > + > +FATE_SPDIF_REMUX-$(call ALLYES, AC3_DEMUXER AC3_DECODER) += > fate-spdif-ac3-remux > +fate-spdif-ac3-remux: CMD = transcode ac3 > $(TARGET_SAMPLES)/ac3/monsters_inc_5.1_448_small.ac3 spdif "-c copy" "-c copy" > + > +FATE_SPDIF_REMUX-$(call ALLYES, DTS_DEMUXER DCA_DECODER) += > fate-spdif-dca-core-remux > +fate-spdif-dca-core-remux: CMD = transcode dts > $(TARGET_SAMPLES)/dts/dcadec-suite/core_51_24_48_768_0.dtshd spdif "-c copy" > "-c copy" > + > +FATE_SPDIF-$(call DEMMUX, DTSHD, SPDIF) += fate-spdif-dca-core-bswap > +fate-spdif-dca-core-bswap: CMD = md5 -i > $(TARGET_SAMPLES)/dts/dcadec-suite/core_51_24_48_768_0.dtshd -c copy > -spdif_flags +be -f spdif > + > +# Only the core will be transferred, extensions are discarded. > +FATE_SPDIF_REMUX-$(call ALLYES, DTS_DEMUXER DCA_DECODER) += > fate-spdif-dca-master-core-remux > +fate-spdif-dca-master-core-remux: CMD = transcode dts > $(TARGET_SAMP
Re: [FFmpeg-devel] [PATCH 1/1] avcodec/mpegutils: add motion_vec debug mode
Hi Paul, I tried to understand the per-frame side-data (and metadata) mechanism. Adding my notes here in case they help a future reader. Metadata and side-data seem like similar mechanisms to add auxiliary information to each frame. Main difference seems to be that metadata is simpler (a key/value dictionary), while side-data allows adding any data struct. Metadata operation is also simpler: There are filters that generate metadata (e.g. "signalstats" generates key-values such as `lavfi.signalstats.YMIN=3`, while "silencedetect" generates audio-related key-values like `lavfi.silence_start=0`). There are also 2x filters that print metadata ("vf_metadata" and "af_ametadata"). So e.g. to see what signalstats/silencedetect are generating, you can do: ``` $ ffmpeg -i in.264 -vf signalstats,metadata=mode=print -f null - ... [Parsed_metadata_1 @ ...] frame:0pts:0 pts_time:0 [Parsed_metadata_1 @ ...] lavfi.signalstats.YMIN=3 ... ``` or: ``` $ ffmpeg -y -i in.wav -af "silencedetect=n=-10dB:d=1,ametadata=print" /tmp/out.wav ... [Parsed_ametadata_1 @ ...] frame:23 pts:47104 pts_time:0.981333 [Parsed_ametadata_1 @ ...] lavfi.silence_start=0 ... ``` Side-data operation is more complicated. There is some side-data information already generated (e.g. SEI_UNREGISTERED side-data). Some information requires explicitly asking for it. For example, the `MOTION_VECTORS` side-data, you need to enable AV_CODEC_FLAG2_EXPORT_MVS (which means calling ffmpeg/ffplay/ffprobe with "-flags2 +export_mvs"). The main filter to print side-data information is showinfo (`vf_showinfo` and `af_ashowinfo`). Now, the `vf_showinfo` filter only knows how to dump some of the side-data structs. In particular, it does not know how to dump MOTION_VECTORS side-data. 
So, if we add the motion vectors, and then ask showinfo to print it, we see: ``` $ ffmpeg -hide_banner -flags2 +export_mvs -export_side_data +mvs -export_side_data +prft -export_side_data +venc_params -export_side_data +film_grain -i /tmp/in.264 -vf showinfo -f null /dev/null ... -- frame 0 is a key frame: We can see SEI_UNREGISTERED and VIDEO_ENC_PARAMS info [Parsed_showinfo_0 @ 0x308fd40] config in time_base: 1/120, frame_rate: 25/1 [Parsed_showinfo_0 @ 0x308fd40] config out time_base: 0/0, frame_rate: 0/0 [Parsed_showinfo_0 @ 0x308fd40] n: 0 pts: 0 pts_time:0 duration: 48000 duration_time:0.04pos:0 fmt:yuv420p sar:0/1 s:1920x1080 i:P iskey:1 type:I checksum:F6BBEA9F plane_checksum:[AFB1432E 63F2F255 2887B50D] mean:[109 119 138] stdev:[43.0 12.7 13.3] [Parsed_showinfo_0 @ 0x308fd40] side data - User Data Unregistered: [Parsed_showinfo_0 @ 0x308fd40] UUID=47564adc-5c4c-433f-94ef-c5113cd143a8 [Parsed_showinfo_0 @ 0x308fd40] User Data=01ffff0200e4dd42 [Parsed_showinfo_0 @ 0x308fd40] [Parsed_showinfo_0 @ 0x308fd40] side data - video encoding parameters: type 1; qp=26; 8160 blocks; [Parsed_showinfo_0 @ 0x308fd40] color_range:tv color_space:bt709 color_primaries:bt709 color_trc:bt709 ... -- frame 1 is a P-frame: we can see VIDEO_ENC_PARAMS info, and a complain about "side-data type 8" (MOTION_VECTORS) [Parsed_showinfo_0 @ 0x308fd40] n: 1 pts: 48000 pts_time:0.04 duration: 48000 duration_time:0.04pos: 259304 fmt:yuv420p sar:0/1 s:1920x1080 i:P iskey:0 type:B checksum:BC4E5C12 plane_checksum:[AEA8857A 34697DA4 805E58E5] mean:[109 119 138] stdev:[43.0 12.6 13.3] [Parsed_showinfo_0 @ 0x308fd40] side data - video encoding parameters: type 1; qp=26; 8160 blocks; -- showinfo does not dump MOTION_VECTORS side-data [Parsed_showinfo_0 @ 0x308fd40] side data - unknown side data type 8 (547280 bytes) [Parsed_showinfo_0 @ 0x308fd40] [Parsed_showinfo_0 @ 0x308fd40] color_range:tv color_space:bt709 color_primaries:bt709 color_trc:bt709 ... 
``` So the best way right now to see the MVs is to use `doc/examples/extract_mvs`, which does exactly that: ``` $ make examples -j ... $ doc/examples/extract_mvs in.264 | head -40 | \ csvcut -C framenum,source,flags |csvlook | blockw | blockh | srcx | srcy | dstx | dsty | motion_x | motion_y | motion_scale | | -- | -- | - | | - | | | | | | 16 | 16 |20 | 26 | 8 |8 | 49 | 72 |4 | | 16 | 16 | 152 | 15 | 136 |8 | 65 | 28 |4 | | 16 | 8 | 360 |3 | 360 |4 |1 | -6 |4 | | 16 | 8 | 360 | 13 | 360 | 12 | -1 |4 |4 | | 16 | 16 | 440 | 10 | 440 |8 |3 | 10 |4 | | 8 | 16 | 829 |7 | 836 |8 | -31 | -6 |4 | | 8 | 16 | 844 |7 | 844 |8 | -1 | -4 |4 | ``` > Yes, it's called codecview. We can help understand how it works if you ask > more specific questions, but something like "git grep EXPORT_DATA_MVS > ../libavcodec/mpeg*.c" and checking the complement code i
[FFmpeg-devel] [PATCH] doc/examples/extract_mvs: add motion information
Note that the motion information includes subpel motion information This was likely forgotten in 56bdf61baa04c4fd8d165f34499115ce0aa97c43. Tested: ``` $ make examples -j ... $ doc/examples/extract_mvs in.264 | head -40 | \ csvcut -C framenum,source,flags |csvlook | blockw | blockh | srcx | srcy | dstx | dsty | motion_x | motion_y | motion_scale | | -- | -- | - | | - | | | | | | 16 | 16 |20 | 26 | 8 |8 | 49 | 72 | 4 | | 16 | 16 | 152 | 15 | 136 |8 | 65 | 28 | 4 | | 16 | 8 | 360 |3 | 360 |4 |1 | -6 | 4 | | 16 | 8 | 360 | 13 | 360 | 12 | -1 |4 | 4 | | 16 | 16 | 440 | 10 | 440 |8 |3 | 10 | 4 | | 8 | 16 | 829 |7 | 836 |8 | -31 | -6 | 4 | | 8 | 16 | 844 |7 | 844 |8 | -1 | -4 | 4 | | 16 | 16 | 1,004 | 14 | 1,048 |8 | -177 | 24 | 4 | | 16 | 16 | 1,096 |8 | 1,096 |8 | -1 |0 | 4 | | 16 | 8 | 1,417 | 24 | 1,416 |4 |7 | 82 | 4 | | 16 | 8 | 1,416 | 13 | 1,416 | 12 |0 |6 | 4 | | 16 | 8 |87 | 20 |88 | 20 | -7 |0 | 4 | | 16 | 8 |99 | 44 |88 | 28 | 45 | 66 | 4 | ... ``` Also: ``` $ make fate -j ... ``` --- doc/examples/extract_mvs.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/examples/extract_mvs.c b/doc/examples/extract_mvs.c index cc1311da91..b80ba26bb7 100644 --- a/doc/examples/extract_mvs.c +++ b/doc/examples/extract_mvs.c @@ -61,10 +61,11 @@ static int decode_packet(const AVPacket *pkt) const AVMotionVector *mvs = (const AVMotionVector *)sd->data; for (i = 0; i < sd->size / sizeof(*mvs); i++) { const AVMotionVector *mv = &mvs[i]; -printf("%d,%2d,%2d,%2d,%4d,%4d,%4d,%4d,0x%"PRIx64"\n", + printf("%d,%2d,%2d,%2d,%4d,%4d,%4d,%4d,0x%"PRIx64",%4d,%4d,%4d\n", video_frame_count, mv->source, mv->w, mv->h, mv->src_x, mv->src_y, -mv->dst_x, mv->dst_y, mv->flags); +mv->dst_x, mv->dst_y, mv->flags, +mv->motion_x, mv->motion_y, mv->motion_scale); } } av_frame_unref(frame); @@ -166,7 +167,7 @@ int main(int argc, char **argv) goto end; } -printf("framenum,source,blockw,blockh,srcx,srcy,dstx,dsty,flags\n"); + 
printf("framenum,source,blockw,blockh,srcx,srcy,dstx,dsty,flags,motion_x,motion_y,motion_scale\n"); /* read frames from the file */ while (av_read_frame(fmt_ctx, pkt) >= 0) { -- 2.37.3 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] multithreading hwaccel is broken on 5.1 and master branch
Commit cc867f2c09d2b69cee8a0eccd62aff002cbbfe11 breaks hwaccel. The assertion av_assert0(!p->parent->stash_hwaccel) fails when seeking. So this makes video players unusable. videotoolbox has another crash when starting to decode. The crash can be reproduced easily in ffmpeg: ./ffmpeg -stream_loop -1 -an -hwaccel vaapi test.mp4 -f null - >/dev/null Regards ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] avutil/intreadwrite: Always provide AV_[RW]N(24|48)
Currently, only the AVR32-arch provides some of these (namely the 24 bit variants), but this should not depend on the arch. Signed-off-by: Andreas Rheinhardt --- When I started writing this patch, I believed that whether intreadwrite.h provided AV_[RW]N24 depended upon the arch, namely AVR32; but later I noticed that this issue actually only exists for internal users, as an API user never got AV_[RW]N24, because the AVR32-specific header is not public. So I am no longer sure about this patch. libavutil/intreadwrite.h | 28 1 file changed, 28 insertions(+) diff --git a/libavutil/intreadwrite.h b/libavutil/intreadwrite.h index 4c8413a536..8a18233f55 100644 --- a/libavutil/intreadwrite.h +++ b/libavutil/intreadwrite.h @@ -510,6 +510,34 @@ union unaligned_16 { uint16_t l; } __attribute__((packed)) av_alias; } while(0) #endif +#if AV_HAVE_BIGENDIAN +# ifndef AV_WN24 +# define AV_WN24(p, v) AV_WB24(p, v) +# endif +# ifndef AV_RN24 +# define AV_RN24(p) AV_RB24(p) +# endif +# ifndef AV_WN48 +# define AV_WN48(p, v) AV_WB48(p, v) +# endif +# ifndef AV_RN48 +# define AV_RN48(p) AV_RB48(p) +# endif +#else +# ifndef AV_WN24 +# define AV_WN24(p, v) AV_WL24(p, v) +# endif +# ifndef AV_RN24 +# define AV_RN24(p) AV_RL24(p) +# endif +# ifndef AV_WN48 +# define AV_WN48(p, v) AV_WL48(p, v) +# endif +# ifndef AV_RN48 +# define AV_RN48(p) AV_RL48(p) +# endif +#endif + /* * The AV_[RW]NA macros access naturally aligned data * in a type-safe way. -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".