[FFmpeg-devel] [PATCH v2 4/5] libswscale: Enable hscale_avx2 for all input sizes.
ff_shuffle_filter_coefficients shuffles the tail as required. --- libswscale/utils.c| 19 --- libswscale/x86/swscale.c | 6 ++ tests/checkasm/sw_scale.c | 2 +- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/libswscale/utils.c b/libswscale/utils.c index cb4f5b521c..544b7fee96 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -266,8 +266,7 @@ int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, #if ARCH_X86_64 int i, j, k; int cpu_flags = av_get_cpu_flags(); -// avx2 hscale filter processes 16 pixel blocks. -if (!filter || dstW % 16 != 0) +if (!filter) return 0; if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_SLOW_GATHER)) { if ((c->srcBpc == 8) && (c->dstBpc <= 14)) { @@ -279,9 +278,11 @@ int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, } // Do not swap filterPos for pixels which won't be processed by // the main loop. - for (i = 0; i + 8 <= dstW; i += 8) { + for (i = 0; i + 16 <= dstW; i += 16) { FFSWAP(int, filterPos[i + 2], filterPos[i + 4]); FFSWAP(int, filterPos[i + 3], filterPos[i + 5]); + FFSWAP(int, filterPos[i + 10], filterPos[i + 12]); + FFSWAP(int, filterPos[i + 11], filterPos[i + 13]); } if (filterSize > 4) { // 16 pixels are processed at a time. @@ -295,6 +296,18 @@ int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, } } } + // 4 pixels are processed at a time in the tail. + for (; i < dstW; i += 4) { + // 4 filter coeffs are processed at a time. + int rem = dstW - i >= 4 ? 
4 : dstW - i; + for (k = 0; k + 4 <= filterSize; k += 4) { + for (j = 0; j < rem; ++j) { + int from = (i + j) * filterSize + k; + int to = i * filterSize + j * 4 + k * 4; + memcpy(&filter[to], &filterCopy[from], 4 * sizeof(int16_t)); + } + } + } } av_free(filterCopy); } diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c index 628f12137c..f628c71bd4 100644 --- a/libswscale/x86/swscale.c +++ b/libswscale/x86/swscale.c @@ -626,10 +626,8 @@ switch(c->dstBpc){ \ if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_SLOW_GATHER)) { if ((c->srcBpc == 8) && (c->dstBpc <= 14)) { -if (c->chrDstW % 16 == 0) -ASSIGN_AVX2_SCALE_FUNC(c->hcScale, c->hChrFilterSize); -if (c->dstW % 16 == 0) -ASSIGN_AVX2_SCALE_FUNC(c->hyScale, c->hLumFilterSize); +ASSIGN_AVX2_SCALE_FUNC(c->hcScale, c->hChrFilterSize); +ASSIGN_AVX2_SCALE_FUNC(c->hyScale, c->hLumFilterSize); } } diff --git a/tests/checkasm/sw_scale.c b/tests/checkasm/sw_scale.c index b643a47c30..798990a6cf 100644 --- a/tests/checkasm/sw_scale.c +++ b/tests/checkasm/sw_scale.c @@ -223,7 +223,7 @@ static void check_hscale(void) ff_sws_init_scale(ctx); memcpy(filterAvx2, filter, sizeof(uint16_t) * (SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH)); if ((cpu_flags & AV_CPU_FLAG_AVX2) && !(cpu_flags & AV_CPU_FLAG_SLOW_GATHER)) -ff_shuffle_filter_coefficients(ctx, filterPosAvx, width, filterAvx2, SRC_PIXELS); +ff_shuffle_filter_coefficients(ctx, filterPosAvx, width, filterAvx2, ctx->dstW); if (check_func(ctx->hcScale, "hscale_%d_to_%d__fs_%d_dstW_%d", ctx->srcBpc, ctx->dstBpc + 1, width, ctx->dstW)) { memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0])); -- 2.37.0.170.g444d1eabd0-goog ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2 5/5] checkasm/sw_scale: hscale does not require a cpuflag test.
This is done in ff_shuffle_filter_coefficients. --- tests/checkasm/sw_scale.c | 5 + 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/checkasm/sw_scale.c b/tests/checkasm/sw_scale.c index 798990a6cf..7be107bef1 100644 --- a/tests/checkasm/sw_scale.c +++ b/tests/checkasm/sw_scale.c @@ -172,8 +172,6 @@ static void check_hscale(void) const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize); -int cpu_flags = av_get_cpu_flags(); - ctx = sws_alloc_context(); if (sws_init_context(ctx, NULL, NULL) < 0) fail(); @@ -222,8 +220,7 @@ static void check_hscale(void) ctx->dstW = ctx->chrDstW = input_sizes[dstWi]; ff_sws_init_scale(ctx); memcpy(filterAvx2, filter, sizeof(uint16_t) * (SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH)); -if ((cpu_flags & AV_CPU_FLAG_AVX2) && !(cpu_flags & AV_CPU_FLAG_SLOW_GATHER)) -ff_shuffle_filter_coefficients(ctx, filterPosAvx, width, filterAvx2, ctx->dstW); +ff_shuffle_filter_coefficients(ctx, filterPosAvx, width, filterAvx2, ctx->dstW); if (check_func(ctx->hcScale, "hscale_%d_to_%d__fs_%d_dstW_%d", ctx->srcBpc, ctx->dstBpc + 1, width, ctx->dstW)) { memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0])); -- 2.37.0.170.g444d1eabd0-goog ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH v2 4/5] libswscale: Enable hscale_avx2 for all input sizes.
Hi Michael, Thanks for looking at this. I fixed the test issue. Alan On Fri, Jul 15, 2022 at 4:59 PM Alan Kelly wrote: > ff_shuffle_filter_coefficients shuffles the tail as required. > --- > libswscale/utils.c| 19 --- > libswscale/x86/swscale.c | 6 ++ > tests/checkasm/sw_scale.c | 2 +- > 3 files changed, 19 insertions(+), 8 deletions(-) > > diff --git a/libswscale/utils.c b/libswscale/utils.c > index cb4f5b521c..544b7fee96 100644 > --- a/libswscale/utils.c > +++ b/libswscale/utils.c > @@ -266,8 +266,7 @@ int ff_shuffle_filter_coefficients(SwsContext *c, int > *filterPos, > #if ARCH_X86_64 > int i, j, k; > int cpu_flags = av_get_cpu_flags(); > -// avx2 hscale filter processes 16 pixel blocks. > -if (!filter || dstW % 16 != 0) > +if (!filter) > return 0; > if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags & > AV_CPU_FLAG_SLOW_GATHER)) { > if ((c->srcBpc == 8) && (c->dstBpc <= 14)) { > @@ -279,9 +278,11 @@ int ff_shuffle_filter_coefficients(SwsContext *c, int > *filterPos, > } > // Do not swap filterPos for pixels which won't be processed by > // the main loop. > - for (i = 0; i + 8 <= dstW; i += 8) { > + for (i = 0; i + 16 <= dstW; i += 16) { > FFSWAP(int, filterPos[i + 2], filterPos[i + 4]); > FFSWAP(int, filterPos[i + 3], filterPos[i + 5]); > + FFSWAP(int, filterPos[i + 10], filterPos[i + 12]); > + FFSWAP(int, filterPos[i + 11], filterPos[i + 13]); > } > if (filterSize > 4) { > // 16 pixels are processed at a time. > @@ -295,6 +296,18 @@ int ff_shuffle_filter_coefficients(SwsContext *c, int > *filterPos, > } > } > } > + // 4 pixels are processed at a time in the tail. > + for (; i < dstW; i += 4) { > + // 4 filter coeffs are processed at a time. > + int rem = dstW - i >= 4 ? 
4 : dstW - i; > + for (k = 0; k + 4 <= filterSize; k += 4) { > + for (j = 0; j < rem; ++j) { > + int from = (i + j) * filterSize + k; > + int to = i * filterSize + j * 4 + k * 4; > + memcpy(&filter[to], &filterCopy[from], 4 * > sizeof(int16_t)); > + } > + } > + } > } > av_free(filterCopy); > } > diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c > index 628f12137c..f628c71bd4 100644 > --- a/libswscale/x86/swscale.c > +++ b/libswscale/x86/swscale.c > @@ -626,10 +626,8 @@ switch(c->dstBpc){ \ > > if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags & > AV_CPU_FLAG_SLOW_GATHER)) { > if ((c->srcBpc == 8) && (c->dstBpc <= 14)) { > -if (c->chrDstW % 16 == 0) > -ASSIGN_AVX2_SCALE_FUNC(c->hcScale, c->hChrFilterSize); > -if (c->dstW % 16 == 0) > -ASSIGN_AVX2_SCALE_FUNC(c->hyScale, c->hLumFilterSize); > +ASSIGN_AVX2_SCALE_FUNC(c->hcScale, c->hChrFilterSize); > +ASSIGN_AVX2_SCALE_FUNC(c->hyScale, c->hLumFilterSize); > } > } > > diff --git a/tests/checkasm/sw_scale.c b/tests/checkasm/sw_scale.c > index b643a47c30..798990a6cf 100644 > --- a/tests/checkasm/sw_scale.c > +++ b/tests/checkasm/sw_scale.c > @@ -223,7 +223,7 @@ static void check_hscale(void) > ff_sws_init_scale(ctx); > memcpy(filterAvx2, filter, sizeof(uint16_t) * (SRC_PIXELS > * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH)); > if ((cpu_flags & AV_CPU_FLAG_AVX2) && !(cpu_flags & > AV_CPU_FLAG_SLOW_GATHER)) > -ff_shuffle_filter_coefficients(ctx, filterPosAvx, > width, filterAvx2, SRC_PIXELS); > +ff_shuffle_filter_coefficients(ctx, filterPosAvx, > width, filterAvx2, ctx->dstW); > > if (check_func(ctx->hcScale, > "hscale_%d_to_%d__fs_%d_dstW_%d", ctx->srcBpc, ctx->dstBpc + 1, width, > ctx->dstW)) { > memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0])); > -- > 2.37.0.170.g444d1eabd0-goog > > ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2] avcodec/libjxlenc: avoid hard failure with unspecified primaries
This patch prevents the libjxl encoder wrapper from failing to encode images when the input video has untagged primaries. It will instead assume BT.709/sRGB primaries and print a warning. Signed-off-by: Leo Izen --- libavcodec/libjxlenc.c | 5 + 1 file changed, 5 insertions(+) diff --git a/libavcodec/libjxlenc.c b/libavcodec/libjxlenc.c index 6a948cc3ae..3910d93e82 100644 --- a/libavcodec/libjxlenc.c +++ b/libavcodec/libjxlenc.c @@ -211,6 +211,11 @@ static int libjxl_populate_primaries(JxlColorEncoding *jxl_color, enum AVColorPr jxl_color->primaries = JXL_PRIMARIES_P3; jxl_color->white_point = JXL_WHITE_POINT_D65; return 0; +case AVCOL_PRI_UNSPECIFIED: +av_log(avctx, AV_LOG_WARNING, "Unknown primaries, assuming BT.709/sRGB. Colors may be wrong.\n"); +jxl_color->primaries = JXL_PRIMARIES_SRGB; +jxl_color->white_point = JXL_WHITE_POINT_D65; +return 0; } desc = av_csp_primaries_desc_from_id(prm); -- 2.37.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v3] avcodec/libjxlenc: avoid hard failure with unspecified primaries
This patch prevents the libjxl encoder wrapper from failing to encode images when the input video has untagged primaries. It will instead assume BT.709/sRGB primaries and print a warning. Signed-off-by: Leo Izen --- libavcodec/libjxlenc.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/libavcodec/libjxlenc.c b/libavcodec/libjxlenc.c index 6a948cc3ae..9d98a112e1 100644 --- a/libavcodec/libjxlenc.c +++ b/libavcodec/libjxlenc.c @@ -190,7 +190,7 @@ static av_cold int libjxl_encode_init(AVCodecContext *avctx) * Populate a JxlColorEncoding with the given enum AVColorPrimaries. * @return < 0 upon failure, >= 0 upon success */ -static int libjxl_populate_primaries(JxlColorEncoding *jxl_color, enum AVColorPrimaries prm) +static int libjxl_populate_primaries(void *avctx, JxlColorEncoding *jxl_color, enum AVColorPrimaries prm) { const AVColorPrimariesDesc *desc; @@ -211,6 +211,11 @@ static int libjxl_populate_primaries(JxlColorEncoding *jxl_color, enum AVColorPr jxl_color->primaries = JXL_PRIMARIES_P3; jxl_color->white_point = JXL_WHITE_POINT_D65; return 0; +case AVCOL_PRI_UNSPECIFIED: +av_log(avctx, AV_LOG_WARNING, "Unknown primaries, assuming BT.709/sRGB. Colors may be wrong.\n"); +jxl_color->primaries = JXL_PRIMARIES_SRGB; +jxl_color->white_point = JXL_WHITE_POINT_D65; +return 0; } desc = av_csp_primaries_desc_from_id(prm); @@ -340,7 +345,7 @@ static int libjxl_encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFra else jxl_color.color_space = JXL_COLOR_SPACE_RGB; -ret = libjxl_populate_primaries(&jxl_color, +ret = libjxl_populate_primaries(avctx, &jxl_color, frame->color_primaries && frame->color_primaries != AVCOL_PRI_UNSPECIFIED ? frame->color_primaries : avctx->color_primaries); if (ret < 0) -- 2.37.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 5/5] aarch64: me_cmp: Don't do uaddlv once per iteration
On Fri, 15 Jul 2022, Swinney, Jonathan wrote: If the max height is just 16, then this should be fine. I assumed that h could have a much higher value (>1024), but if that is not the case, then this is a useful optimization. At least according to the me_cmp.h header, which says: /* Motion estimation: * h is limited to { width / 2, width, 2 * width }, * but never larger than 16 and never smaller than 2. * Although currently h < 4 is not used as functions with * width < 8 are neither used nor implemented. */ So with that in mind, I think this should be safe to do. // Martin ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 5/5] aarch64: me_cmp: Don't do uaddlv once per iteration
On Fri, Jul 15, 2022 at 10:56:03PM +0300, Martin Storsjö wrote: > On Fri, 15 Jul 2022, Swinney, Jonathan wrote: > > > If the max height is just 16, then this should be fine. I assumed that h > > could have a much higher value (>1024), but if that is not the case, > > then this is a useful optimization. > > At least according to the me_cmp.h header, which says: > > /* Motion estimation: > * h is limited to { width / 2, width, 2 * width }, > * but never larger than 16 and never smaller than 2. > * Although currently h < 4 is not used as functions with > * width < 8 are neither used nor implemented. */ These rules were written with support for encoding of all standard formats in mind at the time that was written. Today it may make sense to extend these rules to cover the things which were created since then. thx [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Republics decline into democracies and democracies degenerate into despotisms. -- Aristotle signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 5/5] aarch64: me_cmp: Don't do uaddlv once per iteration
On Fri, 15 Jul 2022, Michael Niedermayer wrote: On Fri, Jul 15, 2022 at 10:56:03PM +0300, Martin Storsjö wrote: On Fri, 15 Jul 2022, Swinney, Jonathan wrote: If the max height is just 16, then this should be fine. I assumed that h could have a much higher value (>1024), but if that is not the case, then this is a useful optimization. At least according to the me_cmp.h header, which says: /* Motion estimation: * h is limited to { width / 2, width, 2 * width }, * but never larger than 16 and never smaller than 2. * Although currently h < 4 is not used as functions with * width < 8 are neither used nor implemented. */ These rules were written with support for encoding of all standard formats in mind at the time that was written. Today it may make sense to extend these rules to cover the things which were created since then. Right, but if that suddenly changes, such a change also must expect that it might need updates to all assembly implementations that implement that interface currently. Right now, both the de facto case (any callers in the codebase) and the explicit documentation say that it can't be called with parameters outside of that range. Even if it's raised from the current <= 16, this particular optimization should be fine as long as h <= 256 - which should be fine for at least all current-gen mainstream codecs, I think? // Martin ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] avcodec/libspeexdec: initialize channels
Speex has no header in the FLV container, so libspeexdec reports 'Invalid channel count' when decoding an FLV file because the channel count is not initialized. Reproduce this issue with: 1) ffmpeg -f lavfi -i anullsrc -ac 1 -ar 16000 -acodec libspeex test.flv 2) ffplay -acodec libspeex test.flv Signed-off-by: huheng --- libavcodec/libspeexdec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/libspeexdec.c b/libavcodec/libspeexdec.c index 8c9e05e51d..bb8e1a7db9 100644 --- a/libavcodec/libspeexdec.c +++ b/libavcodec/libspeexdec.c @@ -43,7 +43,7 @@ static av_cold int libspeex_decode_init(AVCodecContext *avctx) LibSpeexContext *s = avctx->priv_data; const SpeexMode *mode; SpeexHeader *header = NULL; -int spx_mode, channels; +int spx_mode, channels = avctx->ch_layout.nb_channels; if (avctx->extradata && avctx->extradata_size >= 80) { header = speex_packet_to_header(avctx->extradata, -- 2.36.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".