[FFmpeg-devel] [PATCH v2 4/5] libswscale: Enable hscale_avx2 for all input sizes.

2022-07-15 Thread Alan Kelly
ff_shuffle_filter_coefficients shuffles the tail as required.
---
 libswscale/utils.c| 19 ---
 libswscale/x86/swscale.c  |  6 ++
 tests/checkasm/sw_scale.c |  2 +-
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/libswscale/utils.c b/libswscale/utils.c
index cb4f5b521c..544b7fee96 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -266,8 +266,7 @@ int ff_shuffle_filter_coefficients(SwsContext *c, int 
*filterPos,
 #if ARCH_X86_64
 int i, j, k;
 int cpu_flags = av_get_cpu_flags();
-// avx2 hscale filter processes 16 pixel blocks.
-if (!filter || dstW % 16 != 0)
+if (!filter)
 return 0;
 if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags & 
AV_CPU_FLAG_SLOW_GATHER)) {
 if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
@@ -279,9 +278,11 @@ int ff_shuffle_filter_coefficients(SwsContext *c, int 
*filterPos,
}
// Do not swap filterPos for pixels which won't be processed by
// the main loop.
-   for (i = 0; i + 8 <= dstW; i += 8) {
+   for (i = 0; i + 16 <= dstW; i += 16) {
FFSWAP(int, filterPos[i + 2], filterPos[i + 4]);
FFSWAP(int, filterPos[i + 3], filterPos[i + 5]);
+   FFSWAP(int, filterPos[i + 10], filterPos[i + 12]);
+   FFSWAP(int, filterPos[i + 11], filterPos[i + 13]);
}
if (filterSize > 4) {
// 16 pixels are processed at a time.
@@ -295,6 +296,18 @@ int ff_shuffle_filter_coefficients(SwsContext *c, int 
*filterPos,
}
}
}
+   // 4 pixels are processed at a time in the tail.
+   for (; i < dstW; i += 4) {
+   // 4 filter coeffs are processed at a time.
+   int rem = dstW - i >= 4 ? 4 : dstW - i;
+   for (k = 0; k + 4 <= filterSize; k += 4) {
+   for (j = 0; j < rem; ++j) {
+   int from = (i + j) * filterSize + k;
+   int to = i * filterSize + j * 4 + k * 4;
+   memcpy(&filter[to], &filterCopy[from], 4 * 
sizeof(int16_t));
+   }
+   }
+   }
}
av_free(filterCopy);
 }
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 628f12137c..f628c71bd4 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -626,10 +626,8 @@ switch(c->dstBpc){ \
 
 if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags & 
AV_CPU_FLAG_SLOW_GATHER)) {
 if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
-if (c->chrDstW % 16 == 0)
-ASSIGN_AVX2_SCALE_FUNC(c->hcScale, c->hChrFilterSize);
-if (c->dstW % 16 == 0)
-ASSIGN_AVX2_SCALE_FUNC(c->hyScale, c->hLumFilterSize);
+ASSIGN_AVX2_SCALE_FUNC(c->hcScale, c->hChrFilterSize);
+ASSIGN_AVX2_SCALE_FUNC(c->hyScale, c->hLumFilterSize);
 }
 }
 
diff --git a/tests/checkasm/sw_scale.c b/tests/checkasm/sw_scale.c
index b643a47c30..798990a6cf 100644
--- a/tests/checkasm/sw_scale.c
+++ b/tests/checkasm/sw_scale.c
@@ -223,7 +223,7 @@ static void check_hscale(void)
 ff_sws_init_scale(ctx);
 memcpy(filterAvx2, filter, sizeof(uint16_t) * (SRC_PIXELS * 
MAX_FILTER_WIDTH + MAX_FILTER_WIDTH));
 if ((cpu_flags & AV_CPU_FLAG_AVX2) && !(cpu_flags & 
AV_CPU_FLAG_SLOW_GATHER))
-ff_shuffle_filter_coefficients(ctx, filterPosAvx, width, 
filterAvx2, SRC_PIXELS);
+ff_shuffle_filter_coefficients(ctx, filterPosAvx, width, 
filterAvx2, ctx->dstW);
 
 if (check_func(ctx->hcScale, "hscale_%d_to_%d__fs_%d_dstW_%d", 
ctx->srcBpc, ctx->dstBpc + 1, width, ctx->dstW)) {
 memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0]));
-- 
2.37.0.170.g444d1eabd0-goog

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v2 5/5] checkasm/sw_scale: hscale does not require cpuflag test.

2022-07-15 Thread Alan Kelly
This is done in ff_shuffle_filter_coefficients.
---
 tests/checkasm/sw_scale.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tests/checkasm/sw_scale.c b/tests/checkasm/sw_scale.c
index 798990a6cf..7be107bef1 100644
--- a/tests/checkasm/sw_scale.c
+++ b/tests/checkasm/sw_scale.c
@@ -172,8 +172,6 @@ static void check_hscale(void)
   const uint8_t *src, const int16_t *filter,
   const int32_t *filterPos, int filterSize);
 
-int cpu_flags = av_get_cpu_flags();
-
 ctx = sws_alloc_context();
 if (sws_init_context(ctx, NULL, NULL) < 0)
 fail();
@@ -222,8 +220,7 @@ static void check_hscale(void)
 ctx->dstW = ctx->chrDstW = input_sizes[dstWi];
 ff_sws_init_scale(ctx);
 memcpy(filterAvx2, filter, sizeof(uint16_t) * (SRC_PIXELS * 
MAX_FILTER_WIDTH + MAX_FILTER_WIDTH));
-if ((cpu_flags & AV_CPU_FLAG_AVX2) && !(cpu_flags & 
AV_CPU_FLAG_SLOW_GATHER))
-ff_shuffle_filter_coefficients(ctx, filterPosAvx, width, 
filterAvx2, ctx->dstW);
+ff_shuffle_filter_coefficients(ctx, filterPosAvx, width, 
filterAvx2, ctx->dstW);
 
 if (check_func(ctx->hcScale, "hscale_%d_to_%d__fs_%d_dstW_%d", 
ctx->srcBpc, ctx->dstBpc + 1, width, ctx->dstW)) {
 memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0]));
-- 
2.37.0.170.g444d1eabd0-goog

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v2 4/5] libswscale: Enable hscale_avx2 for all input sizes.

2022-07-15 Thread Alan Kelly
Hi Michael,

Thanks for looking at this. I fixed the test issue.

Alan

On Fri, Jul 15, 2022 at 4:59 PM Alan Kelly  wrote:

> ff_shuffle_filter_coefficients shuffles the tail as required.
> ---
>  libswscale/utils.c| 19 ---
>  libswscale/x86/swscale.c  |  6 ++
>  tests/checkasm/sw_scale.c |  2 +-
>  3 files changed, 19 insertions(+), 8 deletions(-)
>
> diff --git a/libswscale/utils.c b/libswscale/utils.c
> index cb4f5b521c..544b7fee96 100644
> --- a/libswscale/utils.c
> +++ b/libswscale/utils.c
> @@ -266,8 +266,7 @@ int ff_shuffle_filter_coefficients(SwsContext *c, int
> *filterPos,
>  #if ARCH_X86_64
>  int i, j, k;
>  int cpu_flags = av_get_cpu_flags();
> -// avx2 hscale filter processes 16 pixel blocks.
> -if (!filter || dstW % 16 != 0)
> +if (!filter)
>  return 0;
>  if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags &
> AV_CPU_FLAG_SLOW_GATHER)) {
>  if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
> @@ -279,9 +278,11 @@ int ff_shuffle_filter_coefficients(SwsContext *c, int
> *filterPos,
> }
> // Do not swap filterPos for pixels which won't be processed by
> // the main loop.
> -   for (i = 0; i + 8 <= dstW; i += 8) {
> +   for (i = 0; i + 16 <= dstW; i += 16) {
> FFSWAP(int, filterPos[i + 2], filterPos[i + 4]);
> FFSWAP(int, filterPos[i + 3], filterPos[i + 5]);
> +   FFSWAP(int, filterPos[i + 10], filterPos[i + 12]);
> +   FFSWAP(int, filterPos[i + 11], filterPos[i + 13]);
> }
> if (filterSize > 4) {
> // 16 pixels are processed at a time.
> @@ -295,6 +296,18 @@ int ff_shuffle_filter_coefficients(SwsContext *c, int
> *filterPos,
> }
> }
> }
> +   // 4 pixels are processed at a time in the tail.
> +   for (; i < dstW; i += 4) {
> +   // 4 filter coeffs are processed at a time.
> +   int rem = dstW - i >= 4 ? 4 : dstW - i;
> +   for (k = 0; k + 4 <= filterSize; k += 4) {
> +   for (j = 0; j < rem; ++j) {
> +   int from = (i + j) * filterSize + k;
> +   int to = i * filterSize + j * 4 + k * 4;
> +   memcpy(&filter[to], &filterCopy[from], 4 *
> sizeof(int16_t));
> +   }
> +   }
> +   }
> }
> av_free(filterCopy);
>  }
> diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
> index 628f12137c..f628c71bd4 100644
> --- a/libswscale/x86/swscale.c
> +++ b/libswscale/x86/swscale.c
> @@ -626,10 +626,8 @@ switch(c->dstBpc){ \
>
>  if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags &
> AV_CPU_FLAG_SLOW_GATHER)) {
>  if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
> -if (c->chrDstW % 16 == 0)
> -ASSIGN_AVX2_SCALE_FUNC(c->hcScale, c->hChrFilterSize);
> -if (c->dstW % 16 == 0)
> -ASSIGN_AVX2_SCALE_FUNC(c->hyScale, c->hLumFilterSize);
> +ASSIGN_AVX2_SCALE_FUNC(c->hcScale, c->hChrFilterSize);
> +ASSIGN_AVX2_SCALE_FUNC(c->hyScale, c->hLumFilterSize);
>  }
>  }
>
> diff --git a/tests/checkasm/sw_scale.c b/tests/checkasm/sw_scale.c
> index b643a47c30..798990a6cf 100644
> --- a/tests/checkasm/sw_scale.c
> +++ b/tests/checkasm/sw_scale.c
> @@ -223,7 +223,7 @@ static void check_hscale(void)
>  ff_sws_init_scale(ctx);
>  memcpy(filterAvx2, filter, sizeof(uint16_t) * (SRC_PIXELS
> * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH));
>  if ((cpu_flags & AV_CPU_FLAG_AVX2) && !(cpu_flags &
> AV_CPU_FLAG_SLOW_GATHER))
> -ff_shuffle_filter_coefficients(ctx, filterPosAvx,
> width, filterAvx2, SRC_PIXELS);
> +ff_shuffle_filter_coefficients(ctx, filterPosAvx,
> width, filterAvx2, ctx->dstW);
>
>  if (check_func(ctx->hcScale,
> "hscale_%d_to_%d__fs_%d_dstW_%d", ctx->srcBpc, ctx->dstBpc + 1, width,
> ctx->dstW)) {
>  memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0]));
> --
> 2.37.0.170.g444d1eabd0-goog
>
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v2] avcodec/libjxlenc: avoid hard failure with unspecified primaries

2022-07-15 Thread Leo Izen
This patch prevents the libjxl encoder wrapper from failing to
encode images when the input video has untagged primaries. It will
instead assume BT.709/sRGB primaries and print a warning.

Signed-off-by: Leo Izen 
---
 libavcodec/libjxlenc.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/libavcodec/libjxlenc.c b/libavcodec/libjxlenc.c
index 6a948cc3ae..3910d93e82 100644
--- a/libavcodec/libjxlenc.c
+++ b/libavcodec/libjxlenc.c
@@ -211,6 +211,11 @@ static int libjxl_populate_primaries(JxlColorEncoding 
*jxl_color, enum AVColorPr
 jxl_color->primaries = JXL_PRIMARIES_P3;
 jxl_color->white_point = JXL_WHITE_POINT_D65;
 return 0;
+case AVCOL_PRI_UNSPECIFIED:
+av_log(avctx, AV_LOG_WARNING, "Unknown primaries, assuming 
BT.709/sRGB. Colors may be wrong.\n");
+jxl_color->primaries = JXL_PRIMARIES_SRGB;
+jxl_color->white_point = JXL_WHITE_POINT_D65;
+return 0;
 }
 
 desc = av_csp_primaries_desc_from_id(prm);
-- 
2.37.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v3] avcodec/libjxlenc: avoid hard failure with unspecified primaries

2022-07-15 Thread Leo Izen
This patch prevents the libjxl encoder wrapper from failing to
encode images when the input video has untagged primaries. It will
instead assume BT.709/sRGB primaries and print a warning.

Signed-off-by: Leo Izen 
---
 libavcodec/libjxlenc.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavcodec/libjxlenc.c b/libavcodec/libjxlenc.c
index 6a948cc3ae..9d98a112e1 100644
--- a/libavcodec/libjxlenc.c
+++ b/libavcodec/libjxlenc.c
@@ -190,7 +190,7 @@ static av_cold int libjxl_encode_init(AVCodecContext *avctx)
  * Populate a JxlColorEncoding with the given enum AVColorPrimaries.
  * @return < 0 upon failure, >= 0 upon success
  */
-static int libjxl_populate_primaries(JxlColorEncoding *jxl_color, enum 
AVColorPrimaries prm)
+static int libjxl_populate_primaries(void *avctx, JxlColorEncoding *jxl_color, 
enum AVColorPrimaries prm)
 {
 const AVColorPrimariesDesc *desc;
 
@@ -211,6 +211,11 @@ static int libjxl_populate_primaries(JxlColorEncoding 
*jxl_color, enum AVColorPr
 jxl_color->primaries = JXL_PRIMARIES_P3;
 jxl_color->white_point = JXL_WHITE_POINT_D65;
 return 0;
+case AVCOL_PRI_UNSPECIFIED:
+av_log(avctx, AV_LOG_WARNING, "Unknown primaries, assuming 
BT.709/sRGB. Colors may be wrong.\n");
+jxl_color->primaries = JXL_PRIMARIES_SRGB;
+jxl_color->white_point = JXL_WHITE_POINT_D65;
+return 0;
 }
 
 desc = av_csp_primaries_desc_from_id(prm);
@@ -340,7 +345,7 @@ static int libjxl_encode_frame(AVCodecContext *avctx, 
AVPacket *pkt, const AVFra
 else
 jxl_color.color_space = JXL_COLOR_SPACE_RGB;
 
-ret = libjxl_populate_primaries(&jxl_color,
+ret = libjxl_populate_primaries(avctx, &jxl_color,
 frame->color_primaries && frame->color_primaries != 
AVCOL_PRI_UNSPECIFIED
 ? frame->color_primaries : avctx->color_primaries);
 if (ret < 0)
-- 
2.37.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 5/5] aarch64: me_cmp: Don't do uaddlv once per iteration

2022-07-15 Thread Martin Storsjö

On Fri, 15 Jul 2022, Swinney, Jonathan wrote:

If the max height is just 16, then this should be fine. I assumed that h 
could have a much higher value (>1024), but if that is not the case, 
then this is a useful optimization.


At least according to the me_cmp.h header, which says:

/* Motion estimation:
 * h is limited to { width / 2, width, 2 * width },
 * but never larger than 16 and never smaller than 2.
 * Although currently h < 4 is not used as functions with
 * width < 8 are neither used nor implemented. */

So with that in mind, I think this should be safe to do.

// Martin

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 5/5] aarch64: me_cmp: Don't do uaddlv once per iteration

2022-07-15 Thread Michael Niedermayer
On Fri, Jul 15, 2022 at 10:56:03PM +0300, Martin Storsjö wrote:
> On Fri, 15 Jul 2022, Swinney, Jonathan wrote:
> 
> > If the max height is just 16, then this should be fine. I assumed that h
> > could have a much higher value (>1024), but if that is not the case,
> > then this is a useful optimization.
> 
> At least according to the me_cmp.h header, which says:
> 
> /* Motion estimation:
>  * h is limited to { width / 2, width, 2 * width },
>  * but never larger than 16 and never smaller than 2.
>  * Although currently h < 4 is not used as functions with
>  * width < 8 are neither used nor implemented. */

These rules were written with support for encoding of all
standard formats in mind at the time that was written.
today it may make sense to extend these rules to cover the
things which were created since then

thx

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Republics decline into democracies and democracies degenerate into
despotisms. -- Aristotle


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 5/5] aarch64: me_cmp: Don't do uaddlv once per iteration

2022-07-15 Thread Martin Storsjö

On Fri, 15 Jul 2022, Michael Niedermayer wrote:


On Fri, Jul 15, 2022 at 10:56:03PM +0300, Martin Storsjö wrote:

On Fri, 15 Jul 2022, Swinney, Jonathan wrote:


If the max height is just 16, then this should be fine. I assumed that h
could have a much higher value (>1024), but if that is not the case,
then this is a useful optimization.


At least according to the me_cmp.h header, which says:

/* Motion estimation:
 * h is limited to { width / 2, width, 2 * width },
 * but never larger than 16 and never smaller than 2.
 * Although currently h < 4 is not used as functions with
 * width < 8 are neither used nor implemented. */


These rules were written with support for encoding of all
standard formats in mind at the time that was written.
today it may make sense to extend these rules to cover the
things which were created since then


Right, but if that suddenly changes, such a change also must expect that 
it might need updates to all assembly implementations that implement that 
interface currently. Right now, both the de facto case (any callers in the 
codebase) and the explicit documentation say that it can't be called with 
parameters outside of that range.


Even if it's raised from the current <= 16, this particular optimization 
should be fine as long as h <= 256 - which should be fine for at least all 
current-gen mainstream codecs since, I think?


// Martin
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] avcodec/libspeexdec: initialize channels

2022-07-15 Thread huheng
speex has no header in the flv container, so libspeexdec reports 'Invalid
channel count' when decoding an flv file because channels is not initialized.
Reproduce this issue with:
1) ffmpeg -f lavfi -i anullsrc -ac 1 -ar 16000 -acodec libspeex test.flv
2) ffplay -acodec libspeex test.flv

Signed-off-by: huheng 
---
 libavcodec/libspeexdec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/libspeexdec.c b/libavcodec/libspeexdec.c
index 8c9e05e51d..bb8e1a7db9 100644
--- a/libavcodec/libspeexdec.c
+++ b/libavcodec/libspeexdec.c
@@ -43,7 +43,7 @@ static av_cold int libspeex_decode_init(AVCodecContext *avctx)
 LibSpeexContext *s = avctx->priv_data;
 const SpeexMode *mode;
 SpeexHeader *header = NULL;
-int spx_mode, channels;
+int spx_mode, channels = avctx->ch_layout.nb_channels;
 
 if (avctx->extradata && avctx->extradata_size >= 80) {
 header = speex_packet_to_header(avctx->extradata,
-- 
2.36.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".