From: Niklas Haas <g...@haasn.dev> Because of the lack of an external ABI on low-level kernels, we cannot directly test internal functions. Instead, we construct a minimal op chain consisting of a read, the op to be tested, and a write.
The bigger complication arises from the fact that the backend may generate arbitrary internal state that needs to be passed back to the implementation, which means we cannot directly call `func_ref` on the generated chain. To get around this, always compile the op chain twice - once using the backend to be tested, and once using the reference C backend. --- tests/checkasm/Makefile | 8 +- tests/checkasm/checkasm.c | 1 + tests/checkasm/checkasm.h | 1 + tests/checkasm/sw_ops.c | 748 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 757 insertions(+), 1 deletion(-) create mode 100644 tests/checkasm/sw_ops.c diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile index d5c50e5599..be4c6b265f 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile @@ -65,7 +65,13 @@ AVFILTEROBJS-$(CONFIG_SOBEL_FILTER) += vf_convolution.o CHECKASMOBJS-$(CONFIG_AVFILTER) += $(AVFILTEROBJS-yes) # swscale tests -SWSCALEOBJS += sw_gbrp.o sw_range_convert.o sw_rgb.o sw_scale.o sw_yuv2rgb.o sw_yuv2yuv.o +SWSCALEOBJS += sw_gbrp.o \ + sw_ops.o \ + sw_range_convert.o \ + sw_rgb.o \ + sw_scale.o \ + sw_yuv2rgb.o \ + sw_yuv2yuv.o CHECKASMOBJS-$(CONFIG_SWSCALE) += $(SWSCALEOBJS) diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index 6e99d33d70..5f3a900bfd 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -294,6 +294,7 @@ static const struct { { "sw_scale", checkasm_check_sw_scale }, { "sw_yuv2rgb", checkasm_check_sw_yuv2rgb }, { "sw_yuv2yuv", checkasm_check_sw_yuv2yuv }, + { "sw_ops", checkasm_check_sw_ops }, #endif #if CONFIG_AVUTIL { "aes", checkasm_check_aes }, diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index 0f8dea82e9..959e66d9f8 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -131,6 +131,7 @@ void checkasm_check_sw_rgb(void); void checkasm_check_sw_scale(void); void checkasm_check_sw_yuv2rgb(void); void checkasm_check_sw_yuv2yuv(void); +void checkasm_check_sw_ops(void); void checkasm_check_takdsp(void); void checkasm_check_utvideodsp(void); void checkasm_check_v210dec(void); diff --git a/tests/checkasm/sw_ops.c b/tests/checkasm/sw_ops.c new file mode 100644 index 0000000000..7b38bd6902 --- /dev/null +++ b/tests/checkasm/sw_ops.c @@ -0,0 +1,748 @@ +/** + * Copyright (C) 2025 Niklas Haas + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with FFmpeg; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <string.h> + +#include "libavutil/avassert.h" +#include "libavutil/mem_internal.h" +#include "libavutil/refstruct.h" + +#include "libswscale/ops.h" +#include "libswscale/ops_internal.h" + +#include "checkasm.h" + +enum { + PIXELS = 64, +}; + +enum { + U8 = SWS_PIXEL_U8, + U16 = SWS_PIXEL_U16, + U32 = SWS_PIXEL_U32, + F32 = SWS_PIXEL_F32, +}; + +#define FMT(fmt, ...) tprintf((char[256]) {0}, 256, fmt, __VA_ARGS__) +static const char *tprintf(char buf[], size_t size, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vsnprintf(buf, size, fmt, ap); + va_end(ap); + return buf; +} + +static int rw_pixel_bits(const SwsOp *op) +{ + const int elems = op->rw.packed ? op->rw.elems : 1; + const int size = ff_sws_pixel_type_size(op->type); + const int bits = 8 >> op->rw.frac; + av_assert1(bits >= 1); + return elems * size * bits; +} + +static float rndf(void) +{ + union { uint32_t u; float f; } x; + do { + x.u = rnd(); + } while (!isnormal(x.f)); + return x.f; +} + +static void fill32f(float *line, int num, unsigned range) +{ + const float scale = (float) range / UINT32_MAX; + for (int i = 0; i < num; i++) + line[i] = range ? scale * rnd() : rndf(); +} + +static void fill32(uint32_t *line, int num, unsigned range) +{ + for (int i = 0; i < num; i++) + line[i] = range ? rnd() % (range + 1) : rnd(); +} + +static void fill16(uint16_t *line, int num, unsigned range) +{ + if (!range) { + fill32((uint32_t *) line, AV_CEIL_RSHIFT(num, 1), 0); + } else { + for (int i = 0; i < num; i++) + line[i] = rnd() % (range + 1); + } +} + +static void fill8(uint8_t *line, int num, unsigned range) +{ + if (!range) { + fill32((uint32_t *) line, AV_CEIL_RSHIFT(num, 2), 0); + } else { + for (int i = 0; i < num; i++) + line[i] = rnd() % (range + 1); + } +} + +static void check_ops(const char *report, unsigned range, const SwsOp *ops) +{ + SwsContext *ctx = sws_alloc_context(); + SwsCompiledOp comp_ref = {0}, comp_new = {0}; + SwsOpList oplist = { .ops = (SwsOp *) ops }; + const SwsOp *read_op, *write_op; + + declare_func(void, const SwsOpExec *exec, const void *priv, int pixels); + + DECLARE_ALIGNED_64(char, src0)[4][PIXELS * sizeof(uint32_t[4])]; + DECLARE_ALIGNED_64(char, src1)[4][PIXELS * sizeof(uint32_t[4])]; + DECLARE_ALIGNED_64(char, dst0)[4][PIXELS * sizeof(uint32_t[4])]; + DECLARE_ALIGNED_64(char, dst1)[4][PIXELS * sizeof(uint32_t[4])]; + + if (!ctx) + return; + ctx->flags = SWS_BITEXACT; + + read_op = &ops[0]; + for (oplist.num_ops = 0; ops[oplist.num_ops].op; oplist.num_ops++) + write_op = &ops[oplist.num_ops]; + + for (int p = 0; p < 4; p++) { + void *plane = src0[p]; + switch (read_op->type) { + case U8: fill8(plane, sizeof(src0[p]) / sizeof(uint8_t), range); break; + case U16: fill16(plane, sizeof(src0[p]) / sizeof(uint16_t), range); break; + case U32: fill32(plane, sizeof(src0[p]) / sizeof(uint32_t), range); break; + case F32: fill32f(plane, sizeof(src0[p]) / sizeof(uint32_t), range); break; + } + } + + memcpy(src1, src0, sizeof(src0)); + memset(dst0, 0, sizeof(dst0)); + memset(dst1, 0, sizeof(dst1)); + + /* Compile `ops` using both the asm and c backends */ + for (int n = 0; ff_sws_op_backends[n]; n++) { + const SwsOpBackend *backend = ff_sws_op_backends[n]; + const bool is_ref = !strcmp(backend->name, "c"); + if (is_ref || !comp_new.func) { + SwsCompiledOp comp; + int ret = ff_sws_ops_compile_backend(ctx, backend, &oplist, &comp); + if (ret == AVERROR(ENOTSUP)) + continue; + else if (ret < 0) + fail(); + else if (PIXELS % comp.block_size != 0) + fail(); + + if (is_ref) + comp_ref = comp; + if (!comp_new.func) + comp_new = comp; + } + } + + av_assert0(comp_ref.func && comp_new.func); + + SwsOpExec exec = {0}; + exec.pixel_bits_in = rw_pixel_bits(read_op); + exec.pixel_bits_out = rw_pixel_bits(write_op); + exec.width = PIXELS; + exec.height = exec.slice_h = 1; + for (int i = 0; i < 4; i++) { + exec.in_stride[i] = sizeof(src0[i]); + exec.out_stride[i] = sizeof(dst0[i]); + } + + if (check_func(comp_new.func, "%s", report)) { + func_ref = comp_ref.func; /* ignore any other asm versions */ + + for (int i = 0; i < 4; i++) { + exec.in[i] = (void *) src0[i]; + exec.out[i] = (void *) dst0[i]; + } + call_ref(&exec, comp_ref.priv, PIXELS / comp_ref.block_size); + + for (int i = 0; i < 4; i++) { + exec.in[i] = (void *) src1[i]; + exec.out[i] = (void *) dst1[i]; + } + call_new(&exec, comp_new.priv, PIXELS / comp_new.block_size); + + for (int i = 0; i < 4; i++) { + const char *name = FMT("%s[%d]", report, i); + const int size = PIXELS * exec.pixel_bits_out >> 3; + const int stride = sizeof(dst0[i]); + + switch (write_op->type) { + case U8: + checkasm_check(uint8_t, (void *) dst0[i], stride, + (void *) dst1[i], stride, + size, 1, name); + break; + case U16: + checkasm_check(uint16_t, (void *) dst0[i], stride, + (void *) dst1[i], stride, + size >> 1, 1, name); + break; + case U32: + checkasm_check(uint32_t, (void *) dst0[i], stride, + (void *) dst1[i], stride, + size >> 2, 1, name); + break; + case F32: + checkasm_check(float, (void *) dst0[i], stride, + (void *) dst1[i], stride, + size >> 2, 1, name); + break; + } + + /* Check for over-write */ + for (int x = size + comp_new.over_write; x < sizeof(dst1[i]); x++) { + if (dst1[i][x] != 0) { + fprintf(stderr, "Overwrite detected in %s: [%d] = 0x%02x\n", + name, x, dst1[i][x]); + fail(); + } + } + + if (write_op->rw.packed) + break; + } + + bench_new(&exec, comp_new.priv, PIXELS / comp_new.block_size); + } + + if (comp_new.func != comp_ref.func && comp_new.free) + comp_new.free(comp_new.priv); + if (comp_ref.free) + comp_ref.free(comp_ref.priv); + sws_free_context(&ctx); +} + +#define CHECK_RANGE(NAME, RANGE, N_IN, N_OUT, IN, OUT, ...) \ + do { \ + check_ops(NAME, RANGE, (SwsOp[]) { \ + { \ + .op = SWS_OP_READ, \ + .type = IN, \ + .rw.elems = N_IN, \ + }, \ + __VA_ARGS__, \ + { \ + .op = SWS_OP_WRITE, \ + .type = OUT, \ + .rw.elems = N_OUT, \ + }, {0} \ + }); \ + } while (0) + +#define CHECK_COMMON_RANGE(NAME, RANGE, IN, OUT, ...) \ + CHECK_RANGE(FMT("%s_p1000", NAME), RANGE, 1, 1, IN, OUT, __VA_ARGS__); \ + CHECK_RANGE(FMT("%s_p1110", NAME), RANGE, 3, 3, IN, OUT, __VA_ARGS__); \ + CHECK_RANGE(FMT("%s_p1111", NAME), RANGE, 4, 4, IN, OUT, __VA_ARGS__); \ + CHECK_RANGE(FMT("%s_p1001", NAME), RANGE, 4, 2, IN, OUT, __VA_ARGS__, { \ + .op = SWS_OP_SWIZZLE, \ + .type = OUT, \ + .swizzle = SWS_SWIZZLE(0, 3, 1, 2), \ + }) + +#define CHECK(NAME, N_IN, N_OUT, IN, OUT, ...) \ + CHECK_RANGE(NAME, 0, N_IN, N_OUT, IN, OUT, __VA_ARGS__) + +#define CHECK_COMMON(NAME, IN, OUT, ...) \ + CHECK_COMMON_RANGE(NAME, 0, IN, OUT, __VA_ARGS__) + +static void check_read_write(void) +{ + for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) { + const char *type = ff_sws_pixel_type_name(t); + for (int i = 1; i <= 4; i++) { + /* Test N->N planar read/write */ + for (int o = 1; o <= i; o++) { + check_ops(FMT("rw_%d_%d_%s", i, o, type), 0, (SwsOp[]) { + { + .op = SWS_OP_READ, + .type = t, + .rw.elems = i, + }, { + .op = SWS_OP_WRITE, + .type = t, + .rw.elems = o, + }, {0} + }); + } + + /* Test packed read/write */ + if (i == 1) + continue; + + check_ops(FMT("read_packed%d_%s", i, type), 0, (SwsOp[]) { + { + .op = SWS_OP_READ, + .type = t, + .rw.elems = i, + .rw.packed = true, + }, { + .op = SWS_OP_WRITE, + .type = t, + .rw.elems = i, + }, {0} + }); + + check_ops(FMT("write_packed%d_%s", i, type), 0, (SwsOp[]) { + { + .op = SWS_OP_READ, + .type = t, + .rw.elems = i, + }, { + .op = SWS_OP_WRITE, + .type = t, + .rw.elems = i, + .rw.packed = true, + }, {0} + }); + } + } + + /* Test fractional reads/writes */ + for (int frac = 1; frac <= 3; frac++) { + const int bits = 8 >> frac; + const int range = (1 << bits) - 1; + if (bits == 2) + continue; /* no 2 bit packed formats currently exist */ + + check_ops(FMT("read_frac%d", frac), 0, (SwsOp[]) { + { + .op = SWS_OP_READ, + .type = U8, + .rw.elems = 1, + .rw.frac = frac, + }, { + .op = SWS_OP_WRITE, + .type = U8, + .rw.elems = 1, + }, {0} + }); + + check_ops(FMT("write_frac%d", frac), range, (SwsOp[]) { + { + .op = SWS_OP_READ, + .type = U8, + .rw.elems = 1, + }, { + .op = SWS_OP_WRITE, + .type = U8, + .rw.elems = 1, + .rw.frac = frac, + }, {0} + }); + } +} + +static void check_swap_bytes(void) +{ + CHECK_COMMON("swap_bytes_16", U16, U16, { + .op = SWS_OP_SWAP_BYTES, + .type = U16, + }); + + CHECK_COMMON("swap_bytes_32", U32, U32, { + .op = SWS_OP_SWAP_BYTES, + .type = U32, + }); +} + +static void check_pack_unpack(void) +{ + const struct { + SwsPixelType type; + SwsPackOp op; + } patterns[] = { + { U8, {{ 3, 3, 2 }}}, + { U8, {{ 2, 3, 3 }}}, + { U8, {{ 1, 2, 1 }}}, + {U16, {{ 5, 6, 5 }}}, + {U16, {{ 5, 5, 5 }}}, + {U16, {{ 4, 4, 4 }}}, + {U32, {{ 2, 10, 10, 10 }}}, + {U32, {{10, 10, 10, 2 }}}, + }; + + for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) { + const SwsPixelType type = patterns[i].type; + const SwsPackOp pack = patterns[i].op; + const int num = pack.pattern[3] ? 4 : 3; + const char *pat = FMT("%d%d%d%d", pack.pattern[0], pack.pattern[1], + pack.pattern[2], pack.pattern[3]); + + CHECK(FMT("pack_%s", pat), num, 1, type, type, { + .op = SWS_OP_PACK, + .type = type, + .pack = pack, + }); + + CHECK(FMT("unpack_%s", pat), 1, num, type, type, { + .op = SWS_OP_UNPACK, + .type = type, + .pack = pack, + }); + } +} + +static AVRational rndq(SwsPixelType t) +{ + const unsigned num = rnd(); + if (ff_sws_pixel_type_is_int(t)) { + const unsigned mask = (1 << (ff_sws_pixel_type_size(t) * 8)) - 1; + return (AVRational) { num & mask, 1 }; + } else { + const unsigned den = rnd(); + return (AVRational) { num, den ? den : 1 }; + } +} + +static void check_clear(void) +{ + for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) { + const char *type = ff_sws_pixel_type_name(t); + const int bits = ff_sws_pixel_type_size(t) * 8; + + /* TODO: AVRational can't fit 32 bit constants */ + if (bits < 32) { + const AVRational chroma = (AVRational) { 1 << (bits - 1), 1}; + const AVRational alpha = (AVRational) { (1 << bits) - 1, 1}; + const AVRational zero = (AVRational) { 0, 1}; + const AVRational none = {0}; + + const SwsConst patterns[] = { + /* Zero only */ + {.q4 = { none, none, none, zero }}, + {.q4 = { zero, none, none, none }}, + /* Alpha only */ + {.q4 = { none, none, none, alpha }}, + {.q4 = { alpha, none, none, none }}, + /* Chroma only */ + {.q4 = { chroma, chroma, none, none }}, + {.q4 = { none, chroma, chroma, none }}, + {.q4 = { none, none, chroma, chroma }}, + {.q4 = { chroma, none, chroma, none }}, + {.q4 = { none, chroma, none, chroma }}, + /* Alpha+chroma */ + {.q4 = { chroma, chroma, none, alpha }}, + {.q4 = { none, chroma, chroma, alpha }}, + {.q4 = { alpha, none, chroma, chroma }}, + {.q4 = { chroma, none, chroma, alpha }}, + {.q4 = { alpha, chroma, none, chroma }}, + /* Random values */ + {.q4 = { none, rndq(t), rndq(t), rndq(t) }}, + {.q4 = { none, rndq(t), rndq(t), rndq(t) }}, + {.q4 = { none, rndq(t), rndq(t), rndq(t) }}, + {.q4 = { none, rndq(t), rndq(t), rndq(t) }}, + }; + + for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) { + CHECK(FMT("clear_pattern_%s[%d]", type, i), 4, 4, t, t, { + .op = SWS_OP_CLEAR, + .type = t, + .c = patterns[i], + }); + } + } else if (!ff_sws_pixel_type_is_int(t)) { + /* Floating point YUV doesn't exist, only alpha needs to be cleared */ + CHECK(FMT("clear_alpha_%s", type), 4, 4, t, t, { + .op = SWS_OP_CLEAR, + .type = t, + .c.q4[3] = { 0, 1 }, + }); + } + } +} + +static void check_shift(void) +{ + for (SwsPixelType t = U16; t < SWS_PIXEL_TYPE_NB; t++) { + const char *type = ff_sws_pixel_type_name(t); + if (!ff_sws_pixel_type_is_int(t)) + continue; + + for (int shift = 1; shift <= 8; shift++) { + CHECK_COMMON(FMT("lshift%d_%s", shift, type), t, t, { + .op = SWS_OP_LSHIFT, + .type = t, + .c.u = shift, + }); + + CHECK_COMMON(FMT("rshift%d_%s", shift, type), t, t, { + .op = SWS_OP_RSHIFT, + .type = t, + .c.u = shift, + }); + } + } +} + +static void check_swizzle(void) +{ + for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) { + const char *type = ff_sws_pixel_type_name(t); + static const int patterns[][4] = { + /* Pure swizzle */ + {3, 0, 1, 2}, + {3, 0, 2, 1}, + {2, 1, 0, 3}, + {3, 2, 1, 0}, + {3, 1, 0, 2}, + {3, 2, 0, 1}, + {1, 2, 0, 3}, + {1, 0, 2, 3}, + {2, 0, 1, 3}, + {2, 3, 1, 0}, + {2, 1, 3, 0}, + {1, 2, 3, 0}, + {1, 3, 2, 0}, + {0, 2, 1, 3}, + {0, 2, 3, 1}, + {0, 3, 1, 2}, + {3, 1, 2, 0}, + {0, 3, 2, 1}, + /* Luma expansion */ + {0, 0, 0, 3}, + {3, 0, 0, 0}, + {0, 0, 0, 1}, + {1, 0, 0, 0}, + }; + + for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) { + const int x = patterns[i][0], y = patterns[i][1], + z = patterns[i][2], w = patterns[i][3]; + CHECK(FMT("swizzle_%d%d%d%d_%s", x, y, z, w, type), 4, 4, t, t, { + .op = SWS_OP_SWIZZLE, + .type = t, + .swizzle = SWS_SWIZZLE(x, y, z, w), + }); + } + } +} + +static void check_convert(void) +{ + for (SwsPixelType i = U8; i < SWS_PIXEL_TYPE_NB; i++) { + const char *itype = ff_sws_pixel_type_name(i); + const int isize = ff_sws_pixel_type_size(i); + for (SwsPixelType o = U8; o < SWS_PIXEL_TYPE_NB; o++) { + const char *otype = ff_sws_pixel_type_name(o); + const int osize = ff_sws_pixel_type_size(o); + const char *name = FMT("convert_%s_%s", itype, otype); + if (i == o) + continue; + + if (isize < osize || !ff_sws_pixel_type_is_int(o)) { + CHECK_COMMON(name, i, o, { + .op = SWS_OP_CONVERT, + .type = i, + .convert.to = o, + }); + } else if (isize > osize || !ff_sws_pixel_type_is_int(i)) { + uint32_t range = (1 << osize * 8) - 1; + CHECK_COMMON_RANGE(name, range, i, o, { + .op = SWS_OP_CONVERT, + .type = i, + .convert.to = o, + }); + } + } + } + + /* Check expanding conversions */ + CHECK_COMMON("expand16", U8, U16, { + .op = SWS_OP_CONVERT, + .type = U8, + .convert.to = U16, + .convert.expand = true, + }); + + CHECK_COMMON("expand32", U8, U32, { + .op = SWS_OP_CONVERT, + .type = U8, + .convert.to = U32, + .convert.expand = true, + }); +} + +static void check_dither(void) +{ + for (SwsPixelType t = F32; t < SWS_PIXEL_TYPE_NB; t++) { + const char *type = ff_sws_pixel_type_name(t); + if (ff_sws_pixel_type_is_int(t)) + continue; + + /* Test all sizes up to 16x16 */ + for (int size_log2 = 0; size_log2 <= 4; size_log2++) { + const int size = 1 << size_log2; + AVRational *matrix = av_refstruct_allocz(size * size * sizeof(*matrix)); + if (!matrix) { + fail(); + return; + } + + if (size == 1) { + matrix[0] = (AVRational) { 1, 2 }; + } else { + for (int i = 0; i < size * size; i++) + matrix[i] = rndq(t); + } + + CHECK_COMMON(FMT("dither_%dx%d_%s", size, size, type), t, t, { + .op = SWS_OP_DITHER, + .type = t, + .dither.size_log2 = size_log2, + .dither.matrix = matrix, + }); + + av_refstruct_unref(&matrix); + } + } +} + +static void check_min_max(void) +{ + for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) { + const char *type = ff_sws_pixel_type_name(t); + CHECK_COMMON(FMT("min_%s", type), t, t, { + .op = SWS_OP_MIN, + .type = t, + .c.q4 = { rndq(t), rndq(t), rndq(t), rndq(t) }, + }); + + CHECK_COMMON(FMT("max_%s", type), t, t, { + .op = SWS_OP_MAX, + .type = t, + .c.q4 = { rndq(t), rndq(t), rndq(t), rndq(t) }, + }); + } +} + +static void check_linear(void) +{ + static const struct { + const char *name; + uint32_t mask; + } patterns[] = { + { "noop", 0 }, + { "luma", SWS_MASK_LUMA }, + { "alpha", SWS_MASK_ALPHA }, + { "luma+alpha", SWS_MASK_LUMA | SWS_MASK_ALPHA }, + { "dot3", 0b111 }, + { "dot4", 0b1111 }, + { "row0", SWS_MASK_ROW(0) }, + { "row0+alpha", SWS_MASK_ROW(0) | SWS_MASK_ALPHA }, + { "off3", SWS_MASK_OFF3 }, + { "off3+alpha", SWS_MASK_OFF3 | SWS_MASK_ALPHA }, + { "diag3", SWS_MASK_DIAG3 }, + { "diag4", SWS_MASK_DIAG4 }, + { "diag3+alpha", SWS_MASK_DIAG3 | SWS_MASK_ALPHA }, + { "diag3+off3", SWS_MASK_DIAG3 | SWS_MASK_OFF3 }, + { "diag3+off3+alpha", SWS_MASK_DIAG3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA }, + { "diag4+off4", SWS_MASK_DIAG4 | SWS_MASK_OFF4 }, + { "matrix3", SWS_MASK_MAT3 }, + { "matrix3+off3", SWS_MASK_MAT3 | SWS_MASK_OFF3 }, + { "matrix3+off3+alpha", SWS_MASK_MAT3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA }, + { "matrix4", SWS_MASK_MAT4 }, + { "matrix4+off4", SWS_MASK_MAT4 | SWS_MASK_OFF4 }, + }; + + for (SwsPixelType t = F32; t < SWS_PIXEL_TYPE_NB; t++) { + const char *type = ff_sws_pixel_type_name(t); + if (ff_sws_pixel_type_is_int(t)) + continue; + + for (int p = 0; p < FF_ARRAY_ELEMS(patterns); p++) { + const uint32_t mask = patterns[p].mask; + SwsLinearOp lin = { .mask = mask }; + + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 5; j++) { + if (mask & SWS_MASK(i, j)) { + lin.m[i][j] = rndq(t); + } else { + lin.m[i][j] = (AVRational) { i == j, 1 }; + } + } + } + + CHECK(FMT("linear_%s_%s", patterns[p].name, type), 4, 4, t, t, { + .op = SWS_OP_LINEAR, + .type = t, + .lin = lin, + }); + } + } +} + +static void check_scale(void) +{ + for (SwsPixelType t = F32; t < SWS_PIXEL_TYPE_NB; t++) { + const char *type = ff_sws_pixel_type_name(t); + const int bits = ff_sws_pixel_type_size(t) * 8; + if (ff_sws_pixel_type_is_int(t)) { + /* Ensure the result won't exceed the value range */ + const unsigned max = (1 << bits) - 1; + const unsigned scale = rnd() & max; + const unsigned range = max / (scale ? scale : 1); + CHECK_COMMON_RANGE(FMT("scale_%s", type), range, t, t, { + .op = SWS_OP_SCALE, + .type = t, + .c.q = { scale, 1 }, + }); + } else { + CHECK_COMMON(FMT("scale_%s", type), t, t, { + .op = SWS_OP_SCALE, + .type = t, + .c.q = rndq(t), + }); + } + } +} + +void checkasm_check_sw_ops(void) +{ + check_read_write(); + report("read_write"); + check_swap_bytes(); + report("swap_bytes"); + check_pack_unpack(); + report("pack_unpack"); + check_clear(); + report("clear"); + check_shift(); + report("shift"); + check_swizzle(); + report("swizzle"); + check_convert(); + report("convert"); + check_dither(); + report("dither"); + check_min_max(); + report("min_max"); + check_linear(); + report("linear"); + check_scale(); + report("scale"); +} -- 2.49.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".