This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit cd9e6996afe679d713b726e1580a5f855cf7b255 Author: Andreas Rheinhardt <[email protected]> AuthorDate: Fri Jun 26 21:46:13 2026 +0200 Commit: Andreas Rheinhardt <[email protected]> CommitDate: Thu Jul 2 20:39:29 2026 +0200 swscale/uops: Move stuff for generating uops_macros.h out It is not used for normal builds and is more an auxiliary dev tool; move the code into a new file, uops_macros_gen.c to be built as a DEVPROG. Signed-off-by: Andreas Rheinhardt <[email protected]> --- libswscale/Makefile | 2 + libswscale/tests/sws_ops.c | 13 -- libswscale/uops.c | 390 +--------------------------------------- libswscale/uops.h | 12 -- libswscale/uops_list.h | 61 +++++++ libswscale/uops_macros_gen.c | 415 +++++++++++++++++++++++++++++++++++++++++++ tests/fate/libswscale.mak | 4 +- 7 files changed, 483 insertions(+), 414 deletions(-) diff --git a/libswscale/Makefile b/libswscale/Makefile index e98780a61f..9c3aae2304 100644 --- a/libswscale/Makefile +++ b/libswscale/Makefile @@ -55,3 +55,5 @@ TESTPROGS = colorspace \ TESTPROGS-$(CONFIG_UNSTABLE) = sws_ops \ sws_ops_aarch64 \ + +DEVPROGS-$(CONFIG_UNSTABLE) = uops_macros_gen \ diff --git a/libswscale/tests/sws_ops.c b/libswscale/tests/sws_ops.c index 0da59b473f..e18b6e5ac0 100644 --- a/libswscale/tests/sws_ops.c +++ b/libswscale/tests/sws_ops.c @@ -110,7 +110,6 @@ int main(int argc, char **argv) enum AVPixelFormat dst_fmt = AV_PIX_FMT_NONE; SwsContext *ctx = NULL; SwsGraph *graph = NULL; - bool macros_gen = false; int ret = 1; #ifdef _WIN32 @@ -129,8 +128,6 @@ int main(int argc, char **argv) " Only test the specified source pixel format\n" " -v <level>\n" " Enable log verbosity at given level\n" - " -macros\n" - " Generate helper macros\n" ); return 0; } @@ -157,8 +154,6 @@ int main(int argc, char **argv) goto bad_option; av_log_set_level(atoi(argv[i + 1])); i++; - } else if (!strcmp(argv[i], "-macros")) { - macros_gen = true; } else { bad_option: fprintf(stderr, "bad option or argument missing (%s) see -help\n", argv[i]); 
@@ -166,14 +161,6 @@ bad_option: } } - if (macros_gen) { - char *macros = NULL; - ret = ff_sws_uops_macros_gen(&macros); - if (ret >= 0) - puts(macros); - av_free(macros); - return ret; - } /* Allocate dummy graph and context for ff_sws_compile_pass() */ graph = ff_sws_graph_alloc(); if (!graph) diff --git a/libswscale/uops.c b/libswscale/uops.c index 4c37a65829..f8af9c41c2 100644 --- a/libswscale/uops.c +++ b/libswscale/uops.c @@ -23,11 +23,10 @@ #include "libavutil/avassert.h" #include "libavutil/mem.h" #include "libavutil/refstruct.h" -#include "libavutil/tree.h" #include "ops.h" -#include "ops_internal.h" #include "uops.h" +#include "uops_list.h" int ff_sws_uop_cmp(const SwsUOp *a, const SwsUOp *b) { @@ -41,60 +40,13 @@ int ff_sws_uop_cmp(const SwsUOp *a, const SwsUOp *b) } static const struct { - char full[32]; char abbr[32]; } uop_names[SWS_UOP_TYPE_NB] = { -#define UOP_NAME(OP, ABBR) [OP] = { #OP, ABBR } - UOP_NAME(SWS_UOP_INVALID, "invalid"), - UOP_NAME(SWS_UOP_READ_PLANAR, "read_planar"), - UOP_NAME(SWS_UOP_READ_PLANAR_FH, "read_planar_fh"), - UOP_NAME(SWS_UOP_READ_PLANAR_FV, "read_planar_fv"), - UOP_NAME(SWS_UOP_READ_PLANAR_FV_FMA, "read_planar_fv_fma"), - UOP_NAME(SWS_UOP_READ_PACKED, "read_packed"), - UOP_NAME(SWS_UOP_READ_NIBBLE, "read_nibble"), - UOP_NAME(SWS_UOP_READ_BIT, "read_bit"), - UOP_NAME(SWS_UOP_READ_PALETTE, "read_palette"), - UOP_NAME(SWS_UOP_WRITE_PLANAR, "write_planar"), - UOP_NAME(SWS_UOP_WRITE_PACKED, "write_packed"), - UOP_NAME(SWS_UOP_WRITE_NIBBLE, "write_nibble"), - UOP_NAME(SWS_UOP_WRITE_BIT, "write_bit"), - UOP_NAME(SWS_UOP_PERMUTE, "permute"), - UOP_NAME(SWS_UOP_COPY, "copy"), - UOP_NAME(SWS_UOP_MOVE, "move"), - UOP_NAME(SWS_UOP_SWAP_BYTES, "swap_bytes"), - UOP_NAME(SWS_UOP_EXPAND_BIT, "expand_bit"), - UOP_NAME(SWS_UOP_EXPAND_PAIR, "expand_pair"), - UOP_NAME(SWS_UOP_EXPAND_QUAD, "expand_quad"), - UOP_NAME(SWS_UOP_TO_U8, "to_u8"), - UOP_NAME(SWS_UOP_TO_U16, "to_u16"), - UOP_NAME(SWS_UOP_TO_U32, "to_u32"), - UOP_NAME(SWS_UOP_TO_F32, 
"to_f32"), - UOP_NAME(SWS_UOP_SCALE, "scale"), - UOP_NAME(SWS_UOP_LINEAR, "linear"), - UOP_NAME(SWS_UOP_LINEAR_FMA, "linear_fma"), - UOP_NAME(SWS_UOP_ADD, "add"), - UOP_NAME(SWS_UOP_MIN, "min"), - UOP_NAME(SWS_UOP_MAX, "max"), - UOP_NAME(SWS_UOP_UNPACK, "unpack"), - UOP_NAME(SWS_UOP_PACK, "pack"), - UOP_NAME(SWS_UOP_LSHIFT, "lshift"), - UOP_NAME(SWS_UOP_RSHIFT, "rshift"), - UOP_NAME(SWS_UOP_CLEAR, "clear"), - UOP_NAME(SWS_UOP_DITHER, "dither"), +#define UOP_NAME(OP, ABBR) [OP] = { ABBR }, + UOPS_LIST(UOP_NAME) #undef UOP_NAME }; -static const struct { - char full[16]; - char prefix[8]; -} pixel_types[SWS_PIXEL_TYPE_NB] = { - [SWS_PIXEL_NONE] = { "SWS_PIXEL_NONE", "" }, - [SWS_PIXEL_U8] = { "SWS_PIXEL_U8", "U8_" }, - [SWS_PIXEL_U16] = { "SWS_PIXEL_U16", "U16_" }, - [SWS_PIXEL_U32] = { "SWS_PIXEL_U32", "U32_" }, - [SWS_PIXEL_F32] = { "SWS_PIXEL_F32", "F32_" }, -}; - static SwsPixel pixel_from_q64(SwsPixelType type, AVRational64 val) { av_assert1(val.den != 0); @@ -219,135 +171,6 @@ void ff_sws_uop_name(const SwsUOp *op, char buf[SWS_UOP_NAME_MAX]) av_assert0(av_bprint_is_complete(&bp)); } -static int generate_entry_struct(void *opaque, void *key) -{ - const SwsUOp *ref = opaque; - const SwsUOp *uop = key; - AVBPrint *bp = ref->data.opaque; - char name[SWS_UOP_NAME_MAX]; - ff_sws_uop_name(uop, name); - av_bprintf(bp, " \\\n MACRO(__VA_ARGS__, %-40s", name); - av_bprintf(bp, ", .type = %-13s, .uop = %-24s, .mask = 0x%x", - pixel_types[uop->type].full, uop_names[uop->uop].full, uop->mask); - - const SwsUOpParams *par = &uop->par; - switch (uop->uop) { - case SWS_UOP_READ_PLANAR_FH: - case SWS_UOP_READ_PLANAR_FV: - case SWS_UOP_READ_PLANAR_FV_FMA: - av_bprintf(bp, ", .par.filter.type = %s", pixel_types[par->filter.type].full); - break; - case SWS_UOP_LSHIFT: - case SWS_UOP_RSHIFT: - av_bprintf(bp, ", .par.shift.amount = %u", par->shift.amount); - break; - case SWS_UOP_PERMUTE: - case SWS_UOP_COPY: - av_bprintf(bp, ", .par.swizzle.in = {%d, %d, %d, %d}", - 
par->swizzle.in[0], par->swizzle.in[1], - par->swizzle.in[2], par->swizzle.in[3]); - break; - case SWS_UOP_MOVE: - av_bprintf(bp, ", .par.move.num_moves = %d", par->move.num_moves); - av_bprintf(bp, ", .par.move.dst = {%d, %d, %d, %d, %d, %d}", - par->move.dst[0], par->move.dst[1], par->move.dst[2], - par->move.dst[3], par->move.dst[4], par->move.dst[5]); - av_bprintf(bp, ", .par.move.src = {%d, %d, %d, %d, %d, %d}", - par->move.src[0], par->move.src[1], par->move.src[2], - par->move.src[3], par->move.src[4], par->move.src[5]); - break; - case SWS_UOP_PACK: - case SWS_UOP_UNPACK: - av_bprintf(bp, ", .par.pack.pattern = {%d, %d, %d, %d}", - par->pack.pattern[0], par->pack.pattern[1], - par->pack.pattern[2], par->pack.pattern[3]); - break; - case SWS_UOP_CLEAR: - av_bprintf(bp, ", .par.clear.one = 0x%x, .par.clear.zero = 0x%x", - par->clear.one, par->clear.zero); - break; - case SWS_UOP_LINEAR: - case SWS_UOP_LINEAR_FMA: - av_bprintf(bp, ", .par.lin.one = 0x%x, .par.lin.zero = 0x%x", - par->lin.one, par->lin.zero); - if (uop->uop == SWS_UOP_LINEAR_FMA) - av_bprintf(bp, ", .par.lin.exact = 0x%x", par->lin.exact); - break; - case SWS_UOP_DITHER: - av_bprintf(bp, ", .par.dither = { .y_offset = {%u, %u, %u, %u}, .size_log2 = %u }", - par->dither.y_offset[0], par->dither.y_offset[1], - par->dither.y_offset[2], par->dither.y_offset[3], - par->dither.size_log2); - break; - } - - av_bprintf(bp, ")"); - return 0; -} - -static int generate_entry_args(void *opaque, void *key) -{ - const SwsUOp *ref = opaque; - const SwsUOp *uop = key; - AVBPrint *bp = ref->data.opaque; - char name[SWS_UOP_NAME_MAX]; - ff_sws_uop_name(uop, name); - av_bprintf(bp, " \\\n MACRO(__VA_ARGS__, %-40s, %-13s, %-24s, 0x%x", - name, pixel_types[uop->type].full, uop_names[uop->uop].full, uop->mask); - - const SwsUOpParams *par = &uop->par; - switch (uop->uop) { - case SWS_UOP_READ_PLANAR_FH: - case SWS_UOP_READ_PLANAR_FV: - case SWS_UOP_READ_PLANAR_FV_FMA: - av_bprintf(bp, ", %s", 
pixel_types[par->filter.type].full); - break; - case SWS_UOP_LSHIFT: - case SWS_UOP_RSHIFT: - av_bprintf(bp, ", %u", par->shift.amount); - break; - case SWS_UOP_PERMUTE: - case SWS_UOP_COPY: - av_bprintf(bp, ", %d, %d, %d, %d", - par->swizzle.in[0], par->swizzle.in[1], - par->swizzle.in[2], par->swizzle.in[3]); - break; - case SWS_UOP_MOVE: - av_bprintf(bp, ", %d", par->move.num_moves); - av_bprintf(bp, ", %d, %d, %d, %d, %d, %d", - par->move.dst[0], par->move.dst[1], par->move.dst[2], - par->move.dst[3], par->move.dst[4], par->move.dst[5]); - av_bprintf(bp, ", %d, %d, %d, %d, %d, %d", - par->move.src[0], par->move.src[1], par->move.src[2], - par->move.src[3], par->move.src[4], par->move.src[5]); - break; - case SWS_UOP_PACK: - case SWS_UOP_UNPACK: - av_bprintf(bp, ", %d, %d, %d, %d", - par->pack.pattern[0], par->pack.pattern[1], - par->pack.pattern[2], par->pack.pattern[3]); - break; - case SWS_UOP_CLEAR: - av_bprintf(bp, ", 0x%05x, 0x%05x", par->clear.one, par->clear.zero); - break; - case SWS_UOP_LINEAR: - case SWS_UOP_LINEAR_FMA: - av_bprintf(bp, ", 0x%05x, 0x%05x", par->lin.one, par->lin.zero); - if (uop->uop == SWS_UOP_LINEAR_FMA) - av_bprintf(bp, ", 0x%05x", par->lin.exact); - break; - case SWS_UOP_DITHER: - av_bprintf(bp, ", %u, %u, %u, %u, %u", - par->dither.y_offset[0], par->dither.y_offset[1], - par->dither.y_offset[2], par->dither.y_offset[3], - par->dither.size_log2); - break; - } - - av_bprintf(bp, ")"); - return 0; -} - static void uop_uninit(SwsUOp *uop) { switch (uop->uop) { @@ -871,210 +694,3 @@ int ff_sws_ops_translate(SwsContext *ctx, const SwsOpList *ops, } return 0; } - -static int register_uop(struct AVTreeNode **root, const SwsUOp *uop) -{ - SwsUOp *key = av_memdup(uop, sizeof(*uop)); - if (!key) - return AVERROR(ENOMEM); - memset(&key->data, 0, sizeof(key->data)); - - struct AVTreeNode *node = av_tree_node_alloc(); - if (!node) { - av_free(key); - return AVERROR(ENOMEM); - } - - av_tree_insert(root, key, ff_sws_uop_cmp_v, &node); - if 
(node) { - av_free(node); - av_free(key); - } - return 0; -} - -static int register_flags(SwsContext *ctx, const SwsOpList *ops, SwsUOpFlags flags) -{ - SwsUOpList *uops = ff_sws_uop_list_alloc(); - if (!uops) - return AVERROR(ENOMEM); - - int ret = ff_sws_ops_translate(ctx, ops, flags, uops); - if (ret < 0) - goto fail; - - struct AVTreeNode **root = ctx->opaque; - for (int i = 0; i < uops->num_ops; i++) { - ret = register_uop(root, &uops->ops[i]); - if (ret < 0) - goto fail; - } - -fail: - ff_sws_uop_list_free(&uops); - return ret; -} - -static const SwsUOpFlags uop_flags[] = { - 0, - SWS_UOP_FLAG_FMA | SWS_UOP_FLAG_MOVE, /* x86 backend */ -}; - -static int register_uops(SwsContext *ctx, const SwsOpList *ops, - SwsCompiledOp *out) -{ - for (int i = 0; i < FF_ARRAY_ELEMS(uop_flags); i++) { - int ret = register_flags(ctx, ops, uop_flags[i]); - if (ret < 0) - return ret; - } - - *out = (SwsCompiledOp) {0}; /* dummy value, will be immediately freed */ - return 0; -} - -/* Dummy backend that just registers all seen uops */ -static const SwsOpBackend backend_uops = { - .name = "uops_gen", - .compile = register_uops, -}; - -static int register_all_uops(SwsContext *ctx, void *graph, SwsOpList *ops) -{ - /* ff_sws_compile_pass() takes over ownership of `ops` */ - SwsOpList *copy = ff_sws_op_list_duplicate(ops); - if (!copy) - return AVERROR(ENOMEM); - - const int flags = SWS_OP_FLAG_DRY_RUN | SWS_OP_FLAG_SPLIT_MEMCPY; - return ff_sws_compile_pass(graph, &backend_uops, &copy, flags, NULL, NULL); -} - -static const SwsFlags flags[] = { - 0, - SWS_ACCURATE_RND, /* may insert extra 1x1 dither ops (for accurate rounding) */ - SWS_BITEXACT, /* prevents some FMA optimizations */ - SWS_ACCURATE_RND | SWS_BITEXACT, -}; - -/* Limit the range of av_tree_enumerate() to only matching uop and type */ -static int enum_type(void *opaque, void *elem) -{ - const SwsUOp *a = opaque, *b = elem; - if (a->type != b->type) - return (int) b->type - a->type; - if (a->uop != b->uop) - return (int) 
b->uop - a->uop; - return 0; -} - -static int free_uop_key(void *opaque, void *key) -{ - av_free(key); - return 0; -} - -int ff_sws_uops_macros_gen(char **out_str) -{ - int ret; - struct AVTreeNode *root = NULL; - - AVBPrint bprint, *const bp = &bprint; - av_bprint_init(bp, 0, AV_BPRINT_SIZE_UNLIMITED); - - /* Allocate dummy graph and context for ff_sws_compile_pass() */ - SwsGraph *graph = ff_sws_graph_alloc(); - if (!graph) - return AVERROR(ENOMEM); - - SwsContext *ctx = graph->ctx = sws_alloc_context(); - if (!ctx) { - ret = AVERROR(ENOMEM); - goto fail; - } - - /* Use this to plumb the tree state through all the layers of abstraction */ - ctx->opaque = &root; - ctx->scaler = SWS_SCALE_BILINEAR; /* cheaper to generate filter kernels */ - - /* Register all unique uops over every relevant combination of flags */ - for (int i = 0; i < FF_ARRAY_ELEMS(flags); i++) { - ctx->flags = flags[i]; - ret = ff_sws_enum_op_lists(ctx, graph, AV_PIX_FMT_NONE, AV_PIX_FMT_NONE, - register_all_uops); - if (ret < 0) - goto fail; - } - - /** - * Additionally make sure planar reads/writes are always available for all - * formats, because checkasm depends on them to be able to verify the - * input/output of any other operations. - */ - for (enum SwsPixelType type = SWS_PIXEL_NONE+1; type < SWS_PIXEL_TYPE_NB; type++) { - if (!ff_sws_pixel_type_is_int(type)) - continue; - for (int elems = 1; elems <= 4; elems++) { - for (int rw = 0; rw < 2; rw++) { - SwsUOp uop = { - .type = type, - .uop = rw ? SWS_UOP_WRITE_PLANAR : SWS_UOP_READ_PLANAR, - .mask = SWS_COMP_ELEMS(elems), - }; - - ret = register_uop(&root, &uop); - if (ret < 0) - goto fail; - } - } - } - - #define BPRINT_STR(str) av_bprint_append_data(bp, str, strlen(str)) - BPRINT_STR( -"/**\n" -" * This file is automatically generated. 
Do not edit manually.\n" -" * To regenerate, run: make fate-sws-uops-macros GEN=1\n" -" */\n" -"\n" -"#ifndef SWSCALE_UOPS_MACROS_H\n" -"#define SWSCALE_UOPS_MACROS_H\n" -"\n" -"/**\n" -" * Boilerplate helper macros, for template-based backends. These will be\n" -" * instantiated like this, with parameters in struct order:\n" -" * MACRO(__VA_ARGS__, NAME, UOP, TYPE, MASK, [PARAMS,])\n" -" * The _STRUCT variants pass all arguments in C struct syntax, while the\n" -" * plain variants give them as separate C values (e.g. for use in calls)\n" -" */\n" -"#define SWS_GLUE3(x, y, z) x ## _ ## y ## _ ## z\n" -"#define SWS_FOR(TYPE, UOP, MACRO, ...) \\\n" -" SWS_GLUE3(SWS_FOR, TYPE, UOP)(MACRO, __VA_ARGS__)\n" -"#define SWS_FOR_STRUCT(TYPE, UOP, MACRO, ...) \\\n" -" SWS_GLUE3(SWS_FOR_STRUCT, TYPE, UOP)(MACRO, __VA_ARGS__)\n" -"\n"); - - SwsUOp key = { .data.opaque = bp }; - for (key.type = SWS_PIXEL_NONE + 1; key.type < SWS_PIXEL_TYPE_NB; key.type++) { - for (key.uop = SWS_UOP_INVALID + 1; key.uop < SWS_UOP_TYPE_NB; key.uop++) { - const char *macro = uop_names[key.uop].full + sizeof("SWS_UOP_") - 1; - const char *prefix = pixel_types[key.type].prefix; - av_bprintf(bp, "#define SWS_FOR_%s%s(MACRO, ...)", prefix, macro); - av_tree_enumerate(root, &key, enum_type, generate_entry_args); - av_bprintf(bp, "\n"); - av_bprintf(bp, "#define SWS_FOR_STRUCT_%s%s(MACRO, ...)", prefix, macro); - av_tree_enumerate(root, &key, enum_type, generate_entry_struct); - av_bprintf(bp, "\n"); - } - } - - BPRINT_STR("\n#endif /* SWSCALE_UOPS_MACROS_H */"); - ret = av_bprint_finalize(bp, out_str); - -fail: - av_bprint_finalize(bp, NULL); - av_tree_enumerate(root, NULL, NULL, free_uop_key); - av_tree_destroy(root); - ff_sws_graph_free(&graph); - sws_free_context(&ctx); - return ret; -} diff --git a/libswscale/uops.h b/libswscale/uops.h index 284f74042b..1ad14efc58 100644 --- a/libswscale/uops.h +++ b/libswscale/uops.h @@ -272,16 +272,4 @@ int ff_sws_uop_list_append(SwsUOpList *uops, SwsUOp *uop); 
int ff_sws_ops_translate(SwsContext *ctx, const SwsOpList *ops, SwsUOpFlags flags, SwsUOpList *uops); -/** - * Generate a set of boilerplate C preprocessor macros for describing and - * programmatically iterating over all possible SwsUOps. - * - * This function can be quite slow as it iterates over every possible - * combination of pixel formats and flags. - * - * Returns 0 or a negative error code. On success, an allocated string is - * returned via `out_str`, and must be av_free()'d by the caller. - */ -int ff_sws_uops_macros_gen(char **out_str); - #endif diff --git a/libswscale/uops_list.h b/libswscale/uops_list.h new file mode 100644 index 0000000000..793678b1d3 --- /dev/null +++ b/libswscale/uops_list.h @@ -0,0 +1,61 @@ +/** + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef SWSCALE_UOPS_LIST_H +#define SWSCALE_UOPS_LIST_H + +// ENTRY(OP, ABBR) +#define UOPS_LIST(ENTRY) \ + ENTRY(SWS_UOP_INVALID, "invalid") \ + ENTRY(SWS_UOP_READ_PLANAR, "read_planar") \ + ENTRY(SWS_UOP_READ_PLANAR_FH, "read_planar_fh") \ + ENTRY(SWS_UOP_READ_PLANAR_FV, "read_planar_fv") \ + ENTRY(SWS_UOP_READ_PLANAR_FV_FMA, "read_planar_fv_fma") \ + ENTRY(SWS_UOP_READ_PACKED, "read_packed") \ + ENTRY(SWS_UOP_READ_NIBBLE, "read_nibble") \ + ENTRY(SWS_UOP_READ_BIT, "read_bit") \ + ENTRY(SWS_UOP_READ_PALETTE, "read_palette") \ + ENTRY(SWS_UOP_WRITE_PLANAR, "write_planar") \ + ENTRY(SWS_UOP_WRITE_PACKED, "write_packed") \ + ENTRY(SWS_UOP_WRITE_NIBBLE, "write_nibble") \ + ENTRY(SWS_UOP_WRITE_BIT, "write_bit") \ + ENTRY(SWS_UOP_PERMUTE, "permute") \ + ENTRY(SWS_UOP_COPY, "copy") \ + ENTRY(SWS_UOP_MOVE, "move") \ + ENTRY(SWS_UOP_SWAP_BYTES, "swap_bytes") \ + ENTRY(SWS_UOP_EXPAND_BIT, "expand_bit") \ + ENTRY(SWS_UOP_EXPAND_PAIR, "expand_pair") \ + ENTRY(SWS_UOP_EXPAND_QUAD, "expand_quad") \ + ENTRY(SWS_UOP_TO_U8, "to_u8") \ + ENTRY(SWS_UOP_TO_U16, "to_u16") \ + ENTRY(SWS_UOP_TO_U32, "to_u32") \ + ENTRY(SWS_UOP_TO_F32, "to_f32") \ + ENTRY(SWS_UOP_SCALE, "scale") \ + ENTRY(SWS_UOP_LINEAR, "linear") \ + ENTRY(SWS_UOP_LINEAR_FMA, "linear_fma") \ + ENTRY(SWS_UOP_ADD, "add") \ + ENTRY(SWS_UOP_MIN, "min") \ + ENTRY(SWS_UOP_MAX, "max") \ + ENTRY(SWS_UOP_UNPACK, "unpack") \ + ENTRY(SWS_UOP_PACK, "pack") \ + ENTRY(SWS_UOP_LSHIFT, "lshift") \ + ENTRY(SWS_UOP_RSHIFT, "rshift") \ + ENTRY(SWS_UOP_CLEAR, "clear") \ + ENTRY(SWS_UOP_DITHER, "dither") \ + +#endif /* SWSCALE_UOPS_LIST_H */ diff --git a/libswscale/uops_macros_gen.c b/libswscale/uops_macros_gen.c new file mode 100644 index 0000000000..e01ec4c6c8 --- /dev/null +++ b/libswscale/uops_macros_gen.c @@ 
-0,0 +1,415 @@ +/** + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <string.h> + +#ifdef _WIN32 +#include <io.h> +#include <fcntl.h> +#endif + +#include "libavutil/bprint.h" +#include "libavutil/error.h" +#include "libavutil/macros.h" +#include "libavutil/mem.h" +#include "libavutil/pixfmt.h" +#include "libavutil/tree.h" +#include "ops.h" +#include "ops_dispatch.h" +#include "swscale.h" +#include "uops.h" +#include "uops_list.h" + +static const struct { + char full[32]; + char abbr[32]; +} uop_names[SWS_UOP_TYPE_NB] = { +#define UOP_NAME(OP, ABBR) [OP] = { #OP, ABBR }, + UOPS_LIST(UOP_NAME) +}; + +static const struct { + char full[16]; + char prefix[8]; +} pixel_types[SWS_PIXEL_TYPE_NB] = { + [SWS_PIXEL_NONE] = { "SWS_PIXEL_NONE", "" }, + [SWS_PIXEL_U8] = { "SWS_PIXEL_U8", "U8_" }, + [SWS_PIXEL_U16] = { "SWS_PIXEL_U16", "U16_" }, + [SWS_PIXEL_U32] = { "SWS_PIXEL_U32", "U32_" }, + [SWS_PIXEL_F32] = { "SWS_PIXEL_F32", "F32_" }, +}; + +static int generate_entry_struct(void *opaque, void *key) +{ + const SwsUOp *ref = opaque; + const SwsUOp *uop = key; + AVBPrint *bp = ref->data.opaque; + char name[SWS_UOP_NAME_MAX]; + ff_sws_uop_name(uop, name); + av_bprintf(bp, " \\\n MACRO(__VA_ARGS__, %-40s", name); + av_bprintf(bp, ", .type = %-13s, .uop = 
%-24s, .mask = 0x%x", + pixel_types[uop->type].full, uop_names[uop->uop].full, uop->mask); + + const SwsUOpParams *par = &uop->par; + switch (uop->uop) { + case SWS_UOP_READ_PLANAR_FH: + case SWS_UOP_READ_PLANAR_FV: + case SWS_UOP_READ_PLANAR_FV_FMA: + av_bprintf(bp, ", .par.filter.type = %s", pixel_types[par->filter.type].full); + break; + case SWS_UOP_LSHIFT: + case SWS_UOP_RSHIFT: + av_bprintf(bp, ", .par.shift.amount = %u", par->shift.amount); + break; + case SWS_UOP_PERMUTE: + case SWS_UOP_COPY: + av_bprintf(bp, ", .par.swizzle.in = {%d, %d, %d, %d}", + par->swizzle.in[0], par->swizzle.in[1], + par->swizzle.in[2], par->swizzle.in[3]); + break; + case SWS_UOP_MOVE: + av_bprintf(bp, ", .par.move.num_moves = %d", par->move.num_moves); + av_bprintf(bp, ", .par.move.dst = {%d, %d, %d, %d, %d, %d}", + par->move.dst[0], par->move.dst[1], par->move.dst[2], + par->move.dst[3], par->move.dst[4], par->move.dst[5]); + av_bprintf(bp, ", .par.move.src = {%d, %d, %d, %d, %d, %d}", + par->move.src[0], par->move.src[1], par->move.src[2], + par->move.src[3], par->move.src[4], par->move.src[5]); + break; + case SWS_UOP_PACK: + case SWS_UOP_UNPACK: + av_bprintf(bp, ", .par.pack.pattern = {%d, %d, %d, %d}", + par->pack.pattern[0], par->pack.pattern[1], + par->pack.pattern[2], par->pack.pattern[3]); + break; + case SWS_UOP_CLEAR: + av_bprintf(bp, ", .par.clear.one = 0x%x, .par.clear.zero = 0x%x", + par->clear.one, par->clear.zero); + break; + case SWS_UOP_LINEAR: + case SWS_UOP_LINEAR_FMA: + av_bprintf(bp, ", .par.lin.one = 0x%x, .par.lin.zero = 0x%x", + par->lin.one, par->lin.zero); + if (uop->uop == SWS_UOP_LINEAR_FMA) + av_bprintf(bp, ", .par.lin.exact = 0x%x", par->lin.exact); + break; + case SWS_UOP_DITHER: + av_bprintf(bp, ", .par.dither = { .y_offset = {%u, %u, %u, %u}, .size_log2 = %u }", + par->dither.y_offset[0], par->dither.y_offset[1], + par->dither.y_offset[2], par->dither.y_offset[3], + par->dither.size_log2); + break; + } + + av_bprintf(bp, ")"); + return 0; +} + 
+static int generate_entry_args(void *opaque, void *key) +{ + const SwsUOp *ref = opaque; + const SwsUOp *uop = key; + AVBPrint *bp = ref->data.opaque; + char name[SWS_UOP_NAME_MAX]; + ff_sws_uop_name(uop, name); + av_bprintf(bp, " \\\n MACRO(__VA_ARGS__, %-40s, %-13s, %-24s, 0x%x", + name, pixel_types[uop->type].full, uop_names[uop->uop].full, uop->mask); + + const SwsUOpParams *par = &uop->par; + switch (uop->uop) { + case SWS_UOP_READ_PLANAR_FH: + case SWS_UOP_READ_PLANAR_FV: + case SWS_UOP_READ_PLANAR_FV_FMA: + av_bprintf(bp, ", %s", pixel_types[par->filter.type].full); + break; + case SWS_UOP_LSHIFT: + case SWS_UOP_RSHIFT: + av_bprintf(bp, ", %u", par->shift.amount); + break; + case SWS_UOP_PERMUTE: + case SWS_UOP_COPY: + av_bprintf(bp, ", %d, %d, %d, %d", + par->swizzle.in[0], par->swizzle.in[1], + par->swizzle.in[2], par->swizzle.in[3]); + break; + case SWS_UOP_MOVE: + av_bprintf(bp, ", %d", par->move.num_moves); + av_bprintf(bp, ", %d, %d, %d, %d, %d, %d", + par->move.dst[0], par->move.dst[1], par->move.dst[2], + par->move.dst[3], par->move.dst[4], par->move.dst[5]); + av_bprintf(bp, ", %d, %d, %d, %d, %d, %d", + par->move.src[0], par->move.src[1], par->move.src[2], + par->move.src[3], par->move.src[4], par->move.src[5]); + break; + case SWS_UOP_PACK: + case SWS_UOP_UNPACK: + av_bprintf(bp, ", %d, %d, %d, %d", + par->pack.pattern[0], par->pack.pattern[1], + par->pack.pattern[2], par->pack.pattern[3]); + break; + case SWS_UOP_CLEAR: + av_bprintf(bp, ", 0x%05x, 0x%05x", par->clear.one, par->clear.zero); + break; + case SWS_UOP_LINEAR: + case SWS_UOP_LINEAR_FMA: + av_bprintf(bp, ", 0x%05x, 0x%05x", par->lin.one, par->lin.zero); + if (uop->uop == SWS_UOP_LINEAR_FMA) + av_bprintf(bp, ", 0x%05x", par->lin.exact); + break; + case SWS_UOP_DITHER: + av_bprintf(bp, ", %u, %u, %u, %u, %u", + par->dither.y_offset[0], par->dither.y_offset[1], + par->dither.y_offset[2], par->dither.y_offset[3], + par->dither.size_log2); + break; + } + + av_bprintf(bp, ")"); + return 0; 
+} + +static int register_uop(struct AVTreeNode **root, const SwsUOp *uop) +{ + SwsUOp *key = av_memdup(uop, sizeof(*uop)); + if (!key) + return AVERROR(ENOMEM); + memset(&key->data, 0, sizeof(key->data)); + + struct AVTreeNode *node = av_tree_node_alloc(); + if (!node) { + av_free(key); + return AVERROR(ENOMEM); + } + + av_tree_insert(root, key, ff_sws_uop_cmp_v, &node); + if (node) { + av_free(node); + av_free(key); + } + return 0; +} + +static int register_flags(SwsContext *ctx, const SwsOpList *ops, SwsUOpFlags flags) +{ + SwsUOpList *uops = ff_sws_uop_list_alloc(); + if (!uops) + return AVERROR(ENOMEM); + + int ret = ff_sws_ops_translate(ctx, ops, flags, uops); + if (ret < 0) + goto fail; + + struct AVTreeNode **root = ctx->opaque; + for (int i = 0; i < uops->num_ops; i++) { + ret = register_uop(root, &uops->ops[i]); + if (ret < 0) + goto fail; + } + +fail: + ff_sws_uop_list_free(&uops); + return ret; +} + +static const SwsUOpFlags uop_flags[] = { + 0, + SWS_UOP_FLAG_FMA | SWS_UOP_FLAG_MOVE, /* x86 backend */ +}; + +static int register_uops(SwsContext *ctx, const SwsOpList *ops, + SwsCompiledOp *out) +{ + for (int i = 0; i < FF_ARRAY_ELEMS(uop_flags); i++) { + int ret = register_flags(ctx, ops, uop_flags[i]); + if (ret < 0) + return ret; + } + + *out = (SwsCompiledOp) {0}; /* dummy value, will be immediately freed */ + return 0; +} + +/* Dummy backend that just registers all seen uops */ +static const SwsOpBackend backend_uops = { + .name = "uops_gen", + .compile = register_uops, +}; + +static int register_all_uops(SwsContext *ctx, void *graph, SwsOpList *ops) +{ + /* ff_sws_compile_pass() takes over ownership of `ops` */ + SwsOpList *copy = ff_sws_op_list_duplicate(ops); + if (!copy) + return AVERROR(ENOMEM); + + const int flags = SWS_OP_FLAG_DRY_RUN | SWS_OP_FLAG_SPLIT_MEMCPY; + return ff_sws_compile_pass(graph, &backend_uops, &copy, flags, NULL, NULL); +} + +static const SwsFlags flags_list[] = { + 0, + SWS_ACCURATE_RND, /* may insert extra 1x1 dither ops (for 
accurate rounding) */ + SWS_BITEXACT, /* prevents some FMA optimizations */ + SWS_ACCURATE_RND | SWS_BITEXACT, +}; + +/* Limit the range of av_tree_enumerate() to only matching uop and type */ +static int enum_type(void *opaque, void *elem) +{ + const SwsUOp *a = opaque, *b = elem; + if (a->type != b->type) + return (int) b->type - a->type; + if (a->uop != b->uop) + return (int) b->uop - a->uop; + return 0; +} + +static int free_uop_key(void *opaque, void *key) +{ + av_free(key); + return 0; +} + +/** + * Generate a set of boilerplate C preprocessor macros for describing and + * programmatically iterating over all possible SwsUOps. + * + * This function can be quite slow as it iterates over every possible + * combination of pixel formats and flags. + * + * Returns 0 or a negative error code. On success, an allocated string is + * returned via `out_str`, and must be av_free()'d by the caller. + */ +static int sws_uops_macros_gen(char **out_str) +{ + int ret; + struct AVTreeNode *root = NULL; + + AVBPrint bprint, *const bp = &bprint; + av_bprint_init(bp, 0, AV_BPRINT_SIZE_UNLIMITED); + + /* Allocate dummy graph and context for ff_sws_compile_pass() */ + SwsGraph *graph = ff_sws_graph_alloc(); + if (!graph) + return AVERROR(ENOMEM); + + SwsContext *ctx = graph->ctx = sws_alloc_context(); + if (!ctx) { + ret = AVERROR(ENOMEM); + goto fail; + } + + /* Use this to plumb the tree state through all the layers of abstraction */ + ctx->opaque = &root; + ctx->scaler = SWS_SCALE_BILINEAR; /* cheaper to generate filter kernels */ + + /* Register all unique uops over every relevant combination of flags */ + for (int i = 0; i < FF_ARRAY_ELEMS(flags_list); i++) { + ctx->flags = flags_list[i]; + ret = ff_sws_enum_op_lists(ctx, graph, AV_PIX_FMT_NONE, AV_PIX_FMT_NONE, + register_all_uops); + if (ret < 0) + goto fail; + } + + /** + * Additionally make sure planar reads/writes are always available for all + * formats, because checkasm depends on them to be able to verify the + * 
input/output of any other operations. + */ + for (enum SwsPixelType type = SWS_PIXEL_NONE+1; type < SWS_PIXEL_TYPE_NB; type++) { + if (!ff_sws_pixel_type_is_int(type)) + continue; + for (int elems = 1; elems <= 4; elems++) { + for (int rw = 0; rw < 2; rw++) { + SwsUOp uop = { + .type = type, + .uop = rw ? SWS_UOP_WRITE_PLANAR : SWS_UOP_READ_PLANAR, + .mask = SWS_COMP_ELEMS(elems), + }; + + ret = register_uop(&root, &uop); + if (ret < 0) + goto fail; + } + } + } + + #define BPRINT_STR(str) av_bprint_append_data(bp, str, strlen(str)) + BPRINT_STR( +"/**\n" +" * This file is automatically generated. Do not edit manually.\n" +" * To regenerate, run: make fate-sws-uops-macros GEN=1\n" +" */\n" +"\n" +"#ifndef SWSCALE_UOPS_MACROS_H\n" +"#define SWSCALE_UOPS_MACROS_H\n" +"\n" +"/**\n" +" * Boilerplate helper macros, for template-based backends. These will be\n" +" * instantiated like this, with parameters in struct order:\n" +" * MACRO(__VA_ARGS__, NAME, UOP, TYPE, MASK, [PARAMS,])\n" +" * The _STRUCT variants pass all arguments in C struct syntax, while the\n" +" * plain variants give them as separate C values (e.g. for use in calls)\n" +" */\n" +"#define SWS_GLUE3(x, y, z) x ## _ ## y ## _ ## z\n" +"#define SWS_FOR(TYPE, UOP, MACRO, ...) \\\n" +" SWS_GLUE3(SWS_FOR, TYPE, UOP)(MACRO, __VA_ARGS__)\n" +"#define SWS_FOR_STRUCT(TYPE, UOP, MACRO, ...) 
\\\n" +" SWS_GLUE3(SWS_FOR_STRUCT, TYPE, UOP)(MACRO, __VA_ARGS__)\n" +"\n"); + + SwsUOp key = { .data.opaque = bp }; + for (key.type = SWS_PIXEL_NONE + 1; key.type < SWS_PIXEL_TYPE_NB; key.type++) { + for (key.uop = SWS_UOP_INVALID + 1; key.uop < SWS_UOP_TYPE_NB; key.uop++) { + const char *macro = uop_names[key.uop].full + sizeof("SWS_UOP_") - 1; + const char *prefix = pixel_types[key.type].prefix; + av_bprintf(bp, "#define SWS_FOR_%s%s(MACRO, ...)", prefix, macro); + av_tree_enumerate(root, &key, enum_type, generate_entry_args); + av_bprintf(bp, "\n"); + av_bprintf(bp, "#define SWS_FOR_STRUCT_%s%s(MACRO, ...)", prefix, macro); + av_tree_enumerate(root, &key, enum_type, generate_entry_struct); + av_bprintf(bp, "\n"); + } + } + + BPRINT_STR("\n#endif /* SWSCALE_UOPS_MACROS_H */"); + ret = av_bprint_finalize(bp, out_str); + +fail: + av_bprint_finalize(bp, NULL); + av_tree_enumerate(root, NULL, NULL, free_uop_key); + av_tree_destroy(root); + ff_sws_graph_free(&graph); + sws_free_context(&ctx); + return ret; +} + +int main(int argc, char **argv) +{ +#ifdef _WIN32 + _setmode(_fileno(stdout), _O_BINARY); +#endif + + char *macros = NULL; + int ret = sws_uops_macros_gen(&macros); + if (ret >= 0) + puts(macros); + av_free(macros); + return ret; +} diff --git a/tests/fate/libswscale.mak b/tests/fate/libswscale.mak index 622f88041c..9de79d736b 100644 --- a/tests/fate/libswscale.mak +++ b/tests/fate/libswscale.mak @@ -56,9 +56,9 @@ endif # Disable on bigendian because it would result in a different iteration order # (and thus output) due to sorting by memcmp() on the parameters struct. 
-fate-sws-uops-macros: libswscale/tests/sws_ops$(EXESUF) +fate-sws-uops-macros: libswscale/uops_macros_gen$(EXESUF) fate-sws-uops-macros: REF = $(SRC_PATH)/libswscale/uops_macros.h -fate-sws-uops-macros: CMD = run libswscale/tests/sws_ops$(EXESUF) -macros +fate-sws-uops-macros: CMD = run libswscale/uops_macros_gen$(EXESUF) endif _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
