From: Niklas Haas <g...@haasn.dev> This can turn any compatible sequence of operations into a single packed shuffle, including packed swizzling, grayscale->RGB conversion, endianness swapping, RGB bit depth conversions, rgb24->rgb0 alpha clearing and more. --- libswscale/ops_internal.h | 17 +++++++ libswscale/ops_optimizer.c | 96 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+)
diff --git a/libswscale/ops_internal.h b/libswscale/ops_internal.h index 9fd866430b..ab957b0837 100644 --- a/libswscale/ops_internal.h +++ b/libswscale/ops_internal.h @@ -105,4 +105,21 @@ int ff_sws_ops_compile_backend(SwsContext *ctx, const SwsOpBackend *backend, */ int ff_sws_ops_compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out); +/** + * "Solve" an op list into a fixed shuffle mask, with an optional ability to + * also directly clear the output value (for e.g. rgb24 -> rgb0). + * + * @param ops The operation list to decompose. + * @param shuffle The output shuffle mask. + * @param size The size (in bytes) of the output shuffle mask. + * @param clear_val If nonzero, this index will be used to clear the output. + * @param read_bytes Returns the number of bytes read per shuffle iteration. + * @param write_bytes Returns the number of bytes written per shuffle iteration. + * + * @return The number of pixels processed per iteration, or a negative error + code; in particular AVERROR(ENOTSUP) for unsupported operations. + */ +int ff_sws_solve_shuffle(const SwsOpList *ops, uint8_t shuffle[], int size, + uint8_t clear_val, int *read_bytes, int *write_bytes); + #endif diff --git a/libswscale/ops_optimizer.c b/libswscale/ops_optimizer.c index d503bf7bf3..9cde60ed58 100644 --- a/libswscale/ops_optimizer.c +++ b/libswscale/ops_optimizer.c @@ -19,9 +19,11 @@ */ #include "libavutil/avassert.h" +#include <libavutil/bswap.h> #include "libavutil/rational.h" #include "ops.h" +#include "ops_internal.h" #define Q(N) ((AVRational) { N, 1 }) @@ -781,3 +783,97 @@ retry: return 0; } + +int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[], + int shuffle_size, uint8_t clear_val, + int *out_read_bytes, int *out_write_bytes) +{ + const SwsOp read = ops->ops[0]; + const int read_size = ff_sws_pixel_type_size(read.type); + uint32_t mask[4] = {0}; + + if (!ops->num_ops || read.op != SWS_OP_READ) + return AVERROR(EINVAL); + if (read.rw.frac || (!read.rw.packed && read.rw.elems > 1)) + return AVERROR(ENOTSUP); + + for (int i = 0; i < read.rw.elems; i++) + mask[i] = 0x01010101 * i * read_size + 0x03020100; + + for (int opidx = 1; opidx < ops->num_ops; opidx++) { + const SwsOp *op = &ops->ops[opidx]; + switch (op->op) { + case SWS_OP_SWIZZLE: { + uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] }; + for (int i = 0; i < 4; i++) + mask[i] = orig[op->swizzle.in[i]]; + break; + } + + case SWS_OP_SWAP_BYTES: + for (int i = 0; i < 4; i++) { + switch (ff_sws_pixel_type_size(op->type)) { + case 2: mask[i] = av_bswap16(mask[i]); break; + case 4: mask[i] = av_bswap32(mask[i]); break; + } + } + break; + + case SWS_OP_CLEAR: + for (int i = 0; i < 4; i++) { + if (!op->c.q4[i].den) + continue; + if (op->c.q4[i].num != 0 || !clear_val) + return AVERROR(ENOTSUP); + mask[i] = 0x1010101ul * clear_val; + } + break; + + case SWS_OP_CONVERT: { + if (!op->convert.expand) + return AVERROR(ENOTSUP); + for (int i = 0; i < 4; i++) { + switch (ff_sws_pixel_type_size(op->type)) { + case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF); break; + case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break; + } + } + break; + } + + case SWS_OP_WRITE: { + if (op->rw.frac || !op->rw.packed) + return AVERROR(ENOTSUP); + + /* Initialize to no-op */ + memset(shuffle, clear_val, shuffle_size); + + const int write_size = ff_sws_pixel_type_size(op->type); + const int read_chunk = read.rw.elems * read_size; + const int write_chunk = op->rw.elems * write_size; + const int num_groups = shuffle_size / FFMAX(read_chunk, write_chunk); + for (int n = 0; n < num_groups; n++) { + const int base_in = n * read_chunk; + const int base_out = n * write_chunk; + for (int i = 0; i < op->rw.elems; i++) { + const int offset = base_out + i * write_size; + for (int b = 0; b < write_size; b++) { + const uint8_t idx = mask[i] >> (b * 8); + if (idx != clear_val) + shuffle[offset + b] = base_in + idx; + } + } + } + + *out_read_bytes = num_groups * read_chunk; + *out_write_bytes = num_groups * write_chunk; + return num_groups; + } + + default: + return AVERROR(ENOTSUP); + } + } + + return AVERROR(EINVAL); +} -- 2.49.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".