This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 3b1d7cd1f7308a1b5a6ed31a6b8dd4e4d73fe742 Author: Niklas Haas <[email protected]> AuthorDate: Thu Jun 4 11:06:44 2026 +0200 Commit: Niklas Haas <[email protected]> CommitDate: Thu Jun 4 11:44:52 2026 +0200 tests/checkasm: switch to shared libcheckasm implementation The checkasm tool originated in x264. It was later rewritten and modernized for FFmpeg (and relicensed to LGPL). For the dav1d project, it was relicensed again to 2-clause BSD (with permission from the relevant authors). The FFmpeg and dav1d implementations of checkasm have since evolved independently (with some amount of ported code between the two, with relicensing permission where relevant). To synchronize the development, and to make it possible to easily adopt checkasm in other projects, it has been split out into a standalone project/library on its own, developed at https://code.videolan.org/videolan/checkasm/. That version has all the features of checkasm in both FFmpeg and dav1d, and has got a number of extra improvements on top: - More/fixed tests (e.g. properly clobbering high bits of 32-bit registers on most platforms), - Vastly improved overall performance / runtime for benchmarking, due primarily to the ability to scale the runtime of each test to that test's complexity. - Much more robust statistical analysis of benchmarking results; including robust outlier rejection, an estimation of the histogram, and the ability to report the variance / stddev in addition to the (trimmed) mean. - Interactive HTML and JSON output formats in addition to CSV/TSV. - More readable and user-friendly output across the board, especially for failures and data dumps (e.g. also showing errors inside padding bytes). - Better cross-platform support, including dynamic fallback of timer implementations on ARM platforms, a better RISC-V and AArch64 harness, and more. 
On AArch64, it tests which timer out of pmccntr_el0, Linux perf, macOS kperf, cntvct_el0 is available, without the user needing to configure things, and falls back on clock_gettime if none of them can be used. This means one automatically gets the best available timer, if userspace access to pmccntr_el0 has been unlocked with a kernel module, or if one has permission to use the perf API, or if the cntvct_el0 is exact enough to be useful. On AArch64 macOS, there is now a test harness that catches clobbered registers and stack clobbering, like on other platforms. - An option for setting affinity, for benchmarking on heterogeneous core systems. (On Linux, this is already easily done through taskset, but on Windows, the checkasm built-in option makes it possible there as well, and portable.) - Printing of the tested CPU core name, where possible. To integrate this external implementation of checkasm into FFmpeg, without having to build libcheckasm as an external library, the upstream sources are added as a git subtree, and integrated into the FFmpeg build system as a foreign source. For the long and storied history of how we arrived at this solution, see: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22546 The relevant config headers for checkasm are generated by configure, and the sources are built as part of the main ffmpeg build. The upstream sources, while they use meson as primary build system, are structured to make it easy to build as part of a foreign build system. The existing testcases are mostly kept untouched (only three minor changes are required, in crc.c, sw_ops.c and vp8dsp.c), while the majority of the logic from checkasm.c, checkasm.h and the arch-specific assembly files is removed, replaced with the external implementation. 
Co-Authored-By: Martin Storsjö <[email protected]> Signed-off-by: Niklas Haas <[email protected]> --- .gitignore | 2 + Makefile | 2 +- configure | 66 +++ tests/checkasm/Makefile | 47 +- tests/checkasm/aarch64/checkasm.S | 198 -------- tests/checkasm/arm/checkasm.S | 197 -------- tests/checkasm/checkasm.c | 988 +------------------------------------- tests/checkasm/checkasm.h | 373 +------------- tests/checkasm/crc.c | 2 +- tests/checkasm/riscv/checkasm.S | 337 ------------- tests/checkasm/sw_ops.c | 4 +- tests/checkasm/vp8dsp.c | 2 +- tests/checkasm/x86/checkasm.asm | 244 ---------- 13 files changed, 143 insertions(+), 2319 deletions(-) diff --git a/.gitignore b/.gitignore index 073bbf8e65..48182e7780 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,8 @@ /ffmpeg /ffplay /ffprobe +/checkasm_config_generated.asm +/checkasm_config_generated.h /config.asm /config.h /config_components.asm diff --git a/Makefile b/Makefile index f296e87ed4..131d2b62ef 100644 --- a/Makefile +++ b/Makefile @@ -187,7 +187,7 @@ clean:: $(RM) -rf coverage.info coverage.info.in lcov distclean:: clean - $(RM) .version config.asm config.h config_components.* mapfile \ + $(RM) .version checkasm_config_generated.* config.asm config.h config_components.* mapfile \ ffbuild/.config ffbuild/config.* libavutil/avconfig.h \ version.h libavutil/ffversion.h libavcodec/codec_names.h \ libavcodec/bsf_list.c libavformat/protocol_list.c \ diff --git a/configure b/configure index 7ef7adc53d..a67ed344ae 100755 --- a/configure +++ b/configure @@ -2188,6 +2188,7 @@ HWACCEL_AUTODETECT_LIBRARY_LIST=" # catchall list of things that require external libs to link EXTRALIBS_LIST=" + checkasm cpu_init cws2fws " @@ -2572,6 +2573,7 @@ SYSTEM_FUNCS=" glXGetProcAddress gmtime_r inet_aton + ioctl isatty kbhit localtime_r @@ -2589,6 +2591,7 @@ SYSTEM_FUNCS=" prctl pthread_cancel pthread_set_name_np + pthread_setaffinity_np pthread_setname_np sched_getaffinity SecItemImport @@ -2597,6 +2600,8 @@ SYSTEM_FUNCS=" 
SetDllDirectory setmode setrlimit + sigaction + siglongjmp Sleep strerror_r sysconf @@ -4356,6 +4361,7 @@ vaapi_transcode_example_deps="avcodec avformat avutil h264_vaapi_encoder" qsv_transcode_example_deps="avcodec avformat avutil h264_qsv_encoder" # EXTRALIBS_LIST +checkasm_extralibs="advapi32_extralibs pthreads_extralibs" cpu_init_extralibs="pthreads_extralibs" cws2fws_extralibs="zlib_extralibs" @@ -7010,7 +7016,10 @@ check_func_headers conio.h kbhit check_func_headers io.h setmode check_func_headers lzo/lzo1x.h lzo1x_999_compress check_func_headers mach/mach_time.h mach_absolute_time +check_func_headers setjmp.h siglongjmp +check_func_headers signal.h sigaction check_func_headers stdlib.h getenv +check_func_headers sys/ioctl.h ioctl check_func_headers sys/stat.h lstat check_func_headers sys/auxv.h getauxval check_func_headers sys/auxv.h elf_aux_info @@ -7258,6 +7267,9 @@ if ! disabled pthreads && ! enabled w32threads && ! enabled os2threads; then fi check_lib pthread_set_name_np "$hdrs" pthread_set_name_np -lpthread check_lib pthread_setname_np "$hdrs" pthread_setname_np -lpthread + # _GNU_SOURCE gets defined locally in checkasm where + # pthread_setaffinity_np gets used. + check_lib pthread_setaffinity_np "$hdrs" pthread_setaffinity_np -lpthread -D_GNU_SOURCE fi fi @@ -8844,6 +8856,60 @@ echo "endif # FFMPEG_CONFIG_MAK" >> ffbuild/config.mak cp_if_changed $TMPH config_components.h enabled x86asm && cp_if_changed $TMPASM config_components.asm +# Reopen a new TMPH for checkasm_config_generated.h +cat > $TMPH <<EOF +/* Automatically generated by configure - do not modify! 
*/ +#ifndef FFMPEG_CHECKASM_CONFIG_GENERATED_H +#define FFMPEG_CHECKASM_CONFIG_GENERATED_H + +#define CHECKASM_VERSION "1.2.0" +EOF + +print_config HAVE_ "$TMPH" elf_aux_info getauxval ioctl isatty linux_perf pthread_np_h pthread_setaffinity_np sigaction siglongjmp clock_gettime prctl +print_config CHECKASM_ARCH_ "$TMPH" $ARCH_LIST + +# The callcheck harness requires x86asm/rv support on these platforms, so +# override arch-autodetection in the header to avoid tests depending on them +if enabled x86 && disabled x86asm; then + cat >> $TMPH <<EOF +#undef CHECKASM_ARCH_X86 +#undef CHECKASM_ARCH_X86_32 +#undef CHECKASM_ARCH_X86_64 +#define CHECKASM_ARCH_X86 0 +EOF +fi + +if enabled riscv && disabled rv; then + cat >> $TMPH <<EOF +#undef CHECKASM_ARCH_RISCV +#define CHECKASM_ARCH_RISCV 0 +EOF +fi + +if enabled aarch64; then + print_config HAVE_ "$TMPH" as_archext_sve_directive sve + print_config HAVE_ "$TMPH" as_archext_sme_directive sme +elif enabled riscv; then + print_config HAVE_ "$TMPH" sys_hwprobe_h asm_hwprobe_h +fi + +cat >> $TMPH <<EOF +#endif /* FFMPEG_CHECKASM_CONFIG_H */ +EOF + +cp_if_changed $TMPH checkasm_config_generated.h + +if enabled x86asm; then + append config_files $TMPASM + cat > $TMPASM <<EOF +; Automatically generated by configure - do not modify! +EOF + # We pass -DPREFIX on the command line when necessary; we don't need + # to include it in the generated config header. 
+ print_config "" "$TMPASM" pic + cp_if_changed $TMPASM checkasm_config_generated.asm +fi + cat > $TMPH <<EOF /* Generated by ffmpeg configure */ #ifndef AVUTIL_AVCONFIG_H diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile index 53d8f3ec66..16711a30c5 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile @@ -106,26 +106,57 @@ AVUTILOBJS-$(CONFIG_PIXELUTILS) += pixelutils.o CHECKASMOBJS-$(CONFIG_AVUTIL) += $(AVUTILOBJS) $(AVUTILOBJS-yes) -CHECKASMOBJS-$(ARCH_AARCH64) += aarch64/checkasm.o -CHECKASMOBJS-$(HAVE_ARMV5TE_EXTERNAL) += arm/checkasm.o -CHECKASMOBJS-$(HAVE_RV) += riscv/checkasm.o -CHECKASMOBJS-$(HAVE_X86ASM) += x86/checkasm.o - -CHECKASMOBJS += $(CHECKASMOBJS-yes) checkasm.o +EXT_CHECKASMOBJS-$(ARCH_AARCH64) += ext/src/arm/checkasm_64.o +EXT_CHECKASMOBJS-$(ARCH_ARM) += ext/src/arm/checkasm_32.o +EXT_CHECKASMOBJS-$(ARCH_LOONGARCH) += ext/src/loongarch/checkasm.o +EXT_CHECKASMOBJS-$(HAVE_RV) += ext/src/riscv/callcheck.o +EXT_CHECKASMOBJS-$(HAVE_X86ASM) += ext/src/x86/checkasm.o + +EXT_CHECKASMOBJS += ext/src/arm/cpu.o \ + ext/src/checkasm.o \ + ext/src/cpu.o \ + ext/src/function.o \ + ext/src/perf.o \ + ext/src/perf/arm.o \ + ext/src/perf/linux.o \ + ext/src/perf/macos_kperf.o \ + ext/src/riscv/cpu.o \ + ext/src/signal.o \ + ext/src/stackguard.o \ + ext/src/stats.o \ + ext/src/utils.o \ + ext/src/x86/cpu.o \ + +EXT_CHECKASMOBJS += $(EXT_CHECKASMOBJS-yes) +$(EXT_CHECKASMOBJS:%=tests/checkasm/%): CFLAGS += -I$(SRC_PATH)/tests/checkasm/ext/src +$(EXT_CHECKASMOBJS:%=tests/checkasm/%): ASFLAGS += -I$(SRC_PATH)/tests/checkasm/ext/src +$(EXT_CHECKASMOBJS:%=tests/checkasm/%): X86ASMFLAGS += -I$(SRC_PATH)/tests/checkasm/ext/src + +CHECKASMOBJS += $(CHECKASMOBJS-yes) checkasm.o $(EXT_CHECKASMOBJS) CHECKASMOBJS := $(sort $(CHECKASMOBJS:%=tests/checkasm/%)) -include $(CHECKASMOBJS:.o=.d) CHECKASMDIRS := $(sort $(dir $(CHECKASMOBJS))) $(CHECKASMOBJS): | $(CHECKASMDIRS) +# Prepend the local checkasm include path before any potential external +# 
(possibly outdated) version of libcheckasm. +$(CHECKASMOBJS): CFLAGS := -I$(SRC_PATH)/tests/checkasm/ext/include $(CFLAGS) +$(CHECKASMOBJS): ASFLAGS := -I$(SRC_PATH)/tests/checkasm/ext/include $(ASFLAGS) OUTDIRS += $(CHECKASMDIRS) +# Expose checkasm_config_generated.h/asm to checkasm tests as well, as it +# modifies the behavior of include/checkasm/header_config.h +$(CHECKASMOBJS): CFLAGS += -DCHECKASM_HAVE_GENERATED_H +$(CHECKASMOBJS): ASFLAGS += -DCHECKASM_HAVE_GENERATED_H +$(CHECKASMOBJS): X86ASMFLAGS += -DCHECKASM_HAVE_GENERATED_H + tests/checkasm/checkasm.o: CFLAGS += -Umain CHECKASM := tests/checkasm/checkasm$(EXESUF) $(CHECKASM): $(CHECKASMOBJS) $(FF_STATIC_DEP_LIBS) - $(call LINK,$(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $(CHECKASMOBJS) $(FF_STATIC_DEP_LIBS) $(EXTRALIBS-avcodec) $(EXTRALIBS-avfilter) $(EXTRALIBS-avformat) $(EXTRALIBS-avutil) $(EXTRALIBS-swresample) $(EXTRALIBS)) + $(call LINK,$(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $(CHECKASMOBJS) $(FF_STATIC_DEP_LIBS) $(EXTRALIBS-avcodec) $(EXTRALIBS-avfilter) $(EXTRALIBS-avformat) $(EXTRALIBS-avutil) $(EXTRALIBS-swresample) $(EXTRALIBS) $(EXTRALIBS-checkasm)) run-checkasm: $(CHECKASM) run-checkasm: @@ -136,6 +167,6 @@ checkasm: $(CHECKASM) testclean:: checkasmclean checkasmclean: - $(RM) $(CHECKASM) $(CLEANSUFFIXES:%=tests/checkasm/%) $(CLEANSUFFIXES:%=tests/checkasm/$(ARCH)/%) + $(RM) $(CHECKASM) $(CLEANSUFFIXES:%=tests/checkasm/%) $(CLEANSUFFIXES:%=tests/checkasm/$(ARCH)/%) $(CHECKASMOBJS) .PHONY: checkasm diff --git a/tests/checkasm/aarch64/checkasm.S b/tests/checkasm/aarch64/checkasm.S deleted file mode 100644 index 4b2db86a9e..0000000000 --- a/tests/checkasm/aarch64/checkasm.S +++ /dev/null @@ -1,198 +0,0 @@ -/**************************************************************************** - * Assembly testing and benchmarking tool - * Copyright (c) 2015 Martin Storsjo - * Copyright (c) 2015 Janne Grunau - * - * This file is part of FFmpeg. 
- * - * FFmpeg is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. - *****************************************************************************/ - -#include "libavutil/aarch64/asm.S" - -const register_init, align=4 - .quad 0x21f86d66c8ca00ce - .quad 0x75b6ba21077c48ad - .quad 0xed56bb2dcb3c7736 - .quad 0x8bda43d3fd1a7e06 - .quad 0xb64a9c9e5d318408 - .quad 0xdf9a54b303f1d3a3 - .quad 0x4a75479abd64e097 - .quad 0x249214109d5d1c88 - .quad 0x1a1b2550a612b48c - .quad 0x79445c159ce79064 - .quad 0x2eed899d5a28ddcd - .quad 0x86b2536fcd8cf636 - .quad 0xb0856806085e7943 - .quad 0x3f2bf84fc0fcca4e - .quad 0xacbd382dcf5b8de2 - .quad 0xd229e1f5b281303f - .quad 0x71aeaff20b095fd9 - .quad 0xab63e2e11fa38ed9 -endconst - - -const error_message_register - .asciz "failed to preserve register" -error_message_stack: - .asciz "stack clobbered" -endconst - - -// max number of args used by any asm function. -#define MAX_ARGS 15 - -#define CLOBBER_STACK ((8*MAX_ARGS + 15) & ~15) - -function checkasm_stack_clobber, export=1 - mov x3, sp - mov x2, #CLOBBER_STACK -1: - stp x0, x1, [sp, #-16]! - subs x2, x2, #16 - b.gt 1b - mov sp, x3 - ret -endfunc - -// + 16 for stack canary reference -#define ARG_STACK (((8*(MAX_ARGS - 8) + 15) & ~15) + 16) - -function checkasm_checked_call, export=1 - stp x29, x30, [sp, #-16]! 
- mov x29, sp - stp x19, x20, [sp, #-16]! - stp x21, x22, [sp, #-16]! - stp x23, x24, [sp, #-16]! - stp x25, x26, [sp, #-16]! - stp x27, x28, [sp, #-16]! - stp d8, d9, [sp, #-16]! - stp d10, d11, [sp, #-16]! - stp d12, d13, [sp, #-16]! - stp d14, d15, [sp, #-16]! - - movrel x9, register_init - ldp d8, d9, [x9], #16 - ldp d10, d11, [x9], #16 - ldp d12, d13, [x9], #16 - ldp d14, d15, [x9], #16 - ldp x19, x20, [x9], #16 - ldp x21, x22, [x9], #16 - ldp x23, x24, [x9], #16 - ldp x25, x26, [x9], #16 - ldp x27, x28, [x9], #16 - - sub sp, sp, #ARG_STACK -.equ pos, 0 -.rept MAX_ARGS-8 - // Skip the first 8 args, that are loaded into registers - ldr x9, [x29, #16 + 8*8 + pos] - str x9, [sp, #pos] -.equ pos, pos + 8 -.endr - - // Fill x8-x17 with garbage. This doesn't have to be preserved, - // but avoids relying on them having any particular value. - movrel x9, register_init - ldp x10, x11, [x9], #32 - ldp x12, x13, [x9], #32 - ldp x14, x15, [x9], #32 - ldp x16, x17, [x9], #32 - ldp x8, x9, [x9] - - // For stack overflows, the callee is free to overwrite the parameters - // that were passed on the stack (if any), so we can only check after - // that point. First figure out how many parameters the function - // really took on the stack: - ldr w2, [x29, #16 + 8*8 + (MAX_ARGS-8)*8] - // Load the first non-parameter value from the stack, that should be - // left untouched by the function. Store a copy of it inverted, so that - // e.g. overwriting everything with zero would be noticed. - ldr x2, [sp, x2, lsl #3] - mvn x2, x2 - str x2, [sp, #ARG_STACK-8] - - // Load the in-register arguments - mov x12, x0 - ldp x0, x1, [x29, #16] - ldp x2, x3, [x29, #32] - ldp x4, x5, [x29, #48] - ldp x6, x7, [x29, #64] - // Call the target function - blr x12 - - // Load the number of stack parameters, stack canary and its reference - ldr w2, [x29, #16 + 8*8 + (MAX_ARGS-8)*8] - ldr x2, [sp, x2, lsl #3] - ldr x3, [sp, #ARG_STACK-8] - - add sp, sp, #ARG_STACK - stp x0, x1, [sp, #-16]! 
- - mvn x3, x3 - cmp x2, x3 - b.ne 2f - - movrel x9, register_init - movi v3.8h, #0 - -.macro check_reg_neon reg1, reg2 - ldr q1, [x9], #16 - uzp1 v2.2d, v\reg1\().2d, v\reg2\().2d - eor v1.16b, v1.16b, v2.16b - orr v3.16b, v3.16b, v1.16b -.endm - check_reg_neon 8, 9 - check_reg_neon 10, 11 - check_reg_neon 12, 13 - check_reg_neon 14, 15 - uqxtn v3.8b, v3.8h - umov x3, v3.d[0] - -.macro check_reg reg1, reg2 - ldp x0, x1, [x9], #16 - eor x0, x0, \reg1 - eor x1, x1, \reg2 - orr x3, x3, x0 - orr x3, x3, x1 -.endm - check_reg x19, x20 - check_reg x21, x22 - check_reg x23, x24 - check_reg x25, x26 - check_reg x27, x28 - - cbz x3, 0f - - movrel x0, error_message_register - b 1f -2: - movrel x0, error_message_stack -1: - bl X(checkasm_fail_func) -0: - ldp x0, x1, [sp], #16 - ldp d14, d15, [sp], #16 - ldp d12, d13, [sp], #16 - ldp d10, d11, [sp], #16 - ldp d8, d9, [sp], #16 - ldp x27, x28, [sp], #16 - ldp x25, x26, [sp], #16 - ldp x23, x24, [sp], #16 - ldp x21, x22, [sp], #16 - ldp x19, x20, [sp], #16 - ldp x29, x30, [sp], #16 - ret -endfunc diff --git a/tests/checkasm/arm/checkasm.S b/tests/checkasm/arm/checkasm.S deleted file mode 100644 index 601c2f66b8..0000000000 --- a/tests/checkasm/arm/checkasm.S +++ /dev/null @@ -1,197 +0,0 @@ -/**************************************************************************** - * Assembly testing and benchmarking tool - * Copyright (c) 2015 Martin Storsjo - * Copyright (c) 2015 Janne Grunau - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. - *****************************************************************************/ - -#include "libavutil/arm/asm.S" - -/* override fpu so that NEON instructions are rejected */ -#if HAVE_VFP -FPU .fpu vfp -ELF .eabi_attribute 10, 0 @ suppress Tag_FP_arch -#endif - -const register_init, align=3 - .quad 0x21f86d66c8ca00ce - .quad 0x75b6ba21077c48ad - .quad 0xed56bb2dcb3c7736 - .quad 0x8bda43d3fd1a7e06 - .quad 0xb64a9c9e5d318408 - .quad 0xdf9a54b303f1d3a3 - .quad 0x4a75479abd64e097 - .quad 0x249214109d5d1c88 -endconst - -const error_message_fpscr - .asciz "failed to preserve register FPSCR, changed bits: %x" -error_message_gpr: - .asciz "failed to preserve register r%d" -error_message_vfp: - .asciz "failed to preserve register d%d" -error_message_stack: - .asciz "failed to preserve stack" -endconst - -@ max number of args used by any asm function. -#define MAX_ARGS 15 - -#define ARG_STACK 4*(MAX_ARGS - 4) - -@ Align the used stack space to 8 to preserve the stack alignment. -@ +8 for stack canary reference. -#define ARG_STACK_A (((ARG_STACK + pushed + 7) & ~7) - pushed + 8) - -.macro clobbercheck variant -.equ pushed, 4*9 -function checkasm_checked_call_\variant, export=1 - push {r4-r11, lr} -.ifc \variant, vfp - vpush {d8-d15} - fmrx r4, FPSCR - push {r4} -.equ pushed, pushed + 16*4 + 4 -.endif - - movrel r12, register_init -.ifc \variant, vfp - vldm r12, {d8-d15} -.endif - ldm r12, {r4-r11} - - sub sp, sp, #ARG_STACK_A -.equ pos, 0 -.rept MAX_ARGS-4 - ldr r12, [sp, #ARG_STACK_A + pushed + 8 + pos] - str r12, [sp, #pos] -.equ pos, pos + 4 -.endr - - @ For stack overflows, the callee is free to overwrite the parameters - @ that were passed on the stack (if any), so we can only check after - @ that point. 
First figure out how many parameters the function - @ really took on the stack: - ldr r12, [sp, #ARG_STACK_A + pushed + 8 + 4*(MAX_ARGS-4)] - @ Load the first non-parameter value from the stack, that should be - @ left untouched by the function. Store a copy of it inverted, so that - @ e.g. overwriting everything with zero would be noticed. - ldr r12, [sp, r12, lsl #2] - mvn r12, r12 - str r12, [sp, #ARG_STACK_A - 4] - - mov r12, r0 - mov r0, r2 - mov r1, r3 - ldrd r2, r3, [sp, #ARG_STACK_A + pushed] - @ Call the target function - blx r12 - - @ Load the number of stack parameters, stack canary and its reference - ldr r12, [sp, #ARG_STACK_A + pushed + 8 + 4*(MAX_ARGS-4)] - ldr r2, [sp, r12, lsl #2] - ldr r3, [sp, #ARG_STACK_A - 4] - - add sp, sp, #ARG_STACK_A - push {r0, r1} - - mvn r3, r3 - cmp r2, r3 - bne 5f - - movrel r12, register_init -.ifc \variant, vfp -.macro check_reg_vfp, dreg, offset - ldrd r2, r3, [r12, #8 * (\offset)] - vmov r0, lr, \dreg - eor r2, r2, r0 - eor r3, r3, lr - orrs r2, r2, r3 - bne 4f -.endm - -.irp n, 8, 9, 10, 11, 12, 13, 14, 15 - @ keep track of the checked double/SIMD register - mov r1, #\n - check_reg_vfp d\n, \n-8 -.endr -.purgem check_reg_vfp - - fmrx r1, FPSCR - ldr r3, [sp, #8] - eor r1, r1, r3 - @ Ignore changes in bits 0-4 and 7 - bic r1, r1, #0x9f - @ Ignore changes in the topmost 5 bits - bics r1, r1, #0xf8000000 - bne 3f -.endif - - @ keep track of the checked GPR - mov r1, #4 -.macro check_reg reg1, reg2= - ldrd r2, r3, [r12], #8 - eors r2, r2, \reg1 - bne 2f - add r1, r1, #1 -.ifnb \reg2 - eors r3, r3, \reg2 - bne 2f -.endif - add r1, r1, #1 -.endm - check_reg r4, r5 - check_reg r6, r7 -@ r9 is a volatile register in the ios ABI -#ifdef __APPLE__ - check_reg r8 -#else - check_reg r8, r9 -#endif - check_reg r10, r11 -.purgem check_reg - - b 0f -5: - movrel r0, error_message_stack - b 1f -4: - movrel r0, error_message_vfp - b 1f -3: - movrel r0, error_message_fpscr - b 1f -2: - movrel r0, error_message_gpr -1: - bl 
X(checkasm_fail_func) -0: - pop {r0, r1} -.ifc \variant, vfp - pop {r2} - fmxr FPSCR, r2 - vpop {d8-d15} -.endif - pop {r4-r11, pc} -endfunc -.endm - -#if HAVE_VFP || HAVE_NEON -clobbercheck vfp -#endif -clobbercheck novfp diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index 93298c46a7..4f54db8e1d 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -1,5 +1,6 @@ /* * Assembly testing and benchmarking tool + * Copyright (c) 2025 Niklas Haas * Copyright (c) 2015 Henrik Gramner * Copyright (c) 2008 Loren Merritt * @@ -48,73 +49,12 @@ #include "config.h" #include "config_components.h" -#ifndef _GNU_SOURCE -# define _GNU_SOURCE // for syscall (performance monitoring API), strsignal() -#endif +#include <checkasm/checkasm.h> -#include <signal.h> -#include <stdarg.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> #include "checkasm.h" -#include "libavutil/avassert.h" -#include "libavutil/common.h" -#include "libavutil/cpu.h" -#include "libavutil/intfloat.h" -#include "libavutil/random_seed.h" - -#if HAVE_IO_H -#include <io.h> -#endif -#if HAVE_PRCTL -#include <sys/prctl.h> -#endif - -#if defined(_WIN32) && !defined(SIGBUS) -/* non-standard, use the same value as mingw-w64 */ -#define SIGBUS 10 -#endif - -#if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE -#include <windows.h> -#define COLOR_RED FOREGROUND_RED -#define COLOR_GREEN FOREGROUND_GREEN -#define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN) -#else -#define COLOR_RED 1 -#define COLOR_GREEN 2 -#define COLOR_YELLOW 3 -#endif - -#if HAVE_UNISTD_H -#include <unistd.h> -#endif - -#if !HAVE_ISATTY -#define isatty(fd) 1 -#endif - -#if ARCH_AARCH64 -#include "libavutil/aarch64/cpu.h" -#elif ARCH_RISCV -#include "libavutil/riscv/cpu.h" -#endif - -#if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL -#include "libavutil/arm/cpu.h" - -void (*checkasm_checked_call)(void *func, int dummy, ...) 
= checkasm_checked_call_novfp; -#endif - -/* Trade-off between speed and accuracy */ -uint64_t bench_runs = 1U << 10; /* List of tests to invoke */ -static const struct { - const char *name; - void (*func)(void); -} tests[] = { +static const CheckasmTest tests[] = { /* NOTE: When adding a new test to this list here, it also needs to be * added in tests/fate/checkasm.mak, otherwise it doesn't get executed * as part of "make fate" or "make fate-checkasm". */ @@ -380,11 +320,7 @@ static const struct { }; /* List of cpu flags to check */ -static const struct { - const char *name; - const char *suffix; - int flag; -} cpus[] = { +static const CheckasmCpuInfo cpuflags[] = { #if ARCH_AARCH64 { "ARMV8", "armv8", AV_CPU_FLAG_ARMV8 }, { "NEON", "neon", AV_CPU_FLAG_NEON }, @@ -449,917 +385,19 @@ static const struct { { NULL } }; -typedef struct CheckasmFuncVersion { - struct CheckasmFuncVersion *next; - void *func; - int ok; - int cpu; - CheckasmPerf perf; -} CheckasmFuncVersion; - -/* Binary search tree node */ -typedef struct CheckasmFunc { - struct CheckasmFunc *child[2]; - CheckasmFuncVersion versions; - uint8_t color; /* 0 = red, 1 = black */ - char name[1]; -} CheckasmFunc; - -/* Internal state */ -static struct { - CheckasmFunc *funcs; - CheckasmFunc *current_func; - CheckasmFuncVersion *current_func_ver; - const char *current_test_name; - const char *bench_pattern; - int bench_pattern_len; - int num_checked; - int num_failed; - - /* perf */ - int nop_time; - int sysfd; - - int cpu_flag; - const char *cpu_flag_name; - const char *test_pattern; - int verbose; - int csv; - int tsv; - volatile sig_atomic_t catch_signals; -} state; - -/* PRNG state */ -AVLFG checkasm_lfg; - -/* float compare support code */ -static int is_negative(union av_intfloat32 u) -{ - return u.i >> 31; -} - -int float_near_ulp(float a, float b, unsigned max_ulp) -{ - union av_intfloat32 x, y; - - x.f = a; - y.f = b; - - if (is_negative(x) != is_negative(y)) { - // handle -0.0 == +0.0 - return a == b; 
- } - - if (llabs((int64_t)x.i - y.i) <= max_ulp) - return 1; - - return 0; -} - -int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp, - unsigned len) -{ - unsigned i; - - for (i = 0; i < len; i++) { - if (!float_near_ulp(a[i], b[i], max_ulp)) - return 0; - } - return 1; -} - -int float_near_abs_eps(float a, float b, float eps) -{ - float abs_diff = fabsf(a - b); - if (abs_diff < eps) - return 1; - - fprintf(stderr, "test failed comparing %g with %g (abs diff=%g with EPS=%g)\n", a, b, abs_diff, eps); - - return 0; -} - -int float_near_abs_eps_array(const float *a, const float *b, float eps, - unsigned len) -{ - unsigned i; - - for (i = 0; i < len; i++) { - if (!float_near_abs_eps(a[i], b[i], eps)) - return 0; - } - return 1; -} - -int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp) -{ - return float_near_ulp(a, b, max_ulp) || float_near_abs_eps(a, b, eps); -} - -int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps, - unsigned max_ulp, unsigned len) -{ - unsigned i; - - for (i = 0; i < len; i++) { - if (!float_near_abs_eps_ulp(a[i], b[i], eps, max_ulp)) - return 0; - } - return 1; -} - -int double_near_abs_eps(double a, double b, double eps) -{ - double abs_diff = fabs(a - b); - - return abs_diff < eps; -} - -int double_near_abs_eps_array(const double *a, const double *b, double eps, - unsigned len) -{ - unsigned i; - - for (i = 0; i < len; i++) { - if (!double_near_abs_eps(a[i], b[i], eps)) - return 0; - } - return 1; -} - -/* Print colored text to stderr if the terminal supports it */ -static void color_printf(int color, const char *fmt, ...) 
-{ - static int use_color = -1; - va_list arg; - -#if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE - static HANDLE con; - static WORD org_attributes; - - if (use_color < 0) { - CONSOLE_SCREEN_BUFFER_INFO con_info; - con = GetStdHandle(STD_ERROR_HANDLE); - if (con && con != INVALID_HANDLE_VALUE && GetConsoleScreenBufferInfo(con, &con_info)) { - org_attributes = con_info.wAttributes; - use_color = 1; - } else - use_color = 0; - } - if (use_color) - SetConsoleTextAttribute(con, (org_attributes & 0xfff0) | (color & 0x0f)); -#else - if (use_color < 0) { - const char *term = getenv("TERM"); - use_color = term && strcmp(term, "dumb") && isatty(2); - } - if (use_color) - fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07); -#endif - - va_start(arg, fmt); - vfprintf(stderr, fmt, arg); - va_end(arg); - - if (use_color) { -#if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE - SetConsoleTextAttribute(con, org_attributes); -#else - fprintf(stderr, "\x1b[0m"); -#endif - } -} - -/* Deallocate a tree */ -static void destroy_func_tree(CheckasmFunc *f) -{ - if (f) { - CheckasmFuncVersion *v = f->versions.next; - while (v) { - CheckasmFuncVersion *next = v->next; - free(v); - v = next; - } - - destroy_func_tree(f->child[0]); - destroy_func_tree(f->child[1]); - free(f); - } -} - -/* Allocate a zero-initialized block, clean up and exit on failure */ -static void *checkasm_malloc(size_t size) -{ - void *ptr = calloc(1, size); - if (!ptr) { - fprintf(stderr, "checkasm: malloc failed\n"); - destroy_func_tree(state.funcs); - exit(1); - } - return ptr; -} - -/* Get the suffix of the specified cpu flag */ -static const char *cpu_suffix(int cpu) -{ - int i = FF_ARRAY_ELEMS(cpus); - - while (--i >= 0) - if (cpu & cpus[i].flag) - return cpus[i].suffix; - - return "c"; -} - -static int cmp_nop(const void *a, const void *b) -{ - return *(const uint16_t*)a - *(const uint16_t*)b; -} - -/* Measure the overhead of the timing code (in decicycles) */ -static int 
measure_nop_time(void) -{ - uint16_t nops[10000]; - int i, nop_sum = 0; - av_unused const int sysfd = state.sysfd; - - uint64_t t = 0; - for (i = 0; i < 10000; i++) { - PERF_START(t); - PERF_STOP(t); - nops[i] = t; - } - - qsort(nops, 10000, sizeof(uint16_t), cmp_nop); - for (i = 2500; i < 7500; i++) - nop_sum += nops[i]; - - return nop_sum / 500; -} - -static inline double avg_cycles_per_call(const CheckasmPerf *const p) -{ - if (p->iterations) { - const double cycles = (double)(10 * p->cycles) / p->iterations - state.nop_time; - if (cycles > 0.0) - return cycles / 32.0; /* 32 calls per iteration */ - } - return 0.0; -} - -/* Print benchmark results */ -static void print_benchs(CheckasmFunc *f) -{ - if (f) { - CheckasmFuncVersion *v = &f->versions; - const CheckasmPerf *p = &v->perf; - const double baseline = avg_cycles_per_call(p); - double decicycles; - - print_benchs(f->child[0]); - - do { - if (p->iterations) { - p = &v->perf; - decicycles = avg_cycles_per_call(p); - if (state.csv || state.tsv) { - const char sep = state.csv ? ',' : '\t'; - printf("%s%c%s%c%.1f\n", f->name, sep, - cpu_suffix(v->cpu), sep, - decicycles / 10.0); - } else { - const int pad_length = 10 + 50 - - printf("%s_%s:", f->name, cpu_suffix(v->cpu)); - const double ratio = decicycles ? 
- baseline / decicycles : 0.0; - printf("%*.1f (%5.2fx)\n", FFMAX(pad_length, 0), - decicycles / 10.0, ratio); - } - } - } while ((v = v->next)); - - print_benchs(f->child[1]); - } -} - -/* ASCIIbetical sort except preserving natural order for numbers */ -static int cmp_func_names(const char *a, const char *b) -{ - const char *start = a; - int ascii_diff, digit_diff; - - for (; !(ascii_diff = *(const unsigned char*)a - *(const unsigned char*)b) && *a; a++, b++); - for (; av_isdigit(*a) && av_isdigit(*b); a++, b++); - - if (a > start && av_isdigit(a[-1]) && (digit_diff = av_isdigit(*a) - av_isdigit(*b))) - return digit_diff; - - return ascii_diff; -} - -/* Perform a tree rotation in the specified direction and return the new root */ -static CheckasmFunc *rotate_tree(CheckasmFunc *f, int dir) -{ - CheckasmFunc *r = f->child[dir^1]; - f->child[dir^1] = r->child[dir]; - r->child[dir] = f; - r->color = f->color; - f->color = 0; - return r; -} - -#define is_red(f) ((f) && !(f)->color) - -/* Balance a left-leaning red-black tree at the specified node */ -static void balance_tree(CheckasmFunc **root) -{ - CheckasmFunc *f = *root; - - if (is_red(f->child[0]) && is_red(f->child[1])) { - f->color ^= 1; - f->child[0]->color = f->child[1]->color = 1; - } - - if (!is_red(f->child[0]) && is_red(f->child[1])) - *root = rotate_tree(f, 0); /* Rotate left */ - else if (is_red(f->child[0]) && is_red(f->child[0]->child[0])) - *root = rotate_tree(f, 1); /* Rotate right */ -} - -/* Get a node with the specified name, creating it if it doesn't exist */ -static CheckasmFunc *get_func(CheckasmFunc **root, const char *name) -{ - CheckasmFunc *f = *root; - - if (f) { - /* Search the tree for a matching node */ - int cmp = cmp_func_names(name, f->name); - if (cmp) { - f = get_func(&f->child[cmp > 0], name); - - /* Rebalance the tree on the way up if a new node was inserted */ - if (!f->versions.func) - balance_tree(root); - } - } else { - /* Allocate and insert a new node into the tree */ - 
int name_length = strlen(name); - f = *root = checkasm_malloc(sizeof(CheckasmFunc) + name_length); - memcpy(f->name, name, name_length + 1); - } - - return f; -} - -checkasm_context checkasm_context_buf; - -/* Crash handling: attempt to catch crashes and handle them - * gracefully instead of just aborting abruptly. */ -#ifdef _WIN32 -#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) -static LONG NTAPI signal_handler(EXCEPTION_POINTERS *e) { - int s; - - if (!state.catch_signals) - return EXCEPTION_CONTINUE_SEARCH; - - switch (e->ExceptionRecord->ExceptionCode) { - case EXCEPTION_FLT_DIVIDE_BY_ZERO: - case EXCEPTION_INT_DIVIDE_BY_ZERO: - s = SIGFPE; - break; - case EXCEPTION_ILLEGAL_INSTRUCTION: - case EXCEPTION_PRIV_INSTRUCTION: - s = SIGILL; - break; - case EXCEPTION_ACCESS_VIOLATION: - case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: - case EXCEPTION_DATATYPE_MISALIGNMENT: - case EXCEPTION_STACK_OVERFLOW: - s = SIGSEGV; - break; - case EXCEPTION_IN_PAGE_ERROR: - s = SIGBUS; - break; - default: - return EXCEPTION_CONTINUE_SEARCH; - } - state.catch_signals = 0; - checkasm_load_context(s); - return EXCEPTION_CONTINUE_EXECUTION; /* never reached, but shuts up gcc */ -} -#endif -#elif !defined(_WASI_EMULATED_SIGNAL) -static void signal_handler(int s); - -static const struct sigaction signal_handler_act = { - .sa_handler = signal_handler, - .sa_flags = SA_RESETHAND, -}; - -static void signal_handler(int s) { - if (state.catch_signals) { - state.catch_signals = 0; - sigaction(s, &signal_handler_act, NULL); - checkasm_load_context(s); - } -} -#endif - -/* Compares a string with a wildcard pattern. 
*/ -static int wildstrcmp(const char *str, const char *pattern) -{ - const char *wild = strchr(pattern, '*'); - if (wild) { - const size_t len = wild - pattern; - if (strncmp(str, pattern, len)) return 1; - while (*++wild == '*'); - if (!*wild) return 0; - str += len; - while (*str && wildstrcmp(str, wild)) str++; - return !*str; - } - return strcmp(str, pattern); -} - -/* Perform tests and benchmarks for the specified cpu flag if supported by the host */ -static void check_cpu_flag(const char *name, int flag) +static void set_cpu_flags(uint64_t flags) { - int old_cpu_flag = state.cpu_flag; - - flag |= old_cpu_flag; - av_force_cpu_flags(-1); - state.cpu_flag = flag & av_get_cpu_flags(); - av_force_cpu_flags(state.cpu_flag); - - if (!flag || state.cpu_flag != old_cpu_flag) { - int i; - - state.cpu_flag_name = name; - for (i = 0; tests[i].func; i++) { - if (state.test_pattern && wildstrcmp(tests[i].name, state.test_pattern)) - continue; - state.current_test_name = tests[i].name; - tests[i].func(); - } - } + av_force_cpu_flags((int) flags); } -/* Print the name of the current CPU flag, but only do it once */ -static void print_cpu_name(void) +int main(int argc, const char *argv[]) { - if (state.cpu_flag_name) { - color_printf(COLOR_YELLOW, "%s:\n", state.cpu_flag_name); - state.cpu_flag_name = NULL; - } -} - -#if CONFIG_LINUX_PERF -static int bench_init_linux(void) -{ - struct perf_event_attr attr = { - .type = PERF_TYPE_HARDWARE, - .size = sizeof(struct perf_event_attr), - .config = PERF_COUNT_HW_CPU_CYCLES, - .disabled = 1, // start counting only on demand - .exclude_kernel = 1, - .exclude_hv = 1, -#if !ARCH_X86 - .exclude_guest = 1, -#endif + CheckasmConfig cfg = { + .cpu_flags = cpuflags, + .tests = tests, + .set_cpu_flags = set_cpu_flags, + .cpu = av_get_cpu_flags(), }; - fprintf(stderr, "benchmarking with Linux Perf Monitoring API\n"); - - state.sysfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0); - if (state.sysfd == -1) { - perror("perf_event_open"); - 
return -1; - } - return 0; -} -#elif CONFIG_MACOS_KPERF -static int bench_init_kperf(void) -{ - ff_kperf_init(); - return 0; -} -#else -static int bench_init_ffmpeg(void) -{ -#ifdef AV_READ_TIME - if (!checkasm_save_context()) { - checkasm_set_signal_handler_state(1); - AV_READ_TIME(); - checkasm_set_signal_handler_state(0); - } else { - fprintf(stderr, "checkasm: unable to execute platform specific timer\n"); - return -1; - } - fprintf(stderr, "benchmarking with native FFmpeg timers\n"); - return 0; -#else - fprintf(stderr, "checkasm: --bench is not supported on your system\n"); - return -1; -#endif -} -#endif - -static int bench_init(void) -{ -#if CONFIG_LINUX_PERF - int ret = bench_init_linux(); -#elif CONFIG_MACOS_KPERF - int ret = bench_init_kperf(); -#else - int ret = bench_init_ffmpeg(); -#endif - if (ret < 0) - return ret; - - state.nop_time = measure_nop_time(); - fprintf(stderr, "nop: %d.%d\n", state.nop_time/10, state.nop_time%10); - return 0; -} - -static void bench_uninit(void) -{ -#if CONFIG_LINUX_PERF - close(state.sysfd); -#endif -} - -static int usage(const char *path) -{ - fprintf(stderr, - "Usage: %s [options...] 
[seed]\n" - " --test=<pattern> Run specific test.\n" - " --bench Run benchmark.\n" - " --csv, --tsv Output results in rows of comma or tab separated values.\n" - " --runs=<ptwo> Manual number of benchmark iterations to run 2**<ptwo>.\n" - " --verbose Increase verbosity.\n", - path); - return 1; -} - -int main(int argc, char *argv[]) -{ - unsigned int seed = av_get_random_seed(); - int i, ret = 0; - char arch_info_buf[50] = ""; - -#ifdef _WIN32 -#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) - AddVectoredExceptionHandler(0, signal_handler); -#endif -#elif !defined(_WASI_EMULATED_SIGNAL) - sigaction(SIGBUS, &signal_handler_act, NULL); - sigaction(SIGFPE, &signal_handler_act, NULL); - sigaction(SIGILL, &signal_handler_act, NULL); - sigaction(SIGSEGV, &signal_handler_act, NULL); -#endif -#if HAVE_PRCTL && defined(PR_SET_UNALIGN) - prctl(PR_SET_UNALIGN, PR_UNALIGN_SIGBUS); -#endif -#if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL - if (have_vfp(av_get_cpu_flags()) || have_neon(av_get_cpu_flags())) - checkasm_checked_call = checkasm_checked_call_vfp; -#endif - - if (!tests[0].func || !cpus[0].flag) { - fprintf(stderr, "checkasm: no tests to perform\n"); - return 0; - } - - for (i = 1; i < argc; i++) { - const char *arg = argv[i]; - unsigned long l; - char *end; - - if (!strncmp(arg, "--bench", 7)) { - if (bench_init() < 0) - return 1; - if (arg[7] == '=') { - state.bench_pattern = arg + 8; - state.bench_pattern_len = strlen(state.bench_pattern); - } else - state.bench_pattern = "*"; - } else if (!strncmp(arg, "--test=", 7)) { - state.test_pattern = arg + 7; - } else if (!strcmp(arg, "--csv")) { - state.csv = 1; state.tsv = 0; - } else if (!strcmp(arg, "--tsv")) { - state.csv = 0; state.tsv = 1; - } else if (!strcmp(arg, "--verbose") || !strcmp(arg, "-v")) { - state.verbose = 1; - } else if (!strncmp(arg, "--runs=", 7)) { - l = strtoul(arg + 7, &end, 10); - if (*end == '\0') { - if (l > 30) { - fprintf(stderr, "checkasm: error: runs exponent must be within the range 0 <= 
30\n"); - usage(argv[0]); - } - bench_runs = 1U << l; - } else { - return usage(argv[0]); - } - } else if ((l = strtoul(arg, &end, 10)) <= UINT_MAX && - *end == '\0') { - seed = l; - } else { - return usage(argv[0]); - } - } - -#if ARCH_AARCH64 && HAVE_SVE - if (have_sve(av_get_cpu_flags())) - snprintf(arch_info_buf, sizeof(arch_info_buf), - "SVE %d bits, ", 8 * ff_aarch64_sve_length()); -#endif -#if ARCH_AARCH64 && HAVE_SME - if (have_sme(av_get_cpu_flags())) - av_strlcatf(arch_info_buf, sizeof(arch_info_buf), - "SME %d bits, ", 8 * ff_aarch64_sme_length()); -#endif -#if ARCH_RISCV && HAVE_RVV - if (av_get_cpu_flags() & AV_CPU_FLAG_RVV_I32) - snprintf(arch_info_buf, sizeof (arch_info_buf), - "%zu-bit vectors, ", 8 * ff_get_rv_vlenb()); -#endif - fprintf(stderr, "checkasm: %susing random seed %u\n", arch_info_buf, seed); - av_lfg_init(&checkasm_lfg, seed); - - if (state.bench_pattern) - fprintf(stderr, "checkasm: bench runs %" PRIu64 " (1 << %i)\n", bench_runs, av_log2(bench_runs)); - - check_cpu_flag(NULL, 0); - for (i = 0; cpus[i].flag; i++) - check_cpu_flag(cpus[i].name, cpus[i].flag); - - if (state.num_failed) { - fprintf(stderr, "checkasm: %d of %d tests have failed\n", state.num_failed, state.num_checked); - ret = 1; - } else { - fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked); - if (state.bench_pattern) { - print_benchs(state.funcs); - } - } - - destroy_func_tree(state.funcs); - bench_uninit(); - return ret; -} - -/* Decide whether or not the specified function needs to be tested and - * allocate/initialize data structures if needed. Returns a pointer to a - * reference function if the function should be tested, otherwise NULL */ -void *checkasm_check_func(void *func, const char *name, ...) 
-{ - char name_buf[256]; - void *ref = func; - CheckasmFuncVersion *v; - int name_length; - va_list arg; - - va_start(arg, name); - name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg); - va_end(arg); - - if (!func || name_length <= 0 || name_length >= sizeof(name_buf)) - return NULL; - - state.current_func = get_func(&state.funcs, name_buf); - state.funcs->color = 1; - v = &state.current_func->versions; - - if (v->func) { - CheckasmFuncVersion *prev; - do { - /* Only test functions that haven't already been tested */ - if (v->func == func) - return NULL; - - if (v->ok) - ref = v->func; - - prev = v; - } while ((v = v->next)); - - v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion)); - } - - v->func = func; - v->ok = 1; - v->cpu = state.cpu_flag; - state.current_func_ver = v; - - if (state.cpu_flag) - state.num_checked++; - - return ref; -} - -/* Decide whether or not the current function needs to be benchmarked */ -int checkasm_bench_func(void) -{ - return !state.num_failed && state.bench_pattern && - !wildstrcmp(state.current_func->name, state.bench_pattern); -} - -/* Indicate that the current test has failed, return whether verbose printing - * is requested. */ -int checkasm_fail_func(const char *msg, ...) -{ - if (state.current_func_ver && state.current_func_ver->cpu && - state.current_func_ver->ok) - { - va_list arg; - - print_cpu_name(); - fprintf(stderr, " %s_%s (", state.current_func->name, cpu_suffix(state.current_func_ver->cpu)); - va_start(arg, msg); - vfprintf(stderr, msg, arg); - va_end(arg); - fprintf(stderr, ")\n"); - - state.current_func_ver->ok = 0; - state.num_failed++; - } - return state.verbose; -} - -void checkasm_set_signal_handler_state(int enabled) { - state.catch_signals = enabled; -} - -int checkasm_handle_signal(int s) { - if (s) { -#ifdef __GLIBC__ - checkasm_fail_func("fatal signal %d: %s", s, strsignal(s)); -#else - checkasm_fail_func(s == SIGFPE ? "fatal arithmetic error" : - s == SIGILL ? 
"illegal instruction" : - s == SIGBUS ? "bus error" : - "segmentation fault"); -#endif - } - return s; -} - -/* Get the benchmark context of the current function */ -CheckasmPerf *checkasm_get_perf_context(void) -{ - CheckasmPerf *perf = &state.current_func_ver->perf; - memset(perf, 0, sizeof(*perf)); - perf->sysfd = state.sysfd; - return perf; -} - -/* Print the outcome of all tests performed since the last time this function was called */ -void checkasm_report(const char *name, ...) -{ - static int prev_checked, prev_failed, max_length; - - if (state.num_checked > prev_checked) { - int pad_length = max_length + 4; - va_list arg; - - print_cpu_name(); - pad_length -= fprintf(stderr, " - %s.", state.current_test_name); - va_start(arg, name); - pad_length -= vfprintf(stderr, name, arg); - va_end(arg); - fprintf(stderr, "%*c", FFMAX(pad_length, 0) + 2, '['); - - if (state.num_failed == prev_failed) - color_printf(COLOR_GREEN, "OK"); - else - color_printf(COLOR_RED, "FAILED"); - fprintf(stderr, "]\n"); - - prev_checked = state.num_checked; - prev_failed = state.num_failed; - } else if (!state.cpu_flag) { - /* Calculate the amount of padding required to make the output vertically aligned */ - int length = strlen(state.current_test_name); - va_list arg; - - va_start(arg, name); - length += vsnprintf(NULL, 0, name, arg); - va_end(arg); - - if (length > max_length) - max_length = length; - } -} - -static int check_err(const char *file, int line, - const char *name, int w, int h, - int *err) -{ - if (*err) - return 0; - if (!checkasm_fail_func("%s:%d", file, line)) - return 1; - *err = 1; - fprintf(stderr, "%s (%dx%d):\n", name, w, h); - return 0; -} - -#define DEF_CHECKASM_CHECK_BODY(compare, type, fmt) \ -do { \ - int64_t aligned_w = (w - 1LL + align_w) & ~(align_w - 1); \ - int64_t aligned_h = (h - 1LL + align_h) & ~(align_h - 1); \ - int err = 0; \ - int y = 0; \ - av_assert0(aligned_w == (int32_t)aligned_w);\ - av_assert0(aligned_h == (int32_t)aligned_h);\ - stride1 
/= sizeof(*buf1); \ - stride2 /= sizeof(*buf2); \ - for (y = 0; y < h; y++) \ - if (!compare(&buf1[y*stride1], &buf2[y*stride2], w)) \ - break; \ - if (y != h) { \ - if (check_err(file, line, name, w, h, &err)) \ - return 1; \ - for (y = 0; y < h; y++) { \ - for (int x = 0; x < w; x++) \ - fprintf(stderr, " " fmt, buf1[x]); \ - fprintf(stderr, " "); \ - for (int x = 0; x < w; x++) \ - fprintf(stderr, " " fmt, buf2[x]); \ - fprintf(stderr, " "); \ - for (int x = 0; x < w; x++) \ - fprintf(stderr, "%c", buf1[x] != buf2[x] ? 'x' : '.'); \ - buf1 += stride1; \ - buf2 += stride2; \ - fprintf(stderr, "\n"); \ - } \ - buf1 -= h*stride1; \ - buf2 -= h*stride2; \ - } \ - for (y = -padding; y < 0; y++) \ - if (!compare(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \ - w + 2*padding)) { \ - if (check_err(file, line, name, w, h, &err)) \ - return 1; \ - fprintf(stderr, " overwrite above\n"); \ - break; \ - } \ - for (y = aligned_h; y < aligned_h + padding; y++) \ - if (!compare(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \ - w + 2*padding)) { \ - if (check_err(file, line, name, w, h, &err)) \ - return 1; \ - fprintf(stderr, " overwrite below\n"); \ - break; \ - } \ - for (y = 0; y < h; y++) \ - if (!compare(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \ - padding)) { \ - if (check_err(file, line, name, w, h, &err)) \ - return 1; \ - fprintf(stderr, " overwrite left\n"); \ - break; \ - } \ - for (y = 0; y < h; y++) \ - if (!compare(&buf1[y*stride1 + aligned_w], &buf2[y*stride2 + aligned_w], \ - padding)) { \ - if (check_err(file, line, name, w, h, &err)) \ - return 1; \ - fprintf(stderr, " overwrite right\n"); \ - break; \ - } \ - return err; \ -} while (0) - -#define cmp_int(a, b, len) (!memcmp(a, b, (len) * sizeof(*(a)))) -#define DEF_CHECKASM_CHECK_FUNC(type, fmt) \ -int checkasm_check_##type(const char *file, int line, \ - const type *buf1, ptrdiff_t stride1, \ - const type *buf2, ptrdiff_t stride2, \ - int w, int h, const char *name, \ - 
int align_w, int align_h, \ - int padding) \ -{ \ - DEF_CHECKASM_CHECK_BODY(cmp_int, type, fmt); \ -} - -DEF_CHECKASM_CHECK_FUNC(uint8_t, "%02x") -DEF_CHECKASM_CHECK_FUNC(uint16_t, "%04x") -DEF_CHECKASM_CHECK_FUNC(uint32_t, "%08x") -DEF_CHECKASM_CHECK_FUNC(int16_t, "%6d") -DEF_CHECKASM_CHECK_FUNC(int32_t, "%9d") - -int checkasm_check_float_ulp(const char *file, int line, - const float *buf1, ptrdiff_t stride1, - const float *buf2, ptrdiff_t stride2, - int w, int h, const char *name, - unsigned max_ulp, int align_w, int align_h, - int padding) -{ - #define cmp_float(a, b, len) float_near_ulp_array(a, b, max_ulp, len) - DEF_CHECKASM_CHECK_BODY(cmp_float, float, "%g"); - #undef cmp_float + return checkasm_main(&cfg, argc, argv); } diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index e48f53e3e6..612ad0f9b9 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -24,17 +24,10 @@ #define TESTS_CHECKASM_CHECKASM_H #include <stdint.h> -#include "config.h" - -#if CONFIG_LINUX_PERF -#include <unistd.h> // read(3) -#include <sys/ioctl.h> -#include <asm/unistd.h> -#include <linux/perf_event.h> -#elif CONFIG_MACOS_KPERF -#include "libavutil/macos_kperf.h" -#endif +#include <checkasm/test.h> +#include <checkasm/utils.h> +#include "config.h" #include "libavutil/avstring.h" #include "libavutil/cpu.h" #include "libavutil/emms.h" @@ -42,41 +35,6 @@ #include "libavutil/lfg.h" #include "libavutil/timer.h" -#ifdef _WIN32 -#include <windows.h> -#if defined(__i386__) || defined(_M_IX86) -#include <setjmp.h> -typedef jmp_buf checkasm_context; -#define checkasm_save_context() checkasm_handle_signal(setjmp(checkasm_context_buf)) -#define checkasm_load_context(s) longjmp(checkasm_context_buf, s) -#elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) -/* setjmp/longjmp on Windows on architectures using SEH (all except x86_32) - * will try to use SEH to unwind the stack, which doesn't work for assembly - * functions without unwind information. 
*/ -typedef struct { CONTEXT c; int status; } checkasm_context; -#define checkasm_save_context() \ - (checkasm_context_buf.status = 0, \ - RtlCaptureContext(&checkasm_context_buf.c), \ - checkasm_handle_signal(checkasm_context_buf.status)) -#define checkasm_load_context(s) \ - (checkasm_context_buf.status = s, \ - RtlRestoreContext(&checkasm_context_buf.c, NULL)) -#else -#define checkasm_context void* -#define checkasm_save_context() 0 -#define checkasm_load_context() do {} while (0) -#endif -#elif defined(_WASI_EMULATED_SIGNAL) -#define checkasm_context void* -#define checkasm_save_context() 0 -#define checkasm_load_context() do {} while (0) -#else -#include <setjmp.h> -typedef sigjmp_buf checkasm_context; -#define checkasm_save_context() checkasm_handle_signal(sigsetjmp(checkasm_context_buf, 1)) -#define checkasm_load_context(s) siglongjmp(checkasm_context_buf, s) -#endif - void checkasm_check_aacencdsp(void); void checkasm_check_aacpsdsp(void); void checkasm_check_ac3dsp(void); @@ -179,309 +137,23 @@ void checkasm_check_vvc_alf(void); void checkasm_check_vvc_mc(void); void checkasm_check_vvc_sao(void); -struct CheckasmPerf; - -void *checkasm_check_func(void *func, const char *name, ...) av_printf_format(2, 3); -int checkasm_bench_func(void); -int checkasm_fail_func(const char *msg, ...) av_printf_format(1, 2); -struct CheckasmPerf *checkasm_get_perf_context(void); -void checkasm_report(const char *name, ...) 
av_printf_format(1, 2); -void checkasm_set_signal_handler_state(int enabled); -int checkasm_handle_signal(int s); -extern checkasm_context checkasm_context_buf; - -/* float compare utilities */ -int float_near_ulp(float a, float b, unsigned max_ulp); -int float_near_abs_eps(float a, float b, float eps); -int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp); -int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp, - unsigned len); -int float_near_abs_eps_array(const float *a, const float *b, float eps, - unsigned len); -int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps, - unsigned max_ulp, unsigned len); -int double_near_abs_eps(double a, double b, double eps); -int double_near_abs_eps_array(const double *a, const double *b, double eps, - unsigned len); - -extern AVLFG checkasm_lfg; -#define rnd() av_lfg_get(&checkasm_lfg) - -av_unused static void *func_ref, *func_new; - -extern uint64_t bench_runs; +#define rnd checkasm_rand +#define declare_func_float declare_func +#define bench(...) checkasm_bench(__VA_ARGS__) -/* Decide whether or not the specified function needs to be tested */ -#define check_func(func, ...) (checkasm_save_context(), func_ref = checkasm_check_func((func_new = func), __VA_ARGS__)) -#define check_key(key, ...) (checkasm_save_context(), checkasm_check_func(key, __VA_ARGS__)) - -/* Declare the function prototype. The first argument is the return value, the remaining - * arguments are the function parameters. Naming parameters is optional. */ -#define declare_func(ret, ...) declare_new(ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__) -#define declare_func_float(ret, ...) declare_new_float(ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__) -#define declare_func_emms(cpu_flags, ret, ...) 
declare_new_emms(cpu_flags, ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__) - -/* Indicate that the current test has failed */ -#define fail() checkasm_fail_func("%s:%d", av_basename(__FILE__), __LINE__) - -/* Print the test outcome */ -#define report checkasm_report - -/* Call the reference function */ -#define call_ref(...)\ - checkasm_call((func_type *)func_ref, __VA_ARGS__) - -#define checkasm_call(func, ...) \ - (checkasm_set_signal_handler_state(1),\ - (func)(__VA_ARGS__));\ - checkasm_set_signal_handler_state(0) - -#define call_new(...) checkasm_call_checked(((func_type *)func_new), __VA_ARGS__) - -#if ARCH_X86 && HAVE_X86ASM -/* Verifies that clobbered callee-saved registers are properly saved and restored - * and that either no MMX registers are touched or emms is issued */ -void checkasm_checked_call(void *func, ...); -/* Verifies that clobbered callee-saved registers are properly saved and restored - * and issues emms for asm functions which are not required to do so */ -void checkasm_checked_call_emms(void *func, ...); -/* Verifies that clobbered callee-saved registers are properly saved and restored - * but doesn't issue emms. Meant for dsp functions returning float or double */ -void checkasm_checked_call_float(void *func, ...); - -#if ARCH_X86_64 -/* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit. - * This is done by clobbering the stack with junk around the stack pointer and calling the - * assembly function through checked_call() with added dummy arguments which forces all - * real arguments to be passed on the stack and not in registers. For 32-bit arguments the - * upper half of the 64-bit register locations on the stack will now contain junk which will - * cause misbehaving functions to either produce incorrect output or segfault. 
Note that - * even though this works extremely well in practice, it's technically not guaranteed - * and false negatives is theoretically possible, but there can never be any false positives. - */ -void checkasm_stack_clobber(uint64_t clobber, ...); -#define declare_new(ret, ...) ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__)\ - = (void *)checkasm_checked_call; -#define declare_new_float(ret, ...) ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__)\ - = (void *)checkasm_checked_call_float; -#define declare_new_emms(cpu_flags, ret, ...) \ - ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__) = \ - ((cpu_flags) & av_get_cpu_flags()) ? (void *)checkasm_checked_call_emms : \ - (void *)checkasm_checked_call; -#define CLOB (UINT64_C(0xdeadbeefdeadbeef)) -#define checkasm_call_checked(func, ...) \ - (checkasm_set_signal_handler_state(1),\ - checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,\ - CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB),\ - checked_call((func), 0, 0, 0, 0, 0, __VA_ARGS__));\ - checkasm_set_signal_handler_state(0) -#elif ARCH_X86_32 -#define declare_new(ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = (void *)checkasm_checked_call; -#define declare_new_float(ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = (void *)checkasm_checked_call_float; -#define declare_new_emms(cpu_flags, ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = \ - ((cpu_flags) & av_get_cpu_flags()) ? (void *)checkasm_checked_call_emms : \ - (void *)checkasm_checked_call; -#define checkasm_call_checked(func, ...)\ - (checkasm_set_signal_handler_state(1),\ - checked_call((func), __VA_ARGS__));\ - checkasm_set_signal_handler_state(0) -#endif -#elif ARCH_ARM && HAVE_ARMV5TE_EXTERNAL -/* Use a dummy argument, to offset the real parameters by 2, not only 1. - * This makes sure that potential 8-byte-alignment of parameters is kept the same - * even when the extra parameters have been removed. 
*/ -void checkasm_checked_call_vfp(void *func, int dummy, ...); -void checkasm_checked_call_novfp(void *func, int dummy, ...); -extern void (*checkasm_checked_call)(void *func, int dummy, ...); -#define declare_new(ret, ...) ret (*checked_call)(void *, int dummy, __VA_ARGS__, \ - int, int, int, int, int, int, int, int, \ - int, int, int, int, int, int, int) = (void *)checkasm_checked_call; -#define checkasm_call_checked(func, ...) \ - (checkasm_set_signal_handler_state(1),\ - checked_call((func), 0, __VA_ARGS__, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0));\ - checkasm_set_signal_handler_state(0) -#elif ARCH_AARCH64 && !defined(__APPLE__) -void checkasm_stack_clobber(uint64_t clobber, ...); -void checkasm_checked_call(void *func, ...); -#define declare_new(ret, ...) ret (*checked_call)(void *, int, int, int, int, int, int, int, __VA_ARGS__,\ - int, int, int, int, int, int, int, int,\ - int, int, int, int, int, int, int)\ - = (void *)checkasm_checked_call; -#define CLOB (UINT64_C(0xdeadbeefdeadbeef)) -#define checkasm_call_checked(func, ...) (checkasm_set_signal_handler_state(1),\ - checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,\ - CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB),\ - checked_call((func), 0, 0, 0, 0, 0, 0, 0, __VA_ARGS__,\ - 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0));\ - checkasm_set_signal_handler_state(0) -#elif ARCH_RISCV -void checkasm_set_function(void *); -void *checkasm_get_wrapper(void); - -#if HAVE_RV -#define declare_new(ret, ...) \ - ret (*checked_call)(__VA_ARGS__) = checkasm_get_wrapper(); -#define checkasm_call_checked(func, ...) \ - (checkasm_set_signal_handler_state(1),\ - checkasm_set_function(func), checked_call(__VA_ARGS__));\ - checkasm_set_signal_handler_state(0) -#else -#define declare_new(ret, ...) 
-#define checkasm_call_checked(func, ...)\ - (checkasm_set_signal_handler_state(1),\ - (func)(__VA_ARGS__));\ - checkasm_set_signal_handler_state(0) -#endif -#else -#define declare_new(ret, ...) -#define declare_new_float(ret, ...) -#define declare_new_emms(cpu_flags, ret, ...) -/* Call the function */ -#define checkasm_call_checked(func, ...)\ - (checkasm_set_signal_handler_state(1),\ - (func)(__VA_ARGS__));\ - checkasm_set_signal_handler_state(0) -#endif - -#ifndef declare_new_emms -#define declare_new_emms(cpu_flags, ret, ...) declare_new(ret, __VA_ARGS__) -#endif -#ifndef declare_new_float -#define declare_new_float(ret, ...) declare_new(ret, __VA_ARGS__) -#endif - -typedef struct CheckasmPerf { - int sysfd; - uint64_t cycles; - int iterations; -} CheckasmPerf; - -#if defined(AV_READ_TIME) || CONFIG_LINUX_PERF || CONFIG_MACOS_KPERF - -#if CONFIG_LINUX_PERF -#define PERF_START(t) do { \ - ioctl(sysfd, PERF_EVENT_IOC_RESET, 0); \ - ioctl(sysfd, PERF_EVENT_IOC_ENABLE, 0); \ -} while (0) -#define PERF_STOP(t) do { \ - int ret; \ - ioctl(sysfd, PERF_EVENT_IOC_DISABLE, 0); \ - ret = read(sysfd, &t, sizeof(t)); \ - (void)ret; \ -} while (0) -#elif CONFIG_MACOS_KPERF -#define PERF_START(t) t = ff_kperf_cycles() -#define PERF_STOP(t) t = ff_kperf_cycles() - t -#else -#define PERF_START(t) t = AV_READ_TIME() -#define PERF_STOP(t) t = AV_READ_TIME() - t -#endif - -#define CALL4(...)\ - do {\ - tfunc(__VA_ARGS__); \ - tfunc(__VA_ARGS__); \ - tfunc(__VA_ARGS__); \ - tfunc(__VA_ARGS__); \ - } while (0) - -#define CALL16(...)\ - do {\ - CALL4(__VA_ARGS__); \ - CALL4(__VA_ARGS__); \ - CALL4(__VA_ARGS__); \ - CALL4(__VA_ARGS__); \ - } while (0) - -/* Benchmark the function */ -#define bench(func, ...)\ - do {\ - if (checkasm_bench_func()) {\ - struct CheckasmPerf *perf = checkasm_get_perf_context();\ - av_unused const int sysfd = perf->sysfd;\ - func_type *tfunc = func;\ - uint64_t tsum = 0;\ - uint64_t ti, tcount = 0;\ - uint64_t t = 0; \ - const uint64_t truns = 
FFMAX(bench_runs >> 3, 1);\ - checkasm_set_signal_handler_state(1);\ - for (ti = 0; ti < truns; ti++) {\ - PERF_START(t);\ - CALL16(__VA_ARGS__);\ - CALL16(__VA_ARGS__);\ - PERF_STOP(t);\ - if (t*tcount <= tsum*4 && ti > 0) {\ - tsum += t;\ - tcount++;\ - }\ - }\ - emms_c();\ - perf->cycles += tsum;\ - perf->iterations += tcount;\ - checkasm_set_signal_handler_state(0);\ - }\ - } while (0) -#else -#define bench(func, ...) while(0) -#define PERF_START(t) while(0) -#define PERF_STOP(t) while(0) -#endif - -#define bench_new(...) bench(func_new, __VA_ARGS__) - -#define BUF_RECT(type, name, w, h) \ - LOCAL_ALIGNED_32(type, name##_buf, [((h)+32)*(FFALIGN(w,64)+64) + 64]); \ - av_unused ptrdiff_t name##_stride = sizeof(type)*(FFALIGN(w,64)+64); \ - av_unused int name##_buf_h = (h)+32; \ - type *name = name##_buf + (FFALIGN(w,64)+64)*16 + 64 - -#define PIXEL_RECT(name, w, h) \ - LOCAL_ALIGNED_32(uint8_t, name##_buf, [sizeof(uint16_t) * (((h)+32)*(FFALIGN(w,64)+64) + 64)],); \ - av_unused ptrdiff_t name##_stride = sizeof(uint16_t) * (FFALIGN(w,64)+64); \ - av_unused int name##_buf_h = (h)+32; \ - uint8_t *name = name##_buf + (FFALIGN(w,64)+64)*16 + 64 - -#define CLEAR_BUF_RECT(name) \ - memset(name##_buf, 0x99, name##_stride * name##_buf_h + 64) -#define CLEAR_PIXEL_RECT(name) \ - CLEAR_BUF_RECT(name) - -#define DECL_CHECKASM_CHECK_FUNC(type) \ -int checkasm_check_##type(const char *file, int line, \ - const type *buf1, ptrdiff_t stride1, \ - const type *buf2, ptrdiff_t stride2, \ - int w, int h, const char *name, \ - int align_w, int align_h, \ - int padding) - -DECL_CHECKASM_CHECK_FUNC(uint8_t); -DECL_CHECKASM_CHECK_FUNC(uint16_t); -DECL_CHECKASM_CHECK_FUNC(uint32_t); -DECL_CHECKASM_CHECK_FUNC(int16_t); -DECL_CHECKASM_CHECK_FUNC(int32_t); - -int checkasm_check_float_ulp(const char *file, int line, - const float *buf1, ptrdiff_t stride1, - const float *buf2, ptrdiff_t stride2, - int w, int h, const char *name, - unsigned max_ulp, int align_w, int align_h, - int padding); 
+#define randomize_stddev(buf, size, stddev) \ + checkasm_randomize_distf(buf, size, (CheckasmDist){ 0.0, stddev }) +#define randomize_stddev_dbl(buf, size, stddev) \ + checkasm_randomize_dist(buf, size, (CheckasmDist){ 0.0, stddev }) -#define PASTE(a,b) a ## b -#define CONCAT(a,b) PASTE(a,b) +#define PIXEL_RECT(name, w, h) \ + BUF_RECT(uint16_t, name##_16, w, h); \ + av_unused ptrdiff_t name##_stride = name##_16_stride; \ + av_unused int name##_buf_h = name##_16_buf_h; \ + av_unused uint8_t* name##_buf = (uint8_t*)name##_16_buf; \ + uint8_t* name = (uint8_t*)name##_16 -#define checkasm_check2(prefix, ...) CONCAT(checkasm_check_, prefix)(__FILE__, __LINE__, __VA_ARGS__) -#define checkasm_check(prefix, ...) checkasm_check2(prefix, __VA_ARGS__, 0, 0, 0) -/* Check a pointer from BUF_RECT, checking whether there have been - * writes outside of the designated area. */ -#define checkasm_check_padded(...) \ - checkasm_check2(__VA_ARGS__, 1, 1, 8) -/* Check a pointer from BUF_RECT, checking whether there have been - * writes outside of the designated area. Allow writing slightly past the - * end of the buffer, by aligning w/h to align_w/align_h, and checking - * for overwrites outside of that. */ -#define checkasm_check_padded_align(...) \ - checkasm_check2(__VA_ARGS__, 8) +#define CLEAR_PIXEL_RECT(name) CLEAR_BUF_RECT(name##_16) /* This assumes that there is a local variable named "bit_depth". * For tests that don't have that and only operate on a single @@ -515,15 +187,6 @@ int checkasm_check_float_ulp(const char *file, int line, buf2 ## _16, stride2, \ __VA_ARGS__)) -#define randomize_stddev(buf, size, stddev) \ -do { \ - double bmg[2]; \ - for (int i = 0; i < size; i += 2) { \ - av_bmg_get(&checkasm_lfg, bmg); \ - (buf)[i] = bmg[0] * (stddev); \ - (buf)[i + 1] = bmg[1] * (stddev); \ - } \ -} while (0); -#define randomize_stddev_dbl(...) 
randomize_stddev(__VA_ARGS__) +typedef uint8_t pixel; #endif /* TESTS_CHECKASM_CHECKASM_H */ diff --git a/tests/checkasm/crc.c b/tests/checkasm/crc.c index 4202c7784c..94ade9ce37 100644 --- a/tests/checkasm/crc.c +++ b/tests/checkasm/crc.c @@ -38,7 +38,7 @@ static void check_crc(const AVCRC *table_new, const char *name, unsigned idx) { declare_func(uint32_t, const AVCRC *ctx, uint32_t crc, const uint8_t *buffer, size_t length); - const AVCRC *table_ref = check_key((AVCRC*)table_new, "crc_%s", name); + const AVCRC *table_ref = (const AVCRC *) check_key((CheckasmKey) table_new, "crc_%s", name); if (!table_ref) return; diff --git a/tests/checkasm/riscv/checkasm.S b/tests/checkasm/riscv/checkasm.S deleted file mode 100644 index 44f32e440b..0000000000 --- a/tests/checkasm/riscv/checkasm.S +++ /dev/null @@ -1,337 +0,0 @@ -/**************************************************************************** - * Copyright © 2022 Rémi Denis-Courmont. - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. 
- *****************************************************************************/ - -#include "libavutil/riscv/asm.S" - -#if (__riscv_xlen == 32) -.macro lx rd, addr - lw \rd, \addr -.endm - -.macro sx rs, addr - sw \rs, \addr -.endm -#define REG_MAGIC 0xdeadbeef -#elif (__riscv_xlen == 64) -.macro lx rd, addr - ld \rd, \addr -.endm - -.macro sx rs, addr - sd \rs, \addr -.endm -#define REG_MAGIC 0xdeadbeef0badf00d -#else -.macro lx rd, addr - lq \rd, \addr -.endm - -.macro sx rs, addr - sq \rs, \addr -.endm -#define REG_MAGIC 0xdeadbeef0badf00daaaabbbbccccdddd -#endif -#define XSZ (__riscv_xlen / 8) -#define STACK_ALIGN 16 -#define STACK_SPACE(sz) (((sz) + (STACK_ALIGN - 1)) & -STACK_ALIGN) - -#if defined(__riscv_float_abi_soft) -.macro flf rd, addr -.endm -.macro fsf rs, addr -.endm -#define FSZ 0 -#elif defined(__riscv_float_abi_single) -.macro flf rd, addr - flw \rd, \addr -.endm -.macro fsf rs, addr - fsw \rs, \addr -.endm -#define FSZ 4 -#elif defined(__riscv_float_abi_double) -.macro flf rd, addr - fld \rd, \addr -.endm -.macro fsf rs, addr - fsd \rs, \addr -.endm -#define FSZ 8 -#elif defined(__riscv_float_abi_quad) -.macro flf rd, addr - flq \rd, \addr -.endm -.macro fsf rs, addr - fsq \rs, \addr -.endm -#define FSZ 16 -#else -#error "Unknown float ABI" -#endif - - .pushsection .tbss, "waT" - .align 4 -.Lchecked_func: - .fill 1, XSZ, 0 - .align 4 -.Lsaved_xregs: - .fill 4 + 12, XSZ, 0 // RA, SP, GP, TP, S0-S11 - .align 4 -.Lsaved_fregs: - .fill 12, FSZ, 0 // FS0-FS11 - .fill 1, XSZ, 0 // RA - .popsection - -func checkasm_set_function - lpad 0 - la.tls.ie t0, .Lchecked_func - add t0, tp, t0 - sx a0, (t0) - ret -endfunc - -func checkasm_get_wrapper, v - lpad 0 - addi sp, sp, -STACK_SPACE(2 * XSZ) - sx fp, (sp) - sx ra, XSZ(sp) - addi fp, sp, STACK_SPACE(2 * XSZ) - - call av_get_cpu_flags - andi t0, a0, 8 /* AV_CPU_FLAG_RVV_I32 */ -#ifdef __riscv_float_abi_soft - andi t1, a0, 16 /* AV_CPU_FLAG_RVV_F32 (implies F and Zve32x) */ - lla a0, checkasm_checked_call_i 
- beqz t0, 1f - lla a0, checkasm_checked_call_iv - beqz t1, 1f -#else - lla a0, checkasm_checked_call_if - beqz t0, 1f -#endif - lla a0, checkasm_checked_call_ifv -1: - lx ra, XSZ(sp) - lx fp, (sp) - addi sp, sp, 16 - ret -endfunc - - .pushsection ".rodata", "a" -.Lfail_s_reg: - .asciz "callee-saved integer register S%d clobbered" -.Lfail_fs_reg: - .asciz "callee-saved floating-point register FS%d clobbered" -.Lfail_rsvd_reg: - .asciz "unallocatable register %cP clobbered" -#if defined(__riscv_float_abi_soft) || defined(__riscv_float_abi_single) - .align 2 -.Lbad_float: - .single 123456789 -#elif defined(__riscv_float_abi_double) - .align 3 -.Lbad_float: - .double 123456789 -#elif defined(__riscv_float_abi_quad) - .align 4 -.Lbad_float: - .ldouble 123456789 -#endif - .popsection - -func checkasm_checked_call_i - /* <-- Entry point without the Vector extension --> */ - lpad 0 - /* Save RA, unallocatable and callee-saved registers */ - la.tls.ie t0, .Lsaved_xregs - add t0, tp, t0 - sx ra, (t0) - sx sp, 1 * XSZ(t0) - sx gp, 2 * XSZ(t0) - sx tp, 3 * XSZ(t0) - .irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 - sx s\n, (4 + \n) * XSZ(t0) - .endr - - /* Clobber the stack space right below SP */ - li t1, REG_MAGIC - li t0, 16 -1: - addi sp, sp, -XSZ - addi t0, t0, -1 - sx t1, (sp) - bnez t0, 1b - - addi sp, sp, 16 * XSZ - # Clobber temporary registers (except T2, FE-CFI label) - .irp n, 0, 1, 3, 4, 5, 6 - mv t\n, t1 - .endr - # Clobber the saved registers - .irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 - mv s\n, t1 - .endr - - /* Call the tested function */ - la.tls.ie t0, .Lchecked_func - add t0, tp, t0 - lx t3, (t0) - sx zero, (t0) - jalr t3 - - /* Check special register values */ - la.tls.ie t0, .Lsaved_xregs - add t0, tp, t0 - lx t2, 1 * XSZ(t0) // SP - lx t3, 2 * XSZ(t0) // GP - lx t4, 3 * XSZ(t0) // TP - li t1, 'S' - bne t2, sp, .Lfail_xp - li t1, 'G' - bne t3, gp, .Lfail_xp - li t1, 'T' - bne t4, tp, .Lfail_xp - - /* Check value of saved registers */ - li t0, REG_MAGIC - 
.irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 - li t1, \n - bne t0, s\n, .Lfail_s - .endr - -4: - /* Restore RA and saved registers */ - la.tls.ie t0, .Lsaved_xregs - add t0, tp, t0 - lx ra, (t0) - .irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 - lx s\n, (4 + \n) * XSZ(t0) - .endr - ret - -.Lfail_xp: - # checkasm_fail_func() needs valid SP, GP and TP. Restore them. - lx sp, 1 * XSZ(t0) - lx gp, 2 * XSZ(t0) - lx tp, 3 * XSZ(t0) - lla a0, .Lfail_rsvd_reg - mv a1, t1 - call checkasm_fail_func - j 4b - -.Lfail_s: - lla a0, .Lfail_s_reg - mv a1, t1 - call checkasm_fail_func - j 4b -endfunc - -#ifndef __riscv_float_abi_soft -func checkasm_checked_call_if, f - lpad 0 - # Save callee-saved floating point registers and RA - la.tls.ie t0, .Lsaved_fregs - add t0, t0, tp - lla t1, .Lbad_float - sd ra, 12 * FSZ(t0) - .irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 - fsf fs\n, \n * FSZ(t0) - .endr - # Clobber the saved and temporary floating point registers - .irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 - flf ft\n, (t1) - flf fs\n, (t1) - .endr - - jal checkasm_checked_call_i - - # Check value of saved registers - lla t1, .Lbad_float - flf ft0, (t1) - .irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 - li t1, \n -#if defined(__riscv_float_abi_single) - feq.s t0, ft0, fs\n -#elif defined(__riscv_float_abi_double) - feq.d t0, ft0, fs\n -#else - feq.q t0, ft0, fs\n -#endif - beqz t0, .Lfail_fs - .endr - -1: # Restore callee-saved floating point registers and RA - la.tls.ie t0, .Lsaved_fregs - add t0, t0, tp - .irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 - flf fs\n, \n * FSZ(t0) - .endr - ld ra, 12 * FSZ(t0) - ret - -.Lfail_fs: - lla a0, .Lfail_fs_reg - mv a1, t1 - call checkasm_fail_func - j 1b -endfunc -#else -func checkasm_checked_call_if, f - lpad 0 - lla t1, .Lbad_float - # Clobber all floating point registers (soft float ABI). 
- .irp n, 0, 1, 2, 3, 4, 5, 6, 7 - flw fa\n, (t1) - .endr - .irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 - flw ft\n, (t1) - flw fs\n, (t1) - .endr - j checkasm_checked_call_i -endfunc - -func checkasm_checked_call_iv, zve32x - lpad 0 - jal t0, .Lclobber_v - j checkasm_checked_call_i -endfunc -#endif - -func checkasm_checked_call_ifv, zve32x - lpad 0 - jal t0, .Lclobber_v - j checkasm_checked_call_if - -.Lclobber_v: - # Clobber the vector registers - vsetvli t1, zero, e32, m8, ta, ma - li t1, -0xdeadbeef - vmv.v.x v0, t1 - vmv.v.x v8, t1 - vmv.v.x v16, t1 - vmv.v.x v24, t1 - # Clobber the vector configuration - li t1, 0 /* Vector length: zero */ - li t3, -4 /* Vector type: illegal */ - vsetvl zero, t1, t3 - csrwi vxrm, 3 /* Rounding mode: round-to-odd */ - csrwi vxsat, 1 /* Saturation: encountered */ - jr t0 -endfunc diff --git a/tests/checkasm/sw_ops.c b/tests/checkasm/sw_ops.c index 0c2071f21e..ca6882dae7 100644 --- a/tests/checkasm/sw_ops.c +++ b/tests/checkasm/sw_ops.c @@ -124,7 +124,7 @@ static void check_compiled(const char *name, const SwsOpBackend *backend, */ uintptr_t id = (uintptr_t) comp_new->func; id ^= (id << 6) + (id >> 2) + 0x9e3779b97f4a7c15 + comp_new->cpu_flags; - if (!check_key((void *) id, "%s/%s", name, backend->name)) + if (!check_key(id, "%s/%s", name, backend->name)) return; declare_func(void, const SwsOpExec *, const void *, int bx, int y, int bx_end, int y_end); @@ -310,7 +310,7 @@ static void check_ops(const char *name, const unsigned ranges[NB_PLANES], if (backend->hw_format != AV_PIX_FMT_NONE || backend == backend_ref) continue; - if (!av_get_cpu_flags()) { + if (!checkasm_get_cpu_info()) { /* Also test once with the existing C reference to set the baseline */ check_compiled(name, backend, read_op, write_op, ranges, &comp_ref, &comp_ref); } diff --git a/tests/checkasm/vp8dsp.c b/tests/checkasm/vp8dsp.c index 4d6704d5a9..3081820c59 100644 --- a/tests/checkasm/vp8dsp.c +++ b/tests/checkasm/vp8dsp.c @@ -309,7 +309,7 @@ static void 
check_mc(VP8DSPContext *d) CLEAR_BUF_RECT(dst1); call_ref(dst0, dst0_stride, src, SRC_BUF_STRIDE, height, mx, my); call_new(dst1, dst1_stride, src, SRC_BUF_STRIDE, height, mx, my); - checkasm_check_padded(uint8_t, dst0, dst0_stride, dst1, dst1_stride, size, height, "dst"); + checkasm_check_rect_padded(dst0, dst0_stride, dst1, dst1_stride, size, height, "dst"); bench_new(dst1, size, src, SRC_BUF_STRIDE, height, mx, my); } } diff --git a/tests/checkasm/x86/checkasm.asm b/tests/checkasm/x86/checkasm.asm deleted file mode 100644 index ab11bcba64..0000000000 --- a/tests/checkasm/x86/checkasm.asm +++ /dev/null @@ -1,244 +0,0 @@ -;***************************************************************************** -;* Assembly testing and benchmarking tool -;* Copyright (c) 2008 Loren Merritt -;* Copyright (c) 2012 Henrik Gramner -;* -;* This file is part of FFmpeg. -;* -;* FFmpeg is free software; you can redistribute it and/or modify -;* it under the terms of the GNU General Public License as published by -;* the Free Software Foundation; either version 2 of the License, or -;* (at your option) any later version. -;* -;* FFmpeg is distributed in the hope that it will be useful, -;* but WITHOUT ANY WARRANTY; without even the implied warranty of -;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;* GNU General Public License for more details. -;* -;* You should have received a copy of the GNU General Public License -;* along with this program; if not, write to the Free Software -;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. 
-;***************************************************************************** - -%define private_prefix checkasm -%include "libavutil/x86/x86inc.asm" - -SECTION_RODATA - -error_message: db "failed to preserve register", 0 -error_message_emms: db "failed to issue emms", 0 - -%if ARCH_X86_64 -; just random numbers to reduce the chance of incidental match -ALIGN 16 -x6: dq 0x1a1b2550a612b48c,0x79445c159ce79064 -x7: dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636 -x8: dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e -x9: dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f -x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9 -x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d -x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b -x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786 -x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef -x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5 -n7: dq 0x21f86d66c8ca00ce -n8: dq 0x75b6ba21077c48ad -n9: dq 0xed56bb2dcb3c7736 -n10: dq 0x8bda43d3fd1a7e06 -n11: dq 0xb64a9c9e5d318408 -n12: dq 0xdf9a54b303f1d3a3 -n13: dq 0x4a75479abd64e097 -n14: dq 0x249214109d5d1c88 -%endif - -SECTION .text - -cextern fail_func - -; max number of args used by any asm function. -; (max_args % 4) must equal 3 for stack alignment -%define max_args 15 - -%if ARCH_X86_64 - -;----------------------------------------------------------------------------- -; int checkasm_stack_clobber(uint64_t clobber, ...) 
-;----------------------------------------------------------------------------- -cglobal stack_clobber, 1,2 - ; Clobber the stack with junk below the stack pointer - %define argsize (max_args+6)*8 - SUB rsp, argsize - mov r1, argsize-8 -.loop: - mov [rsp+r1], r0 - sub r1, 8 - jge .loop - ADD rsp, argsize - RET - -%if WIN64 - %assign free_regs 7 - DECLARE_REG_TMP 4 -%else - %assign free_regs 9 - DECLARE_REG_TMP 7 -%endif - -%macro report_fail 1 - mov r9, rax - mov r10, rdx - lea r0, [%1] - xor eax, eax - call fail_func - mov rdx, r10 - mov rax, r9 -%endmacro - -;----------------------------------------------------------------------------- -; void checkasm_checked_call(void *func, ...) -;----------------------------------------------------------------------------- -INIT_XMM -%macro CHECKED_CALL 0-1 -cglobal checked_call%1, 2,15,16,max_args*8+8 - mov t0, r0 - - ; All arguments have been pushed on the stack instead of registers in order to - ; test for incorrect assumptions that 32-bit ints are zero-extended to 64-bit. 
- mov r0, r6mp - mov r1, r7mp - mov r2, r8mp - mov r3, r9mp -%if UNIX64 - mov r4, r10mp - mov r5, r11mp - %assign i 6 - %rep max_args-6 - mov r9, [rsp+stack_offset+(i+1)*8] - mov [rsp+(i-6)*8], r9 - %assign i i+1 - %endrep -%else ; WIN64 - %assign i 4 - %rep max_args-4 - mov r9, [rsp+stack_offset+(i+7)*8] - mov [rsp+i*8], r9 - %assign i i+1 - %endrep - - ; Move possible floating-point arguments to the correct registers - movq m0, r0 - movq m1, r1 - movq m2, r2 - movq m3, r3 - - %assign i 6 - %rep 16-6 - mova m %+ i, [x %+ i] - %assign i i+1 - %endrep -%endif - -%assign i 14 -%rep 15-free_regs - mov r %+ i, [n %+ i] - %assign i i-1 -%endrep - call t0 -%assign i 14 -%rep 15-free_regs - xor r %+ i, [n %+ i] - or r14, r %+ i - %assign i i-1 -%endrep - -%if WIN64 - %assign i 6 - %rep 16-6 - pxor m %+ i, [x %+ i] - por m6, m %+ i - %assign i i+1 - %endrep - packsswb m6, m6 - movq r5, m6 - or r14, r5 -%endif - - ; Call fail_func() with a descriptive message to mark it as a failure - ; if the called function didn't preserve all callee-saved registers. - ; Save the return value located in rdx:rax first to prevent clobbering. - jz .clobber_ok - report_fail error_message -.clobber_ok: -%ifidn %1, _emms - emms -%elifnidn %1, _float - fstenv [rsp] - cmp word [rsp + 8], 0xffff - je .emms_ok - report_fail error_message_emms - emms -.emms_ok: -%endif - RET -%endmacro - -%else - -; just random numbers to reduce the chance of incidental match -%define n3 dword 0x6549315c -%define n4 dword 0xe02f3e23 -%define n5 dword 0xb78d0d1d -%define n6 dword 0x33627ba7 - -%macro report_fail 1 - mov r3, eax - mov r4, edx - lea r0, [%1] - mov [esp], r0 - call fail_func - mov edx, r4 - mov eax, r3 -%endmacro - -%macro CHECKED_CALL 0-1 -;----------------------------------------------------------------------------- -; void checkasm_checked_call(void *func, ...) 
-;----------------------------------------------------------------------------- -cglobal checked_call%1, 1,7 - mov r3, n3 - mov r4, n4 - mov r5, n5 - mov r6, n6 -%rep max_args - PUSH dword [esp+20+max_args*4] -%endrep - call r0 - xor r3, n3 - xor r4, n4 - xor r5, n5 - xor r6, n6 - or r3, r4 - or r5, r6 - or r3, r5 - jz .clobber_ok - report_fail error_message -.clobber_ok: -%ifidn %1, _emms - emms -%elifnidn %1, _float - fstenv [esp] - cmp word [esp + 8], 0xffff - je .emms_ok - report_fail error_message_emms - emms -.emms_ok: -%endif - add esp, max_args*4 - RET -%endmacro - -%endif ; ARCH_X86_64 - -CHECKED_CALL -CHECKED_CALL _emms -CHECKED_CALL _float _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
