Signed-off-by: Yury Norov <yno...@caviumnetworks.com>
---
 sysdeps/aarch64/__longjmp.S                   | 2 ++
 sysdeps/aarch64/dl-tlsdesc.S                  | 6 ++++++
 sysdeps/aarch64/memcmp.S                      | 3 +++
 sysdeps/aarch64/memcpy.S                      | 4 +++-
 sysdeps/aarch64/memmove.S                     | 3 +++
 sysdeps/aarch64/memset.S                      | 3 ++-
 sysdeps/aarch64/setjmp.S                      | 1 +
 sysdeps/aarch64/strchr.S                      | 1 +
 sysdeps/aarch64/strchrnul.S                   | 1 +
 sysdeps/aarch64/strcmp.S                      | 2 ++
 sysdeps/aarch64/strcpy.S                      | 2 ++
 sysdeps/aarch64/strlen.S                      | 2 ++
 sysdeps/aarch64/strncmp.S                     | 3 +++
 sysdeps/aarch64/strnlen.S                     | 3 +++
 sysdeps/aarch64/strrchr.S                     | 1 +
 sysdeps/aarch64/sysdep.h                      | 4 +++-
 sysdeps/unix/sysv/linux/aarch64/clone.S       | 7 +++++++
 sysdeps/unix/sysv/linux/aarch64/getcontext.S  | 1 +
 sysdeps/unix/sysv/linux/aarch64/setcontext.S  | 1 +
 sysdeps/unix/sysv/linux/aarch64/swapcontext.S | 1 +
 20 files changed, 48 insertions(+), 3 deletions(-)
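Reviewer note (placed after the "---" separator, so it should stay out of
the commit log): DELOUSE is defined in the sysdeps/aarch64/sysdep.h hunk
below.  For LP64 it expands to nothing; for ILP32 it expands to
"mov wN, wN".  Writing a W register architecturally zeroes bits 63:32 of
the corresponding X register, so the macro scrubs whatever the caller may
have left in the upper half of a 32-bit pointer or size argument before
the routine uses the full X register for addressing.  A minimal sketch of
the effect -- the ldr and the register choice are illustrative only, not
part of this patch:

	#ifdef __ILP32__
		mov	w0, w0		/* clears bits 63:32 of x0 */
	#endif
		ldr	x1, [x0]	/* 64-bit addressing on x0 is now safe */
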
diff --git a/sysdeps/aarch64/__longjmp.S b/sysdeps/aarch64/__longjmp.S
index 58332be..0377715 100644
--- a/sysdeps/aarch64/__longjmp.S
+++ b/sysdeps/aarch64/__longjmp.S
@@ -46,6 +46,8 @@ ENTRY (__longjmp)
 	cfi_offset(d14, JB_D14<<3)
 	cfi_offset(d15, JB_D15<<3)
 
+	DELOUSE(0)
+
 	ldp	x19, x20, [x0, #JB_X19<<3]
 	ldp	x21, x22, [x0, #JB_X21<<3]
 	ldp	x23, x24, [x0, #JB_X23<<3]
diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S
index fe8a17d..718dddf 100644
--- a/sysdeps/aarch64/dl-tlsdesc.S
+++ b/sysdeps/aarch64/dl-tlsdesc.S
@@ -74,6 +74,7 @@
 	cfi_startproc
 	.align 2
 _dl_tlsdesc_return:
+	DELOUSE(0)
 	ldr	PTR_REG (0), [x0, #PTR_SIZE]
 	RET
 	cfi_endproc
@@ -126,6 +127,7 @@ _dl_tlsdesc_undefweak:
 	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
 	   from [x0,#8] here happens after the initialization of td->arg.  */
 	ldar	xzr, [x0]
+	DELOUSE(0)
 	ldr	PTR_REG (0), [x0, #PTR_SIZE]
 	mrs	x1, tpidr_el0
 	sub	PTR_REG (0), PTR_REG (0), PTR_REG (1)
@@ -174,6 +176,7 @@ _dl_tlsdesc_dynamic:
 	stp	x29, x30, [sp,#-(32+16*NSAVEXREGPAIRS)]!
 	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
 	mov	x29, sp
+	DELOUSE(0)
 
 	/* Save just enough registers to support fast path, if we fall
 	   into slow path we will save additional registers.  */
@@ -279,12 +282,14 @@ _dl_tlsdesc_resolve_rela:
 
 	SAVE_Q_REGISTERS
 
+	DELOUSE(3)
 	ldr	PTR_REG (1), [x3, #PTR_SIZE]
 	bl	_dl_tlsdesc_resolve_rela_fixup
 
 	RESTORE_Q_REGISTERS
 
 	ldr	x0, [sp, #32+16*8]
+	DELOUSE(0)
 	ldr	PTR_REG (1), [x0]
 	blr	x1
 
@@ -346,6 +351,7 @@ _dl_tlsdesc_resolve_hold:
 	RESTORE_Q_REGISTERS
 
 	ldr	x0, [sp, #32+16*9]
+	DELOUSE(0)
 	ldr	PTR_REG (1), [x0]
 	blr	x1
 
diff --git a/sysdeps/aarch64/memcmp.S b/sysdeps/aarch64/memcmp.S
index ae2d997..982aa02 100644
--- a/sysdeps/aarch64/memcmp.S
+++ b/sysdeps/aarch64/memcmp.S
@@ -47,6 +47,9 @@
 #define mask		x13
 
 ENTRY_ALIGN (memcmp, 6)
+	DELOUSE(0)
+	DELOUSE(1)
+	DELOUSE(2)
 	cbz	limit, L(ret0)
 	eor	tmp1, src1, src2
 	tst	tmp1, #7
diff --git a/sysdeps/aarch64/memcpy.S b/sysdeps/aarch64/memcpy.S
index 442f390..e0bbbf8 100644
--- a/sysdeps/aarch64/memcpy.S
+++ b/sysdeps/aarch64/memcpy.S
@@ -46,7 +46,9 @@
 #include <sysdep.h>
 
 ENTRY_ALIGN (memcpy, 6)
-
+	DELOUSE(0)
+	DELOUSE(1)
+	DELOUSE(2)
 	mov	dst, dstin
 	cmp	count, #64
 	b.ge	L(cpy_not_short)
diff --git a/sysdeps/aarch64/memmove.S b/sysdeps/aarch64/memmove.S
index dd91db0..3f72dea 100644
--- a/sysdeps/aarch64/memmove.S
+++ b/sysdeps/aarch64/memmove.S
@@ -46,6 +46,9 @@
 #define D_h	x14
 
 ENTRY_ALIGN (memmove, 6)
+	DELOUSE(0)
+	DELOUSE(1)
+	DELOUSE(2)
 
 	cmp	dstin, src
 	b.lo	L(downwards)
diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S
index e49f4d6..e8eed9e 100644
--- a/sysdeps/aarch64/memset.S
+++ b/sysdeps/aarch64/memset.S
@@ -52,7 +52,8 @@
 #define tmp3w		w9
 
 ENTRY_ALIGN (__memset, 6)
-
+	DELOUSE(0)
+	DELOUSE(2)
 	mov	dst, dstin	/* Preserve return value.  */
 	ands	A_lw, val, #255
 #ifndef DONT_USE_DC
diff --git a/sysdeps/aarch64/setjmp.S b/sysdeps/aarch64/setjmp.S
index da83f19..d608660 100644
--- a/sysdeps/aarch64/setjmp.S
+++ b/sysdeps/aarch64/setjmp.S
@@ -33,6 +33,7 @@ END (_setjmp)
 libc_hidden_def (_setjmp)
 
 ENTRY (__sigsetjmp)
+	DELOUSE(0)
 
 1:
 	stp	x19, x20, [x0, #JB_X19<<3]
diff --git a/sysdeps/aarch64/strchr.S b/sysdeps/aarch64/strchr.S
index 5e3aecf..838384c 100644
--- a/sysdeps/aarch64/strchr.S
+++ b/sysdeps/aarch64/strchr.S
@@ -62,6 +62,7 @@
 	/* Locals and temporaries.  */
 
 ENTRY (strchr)
+	DELOUSE(0)
 	mov	wtmp2, #0x0401
 	movk	wtmp2, #0x4010, lsl #16
 	dup	vrepchr.16b, chrin
diff --git a/sysdeps/aarch64/strchrnul.S b/sysdeps/aarch64/strchrnul.S
index a624c8d..b60df26 100644
--- a/sysdeps/aarch64/strchrnul.S
+++ b/sysdeps/aarch64/strchrnul.S
@@ -60,6 +60,7 @@
 	   identify exactly which byte is causing the termination.  */
 
 ENTRY (__strchrnul)
+	DELOUSE(0)
 	/* Magic constant 0x40100401 to allow us to identify which lane
 	   matches the termination condition.  */
 	mov	wtmp2, #0x0401
diff --git a/sysdeps/aarch64/strcmp.S b/sysdeps/aarch64/strcmp.S
index ba0ccb4..ccfe281 100644
--- a/sysdeps/aarch64/strcmp.S
+++ b/sysdeps/aarch64/strcmp.S
@@ -49,6 +49,8 @@
 
 	/* Start of performance-critical section  -- one 64B cache line.  */
 ENTRY_ALIGN(strcmp, 6)
+	DELOUSE(0)
+	DELOUSE(1)
 	eor	tmp1, src1, src2
 	mov	zeroones, #REP8_01
 	tst	tmp1, #7
diff --git a/sysdeps/aarch64/strcpy.S b/sysdeps/aarch64/strcpy.S
index 0694199..2a281b9 100644
--- a/sysdeps/aarch64/strcpy.S
+++ b/sysdeps/aarch64/strcpy.S
@@ -91,6 +91,8 @@
 #define MIN_PAGE_SIZE (1 << MIN_PAGE_P2)
 
 ENTRY_ALIGN (STRCPY, 6)
+	DELOUSE(0)
+	DELOUSE(1)
 	/* For moderately short strings, the fastest way to do the copy is to
 	   calculate the length of the string in the same way as strlen, then
 	   essentially do a memcpy of the result.  This avoids the need for
diff --git a/sysdeps/aarch64/strlen.S b/sysdeps/aarch64/strlen.S
index 9b4d1da..4734cfb 100644
--- a/sysdeps/aarch64/strlen.S
+++ b/sysdeps/aarch64/strlen.S
@@ -85,6 +85,8 @@
 	   boundary.  */
 
 ENTRY_ALIGN (strlen, 6)
+	DELOUSE(0)
+	DELOUSE(1)
 	and	tmp1, srcin, MIN_PAGE_SIZE - 1
 	mov	zeroones, REP8_01
 	cmp	tmp1, MIN_PAGE_SIZE - 16
diff --git a/sysdeps/aarch64/strncmp.S b/sysdeps/aarch64/strncmp.S
index f6a17fd..a372654 100644
--- a/sysdeps/aarch64/strncmp.S
+++ b/sysdeps/aarch64/strncmp.S
@@ -51,6 +51,9 @@
 #define endloop	x15
 
 ENTRY_ALIGN_AND_PAD (strncmp, 6, 7)
+	DELOUSE(0)
+	DELOUSE(1)
+	DELOUSE(2)
 	cbz	limit, L(ret0)
 	eor	tmp1, src1, src2
 	mov	zeroones, #REP8_01
diff --git a/sysdeps/aarch64/strnlen.S b/sysdeps/aarch64/strnlen.S
index 4cce45f..6f67221 100644
--- a/sysdeps/aarch64/strnlen.S
+++ b/sysdeps/aarch64/strnlen.S
@@ -50,6 +50,9 @@
 #define REP8_80 0x8080808080808080
 
 ENTRY_ALIGN_AND_PAD (__strnlen, 6, 9)
+	DELOUSE(0)
+	DELOUSE(1)
+	DELOUSE(2)
 	cbz	limit, L(hit_limit)
 	mov	zeroones, #REP8_01
 	bic	src, srcin, #15
diff --git a/sysdeps/aarch64/strrchr.S b/sysdeps/aarch64/strrchr.S
index 44c1917..bb85a60 100644
--- a/sysdeps/aarch64/strrchr.S
+++ b/sysdeps/aarch64/strrchr.S
@@ -68,6 +68,7 @@
 	   identify exactly which byte is causing the termination, and why.  */
 
 ENTRY(strrchr)
+	DELOUSE(0)
 	cbz	x1, L(null_search)
 	/* Magic constant 0x40100401 to allow us to identify which lane
 	   matches the requested byte.  Magic constant 0x80200802 used
diff --git a/sysdeps/aarch64/sysdep.h b/sysdeps/aarch64/sysdep.h
index f2ea821..c4ff5e7 100644
--- a/sysdeps/aarch64/sysdep.h
+++ b/sysdeps/aarch64/sysdep.h
@@ -25,10 +25,12 @@
 #define AARCH64_R(NAME)	R_AARCH64_ ## NAME
 #define PTR_REG(n)	x##n
 #define PTR_LOG_SIZE	3
-#else
+#define DELOUSE(n)
+#else /* __ILP32__ */
 #define AARCH64_R(NAME)	R_AARCH64_P32_ ## NAME
 #define PTR_REG(n)	w##n
 #define PTR_LOG_SIZE	2
+#define DELOUSE(n)	mov w##n, w##n
 #endif
 
 #define PTR_SIZE	(1<<PTR_LOG_SIZE)
diff --git a/sysdeps/unix/sysv/linux/aarch64/clone.S b/sysdeps/unix/sysv/linux/aarch64/clone.S
index 596fb9c..91a1d4c 100644
--- a/sysdeps/unix/sysv/linux/aarch64/clone.S
+++ b/sysdeps/unix/sysv/linux/aarch64/clone.S
@@ -39,6 +39,13 @@
  */
 	.text
 ENTRY(__clone)
+	DELOUSE(0)
+	DELOUSE(1)
+	DELOUSE(2)
+	DELOUSE(3)
+	DELOUSE(4)
+	DELOUSE(5)
+	DELOUSE(6)
 	/* Save args for the child.  */
 	mov	x10, x0
 	mov	x11, x2
diff --git a/sysdeps/unix/sysv/linux/aarch64/getcontext.S b/sysdeps/unix/sysv/linux/aarch64/getcontext.S
index 71e526c..35ff326 100644
--- a/sysdeps/unix/sysv/linux/aarch64/getcontext.S
+++ b/sysdeps/unix/sysv/linux/aarch64/getcontext.S
@@ -30,6 +30,7 @@
 	.text
 
 ENTRY(__getcontext)
+	DELOUSE(0)
 	/* The saved context will return to the getcontext() call point
 	   with a return value of 0 */
 	str	xzr, [x0, oX0 + 0 * SZREG]
diff --git a/sysdeps/unix/sysv/linux/aarch64/setcontext.S b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
index d17f8c8..7d854bd 100644
--- a/sysdeps/unix/sysv/linux/aarch64/setcontext.S
+++ b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
@@ -34,6 +34,7 @@
 	.text
 
 ENTRY (__setcontext)
+	DELOUSE(0)
 	/* Save a copy of UCP.  */
 	mov	x9, x0
 
diff --git a/sysdeps/unix/sysv/linux/aarch64/swapcontext.S b/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
index c1a16f3..764fedc 100644
--- a/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
+++ b/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
@@ -27,6 +27,7 @@
 	.text
 
 ENTRY(__swapcontext)
+	DELOUSE(0)
 	/* Set the value returned when swapcontext() returns in this context. */
 	str	xzr, [x0, oX0 + 0 * SZREG]
 
-- 
2.7.4