Four small patches relative to the last set. (1) Big thinko in how to set the EQ condition from the strex results.
And surprisingly that wrong condition PASSES a large portion of the testsuite, by running through the ll/sc loop twice, with the first time being truly successful and the second time producing the boolean output that we wanted. However, this mistake was visible via infinite looping in one libitm test. (2) Use syscall as discussed w/ Joseph. It's been too many weeks since that exchange, and I can't remember if I actually forgot to do it or simply failed to merge patch sets properly across N machines. Sorry about that, anyway. (3) Ramana's feedback re cpu_relax. (4) Ramana's feedback re sjlj.S, specifically: thumb2 and movt. Built (w/o bootstrap) and then built and tested libitm w/ -march={armv7-a,armv5te} {,-mthumb}. Full testsuite run started; hopefully done overnight. Full tree at git://repo.or.cz/gcc/rth.git rth/atomic/arm if there's any doubt about what's relative to what. Better? Ok? r~
>From 291880c393066ca9cf73f989521116c82f748b2c Mon Sep 17 00:00:00 2001 From: Richard Henderson <r...@redhat.com> Date: Mon, 12 Dec 2011 18:22:21 -0500 Subject: [PATCH 1/4] fixup arm: end test for ll/sc --- gcc/config/arm/arm.c | 8 ++++---- gcc/config/arm/arm.md | 15 +-------------- 2 files changed, 5 insertions(+), 18 deletions(-) diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 0b31ebb..f829a83 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -24755,13 +24755,13 @@ arm_split_compare_and_swap (rtx operands[]) arm_emit_store_exclusive (mode, scratch, mem, newval); /* Weak or strong, we want EQ to be true for success, so that we - match the flags that we got from the compare above. Thus we - prefer to use TEQ instead of TST here. */ - emit_insn (gen_xorsi3_compare0_scratch (scratch, const1_rtx)); + match the flags that we got from the compare above. */ + cond = gen_rtx_REG (CCmode, CC_REGNUM); + x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, cond, x)); if (!is_weak) { - cond = gen_rtx_REG (CCmode, CC_REGNUM); x = gen_rtx_NE (VOIDmode, cond, const0_rtx); x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, gen_rtx_LABEL_REF (Pmode, label1), pc_rtx); diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 2faf0ef..1af825d 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -3079,20 +3079,7 @@ [(set_attr "conds" "set")] ) -(define_insn "*xorsi3_compare0_scratch_arm" - [(set (reg:CC_NOOV CC_REGNUM) - (compare:CC_NOOV (xor:SI (match_operand:SI 0 "s_register_operand" "r") - (match_operand:SI 1 "arm_rhs_operand" "rI")) - (const_int 0)))] - "TARGET_ARM" - "teq%?\\t%0, %1" - [(set_attr "conds" "set") - ; Not predicable via IT block (since it doesn't modify the flags there), - ; but we're keen on having this predicate in ARM mode for compare-and-swap. 
- (set_attr "predicable" "yes")] -) - -(define_insn "xorsi3_compare0_scratch" +(define_insn "*xorsi3_compare0_scratch" [(set (reg:CC_NOOV CC_REGNUM) (compare:CC_NOOV (xor:SI (match_operand:SI 0 "s_register_operand" "r") (match_operand:SI 1 "arm_rhs_operand" "rI")) -- 1.7.7.3
>From 633084f18a38c1283543f247b82d65daa662a4ea Mon Sep 17 00:00:00 2001 From: Richard Henderson <r...@redhat.com> Date: Mon, 12 Dec 2011 18:23:37 -0500 Subject: [PATCH 2/4] fixup arm: use syscall(3) for futex --- libitm/config/linux/arm/futex_bits.h | 21 +++++---------------- libitm/config/linux/futex.cc | 1 + libitm/config/linux/futex.h | 2 -- 3 files changed, 6 insertions(+), 18 deletions(-) diff --git a/libitm/config/linux/arm/futex_bits.h b/libitm/config/linux/arm/futex_bits.h index 7e1b52f..0a21763 100644 --- a/libitm/config/linux/arm/futex_bits.h +++ b/libitm/config/linux/arm/futex_bits.h @@ -24,25 +24,14 @@ /* Provide target-specific access to the futex system call. */ +#include <unistd.h> #include <sys/syscall.h> static inline long sys_futex0 (int *addr, long op, long val) { - register long sc_0 __asm__("r0"); - register long sc_1 __asm__("r1"); - register long sc_2 __asm__("r2"); - register long sc_3 __asm__("r3"); - - sc_0 = (long) addr; - sc_1 = op; - sc_2 = val; - sc_3 = 0; - - __asm volatile ("swi %1" - : "+r"(sc_0) - : "i"(SYS_futex), "r"(sc_1), "r"(sc_2), "r"(sc_3) - : "memory"); - - return sc_0; + /* There are two styles of syscall, and in the eabi style the syscall + number goes into the thumb frame pointer. We need to either write + this in pure assembler or just defer entirely to libc. 
*/ + return syscall (SYS_futex, addr, op, val, 0); } diff --git a/libitm/config/linux/futex.cc b/libitm/config/linux/futex.cc index 45c9db6..c4b99a9 100644 --- a/libitm/config/linux/futex.cc +++ b/libitm/config/linux/futex.cc @@ -26,6 +26,7 @@ #include "libitm_i.h" #include "futex.h" +#include "futex_bits.h" #include <errno.h> namespace GTM HIDDEN { diff --git a/libitm/config/linux/futex.h b/libitm/config/linux/futex.h index 326c0f5..00161b4 100644 --- a/libitm/config/linux/futex.h +++ b/libitm/config/linux/futex.h @@ -29,8 +29,6 @@ namespace GTM HIDDEN { -#include "futex_bits.h" - extern void futex_wait (int *addr, int val); extern long futex_wake (int *addr, int count); -- 1.7.7.3
>From 3d9f74381c8d18ff459f88d4b6d6f088bd9b30a1 Mon Sep 17 00:00:00 2001 From: Richard Henderson <r...@redhat.com> Date: Mon, 12 Dec 2011 18:25:19 -0500 Subject: [PATCH 3/4] fixup arm: use __sync_synchronize for relax --- libitm/config/arm/target.h | 7 +++++-- 1 files changed, 5 insertions(+), 2 deletions(-) diff --git a/libitm/config/arm/target.h b/libitm/config/arm/target.h index 99dd99a..c0ea8f2 100644 --- a/libitm/config/arm/target.h +++ b/libitm/config/arm/target.h @@ -43,8 +43,11 @@ typedef struct gtm_jmpbuf static inline void cpu_relax (void) { - /* ??? Maybe use WFE. */ - __asm volatile ("" : : : "memory"); + /* ??? The kernel uses the condition + #if __LINUX_ARM_ARCH__ == 6 || defined(CONFIG_ARM_ERRATA_754327) + Given that we're actually just waiting, it doesn't seem like it + hurts to simply use a full barrier all the time. */ + __sync_synchronize (); } static inline void -- 1.7.7.3
>From d21f8afdbca0b653ff243cebeed5786407c70934 Mon Sep 17 00:00:00 2001 From: Richard Henderson <r...@redhat.com> Date: Mon, 12 Dec 2011 19:13:40 -0500 Subject: [PATCH 4/4] fixup arm: sjlj changes from ramana, addressing errors --- libitm/config/arm/sjlj.S | 46 ++++++++++++++++++++++++++++++++++++---------- 1 files changed, 36 insertions(+), 10 deletions(-) diff --git a/libitm/config/arm/sjlj.S b/libitm/config/arm/sjlj.S index cf2ae4b..b9a063c 100644 --- a/libitm/config/arm/sjlj.S +++ b/libitm/config/arm/sjlj.S @@ -27,19 +27,46 @@ .syntax unified -#if defined(__thumb__) +#if defined(__thumb2__) +# define PC_OFS 4 .thumb .thumb_func +#else +# define PC_OFS 8 +#endif + +#if defined (__thumb2__) && defined(__ARM_ARCH_6T2__) +# define HAVE_MOVT + .arch armv6t2 +#elif defined (__ARM_ARCH_7A__) +# define HAVE_MOVT + .arch armv7-a +#elif defined (__ARM_ARCH_7R__) +# define HAVE_MOVT + .arch armv7-r +#elif defined (__ARM_ARCH_7M__) +# define HAVE_MOVT + .arch armv7-m #endif -/* ??? Use movw/movt when possible. */ -#if defined(PIC) +#if defined(HAVE_MOVT) && defined(PIC) +.macro ldaddr reg, addr + movw \reg, #:lower16:(\addr - (98f + PC_OFS)) + movt \reg, #:upper16:(\addr - (98f + PC_OFS)) +98: add \reg, \reg, pc +.endm +#elif defined(HAVE_MOVT) +.macro ldaddr reg, addr + movw \reg, #:lower16:\addr + movt \reg, #:upper16:\addr +.endm +#elif defined(PIC) .macro ldaddr reg, addr ldr \reg, 99f - ldr \reg, [\reg] 98: add \reg, \reg, pc .subsection 1 -99: .word \addr - (98b + 8) + .align 2 +99: .word \addr - (98b + PC_OFS) .subsection 0 .endm #else @@ -56,12 +83,8 @@ _ITM_beginTransaction: .fnstart cfi_startproc -#ifdef __thumb__ mov ip, sp push { r4-r11, ip, lr } -#else - push { r4-r11, sp, lr } -#endif .save { lr } .pad #(9*4) cfi_adjust_cfa_offset(40) @@ -71,6 +94,7 @@ _ITM_beginTransaction: cfi_adjust_cfa_offset(14*8) ldaddr r2, GTM_hwcap + ldr r2, [r2] /* Store the VFP registers. Don't use VFP instructions directly because this code is used in non-VFP multilibs. 
*/ @@ -100,12 +124,14 @@ _ITM_beginTransaction: cfi_endproc .size _ITM_beginTransaction, . - _ITM_beginTransaction + .align 2 .global GTM_longjmp .hidden GTM_longjmp .type GTM_longjmp, %function GTM_longjmp: ldaddr r2, GTM_hwcap + ldr r2, [r2] tst r2, #HWCAP_ARM_VFP it ne @@ -121,7 +147,7 @@ GTM_longjmp: ldcl p1, cr15, [r1, #104] 1: add r1, r1, #(14*8) /* Skip both VFP and iWMMXt blocks */ -#ifdef __thumb__ +#ifdef __thumb2__ ldm r1, { r4-r11, ip, lr } mov sp, ip bx lr -- 1.7.7.3