On 12/03/2011 09:20 AM, Iain Sandoe wrote: > version 2 is a modification of your original: > > a) -FRAME+BASE(r1) cannot be guaranteed to be vec-aligned in general (it > isn't on m32 darwin) > > ... so I've taken the liberty of rounding the gtm_buffer object and then > pointing r4 at original_sp-rounded_size, which is what we want for the call > to GTM_begin_transaction anyway.
I've kept this in the version below, but I cannot see how that can be, since your version of BASE is 8*WS = 64, a multiple of 16. > b) I've added the CR etc. wrapped in __MACH__ ifdefs. Taken out of the ifdefs to be done everywhere. > c) ";" is a comment introducer for Darwin's asm .. so I unwrapped those lines > ... Sure. > d) I put in the logic for handling __USER_LABEL_PREFIX__ . Merged into the FUNC / END / HIDDEN macros. > e) The real problem is finding a non-horrible way of dealing with the %r <=> > r issue - and I've not done that so far... Dropped the %r entirely in favor of bare numbers, which is what the compiler emits by default. I kept the %[rfv] in the cfi directives though; I assume that darwin simply doesn't have those and so it won't be an issue. If worst comes to worst, we can map those to raw dwarf columns, but I thought this was more readable in case we can keep them. Give this a go. The full tree is git://repo.or.cz/gcc/rth.git rth/tm-next which is what I actually tested on ppc64-linux, but I've extracted the middle three patches from that tree, which ought to apply to mainline. r~
diff --git a/libitm/config/generic/asmcfi.h b/libitm/config/generic/asmcfi.h index 4344d6f..0727f41 100644 --- a/libitm/config/generic/asmcfi.h +++ b/libitm/config/generic/asmcfi.h @@ -1,4 +1,3 @@ - /* Copyright (C) 2011 Free Software Foundation, Inc. Contributed by Richard Henderson <r...@redhat.com>. @@ -32,6 +31,9 @@ #define cfi_def_cfa_offset(n) .cfi_def_cfa_offset n #define cfi_def_cfa(r,n) .cfi_def_cfa r, n #define cfi_register(o,n) .cfi_register o, n +#define cfi_offset(r,o) .cfi_offset r, o +#define cfi_restore(r) .cfi_restore r +#define cfi_undefined(r) .cfi_undefined r #else @@ -40,5 +42,8 @@ #define cfi_def_cfa_offset(n) #define cfi_def_cfa(r,n) #define cfi_register(o,n) +#define cfi_offset(r,o) +#define cfi_restore(r) +#define cfi_undefined(r) #endif /* HAVE_AS_CFI_PSEUDO_OP */ diff --git a/libitm/config/linux/powerpc/futex_bits.h b/libitm/config/linux/powerpc/futex_bits.h new file mode 100644 index 0000000..5587fca --- /dev/null +++ b/libitm/config/linux/powerpc/futex_bits.h @@ -0,0 +1,54 @@ +/* Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <r...@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sys/syscall.h> + +static inline long +sys_futex0 (int *addr, int op, int val) +{ + register long int r0 __asm__ ("r0"); + register long int r3 __asm__ ("r3"); + register long int r4 __asm__ ("r4"); + register long int r5 __asm__ ("r5"); + register long int r6 __asm__ ("r6"); + + r0 = SYS_futex; + r3 = (long) addr; + r4 = op; + r5 = val; + r6 = 0; + + /* ??? The powerpc64 sysdep.h file clobbers ctr; the powerpc32 sysdep.h + doesn't. It doesn't much matter for us. In the interest of unity, + go ahead and clobber it always. */ + /* After sc, mfcr copies CR into r0; bit 28 of that value is CR0[SO], which the Linux powerpc syscall convention sets on failure. In that case r3 is returned unchanged as the error value; otherwise 0 is returned. */ + __asm volatile ("sc; mfcr %0" + : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6) + : "r"(r0), "r"(r3), "r"(r4), "r"(r5), "r"(r6) + : "r7", "r8", "r9", "r10", "r11", "r12", + "cr0", "ctr", "memory"); + if (__builtin_expect (r0 & (1 << 28), 0)) + return r3; + return 0; +} diff --git a/libitm/config/powerpc/cacheline.h b/libitm/config/powerpc/cacheline.h new file mode 100644 index 0000000..e20cfec --- /dev/null +++ b/libitm/config/powerpc/cacheline.h @@ -0,0 +1,38 @@ +/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <r...@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef LIBITM_POWERPC_CACHELINE_H +#define LIBITM_POWERPC_CACHELINE_H 1 + +// A cacheline is the smallest unit with which locks are associated. +// The current implementation of the _ITM_[RW] barriers assumes that +// all data types can fit (aligned) within a cacheline, which means +// in practice sizeof(complex long double) is the smallest cacheline size. +// It ought to be small enough for efficient manipulation of the +// modification mask, below. +#define CACHELINE_SIZE 64 + +#include "config/generic/cacheline.h" + +#endif // LIBITM_POWERPC_CACHELINE_H diff --git a/libitm/config/powerpc/sjlj.S b/libitm/config/powerpc/sjlj.S new file mode 100644 index 0000000..109a7a8 --- /dev/null +++ b/libitm/config/powerpc/sjlj.S @@ -0,0 +1,387 @@ +/* Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <r...@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + + .text + +#include "asmcfi.h" + +#if defined(__powerpc64__) && defined(__ELF__) +.macro FUNC name + .globl \name, .\name + .section ".opd","aw" + .align 3 +\name: + .quad .\name, .TOC.@tocbase, 0 + .size \name, 24 + .type .\name, @function + .text +.\name: +.endm +.macro END name + .size .\name, . - .\name +.endm +.macro HIDDEN name + .hidden \name, .\name +.endm +#elif defined(__ELF__) +.macro FUNC name + .globl \name + .type \name, @function +\name: +.endm +.macro END name + .size \name, . - \name +.endm +.macro HIDDEN name + .hidden \name +.endm +#elif defined(_CALL_DARWIN) +.macro FUNC name + .globl _$0 +_$0: +.endmacro +.macro END name +.endmacro +.macro HIDDEN name + .private_extern _$0 +.endmacro +# ifdef __ppc64__ + .machine ppc64 +# else + .machine ppc7400 +# endif +#else +#error "unsupported system" +#endif + +/* Parameterize the code for 32-bit vs 64-bit. */ +#if defined(__powerpc64__) || defined(__ppc64__) +#define ldreg ld +#define streg std +#define stregu stdu +#define WS 8 +#else +#define ldreg lwz +#define streg stw +#define stregu stwu +#define WS 4 +#endif + +/* Parameterize the code for call frame constants. 
*/ +#if defined(_CALL_AIXDESC) +# define BASE 6*WS +# define LR_SAVE 2*WS +#elif defined(_CALL_SYSV) +# define BASE 2*WS +# define LR_SAVE 1*WS +#elif defined(_CALL_DARWIN) +# define BASE (6*WS + 2*WS) +# define LR_SAVE 2*WS +#else +# error "unsupported system" +#endif + +#if defined(__ALTIVEC__) || defined(__VSX__) +# define OFS_VR 0 +# define OFS_VSCR 12*16 +# define OFS_VR_END OFS_VSCR + 8 +#else +# define OFS_VR_END 0 +#endif +#ifndef _SOFT_FLOAT +# define OFS_FR OFS_VR_END +# define OFS_FPSCR OFS_FR + 18*8 +# define OFS_FR_END OFS_FPSCR + 8 +#else +# define OFS_FR_END OFS_VR_END +#endif +#define OFS_GR OFS_FR_END +#define OFS_CFA OFS_GR + 18*WS +#define OFS_LR OFS_CFA + WS +#define OFS_TOC OFS_LR + WS +#define OFS_CR OFS_TOC + WS +#define OFS_END (((OFS_CR + WS + 15) / 16) * 16) + +#define FRAME (((BASE + OFS_END + 15) / 16) * 16) +#define VRSAVE 256 + + .align 4 +FUNC _ITM_beginTransaction + cfi_startproc + mflr 0 + mfcr 5 + addi 4, 1, -OFS_END + mr 6, 1 + streg 0, LR_SAVE(1) + stregu 1, -FRAME(1) + cfi_def_cfa_offset(FRAME) + cfi_offset(65, LR_SAVE) + streg 6, OFS_CFA(4) + streg 0, OFS_LR(4) +#ifdef _CALL_DARWIN + streg 13, OFS_TOC(4) +#else + streg 2, OFS_TOC(4) +#endif + streg 5, OFS_CR(4) + streg 14, 0*WS+OFS_GR(4) + streg 15, 1*WS+OFS_GR(4) + streg 16, 2*WS+OFS_GR(4) + streg 17, 3*WS+OFS_GR(4) + streg 18, 4*WS+OFS_GR(4) + streg 19, 5*WS+OFS_GR(4) + streg 20, 6*WS+OFS_GR(4) + streg 21, 7*WS+OFS_GR(4) + streg 22, 8*WS+OFS_GR(4) + streg 23, 9*WS+OFS_GR(4) + streg 24, 10*WS+OFS_GR(4) + streg 25, 11*WS+OFS_GR(4) + streg 26, 12*WS+OFS_GR(4) + streg 27, 13*WS+OFS_GR(4) + streg 28, 14*WS+OFS_GR(4) + streg 29, 15*WS+OFS_GR(4) + streg 30, 16*WS+OFS_GR(4) + streg 31, 17*WS+OFS_GR(4) + +#ifndef _SOFT_FLOAT + /* ??? Determine when FPRs not present. */ + /* ??? Test 3 for pr_hasNoFloatUpdate and skip the fp save. + This is not yet set by the compiler. 
*/ + mffs 0 + stfd 14, 0+OFS_FR(4) + stfd 15, 8+OFS_FR(4) + stfd 16, 16+OFS_FR(4) + stfd 17, 24+OFS_FR(4) + stfd 18, 32+OFS_FR(4) + stfd 19, 40+OFS_FR(4) + stfd 20, 48+OFS_FR(4) + stfd 21, 56+OFS_FR(4) + stfd 22, 64+OFS_FR(4) + stfd 23, 72+OFS_FR(4) + stfd 24, 80+OFS_FR(4) + stfd 25, 88+OFS_FR(4) + stfd 26, 96+OFS_FR(4) + stfd 27,104+OFS_FR(4) + stfd 28,112+OFS_FR(4) + stfd 29,120+OFS_FR(4) + stfd 30,128+OFS_FR(4) + stfd 31,136+OFS_FR(4) + stfd 0, OFS_FPSCR(4) +#endif + +#if defined(__ALTIVEC__) || defined(__VSX__) + /* ??? Determine when VRs not present. */ + /* ??? Test 3 for pr_hasNoVectorUpdate and skip the vr save. + This is not yet set by the compiler. */ + addi 5, 4, OFS_VR + addi 6, 4, OFS_VR+16 + mfspr 0, VRSAVE + stvx 20, 0, 5 + addi 5, 5, 32 + stvx 21, 0, 6 + addi 6, 6, 32 + stvx 22, 0, 5 + addi 5, 5, 32 + stvx 23, 0, 6 + addi 6, 6, 32 + stvx 24, 0, 5 + addi 5, 5, 32 + stvx 25, 0, 6 + addi 6, 6, 32 + stvx 26, 0, 5 + addi 5, 5, 32 + stvx 27, 0, 6 + addi 6, 6, 32 + stvx 28, 0, 5 + addi 5, 5, 32 + stvx 29, 0, 6 + addi 6, 6, 32 + stvx 30, 0, 5 + stvx 31, 0, 6 + streg 0, OFS_VSCR(4) +#endif + + bl GTM_begin_transaction + nop + + ldreg 0, LR_SAVE+FRAME(1) + mtlr 0 + addi 1, 1, FRAME + cfi_def_cfa_offset(0) + cfi_restore(65) + blr + cfi_endproc +END _ITM_beginTransaction + + .align 4 + HIDDEN GTM_longjmp +FUNC GTM_longjmp + cfi_startproc +#if defined(__ALTIVEC__) || defined(__VSX__) + /* ??? Determine when VRs not present. */ + /* ??? Test 5 for pr_hasNoVectorUpdate and skip the vr restore. + This is not yet set by the compiler. 
*/ + addi 6, 4, OFS_VR + addi 7, 4, OFS_VR+16 + ldreg 0, OFS_VSCR(4) + cfi_undefined(%v20) + cfi_undefined(%v21) + cfi_undefined(%v22) + cfi_undefined(%v23) + cfi_undefined(%v24) + cfi_undefined(%v25) + cfi_undefined(%v26) + cfi_undefined(%v27) + cfi_undefined(%v28) + cfi_undefined(%v29) + cfi_undefined(%v30) + cfi_undefined(%v31) + lvx 20, 0, 6 + addi 6, 6, 32 + lvx 21, 0, 7 + addi 7, 7, 32 + lvx 22, 0, 6 + addi 6, 6, 32 + lvx 23, 0, 7 + addi 7, 7, 32 + lvx 24, 0, 6 + addi 6, 6, 32 + lvx 25, 0, 7 + addi 7, 7, 32 + lvx 26, 0, 6 + addi 6, 6, 32 + lvx 27, 0, 7 + addi 7, 7, 32 + lvx 28, 0, 6 + addi 6, 6, 32 + lvx 29, 0, 7 + addi 7, 7, 32 + lvx 30, 0, 6 + lvx 31, 0, 7 + mtspr VRSAVE, 0 +#endif + +#ifndef _SOFT_FLOAT + /* ??? Determine when FPRs not present. */ + /* ??? Test 5 for pr_hasNoFloatUpdate and skip the fp load. + This is not yet set by the compiler. */ + lfd 0, OFS_FPSCR(4) + cfi_undefined(%f14) + cfi_undefined(%f15) + cfi_undefined(%f16) + cfi_undefined(%f17) + cfi_undefined(%f18) + cfi_undefined(%f19) + cfi_undefined(%f20) + cfi_undefined(%f21) + cfi_undefined(%f22) + cfi_undefined(%f23) + cfi_undefined(%f24) + cfi_undefined(%f25) + cfi_undefined(%f26) + cfi_undefined(%f27) + cfi_undefined(%f28) + cfi_undefined(%f29) + cfi_undefined(%f30) + cfi_undefined(%f31) + lfd 14, 0+OFS_FR(4) + lfd 15, 8+OFS_FR(4) + lfd 16, 16+OFS_FR(4) + lfd 17, 24+OFS_FR(4) + lfd 18, 32+OFS_FR(4) + lfd 19, 40+OFS_FR(4) + lfd 20, 48+OFS_FR(4) + lfd 21, 56+OFS_FR(4) + lfd 22, 64+OFS_FR(4) + lfd 23, 72+OFS_FR(4) + lfd 24, 80+OFS_FR(4) + lfd 25, 88+OFS_FR(4) + lfd 26, 96+OFS_FR(4) + lfd 27,104+OFS_FR(4) + lfd 28,112+OFS_FR(4) + lfd 29,120+OFS_FR(4) + lfd 30,128+OFS_FR(4) + lfd 31,136+OFS_FR(4) + mtfsf 0xff, 0 +#endif + + ldreg 6, OFS_CFA(4) + ldreg 0, OFS_LR(4) +#ifdef _CALL_DARWIN + ldreg 13, OFS_TOC(4) +#else + ldreg 2, OFS_TOC(4) +#endif + ldreg 7, OFS_CR(4) + /* At the instant we restore the LR, the only coherent view of + the world we have is into the new stack frame. 
Define the + CFA in terms of the not-yet-restored stack pointer. This will + last until the end of the function. */ + mtlr 0 + cfi_def_cfa(%r6, 0) + cfi_undefined(%r14) + cfi_undefined(%r15) + cfi_undefined(%r16) + cfi_undefined(%r17) + cfi_undefined(%r18) + cfi_undefined(%r19) + cfi_undefined(%r20) + cfi_undefined(%r21) + cfi_undefined(%r22) + cfi_undefined(%r23) + cfi_undefined(%r24) + cfi_undefined(%r25) + cfi_undefined(%r26) + cfi_undefined(%r27) + cfi_undefined(%r28) + cfi_undefined(%r29) + cfi_undefined(%r30) + cfi_undefined(%r31) + mtcr 7 + ldreg 14, 0*WS+OFS_GR(4) + ldreg 15, 1*WS+OFS_GR(4) + ldreg 16, 2*WS+OFS_GR(4) + ldreg 17, 3*WS+OFS_GR(4) + ldreg 18, 4*WS+OFS_GR(4) + ldreg 19, 5*WS+OFS_GR(4) + ldreg 20, 6*WS+OFS_GR(4) + ldreg 21, 7*WS+OFS_GR(4) + ldreg 22, 8*WS+OFS_GR(4) + ldreg 23, 9*WS+OFS_GR(4) + ldreg 24, 10*WS+OFS_GR(4) + ldreg 25, 11*WS+OFS_GR(4) + ldreg 26, 12*WS+OFS_GR(4) + ldreg 27, 13*WS+OFS_GR(4) + ldreg 28, 14*WS+OFS_GR(4) + ldreg 29, 15*WS+OFS_GR(4) + ldreg 30, 16*WS+OFS_GR(4) + ldreg 31, 17*WS+OFS_GR(4) + mr 1, 6 + blr + cfi_endproc +END GTM_longjmp + +#ifdef __linux__ +.section .note.GNU-stack, "", @progbits +#endif diff --git a/libitm/config/powerpc/target.h b/libitm/config/powerpc/target.h new file mode 100644 index 0000000..803397a --- /dev/null +++ b/libitm/config/powerpc/target.h @@ -0,0 +1,70 @@ +/* Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <r...@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +namespace GTM HIDDEN { + +typedef int v128 __attribute__((vector_size(16), may_alias, aligned(16))); +typedef struct gtm_jmpbuf +{ +#if defined(__ALTIVEC__) || defined(__VSX__) + v128 vr[12]; /* vr20-vr31 */ + unsigned long long vscr; /* slot at OFS_VSCR; sjlj.S stores VRSAVE here. long long for padding only */ +#endif +#ifndef _SOFT_FLOAT + double fr[18]; /* f14-f31 */ + double fpscr; +#endif + unsigned long gr[18]; /* r14-r31 */ + void *cfa; /* stack pointer on entry to _ITM_beginTransaction */ + unsigned long pc; /* link register saved by sjlj.S (OFS_LR) */ + unsigned long toc; /* r2 on aix, r13 on darwin */ + unsigned long cr; /* condition register saved by sjlj.S (OFS_CR) */ +} gtm_jmpbuf; + +/* The size of one line in hardware caches (in bytes). */ +#if defined (__powerpc64__) || defined (__ppc64__) +# define HW_CACHELINE_SIZE 128 +#else +# define HW_CACHELINE_SIZE 64 +#endif + +static inline void +cpu_relax (void) +{ + __asm volatile ("" : : : "memory"); +} + +static inline void +atomic_read_barrier (void) +{ + __sync_synchronize (); +} + +static inline void +atomic_write_barrier (void) +{ + __sync_synchronize (); +} + +} // namespace GTM diff --git a/libitm/configure.tgt b/libitm/configure.tgt index eac6f50..6046d54 100644 --- a/libitm/configure.tgt +++ b/libitm/configure.tgt @@ -46,7 +46,8 @@ fi # Map the target cpu to an ARCH sub-directory. At the same time, # work out any special compilation flags as necessary. 
case "${target_cpu}" in - alpha*) ARCH=alpha ;; + alpha*) ARCH=alpha ;; + rs6000 | powerpc*) ARCH=powerpc ;; i[3456]86) case " ${CC} ${CFLAGS} " in @@ -90,6 +91,11 @@ case "${target}" in fi ;; + powerpc*-*-aix* | rs6000-*-aix*) + # The system ought to be supported, but sjlj.S has not been ported. + UNSUPPORTED=1 + ;; + *-*-gnu* | *-*-k*bsd*-gnu \ | *-*-netbsd* | *-*-freebsd* | *-*-openbsd* \ | *-*-solaris2* | *-*-sysv4* | *-*-irix6* | *-*-osf* | *-*-hpux11* \