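This is a tad rough, but not too bad. There is no hwcap handling yet (i.e. no run-time check for whether the FP or vector registers are actually present); something similar to what I did for ARM will probably work. Honestly, I'd prefer that someone else figure out what to do with all that.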
No support for non-ELF targets, i.e. AIX and Darwin. I'm not 100% sure how to handle the assembly markup for those, and I couldn't test it anyway. Again, I'd prefer someone else figure that stuff out. These targets are still marked as UNSUPPORTED, so I ought not to be breaking the bootstrap there. That said, this is good enough to pass all tests on ppc{,64}-linux. Oh, for the record, I think we should probably be saving and restoring the fp state on all targets. If we restart a transaction, we're really saying that absolutely nothing happened. Something like double a, b, c; __transaction_atomic { a = b+c; } shouldn't erroneously set the overflow flag if the first iteration of the transaction generates an infinity but the final iteration doesn't. The x86 port is currently wrong for this, as is the port I just posted for ARM, but I make the attempt here. r~
commit ad469de30f6838bdf25df5a22f254609ddb43136 Author: Richard Henderson <r...@twiddle.net> Date: Tue Nov 29 16:13:50 2011 -0800 libitm: Initial PowerPC support. diff --git a/libitm/config/generic/asmcfi.h b/libitm/config/generic/asmcfi.h index 4344d6f..0727f41 100644 --- a/libitm/config/generic/asmcfi.h +++ b/libitm/config/generic/asmcfi.h @@ -1,4 +1,3 @@ - /* Copyright (C) 2011 Free Software Foundation, Inc. Contributed by Richard Henderson <r...@redhat.com>. @@ -32,6 +31,9 @@ #define cfi_def_cfa_offset(n) .cfi_def_cfa_offset n #define cfi_def_cfa(r,n) .cfi_def_cfa r, n #define cfi_register(o,n) .cfi_register o, n +#define cfi_offset(r,o) .cfi_offset r, o +#define cfi_restore(r) .cfi_restore r +#define cfi_undefined(r) .cfi_undefined r #else @@ -40,5 +42,8 @@ #define cfi_def_cfa_offset(n) #define cfi_def_cfa(r,n) #define cfi_register(o,n) +#define cfi_offset(r,o) +#define cfi_restore(r) +#define cfi_undefined(r) #endif /* HAVE_AS_CFI_PSEUDO_OP */ diff --git a/libitm/config/linux/powerpc/futex_bits.h b/libitm/config/linux/powerpc/futex_bits.h new file mode 100644 index 0000000..5587fca --- /dev/null +++ b/libitm/config/linux/powerpc/futex_bits.h @@ -0,0 +1,54 @@ +/* Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <r...@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sys/syscall.h> + +static inline long +sys_futex0 (int *addr, int op, int val) +{ + register long int r0 __asm__ ("r0"); + register long int r3 __asm__ ("r3"); + register long int r4 __asm__ ("r4"); + register long int r5 __asm__ ("r5"); + register long int r6 __asm__ ("r6"); + + r0 = SYS_futex; + r3 = (long) addr; + r4 = op; + r5 = val; + r6 = 0; + + /* ??? The powerpc64 sysdep.h file clobbers ctr; the powerpc32 sysdep.h + doesn't. It doesn't much matter for us. In the interest of unity, + go ahead and clobber it always. */ + + __asm volatile ("sc; mfcr %0" + : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6) + : "r"(r0), "r"(r3), "r"(r4), "r"(r5), "r"(r6) + : "r7", "r8", "r9", "r10", "r11", "r12", + "cr0", "ctr", "memory"); + if (__builtin_expect (r0 & (1 << 28), 0)) + return r3; + return 0; +} diff --git a/libitm/config/powerpc/cacheline.h b/libitm/config/powerpc/cacheline.h new file mode 100644 index 0000000..e20cfec --- /dev/null +++ b/libitm/config/powerpc/cacheline.h @@ -0,0 +1,38 @@ +/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <r...@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef LIBITM_POWERPC_CACHELINE_H +#define LIBITM_POWERPC_CACHELINE_H 1 + +// A cacheline is the smallest unit with which locks are associated. +// The current implementation of the _ITM_[RW] barriers assumes that +// all data types can fit (aligned) within a cachline, which means +// in practice sizeof(complex long double) is the smallest cacheline size. +// It ought to be small enough for efficient manipulation of the +// modification mask, below. +#define CACHELINE_SIZE 64 + +#include "config/generic/cacheline.h" + +#endif // LIBITM_POWERPC_CACHELINE_H diff --git a/libitm/config/powerpc/sjlj.S b/libitm/config/powerpc/sjlj.S new file mode 100644 index 0000000..51cce77 --- /dev/null +++ b/libitm/config/powerpc/sjlj.S @@ -0,0 +1,336 @@ +/* Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <r...@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + + .text + +#include "asmcfi.h" + +#if defined(__powerpc64__) && defined(__ELF__) +.macro FUNC name + .globl \name, .\name + .section ".opd","aw" + .align 3 +\name: + .quad .\name, .TOC.@tocbase, 0 + .size \name, 24 + .type .\name, @function + .text +.\name: +.endm +.macro END name + .size .\name, . - .\name +.endm +.macro HIDDEN name + .hidden \name, .\name +.endm +#elif defined(__ELF__) +.macro FUNC name + .globl \name + .type \name, @function +\name: +.endm +.macro END name + .size \name, . - \name +.endm +.macro HIDDEN name + .hidden \name +.endm +#else +#error "unsupported system" +#endif + +/* Parameterize the code for 32-bit vs 64-bit. */ +#ifdef __powerpc64__ +#define ldreg ld +#define streg std +#define stregu stdu +#define WS 8 +#else +#define ldreg lwz +#define streg stw +#define stregu stwu +#define WS 4 +#endif + +/* Parameterize the code for call frame constants. 
*/ +#if defined(_CALL_AIXDESC) +# define BASE 6*WS +# define LR_SAVE 2*WS +#elif defined(_CALL_SYSV) +# define BASE 2*WS +# define LR_SAVE 1*WS +#else +# error "unsupported system" +#endif + +#if defined(__ALTIVEC__) || defined(__VSX__) +# define OFS_VR 0 +# define OFS_VSCR 12*16 +# define OFS_VR_END OFS_VSCR + 8 +#else +# define OFS_VR_END 0 +#endif +#ifndef _SOFT_FLOAT +# define OFS_FR OFS_VR_END +# define OFS_FPSCR OFS_FR + 18*8 +# define OFS_FR_END OFS_FPSCR + 8 +#else +# define OFS_FR_END OFS_VR_END +#endif +#define OFS_GR OFS_FR_END +#define OFS_CFA OFS_GR + 18*WS +#define OFS_LR OFS_CFA + WS +#define OFS_TOC OFS_LR + WS +#define OFS_END OFS_TOC + WS + +#define FRAME (((BASE + OFS_END + 15) / 16) * 16) +#define VRSAVE 256 + + .align 4 +FUNC _ITM_beginTransaction + cfi_startproc + mflr %r0 + streg %r0, LR_SAVE(%r1) + mr %r11, %r1 + stregu %r1, -FRAME(%r1) + cfi_def_cfa_offset(FRAME) + cfi_offset(65, LR_SAVE) + streg %r11, OFS_CFA+BASE(%r1) + streg %r0, OFS_LR+BASE(%r1) + streg %r2, OFS_TOC+BASE(%r1) + streg %r14, 0*WS+OFS_GR+BASE(%r1) + streg %r15, 1*WS+OFS_GR+BASE(%r1) + streg %r16, 2*WS+OFS_GR+BASE(%r1) + streg %r17, 3*WS+OFS_GR+BASE(%r1) + streg %r18, 4*WS+OFS_GR+BASE(%r1) + streg %r19, 5*WS+OFS_GR+BASE(%r1) + streg %r20, 6*WS+OFS_GR+BASE(%r1) + streg %r21, 7*WS+OFS_GR+BASE(%r1) + streg %r22, 8*WS+OFS_GR+BASE(%r1) + streg %r23, 9*WS+OFS_GR+BASE(%r1) + streg %r24, 10*WS+OFS_GR+BASE(%r1) + streg %r25, 11*WS+OFS_GR+BASE(%r1) + streg %r26, 12*WS+OFS_GR+BASE(%r1) + streg %r27, 13*WS+OFS_GR+BASE(%r1) + streg %r28, 14*WS+OFS_GR+BASE(%r1) + streg %r29, 15*WS+OFS_GR+BASE(%r1) + streg %r30, 16*WS+OFS_GR+BASE(%r1) + streg %r31, 17*WS+OFS_GR+BASE(%r1) + +#ifndef _SOFT_FLOAT + /* ??? Determine when FPRs not present. */ + /* ??? Test %r3 for pr_hasNoFloatUpdate and skip the fp save. + This is not yet set by the compiler. 
*/ + mffs %f0 + stfd %f14, 0+OFS_FR+BASE(%r1) + stfd %f15, 8+OFS_FR+BASE(%r1) + stfd %f16, 16+OFS_FR+BASE(%r1) + stfd %f17, 24+OFS_FR+BASE(%r1) + stfd %f18, 32+OFS_FR+BASE(%r1) + stfd %f19, 40+OFS_FR+BASE(%r1) + stfd %f20, 48+OFS_FR+BASE(%r1) + stfd %f21, 56+OFS_FR+BASE(%r1) + stfd %f22, 64+OFS_FR+BASE(%r1) + stfd %f23, 72+OFS_FR+BASE(%r1) + stfd %f24, 80+OFS_FR+BASE(%r1) + stfd %f25, 88+OFS_FR+BASE(%r1) + stfd %f26, 96+OFS_FR+BASE(%r1) + stfd %f27,104+OFS_FR+BASE(%r1) + stfd %f28,112+OFS_FR+BASE(%r1) + stfd %f29,120+OFS_FR+BASE(%r1) + stfd %f30,128+OFS_FR+BASE(%r1) + stfd %f31,136+OFS_FR+BASE(%r1) + stfd %f0, OFS_FPSCR+BASE(%r1) +#endif + +#if defined(__ALTIVEC__) || defined(__VSX__) + /* ??? Determine when VRs not present. */ + /* ??? Test %r3 for pr_hasNoVectorUpdate and skip the vr save. + This is not yet set by the compiler. */ + addi %r4, %r1, OFS_VR+BASE + addi %r5, %r1, OFS_VR+BASE+16 + mfspr %r0, VRSAVE + stvx %v20, 0, %r4; addi %r4, %r4, 32 + stvx %v21, 0, %r5; addi %r5, %r5, 32 + stvx %v22, 0, %r4; addi %r4, %r4, 32 + stvx %v23, 0, %r5; addi %r5, %r5, 32 + stvx %v24, 0, %r4; addi %r4, %r4, 32 + stvx %v25, 0, %r5; addi %r5, %r5, 32 + stvx %v26, 0, %r4; addi %r4, %r4, 32 + stvx %v27, 0, %r5; addi %r5, %r5, 32 + stvx %v28, 0, %r4; addi %r4, %r4, 32 + stvx %v29, 0, %r5; addi %r5, %r5, 32 + stvx %v30, 0, %r4 + stvx %v31, 0, %r5 + streg %r0, OFS_VSCR+BASE(%r1) +#endif + + addi %r4, %r1, BASE + bl GTM_begin_transaction + nop + + ldreg %r0, LR_SAVE+FRAME(%r1) + mtlr %r0 + addi %r1, %r1, FRAME + cfi_def_cfa_offset(0) + cfi_restore(65) + blr + cfi_endproc +END _ITM_beginTransaction + + .align 4 + HIDDEN GTM_longjmp +FUNC GTM_longjmp + cfi_startproc +#if defined(__ALTIVEC__) || defined(__VSX__) + /* ??? Determine when VRs not present. */ + /* ??? Test %r5 for pr_hasNoVectorUpdate and skip the vr restore. + This is not yet set by the compiler. 
*/ + addi %r6, %r4, OFS_VR + addi %r7, %r4, OFS_VR+16 + ldreg %r0, OFS_VSCR(%r4) + cfi_undefined(%v20) + cfi_undefined(%v21) + cfi_undefined(%v22) + cfi_undefined(%v23) + cfi_undefined(%v24) + cfi_undefined(%v25) + cfi_undefined(%v26) + cfi_undefined(%v27) + cfi_undefined(%v28) + cfi_undefined(%v29) + cfi_undefined(%v30) + cfi_undefined(%v31) + lvx %v20, 0, %r6; addi %r6, %r6, 32 + lvx %v21, 0, %r7; addi %r7, %r7, 32 + lvx %v22, 0, %r6; addi %r6, %r6, 32 + lvx %v23, 0, %r7; addi %r7, %r7, 32 + lvx %v24, 0, %r6; addi %r6, %r6, 32 + lvx %v25, 0, %r7; addi %r7, %r7, 32 + lvx %v26, 0, %r6; addi %r6, %r6, 32 + lvx %v27, 0, %r7; addi %r7, %r7, 32 + lvx %v28, 0, %r6; addi %r6, %r6, 32 + lvx %v29, 0, %r7; addi %r7, %r7, 32 + lvx %v30, 0, %r6 + lvx %v31, 0, %r7 + mtspr VRSAVE, %r0 +#endif + +#ifndef _SOFT_FLOAT + /* ??? Determine when FPRs not present. */ + /* ??? Test %r5 for pr_hasNoFloatUpdate and skip the fp load. + This is not yet set by the compiler. */ + lfd %f0, OFS_FPSCR(%r4) + cfi_undefined(%f14) + cfi_undefined(%f15) + cfi_undefined(%f16) + cfi_undefined(%f17) + cfi_undefined(%f18) + cfi_undefined(%f19) + cfi_undefined(%f20) + cfi_undefined(%f21) + cfi_undefined(%f22) + cfi_undefined(%f23) + cfi_undefined(%f24) + cfi_undefined(%f25) + cfi_undefined(%f26) + cfi_undefined(%f27) + cfi_undefined(%f28) + cfi_undefined(%f29) + cfi_undefined(%f30) + cfi_undefined(%f31) + lfd %f14, 0+OFS_FR(%r4) + lfd %f15, 8+OFS_FR(%r4) + lfd %f16, 16+OFS_FR(%r4) + lfd %f17, 24+OFS_FR(%r4) + lfd %f18, 32+OFS_FR(%r4) + lfd %f19, 40+OFS_FR(%r4) + lfd %f20, 48+OFS_FR(%r4) + lfd %f21, 56+OFS_FR(%r4) + lfd %f22, 64+OFS_FR(%r4) + lfd %f23, 72+OFS_FR(%r4) + lfd %f24, 80+OFS_FR(%r4) + lfd %f25, 88+OFS_FR(%r4) + lfd %f26, 96+OFS_FR(%r4) + lfd %f27,104+OFS_FR(%r4) + lfd %f28,112+OFS_FR(%r4) + lfd %f29,120+OFS_FR(%r4) + lfd %f30,128+OFS_FR(%r4) + lfd %f31,136+OFS_FR(%r4) + mtfsf 0xff, %f0 +#endif + + ldreg %r6, OFS_CFA(%r4) + ldreg %r0, OFS_LR(%r4) + ldreg %r2, OFS_TOC(%r4) + /* At the instant we 
restore the LR, the only coherent view of + the world we have is into the new stack frame. Define the + CFA in terms of the not-yet-restored stack pointer. This will + last until the end of the function. */ + mtlr %r0 + cfi_def_cfa(%r6, 0) + cfi_undefined(%r14) + cfi_undefined(%r15) + cfi_undefined(%r16) + cfi_undefined(%r17) + cfi_undefined(%r18) + cfi_undefined(%r19) + cfi_undefined(%r20) + cfi_undefined(%r21) + cfi_undefined(%r22) + cfi_undefined(%r23) + cfi_undefined(%r24) + cfi_undefined(%r25) + cfi_undefined(%r26) + cfi_undefined(%r27) + cfi_undefined(%r28) + cfi_undefined(%r29) + cfi_undefined(%r30) + cfi_undefined(%r31) + ldreg %r14, 0*WS+OFS_GR(%r4) + ldreg %r15, 1*WS+OFS_GR(%r4) + ldreg %r16, 2*WS+OFS_GR(%r4) + ldreg %r17, 3*WS+OFS_GR(%r4) + ldreg %r18, 4*WS+OFS_GR(%r4) + ldreg %r19, 5*WS+OFS_GR(%r4) + ldreg %r20, 6*WS+OFS_GR(%r4) + ldreg %r21, 7*WS+OFS_GR(%r4) + ldreg %r22, 8*WS+OFS_GR(%r4) + ldreg %r23, 9*WS+OFS_GR(%r4) + ldreg %r24, 10*WS+OFS_GR(%r4) + ldreg %r25, 11*WS+OFS_GR(%r4) + ldreg %r26, 12*WS+OFS_GR(%r4) + ldreg %r27, 13*WS+OFS_GR(%r4) + ldreg %r28, 14*WS+OFS_GR(%r4) + ldreg %r29, 15*WS+OFS_GR(%r4) + ldreg %r30, 16*WS+OFS_GR(%r4) + ldreg %r31, 17*WS+OFS_GR(%r4) + mr %r1, %r6 + blr + cfi_endproc +END GTM_longjmp + +#ifdef __linux__ +.section .note.GNU-stack, "", @progbits +#endif diff --git a/libitm/config/powerpc/target.h b/libitm/config/powerpc/target.h new file mode 100644 index 0000000..2d036cd --- /dev/null +++ b/libitm/config/powerpc/target.h @@ -0,0 +1,65 @@ +/* Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson <r...@redhat.com>. + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +namespace GTM HIDDEN { + +typedef int v128 __attribute__((vector_size(16), may_alias, aligned(16))); +typedef struct gtm_jmpbuf +{ +#if defined(__ALTIVEC__) || defined(__VSX__) + v128 vr[12]; /* vr20-vr31 */ + unsigned long long vscr; /* long long for padding only */ +#endif +#ifndef _SOFT_FLOAT + double fr[18]; /* f14-f31 */ + double fpscr; +#endif + unsigned long gr[18]; /* r14-r31 */ + void *cfa; + unsigned long pc; + unsigned long r2; +} gtm_jmpbuf; + +/* The size of one line in hardware caches (in bytes). */ +#define HW_CACHELINE_SIZE 64 + +static inline void +cpu_relax (void) +{ + __asm volatile ("" : : : "memory"); +} + +static inline void +atomic_read_barrier (void) +{ + __sync_synchronize (); +} + +static inline void +atomic_write_barrier (void) +{ + __sync_synchronize (); +} + +} // namespace GTM diff --git a/libitm/configure.tgt b/libitm/configure.tgt index eac6f50..2549bbe 100644 --- a/libitm/configure.tgt +++ b/libitm/configure.tgt @@ -46,7 +46,8 @@ fi # Map the target cpu to an ARCH sub-directory. At the same time, # work out any special compilation flags as necessary. 
case "${target_cpu}" in - alpha*) ARCH=alpha ;; + alpha*) ARCH=alpha ;; + rs6000 | powerpc*) ARCH=powerpc ;; i[3456]86) case " ${CC} ${CFLAGS} " in @@ -90,6 +91,11 @@ case "${target}" in fi ;; + powerpc*-*-darwin* | powerpc*-*-aix* | rs6000-*-aix*) + # The system ought to be supported, but sjlj.S has not been ported. + UNSUPPORTED=1 + ;; + *-*-gnu* | *-*-k*bsd*-gnu \ | *-*-netbsd* | *-*-freebsd* | *-*-openbsd* \ | *-*-solaris2* | *-*-sysv4* | *-*-irix6* | *-*-osf* | *-*-hpux11* \