Hi Richard,
On 2 Dec 2011, at 23:36, Iain Sandoe wrote:
On 2 Dec 2011, at 22:59, Richard Henderson wrote:
I personally think the whole thing would be much easier to read
without relying on the redzone. Aside from that, there's actually
very little real difference in the two files. Essentially, you're
storing the registers in a different order because the prologue
does, just so you can make use of the redzone.
OK - I guess I got carried away with thinking that I might be able
to re-use the save_world () routine - but that doesn't look feasible
after all so....
The aix abi saves r2; darwin 32-bit saves r13. One extra register
in both cases, which could use the same slot.
... will take another look tomorrow....
.... although we still have some syntax issues that might make
sharing the original code somewhat ugly....
Two versions attached, both of which produce working code on darwin
(although the attached modification to yours will be broken on
assemblers needing % in front of reg names).
version 1 is a tidied up red-zone implementation.
===
version 2 is a modification of your original:
a) -FRAME+BASE(r1) cannot be guaranteed to be vec-aligned in general
(it isn't on m32 darwin)
... so I've taken the liberty of rounding the gtm_buffer object and
then pointing r4 at original_sp-rounded_size, which is what we want
for the call to GTM_begin_transaction anyway.
b) I've added the CR etc. wrapped in __MACH__ ifdefs.
c) ";" is a comment introducer for Darwin's asm .. so I unwrapped
those lines ...
d) I put in the logic for handling __USER_LABEL_PREFIX__ .
===
e) The real problem is finding a non-horrible way of dealing with the
%r <=> r issue - and I've not done that so far...
... pending your opinion on the best way forward....
cheers
Iain
Index: libitm/config/darwin/powerpc/sjlj.S
===================================================================
--- libitm/config/darwin/powerpc/sjlj.S (revision 0)
+++ libitm/config/darwin/powerpc/sjlj.S (revision 0)
@@ -0,0 +1,335 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+ Contributed by Iain Sandoe <ia...@gcc.gnu.org>.
+
+ This file is part of the GNU Transactional Memory Library (libitm).
+
+ Libitm is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+
+#if defined(__ppc64__)
+#define MODE_CHOICE(x, y) y
+#else
+#define MODE_CHOICE(x, y) x
+#endif
+
+#define MACHINE MODE_CHOICE(ppc7400,ppc64)
+#define g_long MODE_CHOICE(long, quad) /* usage is ".g_long" */
+#define GPR_BYTES MODE_CHOICE(4,8) /* size of a GPR in bytes */
+#define FPR_BYTES 8 /* size of a FPR in bytes */
+#define LOG2_GPR_BYTES MODE_CHOICE(2,3) /* log2(GPR_BYTES) */
+
+#define cmpg MODE_CHOICE(cmpw, cmpd)
+#define lg MODE_CHOICE(lwz, ld)
+#define sg MODE_CHOICE(stw, std)
+#define lgx MODE_CHOICE(lwzx, ldx)
+#define sgx MODE_CHOICE(stwx, stdx)
+#define lgu MODE_CHOICE(lwzu, ldu)
+#define sgu MODE_CHOICE(stwu, stdu)
+#define lgux MODE_CHOICE(lwzux, ldux)
+#define sgux MODE_CHOICE(stwux, stdux)
+#define lgwa MODE_CHOICE(lwz, lwa)
+
+/* Stack frame constants. */
+#define RED_ZONE_SIZE MODE_CHOICE(224,288)
+#define LINKAGE_SIZE MODE_CHOICE(24,48)
+#define SAVED_LR_OFFSET MODE_CHOICE(8,16)
+#define SAVED_CR_OFFSET MODE_CHOICE(4,8)
+
+/* We will assume that the code is to be built for a processor with Altivec.
+
+ TODO 1:
+ For Darwin 8 or earlier, which might be on hardware with either G3 or G4/G5,
+ the vecSave code here would ideally need to be made conditional (for
+ m32) on __cpu_has_altivec from the system framework (as is done in Darwin's
+ save_world () routine in libgcc), i.e. one needs to check for Altivec at
+ runtime. As things stand, it should be fine on Darwin 8/9 with G4/G5. */
+
+/* TODO 2:
+ Generate eh_frame data. */
+
+# define VECS_SIZ 12*16
+
+#ifdef __ppc64__
+# define VEC_PAD 0
+#else
+# define VEC_PAD 8
+#endif
+
+#define RESTOC GPR_BYTES
+#define SAVPC GPR_BYTES
+#define FPSCR FPR_BYTES
+#define SAVSP GPR_BYTES
+#define SAVCR 4
+#ifdef __ppc64__
+/* Add for gpr13 and the VRsave. */
+# define RZADD 12
+#else
+ /* which are included in red zone at m32. */
+# define RZADD 0
+#endif
+
+#define GTMSIZ (VECS_SIZ + VEC_PAD + RESTOC + SAVPC + FPSCR + SAVCR + SAVSP\
+ + RZADD + RED_ZONE_SIZE)
+
+/* The stack frame components for the call to GTM_begin_transaction.
+
+ We call GTM_begin_transaction (uint32_t, gtm_jmpbuf*). So we must provide
+ space for those params in the stack frame so that the callee can save them
+ there if it wants to.
+*/
+
+#define NEXT_FRAME (LINKAGE_SIZE + 2 * GPR_BYTES)
+
+#define FRAME_SIZE (((NEXT_FRAME + GTMSIZ + 15) / 16) * 16)
+
+/* From the start of the gtm_jmpbuf. */
+#define VEC_OFF 0
+#define TOC_OFF (VECS_SIZ + VEC_PAD)
+#define PC_OFF (TOC_OFF + GPR_BYTES)
+#define FPSCRS_OFF (PC_OFF + GPR_BYTES)
+#define SP_OFF (FPSCRS_OFF + FPR_BYTES)
+#define CR_OFF (SP_OFF + GPR_BYTES)
+#define VRSAVE_OFF (CR_OFF + 4)
+#define GPRS_OFF (VRSAVE_OFF + 4)
+#define FPRS_OFF (GPRS_OFF + 19 * GPR_BYTES)
+
+ .text
+
+ .machine MACHINE
+
+ .align 4
+
+ /* _ITM_beginTransaction(uint32_t, ...) - save state, call GTM_begin_transaction. */
+
+ .globl __ITM_beginTransaction
+__ITM_beginTransaction:
+
+ mfcr r0 /* CR; stored at CR_OFF once r4 is set up. */
+
+ /* ??? Test r3 for pr_hasNoFloatUpdate and skip the fp save.
+ This is not yet set by the compiler. */
+
+ stfd f14,-144(r1) /* f14-f31 occupy the top 144 bytes below SP. */
+ stfd f15,-136(r1)
+ stfd f16,-128(r1)
+ stfd f17,-120(r1)
+ stfd f18,-112(r1)
+ stfd f19,-104(r1)
+ stfd f20,-96(r1)
+ stfd f21,-88(r1)
+ stfd f22,-80(r1)
+ stfd f23,-72(r1)
+ stfd f24,-64(r1)
+ stfd f25,-56(r1)
+ stfd f26,-48(r1)
+ stfd f27,-40(r1)
+ stfd f28,-32(r1)
+ stfd f29,-24(r1)
+ stfd f30,-16(r1)
+ mffs f0 /* FPSCR; stored at FPSCRS_OFF below. */
+ stfd f31,-8(r1)
+
+#ifndef __ppc64__
+ /* This might not be the best approach - stmw could be slower than the
+ one-by-one store. */
+ stmw r13,-220(r1) /* r13-r31, ending where the FPR area starts. */
+#else
+ sg r14,(-288 + 0 * GPR_BYTES)(r1)
+ sg r15,(-288 + 1 * GPR_BYTES)(r1)
+ sg r16,(-288 + 2 * GPR_BYTES)(r1)
+ sg r17,(-288 + 3 * GPR_BYTES)(r1)
+ sg r18,(-288 + 4 * GPR_BYTES)(r1)
+ sg r19,(-288 + 5 * GPR_BYTES)(r1)
+ sg r20,(-288 + 6 * GPR_BYTES)(r1)
+ sg r21,(-288 + 7 * GPR_BYTES)(r1)
+ sg r22,(-288 + 8 * GPR_BYTES)(r1)
+ sg r23,(-288 + 9 * GPR_BYTES)(r1)
+ sg r24,(-288 + 10 * GPR_BYTES)(r1)
+ sg r25,(-288 + 11 * GPR_BYTES)(r1)
+ sg r26,(-288 + 12 * GPR_BYTES)(r1)
+ sg r27,(-288 + 13 * GPR_BYTES)(r1)
+ sg r28,(-288 + 14 * GPR_BYTES)(r1)
+ sg r29,(-288 + 15 * GPR_BYTES)(r1)
+ sg r30,(-288 + 16 * GPR_BYTES)(r1)
+ sg r31,(-288 + 17 * GPR_BYTES)(r1)
+#endif
+ /* Filled the red zone - so now we need to allocate the frame. */
+ mr r5,r1 /* copy SP. */
+ sgu r1,-FRAME_SIZE(r1) /* Allocate stack frame */
+ addi r4,r5,-GTMSIZ /* Point to the jump buffer. */
+
+/* sg r2,TOC_OFF(r4) not needed on Darwin. */
+#ifdef __ppc64__
+ /* The ABI doc is slightly ambiguous about r13 at m64 - it is reserved
+ for TLS, but also stated to be call-saved (TLS is not implemented
+ for any ppc Darwin variant). */
+ sg r13,GPRS_OFF(r4)
+#endif
+ stfd f0,FPSCRS_OFF(r4)
+ sg r5,SP_OFF(r4)
+ stw r0,CR_OFF(r4)
+ mflr r0
+ sg r0,PC_OFF(r4)
+ sg r0,SAVED_LR_OFFSET(r5) /* LR also goes at SAVED_LR_OFFSET from the entry SP. */
+
+ /* ??? Determine when VRs not present. */
+ /* ??? Test r3 for pr_hasNoVectorUpdate and skip the vr save.
+ This is not yet set by the compiler. */
+
+ mfspr r0,VRsave
+ addi r5,r4,VECS_SIZ-16 /* Now r5 points at V31 save. */
+ stw r0,VRSAVE_OFF(r4)
+ addi r4,r5,-16 /* now r4 points to V30 save. */
+
+ stvx v31,0,r5
+ addi r5,r5,-32
+ stvx v30,0,r4
+ addi r4,r4,-32
+ stvx v29,0,r5
+ addi r5,r5,-32
+ stvx v28,0,r4
+ addi r4,r4,-32
+ stvx v27,0,r5
+ addi r5,r5,-32
+ stvx v26,0,r4
+ addi r4,r4,-32
+ stvx v25,0,r5
+ addi r5,r5,-32
+ stvx v24,0,r4
+ addi r4,r4,-32
+ stvx v23,0,r5
+ addi r5,r5,-32
+ stvx v22,0,r4
+ addi r4,r4,-32
+ stvx v21,0,r5
+ stvx v20,0,r4 /* r4 back to the start. */
+
+ /* r3 is as per entry, r4 points at our gtm_jmpbuf. */
+ /* GTM_begin_transaction (uint32_t prop, gtm_jmpbuf *bf) */
+ bl _GTM_begin_transaction
+ nop
+
+ lg r0,(SAVED_LR_OFFSET + FRAME_SIZE)(r1) /* Reload the LR stored above. */
+ mtlr r0
+ addi r1, r1, FRAME_SIZE /* Pop the frame allocated by sgu. */
+ blr
+
+ /* End of _ITM_beginTransaction. */
+
+ /* uint32_t GTM_longjmp (uint32_t, const gtm_jmpbuf *, uint32_t) */
+
+ .private_extern _GTM_longjmp
+_GTM_longjmp:
+
+ /* ??? Determine when VRs not present. */
+ /* ??? Test r5 for pr_hasNoVectorUpdate and skip the vr restore.
+ This is not yet set by the compiler. */
+
+ mr r14,r4 /* r14/r15 are scratch here; both are reloaded from the buffer below. */
+ lwz r0,VRSAVE_OFF(r4)
+ addi r15,r4,16
+
+ lvx v20,0,r14
+ addi r14,r14,32
+ lvx v21,0,r15
+ addi r15,r15,32
+ lvx v22,0,r14
+ addi r14,r14,32
+ lvx v23,0,r15
+ addi r15,r15,32
+ lvx v24,0,r14
+ addi r14,r14,32
+ lvx v25,0,r15
+ addi r15,r15,32
+ lvx v26,0,r14
+ addi r14,r14,32
+ lvx v27,0,r15
+ addi r15,r15,32
+ lvx v28,0,r14
+ addi r14,r14,32
+ lvx v29,0,r15
+ addi r15,r15,32
+ lvx v30,0,r14
+ addi r14,r14,32
+ lvx v31,0,r15
+
+ mtspr VRsave,r0 /* Restored... */
+
+#ifndef __ppc64__
+ lmw r13,GPRS_OFF(r4) /* r13-r31 in one go. */
+#else
+ lg r13,(GPRS_OFF + 0 * GPR_BYTES)(r4)
+ lg r14,(GPRS_OFF + 1 * GPR_BYTES)(r4)
+ lg r15,(GPRS_OFF + 2 * GPR_BYTES)(r4)
+ lg r16,(GPRS_OFF + 3 * GPR_BYTES)(r4)
+ lg r17,(GPRS_OFF + 4 * GPR_BYTES)(r4)
+ lg r18,(GPRS_OFF + 5 * GPR_BYTES)(r4)
+ lg r19,(GPRS_OFF + 6 * GPR_BYTES)(r4)
+ lg r20,(GPRS_OFF + 7 * GPR_BYTES)(r4)
+ lg r21,(GPRS_OFF + 8 * GPR_BYTES)(r4)
+ lg r22,(GPRS_OFF + 9 * GPR_BYTES)(r4)
+ lg r23,(GPRS_OFF + 10 * GPR_BYTES)(r4)
+ lg r24,(GPRS_OFF + 11 * GPR_BYTES)(r4)
+ lg r25,(GPRS_OFF + 12 * GPR_BYTES)(r4)
+ lg r26,(GPRS_OFF + 13 * GPR_BYTES)(r4)
+ lg r27,(GPRS_OFF + 14 * GPR_BYTES)(r4)
+ lg r28,(GPRS_OFF + 15 * GPR_BYTES)(r4)
+ lg r29,(GPRS_OFF + 16 * GPR_BYTES)(r4)
+ lg r30,(GPRS_OFF + 17 * GPR_BYTES)(r4)
+ lg r31,(GPRS_OFF + 18 * GPR_BYTES)(r4)
+#endif
+
+ /* ??? Test r5 for pr_hasNoFloatUpdate and skip the fp load.
+ This is not yet set by the compiler. */
+
+ lfd f0,FPSCRS_OFF(r4)
+
+ lfd f14,(FPRS_OFF + 0)(r4)
+ lfd f15,(FPRS_OFF + 8)(r4)
+ lfd f16,(FPRS_OFF + 16)(r4)
+ lfd f17,(FPRS_OFF + 24)(r4)
+ lfd f18,(FPRS_OFF + 32)(r4)
+ lfd f19,(FPRS_OFF + 40)(r4)
+ lfd f20,(FPRS_OFF + 48)(r4)
+ lfd f21,(FPRS_OFF + 56)(r4)
+ lfd f22,(FPRS_OFF + 64)(r4)
+ lfd f23,(FPRS_OFF + 72)(r4)
+ lfd f24,(FPRS_OFF + 80)(r4)
+ lfd f25,(FPRS_OFF + 88)(r4)
+ lfd f26,(FPRS_OFF + 96)(r4)
+ lfd f27,(FPRS_OFF + 104)(r4)
+ lfd f28,(FPRS_OFF + 112)(r4)
+ lfd f29,(FPRS_OFF + 120)(r4)
+ lfd f30,(FPRS_OFF + 128)(r4)
+ lfd f31,(FPRS_OFF + 136)(r4)
+
+ mtfsf 0xff,f0 /* Restore fpscr. */
+
+ /* So now restore to entry of _ITM_beginTransaction. */
+ lwz r0,CR_OFF(r4)
+ mtcr r0 /* Restore CR. */
+ lg r0,PC_OFF(r4)
+ mtlr r0 /* Ret addr. */
+ lg r1,SP_OFF(r4) /* SP. */
+ blr /* r3 is never written here, so the first argument is the return value. */
+
+ /* End of GTM_longjmp. */
+ /* End of sjlj.S */
+
Index: libitm/config/darwin/powerpc/target.h
===================================================================
--- libitm/config/darwin/powerpc/target.h (revision 0)
+++ libitm/config/darwin/powerpc/target.h (revision 0)
@@ -0,0 +1,78 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+ Contributed by Iain Sandoe <ia...@gcc.gnu.org>.
+ Based on libitm/powerpc/target.h by Richard Henderson.
+
+ This file is part of the GNU Transactional Memory Library (libitm).
+
+ Libitm is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+namespace GTM HIDDEN {
+
+/* We will assume that the code is to be built for a processor with Altivec.
+ If it is desired to make this run on Darwin 8 or earlier, which might be on
+ hardware with either G3 or G4, then the vecSave code in sjlj.S will need to
+ be made conditional on __cpu_has_altivec from the system framework (as is
+ done in Darwin's 'save_world ()' routine in libgcc), i.e. you need to check
+ for Altivec at runtime. */
+
+typedef int v128 __attribute__((vector_size(16), may_alias, aligned(16)));
+
+typedef struct gtm_jmpbuf /* Field order must match the *_OFF offsets in sjlj.S. */
+{
+ v128 vr[12]; /* vr20-vr31 */
+#ifndef __ppc64__
+ unsigned int vecpad[2]; /* corresponds to VEC_PAD in sjlj.S */
+#endif
+ void *ResForToc; /* TOC_OFF slot; sjlj.S does not store to it on Darwin */
+ unsigned long pc; /* LR at entry to _ITM_beginTransaction */
+ double fpscr;
+ void *sp; /* SP at entry to _ITM_beginTransaction */
+ unsigned int cr;
+ unsigned int VRsave;
+ unsigned long gr[19]; /* r13-r31 */
+ double fr[18]; /* f14-f31 */
+} gtm_jmpbuf;
+
+/* The size of one line in hardware caches (in bytes). */
+#ifdef __ppc64__
+# define HW_CACHELINE_SIZE 128
+#else
+# define HW_CACHELINE_SIZE 64
+#endif
+
+static inline void
+cpu_relax (void)
+{
+ __asm volatile ("" : : : "memory"); /* compiler barrier only; emits no instruction */
+}
+
+static inline void
+atomic_read_barrier (void)
+{
+ __sync_synchronize ();
+}
+
+static inline void
+atomic_write_barrier (void)
+{
+ __sync_synchronize ();
+}
+
+} // namespace GTM
Index: libitm/configure.tgt
===================================================================
--- libitm/configure.tgt (revision 181968)
+++ libitm/configure.tgt (working copy)
@@ -46,7 +46,8 @@ fi
# Map the target cpu to an ARCH sub-directory. At the same time,
# work out any special compilation flags as necessary.
case "${target_cpu}" in
- alpha*) ARCH=alpha ;;
+ alpha*) ARCH=alpha ;;
+ rs6000 | powerpc*) ARCH=powerpc ;;
i[3456]86)
case " ${CC} ${CFLAGS} " in
@@ -90,6 +91,15 @@ case "${target}" in
fi
;;
+ powerpc*-*-darwin*)
+ config_path="darwin/$ARCH $config_path"
+ ;;
+
+ powerpc*-*-aix* | rs6000-*-aix*)
+ # The system ought to be supported, but sjlj.S has not been ported.
+ UNSUPPORTED=1
+ ;;
+
*-*-gnu* | *-*-k*bsd*-gnu \
| *-*-netbsd* | *-*-freebsd* | *-*-openbsd* \
| *-*-solaris2* | *-*-sysv4* | *-*-irix6* | *-*-osf* | *-*-hpux11* \
Index: libitm/config/powerpc/sjlj.S
===================================================================
--- libitm/config/powerpc/sjlj.S (revision 0)
+++ libitm/config/powerpc/sjlj.S (revision 0)
@@ -0,0 +1,410 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+ Contributed by Richard Henderson <r...@redhat.com>.
+
+ This file is part of the GNU Transactional Memory Library (libitm).
+
+ Libitm is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+ .text
+
+#include "asmcfi.h"
+
+#define CONCAT1(a, b) CONCAT2(a, b)
+#define CONCAT2(a, b) a ## b
+
+#ifdef __USER_LABEL_PREFIX__
+# define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
+#else
+# define SYM(x) x
+#endif
+
+#if defined(__powerpc64__) && defined(__ELF__)
+.macro FUNC name
+ .globl \name, .\name
+ .section ".opd","aw"
+ .align 3
+\name:
+ .quad .\name, .TOC.@tocbase, 0
+ .size \name, 24
+ .type .\name, @function
+ .text
+.\name:
+.endm
+.macro END name
+ .size .\name, . - .\name
+.endm
+.macro HIDDEN name
+ .hidden \name, .\name
+.endm
+#define MACHINE
+#elif defined(__ELF__)
+.macro FUNC name
+ .globl \name
+ .type \name, @function
+\name:
+.endm
+.macro END name
+ .size \name, . - \name
+.endm
+.macro HIDDEN name
+ .hidden \name
+.endm
+#define MACHINE
+#elif defined (__MACH__)
+.macro FUNC name
+ .globl $0
+$0:
+.endmacro
+.macro HIDDEN name
+ .private_extern $0
+.endmacro
+.macro END name
+.endmacro
+#ifdef __ppc64__
+# define MACHINE .machine ppc64
+#else
+# define MACHINE .machine ppc7400
+#endif
+
+#else
+#error "unsupported system"
+#endif
+
+/* Parameterize the code for 32-bit vs 64-bit. */
+#if defined (__powerpc64__) || defined (__ppc64__)
+#define ldreg ld
+#define streg std
+#define stregu stdu
+#define WS 8
+#else
+#define ldreg lwz
+#define streg stw
+#define stregu stwu
+#define WS 4
+#endif
+
+/* Parameterize the code for call frame constants. */
+#if defined(_CALL_AIXDESC)
+# define BASE 6*WS
+# define LR_SAVE 2*WS
+#elif defined(_CALL_SYSV)
+# define BASE 2*WS
+# define LR_SAVE 1*WS
+#elif defined(__MACH__)
+/* 6 words Linkage area + space for two parameters. */
+# define BASE (6*WS + 2*WS)
+# define LR_SAVE 2*WS
+#else
+# error "unsupported system"
+#endif
+
+#if defined(__ALTIVEC__) || defined(__VSX__)
+# define OFS_VR 0
+# define VECS_SIZ (12*16) /* v20-v31, 16 bytes each. */
+# define OFS_VSCR VECS_SIZ /* Despite the name, this slot holds VRSAVE. */
+# define OFS_VR_END (OFS_VSCR + 8)
+#else
+# define OFS_VR_END 0
+#endif
+#ifndef _SOFT_FLOAT
+# define OFS_FR OFS_VR_END
+# define OFS_FPSCR (OFS_FR + 18*8) /* f14-f31, 8 bytes each. */
+# define OFS_FR_END (OFS_FPSCR + 8)
+#else
+# define OFS_FR_END OFS_VR_END
+#endif
+#define OFS_GR OFS_FR_END
+#define OFS_CFA (OFS_GR + 18*WS) /* r14-r31, one word each. */
+#define OFS_LR (OFS_CFA + WS)
+#define OFS_TOC (OFS_LR + WS)
+#define OFS_CR (OFS_TOC + WS)
+#define OFS_END (OFS_CR + WS)
+
+/* Buffer size rounded up to 16, so SP minus this remains vec aligned. */
+#define GTM_SIZE (((OFS_END + 15) / 16) * 16)
+
+#define FRAME (((BASE + GTM_SIZE + 15) / 16) * 16)
+#define VRSAVE 256 /* SPR number used with mfspr/mtspr below. */
+
+ MACHINE
+
+ .align 4
+FUNC SYM(_ITM_beginTransaction)
+ cfi_startproc
+ mflr r0
+ streg r0, LR_SAVE(r1) /* LR into the LR save slot of the entry frame. */
+ mr r5, r1 /* r5 = SP at entry. */
+ stregu r1, -FRAME(r1) /* Allocate our frame. */
+ cfi_def_cfa_offset(FRAME)
+ cfi_offset(65, LR_SAVE)
+ addi r4,r5,-GTM_SIZE /* jump buf pointer. */
+ streg r5, OFS_CFA(r4) /* SP at entry. */
+ streg r0, OFS_LR(r4) /* Return address. */
+ mfcr r0 /* LR is already stored, so r0 is free for CR. */
+ streg r0, OFS_CR(r4) /* Saved on all targets: cr2-cr4 are call-saved. */
+#ifdef __MACH__
+ streg r13, OFS_TOC(r4)
+#else
+ streg r2, OFS_TOC(r4)
+#endif
+ streg r14, 0*WS+OFS_GR(r4)
+ streg r15, 1*WS+OFS_GR(r4)
+ streg r16, 2*WS+OFS_GR(r4)
+ streg r17, 3*WS+OFS_GR(r4)
+ streg r18, 4*WS+OFS_GR(r4)
+ streg r19, 5*WS+OFS_GR(r4)
+ streg r20, 6*WS+OFS_GR(r4)
+ streg r21, 7*WS+OFS_GR(r4)
+ streg r22, 8*WS+OFS_GR(r4)
+ streg r23, 9*WS+OFS_GR(r4)
+ streg r24, 10*WS+OFS_GR(r4)
+ streg r25, 11*WS+OFS_GR(r4)
+ streg r26, 12*WS+OFS_GR(r4)
+ streg r27, 13*WS+OFS_GR(r4)
+ streg r28, 14*WS+OFS_GR(r4)
+ streg r29, 15*WS+OFS_GR(r4)
+ streg r30, 16*WS+OFS_GR(r4)
+ streg r31, 17*WS+OFS_GR(r4)
+
+#ifndef _SOFT_FLOAT
+ /* ??? Determine when FPRs not present. */
+ /* ??? Test r3 for pr_hasNoFloatUpdate and skip the fp save.
+ This is not yet set by the compiler. */
+ mffs f0
+ stfd f14, 0+OFS_FR(r4)
+ stfd f15, 8+OFS_FR(r4)
+ stfd f16, 16+OFS_FR(r4)
+ stfd f17, 24+OFS_FR(r4)
+ stfd f18, 32+OFS_FR(r4)
+ stfd f19, 40+OFS_FR(r4)
+ stfd f20, 48+OFS_FR(r4)
+ stfd f21, 56+OFS_FR(r4)
+ stfd f22, 64+OFS_FR(r4)
+ stfd f23, 72+OFS_FR(r4)
+ stfd f24, 80+OFS_FR(r4)
+ stfd f25, 88+OFS_FR(r4)
+ stfd f26, 96+OFS_FR(r4)
+ stfd f27,104+OFS_FR(r4)
+ stfd f28,112+OFS_FR(r4)
+ stfd f29,120+OFS_FR(r4)
+ stfd f30,128+OFS_FR(r4)
+ stfd f31,136+OFS_FR(r4)
+ stfd f0, OFS_FPSCR(r4)
+#endif
+
+#if defined(__ALTIVEC__) || defined(__VSX__)
+ /* ??? Determine when VRs not present. */
+ /* ??? Test r3 for pr_hasNoVectorUpdate and skip the vr save.
+ This is not yet set by the compiler. */
+ mfspr r0, VRSAVE
+
+ addi r5,r4,VECS_SIZ-16 /* Now r5 points at V31 save. */
+ addi r4,r5,-16 /* now r4 points to V30 save. */
+
+ stvx v31,0,r5
+ addi r5,r5,-32
+ stvx v30,0,r4
+ addi r4,r4,-32
+ stvx v29,0,r5
+ addi r5,r5,-32
+ stvx v28,0,r4
+ addi r4,r4,-32
+ stvx v27,0,r5
+ addi r5,r5,-32
+ stvx v26,0,r4
+ addi r4,r4,-32
+ stvx v25,0,r5
+ addi r5,r5,-32
+ stvx v24,0,r4
+ addi r4,r4,-32
+ stvx v23,0,r5
+ addi r5,r5,-32
+ stvx v22,0,r4
+ addi r4,r4,-32
+ stvx v21,0,r5
+ stvx v20,0,r4 /* r4 back to the start. */
+ streg r0, OFS_VSCR(r4) /* VRSAVE value goes in the OFS_VSCR slot. */
+#endif
+
+ /* r3 unchanged from entry, r4 points to jump buffer. */
+ bl SYM(GTM_begin_transaction)
+ nop
+
+ ldreg r0, LR_SAVE+FRAME(r1) /* Reload the LR stored at entry. */
+ mtlr r0
+ addi r1, r1, FRAME /* Pop our frame. */
+ cfi_def_cfa_offset(0)
+ cfi_restore(65)
+ blr
+ cfi_endproc
+END SYM(_ITM_beginTransaction)
+
+ .align 4
+ HIDDEN SYM(GTM_longjmp)
+FUNC SYM(GTM_longjmp)
+ cfi_startproc
+#if defined(__ALTIVEC__) || defined(__VSX__)
+ /* ??? Determine when VRs not present. */
+ /* ??? Test r5 for pr_hasNoVectorUpdate and skip the vr restore.
+ This is not yet set by the compiler. */
+ addi r6, r4, OFS_VR
+ addi r7, r4, OFS_VR+16
+ ldreg r0, OFS_VSCR(r4) /* VRSAVE value saved by _ITM_beginTransaction. */
+ cfi_undefined(v20)
+ cfi_undefined(v21)
+ cfi_undefined(v22)
+ cfi_undefined(v23)
+ cfi_undefined(v24)
+ cfi_undefined(v25)
+ cfi_undefined(v26)
+ cfi_undefined(v27)
+ cfi_undefined(v28)
+ cfi_undefined(v29)
+ cfi_undefined(v30)
+ cfi_undefined(v31)
+ lvx v20, 0, r6
+ addi r6, r6, 32
+ lvx v21, 0, r7
+ addi r7, r7, 32
+ lvx v22, 0, r6
+ addi r6, r6, 32
+ lvx v23, 0, r7
+ addi r7, r7, 32
+ lvx v24, 0, r6
+ addi r6, r6, 32
+ lvx v25, 0, r7
+ addi r7, r7, 32
+ lvx v26, 0, r6
+ addi r6, r6, 32
+ lvx v27, 0, r7
+ addi r7, r7, 32
+ lvx v28, 0, r6
+ addi r6, r6, 32
+ lvx v29, 0, r7
+ addi r7, r7, 32
+ lvx v30, 0, r6
+ lvx v31, 0, r7
+ mtspr VRSAVE, r0
+#endif
+
+#ifndef _SOFT_FLOAT
+ /* ??? Determine when FPRs not present. */
+ /* ??? Test r5 for pr_hasNoFloatUpdate and skip the fp load.
+ This is not yet set by the compiler. */
+ lfd f0, OFS_FPSCR(r4)
+ cfi_undefined(f14)
+ cfi_undefined(f15)
+ cfi_undefined(f16)
+ cfi_undefined(f17)
+ cfi_undefined(f18)
+ cfi_undefined(f19)
+ cfi_undefined(f20)
+ cfi_undefined(f21)
+ cfi_undefined(f22)
+ cfi_undefined(f23)
+ cfi_undefined(f24)
+ cfi_undefined(f25)
+ cfi_undefined(f26)
+ cfi_undefined(f27)
+ cfi_undefined(f28)
+ cfi_undefined(f29)
+ cfi_undefined(f30)
+ cfi_undefined(f31)
+ lfd f14, 0+OFS_FR(r4)
+ lfd f15, 8+OFS_FR(r4)
+ lfd f16, 16+OFS_FR(r4)
+ lfd f17, 24+OFS_FR(r4)
+ lfd f18, 32+OFS_FR(r4)
+ lfd f19, 40+OFS_FR(r4)
+ lfd f20, 48+OFS_FR(r4)
+ lfd f21, 56+OFS_FR(r4)
+ lfd f22, 64+OFS_FR(r4)
+ lfd f23, 72+OFS_FR(r4)
+ lfd f24, 80+OFS_FR(r4)
+ lfd f25, 88+OFS_FR(r4)
+ lfd f26, 96+OFS_FR(r4)
+ lfd f27,104+OFS_FR(r4)
+ lfd f28,112+OFS_FR(r4)
+ lfd f29,120+OFS_FR(r4)
+ lfd f30,128+OFS_FR(r4)
+ lfd f31,136+OFS_FR(r4)
+ mtfsf 0xff, f0
+#endif
+
+ ldreg r6, OFS_CFA(r4)
+ ldreg r0, OFS_LR(r4)
+
+ ldreg r7, OFS_CR(r4) /* r7 is free: the vector loads above are done. */
+ mtcr r7 /* Restore CR on all targets, matching the save. */
+#ifdef __MACH__
+ ldreg r13, OFS_TOC(r4)
+#else
+ ldreg r2, OFS_TOC(r4)
+#endif
+ /* At the instant we restore the LR, the only coherent view of
+ the world we have is into the new stack frame. Define the
+ CFA in terms of the not-yet-restored stack pointer. This will
+ last until the end of the function. */
+ mtlr r0
+ cfi_def_cfa(r6, 0)
+ cfi_undefined(r14)
+ cfi_undefined(r15)
+ cfi_undefined(r16)
+ cfi_undefined(r17)
+ cfi_undefined(r18)
+ cfi_undefined(r19)
+ cfi_undefined(r20)
+ cfi_undefined(r21)
+ cfi_undefined(r22)
+ cfi_undefined(r23)
+ cfi_undefined(r24)
+ cfi_undefined(r25)
+ cfi_undefined(r26)
+ cfi_undefined(r27)
+ cfi_undefined(r28)
+ cfi_undefined(r29)
+ cfi_undefined(r30)
+ cfi_undefined(r31)
+ ldreg r14, 0*WS+OFS_GR(r4)
+ ldreg r15, 1*WS+OFS_GR(r4)
+ ldreg r16, 2*WS+OFS_GR(r4)
+ ldreg r17, 3*WS+OFS_GR(r4)
+ ldreg r18, 4*WS+OFS_GR(r4)
+ ldreg r19, 5*WS+OFS_GR(r4)
+ ldreg r20, 6*WS+OFS_GR(r4)
+ ldreg r21, 7*WS+OFS_GR(r4)
+ ldreg r22, 8*WS+OFS_GR(r4)
+ ldreg r23, 9*WS+OFS_GR(r4)
+ ldreg r24, 10*WS+OFS_GR(r4)
+ ldreg r25, 11*WS+OFS_GR(r4)
+ ldreg r26, 12*WS+OFS_GR(r4)
+ ldreg r27, 13*WS+OFS_GR(r4)
+ ldreg r28, 14*WS+OFS_GR(r4)
+ ldreg r29, 15*WS+OFS_GR(r4)
+ ldreg r30, 16*WS+OFS_GR(r4)
+ ldreg r31, 17*WS+OFS_GR(r4)
+ mr r1, r6 /* Switch to the saved stack pointer. */
+ blr /* Resume at the saved LR; r3 is never written here. */
+ cfi_endproc
+END SYM(GTM_longjmp)
+
+#ifdef __linux__
+.section .note.GNU-stack, "", @progbits
+#endif
Index: libitm/config/powerpc/target.h
===================================================================
--- libitm/config/powerpc/target.h (revision 0)
+++ libitm/config/powerpc/target.h (revision 0)
@@ -0,0 +1,70 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+ Contributed by Richard Henderson <r...@redhat.com>.
+
+ This file is part of the GNU Transactional Memory Library (libitm).
+
+ Libitm is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+namespace GTM HIDDEN {
+
+typedef int v128 __attribute__((vector_size(16), may_alias, aligned(16)));
+typedef struct gtm_jmpbuf /* Field order must match the OFS_* offsets in sjlj.S. */
+{
+#if defined(__ALTIVEC__) || defined(__VSX__)
+ v128 vr[12]; /* vr20-vr31 */
+ unsigned long long vscr; /* sjlj.S stores VRSAVE here; long long for padding only */
+#endif
+#ifndef _SOFT_FLOAT
+ double fr[18]; /* f14-f31 */
+ double fpscr;
+#endif
+ unsigned long gr[18]; /* r14-r31 */
+ void *cfa; /* SP at entry to _ITM_beginTransaction */
+ unsigned long pc; /* LR at entry to _ITM_beginTransaction */
+ unsigned long r2r13; /* r2 for AIX, r13 for Darwin. */
+ unsigned long cr; /* condition register; written at OFS_CR by sjlj.S */
+} gtm_jmpbuf;
+
+/* The size of one line in hardware caches (in bytes). */
+#if defined (__powerpc64__) || defined (__ppc64__)
+# define HW_CACHELINE_SIZE 128
+#else
+# define HW_CACHELINE_SIZE 32
+#endif
+
+static inline void
+cpu_relax (void)
+{
+ __asm volatile ("" : : : "memory"); /* compiler barrier only; emits no instruction */
+}
+
+static inline void
+atomic_read_barrier (void)
+{
+ __sync_synchronize ();
+}
+
+static inline void
+atomic_write_barrier (void)
+{
+ __sync_synchronize ();
+}
+
+} // namespace GTM
Index: libitm/configure.tgt
===================================================================
--- libitm/configure.tgt (revision 181968)
+++ libitm/configure.tgt (working copy)
@@ -46,7 +46,8 @@ fi
# Map the target cpu to an ARCH sub-directory. At the same time,
# work out any special compilation flags as necessary.
case "${target_cpu}" in
- alpha*) ARCH=alpha ;;
+ alpha*) ARCH=alpha ;;
+ rs6000 | powerpc*) ARCH=powerpc ;;
i[3456]86)
case " ${CC} ${CFLAGS} " in
@@ -90,6 +91,11 @@ case "${target}" in
fi
;;
+ powerpc*-*-aix* | rs6000-*-aix*)
+ # The system ought to be supported, but sjlj.S has not been ported.
+ UNSUPPORTED=1
+ ;;
+
*-*-gnu* | *-*-k*bsd*-gnu \
| *-*-netbsd* | *-*-freebsd* | *-*-openbsd* \
| *-*-solaris2* | *-*-sysv4* | *-*-irix6* | *-*-osf* | *-*-hpux11* \