Hi Richard,

On 2 Dec 2011, at 23:36, Iain Sandoe wrote:

On 2 Dec 2011, at 22:59, Richard Henderson wrote:

I personally think the whole thing would be much easier to read without relying on the redzone. Aside from that, there's actually very little real difference in the two files. Essentially, you're storing the registers in a different order because the prologue does, just so you can make use of the redzone.

OK - I guess I got carried away with thinking that I might be able to re-use the save_world () routine - but that doesn't look feasible after all so....

The aix abi saves r2; darwin 32-bit saves r13. One extra register in both cases, which could use the same slot.

... will take another look tomorrow....
.... although we still have some syntax issues that might make sharing the original code somewhat ugly....

Two versions attached, both of which produce working code on darwin (although the attached modification to yours will be broken on assemblers needing % in front of reg names).

version 1 is a tidied up red-zone implementation.

===

version 2 is a modification of your original:

a) -FRAME+BASE(r1) cannot be guaranteed to be vec-aligned in general (it isn't on m32 darwin)

... so I've taken the liberty of rounding the gtm_buffer object and then pointing r4 at original_sp-rounded_size, which is what we want for the call to GTM_begin_transaction anyway.

b) I've added the CR etc. wrapped in  __MACH__ ifdefs.

c) ";" is a comment introducer for Darwin's asm .. so I unwrapped those lines ...

d) I put in the logic for handling __USER_LABEL_PREFIX__ .

===

e) The real problem is finding a non-horrible way of dealing with the %r <=> r issue - and I've not done that so far...

... pending your opinion on the best way forward....

cheers
Iain


Index: libitm/config/darwin/powerpc/sjlj.S
===================================================================
--- libitm/config/darwin/powerpc/sjlj.S (revision 0)
+++ libitm/config/darwin/powerpc/sjlj.S (revision 0)
@@ -0,0 +1,335 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Iain Sandoe <ia...@gcc.gnu.org>.
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+
+#if defined(__ppc64__)
+#define MODE_CHOICE(x, y) y
+#else
+#define MODE_CHOICE(x, y) x
+#endif
+
+#define MACHINE                MODE_CHOICE(ppc7400,ppc64)
+#define g_long         MODE_CHOICE(long, quad) /* usage is ".g_long" */
+#define GPR_BYTES      MODE_CHOICE(4,8)        /* size of a GPR in bytes */
+#define FPR_BYTES      8                       /* size of a FPR in bytes */
+#define LOG2_GPR_BYTES MODE_CHOICE(2,3)        /* log2(GPR_BYTES) */
+
+#define cmpg           MODE_CHOICE(cmpw, cmpd)
+#define lg             MODE_CHOICE(lwz, ld)
+#define sg             MODE_CHOICE(stw, std)
+#define lgx            MODE_CHOICE(lwzx, ldx)
+#define sgx            MODE_CHOICE(stwx, stdx)
+#define lgu            MODE_CHOICE(lwzu, ldu)
+#define sgu            MODE_CHOICE(stwu, stdu)
+#define lgux           MODE_CHOICE(lwzux, ldux)
+#define sgux           MODE_CHOICE(stwux, stdux)
+#define lgwa           MODE_CHOICE(lwz, lwa)
+
+/* Stack frame constants.  */
+#define RED_ZONE_SIZE  MODE_CHOICE(224,288)
+#define LINKAGE_SIZE   MODE_CHOICE(24,48)
+#define SAVED_LR_OFFSET        MODE_CHOICE(8,16)
+#define SAVED_CR_OFFSET        MODE_CHOICE(4,8)
+
+/* We will assume that the code is to be built for a processor with Altivec.
+
+   TODO 1:
+   On Darwin 8 or earlier, which might run on either G3 or G4/G5 hardware,
+   the vecSave code here would ideally be made conditional (for m32) on
+   __cpu_has_altivec from the system framework (as is done in Darwin's
+   save_world () routine in libgcc), i.e. one needs to check for Altivec at
+   runtime.  As things stand, it should be fine on Darwin 8/9 with G4/G5.  */
+
+/* TODO 2:
+   Generate eh_frame data.  */
+
+# define VECS_SIZ      12*16
+
+#ifdef __ppc64__
+# define VEC_PAD       0
+#else
+# define VEC_PAD       8
+#endif
+
+#define RESTOC GPR_BYTES
+#define SAVPC  GPR_BYTES
+#define FPSCR  FPR_BYTES
+#define SAVSP  GPR_BYTES
+#define SAVCR  4
+#ifdef __ppc64__
+/* Add for gpr13 and the VRsave.   */
+# define RZADD 12
+#else
+ /* which are included in red zone at m32.  */
+# define RZADD 0
+#endif
+
+#define GTMSIZ (VECS_SIZ + VEC_PAD + RESTOC + SAVPC + FPSCR + SAVCR + SAVSP\
+               + RZADD + RED_ZONE_SIZE) 
+
+/* The stack frame components for the call to GTM_begin_transaction.
+
+   We call GTM_begin_transaction (uint32_t, gtm_jmpbuf*).  So we must provide
+   space for those params in the stack frame so that the callee can save them
+   there if it wants to.
+*/
+
+#define NEXT_FRAME (LINKAGE_SIZE + 2 * GPR_BYTES)
+
+#define FRAME_SIZE (((NEXT_FRAME + GTMSIZ + 15) / 16) * 16)
+
+/* From the start of the gtm_jmpbuf.  */
+#define VEC_OFF 0
+#define TOC_OFF (VECS_SIZ + VEC_PAD)
+#define PC_OFF (TOC_OFF + GPR_BYTES)
+#define FPSCRS_OFF (PC_OFF + GPR_BYTES)
+#define SP_OFF (FPSCRS_OFF + FPR_BYTES)
+#define CR_OFF (SP_OFF + GPR_BYTES)
+#define VRSAVE_OFF (CR_OFF + 4)
+#define GPRS_OFF (VRSAVE_OFF + 4)
+#define FPRS_OFF (GPRS_OFF + 19 * GPR_BYTES)
+
+       .text
+
+       .machine MACHINE
+       
+       .align  4
+       
+       /*  _ITM_beginTransaction(uint32_t, ...) */
+       
+       .globl __ITM_beginTransaction
+__ITM_beginTransaction:
+       
+       mfcr r0
+
+       /* ??? Test r3 for pr_hasNoFloatUpdate and skip the fp save.
+          This is not yet set by the compiler.  */
+
+       stfd f14,-144(r1)
+       stfd f15,-136(r1)
+       stfd f16,-128(r1)
+       stfd f17,-120(r1)
+       stfd f18,-112(r1)
+       stfd f19,-104(r1)
+       stfd f20,-96(r1)
+       stfd f21,-88(r1)
+       stfd f22,-80(r1)
+       stfd f23,-72(r1)
+       stfd f24,-64(r1)
+       stfd f25,-56(r1)
+       stfd f26,-48(r1)
+       stfd f27,-40(r1)
+       stfd f28,-32(r1)
+       stfd f29,-24(r1)
+       stfd f30,-16(r1)
+       mffs f0
+       stfd f31,-8(r1)
+
+#ifndef __ppc64__
+       /* This might not be the best approach - stmw could be slower than the
+          one-by-one store.  */
+       stmw r13,-220(r1)                               
+#else
+       sg r14,(-288 +  0 * GPR_BYTES)(r1)
+       sg r15,(-288 +  1 * GPR_BYTES)(r1)
+       sg r16,(-288 +  2 * GPR_BYTES)(r1)
+       sg r17,(-288 +  3 * GPR_BYTES)(r1)
+       sg r18,(-288 +  4 * GPR_BYTES)(r1)
+       sg r19,(-288 +  5 * GPR_BYTES)(r1)
+       sg r20,(-288 +  6 * GPR_BYTES)(r1)
+       sg r21,(-288 +  7 * GPR_BYTES)(r1)
+       sg r22,(-288 +  8 * GPR_BYTES)(r1)
+       sg r23,(-288 +  9 * GPR_BYTES)(r1)
+       sg r24,(-288 + 10 * GPR_BYTES)(r1)
+       sg r25,(-288 + 11 * GPR_BYTES)(r1)
+       sg r26,(-288 + 12 * GPR_BYTES)(r1)
+       sg r27,(-288 + 13 * GPR_BYTES)(r1)
+       sg r28,(-288 + 14 * GPR_BYTES)(r1)
+       sg r29,(-288 + 15 * GPR_BYTES)(r1)
+       sg r30,(-288 + 16 * GPR_BYTES)(r1)
+       sg r31,(-288 + 17 * GPR_BYTES)(r1)
+#endif
+       /* Filled the red zone - so now we need to allocate the frame.  */
+       mr  r5,r1               /* copy SP.  */
+       sgu r1,-FRAME_SIZE(r1)  /* Allocate stack frame  */
+       addi r4,r5,-GTMSIZ      /* Point to the jump buffer.  */
+
+/*     sg r2,TOC_OFF(r4)       not needed on Darwin.  */
+#ifdef __ppc64__
+       /* The ABI doc is slightly ambiguous about r13 at m64 - it is reserved
+          for TLS, but also stated to be call-saved (TLS is not implemented
+          for any ppc Darwin variant).  */
+       sg r13,GPRS_OFF(r4)
+#endif
+       stfd f0,FPSCRS_OFF(r4)
+       sg r5,SP_OFF(r4)
+       stw r0,CR_OFF(r4)
+       mflr r0
+       sg r0,PC_OFF(r4)
+       sg r0,SAVED_LR_OFFSET(r5)
+
+       /* ??? Determine when VRs not present.  */
+       /* ??? Test r3 for pr_hasNoVectorUpdate and skip the vr save.
+          This is not yet set by the compiler.  */
+
+       mfspr r0,VRsave
+       addi  r5,r4,VECS_SIZ-16 /* Now r5 points at V31 save.  */
+       stw r0,VRSAVE_OFF(r4)
+       addi r4,r5,-16          /* now r4 points to V30 save.  */
+
+       stvx v31,0,r5
+       addi r5,r5,-32
+       stvx v30,0,r4
+       addi r4,r4,-32
+       stvx v29,0,r5
+       addi r5,r5,-32
+       stvx v28,0,r4
+       addi r4,r4,-32
+       stvx v27,0,r5
+       addi r5,r5,-32
+       stvx v26,0,r4
+       addi r4,r4,-32
+       stvx v25,0,r5
+       addi r5,r5,-32
+       stvx v24,0,r4
+       addi r4,r4,-32
+       stvx v23,0,r5
+       addi r5,r5,-32
+       stvx v22,0,r4
+       addi r4,r4,-32
+       stvx v21,0,r5
+       stvx v20,0,r4           /* r4 back to the start.  */
+
+       /* r3 is as per entry, r4 points at our gtm_jmpbuf.  */
+       /* GTM_begin_transaction (uint32_t prop, gtm_jmpbuf *bf) */
+       bl      _GTM_begin_transaction
+       nop
+
+       lg      r0,(SAVED_LR_OFFSET + FRAME_SIZE)(r1)
+       mtlr    r0
+       addi    r1, r1, FRAME_SIZE
+       blr
+
+       /* End of _ITM_beginTransaction.  */
+
+       /* uint32_t GTM_longjmp (uint32_t, const gtm_jmpbuf *, uint32_t) */ 
+
+       .private_extern _GTM_longjmp
+_GTM_longjmp:
+
+       /* ??? Determine when VRs not present.  */
+       /* ??? Test r5 for pr_hasNoVectorUpdate and skip the vr restore.
+          This is not yet set by the compiler.  */
+       
+       mr r14,r4
+       lwz r0,VRSAVE_OFF(r4)
+       addi r15,r4,16  
+
+       lvx v20,0,r14
+       addi r14,r14,32
+       lvx v21,0,r15
+       addi r15,r15,32
+       lvx v22,0,r14
+       addi r14,r14,32
+       lvx v23,0,r15
+       addi r15,r15,32
+       lvx v24,0,r14
+       addi r14,r14,32
+       lvx v25,0,r15
+       addi r15,r15,32
+       lvx v26,0,r14
+       addi r14,r14,32
+       lvx v27,0,r15
+       addi r15,r15,32
+       lvx v28,0,r14
+       addi r14,r14,32
+       lvx v29,0,r15
+       addi r15,r15,32
+       lvx v30,0,r14
+       addi r14,r14,32
+       lvx v31,0,r15
+
+       mtspr VRsave,r0         /* Restored... */
+       
+#ifndef __ppc64__
+       lmw r13,GPRS_OFF(r4)
+#else
+       lg r13,(GPRS_OFF +  0 * GPR_BYTES)(r4)
+       lg r14,(GPRS_OFF +  1 * GPR_BYTES)(r4)
+       lg r15,(GPRS_OFF +  2 * GPR_BYTES)(r4)
+       lg r16,(GPRS_OFF +  3 * GPR_BYTES)(r4)
+       lg r17,(GPRS_OFF +  4 * GPR_BYTES)(r4)
+       lg r18,(GPRS_OFF +  5 * GPR_BYTES)(r4)
+       lg r19,(GPRS_OFF +  6 * GPR_BYTES)(r4)
+       lg r20,(GPRS_OFF +  7 * GPR_BYTES)(r4)
+       lg r21,(GPRS_OFF +  8 * GPR_BYTES)(r4)
+       lg r22,(GPRS_OFF +  9 * GPR_BYTES)(r4)
+       lg r23,(GPRS_OFF + 10 * GPR_BYTES)(r4)
+       lg r24,(GPRS_OFF + 11 * GPR_BYTES)(r4)
+       lg r25,(GPRS_OFF + 12 * GPR_BYTES)(r4)
+       lg r26,(GPRS_OFF + 13 * GPR_BYTES)(r4)
+       lg r27,(GPRS_OFF + 14 * GPR_BYTES)(r4)
+       lg r28,(GPRS_OFF + 15 * GPR_BYTES)(r4)
+       lg r29,(GPRS_OFF + 16 * GPR_BYTES)(r4)
+       lg r30,(GPRS_OFF + 17 * GPR_BYTES)(r4)
+       lg r31,(GPRS_OFF + 18 * GPR_BYTES)(r4)
+#endif
+
+       /* ??? Test r5 for pr_hasNoFloatUpdate and skip the fp load.
+          This is not yet set by the compiler.  */
+
+       lfd f0,FPSCRS_OFF(r4)
+
+       lfd f14,(FPRS_OFF +   0)(r4)
+       lfd f15,(FPRS_OFF +   8)(r4)
+       lfd f16,(FPRS_OFF +  16)(r4)
+       lfd f17,(FPRS_OFF +  24)(r4)
+       lfd f18,(FPRS_OFF +  32)(r4)
+       lfd f19,(FPRS_OFF +  40)(r4)
+       lfd f20,(FPRS_OFF +  48)(r4)
+       lfd f21,(FPRS_OFF +  56)(r4)
+       lfd f22,(FPRS_OFF +  64)(r4)
+       lfd f23,(FPRS_OFF +  72)(r4)
+       lfd f24,(FPRS_OFF +  80)(r4)
+       lfd f25,(FPRS_OFF +  88)(r4)
+       lfd f26,(FPRS_OFF +  96)(r4)
+       lfd f27,(FPRS_OFF + 104)(r4)
+       lfd f28,(FPRS_OFF + 112)(r4)
+       lfd f29,(FPRS_OFF + 120)(r4)
+       lfd f30,(FPRS_OFF + 128)(r4)
+       lfd f31,(FPRS_OFF + 136)(r4)
+       
+       mtfsf 0xff,f0                   /* Restore fpscr.  */
+
+       /* So now restore to entry of _ITM_beginTransaction.  */
+       lwz r0,CR_OFF(r4)
+       mtcr r0                         /* Restore CR.  */
+       lg r0,PC_OFF(r4)
+       mtlr r0                         /* Ret addr.  */
+       lg r1,SP_OFF(r4)                /* SP.  */
+       blr
+
+       /* End of GTM_longjmp.  */
+       /* End of sjlj.S */
+
Index: libitm/config/darwin/powerpc/target.h
===================================================================
--- libitm/config/darwin/powerpc/target.h       (revision 0)
+++ libitm/config/darwin/powerpc/target.h       (revision 0)
@@ -0,0 +1,78 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Iain Sandoe <ia...@gcc.gnu.org>.
+   Based on libitm/powerpc/target.h by Richard Henderson.
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+namespace GTM HIDDEN {
+
+/* We will assume that the code is to be built for a processor with Altivec.
+   If it is desired to make this run on Darwin 8 or earlier, which might be on
+   hardware with either G3 or G4, then the vecSave code in sjlj.S will need to
+   be made conditional on __cpu_has_altivec from the system framework (as is
+   done in Darwin's save_world () routine in libgcc), i.e. you need to check
+   for Altivec at runtime.  */
+
+typedef int v128 __attribute__((vector_size(16), may_alias, aligned(16)));
+   
+typedef struct gtm_jmpbuf
+{
+  v128 vr[12];                 /* vr20-vr31 */
+#ifndef __ppc64__
+  unsigned int vecpad[2];
+#endif
+  void *ResForToc;
+  unsigned long pc;
+  double fpscr;
+  void *sp;
+  unsigned int cr;
+  unsigned int VRsave;
+  unsigned long gr[19];                /* r13-r31 */
+  double fr[18];               /* f14-f31 */
+} gtm_jmpbuf;
+
+/* The size of one line in hardware caches (in bytes). */
+#ifdef __ppc64__
+#  define HW_CACHELINE_SIZE 128
+#else
+#  define HW_CACHELINE_SIZE 64
+#endif
+
+static inline void
+cpu_relax (void)
+{
+  __asm volatile ("" : : : "memory");
+}
+
+static inline void
+atomic_read_barrier (void)
+{
+  __sync_synchronize ();
+}
+
+static inline void
+atomic_write_barrier (void)
+{
+  __sync_synchronize ();
+}
+
+} // namespace GTM
Index: libitm/configure.tgt
===================================================================
--- libitm/configure.tgt        (revision 181968)
+++ libitm/configure.tgt        (working copy)
@@ -46,7 +46,8 @@ fi
 # Map the target cpu to an ARCH sub-directory.  At the same time,
 # work out any special compilation flags as necessary.
 case "${target_cpu}" in
-  alpha*)      ARCH=alpha ;;
+  alpha*)              ARCH=alpha ;;
+  rs6000 | powerpc*)   ARCH=powerpc ;;
 
   i[3456]86)
        case " ${CC} ${CFLAGS} " in
@@ -90,6 +91,15 @@ case "${target}" in
        fi
        ;;
 
+  powerpc*-*-darwin*)
+       config_path="darwin/$ARCH $config_path" 
+       ;;
+
+  powerpc*-*-aix* | rs6000-*-aix*)
+       # The system ought to be supported, but sjlj.S has not been ported.
+       UNSUPPORTED=1
+       ;;
+
   *-*-gnu* | *-*-k*bsd*-gnu \
   | *-*-netbsd* | *-*-freebsd* | *-*-openbsd* \
   | *-*-solaris2* | *-*-sysv4* | *-*-irix6* | *-*-osf* | *-*-hpux11* \





Index: libitm/config/powerpc/sjlj.S
===================================================================
--- libitm/config/powerpc/sjlj.S        (revision 0)
+++ libitm/config/powerpc/sjlj.S        (revision 0)
@@ -0,0 +1,410 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Richard Henderson <r...@redhat.com>.
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+       .text
+
+#include "asmcfi.h"
+
+#define CONCAT1(a, b) CONCAT2(a, b)
+#define CONCAT2(a, b) a ## b
+
+#ifdef __USER_LABEL_PREFIX__
+#  define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
+#else
+#  define SYM(x) x
+#endif
+
+#if defined(__powerpc64__) && defined(__ELF__)
+.macro FUNC name
+        .globl  \name, .\name
+        .section ".opd","aw"
+        .align  3
+\name:
+        .quad   .\name, .TOC.@tocbase, 0
+        .size   \name, 24
+        .type   .\name, @function
+        .text
+.\name:
+.endm
+.macro END name
+       .size   .\name, . - .\name
+.endm
+.macro HIDDEN name
+       .hidden \name, .\name
+.endm
+#define MACHINE
+#elif defined(__ELF__)
+.macro FUNC name
+       .globl  \name
+       .type   \name, @function
+\name:
+.endm
+.macro END name
+       .size   \name, . - \name
+.endm
+.macro HIDDEN name
+       .hidden \name
+.endm
+#define MACHINE
+#elif defined (__MACH__)
+.macro FUNC name
+       .globl  $0
+$0:
+.endmacro
+.macro HIDDEN name
+       .private_extern $0
+.endmacro
+.macro END name
+.endmacro
+#ifdef __ppc64__
+# define MACHINE .machine ppc64
+#else
+# define MACHINE .machine ppc7400
+#endif
+
+#else
+#error "unsupported system"
+#endif
+
+/* Parameterize the code for 32-bit vs 64-bit.  */
+#if defined (__powerpc64__) || defined (__ppc64__)
+#define ldreg  ld
+#define streg  std
+#define stregu stdu
+#define WS     8
+#else
+#define ldreg  lwz
+#define streg  stw
+#define stregu stwu
+#define WS     4
+#endif
+
+/* Parameterize the code for call frame constants.  */
+#if defined(_CALL_AIXDESC)
+# define BASE          6*WS
+# define LR_SAVE       2*WS
+#elif defined(_CALL_SYSV)
+# define BASE          2*WS
+# define LR_SAVE       1*WS
+#elif defined(__MACH__)
+/* 6 words Linkage area + space for two parameters.  */
+# define BASE          (6*WS + 2*WS)
+# define LR_SAVE       2*WS
+#else
+# error "unsupported system"
+#endif
+
+#if defined(__ALTIVEC__) || defined(__VSX__)
+# define OFS_VR                0
+# define VECS_SIZ      12*16
+# define OFS_VSCR      VECS_SIZ
+# define OFS_VR_END    OFS_VSCR + 8
+#else
+# define OFS_VR_END    0
+#endif
+#ifndef _SOFT_FLOAT
+# define OFS_FR                OFS_VR_END
+# define OFS_FPSCR     OFS_FR + 18*8
+# define OFS_FR_END    OFS_FPSCR + 8
+#else
+# define OFS_FR_END    OFS_VR_END
+#endif
+#define OFS_GR         OFS_FR_END
+#define OFS_CFA                OFS_GR + 18*WS
+#define OFS_LR         OFS_CFA + WS
+#define OFS_TOC                OFS_LR + WS
+#define OFS_CR         OFS_TOC + WS
+#define OFS_END                OFS_CR + WS
+
+/* Ensure that when this is subtracted from SP it remains vec aligned.  */
+#define GTM_SIZE (((OFS_END +15) /16) *16)
+
+#define FRAME          (((BASE + GTM_SIZE + 15) / 16) * 16)
+#define VRSAVE         256
+
+       MACHINE
+
+       .align  4
+FUNC SYM(_ITM_beginTransaction)
+       cfi_startproc
+       mflr    r0
+       streg   r0, LR_SAVE(r1)
+       mr      r5, r1
+       stregu  r1, -FRAME(r1)
+       cfi_def_cfa_offset(FRAME)
+       cfi_offset(65, LR_SAVE)
+       addi    r4,r5,-GTM_SIZE /* jump buf pointer.  */
+       streg   r5, OFS_CFA(r4)
+       streg   r0, OFS_LR(r4)
+#ifdef __MACH__
+       streg   r13, OFS_TOC(r4)
+       mfcr    r0
+       streg   r0, OFS_CR(r4)
+#else
+       streg   r2,   OFS_TOC(r4)
+#endif
+       streg   r14,  0*WS+OFS_GR(r4)
+       streg   r15,  1*WS+OFS_GR(r4)
+       streg   r16,  2*WS+OFS_GR(r4)
+       streg   r17,  3*WS+OFS_GR(r4)
+       streg   r18,  4*WS+OFS_GR(r4)
+       streg   r19,  5*WS+OFS_GR(r4)
+       streg   r20,  6*WS+OFS_GR(r4)
+       streg   r21,  7*WS+OFS_GR(r4)
+       streg   r22,  8*WS+OFS_GR(r4)
+       streg   r23,  9*WS+OFS_GR(r4)
+       streg   r24, 10*WS+OFS_GR(r4)
+       streg   r25, 11*WS+OFS_GR(r4)
+       streg   r26, 12*WS+OFS_GR(r4)
+       streg   r27, 13*WS+OFS_GR(r4)
+       streg   r28, 14*WS+OFS_GR(r4)
+       streg   r29, 15*WS+OFS_GR(r4)
+       streg   r30, 16*WS+OFS_GR(r4)
+       streg   r31, 17*WS+OFS_GR(r4)
+
+#ifndef _SOFT_FLOAT
+       /* ??? Determine when FPRs not present.  */
+       /* ??? Test r3 for pr_hasNoFloatUpdate and skip the fp save.
+          This is not yet set by the compiler.  */
+       mffs    f0
+       stfd    f14,  0+OFS_FR(r4)
+       stfd    f15,  8+OFS_FR(r4)
+       stfd    f16, 16+OFS_FR(r4)
+       stfd    f17, 24+OFS_FR(r4)
+       stfd    f18, 32+OFS_FR(r4)
+       stfd    f19, 40+OFS_FR(r4)
+       stfd    f20, 48+OFS_FR(r4)
+       stfd    f21, 56+OFS_FR(r4)
+       stfd    f22, 64+OFS_FR(r4)
+       stfd    f23, 72+OFS_FR(r4)
+       stfd    f24, 80+OFS_FR(r4)
+       stfd    f25, 88+OFS_FR(r4)
+       stfd    f26, 96+OFS_FR(r4)
+       stfd    f27,104+OFS_FR(r4)
+       stfd    f28,112+OFS_FR(r4)
+       stfd    f29,120+OFS_FR(r4)
+       stfd    f30,128+OFS_FR(r4)
+       stfd    f31,136+OFS_FR(r4)
+       stfd    f0, OFS_FPSCR(r4)
+#endif
+
+#if defined(__ALTIVEC__) || defined(__VSX__)
+       /* ??? Determine when VRs not present.  */
+       /* ??? Test r3 for pr_hasNoVectorUpdate and skip the vr save.
+          This is not yet set by the compiler.  */
+       mfspr   r0, VRSAVE
+
+       addi    r5,r4,VECS_SIZ-16       /* Now r5 points at V31 save.  */
+       addi    r4,r5,-16               /* now r4 points to V30 save.  */
+
+       stvx    v31,0,r5
+       addi    r5,r5,-32
+       stvx    v30,0,r4
+       addi    r4,r4,-32
+       stvx    v29,0,r5
+       addi    r5,r5,-32
+       stvx    v28,0,r4
+       addi    r4,r4,-32
+       stvx    v27,0,r5
+       addi    r5,r5,-32
+       stvx    v26,0,r4
+       addi    r4,r4,-32
+       stvx    v25,0,r5
+       addi    r5,r5,-32
+       stvx    v24,0,r4
+       addi    r4,r4,-32
+       stvx    v23,0,r5
+       addi    r5,r5,-32
+       stvx    v22,0,r4
+       addi    r4,r4,-32
+       stvx    v21,0,r5
+       stvx    v20,0,r4                /* r4 back to the start.  */
+       streg   r0, OFS_VSCR(r4)
+#endif
+
+       /* r3 unchanged from entry, r4 points to jump buffer.  */
+       bl      SYM(GTM_begin_transaction)
+       nop
+
+       ldreg   r0, LR_SAVE+FRAME(r1)
+       mtlr    r0
+       addi    r1, r1, FRAME
+       cfi_def_cfa_offset(0)
+       cfi_restore(65)
+       blr
+       cfi_endproc
+END SYM(_ITM_beginTransaction)
+
+       .align 4
+       HIDDEN  SYM(GTM_longjmp)
+FUNC SYM(GTM_longjmp)
+       cfi_startproc
+#if defined(__ALTIVEC__) || defined(__VSX__)
+       /* ??? Determine when VRs not present.  */
+       /* ??? Test r5 for pr_hasNoVectorUpdate and skip the vr restore.
+          This is not yet set by the compiler.  */
+       addi    r6, r4, OFS_VR
+       addi    r7, r4, OFS_VR+16
+       ldreg   r0, OFS_VSCR(r4)
+       cfi_undefined(v20)
+       cfi_undefined(v21)
+       cfi_undefined(v22)
+       cfi_undefined(v23)
+       cfi_undefined(v24)
+       cfi_undefined(v25)
+       cfi_undefined(v26)
+       cfi_undefined(v27)
+       cfi_undefined(v28)
+       cfi_undefined(v29)
+       cfi_undefined(v30)
+       cfi_undefined(v31)
+       lvx     v20, 0, r6
+       addi    r6, r6, 32
+       lvx     v21, 0, r7
+       addi    r7, r7, 32
+       lvx     v22, 0, r6
+       addi    r6, r6, 32
+       lvx     v23, 0, r7
+       addi    r7, r7, 32
+       lvx     v24, 0, r6
+       addi    r6, r6, 32
+       lvx     v25, 0, r7
+       addi    r7, r7, 32
+       lvx     v26, 0, r6
+       addi    r6, r6, 32
+       lvx     v27, 0, r7
+       addi    r7, r7, 32
+       lvx     v28, 0, r6
+       addi    r6, r6, 32
+       lvx     v29, 0, r7
+       addi    r7, r7, 32
+       lvx     v30, 0, r6
+       lvx     v31, 0, r7
+       mtspr   VRSAVE, r0
+#endif
+
+#ifndef _SOFT_FLOAT
+       /* ??? Determine when FPRs not present.  */
+       /* ??? Test r5 for pr_hasNoFloatUpdate and skip the fp load.
+          This is not yet set by the compiler.  */
+       lfd     f0, OFS_FPSCR(r4)
+       cfi_undefined(f14)
+       cfi_undefined(f15)
+       cfi_undefined(f16)
+       cfi_undefined(f17)
+       cfi_undefined(f18)
+       cfi_undefined(f19)
+       cfi_undefined(f20)
+       cfi_undefined(f21)
+       cfi_undefined(f22)
+       cfi_undefined(f23)
+       cfi_undefined(f24)
+       cfi_undefined(f25)
+       cfi_undefined(f26)
+       cfi_undefined(f27)
+       cfi_undefined(f28)
+       cfi_undefined(f29)
+       cfi_undefined(f30)
+       cfi_undefined(f31)
+       lfd     f14,  0+OFS_FR(r4)
+       lfd     f15,  8+OFS_FR(r4)
+       lfd     f16, 16+OFS_FR(r4)
+       lfd     f17, 24+OFS_FR(r4)
+       lfd     f18, 32+OFS_FR(r4)
+       lfd     f19, 40+OFS_FR(r4)
+       lfd     f20, 48+OFS_FR(r4)
+       lfd     f21, 56+OFS_FR(r4)
+       lfd     f22, 64+OFS_FR(r4)
+       lfd     f23, 72+OFS_FR(r4)
+       lfd     f24, 80+OFS_FR(r4)
+       lfd     f25, 88+OFS_FR(r4)
+       lfd     f26, 96+OFS_FR(r4)
+       lfd     f27,104+OFS_FR(r4)
+       lfd     f28,112+OFS_FR(r4)
+       lfd     f29,120+OFS_FR(r4)
+       lfd     f30,128+OFS_FR(r4)
+       lfd     f31,136+OFS_FR(r4)
+       mtfsf   0xff, f0
+#endif
+
+       ldreg   r6,   OFS_CFA(r4)
+       ldreg   r0,   OFS_LR(r4)
+
+#ifdef __MACH__
+       ldreg   r13,  OFS_CR(r4)
+       mtcr    r13
+       ldreg   r13,  OFS_TOC(r4)
+#else
+       ldreg   r2,   OFS_TOC(r4)
+#endif
+       /* At the instant we restore the LR, the only coherent view of
+          the world we have is into the new stack frame.  Define the
+          CFA in terms of the not-yet-restored stack pointer.  This will
+          last until the end of the function.  */
+       mtlr    r0
+       cfi_def_cfa(r6, 0)
+       cfi_undefined(r14)
+       cfi_undefined(r15)
+       cfi_undefined(r16)
+       cfi_undefined(r17)
+       cfi_undefined(r18)
+       cfi_undefined(r19)
+       cfi_undefined(r20)
+       cfi_undefined(r21)
+       cfi_undefined(r22)
+       cfi_undefined(r23)
+       cfi_undefined(r24)
+       cfi_undefined(r25)
+       cfi_undefined(r26)
+       cfi_undefined(r27)
+       cfi_undefined(r28)
+       cfi_undefined(r29)
+       cfi_undefined(r30)
+       cfi_undefined(r31)
+       ldreg   r14,  0*WS+OFS_GR(r4)
+       ldreg   r15,  1*WS+OFS_GR(r4)
+       ldreg   r16,  2*WS+OFS_GR(r4)
+       ldreg   r17,  3*WS+OFS_GR(r4)
+       ldreg   r18,  4*WS+OFS_GR(r4)
+       ldreg   r19,  5*WS+OFS_GR(r4)
+       ldreg   r20,  6*WS+OFS_GR(r4)
+       ldreg   r21,  7*WS+OFS_GR(r4)
+       ldreg   r22,  8*WS+OFS_GR(r4)
+       ldreg   r23,  9*WS+OFS_GR(r4)
+       ldreg   r24, 10*WS+OFS_GR(r4)
+       ldreg   r25, 11*WS+OFS_GR(r4)
+       ldreg   r26, 12*WS+OFS_GR(r4)
+       ldreg   r27, 13*WS+OFS_GR(r4)
+       ldreg   r28, 14*WS+OFS_GR(r4)
+       ldreg   r29, 15*WS+OFS_GR(r4)
+       ldreg   r30, 16*WS+OFS_GR(r4)
+       ldreg   r31, 17*WS+OFS_GR(r4)
+       mr      r1, r6
+       blr
+       cfi_endproc
+END SYM(GTM_longjmp)
+
+#ifdef __linux__
+.section .note.GNU-stack, "", @progbits
+#endif
Index: libitm/config/powerpc/target.h
===================================================================
--- libitm/config/powerpc/target.h      (revision 0)
+++ libitm/config/powerpc/target.h      (revision 0)
@@ -0,0 +1,70 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Richard Henderson <r...@redhat.com>.
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+namespace GTM HIDDEN {
+
+typedef int v128 __attribute__((vector_size(16), may_alias, aligned(16)));
+typedef struct gtm_jmpbuf
+{
+#if defined(__ALTIVEC__) || defined(__VSX__)
+  v128 vr[12];                 /* vr20-vr31 */
+  unsigned long long vscr;     /* long long for padding only */
+#endif
+#ifndef _SOFT_FLOAT
+  double fr[18];               /* f14-f31 */
+  double fpscr;
+#endif
+  unsigned long gr[18];                /* r14-r31 */
+  void *cfa;
+  unsigned long pc;
+  unsigned long r2r13;         /* r2 for AIX, r13 for Darwin.  */
+  unsigned long cr;
+} gtm_jmpbuf;
+
+/* The size of one line in hardware caches (in bytes). */
+#if defined (__powerpc64__) || defined (__ppc64__)
+#  define HW_CACHELINE_SIZE 128
+#else
+#  define HW_CACHELINE_SIZE 32
+#endif
+
+static inline void
+cpu_relax (void)
+{
+  __asm volatile ("" : : : "memory");
+}
+
+static inline void
+atomic_read_barrier (void)
+{
+  __sync_synchronize ();
+}
+
+static inline void
+atomic_write_barrier (void)
+{
+  __sync_synchronize ();
+}
+
+} // namespace GTM
Index: libitm/configure.tgt
===================================================================
--- libitm/configure.tgt        (revision 181968)
+++ libitm/configure.tgt        (working copy)
@@ -46,7 +46,8 @@ fi
 # Map the target cpu to an ARCH sub-directory.  At the same time,
 # work out any special compilation flags as necessary.
 case "${target_cpu}" in
-  alpha*)      ARCH=alpha ;;
+  alpha*)              ARCH=alpha ;;
+  rs6000 | powerpc*)   ARCH=powerpc ;;
 
   i[3456]86)
        case " ${CC} ${CFLAGS} " in
@@ -90,6 +91,11 @@ case "${target}" in
        fi
        ;;
 
+  powerpc*-*-aix* | rs6000-*-aix*)
+       # The system ought to be supported, but sjlj.S has not been ported.
+       UNSUPPORTED=1
+       ;;
+
   *-*-gnu* | *-*-k*bsd*-gnu \
   | *-*-netbsd* | *-*-freebsd* | *-*-openbsd* \
   | *-*-solaris2* | *-*-sysv4* | *-*-irix6* | *-*-osf* | *-*-hpux11* \

Reply via email to