On Wed, Feb 06, 2013 at 01:24:43PM -0500, David Edelsohn wrote: > We just need to tweak it until we close these corner cases. > > Do you have a testcase that can be added?
David's "corner case" comment prompted me to look at this again, and sure enough, there was another case that could be triggered with -m32 -mpowerpc64. Discussed off-list with David, final patch as follows. Committed revision 195835 (pr54131 test) and 195836. gcc/ PR target/54009 * config/rs6000/rs6000.c (mem_operand_gpr): Check that LO_SUM addresses won't wrap when offsetting. (rs6000_secondary_reload): Provide secondary reloads needed for wrapping LO_SUM addresses. gcc/testsuite/ PR target/54131 * gfortran.dg/pr54131.f: New test. PR target/54009 * gcc.target/powerpc/pr54009.c: New test. Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 195707) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -5135,17 +5135,14 @@ if (TARGET_POWERPC64 && (offset & 3) != 0) return false; + extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD; + gcc_assert (extra >= 0); + if (GET_CODE (addr) == LO_SUM) - /* We know by alignment that ABI_AIX medium/large model toc refs - will not cross a 32k boundary, since all entries in the - constant pool are naturally aligned and we check alignment for - other medium model toc-relative addresses. For ABI_V4 and - ABI_DARWIN lo_sum addresses, we just check that 64-bit - offsets are 4-byte aligned. */ - return true; + /* For lo_sum addresses, we must allow any offset except one that + causes a wrap, so test only the low 16 bits. */ + offset = ((offset & 0xffff) ^ 0x8000) - 0x8000; - extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD; - gcc_assert (extra >= 0); return offset + 0x8000 < 0x10000u - extra; } @@ -13823,19 +13819,36 @@ && MEM_P (x) && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD) { - rtx off = address_offset (XEXP (x, 0)); - unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD; + rtx addr = XEXP (x, 0); + rtx off = address_offset (addr); - if (off != NULL_RTX - && (INTVAL (off) & 3) != 0 - && (unsigned HOST_WIDE_INT) INTVAL (off) + 0x8000 < 0x10000 - extra) + if (off != NULL_RTX) { - if (in_p) - sri->icode = CODE_FOR_reload_di_load; + unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD; + unsigned HOST_WIDE_INT offset = INTVAL (off); + + /* We need a secondary reload when our legitimate_address_p + says the address is good (as otherwise the entire address + will be reloaded), and the offset is not a multiple of + four or we have an address wrap. Address wrap will only + occur for LO_SUMs since legitimate_offset_address_p + rejects addresses for 16-byte mems that will wrap. */ + if (GET_CODE (addr) == LO_SUM + ? (1 /* legitimate_address_p allows any offset for lo_sum */ + && ((offset & 3) != 0 + || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra)) + : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */ + && (offset & 3) != 0)) + { + if (in_p) + sri->icode = CODE_FOR_reload_di_load; + else + sri->icode = CODE_FOR_reload_di_store; + sri->extra_cost = 2; + ret = NO_REGS; + } else - sri->icode = CODE_FOR_reload_di_store; - sri->extra_cost = 2; - ret = NO_REGS; + default_p = true; } else default_p = true; @@ -13845,25 +13858,43 @@ && MEM_P (x) && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) { - rtx off = address_offset (XEXP (x, 0)); - unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD; + rtx addr = XEXP (x, 0); + rtx off = address_offset (addr); - /* We need a secondary reload only when our legitimate_address_p - says the address is good (as otherwise the entire address - will be reloaded). So for mode sizes of 8 and 16 this will - be when the offset is in the ranges [0x7ffc,0x7fff] and - [0x7ff4,0x7ff7] respectively. Note that the address we see - here may have been manipulated by legitimize_reload_address. */ - if (off != NULL_RTX - && ((unsigned HOST_WIDE_INT) INTVAL (off) - (0x8000 - extra) - < UNITS_PER_WORD)) + if (off != NULL_RTX) { - if (in_p) - sri->icode = CODE_FOR_reload_si_load; + unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD; + unsigned HOST_WIDE_INT offset = INTVAL (off); + + /* We need a secondary reload when our legitimate_address_p + says the address is good (as otherwise the entire address + will be reloaded), and we have a wrap. + + legitimate_lo_sum_address_p allows LO_SUM addresses to + have any offset so test for wrap in the low 16 bits. + + legitimate_offset_address_p checks for the range + [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7] + for mode size of 16. We wrap at [0x7ffc,0x7fff] and + [0x7ff4,0x7fff] respectively, so test for the + intersection of these ranges, [0x7ffc,0x7fff] and + [0x7ff4,0x7ff7] respectively. + + Note that the address we see here may have been + manipulated by legitimize_reload_address. */ + if (GET_CODE (addr) == LO_SUM + ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra + : offset - (0x8000 - extra) < UNITS_PER_WORD) + { + if (in_p) + sri->icode = CODE_FOR_reload_si_load; + else + sri->icode = CODE_FOR_reload_si_store; + sri->extra_cost = 2; + ret = NO_REGS; + } else - sri->icode = CODE_FOR_reload_si_store; - sri->extra_cost = 2; - ret = NO_REGS; + default_p = true; } else default_p = true; Index: gcc/testsuite/gcc.target/powerpc/pr54009.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/pr54009.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/pr54009.c (revision 0) @@ -0,0 +1,43 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-require-effective-target powerpc_fprs } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-not "\[\+\]32768" } } */ + +/* -O2 -m32 store to x.d in w was + lis 9,x+32764@ha + stw 10,x+32764@l(9) + stw 11,x+32768@l(9) <-- wrap! */ + +struct big { + char a[32764]; + double d __attribute__ ((aligned (4))); +} __attribute__ ((packed)); + +extern struct big x; +double y; + +void r (void) +{ + double tmp = x.d; +#if 1 + asm ("#": "+r" (tmp) + : : "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7", + "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15", + "fr16", "fr17", "fr18", "fr19", "fr20", "fr21", "fr22", "fr23", + "fr24", "fr25", "fr26", "fr27", "fr28", "fr29", "fr30", "fr31"); +#endif + y = tmp; +} + +void w (void) +{ + double tmp = y; +#if 1 + asm ("#": "+r" (tmp) + : : "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7", + "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15", + "fr16", "fr17", "fr18", "fr19", "fr20", "fr21", "fr22", "fr23", + "fr24", "fr25", "fr26", "fr27", "fr28", "fr29", "fr30", "fr31"); +#endif + x.d = tmp; +} Index: gcc/testsuite/gfortran.dg/pr54131.f =================================================================== --- gcc/testsuite/gfortran.dg/pr54131.f (revision 0) +++ gcc/testsuite/gfortran.dg/pr54131.f (revision 0) @@ -0,0 +1,23 @@ +! { dg-do compile } +! { dg-options "-O2 -funroll-loops" } + + SUBROUTINE EFPGRD(IFCM,NAT,NVIB,NPUN,FCM, + * DEN,GRD,ENG,DIP,NVST,NFTODO,LIST) + IMPLICIT DOUBLE PRECISION (A-H,O-Z) + DIMENSION DEN(*),GRD(*),ENG(*),DIP(*),LIST(*) + PARAMETER (MXPT=100, MXFRG=50, MXFGPT=MXPT*MXFRG) + COMMON /FGRAD / DEF(3,MXFGPT),DEFT(3,MXFRG),TORQ(3,MXFRG), + * ATORQ(3,MXFRG) + IF(NVST.EQ.0) THEN + CALL PUVIB(IFCM,IW,.FALSE.,NCOORD,IVIB,IATOM,ICOORD, + * ENG(IENG),GRD(IGRD),DIP(IDIP)) + END IF + DO 290 IVIB=1,NVIB + DO 220 IFRG=1,NFRG + DO 215 J=1,3 + DEFT(J,IFRG)=GRD(INDX+J-1) + 215 CONTINUE + INDX=INDX+6 + 220 CONTINUE + 290 CONTINUE + END -- Alan Modra Australia Development Lab, IBM