This patch adds a new address mask field to the reg_addr structure in rs6000.c. The new address mask says that a particular register type (gpr, fpr, altivec) needs to use DS style offset addressing for a given mode (i.e. the bottom 2 bits must be 0). In working on this, I discovered that the instructions that take up more than one register (i.e. __ibm128) did not set the offset mask, and I have fixed those modes.
I have done the usual bootstrap and compare make check on a little endian power8 system and there were no regressions. In addition, I used the -mdebug=reg option that prints out the mask definitions, and verified each of the types have the appropriate d/ds/dq flags set. I have done it with both the normal options and with -mabi=ieeelongdoulbe and I verified the TFmode/TCmode is appropriate for a vector type instead of a pair of registes. Patch #12 (and ultimately patch #17) will use this information. Can I check this patch into the FSF trunk? This is a series of separate patches to add functionality to the PowerPC backend to support future processors. Here is a high level summary of the patches: * Patches 1-8, have already been applied * Patch 9 has been rewritten in patches 12-13 * Patch 10 is withdrawn for now * Patch 11 adds DS offset mode to rs6000.c's reg_addr * Patch 12 adds a new enumeration for instruction format * Patch 13 adds support for matching prefixed insns * Patch 14 adds pc-relative support to load up addresses * Patch 15 renamed some functions to be smaller * Patch 16 updated a comment and moved a predicate * Patch 17 adds the prefixed RTL attribute & emitting 'p' before prefixed * Patch 18 adds prefixed support for scalar types * Patch 19 uses a separate 'future' cost structure * Patch 20 clones power9.md for initial scheduling on future.md. The following patches have not yet been written, but I expect them to be: * Patch 21 finish prefixed insn support for vectors & 128-bit int/floats * Patch 22 enable pc-relative by default * Patch 23 add pcrel linker optimization * Patch 24 new tests 2019-07-24 Michael Meissner <meiss...@linux.ibm.com> * config/rs6000/rs6000.c (addr_mask_type): Make the type unsigned short to accomidate RELOAD_REG_DS_OFFSET. (RELOAD_REG_DS_OFFSET): New mask to identify instructions using the DS instruction format. (rs6000_setup_reg_addr_masks): Set the RELOAD_REG_DS_OFFSET mask if the instruction would need to use a DS instruction format for a register class. Split up processing for vectors between vector registers and GPRs. Treat floating point modes that take two registers like DF/SF mode. Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 273771) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -351,16 +351,17 @@ static const struct reload_reg_map_type /* Mask bits for each register class, indexed per mode. Historically the compiler has been more restrictive which types can do PRE_MODIFY instead of PRE_INC and PRE_DEC, so keep track of sepaate bits for these two. */ -typedef unsigned char addr_mask_type; +typedef unsigned short addr_mask_type; -#define RELOAD_REG_VALID 0x01 /* Mode valid in register.. */ -#define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */ -#define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */ -#define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */ -#define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */ -#define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */ -#define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */ -#define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited. */ +#define RELOAD_REG_VALID 0x001 /* Mode valid in register. */ +#define RELOAD_REG_MULTIPLE 0x002 /* Mode takes multiple registers. */ +#define RELOAD_REG_INDEXED 0x004 /* Reg+reg addressing. */ +#define RELOAD_REG_OFFSET 0x008 /* Reg+offset addressing. */ +#define RELOAD_REG_PRE_INCDEC 0x010 /* PRE_INC/PRE_DEC valid. */ +#define RELOAD_REG_PRE_MODIFY 0x020 /* PRE_MODIFY valid. */ +#define RELOAD_REG_AND_M16 0x040 /* AND -16 addressing. */ +#define RELOAD_REG_QUAD_OFFSET 0x080 /* quad offset is limited. */ +#define RELOAD_REG_DS_OFFSET 0x100 /* quad offset is limited. */ /* Register type masks based on the type, of valid addressing modes. */ struct rs6000_reg_addr { @@ -2078,6 +2079,8 @@ rs6000_debug_addr_mask (addr_mask_type m if ((mask & RELOAD_REG_QUAD_OFFSET) != 0) *p++ = 'O'; + else if ((mask & RELOAD_REG_DS_OFFSET) != 0) + *p++ = 'd'; else if ((mask & RELOAD_REG_OFFSET) != 0) *p++ = 'o'; else if (keep_spaces) @@ -2539,8 +2542,6 @@ rs6000_setup_reg_addr_masks (void) { machine_mode m2 = (machine_mode) m; bool complex_p = false; - bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode); - size_t msize; if (COMPLEX_MODE_P (m2)) { @@ -2548,7 +2549,9 @@ rs6000_setup_reg_addr_masks (void) m2 = GET_MODE_INNER (m2); } - msize = GET_MODE_SIZE (m2); + ssize_t msize = GET_MODE_SIZE (m2); + bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode); + bool float_2reg_p = FLOAT128_2REG_P (m2); /* SDmode is special in that we want to access it only via REG+REG addressing on power7 and above, since we want to use the LFIWZX and @@ -2633,25 +2636,43 @@ rs6000_setup_reg_addr_masks (void) possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing for 64-bit scalars and 32-bit SFmode to altivec registers. */ if ((addr_mask != 0) && !indexed_only_p - && msize <= 8 + && (msize <= 8 || float_2reg_p) && (rc == RELOAD_REG_GPR - || ((msize == 8 || m2 == SFmode) + || ((msize == 8 || m2 == SFmode || float_2reg_p) && (rc == RELOAD_REG_FPR || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR))))) - addr_mask |= RELOAD_REG_OFFSET; + { + addr_mask |= RELOAD_REG_OFFSET; + + /* 64-bit types in GPRs in 64-bit mode need DS-form memory. + Likewise any offsettable load in Altivec registers needs + DS-form. */ + if ((rc == RELOAD_REG_GPR && msize == 8 && TARGET_POWERPC64) + || rc == RELOAD_REG_VMX) + addr_mask |= RELOAD_REG_DS_OFFSET; + } /* VSX registers can do REG+OFFSET addresssing if ISA 3.0 instructions are enabled. The offset for 128-bit VSX registers is - only 12-bits. While GPRs can handle the full offset range, VSX - registers can only handle the restricted range. */ + only 12-bits (DQ addressing). */ else if ((addr_mask != 0) && !indexed_only_p && msize == 16 && TARGET_P9_VECTOR + && (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX) && (ALTIVEC_OR_VSX_VECTOR_MODE (m2) || (m2 == TImode && TARGET_VSX))) + addr_mask |= (RELOAD_REG_OFFSET | RELOAD_REG_QUAD_OFFSET); + + /* Vectors in GPR registers can do REG+OFFSET addressing. We need to + set DQ addressing if quad memory instructions are enabled, and DS + addressing for 64-bit mode for the LD/STD instructions. */ + else if ((addr_mask != 0) && !indexed_only_p + && msize >= 16 && rc == RELOAD_REG_GPR) { addr_mask |= RELOAD_REG_OFFSET; - if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX) + if (TARGET_QUAD_MEMORY) addr_mask |= RELOAD_REG_QUAD_OFFSET; + else if (TARGET_POWERPC64) + addr_mask |= RELOAD_REG_DS_OFFSET; } /* VMX registers can do (REG & -16) and ((REG+REG) & -16) -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797