This patch is a respin of https://gcc.gnu.org/ml/gcc-patches/2015-05/msg01804.html.
A new symbol classification "SYMBOL_SMALL_GOT_28K" is added to represent a symbol that needs to go through the GOT table under -fpic with -mcmodel=small. The "_28K" suffix reflects the symbol's attribute better, and by introducing this new symbol type we can avoid checking aarch64_cmodel to some extent, though the check is still needed in some places. All other code logic is unchanged. OK for trunk? Thanks. 2015-06-26 Jiong Wang <jiong.w...@arm.com> gcc/ * config/aarch64/aarch64-protos.h (aarch64_symbol_type): New type SYMBOL_SMALL_GOT_28K. * config/aarch64/aarch64.md (ldr_got_small_<mode>): Support new GOT relocation modifiers. (unspec): New enum "UNSPEC_GOTSMALLPIC28K". (ldr_got_small_28k_<mode>): New. (ldr_got_small_28k_sidi): New. * config/aarch64/iterators.md (got_modifier): New mode attribute. * config/aarch64/aarch64-opts.h (aarch64_code_model): New model. * config/aarch64/aarch64.c (aarch64_load_symref_appropriately): Support SYMBOL_SMALL_GOT_28K. (aarch64_rtx_costs): Add costs for new instruction sequences. (initialize_aarch64_code_model): Initialize new model. (aarch64_classify_symbol): Recognize new model and new symbol classification. (aarch64_asm_preferred_eh_data_format): Support new model. (aarch64_load_symref_appropriately): Generate new instruction sequences for -fpic. (TARGET_USE_PSEUDO_PIC_REG): New definition. (aarch64_use_pseudo_pic_reg): New function. gcc/testsuite/ * gcc.target/aarch64/pic-small.c: New testcase. Regards, Jiong
diff --git a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h index ea64cf4..24bfd9f 100644 --- a/gcc/config/aarch64/aarch64-opts.h +++ b/gcc/config/aarch64/aarch64-opts.h @@ -56,6 +56,9 @@ enum aarch64_code_model { /* Static code, data and GOT/PLT fit within a 4GB region. The default PIC code model. */ AARCH64_CMODEL_SMALL_PIC, + /* -fpic for small memory model. + GOT size to 28KiB (4K*8-4K) or 3580 entries. */ + AARCH64_CMODEL_SMALL_SPIC, /* No assumptions about addresses of code and data. The PIC variant is not yet implemented. */ AARCH64_CMODEL_LARGE diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 36bd051..9b506d5 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -62,6 +62,13 @@ enum aarch64_symbol_context This corresponds to the small PIC model of the compiler. + SYMBOL_SMALL_GOT_28K: Similar to SYMBOL_SMALL_GOT_4G, but used for symbol + restricted within 28K GOT table size. + + ldr reg, [gp, #:gotpage_lo15:sym] + + This corresponds to -fpic model for small memory model of the compiler. + SYMBOL_SMALL_TLSGD SYMBOL_SMALL_TLSDESC SYMBOL_SMALL_GOTTPREL @@ -95,6 +102,7 @@ enum aarch64_symbol_context enum aarch64_symbol_type { SYMBOL_SMALL_ABSOLUTE, + SYMBOL_SMALL_GOT_28K, SYMBOL_SMALL_GOT_4G, SYMBOL_SMALL_TLSGD, SYMBOL_SMALL_TLSDESC, diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 776600f..2a1be00 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -814,6 +814,66 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm, emit_insn (gen_rtx_SET (dest, imm)); return; + case SYMBOL_SMALL_GOT_28K: + { + machine_mode mode = GET_MODE (dest); + rtx gp_rtx = pic_offset_table_rtx; + + /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach + here before rtl expand. Tree IVOPT will generate rtl pattern to + decide rtx costs, in which case pic_offset_table_rtx is not + initialized. 
For that case no need to generate the first adrp + instruction as the final cost for global variable access is + one instruction. */ + if (gp_rtx != NULL) + { + /* -fpic for -mcmodel=small allows a 32K GOT table size (but we are + using the page base as GOT base, the first page may be wasted, + in the worst scenario, there is only 28K space for GOT). + + The generated instruction sequence for accessing a global variable + is: + + ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym] + + Only one instruction needed. But we must initialize + pic_offset_table_rtx properly. We generate an initialization insn for + every global access, and allow CSE to remove all redundant ones. + + The final instruction sequences will look like the following + for multiple global variable accesses. + + adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_ + + ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1] + ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2] + ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3] + ... */ + + rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); + crtl->uses_pic_offset_table = 1; + emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s)); + + if (mode != GET_MODE (gp_rtx)) + gp_rtx = simplify_gen_subreg (mode, gp_rtx, GET_MODE (gp_rtx), 0); + } + + if (mode == ptr_mode) + { + if (mode == DImode) + emit_insn (gen_ldr_got_small_28k_di (dest, gp_rtx, imm)); + else + emit_insn (gen_ldr_got_small_28k_si (dest, gp_rtx, imm)); + } + else + { + gcc_assert (mode == Pmode); + emit_insn (gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm)); + } + + return; + } + case SYMBOL_SMALL_GOT_4G: { /* In ILP32, the mode of dest can be either SImode or DImode, @@ -1466,6 +1526,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) case SYMBOL_SMALL_TLSGD: case SYMBOL_SMALL_TLSDESC: case SYMBOL_SMALL_GOTTPREL: + case SYMBOL_SMALL_GOT_28K: case SYMBOL_SMALL_GOT_4G: case SYMBOL_TINY_GOT: if (offset != const0_rtx) @@ -6395,7 +6456,8 @@ cost_plus: case SYMBOL_REF: - if (aarch64_cmodel == 
AARCH64_CMODEL_LARGE) + if (aarch64_cmodel == AARCH64_CMODEL_LARGE + || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC) { /* LDR. */ if (speed) @@ -7247,7 +7309,9 @@ initialize_aarch64_code_model (void) aarch64_cmodel = AARCH64_CMODEL_TINY_PIC; break; case AARCH64_CMODEL_SMALL: - aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC; + aarch64_cmodel = (flag_pic == 2 + ? AARCH64_CMODEL_SMALL_PIC + : AARCH64_CMODEL_SMALL_SPIC); break; case AARCH64_CMODEL_LARGE: sorry ("code model %qs with -f%s", "large", @@ -7328,6 +7392,7 @@ aarch64_classify_symbol (rtx x, rtx offset, case AARCH64_CMODEL_TINY: return SYMBOL_TINY_ABSOLUTE; + case AARCH64_CMODEL_SMALL_SPIC: case AARCH64_CMODEL_SMALL_PIC: case AARCH64_CMODEL_SMALL: return SYMBOL_SMALL_ABSOLUTE; @@ -7375,9 +7440,11 @@ aarch64_classify_symbol (rtx x, rtx offset, return SYMBOL_TINY_GOT; return SYMBOL_TINY_ABSOLUTE; + case AARCH64_CMODEL_SMALL_SPIC: case AARCH64_CMODEL_SMALL_PIC: if (!aarch64_symbol_binds_local_p (x)) - return SYMBOL_SMALL_GOT_4G; + return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC + ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G); return SYMBOL_SMALL_ABSOLUTE; default: @@ -9256,6 +9323,7 @@ aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global) case AARCH64_CMODEL_TINY_PIC: case AARCH64_CMODEL_SMALL: case AARCH64_CMODEL_SMALL_PIC: + case AARCH64_CMODEL_SMALL_SPIC: /* text+got+data < 4Gb. 4-byte signed relocs are sufficient for everything. */ type = DW_EH_PE_sdata4; @@ -11551,6 +11619,15 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load, return true; } +/* Return 1 if pseudo register should be created and used to hold + GOT address for PIC code. 
*/ + +bool +aarch64_use_pseudo_pic_reg (void) +{ + return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC; +} + #undef TARGET_ADDRESS_COST #define TARGET_ADDRESS_COST aarch64_address_cost @@ -11829,6 +11906,9 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load, #undef TARGET_SCHED_FUSION_PRIORITY #define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority +#undef TARGET_USE_PSEUDO_PIC_REG +#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-aarch64.h" diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index d3f5d5b..673189d 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -87,6 +87,7 @@ UNSPEC_FRINTX UNSPEC_FRINTZ UNSPEC_GOTSMALLPIC + UNSPEC_GOTSMALLPIC28K UNSPEC_GOTSMALLTLS UNSPEC_GOTTINYPIC UNSPEC_LD1 @@ -4360,6 +4361,29 @@ [(set_attr "type" "load1")] ) +(define_insn "ldr_got_small_28k_<mode>" + [(set (match_operand:PTR 0 "register_operand" "=r") + (unspec:PTR [(mem:PTR (lo_sum:PTR + (match_operand:PTR 1 "register_operand" "r") + (match_operand:PTR 2 "aarch64_valid_symref" "S")))] + UNSPEC_GOTSMALLPIC28K))] + "" + "ldr\\t%<w>0, [%1, #:<got_modifier>:%a2]" + [(set_attr "type" "load1")] +) + +(define_insn "ldr_got_small_28k_sidi" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (unspec:SI [(mem:SI (lo_sum:DI + (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "aarch64_valid_symref" "S")))] + UNSPEC_GOTSMALLPIC28K)))] + "TARGET_ILP32" + "ldr\\t%w0, [%1, #:gotpage_lo14:%a2]" + [(set_attr "type" "load1")] +) + (define_insn "ldr_got_tiny" [(set (match_operand:DI 0 "register_operand" "=r") (unspec:DI [(match_operand:DI 1 "aarch64_valid_symref" "S")] diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 498358a..b19d3d7 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -657,6 +657,10 @@ (define_mode_attr insn_count [(OI "8") (CI 
"12") (XI "16")]) +;; -fpic small model GOT reloc modifers: gotpage_lo15/lo14 for ILP64/32. +;; No need of iterator for -fPIC as it use got_lo12 for both modes. +(define_mode_attr got_modifier [(SI "gotpage_lo14") (DI "gotpage_lo15")]) + ;; ------------------------------------------------------------------- ;; Code Iterators ;; ------------------------------------------------------------------- diff --git a/gcc/testsuite/gcc.target/aarch64/pic-small.c b/gcc/testsuite/gcc.target/aarch64/pic-small.c new file mode 100644 index 0000000..874f81b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pic-small.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fpic -fno-inline --save-temps" } */ + +void abort (); +int global_a; + +int +initialize (void) +{ + global_a = 0x10; + return global_a - 1; +} + +int +main (int argc, char **argv) +{ + int a = initialize (); + + if (a != global_a - 1) + abort (); + + return 0; +} + +/* { dg-final { scan-assembler-times "adrp\tx\[0-9\]+, _GLOBAL_OFFSET_TABLE" 2 } } */ +/* { dg-final { cleanup-saved-temps } } */