Hi This patch is for google/main.
The bug http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43129 describes how the arm backend generates inefficient code, in terms of both code size and performance, to load a global variable's address when option -fpic is specified. It also describes how this can be improved by using the relocation R_ARM_GOT_PREL. This patch contains a new pass, simplify-got, that implements the suggested optimization, together with the hooks for arm. This new pass can also be applied to other targets when they satisfy the following conditions: 1. There should be at least 2 methods to load a global variable's address from GOT. Usually it means using different relocation types. 2. By default all global variable address loads use the same method: first load the base address of GOT into a register pic_reg, then the real global variable's address is loaded as (SET offset_reg <the offset from GOT base to the GOT entry>) (SET address_reg (MEM (PLUS pic_reg offset_reg))) 3. Given the number of global variable accesses and the number of accessed variables, there is a target dependent situation in which the alternative method may be better. 4. The alternative method doesn't use the base of GOT (pic_reg). This patch depends on the arm relocation R_ARM_GOT_PREL, which is implemented in binutils 2.21. This patch has been tested on arm qemu. It was also bootstrapped and regression tested on x86_64. thanks Carrot 2011-04-28 Guozhi Wei <car...@google.com> PR target/43129 * gcc/hooks.c (hook_rtx_void_null): New function. * gcc/hooks.h (hook_rtx_void_null): New prototype. * gcc/target.def (got_access): New hook vector declaration. * gcc/tree-pass.h (pass_simplify_got): New pass. * gcc/timevar.def (TV_SIMPLIFY_GOT): New TV id. * gcc/simplify-got.c: New source file. * gcc/Makefile.in (simplify-got.o): Add a new file. * gcc/passes.c (init_optimization_passes): Add a new pass. * gcc/config/arm/arm.c (arm_output_addr_const_extra): Output GOT_PREL relocation. (arm_get_pic_reg): New function. (arm_clear_pic_reg): New function. 
(arm_can_simplify_got_access): New function. (arm_loaded_global_var): New function. (arm_load_global_address): New function. * gcc/config/arm/arm.md (UNSPEC_GOT_PREL_SYM): New UNSPEC symbol. 2011-04-28 Guozhi Wei <car...@google.com> PR target/43129 * gcc/testsuite/gcc.target/arm/got1.c: New testcase. * gcc/testsuite/gcc.target/arm/got2.c: New testcase. Index: gcc/hooks.c =================================================================== --- gcc/hooks.c (revision 172951) +++ gcc/hooks.c (working copy) @@ -280,6 +280,13 @@ hook_rtx_tree_int_null (tree a ATTRIBUTE return NULL; } +/* Generic hook that takes no arguments and returns NULL_RTX. */ +rtx +hook_rtx_void_null (void) +{ + return NULL; +} + /* Generic hook that takes three trees and returns the last one as is. */ tree hook_tree_tree_tree_tree_3rd_identity (tree a ATTRIBUTE_UNUSED, Index: gcc/hooks.h =================================================================== --- gcc/hooks.h (revision 172951) +++ gcc/hooks.h (working copy) @@ -84,6 +84,7 @@ extern bool default_can_output_mi_thunk_ extern rtx hook_rtx_rtx_identity (rtx); extern rtx hook_rtx_rtx_null (rtx); extern rtx hook_rtx_tree_int_null (tree, int); +extern rtx hook_rtx_void_null (void); extern const char *hook_constcharptr_const_tree_null (const_tree); extern const char *hook_constcharptr_const_rtx_null (const_rtx); Index: gcc/target.def =================================================================== --- gcc/target.def (revision 172951) +++ gcc/target.def (working copy) @@ -2615,6 +2615,68 @@ DEFHOOK HOOK_VECTOR_END (target_option) +/* Functions used to simplify GOT access. */ +#undef HOOK_PREFIX +#define HOOK_PREFIX "TARGET_" +HOOK_VECTOR (TARGET_SIMPLIFY_GOT_ACCESS, simplify_got_access) + +/* Function to get the pic_reg which holds the base address of GOT. 
*/ +DEFHOOK +(get_pic_reg, +"Return the pic_reg pseudo register which holds the base address of GOT.\ + It is only required by the simplify-got optimization.", + rtx, (void), + hook_rtx_void_null) + +/* Function to clear the pic_reg once it is no longer needed. */ +DEFHOOK +(clear_pic_reg, +"After successful simplify-got optimization, the pic_reg is useless. So a\ + target can use this hook to clear pic_reg.", + void, (void), + NULL) + +/* Function to detect if the specified insn loads a global variable's + address from GOT. If so, it returns that symbol. */ +DEFHOOK +(loaded_global_var, +"This hook is used to detect if the given @var{insn} loads a global\ + variable's address from GOT with the form of\ + @smallexample\ + (set @var{address_reg} (mem (plus pic_reg @var{offset_reg})))\ + @end smallexample\ + If so return the global variable whose address will be loaded and fill in\ + @var{offset_insn} and @var{offset_reg}. @var{offset_reg} is set at\ + @var{offset_insn} to hold the offset from GOT base to the GOT entry of the\ + global variable. Otherwise return @code{NULL_RTX}.", + rtx, (rtx insn, rtx *offset_reg, rtx *offset_insn), + NULL) + +/* This function checks whether the target dependent conditions for + simplifying GOT accesses are satisfied. */ +DEFHOOK +(can_simplify_got_access, +"This hook determines if it satisfy the target dependent conditions to do\ + simplify-got when given the number of global variable accessing and the\ + number of accessed symbols. If the returned value is false the GOT access\ + insns will not be rewritten. Otherwise we will rewrite these insns.", + bool, (int n_symbol, int n_access), + NULL) + +/* This function does the actual rewriting of GOT accesses. */ +DEFHOOK +(load_global_address, +"This hook does the actual rewriting of GOT access insn @var{load_insn}.\ + The global variable is @var{symbol}. The global address should be loaded\ + into @var{address_reg}. 
The register @var{offset_reg} was previously set\ + in insn @var{offset_insn} to hold the offset from GOT base to the GOT\ + entry of the global variable. Now it can be used as a scratch register.", + void, (rtx symbol, rtx offset_reg, rtx address_reg, rtx load_insn, + rtx offset_insn), + NULL) + +HOOK_VECTOR_END (got_access) + /* For targets that need to mark extra registers as live on entry to the function, they should define this target hook and set their bits in the bitmap passed in. */ Index: gcc/tree-pass.h =================================================================== --- gcc/tree-pass.h (revision 172951) +++ gcc/tree-pass.h (working copy) @@ -501,6 +501,7 @@ extern struct rtl_opt_pass pass_rtl_hois extern struct rtl_opt_pass pass_rtl_store_motion; extern struct rtl_opt_pass pass_cse_after_global_opts; extern struct rtl_opt_pass pass_rtl_ifcvt; +extern struct rtl_opt_pass pass_simplify_got; extern struct rtl_opt_pass pass_into_cfg_layout_mode; extern struct rtl_opt_pass pass_outof_cfg_layout_mode; Index: gcc/testsuite/gcc.target/arm/got1.c =================================================================== --- gcc/testsuite/gcc.target/arm/got1.c (revision 0) +++ gcc/testsuite/gcc.target/arm/got1.c (revision 0) @@ -0,0 +1,10 @@ +/* { dg-options "-Os -fpic" } */ +/* { dg-final { scan-assembler "GOT_PREL" } } */ + +extern int x; +int foo(int j) +{ + int t = x; + x = j; + return t; +} Index: gcc/testsuite/gcc.target/arm/got2.c =================================================================== --- gcc/testsuite/gcc.target/arm/got2.c (revision 0) +++ gcc/testsuite/gcc.target/arm/got2.c (revision 0) @@ -0,0 +1,11 @@ +/* We should not use GOT_PREL relocation to load global address with so + many global accesses. 
*/ + +/* { dg-options "-Os -fpic" } */ +/* { dg-final { scan-assembler-not "GOT_PREL" } } */ + +extern int x1, x2, x3, x4, x5; +/* Reads five distinct global symbols, each exactly once. */ +int sum() +{ + return x1 + x2 + x3 + x4 + x5; +} Index: gcc/timevar.def =================================================================== --- gcc/timevar.def (revision 172951) +++ gcc/timevar.def (working copy) @@ -237,6 +237,7 @@ DEFTIMEVAR (TV_TREE_UNINIT , " DEFTIMEVAR (TV_TREE_THREADSAFE , "thread safety analysis") DEFTIMEVAR (TV_PLUGIN_INIT , "plugin initialization") DEFTIMEVAR (TV_PLUGIN_RUN , "plugin execution") +DEFTIMEVAR (TV_SIMPLIFY_GOT , "simplify got") /* Everything else in rest_of_compilation not included above. */ DEFTIMEVAR (TV_EARLY_LOCAL , "early local passes") Index: gcc/simplify-got.c =================================================================== --- gcc/simplify-got.c (revision 0) +++ gcc/simplify-got.c (revision 0) @@ -0,0 +1,196 @@ +/* Simplify the code to load global variable's address from GOT. + Copyright (C) 2011 + Free Software Foundation, Inc. + Contributed by Wei Guozhi <car...@google.com>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* This file contains an optimization of global variable address loading + from GOT. + + When generating PIC code, we need to load global variable's address from + GOT. 
Many targets do this as follows: + + (set pic_reg ...) # load the base address of GOT into pic_reg. + ... 
+ (set off_set ...) # load the offset from the base of GOT to + # a global variable's GOT entry. + (set address # load the address from GOT. + (mem (plus pic_reg off_set))) + ... + + If the target has an alternative method (usually using a different + relocation) to load the global address, and in some cases it costs + less and avoids the pic_reg, we can use this pass to improve it. + + In order to employ this optimization the target must satisfy the + following constraints: + + 1. There should be at least 2 methods to load a global variable's + address from GOT. + + 2. By default all global variable accesses use the method described + above. + + 3. There is a target dependent situation in which the alternative method + is better when considering the number of global variable accesses and + the number of accessed variables. + + 4. The alternative method doesn't use the base of GOT (pic_reg). +*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "flags.h" +#include "target.h" +#include "tree-pass.h" +#include "df.h" +#include "timevar.h" + +/* Initial size passed to htab_create for the table of accessed symbols. */ +#define VAR_TABLE_SIZE 10 + +/* Information needed when rewriting the GOT access insns. One entry is + recorded per insn that loads a global address through pic_reg. */ +struct got_access_info +{ + rtx symbol; /* The global variable. */ + rtx offset_reg; /* Register that contains the GOT entry offset. */ + rtx address_reg; /* Register that receives the final global address. */ + rtx offset_insn; /* The insn that loads the offset. */ + rtx load_insn; /* The insn which loads the address from GOT. */ +}; + +/* This optimization is enabled only when the pic_reg is actually used. 
*/
+static bool
+gate_handle_simplify_got (void)
+{
+  return (optimize > 0) && targetm.got_access.get_pic_reg ();
+}
+
+/* Execute function of the simplify-got pass.
+
+   Walk every use of pic_reg.  Each use must be either the special USE
+   insn, an insn that also sets pic_reg (part of computing its value), or
+   a load of a global address that the target hook loaded_global_var
+   recognizes.  Any other use makes the pass give up without changing
+   anything.  If the target then reports the rewrite as profitable, every
+   recorded load is rewritten through the load_global_address hook, the
+   USE insn is removed and the target is asked to clear pic_reg.
+
+   Always returns 0 (no extra TODO flags).  */
+static unsigned int
+rest_of_handle_simplify_got (void)
+{
+  df_ref ref;
+  rtx use = NULL_RTX;
+  int i, n_symbol, n_access = 0;
+  struct got_access_info *got_accesses;
+  htab_t var_table = htab_create (VAR_TABLE_SIZE,
+                                  htab_hash_pointer,
+                                  htab_eq_pointer,
+                                  NULL);
+  rtx pic_reg = targetm.got_access.get_pic_reg ();
+  gcc_assert (pic_reg);
+
+  /* Each recorded access comes from its own entry of the use chain, so
+     the use count is an upper bound on the number of accesses.  */
+  got_accesses = XNEWVEC (struct got_access_info,
+                          DF_REG_USE_COUNT (REGNO (pic_reg)));
+
+  /* Check if all uses of pic_reg are loading global address through the
+     default method.  */
+  for (ref = DF_REG_USE_CHAIN (REGNO (pic_reg));
+       ref;
+       ref = DF_REF_NEXT_REG (ref))
+    {
+      rtx insn = DF_REF_INSN (ref);
+      rtx set, symbol;
+      /* Start from a defined value in case the hook leaves them alone.  */
+      rtx offset_reg = NULL_RTX;
+      rtx offset_insn = NULL_RTX;
+      void **slot;
+
+      /* Check for the special USE insn, it is not a real usage of
+         pic_reg.  Remember it so that it can be removed later.  */
+      if (GET_CODE (PATTERN (insn)) == USE)
+        {
+          use = insn;
+          continue;
+        }
+
+      /* If an insn both sets and uses pic_reg, it is in the process of
+         constructing the value of pic_reg.  We should also ignore it.  */
+      set = single_set (insn);
+      if (set && SET_DEST (set) == pic_reg)
+        continue;
+
+      symbol = targetm.got_access.loaded_global_var (insn, &offset_reg,
+                                                     &offset_insn);
+      /* This insn doesn't load a global address, but it has other
+         unexpected usage of pic_reg, give up.  */
+      if (!symbol)
+        goto done;
+
+      /* The table only counts distinct symbols (compared by pointer).  */
+      slot = htab_find_slot (var_table, symbol, INSERT);
+      if (*slot == HTAB_EMPTY_ENTRY)
+        *slot = symbol;
+
+      gcc_assert (set);
+      got_accesses[n_access].symbol = symbol;
+      got_accesses[n_access].offset_reg = offset_reg;
+      got_accesses[n_access].address_reg = SET_DEST (set);
+      got_accesses[n_access].load_insn = insn;
+      got_accesses[n_access].offset_insn = offset_insn;
+      n_access++;
+    }
+
+  /* Check if we can simplify it.  */
+  n_symbol = htab_elements (var_table);
+  gcc_assert (n_symbol <= n_access);
+  if (!targetm.got_access.can_simplify_got_access (n_symbol, n_access))
+    goto done;
+
+  /* Rewrite the global address loading insns.  */
+  for (i = 0; i < n_access; i++)
+    targetm.got_access.load_global_address (got_accesses[i].symbol,
+                                            got_accesses[i].offset_reg,
+                                            got_accesses[i].address_reg,
+                                            got_accesses[i].load_insn,
+                                            got_accesses[i].offset_insn);
+
+  /* Since there is no usage of pic_reg now, we can remove it.  */
+  if (use)
+    remove_insn (use);
+  targetm.got_access.clear_pic_reg ();
+
+  /* Single exit: release the scratch data on every path.  */
+ done:
+  free (got_accesses);
+  htab_delete (var_table);
+  return 0;
+}
+
+struct rtl_opt_pass pass_simplify_got =
+{
+ {
+  RTL_PASS,
+  "simplify_got",                       /* name */
+  gate_handle_simplify_got,             /* gate */
+  rest_of_handle_simplify_got,          /* execute */
+  NULL,                                 /* sub */
+  NULL,                                 /* next */
+  0,                                    /* static_pass_number */
+  TV_SIMPLIFY_GOT,                      /* tv_id */
+  0,                                    /* properties_required */
+  0,                                    /* properties_provided */
+  0,                                    /* properties_destroyed */
+  0,                                    /* todo_flags_start */
+  TODO_dump_func                        /* todo_flags_finish */
+ }
+};
Index: gcc/Makefile.in
===================================================================
--- gcc/Makefile.in (revision 172951)
+++ gcc/Makefile.in (working copy)
@@ -1348,6 +1348,7 @@ OBJS-common = \
  sel-sched-dump.o \
  sel-sched.o \
  sese.o \
+ simplify-got.o \
  simplify-rtx.o \
  sparseset.o \
  sreal.o \
@@ -2977,6 +2978,8 @@ jump.o : jump.c $(CONFIG_H) $(SYSTEM_H)
    $(EXCEPT_H) $(FUNCTION_H) $(BASIC_BLOCK_H) $(TREE_PASS_H) \
    $(DIAGNOSTIC_CORE_H) $(DIAGNOSTIC_CORE_H) $(INSN_ATTR_H) $(TM_P_H) reload.h \
    $(PREDICT_H) $(TIMEVAR_H) $(TARGET_H)
+simplify-got.o : simplify-got.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
+   $(RTL_H) $(FLAGS_H) $(DF_H) $(TREE_PASS_H) $(TIMEVAR_H) $(TARGET_H) $(HASHTAB_H)
 simplify-rtx.o : simplify-rtx.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
    $(RTL_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h \
    $(RECOG_H) $(EXPR_H) 
$(DIAGNOSTIC_CORE_H) output.h $(FUNCTION_H) $(GGC_H) $(TM_P_H) \ Index: gcc/passes.c =================================================================== --- gcc/passes.c (revision 172951) +++ gcc/passes.c (working copy) @@ -988,6 +988,7 @@ init_optimization_passes (void) NEXT_PASS (pass_rtl_loop_done); *p = NULL; } + NEXT_PASS (pass_simplify_got); NEXT_PASS (pass_web); NEXT_PASS (pass_rtl_cprop); NEXT_PASS (pass_cse2); Index: gcc/config/arm/arm.c =================================================================== --- gcc/config/arm/arm.c (revision 172951) +++ gcc/config/arm/arm.c (working copy) @@ -250,6 +250,11 @@ static bool arm_builtin_support_vector_m bool is_packed); static void arm_conditional_register_usage (void); static reg_class_t arm_preferred_rename_class (reg_class_t rclass); +static rtx arm_get_pic_reg (void); +static void arm_clear_pic_reg (void); +static bool arm_can_simplify_got_access (int, int); +static rtx arm_loaded_global_var (rtx, rtx *, rtx *); +static void arm_load_global_address (rtx, rtx, rtx, rtx, rtx); /* Table of machine attributes. 
*/ @@ -591,6 +596,21 @@ static const struct default_options arm_ #define TARGET_PREFERRED_RENAME_CLASS \ arm_preferred_rename_class +#undef TARGET_GET_PIC_REG +#define TARGET_GET_PIC_REG arm_get_pic_reg + +#undef TARGET_CLEAR_PIC_REG +#define TARGET_CLEAR_PIC_REG arm_clear_pic_reg + +#undef TARGET_LOADED_GLOBAL_VAR +#define TARGET_LOADED_GLOBAL_VAR arm_loaded_global_var + +#undef TARGET_CAN_SIMPLIFY_GOT_ACCESS +#define TARGET_CAN_SIMPLIFY_GOT_ACCESS arm_can_simplify_got_access + +#undef TARGET_LOAD_GLOBAL_ADDRESS +#define TARGET_LOAD_GLOBAL_ADDRESS arm_load_global_address + struct gcc_target targetm = TARGET_INITIALIZER; #define SHORTEST_FAR_JUMP_LENGTH 2040 @@ -22883,6 +22903,14 @@ arm_output_addr_const_extra (FILE *fp, r fputc (')', fp); return TRUE; } + else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOT_PREL_SYM) + { + /* Print the two UNSPEC operands as `OP0(GOT_PREL)+(OP1)'. */ + output_addr_const (fp, XVECEXP (x, 0, 0)); + fputs ("(GOT_PREL)+(", fp); + output_addr_const (fp, XVECEXP (x, 0, 1)); + fputc (')', fp); + return TRUE; + } else if (GET_CODE (x) == CONST_VECTOR) return arm_emit_vector_const (fp, x); @@ -23682,4 +23710,195 @@ arm_preferred_rename_class (reg_class_t return NO_REGS; } +/* Return the pic_reg recorded in the current function's machine data. */ +rtx +arm_get_pic_reg (void) +{ + return cfun->machine->pic_reg; +} + +/* Reset the current function's pic_reg to NULL_RTX. */ +void +arm_clear_pic_reg (void) +{ + cfun->machine->pic_reg = NULL_RTX; +} + +/* Determine if it is profitable to simplify GOT accesses. + + The default global address loading instructions are: + + ldr r3, .L2 # A + ldr r2, .L2+4 # B +.LPIC0: + add r3, pc # A + ldr r4, [r3, r2] # B + ... +.L2: + .word _GLOBAL_OFFSET_TABLE_-(.LPIC0+4) # A + .word i(GOT) # S + + The new instruction sequence is: + + ldr r3, .L2 # C +.LPIC0: + add r3, pc # C + ldr r3, [r3] # C + ... 
+.L2:
+	.word	i(GOT_PREL)+(.-(.LPIC0+4))	# C
+
+   With n global address loads touching s distinct global symbols, the
+   rewrite is considered profitable when
+
+	cost(A) + cost(B) * n + cost(S) * s >= cost(C) * n
+
+   where, reading the sizes off the snippets above,
+
+	cost(A) = INSN_LENGTH * 2 + WORD_LENGTH
+	cost(B) = INSN_LENGTH * 2
+	cost(S) = WORD_LENGTH
+	cost(C) = INSN_LENGTH * 3 + WORD_LENGTH
+
+   The length of instruction depends on the target instruction set.  */
+
+#define N_INSNS_A 2
+#define N_INSNS_B 2
+#define N_INSNS_C 3
+
+bool
+arm_can_simplify_got_access (int n_symbol, int n_access)
+{
+  const int insn_bytes = TARGET_THUMB ? 2 : 4;
+  const int word_bytes = UNITS_PER_WORD;
+  int default_size, got_prel_size;
+
+  /* Default scheme: one pic_reg setup (A), one offset load plus GOT load
+     per access (B), and one literal word per distinct symbol (S).  */
+  default_size = (insn_bytes * N_INSNS_A + word_bytes)
+                 + (insn_bytes * N_INSNS_B) * n_access
+                 + word_bytes * n_symbol;
+
+  /* GOT_PREL scheme: a self-contained sequence (C) for every access.  */
+  got_prel_size = (insn_bytes * N_INSNS_C + word_bytes) * n_access;
+
+  return default_size >= got_prel_size;
+}
+
+/* Detect if INSN loads a global address.  If so returns the symbol.
+   If the GOT offset is loaded in a separate instruction, sets the
+   corresponding OFFSET_REG and OFFSET_INSN.  Otherwise fills with NULL.  
*/
+rtx
+arm_loaded_global_var (rtx insn, rtx *offset_reg, rtx *offset_insn)
+{
+  rtx set = single_set (insn);
+  rtx pic_reg = cfun->machine->pic_reg;
+  rtx plus, op0, op1, src;
+  rtx found_reg = NULL_RTX;
+  rtx found_insn = NULL_RTX;
+
+  gcc_assert (pic_reg);
+
+  /* Fill the outputs with NULL up front so that every path which does
+     not find a separate offset insn leaves them in a defined state.  */
+  *offset_reg = NULL_RTX;
+  *offset_insn = NULL_RTX;
+
+  /* Global address loading instruction has the pattern:
+       (SET address_reg (MEM (PLUS pic_reg offset_reg)))  */
+  if (!set
+      || !MEM_P (SET_SRC (set))
+      || GET_CODE (XEXP (SET_SRC (set), 0)) != PLUS)
+    return NULL_RTX;
+
+  plus = XEXP (SET_SRC (set), 0);
+  op0 = XEXP (plus, 0);
+  op1 = XEXP (plus, 1);
+
+  /* Accept pic_reg as either operand; normalize it into OP0.  */
+  if (op1 == pic_reg)
+    {
+      rtx tmp = op0;
+      op0 = op1;
+      op1 = tmp;
+    }
+
+  if (op0 != pic_reg)
+    return NULL_RTX;
+
+  if (REG_P (op1))
+    {
+      unsigned int regno = REGNO (op1);
+      rtx def_insn, def_set;
+
+      /* The offset register must be set once and used only by INSN,
+         so that it can later be reused as a scratch register.  */
+      if ((DF_REG_USE_COUNT (regno) != 1)
+          || (DF_REG_DEF_COUNT (regno) != 1))
+        return NULL_RTX;
+
+      /* The offset loading insn has the pattern:
+           (SET offset_reg (UNSPEC [symbol] UNSPEC_PIC_SYM))  */
+      def_insn = DF_REF_INSN (DF_REG_DEF_CHAIN (regno));
+      def_set = single_set (def_insn);
+      /* single_set returns NULL for insns that are not a single SET;
+         such a definition cannot be the offset load we look for.  */
+      if (!def_set || SET_DEST (def_set) != op1)
+        return NULL_RTX;
+
+      src = SET_SRC (def_set);
+      found_reg = op1;
+      found_insn = def_insn;
+    }
+  else
+    /* The offset is embedded directly in the address.  */
+    src = op1;
+
+  if ((GET_CODE (src) != UNSPEC) || (XINT (src, 1) != UNSPEC_PIC_SYM))
+    return NULL_RTX;
+
+  /* Success: publish the offset register and insn (NULL when the offset
+     was not loaded by a separate insn) and return the symbol.  */
+  *offset_reg = found_reg;
+  *offset_insn = found_insn;
+  return RTVEC_ELT (XVEC (src, 0), 0);
+}
+
+/* Rewrite the global address loading instructions.
+   SYMBOL is the global variable.  OFFSET_REG contains the offset of the
+   GOT entry.  ADDRESS_REG will receive the final global address.
+   LOAD_INSN is the original insn which loads the address from GOT.
+   OFFSET_INSN is the original insn which sets OFFSET_REG.
+   If the GOT offset is not loaded in a separate instruction, OFFSET_REG
+   and OFFSET_INSN should be NULL.  
*/ +void +arm_load_global_address (rtx symbol, rtx offset_reg, + rtx address_reg, rtx load_insn, rtx offset_insn) +{ + rtx offset, got_prel, new_insn; + rtx labelno = GEN_INT (pic_labelno++); + rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); + rtx set = single_set (load_insn); + + /* By default the old offset register serves as the scratch register and + the new insns are emitted right after OFFSET_INSN. When there is no + separate offset insn, ADDRESS_REG itself is the scratch register and + the new insns are emitted after the insn preceding LOAD_INSN. */ + rtx tmp_reg = offset_reg; + rtx insert_pos = offset_insn; + if (offset_reg == NULL) + { + tmp_reg = address_reg; + insert_pos = PREV_INSN (load_insn); + } + + /* The first insn: + (SET tmp_reg (address_of_GOT_entry(symbol) - pc)) + The expression (address_of_GOT_entry(symbol) - pc) is expressed by + got_prel, which is actually represented by R_ARM_GOT_PREL relocation. + The constant added to the label (8 for TARGET_ARM, 4 otherwise) pairs + with the pic_add_dot_plus_eight / pic_add_dot_plus_four pattern chosen + for the second insn below. */ + l1 = gen_rtx_CONST (VOIDmode, l1); + l1 = plus_constant (l1, TARGET_ARM ? 8 : 4); + offset = gen_rtx_MINUS (VOIDmode, pc_rtx, l1); + got_prel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, offset), + UNSPEC_GOT_PREL_SYM); + got_prel = gen_rtx_CONST (Pmode, got_prel); + if (TARGET_32BIT) + new_insn = emit_insn_after (gen_pic_load_addr_32bit (tmp_reg, got_prel), + insert_pos); + else + new_insn = emit_insn_after (gen_pic_load_addr_thumb1 (tmp_reg, got_prel), + insert_pos); + + /* The second insn: + (SET tmp_reg (PLUS tmp_reg pc_rtx)) */ + if (TARGET_ARM) + emit_insn_after (gen_pic_add_dot_plus_eight (tmp_reg, tmp_reg, labelno), + new_insn); + else + emit_insn_after (gen_pic_add_dot_plus_four (tmp_reg, tmp_reg, labelno), + new_insn); + + /* The last insn to access the GOT entry: + (SET address_reg (MEM tmp_reg)) + We reuse the existing load instruction: only its address is replaced + in place, and the insn is then rescanned for the dataflow info. */ + XEXP (SET_SRC (set), 0) = tmp_reg; + df_insn_rescan (load_insn); +} + #include "gt-arm.h" Index: gcc/config/arm/arm.md =================================================================== --- gcc/config/arm/arm.md (revision 172951) +++ gcc/config/arm/arm.md (working copy) @@ -104,6 +104,7 @@ (UNSPEC_SYMBOL_OFFSET 27) ; The offset of the start of the symbol from ; another symbolic address. (UNSPEC_MEMORY_BARRIER 28) ; Represent a memory barrier. 
+ (UNSPEC_GOT_PREL_SYM 29) ; Specify an R_ARM_GOT_PREL relocation of a symbol. ] ) -- This patch is available for review at http://codereview.appspot.com/4433079