On Sat, Aug 22, 2020 at 07:05:51PM -0500, Bill Schmidt wrote:
> What is necessary in order to allow this optimization to occur
> earlier is to make this hidden dependency explicit.  When the
> relocation is inserted, we have to change the "pld" instruction to
> have a specific clobber of (in this case) r5, which represents what
> will happen if the linker makes the substitution.
> 
> I agree that it's too fragile to force this to be the last pass, so
> I think if Mike can look into introducing a clobber of the hard
> register when performing the optimization, that would at least allow
> us to move this anywhere after reload.
> 
> I don't immediately see a solution that works prior to register
> allocation because we basically are representing two potential
> starting points of a live range, only one of which will survive in
> the final code.  That is too ugly a problem to hand to the register
> allocator.

As I said in a private message, I have the appropriate clobbers and such
already.

Here is the program I used in my previous reply to Segher:

        extern int a, b, c;

        int sum (void)
        {
          return a + b + c;
        }


Here is the RTL from sched2, i.e. before the PCREL_OPT pass runs:

        ;; Load the address of a into r8
        (insn:TI 5 13 6 2 (set (reg/f:DI 8 8 [123])
                (symbol_ref:DI ("a") [flags 0xc0]  <var_decl 0x7ff100832480 
a>)) "foo02.c":5:12 722 {*pcrel_extern_addr}
             (expr_list:REG_EQUIV (symbol_ref:DI ("a") [flags 0xc0]  <var_decl 
0x7ff100832480 a>)
                (nil)))

        ;; Load the address of b into r10
        (insn 6 5 10 2 (set (reg/f:DI 10 10 [124])
                (symbol_ref:DI ("b") [flags 0xc0]  <var_decl 0x7ff100832510 
b>)) "foo02.c":5:12 722 {*pcrel_extern_addr}
             (expr_list:REG_EQUIV (symbol_ref:DI ("b") [flags 0xc0]  <var_decl 
0x7ff100832510 b>)
                (nil)))

        ;; Load the address of c into r9
        (insn 10 6 7 2 (set (reg/f:DI 9 9 [128])
                (symbol_ref:DI ("c") [flags 0xc0]  <var_decl 0x7ff1008325a0 
c>)) "foo02.c":5:16 722 {*pcrel_extern_addr}
             (expr_list:REG_EQUIV (symbol_ref:DI ("c") [flags 0xc0]  <var_decl 
0x7ff1008325a0 c>)
                (nil)))

        ;; Load a's value into r3, using r8 as the base register
        (insn:TI 7 10 8 2 (set (reg:DI 3 3)
                (zero_extend:DI (mem/c:SI (reg/f:DI 8 8 [123]) [1 a+0 S4 
A32]))) "foo02.c":5:12 16 {zero_extendsidi2}
             (expr_list:REG_DEAD (reg/f:DI 8 8 [123])
                (nil)))

        ;; Load b's value into r10, using r10 as the base register
        (insn 8 7 11 2 (set (reg:DI 10 10)
                (zero_extend:DI (mem/c:SI (reg/f:DI 10 10 [124]) [1 b+0 S4 
A32]))) "foo02.c":5:12 16 {zero_extendsidi2}
             (nil))

        ;; Load c's value into r9, using r9 as the base register
        (insn 11 8 9 2 (set (reg:DI 9 9)
                (zero_extend:DI (mem/c:SI (reg/f:DI 9 9 [128]) [1 c+0 S4 
A32]))) "foo02.c":5:16 16 {zero_extendsidi2}
             (nil))

        ;; Add a+b
        (insn:TI 9 11 12 2 (set (reg:SI 3 3 [125])
                (plus:SI (reg:SI 3 3 [orig:126 a ] [126])
                    (reg:SI 10 10 [orig:127 b ] [127]))) "foo02.c":5:12 65 
{*addsi3}
             (expr_list:REG_DEAD (reg:SI 10 10 [orig:127 b ] [127])
                (nil)))

        ;; Add (a+b)+c
        (insn:TI 12 9 18 2 (set (reg:SI 3 3 [122])
                (plus:SI (reg:SI 3 3 [125])
                    (reg:SI 9 9 [orig:129 c ] [129]))) "foo02.c":5:16 65 
{*addsi3}
             (expr_list:REG_DEAD (reg:SI 9 9 [orig:129 c ] [129])
                (nil)))

        ;; Sign extend
        (insn:TI 18 12 19 2 (set (reg/i:DI 3 3)
                (sign_extend:DI (reg:SI 3 3 [122]))) "foo02.c":6:1 31 
{extendsidi2}
             (nil))

        ;; Return
        (insn 19 18 29 2 (use (reg/i:DI 3 3)) "foo02.c":6:1 -1
             (nil))
        (note 29 19 25 2 NOTE_INSN_EPILOGUE_BEG)
        (jump_insn 25 29 26 2 (simple_return) "foo02.c":6:1 866 {simple_return}
             (nil)
         -> simple_return)


And here is the RTL after the PCREL_OPT pass:

        ;; Load of address a into r8; a's value will be loaded into r3
        (insn:TI 5 13 6 2 (parallel [
                    (set (reg/f:DI 8 8 [123])
                        (unspec:DI [
                                (symbol_ref:DI ("a") [flags 0xc0]  <var_decl 
0x7ff100832480 a>)
                                (const_int 1 [0x1])
                            ] UNSPEC_PCREL_OPT_LD_ADDR))
                    (set (reg:DI 3 3)
                        (unspec:DI [
                                (const_int 0 [0])
                            ] UNSPEC_PCREL_OPT_LD_ADDR))
                ]) "foo02.c":5:12 2198 {pcrel_opt_ld_addr}
             (expr_list:REG_EQUIV (symbol_ref:DI ("a") [flags 0xc0]  <var_decl 
0x7ff100832480 a>)
                (nil)))

        ;; Load of address b into r10, which is the same register that b's
        ;; value will be loaded into
        (insn 6 5 10 2 (set (reg/f:DI 10 10 [124])
                (unspec:DI [
                        (symbol_ref:DI ("b") [flags 0xc0]  <var_decl 
0x7ff100832510 b>)
                        (const_int 2 [0x2])
                    ] UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG)) "foo02.c":5:12 2199 
{pcrel_opt_ld_addr_same_reg}
             (expr_list:REG_EQUIV (symbol_ref:DI ("b") [flags 0xc0]  <var_decl 
0x7ff100832510 b>)
                (nil)))

        ;; Load of address c into r9, which is the same register that c's
        ;; value will be loaded into
        (insn 10 6 7 2 (set (reg/f:DI 9 9 [128])
                (unspec:DI [
                        (symbol_ref:DI ("c") [flags 0xc0]  <var_decl 
0x7ff1008325a0 c>)
                        (const_int 3 [0x3])
                    ] UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG)) "foo02.c":5:16 2199 
{pcrel_opt_ld_addr_same_reg}
             (expr_list:REG_EQUIV (symbol_ref:DI ("c") [flags 0xc0]  <var_decl 
0x7ff1008325a0 c>)
                (nil)))

        ;; Load & zero extend the variable a into r3, using base register r8
        (insn:TI 7 10 8 2 (parallel [
                    (set (reg:DI 3 3)
                        (zero_extend:DI (unspec:SI [
                                    (mem/c:SI (reg/f:DI 8 8 [123]) [1 a+0 S4 
A32])
                                    (reg:DI 3 3)
                                    (const_int 1 [0x1])
                                ] UNSPEC_PCREL_OPT_LD_RELOC)))
                    (clobber (reg/f:DI 8 8 [123]))
                ]) "foo02.c":5:12 2207 {*pcrel_opt_ldsi_udi_gpr}
             (expr_list:REG_DEAD (reg/f:DI 8 8 [123])
                (nil)))

        ;; Load & zero extend the variable b into r10, using r10 as the base
        ;; register
        (insn 8 7 11 2 (parallel [
                    (set (reg:DI 10 10)
                        (zero_extend:DI (unspec:SI [
                                    (mem/c:SI (reg/f:DI 10 10 [124]) [1 b+0 S4 
A32])
                                    (reg:DI 10 10)
                                    (const_int 2 [0x2])
                                ] UNSPEC_PCREL_OPT_LD_RELOC)))
                    (clobber (scratch:DI))
                ]) "foo02.c":5:12 2207 {*pcrel_opt_ldsi_udi_gpr}
             (nil))

        ;; Load & zero extend the variable c into r9, using r9 as the base
        ;; register
        (insn 11 8 9 2 (parallel [
                    (set (reg:DI 9 9)
                        (zero_extend:DI (unspec:SI [
                                    (mem/c:SI (reg/f:DI 9 9 [128]) [1 c+0 S4 
A32])
                                    (reg:DI 9 9)
                                    (const_int 3 [0x3])
                                ] UNSPEC_PCREL_OPT_LD_RELOC)))
                    (clobber (scratch:DI))
                ]) "foo02.c":5:16 2207 {*pcrel_opt_ldsi_udi_gpr}
             (nil))

        ;; Add a+b
        (insn:TI 9 11 12 2 (set (reg:SI 3 3 [125])
                (plus:SI (reg:SI 3 3 [orig:126 a ] [126])
                    (reg:SI 10 10 [orig:127 b ] [127]))) "foo02.c":5:12 65 
{*addsi3}
             (expr_list:REG_DEAD (reg:SI 10 10 [orig:127 b ] [127])
                (nil)))

        ;; Add (a+b)+c
        (insn:TI 12 9 18 2 (set (reg:SI 3 3 [122])
                (plus:SI (reg:SI 3 3 [125])
                    (reg:SI 9 9 [orig:129 c ] [129]))) "foo02.c":5:16 65 
{*addsi3}
             (expr_list:REG_DEAD (reg:SI 9 9 [orig:129 c ] [129])
                (nil)))

        ;; Sign extend the result
        (insn:TI 18 12 19 2 (set (reg/i:DI 3 3)
                (sign_extend:DI (reg:SI 3 3 [122]))) "foo02.c":6:1 31 
{extendsidi2}
             (nil))

        ;; Return
        (insn 19 18 29 2 (use (reg/i:DI 3 3)) "foo02.c":6:1 -1
             (nil))
        (note 29 19 25 2 NOTE_INSN_EPILOGUE_BEG)
        (jump_insn 25 29 26 2 (simple_return) "foo02.c":6:1 866 {simple_return}
             (nil)
         -> simple_return)

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797

Reply via email to