Conanap created this revision. Conanap added reviewers: stefanp, nemanjai, PowerPC. Conanap added projects: LLVM, clang, PowerPC. Herald added subscribers: dang, kbarton, arichardson, emaste. Herald added a reviewer: espindola. Herald added a reviewer: MaskRay. Conanap requested review of this revision.
Implemented an option to omit Power10 instructions from save stubs, selected via `--no-power10-stubs` or `--power10-stubs=no` on lld. When both forms are given, `--power10-stubs=` overrides `--no-power10-stubs`. `--power10-stubs=auto` also exists and selects the default behaviour (i.e. Power10 instructions are allowed in stubs). Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D94627 Files: lld/ELF/Config.h lld/ELF/Driver.cpp lld/ELF/Options.td lld/ELF/Thunks.cpp lld/test/ELF/ppc64-call-reach.s lld/test/ELF/ppc64-long-branch-localentry-offset.s lld/test/ELF/ppc64-long-branch-pi.s lld/test/ELF/ppc64-long-branch-rel14.s lld/test/ELF/ppc64-long-branch.s lld/test/ELF/ppc64-pcrel-call-to-extern.s lld/test/ELF/ppc64-pcrel-call-to-toc.s lld/test/ELF/ppc64-plt-stub-compatible.s lld/test/ELF/ppc64-tls-pcrel-gd.s lld/test/ELF/ppc64-toc-call-to-pcrel-long-jump.s lld/test/ELF/ppc64-toc-call-to-pcrel.s llvm/include/llvm/Object/ELF.h
Index: llvm/include/llvm/Object/ELF.h =================================================================== --- llvm/include/llvm/Object/ELF.h +++ llvm/include/llvm/Object/ELF.h @@ -50,7 +50,11 @@ enum PPCInstrMasks : uint64_t { PADDI_R12_NO_DISP = 0x0610000039800000, + ADDIS_R12_NO_DISP = 0x3D800000, + ADDI_R12_TO_R12_NO_DISP = 0x398C0000, PLD_R12_NO_DISP = 0x04100000E5800000, + LD_R12_NO_DISP = 0xE9800000, + LD_R12_TO_R12_NO_DISP = 0xE98C0000, MTCTR_R12 = 0x7D8903A6, BCTR = 0x4E800420, }; Index: lld/test/ELF/ppc64-toc-call-to-pcrel.s =================================================================== --- lld/test/ELF/ppc64-toc-call-to-pcrel.s +++ lld/test/ELF/ppc64-toc-call-to-pcrel.s @@ -14,7 +14,13 @@ # RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=SYMBOL # RUN: llvm-objdump -d --no-show-raw-insn --mcpu=future %t | FileCheck %s -# The point of this test is to make sure that when a function with TOC access +# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t.o +# RUN: ld.lld -T %t.script %t.o -o %t --no-power10-stubs +# RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=SYMBOL +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=future %t \ +# RUN: | FileCheck %s + +## The point of this test is to make sure that when a function with TOC access # a local function with st_other=1, a TOC save stub is inserted. # SYMBOL: Symbol table '.symtab' contains 7 entries Index: lld/test/ELF/ppc64-toc-call-to-pcrel-long-jump.s =================================================================== --- lld/test/ELF/ppc64-toc-call-to-pcrel-long-jump.s +++ lld/test/ELF/ppc64-toc-call-to-pcrel-long-jump.s @@ -16,10 +16,10 @@ # SYM-NEXT: 2: 0000000020020008 0 NOTYPE LOCAL DEFAULT [<other: 0x60>] 3 caller # SYM-NEXT: 3: 0000000010020008 0 NOTYPE LOCAL DEFAULT 2 caller_close # SYM-NEXT: 4: 0000000520020008 0 NOTYPE LOCAL DEFAULT 4 caller_far -# SYM-NEXT: 5: 0000000520028038 0 NOTYPE LOCAL HIDDEN 6 .TOC. +# SYM-NEXT: 5: 0000000520028040 0 NOTYPE LOCAL HIDDEN 6 .TOC. 
# SYM-NEXT: 6: 0000000010020020 8 FUNC LOCAL DEFAULT 2 __toc_save_callee -# SYM-NEXT: 7: 0000000020020020 20 FUNC LOCAL DEFAULT 3 __toc_save_callee -# SYM-NEXT: 8: 0000000520020020 20 FUNC LOCAL DEFAULT 4 __toc_save_callee +# SYM-NEXT: 7: 0000000020020020 32 FUNC LOCAL DEFAULT 3 __toc_save_callee +# SYM-NEXT: 8: 0000000520020020 32 FUNC LOCAL DEFAULT 4 __toc_save_callee #--- lts PHDRS { Index: lld/test/ELF/ppc64-tls-pcrel-gd.s =================================================================== --- lld/test/ELF/ppc64-tls-pcrel-gd.s +++ lld/test/ELF/ppc64-tls-pcrel-gd.s @@ -42,10 +42,10 @@ #--- asm # GD-RELOC: Relocation section '.rela.dyn' at offset 0x100b8 contains 4 entries: -# GD-RELOC: 0000000001001160 0000000200000044 R_PPC64_DTPMOD64 0000000000000000 x + 0 -# GD-RELOC: 0000000001001168 000000020000004e R_PPC64_DTPREL64 0000000000000000 x + 0 -# GD-RELOC: 0000000001001170 0000000300000044 R_PPC64_DTPMOD64 0000000000000000 y + 0 -# GD-RELOC: 0000000001001178 000000030000004e R_PPC64_DTPREL64 0000000000000000 y + 0 +# GD-RELOC: 0000000001001170 0000000200000044 R_PPC64_DTPMOD64 0000000000000000 x + 0 +# GD-RELOC: 0000000001001178 000000020000004e R_PPC64_DTPREL64 0000000000000000 x + 0 +# GD-RELOC: 0000000001001180 0000000300000044 R_PPC64_DTPMOD64 0000000000000000 y + 0 +# GD-RELOC: 0000000001001188 000000030000004e R_PPC64_DTPREL64 0000000000000000 y + 0 # GD-SYM: Symbol table '.dynsym' contains 4 entries: # GD-SYM: 2: 0000000000000000 0 TLS GLOBAL DEFAULT UND x @@ -68,9 +68,9 @@ # GDTOLE-SYM: 4: 0000000000000004 0 TLS GLOBAL DEFAULT 3 y # GD-LABEL: <GDTwoVal>: -# GD-NEXT: paddi 3, 0, 352, 1 +# GD-NEXT: paddi 3, 0, 368, 1 # GD-NEXT: bl -# GD-NEXT: paddi 3, 0, 356, 1 +# GD-NEXT: paddi 3, 0, 372, 1 # GD-NEXT: bl # GD-NEXT: blr # GDTOIE-LABEL: <GDTwoVal>: Index: lld/test/ELF/ppc64-plt-stub-compatible.s =================================================================== --- lld/test/ELF/ppc64-plt-stub-compatible.s +++ lld/test/ELF/ppc64-plt-stub-compatible.s @@ 
-29,9 +29,9 @@ # T2-LABEL: <p9codegen>: # T2-NEXT: 10010300: addis 2, 12, 1 -# T2-NEXT: 10010304: addi 2, 2, -32384 +# T2-NEXT: 10010304: addi 2, 2, -32368 # T2-NEXT: 10010308: addis 4, 2, -1 -# T2-NEXT: 1001030c: lwa 3, 32428(4) +# T2-NEXT: 1001030c: lwa 3, 32412(4) # T2-NEXT: 10010310: bl 0x10010330 # T2-NEXT: 10010314: ld 2, 24(1) # T2-NEXT: 10010318: blr @@ -49,7 +49,7 @@ # T2-NEXT: 10010340: bctr # T2-LABEL: <__plt_pcrel_callee>: -# T2-NEXT: 10010350: pld 12, 328(0), 1 +# T2-NEXT: 10010350: pld 12, 344(0), 1 # T2-NEXT: 10010358: mtctr 12 # T2-NEXT: 1001035c: bctr .ifdef T2 @@ -83,24 +83,24 @@ # T3-LABEL: <p9codegen>: # T3-NEXT: 10010310: addis 2, 12, 1 -# T3-NEXT: 10010314: addi 2, 2, -32408 +# T3-NEXT: 10010314: addi 2, 2, -32392 # T3-NEXT: 10010318: addis 4, 2, -1 -# T3-NEXT: 1001031c: lwa 3, 32436(4) -# T3-NEXT: 10010320: bl 0x10010340 +# T3-NEXT: 1001031c: lwa 3, 32420(4) +# T3-NEXT: 10010320: bl 0x10010350 # T3-NEXT: 10010324: ld 2, 24(1) # T3-NEXT: 10010328: blr # T3-LABEL: <__plt_pcrel_callee>: -# T3-NEXT: 10010330: pld 12, 352(0), 1 +# T3-NEXT: 10010330: pld 12, 368(0), 1 # T3-NEXT: 10010338: mtctr 12 # T3-NEXT: 1001033c: bctr # T3-LABEL: <__plt_callee>: -# T3-NEXT: 10010340: std 2, 24(1) -# T3-NEXT: 10010344: addis 12, 2, 0 -# T3-NEXT: 10010348: ld 12, -32744(12) -# T3-NEXT: 1001034c: mtctr 12 -# T3-NEXT: 10010350: bctr +# T3-NEXT: 10010350: std 2, 24(1) +# T3-NEXT: 10010354: addis 12, 2, 0 +# T3-NEXT: 10010358: ld 12, -32744(12) +# T3-NEXT: 1001035c: mtctr 12 +# T3-NEXT: 10010360: bctr .ifdef T3 .section .text_start, "ax", %progbits p10codegen: Index: lld/test/ELF/ppc64-pcrel-call-to-toc.s =================================================================== --- lld/test/ELF/ppc64-pcrel-call-to-toc.s +++ lld/test/ELF/ppc64-pcrel-call-to-toc.s @@ -15,13 +15,19 @@ # RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=SYMBOL # RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o 
%t.o +# RUN: ld.lld -T %t.script %t.o -o %t --no-power10-stubs +# RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=SYMBOL +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t \ +# RUN: | FileCheck %s --check-prefix=CHECK-NOP10 + ## When a function without TOC accesses a function using TOC, an r12 setup stub ## is inserted # SYMBOL: 1: 0000000010020000 0 NOTYPE LOCAL DEFAULT [<other: 0x60>] 2 callee # SYMBOL-NEXT: 2: 0000000010030000 0 NOTYPE LOCAL DEFAULT [<other: 0x20>] 3 caller # SYMBOL-NEXT: 3: 0000000010010000 0 NOTYPE LOCAL DEFAULT 1 func -# SYMBOL: 6: 0000000010030010 16 FUNC LOCAL DEFAULT 3 __gep_setup_callee +# SYMBOL: 6: 0000000010030010 32 FUNC LOCAL DEFAULT 3 __gep_setup_callee # CHECK-LABEL: <func>: # CHECK-NEXT: blr @@ -29,7 +35,7 @@ # CHECK-LABEL: <callee>: # CHECK: bl 0x10010000 # CHECK-NEXT: addis 4, 2, -1 -# CHECK-NEXT: lwz 4, 32744(4) +# CHECK-NEXT: lwz 4, 32728(4) # CHECK-NEXT: blr # CHECK-LABEL: <caller>: @@ -41,6 +47,16 @@ # CHECK-NEXT: mtctr 12 # CHECK-NEXT: bctr +# CHECK-NOP10-LABEL: <__gep_setup_callee>: +# CHECK-NOP10-NEXT: mflr 0 +# CHECK-NOP10-NEXT: bcl 20, 31, 0x10030018 +# CHECK-NOP10-NEXT: mflr 11 +# CHECK-NOP10-NEXT: mtlr 12 +# CHECK-NOP10-NEXT: addis 12, 12, -1 +# CHECK-NOP10-NEXT: addi 12, 12, -24 +# CHECK-NOP10-NEXT: mtctr 12 +# CHECK-NOP10-NEXT: bctr + .section .text_func, "ax", %progbits func: blr Index: lld/test/ELF/ppc64-pcrel-call-to-extern.s =================================================================== --- lld/test/ELF/ppc64-pcrel-call-to-extern.s +++ lld/test/ELF/ppc64-pcrel-call-to-extern.s @@ -23,6 +23,24 @@ # RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=REL # RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=powerpc64le --defsym AUX=1 %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t2.o +# RUN: ld.lld --shared %t2.o -o %t2.so +# RUN: ld.lld -T %t.script %t1.o %t2.so -o %t --no-power10-stubs +# RUN: llvm-readelf -s %t 
| FileCheck %s --check-prefix=SYMBOL-NOP10 +# RUN: llvm-readelf -S -d %t | FileCheck %s --check-prefix=SEC-NOP10 +# RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=REL-NOP10 +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s --check-prefix=CHECK-NOP10 + +# RUN: llvm-mc -filetype=obj -triple=powerpc64 --defsym AUX=1 %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t2.o +# RUN: ld.lld --shared %t2.o -o %t2.so +# RUN: ld.lld -T %t.script %t1.o %t2.so -o %t --no-power10-stubs +# RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=SYMBOL-NOP10 +# RUN: llvm-readelf -S -d %t | FileCheck %s --check-prefix=SEC-NOP10 +# RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=REL-NOP10 +# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s --check-prefix=CHECK-NOP10 + ## The test is created to check that when a function without TOC access an ## external function, a r12 setup stub is inserted. @@ -35,56 +53,127 @@ # SYMBOL: 2: 0000000010010000 0 NOTYPE LOCAL DEFAULT [<other: 0x20>] 6 caller1 # SYMBOL-NEXT: 3: 0000000010020000 0 NOTYPE LOCAL DEFAULT [<other: 0x20>] 7 caller2 # SYMBOL-NEXT: 4: 0000000010030000 0 NOTYPE LOCAL DEFAULT [<other: 0x20>] 8 caller3 -# SYMBOL: 6: 0000000010010010 16 FUNC LOCAL DEFAULT 6 __plt_pcrel_callee_global_stother0 -# SYMBOL-NEXT: 7: 0000000010020010 16 FUNC LOCAL DEFAULT 7 __plt_pcrel_callee_global_stother1 -# SYMBOL-NEXT: 8: 0000000010030010 16 FUNC LOCAL DEFAULT 8 __plt_pcrel_callee_global_TOC +# SYMBOL: 6: 0000000010010010 32 FUNC LOCAL DEFAULT 6 __plt_pcrel_callee_global_stother0 +# SYMBOL-NEXT: 7: 0000000010020010 32 FUNC LOCAL DEFAULT 7 __plt_pcrel_callee_global_stother1 +# SYMBOL-NEXT: 8: 0000000010030010 32 FUNC LOCAL DEFAULT 8 __plt_pcrel_callee_global_TOC # SYMBOL-NEXT: 9: 0000000000000000 0 NOTYPE GLOBAL DEFAULT [<other: 0x60>] UND callee_global_TOC # SYMBOL-NEXT: 10: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND callee_global_stother0 # SYMBOL-NEXT: 11: 0000000000000000 
0 NOTYPE GLOBAL DEFAULT [<other: 0x20>] UND callee_global_stother1 +# SYMBOL-NOP10: Symbol table '.symtab' contains 12 entries: +# SYMBOL-NOP10: 2: 0000000010010000 0 NOTYPE LOCAL DEFAULT [<other: 0x20>] 6 caller1 +# SYMBOL-NOP10-NEXT: 3: 0000000010020000 0 NOTYPE LOCAL DEFAULT [<other: 0x20>] 7 caller2 +# SYMBOL-NOP10-NEXT: 4: 0000000010030000 0 NOTYPE LOCAL DEFAULT [<other: 0x20>] 8 caller3 +# SYMBOL-NOP10: 6: 0000000010010010 32 FUNC LOCAL DEFAULT 6 __plt_pcrel_callee_global_stother0 +# SYMBOL-NOP10-NEXT: 7: 0000000010020010 32 FUNC LOCAL DEFAULT 7 __plt_pcrel_callee_global_stother1 +# SYMBOL-NOP10-NEXT: 8: 0000000010030010 32 FUNC LOCAL DEFAULT 8 __plt_pcrel_callee_global_TOC +# SYMBOL-NOP10-NEXT: 9: 0000000000000000 0 NOTYPE GLOBAL DEFAULT [<other: 0x60>] UND callee_global_TOC +# SYMBOL-NOP10-NEXT: 10: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND callee_global_stother0 +# SYMBOL-NOP10-NEXT: 11: 0000000000000000 0 NOTYPE GLOBAL DEFAULT [<other: 0x20>] UND callee_global_stother1 + ## DT_PLTGOT points to .plt -# SEC: .plt NOBITS 0000000010030148 040148 000028 00 WA 0 0 8 -# SEC: 0x0000000000000003 (PLTGOT) 0x10030148 +# SEC: .plt NOBITS 0000000010030158 040158 000028 00 WA 0 0 8 +# SEC-OG: .plt NOBITS 0000000010030148 040148 000028 00 WA 0 0 8 +# SEC: 0x0000000000000003 (PLTGOT) 0x10030158 +# SEC-OG: 0x0000000000000003 (PLTGOT) 0x10030148 + +## DT_PLTGOT points to .plt +# SEC-NOP10: .plt NOBITS 0000000010030158 040158 000028 00 WA 0 0 8 +# SEC-NOP10: 0x0000000000000003 (PLTGOT) 0x10030158 ## The first 2 entries in the .plt are reserved for the dynamic linkers ## usage. The JMP_SLOT relocations are stored at .plt[2], .plt[3], .plt[4]. ## Check that we emit 3 R_PPC64_JMP_SLOT in .rela.plt. 
# REL: .rela.plt { -# REL-NEXT: 0x10030158 R_PPC64_JMP_SLOT callee_global_stother0 0x0 -# REL-NEXT: 0x10030160 R_PPC64_JMP_SLOT callee_global_stother1 0x0 -# REL-NEXT: 0x10030168 R_PPC64_JMP_SLOT callee_global_TOC 0x0 +# REL-NEXT: 0x10030168 R_PPC64_JMP_SLOT callee_global_stother0 0x0 +# REL-NEXT-OG: 0x10030158 R_PPC64_JMP_SLOT callee_global_stother0 0x0 +# REL-NEXT: 0x10030170 R_PPC64_JMP_SLOT callee_global_stother1 0x0 +# REL-NEXT-OG: 0x10030160 R_PPC64_JMP_SLOT callee_global_stother1 0x0 +# REL-NEXT: 0x10030178 R_PPC64_JMP_SLOT callee_global_TOC 0x0 +# REL-NEXT-OG: 0x10030168 R_PPC64_JMP_SLOT callee_global_TOC 0x0 # REL-NEXT: } +# REL-NOP10: .rela.plt { +# REL-NOP10-NEXT: 0x10030168 R_PPC64_JMP_SLOT callee_global_stother0 0x0 +# REL-NOP10-NEXT: 0x10030170 R_PPC64_JMP_SLOT callee_global_stother1 0x0 +# REL-NOP10-NEXT: 0x10030178 R_PPC64_JMP_SLOT callee_global_TOC 0x0 +# REL-NOP10-NEXT: } + # CHECK-LABEL: <caller1>: # CHECK: 10010000: bl 0x10010010 # CHECK-NEXT: 10010004: blr -## .plt[2] - 0x10010010 = 0x10030158 - 0x10010010 = 0x20148 = 131400 +# CHECK-NOP10-LABEL: <caller1>: +# CHECK-NOP10: 10010000: bl 0x10010010 +# CHECK-NOP10-NEXT: 10010004: blr + +## .plt[2] - 0x10010010 = 0x10030158 - 0x10010010 = 0x20148 = 131416 # CHECK-LABEL: <__plt_pcrel_callee_global_stother0>: -# CHECK: 10010010: pld 12, 131400(0), 1 +# CHECK: 10010010: pld 12, 131416(0), 1 # CHECK-NEXT: 10010018: mtctr 12 # CHECK-NEXT: 1001001c: bctr +## No P10; branch to next inst to get addr +# CHECK-NOP10-LABEL: <__plt_pcrel_callee_global_stother0>: +# CHECK-NOP10: 10010010: mflr 0 +# CHECK-NOP10-NEXT: 10010014: bcl 20, 31, 0x10010018 +# CHECK-NOP10: 10010018: mflr 11 +# CHECK-NOP10: 1001001c: mtlr 12 +# CHECK-NOP10: 10010020: addis 12, 11, -4097 +# CHECK-NOP10: 10010024: addi 12, 12, -24 +# CHECK-NOP10-NEXT: 10010028: mtctr 12 +# CHECK-NOP10-NEXT: 1001002c: bctr + # CHECK-LABEL: <caller2>: # CHECK: 10020000: bl 0x10020010 # CHECK-NEXT: 10020004: blr -## .plt[3] - 0x10020010 = 0x10030160 - 
0x10020010 = 0x10150 = 65872 +# CHECK-NOP10-LABEL: <caller2>: +# CHECK-NOP10: 10020000: bl 0x10020010 +# CHECK-NOP10-NEXT: 10020004: blr + +## .plt[3] - 0x10020010 = 0x10030160 - 0x10020010 = 0x10150 = 65888 # CHECK-LABEL: <__plt_pcrel_callee_global_stother1>: -# CHECK: 10020010: pld 12, 65872(0), 1 +# CHECK: 10020010: pld 12, 65888(0), 1 # CHECK-NEXT: 10020018: mtctr 12 # CHECK-NEXT: 1002001c: bctr +## no P10; branch to next inst to get addr +# CHECK-NOP10-LABEL: <__plt_pcrel_callee_global_stother1>: +# CHECK-NOP10: 10020010: mflr 0 +# CHECK-NOP10-NEXT: 10020014: bcl 20, 31, 0x10020018 +# CHECK-NOP10-NEXT: 10020018: mflr 11 +# CHECK-NOP10-NEXT: 1002001c: mtlr 12 +# CHECK-NOP10-NEXT: 10020020: addis 12, 11, -4098 +# CHECK-NOP10-NEXT: 10020024: addi 12, 12, -24 +# CHECK-NOP10-NEXT: 10020028: mtctr 12 +# CHECK-NOP10-NEXT: 1002002c: bctr + # CHECK-LABEL: <caller3>: # CHECK: 10030000: bl 0x10030010 # CHECK-NEXT: 10030004: blr -## .plt[4] - 0x10030010 = 0x10030168 - 0x10030010 = 0x150 = 344 +# CHECK-NOP10-LABEL: <caller3>: +# CHECK-NOP10: 10030000: bl 0x10030010 +# CHECK-NOP10-NEXT: 10030004: blr + +## .plt[4] - 0x10030010 = 0x10030168 - 0x10030010 = 0x150 = 360 # CHECK-LABEL: <__plt_pcrel_callee_global_TOC>: -# CHECK: 10030010: pld 12, 344(0), 1 +# CHECK: 10030010: pld 12, 360(0), 1 # CHECK-NEXT: 10030018: mtctr 12 # CHECK-NEXT: 1003001c: bctr +## no P10; branch to next inst to get addr +# CHECK-NOP10-LABEL: <__plt_pcrel_callee_global_TOC>: +# CHECK-NOP10-NEXT: 10030010: mflr 0 +# CHECK-NOP10-NEXT: 10030014: bcl 20, 31, 0x10030018 +# CHECK-NOP10-NEXT: 10030018: mflr 11 +# CHECK-NOP10-NEXT: 1003001c: mtlr 12 +# CHECK-NOP10-NEXT: 10030020: addis 12, 11, -4099 +# CHECK-NOP10-NEXT: 10030024: addi 12, 12, -24 +# CHECK-NOP10-NEXT: 10030028: mtctr 12 +# CHECK-NOP10-NEXT: 1003002c: bctr + .ifdef AUX .section .text_caller1, "ax", %progbits caller1: Index: lld/test/ELF/ppc64-long-branch.s =================================================================== --- 
lld/test/ELF/ppc64-long-branch.s +++ lld/test/ELF/ppc64-long-branch.s @@ -19,16 +19,16 @@ # RUN: llvm-nm --no-sort %t | FileCheck --check-prefix=NM %s # SEC: Name Type Address Off Size ES Flg Lk Inf Al -# SEC: .got PROGBITS 0000000002002030 2002030 000008 00 WA 0 0 8 -# SEC: .branch_lt PROGBITS 0000000002002038 2002038 000018 00 WA 0 0 8 +# SEC: .got PROGBITS 0000000002002040 2002040 000008 00 WA 0 0 8 +# SEC: .branch_lt PROGBITS 0000000002002048 2002048 000018 00 WA 0 0 8 # SEC: There are no relocations in this file. ## high@localentry (high+8), .text_high+16 and .text_low+8 -# BRANCH-LE: 0x02002038 08200002 00000000 10200002 00000000 -# BRANCH-LE-NEXT: 0x02002048 08200000 00000000 -# BRANCH-BE: 0x02002038 00000000 02002008 00000000 02002010 -# BRANCH-BE-NEXT: 0x02002048 00000000 00002008 +# BRANCH-LE: 0x02002048 08200002 00000000 10200002 00000000 +# BRANCH-LE-NEXT: 0x02002058 08200000 00000000 +# BRANCH-BE: 0x02002048 00000000 02002008 00000000 02002010 +# BRANCH-BE-NEXT: 0x02002058 00000000 00002008 # CHECK: <_start>: # CHECK-NEXT: 2000: bl 0x2020 @@ -45,7 +45,7 @@ ## &.branch_lt[1] - .TOC. 
= .branch_lt - (.got+0x8000) = -32752 # CHECK: <__long_branch_>: -# CHECK-NEXT: 2030: addis 12, 2, 0 +# CHECK-NEXT: 2040: addis 12, 2, 0 # CHECK-NEXT: ld 12, -32752(12) # CHECK-NEXT: mtctr 12 # CHECK-NEXT: bctr @@ -64,7 +64,7 @@ # CHECK-EMPTY: # CHECK-NEXT: <high>: # CHECK-NEXT: 2002000: addis 2, 12, 1 -# CHECK-NEXT: addi 2, 2, -32720 +# CHECK-NEXT: addi 2, 2, -32704 # CHECK-NEXT: bl 0x2008 # CHECK-NEXT: bl 0x2002020 # CHECK: <__long_branch_>: Index: lld/test/ELF/ppc64-long-branch-rel14.s =================================================================== --- lld/test/ELF/ppc64-long-branch-rel14.s +++ lld/test/ELF/ppc64-long-branch-rel14.s @@ -20,7 +20,7 @@ # CHECK-NEXT: 2000: bt 2, 0x2020 # CHECK-NEXT: bt+ 2, 0x2020 # CHECK-NEXT: bf 2, 0xa004 -# CHECK-NEXT: bt 2, 0x2030 +# CHECK-NEXT: bt 2, 0x2040 # CHECK-NEXT: blr # CHECK-NEXT: trap # CHECK-NEXT: trap @@ -31,10 +31,11 @@ # CHECK-NEXT: ld 12, {{.*}}(12) # CHECK-NEXT: mtctr 12 # CHECK-NEXT: bctr +# CHECK-NEXT: ... # CHECK-EMPTY: # CHECK-NEXT: <__long_branch_>: -# CHECK-NEXT: 2030: addis 12, 2, 0 +# CHECK-NEXT: 2040: addis 12, 2, 0 # CHECK-NEXT: ld 12, {{.*}}(12) # CHECK-NEXT: mtctr 12 # CHECK-NEXT: bctr Index: lld/test/ELF/ppc64-long-branch-pi.s =================================================================== --- lld/test/ELF/ppc64-long-branch-pi.s +++ lld/test/ELF/ppc64-long-branch-pi.s @@ -14,26 +14,26 @@ # RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck %s # SEC-PIE: Name Type Address Off Size ES Flg Lk Inf Al -# SEC-PIE: .got PROGBITS 00000000020020f0 20120f0 000008 00 WA 0 0 8 -# SEC-PIE: .branch_lt NOBITS 0000000002002100 2012100 000020 00 WA 0 0 8 +# SEC-PIE: .got PROGBITS 0000000002002100 2012100 000008 00 WA 0 0 8 +# SEC-PIE: .branch_lt NOBITS 0000000002002110 2012110 000020 00 WA 0 0 8 # SEC-SHARED: Name Type Address Off Size ES Flg Lk Inf Al -# SEC-SHARED: .got PROGBITS 00000000020020d0 20120d0 000008 00 WA 0 0 8 -# SEC-SHARED: .branch_lt NOBITS 00000000020020e0 20120e0 000020 00 WA 0 0 8 +# 
SEC-SHARED: .got PROGBITS 00000000020020e0 20120e0 000008 00 WA 0 0 8 +# SEC-SHARED: .branch_lt NOBITS 00000000020020f0 20120f0 000020 00 WA 0 0 8 # RELOC: .rela.dyn { -# RELOC-NEXT: 0x20020F8 R_PPC64_RELATIVE - 0x8000 -# RELOC-NEXT: 0x2002100 R_PPC64_RELATIVE - 0x2002000 -# RELOC-NEXT: 0x2002108 R_PPC64_RELATIVE - 0x2002008 -# RELOC-NEXT: 0x2002110 R_PPC64_RELATIVE - 0x200200C -# RELOC-NEXT: 0x2002118 R_PPC64_RELATIVE - 0x2000 +# RELOC-NEXT: 0x2002108 R_PPC64_RELATIVE - 0x8000 +# RELOC-NEXT: 0x2002110 R_PPC64_RELATIVE - 0x2002000 +# RELOC-NEXT: 0x2002118 R_PPC64_RELATIVE - 0x2002008 +# RELOC-NEXT: 0x2002120 R_PPC64_RELATIVE - 0x200200C +# RELOC-NEXT: 0x2002128 R_PPC64_RELATIVE - 0x2000 # RELOC-NEXT: } # CHECK: <_start>: # CHECK-NEXT: 2000: bl 0x2010 # CHECK-NEXT: bl 0x2002000 -# CHECK-NEXT: bl 0x2020 # CHECK-NEXT: bl 0x2030 +# CHECK-NEXT: bl 0x2050 ## &.branch_lt[0] - .TOC. = .branch_lt - (.got+0x8000) = -32752 # CHECK: <__long_branch_>: @@ -44,14 +44,14 @@ ## &.branch_lt[1] - .TOC. = .branch_lt - (.got+0x8000) = -32744 # CHECK: <__long_branch_>: -# CHECK-NEXT: 2020: addis 12, 2, 0 +# CHECK-NEXT: 2030: addis 12, 2, 0 # CHECK-NEXT: ld 12, -32744(12) # CHECK-NEXT: mtctr 12 # CHECK-NEXT: bctr ## &.branch_lt[2] - .TOC. = .branch_lt - (.got+0x8000) = -32736 # CHECK: <__long_branch_>: -# CHECK-NEXT: 2030: addis 12, 2, 0 +# CHECK-NEXT: 2050: addis 12, 2, 0 # CHECK-NEXT: ld 12, -32736(12) # CHECK-NEXT: mtctr 12 # CHECK-NEXT: bctr Index: lld/test/ELF/ppc64-long-branch-localentry-offset.s =================================================================== --- lld/test/ELF/ppc64-long-branch-localentry-offset.s +++ lld/test/ELF/ppc64-long-branch-localentry-offset.s @@ -5,8 +5,8 @@ # RUN: llvm-nm %t | FileCheck %s # CHECK-DAG: 0000000010010000 t __long_branch_callee -# CHECK-DAG: 0000000010010010 T _start -# CHECK-DAG: 0000000012010008 T callee +# CHECK-DAG: 0000000010010020 T _start +# CHECK-DAG: 0000000012010018 T callee # The bl instruction jumps to the local entry. 
The distance requires a long branch stub: # localentry(callee) - _start = 0x12010008+8 - 0x10010010 = 0x2000000 Index: lld/test/ELF/ppc64-call-reach.s =================================================================== --- lld/test/ELF/ppc64-call-reach.s +++ lld/test/ELF/ppc64-call-reach.s @@ -67,7 +67,7 @@ # THUNK-LABEL: <test>: # THUNK: 10010014: bl 0x10010030 -# THUNK: 10010024: b 0x10010040 +# THUNK: 10010024: b 0x10010050 # .branch_lt[0] # THUNK-LABEL: <__long_branch_callee>: @@ -78,7 +78,7 @@ # .branch_lt[1] # THUNK-LABEL: <__long_branch_tail_callee>: -# THUNK-NEXT: 10010040: addis 12, 2, 1 +# THUNK-NEXT: 10010050: addis 12, 2, 1 # THUNK-NEXT: ld 12, -32752(12) # THUNK-NEXT: mtctr 12 # THUNK-NEXT: bctr Index: lld/ELF/Thunks.cpp =================================================================== --- lld/ELF/Thunks.cpp +++ lld/ELF/Thunks.cpp @@ -304,7 +304,7 @@ } return true; } - uint32_t size() override { return getMayUseShortThunk() ? 8 : 20; } + uint32_t size() override { return getMayUseShortThunk() ? 8 : 32; } void writeTo(uint8_t *buf) override; void addSymbols(ThunkSection &isec) override; @@ -325,7 +325,7 @@ class PPC64R12SetupStub final : public Thunk { public: PPC64R12SetupStub(Symbol &dest) : Thunk(dest, 0) { alignment = 16; } - uint32_t size() override { return 16; } + uint32_t size() override { return 32; } void writeTo(uint8_t *buf) override; void addSymbols(ThunkSection &isec) override; }; @@ -340,7 +340,7 @@ class PPC64PCRelPLTStub final : public Thunk { public: PPC64PCRelPLTStub(Symbol &dest) : Thunk(dest, 0) { alignment = 16; } - uint32_t size() override { return 16; } + uint32_t size() override { return 32; } void writeTo(uint8_t *buf) override; void addSymbols(ThunkSection &isec) override; bool isCompatibleWith(const InputSection &isec, @@ -357,7 +357,7 @@ // used. 
class PPC64LongBranchThunk : public Thunk { public: - uint32_t size() override { return 16; } + uint32_t size() override { return 32; } void writeTo(uint8_t *buf) override; void addSymbols(ThunkSection &isec) override; bool isCompatibleWith(const InputSection &isec, @@ -401,7 +401,7 @@ : Thunk(dest, addend) { alignment = 16; } - uint32_t size() override { return 16; } + uint32_t size() override { return 32; } void writeTo(uint8_t *buf) override; void addSymbols(ThunkSection &isec) override; bool isCompatibleWith(const InputSection &isec, @@ -917,17 +917,33 @@ void PPC64R2SaveStub::writeTo(uint8_t *buf) { const int64_t offset = computeOffset(); - write32(buf + 0, 0xf8410018); // std r2,24(r1) + write32(buf + 0, 0xf8410018); // std r2,24(r1) // The branch offset needs to fit in 26 bits. if (getMayUseShortThunk()) { write32(buf + 4, 0x48000000 | (offset & 0x03fffffc)); // b <offset> } else if (isInt<34>(offset)) { - const uint64_t paddi = PADDI_R12_NO_DISP | - (((offset >> 16) & 0x3ffff) << 32) | - (offset & 0xffff); - writePrefixedInstruction(buf + 4, paddi); // paddi r12, 0, func@pcrel, 1 - write32(buf + 12, MTCTR_R12); // mtctr r12 - write32(buf + 16, BCTR); // bctr + int inst2; + if (config->Power10Stub == P10Stub::No) { + uint64_t toc_offset = destination.getVA() - getPPC64TocBase(); + uint64_t addi = ADDI_R12_TO_R12_NO_DISP | (toc_offset & 0xffff); + if (toc_offset >> 16 > 0) { + const uint64_t addis = ADDIS_R12_NO_DISP | ((toc_offset >> 16) & 0xffff); + write32(buf + 4, addis); // addis r12, 0, top of offset + write32(buf + 8, addi); // addi r12, r12, bottom of offset + inst2 = 12; + } else { + write32(buf + 4, addi); // addi r12, 0, offset + inst2 = 8; + } + } else { + const uint64_t paddi = PADDI_R12_NO_DISP | + (((offset >> 16) & 0x3ffff) << 32) | + (offset & 0xffff); + writePrefixedInstruction(buf + 4, paddi); // paddi r12, 0, func@pcrel, 1 + inst2 = 12; + } + write32(buf + inst2, MTCTR_R12); // mtctr r12 + write32(buf + inst2 + 4, BCTR); // bctr } else { 
in.ppc64LongBranchTarget->addEntry(&destination, addend); const int64_t offsetFromTOC = @@ -947,12 +963,27 @@ int64_t offset = destination.getVA() - getThunkTargetSym()->getVA(); if (!isInt<34>(offset)) reportRangeError(buf, offset, 34, destination, "R12 setup stub offset"); - uint64_t paddi = PADDI_R12_NO_DISP | (((offset >> 16) & 0x3ffff) << 32) | - (offset & 0xffff); - writePrefixedInstruction(buf + 0, paddi); // paddi r12, 0, func@pcrel, 1 - write32(buf + 8, MTCTR_R12); // mtctr r12 - write32(buf + 12, BCTR); // bctr + int inst2; + if (config->Power10Stub == P10Stub::No) { + auto ha = [](uint32_t v) -> uint16_t { return (v + 0x8000 - 8) >> 16; }; + uint32_t d = destination.getVA(addend); + uint32_t off = d - getThunkTargetSym()->getVA(); + write32(buf + 0, 0x7c0802a6); // mflr r12 + write32(buf + 4, 0x429f0005); // bcl 20,31,.+4 + write32(buf + 8, 0x7d6802a6); // mflr r11 + write32(buf + 12, 0x7d8803a6); // mtlr r12 + write32(buf + 16, 0x3d8c0000 | ha(off)); // addis r12,r11,off@ha + write32(buf + 20, 0x398c0000 | (uint16_t)(off) - 8); // addi r12,r12,off@l + inst2 = 24; + } else { + uint64_t paddi = PADDI_R12_NO_DISP | (((offset >> 16) & 0x3ffff) << 32) | + (offset & 0xffff); + writePrefixedInstruction(buf + 0, paddi); // paddi r12, 0, func@pcrel, 1 + inst2 = 8; + } + write32(buf + inst2, MTCTR_R12); // mtctr r12 + write32(buf + inst2 + 4, BCTR); // bctr } void PPC64R12SetupStub::addSymbols(ThunkSection &isec) { @@ -961,16 +992,32 @@ } void PPC64PCRelPLTStub::writeTo(uint8_t *buf) { + int inst2 = 0; int64_t offset = destination.getGotPltVA() - getThunkTargetSym()->getVA(); - if (!isInt<34>(offset)) - reportRangeError(buf, offset, 34, destination, - "PC-relative PLT stub offset"); - uint64_t pld = - PLD_R12_NO_DISP | (((offset >> 16) & 0x3ffff) << 32) | (offset & 0xffff); - writePrefixedInstruction(buf + 0, pld); // pld r12, func@plt@pcrel - write32(buf + 8, MTCTR_R12); // mtctr r12 - write32(buf + 12, BCTR); // bctr + if (config->Power10Stub != P10Stub::No) { 
+ if (!isInt<34>(offset)) + reportRangeError(buf, offset, 34, destination, + "PC-relative PLT stub offset"); + uint64_t pld = + PLD_R12_NO_DISP | (((offset >> 16) & 0x3ffff) << 32) | (offset & 0xffff); + writePrefixedInstruction(buf + 0, pld); // pld r12, func@plt@pcrel + inst2 = 8; + } else { + auto ha = [](uint32_t v) -> uint16_t { return (v + 0x8000 - 8) >> 16; }; + auto lo = [](uint32_t v) -> uint16_t { return v - 8; }; + uint32_t d = destination.getVA(addend); + uint32_t off = d - getThunkTargetSym()->getVA(); + write32(buf + 0, 0x7c0802a6); // mflr r12 + write32(buf + 4, 0x429f0005); // bcl 20,31,.+4 + write32(buf + 8, 0x7d6802a6); // mflr r11 + write32(buf + 12, 0x7d8803a6); // mtlr r12 + write32(buf + 16, 0x3d8b0000 | ha(off)); // addis r12,r11,off@ha + write32(buf + 20, 0x398c0000 | lo(off)); // addi r12,r12,off@l + inst2 = 24; + } + write32(buf + inst2, MTCTR_R12); // mtctr r12 + write32(buf + inst2 + 4, BCTR); // bctr } void PPC64PCRelPLTStub::addSymbols(ThunkSection &isec) { @@ -1004,12 +1051,27 @@ if (!isInt<34>(offset)) reportRangeError(buf, offset, 34, destination, "PC-relative long branch stub offset"); - uint64_t paddi = PADDI_R12_NO_DISP | (((offset >> 16) & 0x3ffff) << 32) | - (offset & 0xffff); - writePrefixedInstruction(buf + 0, paddi); // paddi r12, 0, func@pcrel, 1 - write32(buf + 8, MTCTR_R12); // mtctr r12 - write32(buf + 12, BCTR); // bctr + int inst2; + if (config->Power10Stub == P10Stub::No) { + auto ha = [](uint32_t v) -> uint16_t { return (v + 0x8000 - 8) >> 16; }; + uint32_t d = destination.getVA(addend); + uint32_t off = d - getThunkTargetSym()->getVA(); + write32(buf + 0, 0x7c0802a6); // mflr r12 + write32(buf + 4, 0x429f0005); // bcl 20,31,.+4 + write32(buf + 8, 0x7d6802a6); // mflr r11 + write32(buf + 12, 0x7d8803a6); // mtlr r12 + write32(buf + 16, 0x3d8b0000 | ha(off)); // addis r12,r11,off@ha + write32(buf + 20, 0x398c0000 | (uint16_t)(off) - 8); // addi r12,r12,off@l + inst2 = 24; + } else { + uint64_t paddi = 
PADDI_R12_NO_DISP | (((offset >> 16) & 0x3ffff) << 32) | + (offset & 0xffff); + writePrefixedInstruction(buf + 0, paddi); // paddi r12, 0, func@pcrel, 1 + inst2 = 8; + } + write32(buf + inst2, MTCTR_R12); // mtctr r12 + write32(buf + inst2 + 4, BCTR); // bctr } void PPC64PCRelLongBranchThunk::addSymbols(ThunkSection &isec) { Index: lld/ELF/Options.td =================================================================== --- lld/ELF/Options.td +++ lld/ELF/Options.td @@ -442,6 +442,17 @@ def version: F<"version">, HelpText<"Display the version number and exit">; +def power10_stubs: F<"power10-stubs">, HelpText<"Alias for --power10-stubs=yes">; + +def no_power10_stubs: F<"no-power10-stubs">, HelpText<"Alias for --power10-stubs=no">; + +def power10_stubs_eq: + J<"power10-stubs=">, HelpText<"Enables Power10 instr in all stubs without options, " + "options override previous flags." + "auto: Default" + "no: No Power10 instr in stubs" + "yes: Enable Power10 instr in stubs">; + defm version_script: Eq<"version-script", "Read a version script">; defm warn_backrefs: BB<"warn-backrefs", Index: lld/ELF/Driver.cpp =================================================================== --- lld/ELF/Driver.cpp +++ lld/ELF/Driver.cpp @@ -760,6 +760,23 @@ return OrphanHandlingPolicy::Place; } +static P10Stub getP10StubOpt(opt::InputArgList &args) { + + bool NoP10 = args.hasArg(OPT_no_power10_stubs); + + StringRef SelectedOpt = args.getLastArgValue(OPT_power10_stubs_eq); + + if (SelectedOpt == "no") + return P10Stub::No; + + if (!args.hasArg(OPT_power10_stubs_eq)) { + if (NoP10) + return P10Stub::No; + } + + return P10Stub::Default; +} + // Parse --build-id or --build-id=<style>. We handle "tree" as a // synonym for "sha1" because all our hash functions including // -build-id=sha1 are actually tree hashes for performance reasons. 
@@ -1123,6 +1140,7 @@ config->zText = getZFlag(args, "text", "notext", true); config->zWxneeded = hasZOption(args, "wxneeded"); setUnresolvedSymbolPolicy(args); + config->Power10Stub = getP10StubOpt(args); for (opt::Arg *arg : args.filtered(OPT_z)) { std::pair<StringRef, StringRef> option = Index: lld/ELF/Config.h =================================================================== --- lld/ELF/Config.h +++ lld/ELF/Config.h @@ -70,6 +70,9 @@ // For -z *stack enum class GnuStackKind { None, Exec, NoExec }; +// For --power10-stub +enum class P10Stub { Default, No }; + struct SymbolVersion { llvm::StringRef name; bool isExternCpp; @@ -250,6 +253,7 @@ UnresolvedPolicy unresolvedSymbols; UnresolvedPolicy unresolvedSymbolsInShlib; Target2Policy target2; + P10Stub Power10Stub; ARMVFPArgKind armVFPArgs = ARMVFPArgKind::Default; BuildIdKind buildId = BuildIdKind::None; SeparateSegmentKind zSeparate;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits