On Tue, Dec 17, 2024 at 2:47 AM Bin Lan via lists.openembedded.org <bin.lan.cn=windriver....@lists.openembedded.org> wrote: > > There is the following warning when building linux-yocto with > default configuration on x86-64 with gcc-14.2: > AR built-in.a > AR vmlinux.a > LD vmlinux.o > vmlinux.o: warning: objtool: .export_symbol+0x332a0: data relocation to > !ENDBR: stpcpy+0x0 > > This change set removes the warning. > > PR target/116174 [https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116174] > > Signed-off-by: Bin Lan <bin.lan...@windriver.com> > --- > meta/recipes-devtools/gcc/gcc-14.2.inc | 1 + > ...ch-to-fix-data-relocation-to-ENDBR-s.patch | 447 ++++++++++++++++++ > 2 files changed, 448 insertions(+) > create mode 100644 > meta/recipes-devtools/gcc/gcc/0027-gcc-backport-patch-to-fix-data-relocation-to-ENDBR-s.patch > > diff --git a/meta/recipes-devtools/gcc/gcc-14.2.inc > b/meta/recipes-devtools/gcc/gcc-14.2.inc > index 4f505bef68..a25bc019e5 100644 > --- a/meta/recipes-devtools/gcc/gcc-14.2.inc > +++ b/meta/recipes-devtools/gcc/gcc-14.2.inc > @@ -69,6 +69,7 @@ SRC_URI = "${BASEURI} \ > file://0024-Avoid-hardcoded-build-paths-into-ppc-libgcc.patch \ > file://0025-gcc-testsuite-tweaks-for-mips-OE.patch \ > file://0026-gcc-Fix-c-tweak-for-Wrange-loop-construct.patch \ > + > file://0027-gcc-backport-patch-to-fix-data-relocation-to-ENDBR-s.patch \
Tiny nit: is this tab vs spaces? Can you fix this? > file://gcc.git-ab884fffe3fc82a710bea66ad651720d71c938b8.patch \ > " > > diff --git > a/meta/recipes-devtools/gcc/gcc/0027-gcc-backport-patch-to-fix-data-relocation-to-ENDBR-s.patch > > b/meta/recipes-devtools/gcc/gcc/0027-gcc-backport-patch-to-fix-data-relocation-to-ENDBR-s.patch > new file mode 100644 > index 0000000000..5bede60816 > --- /dev/null > +++ > b/meta/recipes-devtools/gcc/gcc/0027-gcc-backport-patch-to-fix-data-relocation-to-ENDBR-s.patch > @@ -0,0 +1,447 @@ > +From 4e7735a8d87559bbddfe3a985786996e22241f8d Mon Sep 17 00:00:00 2001 > +From: liuhongt <hongtao....@intel.com> > +Date: Mon, 12 Aug 2024 14:35:31 +0800 > +Subject: [PATCH] Move ix86_align_loops into a separate pass and insert the > + pass after pass_endbr_and_patchable_area. > + > +gcc/ChangeLog: > + > + PR target/116174 > + * config/i386/i386.cc (ix86_align_loops): Move this to .. > + * config/i386/i386-features.cc (ix86_align_loops): .. here. > + (class pass_align_tight_loops): New class. > + (make_pass_align_tight_loops): New function. > + * config/i386/i386-passes.def: Insert pass_align_tight_loops > + after pass_insert_endbr_and_patchable_area. > + * config/i386/i386-protos.h (make_pass_align_tight_loops): New > + declare. > + > +gcc/testsuite/ChangeLog: > + > + * gcc.target/i386/pr116174.c: New test. > + > +(cherry picked from commit c3c83d22d212a35cb1bfb8727477819463f0dcd8) > + > +Upstream-Status: Backport > [https://gcc.gnu.org/git/?p=gcc.git;a=patch;h=4e7735a8d87559bbddfe3a985786996e22241f8d] > + > +Signed-off-by: Bin Lan <bin.lan...@windriver.com> > +--- > + gcc/config/i386/i386-features.cc | 191 +++++++++++++++++++++++ > + gcc/config/i386/i386-passes.def | 3 + > + gcc/config/i386/i386-protos.h | 1 + > + gcc/config/i386/i386.cc | 146 ----------------- > + gcc/testsuite/gcc.target/i386/pr116174.c | 12 ++ > + 5 files changed, 207 insertions(+), 146 deletions(-) > + create mode 100644 gcc/testsuite/gcc.target/i386/pr116174.c > + > +diff --git a/gcc/config/i386/i386-features.cc > b/gcc/config/i386/i386-features.cc > +index e3e004d55267..7de19d423637 100644 > +--- a/gcc/config/i386/i386-features.cc > ++++ b/gcc/config/i386/i386-features.cc > +@@ -3253,6 +3253,197 @@ make_pass_remove_partial_avx_dependency > (gcc::context *ctxt) > + return new pass_remove_partial_avx_dependency (ctxt); > + } > + > ++/* When a hot loop can be fit into one cacheline, > ++ force align the loop without considering the max skip. */ > ++static void > ++ix86_align_loops () > ++{ > ++ basic_block bb; > ++ > ++ /* Don't do this when we don't know cache line size. */ > ++ if (ix86_cost->prefetch_block == 0) > ++ return; > ++ > ++ loop_optimizer_init (AVOID_CFG_MODIFICATIONS); > ++ profile_count count_threshold = cfun->cfg->count_max / > param_align_threshold; > ++ FOR_EACH_BB_FN (bb, cfun) > ++ { > ++ rtx_insn *label = BB_HEAD (bb); > ++ bool has_fallthru = 0; > ++ edge e; > ++ edge_iterator ei; > ++ > ++ if (!LABEL_P (label)) > ++ continue; > ++ > ++ profile_count fallthru_count = profile_count::zero (); > ++ profile_count branch_count = profile_count::zero (); > ++ > ++ FOR_EACH_EDGE (e, ei, bb->preds) > ++ { > ++ if (e->flags & EDGE_FALLTHRU) > ++ has_fallthru = 1, fallthru_count += e->count (); > ++ else > ++ branch_count += e->count (); > ++ } > ++ > ++ if (!fallthru_count.initialized_p () || !branch_count.initialized_p > ()) > ++ continue; > ++ > ++ if (bb->loop_father > ++ && bb->loop_father->latch != EXIT_BLOCK_PTR_FOR_FN (cfun) > ++ && (has_fallthru > ++ ? 
(!(single_succ_p (bb) > ++ && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun)) > ++ && optimize_bb_for_speed_p (bb) > ++ && branch_count + fallthru_count > count_threshold > ++ && (branch_count > fallthru_count * > param_align_loop_iterations)) > ++ /* In case there'no fallthru for the loop. > ++ Nops inserted won't be executed. */ > ++ : (branch_count > count_threshold > ++ || (bb->count > bb->prev_bb->count * 10 > ++ && (bb->prev_bb->count > ++ <= ENTRY_BLOCK_PTR_FOR_FN (cfun)->count / 2))))) > ++ { > ++ rtx_insn* insn, *end_insn; > ++ HOST_WIDE_INT size = 0; > ++ bool padding_p = true; > ++ basic_block tbb = bb; > ++ unsigned cond_branch_num = 0; > ++ bool detect_tight_loop_p = false; > ++ > ++ for (unsigned int i = 0; i != bb->loop_father->num_nodes; > ++ i++, tbb = tbb->next_bb) > ++ { > ++ /* Only handle continuous cfg layout. */ > ++ if (bb->loop_father != tbb->loop_father) > ++ { > ++ padding_p = false; > ++ break; > ++ } > ++ > ++ FOR_BB_INSNS (tbb, insn) > ++ { > ++ if (!NONDEBUG_INSN_P (insn)) > ++ continue; > ++ size += ix86_min_insn_size (insn); > ++ > ++ /* We don't know size of inline asm. > ++ Don't align loop for call. */ > ++ if (asm_noperands (PATTERN (insn)) >= 0 > ++ || CALL_P (insn)) > ++ { > ++ size = -1; > ++ break; > ++ } > ++ } > ++ > ++ if (size == -1 || size > ix86_cost->prefetch_block) > ++ { > ++ padding_p = false; > ++ break; > ++ } > ++ > ++ FOR_EACH_EDGE (e, ei, tbb->succs) > ++ { > ++ /* It could be part of the loop. */ > ++ if (e->dest == bb) > ++ { > ++ detect_tight_loop_p = true; > ++ break; > ++ } > ++ } > ++ > ++ if (detect_tight_loop_p) > ++ break; > ++ > ++ end_insn = BB_END (tbb); > ++ if (JUMP_P (end_insn)) > ++ { > ++ /* For decoded icache: > ++ 1. Up to two branches are allowed per Way. > ++ 2. A non-conditional branch is the last micro-op in a > Way. > ++ */ > ++ if (onlyjump_p (end_insn) > ++ && (any_uncondjump_p (end_insn) > ++ || single_succ_p (tbb))) > ++ { > ++ padding_p = false; > ++ break; > ++ } > ++ else if (++cond_branch_num >= 2) > ++ { > ++ padding_p = false; > ++ break; > ++ } > ++ } > ++ > ++ } > ++ > ++ if (padding_p && detect_tight_loop_p) > ++ { > ++ emit_insn_before (gen_max_skip_align (GEN_INT (ceil_log2 > (size)), > ++ GEN_INT (0)), label); > ++ /* End of function. */ > ++ if (!tbb || tbb == EXIT_BLOCK_PTR_FOR_FN (cfun)) > ++ break; > ++ /* Skip bb which already fits into one cacheline. 
*/ > ++ bb = tbb; > ++ } > ++ } > ++ } > ++ > ++ loop_optimizer_finalize (); > ++ free_dominance_info (CDI_DOMINATORS); > ++} > ++ > ++namespace { > ++ > ++const pass_data pass_data_align_tight_loops = > ++{ > ++ RTL_PASS, /* type */ > ++ "align_tight_loops", /* name */ > ++ OPTGROUP_NONE, /* optinfo_flags */ > ++ TV_MACH_DEP, /* tv_id */ > ++ 0, /* properties_required */ > ++ 0, /* properties_provided */ > ++ 0, /* properties_destroyed */ > ++ 0, /* todo_flags_start */ > ++ 0, /* todo_flags_finish */ > ++}; > ++ > ++class pass_align_tight_loops : public rtl_opt_pass > ++{ > ++public: > ++ pass_align_tight_loops (gcc::context *ctxt) > ++ : rtl_opt_pass (pass_data_align_tight_loops, ctxt) > ++ {} > ++ > ++ /* opt_pass methods: */ > ++ bool gate (function *) final override > ++ { > ++ return optimize && optimize_function_for_speed_p (cfun); > ++ } > ++ > ++ unsigned int execute (function *) final override > ++ { > ++ timevar_push (TV_MACH_DEP); > ++#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN > ++ ix86_align_loops (); > ++#endif > ++ timevar_pop (TV_MACH_DEP); > ++ return 0; > ++ } > ++}; // class pass_align_tight_loops > ++ > ++} // anon namespace > ++ > ++rtl_opt_pass * > ++make_pass_align_tight_loops (gcc::context *ctxt) > ++{ > ++ return new pass_align_tight_loops (ctxt); > ++} > ++ > + /* This compares the priority of target features in function DECL1 > + and DECL2. It returns positive value if DECL1 is higher priority, > + negative value if DECL2 is higher priority and 0 if they are the > +diff --git a/gcc/config/i386/i386-passes.def > b/gcc/config/i386/i386-passes.def > +index 7d96766f7b96..e500f15c9971 100644 > +--- a/gcc/config/i386/i386-passes.def > ++++ b/gcc/config/i386/i386-passes.def > +@@ -31,5 +31,8 @@ along with GCC; see the file COPYING3. If not see > + INSERT_PASS_BEFORE (pass_cse2, 1, pass_stv, true /* timode_p */); > + > + INSERT_PASS_BEFORE (pass_shorten_branches, 1, > pass_insert_endbr_and_patchable_area); > ++ /* pass_align_tight_loops must be after > pass_insert_endbr_and_patchable_area. > ++ PR116174. */ > ++ INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_align_tight_loops); > + > + INSERT_PASS_AFTER (pass_combine, 1, pass_remove_partial_avx_dependency); > +diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h > +index 46214a63974d..36c7b1aed42b 100644 > +--- a/gcc/config/i386/i386-protos.h > ++++ b/gcc/config/i386/i386-protos.h > +@@ -419,6 +419,7 @@ extern rtl_opt_pass > *make_pass_insert_endbr_and_patchable_area > + (gcc::context *); > + extern rtl_opt_pass *make_pass_remove_partial_avx_dependency > + (gcc::context *); > ++extern rtl_opt_pass *make_pass_align_tight_loops (gcc::context *); > + > + extern bool ix86_has_no_direct_extern_access; > + > +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > +index 6f89891d3cb5..288c69467d62 100644 > +--- a/gcc/config/i386/i386.cc > ++++ b/gcc/config/i386/i386.cc > +@@ -23444,150 +23444,6 @@ ix86_split_stlf_stall_load () > + } > + } > + > +-/* When a hot loop can be fit into one cacheline, > +- force align the loop without considering the max skip. */ > +-static void > +-ix86_align_loops () > +-{ > +- basic_block bb; > +- > +- /* Don't do this when we don't know cache line size. 
*/ > +- if (ix86_cost->prefetch_block == 0) > +- return; > +- > +- loop_optimizer_init (AVOID_CFG_MODIFICATIONS); > +- profile_count count_threshold = cfun->cfg->count_max / > param_align_threshold; > +- FOR_EACH_BB_FN (bb, cfun) > +- { > +- rtx_insn *label = BB_HEAD (bb); > +- bool has_fallthru = 0; > +- edge e; > +- edge_iterator ei; > +- > +- if (!LABEL_P (label)) > +- continue; > +- > +- profile_count fallthru_count = profile_count::zero (); > +- profile_count branch_count = profile_count::zero (); > +- > +- FOR_EACH_EDGE (e, ei, bb->preds) > +- { > +- if (e->flags & EDGE_FALLTHRU) > +- has_fallthru = 1, fallthru_count += e->count (); > +- else > +- branch_count += e->count (); > +- } > +- > +- if (!fallthru_count.initialized_p () || !branch_count.initialized_p > ()) > +- continue; > +- > +- if (bb->loop_father > +- && bb->loop_father->latch != EXIT_BLOCK_PTR_FOR_FN (cfun) > +- && (has_fallthru > +- ? (!(single_succ_p (bb) > +- && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun)) > +- && optimize_bb_for_speed_p (bb) > +- && branch_count + fallthru_count > count_threshold > +- && (branch_count > fallthru_count * > param_align_loop_iterations)) > +- /* In case there'no fallthru for the loop. > +- Nops inserted won't be executed. */ > +- : (branch_count > count_threshold > +- || (bb->count > bb->prev_bb->count * 10 > +- && (bb->prev_bb->count > +- <= ENTRY_BLOCK_PTR_FOR_FN (cfun)->count / 2))))) > +- { > +- rtx_insn* insn, *end_insn; > +- HOST_WIDE_INT size = 0; > +- bool padding_p = true; > +- basic_block tbb = bb; > +- unsigned cond_branch_num = 0; > +- bool detect_tight_loop_p = false; > +- > +- for (unsigned int i = 0; i != bb->loop_father->num_nodes; > +- i++, tbb = tbb->next_bb) > +- { > +- /* Only handle continuous cfg layout. */ > +- if (bb->loop_father != tbb->loop_father) > +- { > +- padding_p = false; > +- break; > +- } > +- > +- FOR_BB_INSNS (tbb, insn) > +- { > +- if (!NONDEBUG_INSN_P (insn)) > +- continue; > +- size += ix86_min_insn_size (insn); > +- > +- /* We don't know size of inline asm. > +- Don't align loop for call. */ > +- if (asm_noperands (PATTERN (insn)) >= 0 > +- || CALL_P (insn)) > +- { > +- size = -1; > +- break; > +- } > +- } > +- > +- if (size == -1 || size > ix86_cost->prefetch_block) > +- { > +- padding_p = false; > +- break; > +- } > +- > +- FOR_EACH_EDGE (e, ei, tbb->succs) > +- { > +- /* It could be part of the loop. */ > +- if (e->dest == bb) > +- { > +- detect_tight_loop_p = true; > +- break; > +- } > +- } > +- > +- if (detect_tight_loop_p) > +- break; > +- > +- end_insn = BB_END (tbb); > +- if (JUMP_P (end_insn)) > +- { > +- /* For decoded icache: > +- 1. Up to two branches are allowed per Way. > +- 2. A non-conditional branch is the last micro-op in a > Way. > +- */ > +- if (onlyjump_p (end_insn) > +- && (any_uncondjump_p (end_insn) > +- || single_succ_p (tbb))) > +- { > +- padding_p = false; > +- break; > +- } > +- else if (++cond_branch_num >= 2) > +- { > +- padding_p = false; > +- break; > +- } > +- } > +- > +- } > +- > +- if (padding_p && detect_tight_loop_p) > +- { > +- emit_insn_before (gen_max_skip_align (GEN_INT (ceil_log2 > (size)), > +- GEN_INT (0)), label); > +- /* End of function. */ > +- if (!tbb || tbb == EXIT_BLOCK_PTR_FOR_FN (cfun)) > +- break; > +- /* Skip bb which already fits into one cacheline. */ > +- bb = tbb; > +- } > +- } > +- } > +- > +- loop_optimizer_finalize (); > +- free_dominance_info (CDI_DOMINATORS); > +-} > +- > + /* Implement machine specific optimizations. 
We implement padding of > returns > + for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */ > + static void > +@@ -23611,8 +23467,6 @@ ix86_reorg (void) > + #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN > + if (TARGET_FOUR_JUMP_LIMIT) > + ix86_avoid_jump_mispredicts (); > +- > +- ix86_align_loops (); > + #endif > + } > + } > +diff --git a/gcc/testsuite/gcc.target/i386/pr116174.c > b/gcc/testsuite/gcc.target/i386/pr116174.c > +new file mode 100644 > +index 000000000000..8877d0b51af1 > +--- /dev/null > ++++ b/gcc/testsuite/gcc.target/i386/pr116174.c > +@@ -0,0 +1,12 @@ > ++/* { dg-do compile { target *-*-linux* } } */ > ++/* { dg-options "-O2 -fcf-protection=branch" } */ > ++ > ++char * > ++foo (char *dest, const char *src) > ++{ > ++ while ((*dest++ = *src++) != '\0') > ++ /* nothing */; > ++ return --dest; > ++} > ++ > ++/* { dg-final { scan-assembler "\t\.cfi_startproc\n\tendbr(32|64)\n" } } */ > +-- > +2.43.5 > -- > 2.34.1 > > > >
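For anyone who wants to sanity-check the backport locally before it merges: the quickest check I know of is to compile the testcase the patch adds and confirm that the endbr instruction is emitted directly after .cfi_startproc, with no alignment padding in between, which is exactly what the new dg-final scan-assembler pattern asserts. A rough sketch, untested as written; x86_64-poky-linux-gcc is just a stand-in name, any x86-64 gcc 14.2 with this patch applied should behave the same:

  $ cat pr116174.c
  char *
  foo (char *dest, const char *src)
  {
    while ((*dest++ = *src++) != '\0')
      /* nothing */;
    return --dest;
  }

  $ # stand-in compiler name; use whichever gcc carries the backport
  $ x86_64-poky-linux-gcc -O2 -fcf-protection=branch -S pr116174.c -o - \
      | grep -A1 cfi_startproc
          .cfi_startproc
          endbr64

With pass_align_tight_loops reordered after pass_insert_endbr_and_patchable_area, the objtool "data relocation to !ENDBR" warning from the linux-yocto build should disappear as well.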