Hi Aaron, On Mon, 2024-02-05 at 18:11 -0500, Aaron Merey wrote: > During symtab merging, adjust_relocs might be called multiple times on > some SHT_REL/SHT_RELA sections. In these cases it is possible for a > relocation's symbol index to be correctly mapped from X to Y during the > first call to adjust_relocs but then wrongly remapped from Y to Z during > the second call. > > Fix this by adjusting relocation symbol indices just once per section. > > Also add stable sorting for symbols during symtab merging so that the > symbol order in the output file's symtab does not depend on undefined > behaviour in qsort. > > Note that adjust_relocs still might be called a second time on a section > during add_new_section_symbols. However since add_new_section_symbols > generates its own distinct symbol index map, this should not trigger the > bug described above. > > https://sourceware.org/bugzilla/show_bug.cgi?id=31097 > > Signed-off-by: Aaron Merey <ame...@redhat.com> > --- > > I added some tests to run-unstrip-test.sh to verify that strip+unstrip > does not alter the symbols referred to by relocations. I tried to use > eu-elfcmp for this purpose. However for a pair of i386 ET_REL binaries > I did some testing with, eu-elfcmp skips checking the SHT_REL/SHT_RELA > sections because the sh_flags do not contain SHF_ALLOC (the flags only > contain SHF_INFO_LINK in this case). > > I'm not sure if this eu-elfcmp behaviour is intentional or if eu-elfcmp > should start comparing REL/RELA sections even if they aren't allocated.
So it looks like elfcmp explicitly checks ebl_section_strip_p and doesn't compare sections that are strippable. Maybe we should add an eu-elfcmp --all-sections flag? We should probably also check that it handles the new SHT_RELR sections correctly. I see it only checks for SHT_REL and SHT_RELA in the code. Although RELR is really simple and doesn't have symbol references. So it is probably fine. > src/unstrip.c | 81 ++++++++++++++++---- > tests/.gitignore | 1 + > tests/Makefile.am | 3 +- > tests/elf-print-reloc-syms.c | 144 +++++++++++++++++++++++++++++++++++ > tests/run-unstrip-test.sh | 8 ++ > 5 files changed, 221 insertions(+), 16 deletions(-) > create mode 100644 tests/elf-print-reloc-syms.c > > diff --git a/src/unstrip.c b/src/unstrip.c > index d5bd1821..f37d6c58 100644 > --- a/src/unstrip.c > +++ b/src/unstrip.c > @@ -598,21 +598,30 @@ adjust_relocs (Elf_Scn *outscn, Elf_Scn *inscn, const > GElf_Shdr *shdr, > /* Adjust all the relocation sections in the file. */ > static void > adjust_all_relocs (Elf *elf, Elf_Scn *symtab, const GElf_Shdr *symshdr, > - size_t map[], size_t map_size) > + size_t map[], size_t map_size, bool *scn_filter) > { Maybe bool scn_filter[] since it really is an array? > size_t new_sh_link = elf_ndxscn (symtab); > Elf_Scn *scn = NULL; > while ((scn = elf_nextscn (elf, scn)) != NULL) > if (scn != symtab) > { > + if (scn_filter != NULL) > + { > + size_t ndx = elf_ndxscn (scn); > + > + /* Skip relocations that were already mapped during adjust_relocs > + for the stripped symtab. This is to avoid mapping a relocation's > + symbol index from X to Y during the first adjust_relocs and then > + wrongly remapping it from Y to Z during the second call. */ > + if (scn_filter[ndx]) > + continue; > + } > + > GElf_Shdr shdr_mem; > GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem); > ELF_CHECK (shdr != NULL, _("cannot get section header: %s")); > - /* Don't redo SHT_GROUP, groups are in both the stripped and debug, > - it will already have been done by adjust_relocs for the > - stripped_symtab. */ > - if (shdr->sh_type != SHT_NOBITS && shdr->sh_type != SHT_GROUP > - && shdr->sh_link == new_sh_link) > + > + if (shdr->sh_type != SHT_NOBITS && shdr->sh_link == new_sh_link) > adjust_relocs (scn, scn, shdr, map, map_size, symshdr); > } > } OK. > @@ -697,7 +706,7 @@ add_new_section_symbols (Elf_Scn *old_symscn, size_t > old_shnum, > } > > /* Adjust any relocations referring to the old symbol table. */ > - adjust_all_relocs (elf, symscn, shdr, symndx_map, nsym - 1); > + adjust_all_relocs (elf, symscn, shdr, symndx_map, nsym - 1, NULL); > > return symdata; > } OK. > @@ -874,6 +883,7 @@ collect_symbols (Elf *outelf, bool rel, Elf_Scn *symscn, > Elf_Scn *strscn, > s->shndx = shndx; > s->info.info = sym->st_info; > s->info.other = sym->st_other; > + s->duplicate = NULL; > > if (scnmap != NULL && shndx != SHN_UNDEF && shndx < SHN_LORESERVE) > s->shndx = scnmap[shndx - 1]; > @@ -903,8 +913,7 @@ collect_symbols (Elf *outelf, bool rel, Elf_Scn *symscn, > Elf_Scn *strscn, > if (s1->value > s2->value) \ > return 1 > > -/* Compare symbols with a consistent ordering, > - but one only meaningful for equality. */ > +/* Symbol comparison used to sort symbols in preparation for deduplication. > */ > static int > compare_symbols (const void *a, const void *b) > { > @@ -915,6 +924,38 @@ compare_symbols (const void *a, const void *b) > CMP (size); > CMP (shndx); > > + int res = s1->compare - s2->compare; > + if (res != 0) > + return res; > + > + res = strcmp (s1->name, s2->name); > + if (res != 0) > + return res; > + > + /* Duplicates still have distinct positions in the symbol index map. > + Compare map positions to ensure that duplicate symbols are ordered > + consistently even if the sort function is unstable. */ > + CMP (map); > + error_exit (0, _("found two identical index map positions.")); > +} > > +/* Symbol comparison used to deduplicate symbols found in both the stripped > + and unstripped input files. > + > + Similar to compare_symbols, but does not differentiate symbols based > + on their position in the symbol index map. Duplicates can't be found > + by comparing index map postions because they always have distinct > + positions in the map. */ > +static int > +compare_symbols_duplicate (const void *a, const void *b) > +{ > + const struct symbol *s1 = a; > + const struct symbol *s2 = b; > + > + CMP (value); > + CMP (size); > + CMP (shndx); > + > return (s1->compare - s2->compare) ?: strcmp (s1->name, s2->name); > } > Right, this new compare_symbols_duplicate is what compare_symbols did before, which is not a stable sort. > @@ -946,13 +987,13 @@ compare_symbols_output (const void *a, const void *b) > /* binutils always puts section symbols in section index order. */ > CMP (shndx); > else if (s1 != s2) > - error_exit (0, "section symbols in unexpected order"); > + error_exit (0, _("section symbols in unexpected order")); > } > Yes, this is a translatable user visible string. > > /* Nothing really matters, so preserve the original order. */ > CMP (map); > else if (s1 != s2) > - error_exit (0, "found two identical symbols"); > + error_exit (0, _("found two identical symbols")); > } > > return cmp; Likewise. > @@ -1855,7 +1896,8 @@ more sections in stripped file than debug file -- > arguments reversed?")); > } > > struct symbol *n = s; > - while (n + 1 < &symbols[total_syms] && !compare_symbols (s, n + 1)) > + while (n + 1 < &symbols[total_syms] > + && !compare_symbols_duplicate (s, n + 1)) > ++n; Right, because compare_symbols_duplicate does what compare_symbols use to do. > > while (s < n) > @@ -1992,6 +2034,11 @@ more sections in stripped file than debug file -- > arguments reversed?")); > elf_flagdata (symdata, ELF_C_SET, ELF_F_DIRTY); > update_shdr (unstripped_symtab, shdr); > > + /* Track which sections are adjusted during the first round > + of calls to adjust_relocs. */ > + bool scn_adjusted[unstripped_shnum]; > + memset (scn_adjusted, 0, sizeof scn_adjusted); > + > if (stripped_symtab != NULL) > { > /* Adjust any relocations referring to the old symbol table. */ > @@ -2000,14 +2047,18 @@ more sections in stripped file than debug file -- > arguments reversed?")); > sec < §ions[stripped_shnum - 1]; > ++sec) > if (sec->outscn != NULL && sec->shdr.sh_link == old_sh_link) > - adjust_relocs (sec->outscn, sec->scn, &sec->shdr, > - symndx_map, total_syms, shdr); > + { > + adjust_relocs (sec->outscn, sec->scn, &sec->shdr, > + symndx_map, total_syms, shdr); > + scn_adjusted[elf_ndxscn (sec->outscn)] = true; > + } > } > OK. > /* Also adjust references to the other old symbol table. */ > adjust_all_relocs (unstripped, unstripped_symtab, shdr, > &symndx_map[stripped_nsym - 1], > - total_syms - (stripped_nsym - 1)); > + total_syms - (stripped_nsym - 1), > + scn_adjusted); > > free (symbols); > free (symndx_map); OK. > diff --git a/tests/.gitignore b/tests/.gitignore > index 5bebb2c4..d00a883e 100644 > --- a/tests/.gitignore > +++ b/tests/.gitignore > @@ -63,6 +63,7 @@ > /elfshphehdr > /elfstrmerge > /elfstrtab > +/elf-print-reloc-syms > /emptyfile > /fillfile > /find-prologues OK. > diff --git a/tests/Makefile.am b/tests/Makefile.am > index 2373c980..13bd9d56 100644 > --- a/tests/Makefile.am > +++ b/tests/Makefile.am > @@ -62,7 +62,7 @@ check_PROGRAMS = arextract arsymtest newfile saridx > scnnames sectiondump \ > dwelf_elf_e_machine_string \ > getphdrnum leb128 read_unaligned \ > msg_tst system-elf-libelf-test system-elf-gelf-test \ > - nvidia_extended_linemap_libdw \ > + nvidia_extended_linemap_libdw elf-print-reloc-syms \ > $(asm_TESTS) > OK. > > asm_TESTS = asm-tst1 asm-tst2 asm-tst3 asm-tst4 asm-tst5 \ > @@ -810,6 +810,7 @@ getphdrnum_LDADD = $(libelf) $(libdw) > leb128_LDADD = $(libelf) $(libdw) > read_unaligned_LDADD = $(libelf) $(libdw) > nvidia_extended_linemap_libdw_LDADD = $(libelf) $(libdw) > +elf_print_reloc_syms_LDADD = $(libelf) > > # We want to test the libelf headers against the system elf.h header. > # Don't include any -I CPPFLAGS. Except when we install our own elf.h. OK. > diff --git a/tests/elf-print-reloc-syms.c b/tests/elf-print-reloc-syms.c > new file mode 100644 > index 00000000..d6b7867d > --- /dev/null > +++ b/tests/elf-print-reloc-syms.c > @@ -0,0 +1,144 @@ > +/* Copyright (C) 2024 Red Hat, Inc. > + This file is part of elfutils. > + > + This file is free software; you can redistribute it and/or modify > + it under the terms of the GNU General Public License as published by > + the Free Software Foundation; either version 3 of the License, or > + (at your option) any later version. > + > + elfutils is distributed in the hope that it will be useful, but > + WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + GNU General Public License for more details. > + > + You should have received a copy of the GNU General Public License > + along with this program. If not, see <http://www.gnu.org/licenses/>. */ > + > +#ifdef HAVE_CONFIG_H > +# include <config.h> > +#endif > + > +#include <sys/types.h> > +#include <sys/stat.h> > +#include <fcntl.h> > +#include <inttypes.h> > +#include <libelf.h> > +#include <gelf.h> > +#include <stdbool.h> > +#include <stdio.h> > +#include <string.h> > +#include <unistd.h> > +#include <assert.h> > + > +static void > +print_reloc_symnames (Elf *elf, Elf_Scn *scn, GElf_Shdr *shdr, size_t > sh_entsize) > +{ > + int nentries = shdr->sh_size / sh_entsize; > + > + /* Get the data of the section. */ > + Elf_Data *data = elf_getdata (scn, NULL); > + assert (data != NULL); > + > + /* Get the symbol table information. */ > + Elf_Scn *symscn = elf_getscn (elf, shdr->sh_link); > + GElf_Shdr symshdr_mem; > + GElf_Shdr *symshdr = gelf_getshdr (symscn, &symshdr_mem); > + Elf_Data *symdata = elf_getdata (symscn, NULL); > + assert (symshdr != NULL); > + assert (symdata != NULL); > + > + /* Search for the optional extended section index table. */ > + Elf_Data *xndxdata = NULL; > + int xndxscnidx = elf_scnshndx (scn); > + if (xndxscnidx) > + xndxdata = elf_getdata (elf_getscn (elf, xndxscnidx), NULL); > + > + /* Get the section header string table index. */ > + size_t shstrndx; > + assert (elf_getshdrstrndx (elf, &shstrndx) >= 0); > + > + printf("Section: %s\n", elf_strptr (elf, shstrndx, shdr->sh_name)); > + for (int cnt = 0; cnt < nentries; ++cnt) > + { > + GElf_Rel relmem; > + GElf_Rel *rel = gelf_getrel (data, cnt, &relmem); > + > + > + if (likely (rel != NULL)) > + { > + GElf_Sym symmem; > + Elf32_Word xndx; > + GElf_Sym *sym = gelf_getsymshndx (symdata, xndxdata, > + GELF_R_SYM (rel->r_info), > + &symmem, &xndx); > + > + if (sym == NULL) > + { > + printf ("<SYM NOT FOUND>\n"); > + continue; > + } > + > + if (GELF_ST_TYPE (sym->st_info) != STT_SECTION) > + printf ("%s\n", elf_strptr (elf, symshdr->sh_link, > sym->st_name)); > + else > + { > + /* This is a relocation against a STT_SECTION symbol. */ > + GElf_Shdr secshdr_mem; > + GElf_Shdr *secshdr; > + secshdr = gelf_getshdr (elf_getscn (elf, > + sym->st_shndx == SHN_XINDEX > + ? xndx : sym->st_shndx), > + &secshdr_mem); > + > + if (secshdr == NULL) > + printf("<SECTION NOT FOUND>\n"); > + else > + printf ("%s\n", > + elf_strptr (elf, shstrndx, secshdr->sh_name)); > + } > + } > + } > +} > + > +int > +main (int argc, char *argv[]) > +{ > + if (argc != 2) > + { > + printf ("Usage: elf_print_reloc_syms FILE\n"); > + return -1; > + } > + > + elf_version (EV_CURRENT); > + > + int fd = open(argv[1], O_RDONLY); > + assert (fd != -1); > + > + Elf *elf = elf_begin (fd, ELF_C_READ, NULL); > + assert (elf != NULL); > + > + size_t shnums; > + assert (elf_getshdrnum (elf, &shnums) >= 0); > + > + Elf_Scn *scn = NULL; > + while ((scn = elf_nextscn (elf, scn)) != NULL) > + { > + GElf_Shdr shdr_mem; > + GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem); > + > + if (shdr != NULL) > + { > + /* Print the names of symbols referred to by relocations. */ > + if (shdr->sh_type == SHT_REL) > + { > + size_t sh_entsize = gelf_fsize (elf, ELF_T_REL, 1, EV_CURRENT); > + print_reloc_symnames (elf, scn, shdr, sh_entsize); > + } > + else if (shdr->sh_type == SHT_RELA) > + { > + size_t sh_entsize = gelf_fsize (elf, ELF_T_RELA, 1, EV_CURRENT); > + print_reloc_symnames (elf, scn, shdr, sh_entsize); > + } > + } > + } > +} It is just a testcase, but cleaning up would be nice. elf_end(elf); close (fd); (See also below for runtest support) > diff --git a/tests/run-unstrip-test.sh b/tests/run-unstrip-test.sh > index dc7d3a42..03373b3f 100755 > --- a/tests/run-unstrip-test.sh > +++ b/tests/run-unstrip-test.sh > @@ -33,6 +33,14 @@ testrun ${abs_top_builddir}/src/unstrip -o > testfile.unstrip $stripped $debugfile > > testrun ${abs_top_builddir}/src/elfcmp --hash-inexact $original > testfile.unstrip > > +tempfiles syms-orig syms-testfile > + > +# Check whether relocated symbols changed. > +${abs_top_builddir}/tests/elf-print-reloc-syms $original > syms-orig > +${abs_top_builddir}/tests/elf-print-reloc-syms testfile.unstrip > > syms-testfile > These really should run under "testrun" so they will use the just build libelf (and run under valgrind with --enable-valgrind, which would have caught that the testcase doesn't clean up all resources it allocates). > +testrun diff syms-orig syms-testfile > + > # Also test modifying the file in place. > > rm -f testfile.inplace Looks good in general. Just clean up in the new testcase and run it with testrun. Thanks, Mark