[PATCH 0/6] Various portability fixes and organization

2024-10-10 Thread Michael Pratt
Hi,

I saw that elfutils is having a release soon, so I thought now
would be a good time to send some patches that are used
by the Openwrt project to help elfutils build on more systems,
for example, by integrating with gnulib/libtool, and coinciding with
custom patches applied to other build tools.

The first 4 patches are very simple so I'm hoping they can be merged ASAP.

The last 2 patches are less critical, especially the last one
which is just an organizational step. If the last 2 patches
are not acceptable, they can be dropped or revisited after the release.

Thanks,
MCP

Michael Pratt (6):
  lib: Add missing config.h include to next_prime.c
  libcpu: Include config.h before standard headers in lexer source
  libdw: Let clean targets be unconditional
  src: Prevent invalid include of binary into compilation
  Remove usage of "unlocked" variant of stdio print functions
  Use file stream or format variants of stdio print functions

 lib/next_prime.c  |   4 +
 libasm/asm_align.c|   4 +-
 libcpu/i386_disasm.c  |   2 +-
 libcpu/i386_lex.l |  10 +-
 libcpu/i386_parse.y   |   4 +-
 libdw/Makefile.am |   2 +-
 libebl/eblobjnote.c   |   4 +-
 src/Makefile.am   |   1 +
 src/addr2line.c   |   8 +-
 src/ar.c  |   2 +-
 src/elfclassify.c |   2 +-
 src/elflint.c |   2 +-
 src/nm.c  |  22 ++--
 src/objdump.c |  28 ++---
 src/readelf.c | 198 +-
 src/size.c|   8 +-
 src/strings.c |  20 ++--
 src/unstrip.c |   2 +-
 tests/addrcfi.c   |   2 +-
 tests/addrscopes.c|   2 +-
 tests/all-dwarf-ranges.c  |   4 +-
 tests/arextract.c |   4 +-
 tests/asm-tst1.c  |   6 +-
 tests/asm-tst2.c  |   6 +-
 tests/asm-tst3.c  |   8 +-
 tests/asm-tst4.c  |   2 +-
 tests/asm-tst5.c  |   2 +-
 tests/asm-tst6.c  |   2 +-
 tests/asm-tst7.c  |   6 +-
 tests/asm-tst8.c  |   6 +-
 tests/asm-tst9.c  |   6 +-
 tests/buildid.c   |   4 +-
 tests/debugaltlink.c  |   4 +-
 tests/dwarf-getmacros.c   |   6 +-
 tests/dwarf-getstring.c   |   2 +-
 tests/dwarf-ranges.c  |   2 +-
 tests/dwarfcfi.c  |   2 +-
 tests/dwflmodtest.c   |   6 +-
 tests/funcretval.c|   6 +-
 tests/funcscopes.c|   2 +-
 tests/get-aranges.c   |   6 +-
 tests/get-files-define-file.c |   2 +-
 tests/get-files.c |   2 +-
 tests/get-pubnames.c  |   4 +-
 tests/line2addr.c |   4 +-
 tests/next-files.c|   2 +-
 tests/saridx.c|   2 +-
 tests/scnnames.c  |   2 +-
 tests/sectiondump.c   |   4 +-
 tests/show-die-info.c |  62 +--
 tests/showptable.c|  10 +-
 tests/test-nlist.c|   2 +-
 52 files changed, 262 insertions(+), 253 deletions(-)

-- 
2.30.2




[PATCH 2/6] libcpu: Include config.h before standard headers in lexer source

2024-10-10 Thread Michael Pratt
As part of the processing of flex, definitions and headers
are added to output source before any literal text or generated code.

This causes standard headers to come before config.h
unless config.h is included in a %top block instead
as specified in the flex manual, section 5.1 "Format of the Definitions".

The %top block is non-POSIX, so using it reinforces
the requirement of "flex" over a standardized "lex" even more.

  * libcpu/i386_lex.l (%top): add flex %top block
  and move config.h header inclusion to it.

Signed-off-by: Michael Pratt 
---
 libcpu/i386_lex.l | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/libcpu/i386_lex.l b/libcpu/i386_lex.l
index 9b33ed95..9c1e14db 100644
--- a/libcpu/i386_lex.l
+++ b/libcpu/i386_lex.l
@@ -1,3 +1,9 @@
+%top{
+#ifdef HAVE_CONFIG_H
+# include 
+#endif
+}
+
 %{
 /* Copyright (C) 2004, 2005, 2007, 2008 Red Hat, Inc.
Written by Ulrich Drepper , 2004.
@@ -26,10 +32,6 @@
the GNU Lesser General Public License along with this program.  If
not, see .  */
 
-#ifdef HAVE_CONFIG_H
-# include 
-#endif
-
 #include 
 
 #include 
-- 
2.30.2




[PATCH 5/6] Remove usage of "unlocked" variant of stdio print functions

2024-10-10 Thread Michael Pratt
These Linux Standard Base functions are not available
on all systems that are capable of building Linux and ELFs.

The difference is negligible for simple printing to stdout.

POSIX also states for the similar putc_unlocked():

  These functions can safely be used in a multi-threaded program
  if and only if they are called while the invoking thread owns
  the (FILE *) object, as is the case after a successful call
  to the flockfile() or ftrylockfile() functions.

and

  These unlocked versions can be safely used
  only within explicitly locked program regions,
  using exported locking primitives.

and this was never done.

There is inconsistent use of fputc_unlocked()
mixed with putc_unlocked() and putchar_unlocked(),
so consistently use the safer fputc() instead.

Signed-off-by: Michael Pratt 
---
 libasm/asm_align.c  |  4 +-
 libcpu/i386_parse.y |  4 +-
 libebl/eblobjnote.c |  4 +-
 src/nm.c| 20 +-
 src/objdump.c   | 24 ++--
 src/readelf.c   | 90 ++---
 src/size.c  |  8 ++--
 src/strings.c   | 20 +-
 tests/showptable.c  |  8 ++--
 9 files changed, 91 insertions(+), 91 deletions(-)

diff --git a/libasm/asm_align.c b/libasm/asm_align.c
index 3a976756..19ec9a13 100644
--- a/libasm/asm_align.c
+++ b/libasm/asm_align.c
@@ -60,13 +60,13 @@ asm_align (AsmScn_t *asmscn, GElf_Word value)
fprintf (asmscn->ctx->out.file, "%02hhx\n", asmscn->pattern->bytes[0]);
   else
{
- fputc_unlocked ('"', asmscn->ctx->out.file);
+ fputc ('"', asmscn->ctx->out.file);
 
  for (size_t cnt = 0; cnt < asmscn->pattern->len; ++cnt)
fprintf (asmscn->ctx->out.file, "\\x%02hhx",
 asmscn->pattern->bytes[cnt]);
 
- fputs_unlocked ("\"\n", asmscn->ctx->out.file);
+ fputs ("\"\n", asmscn->ctx->out.file);
}
   return 0;
 }
diff --git a/libcpu/i386_parse.y b/libcpu/i386_parse.y
index 459684c6..5c91e520 100644
--- a/libcpu/i386_parse.y
+++ b/libcpu/i386_parse.y
@@ -1158,7 +1158,7 @@ instrtable_out (void)
   EMIT_SUFFIX (w1);
   EMIT_SUFFIX (W1);
 
-  fputc_unlocked ('\n', outfile);
+  fputc ('\n', outfile);
 
   for (int i = 0; i < 3; ++i)
 {
@@ -1333,7 +1333,7 @@ instrtable_out (void)
  b = b->next;
}
 
-  fputc_unlocked ('\n', outfile);
+  fputc ('\n', outfile);
 }
   fputs ("};\n", outfile);
 }
diff --git a/libebl/eblobjnote.c b/libebl/eblobjnote.c
index ad3f49de..da69e99c 100644
--- a/libebl/eblobjnote.c
+++ b/libebl/eblobjnote.c
@@ -648,10 +648,10 @@ ebl_object_note (Ebl *ebl, uint32_t namesz, const char 
*name, uint32_t type,
  for (size_t cnt = 1; cnt < descsz / 4; ++cnt)
{
  if (cnt > 1)
-   putchar_unlocked ('.');
+   fputc ('.', stdout);
  printf ("%" PRIu32, buf[cnt]);
}
- putchar_unlocked ('\n');
+ fputc ('\n', stdout);
}
  if (descsz / 4 > FIXED_TAG_BYTES)
free (buf);
diff --git a/src/nm.c b/src/nm.c
index 3675f59b..06f1a072 100644
--- a/src/nm.c
+++ b/src/nm.c
@@ -439,7 +439,7 @@ handle_ar (int fd, Elf *elf, const char *prefix, const char 
*fname,
  Elf_Arhdr *arhdr = NULL;
  size_t arhdr_off = 0; /* Note: 0 is no valid offset.  */
 
- fputs_unlocked (_("\nArchive index:\n"), stdout);
+ fputs (_("\nArchive index:\n"), stdout);
 
  while (arsym->as_off != 0)
{
@@ -825,8 +825,8 @@ show_symbols_sysv (Ebl *ebl, GElf_Word strndx, const char 
*fullname,
   /* If we have to precede the line with the file name.  */
   if (print_file_name)
{
- fputs_unlocked (fullname, stdout);
- putchar_unlocked (':');
+ fputs (fullname, stdout);
+ fputc (':', stdout);
}
 
   /* Convert the address.  */
@@ -972,8 +972,8 @@ show_symbols_bsd (Elf *elf, const GElf_Ehdr *ehdr, 
GElf_Word strndx,
   /* If we have to precede the line with the file name.  */
   if (print_file_name)
{
- fputs_unlocked (fullname, stdout);
- putchar_unlocked (':');
+ fputs (fullname, stdout);
+ fputc (':', stdout);
}
 
   bool is_tls = GELF_ST_TYPE (syms[cnt].sym.st_info) == STT_TLS;
@@ -1046,8 +1046,8 @@ show_symbols_bsd (Elf *elf, const GElf_Ehdr *ehdr, 
GElf_Word strndx,
}
 
   if (color_mode)
-   fputs_unlocked (color_off, stdout);
-  putchar_unlocked ('\n');
+   fputs (color_off, stdout);
+  fputc ('\n', stdout);
 }
 
 #ifdef USE_DEMANGLE
@@ -1104,9 +1104,9 @@ show_symbols_posix (Elf *elf, const GElf_Ehdr *ehdr, 
GElf_Word strndx,
   /* If we have to precede the line with the file name.  */
   if (print_file_name)
{
- fputs_unlocked (fullname, stdout);
- putchar_unlocked (':');
- putchar_unlocked

[PATCH 3/6] libdw: Let clean targets be unconditional

2024-10-10 Thread Michael Pratt
The automake rule "maintainer-clean-generic"
is always available and never conditional,
so let the variable that uses it be define
non-conditionally.

If one actually wants conditional cleaning
they should write a custom rule and set it
as a dependency of a "*clean-local" automake rule.

There is no need to do conditional cleaning here,
so move the MAINTAINERCLEANFILES variable definition
to the end of the Makefile.am file as it is
in the rest of the project.

  * libdw/Makefile.am: move MAINTAINERCLEANFILES
  variable to the end of the file
  as a non-conditional definition.

Signed-off-by: Michael Pratt 
---
 libdw/Makefile.am | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libdw/Makefile.am b/libdw/Makefile.am
index 5363c02a..4b9c4413 100644
--- a/libdw/Makefile.am
+++ b/libdw/Makefile.am
@@ -97,7 +97,6 @@ libdw_a_SOURCES = dwarf_begin.c dwarf_begin_elf.c dwarf_end.c 
dwarf_getelf.c \
 
 if MAINTAINER_MODE
 BUILT_SOURCES = $(srcdir)/known-dwarf.h
-MAINTAINERCLEANFILES = $(srcdir)/known-dwarf.h
 $(srcdir)/known-dwarf.h: $(top_srcdir)/config/known-dwarf.awk $(srcdir)/dwarf.h
gawk -f $^ > $@.new
mv -f $@.new $@
@@ -154,3 +153,4 @@ noinst_HEADERS = libdwP.h memory-access.h 
dwarf_abbrev_hash.h \
 EXTRA_DIST = libdw.map
 
 MOSTLYCLEANFILES = $(am_libdw_pic_a_OBJECTS) libdw.so libdw.so.$(VERSION)
+MAINTAINERCLEANFILES = $(srcdir)/known-dwarf.h
-- 
2.30.2




[PATCH 4/6] src: Prevent invalid include of binary into compilation

2024-10-10 Thread Michael Pratt
The "stack" binary built by elfutils has the same name
as the standard C++ header .

If that header were to be in the list of includes
while building the C++ elfutils program "srcfiles",
then there is a chance that the "stack" binary
is included instead and treated as text,
leading to a decode error
if "stack" happens to be built first
and "." is in the include paths.

Adding the result of C++ compilation srcfiles.o
to the dependencies of "stack" ensures that
the C++ compilation will happen first,
before the stack binary is present.

While this doesn't guarantee an error will not occur
in all cases, it does guarantee that it will not occur
from a clean build state.

  * src/Makefile.am: Add compilation dependency between
  the stack and srcfiles binaries.

Signed-off-by: Michael Pratt 
---
 src/Makefile.am | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Makefile.am b/src/Makefile.am
index e0267d96..c5474c68 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -94,6 +94,7 @@ strings_LDADD = $(libelf) $(libeu) $(argp_LDADD)
 ar_LDADD = libar.a $(libelf) $(libeu) $(argp_LDADD) $(obstack_LIBS)
 unstrip_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD)
 stack_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD) 
$(demanglelib)
+EXTRA_stack_DEPENDENCIES = $(if $(findstring 
srcfiles,$(bin_PROGRAMS)),$(srcfiles_OBJECTS))
 elfcompress_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD)
 elfclassify_LDADD = $(libelf) $(libdw) $(libeu) $(argp_LDADD)
 srcfiles_SOURCES = srcfiles.cxx
-- 
2.30.2




[PATCH 1/6] lib: Add missing config.h include to next_prime.c

2024-10-10 Thread Michael Pratt
This is the last remaining C source file as of this commit
without the standard conditional inclusion of config.h
as the very first header.

  * lib/next_prime.c: add missing config.h header.

Signed-off-by: Michael Pratt 
---
 lib/next_prime.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/lib/next_prime.c b/lib/next_prime.c
index f2c921e3..97c425e1 100644
--- a/lib/next_prime.c
+++ b/lib/next_prime.c
@@ -27,6 +27,10 @@
the GNU Lesser General Public License along with this program.  If
not, see .  */
 
+#ifdef HAVE_CONFIG_H
+# include 
+#endif
+
 #include 
 
 
-- 
2.30.2




[PATCH 6/6] Use file stream or format variants of stdio print functions

2024-10-10 Thread Michael Pratt
In many source files, putc() and fputc() and putchar()
are used interchangeably, and puts() and fputs() and printf()
are used interchangeably.

Reducing the usage to 2 of the 3 both reduces binary size
of the final product, improves readability and searchability
of the code, especially since grepping for puts() can clash with
words like "inputs" and "outputs", and that putc() can be a macro
while fputc() is the safer function.

While at it, adjust spacing syntax and punctuation for consistency.

Signed-off-by: Michael Pratt 
---
 libcpu/i386_disasm.c  |   2 +-
 src/addr2line.c   |   8 +--
 src/ar.c  |   2 +-
 src/elfclassify.c |   2 +-
 src/elflint.c |   2 +-
 src/nm.c  |   2 +-
 src/objdump.c |   4 +-
 src/readelf.c | 108 +-
 src/unstrip.c |   2 +-
 tests/addrcfi.c   |   2 +-
 tests/addrscopes.c|   2 +-
 tests/all-dwarf-ranges.c  |   4 +-
 tests/arextract.c |   4 +-
 tests/asm-tst1.c  |   6 +-
 tests/asm-tst2.c  |   6 +-
 tests/asm-tst3.c  |   8 +--
 tests/asm-tst4.c  |   2 +-
 tests/asm-tst5.c  |   2 +-
 tests/asm-tst6.c  |   2 +-
 tests/asm-tst7.c  |   6 +-
 tests/asm-tst8.c  |   6 +-
 tests/asm-tst9.c  |   6 +-
 tests/buildid.c   |   4 +-
 tests/debugaltlink.c  |   4 +-
 tests/dwarf-getmacros.c   |   6 +-
 tests/dwarf-getstring.c   |   2 +-
 tests/dwarf-ranges.c  |   2 +-
 tests/dwarfcfi.c  |   2 +-
 tests/dwflmodtest.c   |   6 +-
 tests/funcretval.c|   6 +-
 tests/funcscopes.c|   2 +-
 tests/get-aranges.c   |   6 +-
 tests/get-files-define-file.c |   2 +-
 tests/get-files.c |   2 +-
 tests/get-pubnames.c  |   4 +-
 tests/line2addr.c |   4 +-
 tests/next-files.c|   2 +-
 tests/saridx.c|   2 +-
 tests/scnnames.c  |   2 +-
 tests/sectiondump.c   |   4 +-
 tests/show-die-info.c |  62 +--
 tests/showptable.c|   2 +-
 tests/test-nlist.c|   2 +-
 43 files changed, 159 insertions(+), 157 deletions(-)

diff --git a/libcpu/i386_disasm.c b/libcpu/i386_disasm.c
index dec62bfa..7f199b49 100644
--- a/libcpu/i386_disasm.c
+++ b/libcpu/i386_disasm.c
@@ -565,7 +565,7 @@ i386_disasm (Ebl *ebl __attribute__((unused)),
 #endif
default:
  /* Cannot happen.  */
- puts ("unknown prefix");
+ fputs ("unknown prefix\n", stdout);
  abort ();
}
  data = begin + 1;
diff --git a/src/addr2line.c b/src/addr2line.c
index d87e5b45..9ced8a62 100644
--- a/src/addr2line.c
+++ b/src/addr2line.c
@@ -729,10 +729,10 @@ handle_address (const char *string, Dwfl *dwfl)
  show_int (&dwarf_lineisa, info, "isa");
  show_int (&dwarf_linediscriminator, info, "discriminator");
}
-  putchar ('\n');
+  fputc ('\n', stdout);
 }
   else
-puts ("??:0");
+fputs ("??:0\n", stdout);
 
   if (show_inlines)
 {
@@ -811,10 +811,10 @@ handle_address (const char *string, Dwfl *dwfl)
  if (src != NULL)
{
  print_src (src, lineno, linecol, &cu);
- putchar ('\n');
+ fputc ('\n', stdout);
}
  else
-   puts ("??:0");
+   fputs ("??:0\n", stdout);
}
}
}
diff --git a/src/ar.c b/src/ar.c
index 9ace28b9..ad48c395 100644
--- a/src/ar.c
+++ b/src/ar.c
@@ -579,7 +579,7 @@ do_oper_extract (int oper, const char *arfname, char 
**argv, int argc,
  if (oper == oper_list)
{
  if (!verbose)
-   puts (arhdr->ar_name);
+   printf ("%s\n", arhdr->ar_name);
 
  goto next;
}
diff --git a/src/elfclassify.c b/src/elfclassify.c
index 25fe9a65..8c18b5d1 100644
--- a/src/elfclassify.c
+++ b/src/elfclassify.c
@@ -820,7 +820,7 @@ process_current_path (int *status)
 {
 case do_print:
   if (checks_passed == flag_print_matching)
-puts (current_path);
+printf ("%s\n", current_path);
   break;
 case do_print0:
   if (checks_passed == flag_print_matching)
diff --git a/src/elflint.c b/src/elflint.c
index cdc6108d..24f3c043 100644
--- a/src/elflint.c
+++ b/src/elflint.c
@@ -181,7 +181,7 @@ main (int argc, char *argv[])
   elf_errmsg (-1));
 
  if (prev_error_count == error_count && !be_quiet)
-   puts (_("No errors"));
+   fputs (_("No errors\n"), stdout);
}
 
   close (fd);
diff --git a/src/nm.c b/src/nm.c
index 

Re: [patch] test tempdir move

2024-10-10 Thread Mark Wielaard
Hi Frank,

On Wed, Oct 09, 2024 at 04:43:56PM -0400, Frank Ch. Eigler wrote:
> > I like the idea of this change. Some nitpicks below.
> 
> Those were great ideas, v2 below:
> 
> From 65efbafc16fffa582a84c277493d2531bf88021a Mon Sep 17 00:00:00 2001
> From: "Frank Ch. Eigler" 
> Date: Wed, 9 Oct 2024 13:41:14 -0400
> Subject: [PATCH] tests/test-subr.sh: Put test_dir under /var/tmp.
> 
> Every individual test in elfutils involves a temporary directory.
> Previous version of this script put that directory under the build
> tree.  That's OK if it's a local disk, but if it's on NFS, then some
> tests - run-large-elf-file.sh, several run-debuginfod-*.sh - take long
> enough to run to fail tests intermittently.
> 
> This patch moves the temp_dir under ${TMPDIR-/var/tmp/}, so it
> operates at local disk speed rather than whatever-build-filesystem
> speed.  Individual test scripts are all unaffected.  (One could
> consider /tmp instead, which is a RAM disk on modern systems, except
> that some of the elfutils tests produce GB-sized temporary files.
> That's probably too big for RAM.)

Looks good to me.

Thanks,

Mark


[Bug general/32263] New: RFE: API to create/track executable memory layout for processes from perf PERF_RECORD_MMAP* events

2024-10-10 Thread wcohen at redhat dot com
https://sourceware.org/bugzilla/show_bug.cgi?id=32263

Bug ID: 32263
   Summary: RFE: API to create/track executable memory layout for
processes from perf PERF_RECORD_MMAP* events
   Product: elfutils
   Version: unspecified
Status: NEW
  Severity: normal
  Priority: P2
 Component: general
  Assignee: unassigned at sourceware dot org
  Reporter: wcohen at redhat dot com
CC: elfutils-devel at sourceware dot org
  Target Milestone: ---

The current eu-stacktrace code in development resorts to reading /proc
information to determine which binaries the IP addresses map to when
unwinding the stack.  The main disadvantages of reading the /proc
information is for short lived processes the /proc maybe non-existent
and for processes using dlopen/dlclose the information /proc
information might be out of sync with the mmap when the sample was
taken.  The approach that Linux perf takes to avoid those problems is
to record changes to the memory map with PERF_RECORD_MMAP and
PERF_RECORD_MMAP2 events.  It would be a nice addition to elfutils
tools to have a means of building a picture of a memory map of a
process using the PERF_RECORD_MMAP* events and being able to use that
in other parts of the elfutils such as eu-stacktrace.

The code may still need to read /proc//maps to get a
starting point memory map of processes that existed before data
recording started.

-- 
You are receiving this mail because:
You are on the CC list for the bug.

Re: Preparing for the 0.192 release on Friday October 18

2024-10-10 Thread Mark Wielaard
Hi Aaron,

On Thu, 2024-10-03 at 12:22 -0400, Aaron Merey wrote:
> We are planning to do the 0.192 release on Friday October 18.
> 
> Since the 0.191 release in March there have been 100 commits.  Among
> these are some important bug fixes and new features.
> 
> If your patch is still under review [1] or you have other patches you'd
> like merged before the release, please let us know.

Things I am working on that I hope will make it before the release:

- Make a decision about elf_memory:
https://inbox.sourceware.org/elfutils-devel/20240829134757.gc4...@gnu.wildebeest.org/

- Fix a elf_compress issue I found with debugedit:
https://sourceware.org/bugzilla/show_bug.cgi?id=32102

- Get the last MIPS pieces in:
https://patchwork.sourceware.org/project/elfutils/patch/20240305095122.889077-3-ying.hu...@oss.cipunited.com/
https://patchwork.sourceware.org/project/elfutils/patch/20240305095122.889077-4-ying.hu...@oss.cipunited.com/
https://patchwork.sourceware.org/project/elfutils/patch/20240305095122.889077-5-ying.hu...@oss.cipunited.com/
https://patchwork.sourceware.org/project/elfutils/patch/20240524094120.1761245-1-ying.hu...@oss.cipunited.com/
The issue here is how to handle the first patch which rewrites the
r_info field to make GELF_R_SYM and GELF_R_TYPE work. MIPS uses a
different layout than other arches, so it doesn't really fit the endian
based conversion mechanism we are currently using.

- Review your documentation patches v2

- Review other pending patches like the just posted Openwrt
  compat patches.

Cheers,

Mark

> [1] https://patchwork.sourceware.org/project/elfutils/list
> 


[PATCH] PR32218: debuginfod-client: support very long source file

2024-10-10 Thread Frank Ch. Eigler


>From be79d138989b9968f9c687ef62cc91b5b93e32b5 Mon Sep 17 00:00:00 2001
From: "Frank Ch. Eigler" 
Date: Thu, 10 Oct 2024 16:30:19 -0400
Subject: [PATCH] PR32218: debuginfod-client: support very long source file
 names

debuginfod clients & servers may sometimes encounter very long source
file names.  Previously, the client would synthesize a path name like
   $CACHEDIR/$BUILDID/source-$PATHNAME
where $PATHNAME was a funky ##-encoded version of the entire source
path name.  This can get too long to store as a single component
of a file system pathname (e.g. linux/limits.h NAME_MAX), resulting
on client-side errors even after a successful download.

New code switches encoding of the $PATHNAME part to use less escaping,
and a merciless truncation to the tail part of the filename.  (We keep
the tail rather than the head, so that the extension is preserved,
which makes some consumers happier.)  To limit collision damage from
truncation, we add also insert a goofy hash (4-byte elf_hash) of the
name into the path name.  The result is a relatively short name:

   $CACHEDIR/$BUILDID/source-$HASH-$NAMETAIL

This is a transparent change to clients, who are not to make any
assumptions about cache file naming structure.  However, one existing
test did make such assumptions, so is fixed with some globness.  A new
test is also added, using a pre-baked tarball with a very long srcfile
name.  Some auxiliary makefiles are tweaked to satisfy the new
libdebuginfod->libelf link order dependency.

Signed-off-by: Frank Ch. Eigler 
---
 debuginfod/Makefile.am|   4 +-
 debuginfod/debuginfod-client.c|  76 +++---
 src/Makefile.am   |   2 +-
 tests/Makefile.am |   7 +-
 .../bighello-sources/bighello.c   |   7 ++
 .../bighello-sources/bighello.h   |   1 +
 .../moremoremoremoremoremoremoremore  |   1 +
 tests/debuginfod-tars/bighello.tar| Bin 0 -> 51200 bytes
 tests/run-debuginfod-longsource.sh|  69 
 tests/run-debuginfod-section.sh   |  16 ++--
 10 files changed, 144 insertions(+), 39 deletions(-)
 create mode 100644 tests/debuginfod-tars/bighello-sources/bighello.c
 create mode 100644 tests/debuginfod-tars/bighello-sources/bighello.h
 create mode 12 
tests/debuginfod-tars/bighello-sources/moremoremoremoremoremoremoremore
 create mode 100644 tests/debuginfod-tars/bighello.tar
 create mode 100755 tests/run-debuginfod-longsource.sh

diff --git a/debuginfod/Makefile.am b/debuginfod/Makefile.am
index 5ad4e188c4c3..dbba2a437c80 100644
--- a/debuginfod/Makefile.am
+++ b/debuginfod/Makefile.am
@@ -70,10 +70,10 @@ bin_PROGRAMS += debuginfod-find
 endif
 
 debuginfod_SOURCES = debuginfod.cxx
-debuginfod_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) $(argp_LDADD) 
$(fts_LIBS) $(libmicrohttpd_LIBS) $(sqlite3_LIBS) $(libarchive_LIBS) 
$(rpm_LIBS) $(jsonc_LIBS) $(libcurl_LIBS) $(lzma_LIBS) -lpthread -ldl
+debuginfod_LDADD = $(libdw) $(libdebuginfod) $(libelf) $(libeu) $(argp_LDADD) 
$(fts_LIBS) $(libmicrohttpd_LIBS) $(sqlite3_LIBS) $(libarchive_LIBS) 
$(rpm_LIBS) $(jsonc_LIBS) $(libcurl_LIBS) $(lzma_LIBS) -lpthread -ldl
 
 debuginfod_find_SOURCES = debuginfod-find.c
-debuginfod_find_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) 
$(argp_LDADD) $(fts_LIBS) $(jsonc_LIBS)
+debuginfod_find_LDADD = $(libdw) $(libdebuginfod) $(libelf) $(libeu) 
$(argp_LDADD) $(fts_LIBS) $(jsonc_LIBS)
 
 if LIBDEBUGINFOD
 noinst_LIBRARIES = libdebuginfod.a
diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c
index 24ede19af385..2f56f6f4d784 100644
--- a/debuginfod/debuginfod-client.c
+++ b/debuginfod/debuginfod-client.c
@@ -1201,29 +1201,49 @@ perform_queries(CURLM *curlm, CURL **target_handle, 
struct handle_data *data, de
 /* Copy SRC to DEST, s,/,#,g */
 
 static void
-path_escape (const char *src, char *dest)
+path_escape (const char *src, char *dest, size_t dest_len)
 {
-  unsigned q = 0;
+  /* PR32218: Reversibly-escaping src character-by-character, for
+ large enough strings, risks ENAMETOOLONG errors.  For long names,
+ a simple hash based generated name instead, while still
+ attempting to preserve the as much of the content as possible.
+ It's possible that absurd choices of incoming names will collide
+ or still get truncated, but c'est la vie.
+  */
+  const size_t max_dest_len = NAME_MAX/2; /* ENAMETOOLONG seen even on 300-ish 
byte paths. */
+  dest_len = dest_len > max_dest_len ? max_dest_len : dest_len; /* Reduce! */
+  const size_t hash_prefix_destlen = strlen(src)+10; /* DEADBEEF-string\0 */
+  dest_len = dest_len > hash_prefix_destlen ? hash_prefix_destlen : dest_len; 
/* Reduce further! */
 
-  for (unsigned fi=0; q < PATH_MAX-2; fi++) /* -2, escape is 2 chars.  */
-switch (src[fi])
-  {
-  case '\0':
-dest[q] = '\0';
-   return;
-  case '/': /* escape / to prevent dir esca