On Sat, Nov 13, 2021 at 07:10:54PM +0000, Jessica Clarke wrote:
> On 13 Nov 2021, at 19:09, Jessica Clarke <jrt...@freebsd.org> wrote:
> > 
> > On 13 Nov 2021, at 19:06, Konstantin Belousov <kostik...@gmail.com> wrote:
> >> On Sat, Nov 13, 2021 at 08:59:00PM +0200, Konstantin Belousov wrote:
> >>> On Sat, Nov 13, 2021 at 06:29:24PM +0000, Jessica Clarke wrote:
> >>>> On 13 Nov 2021, at 17:57, Jessica Clarke <jrt...@freebsd.org> wrote:
> >>>>> 
> >>>>> On 13 Nov 2021, at 17:54, Jessica Clarke <jrt...@freebsd.org> wrote:
> >>>>>> 
> >>>>>> On 13 Nov 2021, at 17:33, Konstantin Belousov <k...@freebsd.org> wrote:
> >>>>>>> 
> >>>>>>> The branch main has been updated by kib:
> >>>>>>> 
> >>>>>>> URL: https://cgit.FreeBSD.org/src/commit/?id=64ba1f4cf3a6847a1dacf4bab0409d94898fa168
> >>>>>>> 
> >>>>>>> commit 64ba1f4cf3a6847a1dacf4bab0409d94898fa168
> >>>>>>> Author:     Konstantin Belousov <k...@freebsd.org>
> >>>>>>> AuthorDate: 2021-11-13 01:18:13 +0000
> >>>>>>> Commit:     Konstantin Belousov <k...@freebsd.org>
> >>>>>>> CommitDate: 2021-11-13 17:33:13 +0000
> >>>>>>> 
> >>>>>>> rtld: Implement LD_SHOW_AUXV
> >>>>>>> 
> >>>>>>> It dumps auxv as seen by interpreter, right before starting any user
> >>>>>>> code.
> >>>>>>> 
> >>>>>>> Copied from:    glibc
> >>>>>>> Sponsored by:   The FreeBSD Foundation
> >>>>>>> MFC after:      1 week
> >>>>>>> ---
> >>>>>>> libexec/rtld-elf/rtld.1 |  7 +++++-
> >>>>>>> libexec/rtld-elf/rtld.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++
> >>>>>>> 2 files changed, 73 insertions(+), 1 deletion(-)
> >>>>>>> 
> >>>>>>> diff --git a/libexec/rtld-elf/rtld.1 b/libexec/rtld-elf/rtld.1
> >>>>>>> index 187dc105667a..66aa2bdabd17 100644
> >>>>>>> --- a/libexec/rtld-elf/rtld.1
> >>>>>>> +++ b/libexec/rtld-elf/rtld.1
> >>>>>>> @@ -28,7 +28,7 @@
> >>>>>>> .\"
> >>>>>>> .\" $FreeBSD$
> >>>>>>> .\"
> >>>>>>> -.Dd August 15, 2021
> >>>>>>> +.Dd November 13, 2021
> >>>>>>> .Dt RTLD 1
> >>>>>>> .Os
> >>>>>>> .Sh NAME
> >>>>>>> @@ -309,6 +309,11 @@ will process the filtee dependencies of the loaded objects immediately,
> >>>>>>> instead of postponing it until required.
> >>>>>>> Normally, the filtees are opened at the time of the first symbol resolution
> >>>>>>> from the filter object.
> >>>>>>> +.It Ev LD_SHOW_AUXV
> >>>>>>> +If set, causes
> >>>>>>> +.Nm
> >>>>>>> +to dump content of the aux vector to standard output, before passing
> >>>>>>> +control to any user code.
> >>>>>>> .El
> >>>>>>> .Sh DIRECT EXECUTION MODE
> >>>>>>> .Nm
> >>>>>>> diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c
> >>>>>>> index c173c5a6e22e..0475134b0d96 100644
> >>>>>>> --- a/libexec/rtld-elf/rtld.c
> >>>>>>> +++ b/libexec/rtld-elf/rtld.c
> >>>>>>> @@ -104,6 +104,7 @@ static Obj_Entry *dlopen_object(const char *name, int fd, Obj_Entry *refobj,
> >>>>>>> static Obj_Entry *do_load_object(int, const char *, char *, struct stat *, int);
> >>>>>>> static int do_search_info(const Obj_Entry *obj, int, struct dl_serinfo *);
> >>>>>>> static bool donelist_check(DoneList *, const Obj_Entry *);
> >>>>>>> +static void dump_auxv(Elf_Auxinfo **aux_info);
> >>>>>>> static void errmsg_restore(struct dlerror_save *);
> >>>>>>> static struct dlerror_save *errmsg_save(void);
> >>>>>>> static void *fill_search_info(const char *, size_t, void *);
> >>>>>>> @@ -364,6 +365,7 @@ enum {
> >>>>>>>       LD_TRACE_LOADED_OBJECTS_FMT1,
> >>>>>>>       LD_TRACE_LOADED_OBJECTS_FMT2,
> >>>>>>>       LD_TRACE_LOADED_OBJECTS_ALL,
> >>>>>>> +     LD_SHOW_AUXV,
> >>>>>>> };
> >>>>>>> 
> >>>>>>> struct ld_env_var_desc {
> >>>>>>> @@ -396,6 +398,7 @@ static struct ld_env_var_desc ld_env_vars[] = {
> >>>>>>>       LD_ENV_DESC(TRACE_LOADED_OBJECTS_FMT1, false),
> >>>>>>>       LD_ENV_DESC(TRACE_LOADED_OBJECTS_FMT2, false),
> >>>>>>>       LD_ENV_DESC(TRACE_LOADED_OBJECTS_ALL, false),
> >>>>>>> +     LD_ENV_DESC(SHOW_AUXV, false),
> >>>>>>> };
> >>>>>>> 
> >>>>>>> static const char *
> >>>>>>> @@ -857,6 +860,9 @@ _rtld(Elf_Addr *sp, func_ptr_type *exit_proc, Obj_Entry **objp)
> >>>>>>> if (rtld_verify_versions(&list_main) == -1 && !ld_tracing)
> >>>>>>>       rtld_die();
> >>>>>>> 
> >>>>>>> +    if (ld_get_env_var(LD_SHOW_AUXV) != NULL)
> >>>>>>> +       dump_auxv(aux_info);
> >>>>>>> +
> >>>>>>> if (ld_tracing) {             /* We're done */
> >>>>>>>       trace_loaded_objects(obj_main);
> >>>>>>>       exit(0);
> >>>>>>> @@ -6058,6 +6064,67 @@ print_usage(const char *argv0)
> >>>>>>>           "  <args>    Arguments to the executed process\n", argv0);
> >>>>>>> }
> >>>>>>> 
> >>>>>>> +#define      AUXFMT(at, xfmt) [at] = { .name = #at, .fmt = xfmt }
> >>>>>>> +static const struct auxfmt {
> >>>>>>> +     const char *name;
> >>>>>>> +     const char *fmt;
> >>>>>>> +} auxfmts[] = {
> >>>>>>> +     AUXFMT(AT_NULL, NULL),
> >>>>>>> +     AUXFMT(AT_IGNORE, NULL),
> >>>>>>> +     AUXFMT(AT_EXECFD, "%d"),
> >>>>>>> +     AUXFMT(AT_PHDR, "%p"),
> >>>>>>> +     AUXFMT(AT_PHENT, "%u"),
> >>>>>>> +     AUXFMT(AT_PHNUM, "%u"),
> >>>>>>> +     AUXFMT(AT_PAGESZ, "%u"),
> >>>>>>> +     AUXFMT(AT_BASE, "%#lx"),
> >>>>>>> +     AUXFMT(AT_FLAGS, "%#lx"),
> >>>>>>> +     AUXFMT(AT_ENTRY, "%p"),
> >>>>>>> +     AUXFMT(AT_NOTELF, NULL),
> >>>>>>> +     AUXFMT(AT_UID, "%d"),
> >>>>>>> +     AUXFMT(AT_EUID, "%d"),
> >>>>>>> +     AUXFMT(AT_GID, "%d"),
> >>>>>>> +     AUXFMT(AT_EGID, "%d"),
> >>>>>>> +     AUXFMT(AT_EXECPATH, "%s"),
> >>>>>>> +     AUXFMT(AT_CANARY, "%p"),
> >>>>>>> +     AUXFMT(AT_CANARYLEN, "%u"),
> >>>>>>> +     AUXFMT(AT_OSRELDATE, "%u"),
> >>>>>>> +     AUXFMT(AT_NCPUS, "%u"),
> >>>>>>> +     AUXFMT(AT_PAGESIZES, "%p"),
> >>>>>>> +     AUXFMT(AT_PAGESIZESLEN, "%u"),
> >>>>>>> +     AUXFMT(AT_TIMEKEEP, "%p"),
> >>>>>>> +     AUXFMT(AT_STACKPROT, "%#x"),
> >>>>>>> +     AUXFMT(AT_EHDRFLAGS, "%#lx"),
> >>>>>>> +     AUXFMT(AT_HWCAP, "%#lx"),
> >>>>>>> +     AUXFMT(AT_HWCAP2, "%#lx"),
> >>>>>>> +     AUXFMT(AT_BSDFLAGS, "%#lx"),
> >>>>>>> +     AUXFMT(AT_ARGC, "%u"),
> >>>>>>> +     AUXFMT(AT_ARGV, "%p"),
> >>>>>>> +     AUXFMT(AT_ENVC, "%p"),
> >>>>>>> +     AUXFMT(AT_ENVV, "%p"),
> >>>>>>> +     AUXFMT(AT_PS_STRINGS, "%p"),
> >>>>>>> +     AUXFMT(AT_FXRNG, "%p"),
> >>>>>>> +};
> >>>>>>> +
> >>>>>>> +static void
> >>>>>>> +dump_auxv(Elf_Auxinfo **aux_info)
> >>>>>>> +{
> >>>>>>> +     Elf_Auxinfo *auxp;
> >>>>>>> +     const struct auxfmt *fmt;
> >>>>>>> +     int i;
> >>>>>>> +
> >>>>>>> +     for (i = 0; i < AT_COUNT; i++) {
> >>>>>>> +             auxp = aux_info[i];
> >>>>>>> +             if (auxp == NULL)
> >>>>>>> +                     continue;
> >>>>>>> +             fmt = &auxfmts[i];
> >>>>>>> +             if (fmt->fmt == NULL)
> >>>>>>> +                     continue;
> >>>>>>> +             rtld_fdprintf(STDOUT_FILENO, "%s:\t", fmt->name);
> >>>>>>> +             rtld_fdprintfx(STDOUT_FILENO, fmt->fmt, auxp->a_un.a_ptr);
> >>>>>>> +             rtld_fdprintf(STDOUT_FILENO, "\n");
> >>>>>> 
> >>>>>> This is undefined behaviour, breaks CHERI, and totally unnecessary. You
> >>>>>> have a handful of cases here, just make an enum and have separate
> >>>>>> rtld_fdprintf calls.
> >>>> 
> >>>> In particular, ignoring CHERI, unsigned ints are sign-extended to 64
> >>>> bits on MIPS and RISC-V. Thus by passing a 64-bit value but using a %u,
> >>>> you are violating the calling convention. I can’t currently get GCC or
> >>>> Clang to exploit the fact that varargs arguments are sign-extended, but
> >>>> on MIPS, and RISC-V GCC (Clang is currently stupid and round-trips via
> >>>> memory even when the va_arg calls have no branching surrounding them,
> >>>> rather than just grabbing from the register) there is a redundant
> >>>> sext.w that can legally be optimised out, but would be broken by this
> >>>> calling convention violation.
> >>> I might understand the argument that all non-pointer formats for auxv
> >>> should be longs, i.e. %lu/%ld/%lx, but this is the only problem I see
> >>> there. We do rely on having specific representations for addresses and
> >>> longs, and a low-level component such as rtld has every right to rely on
> >>> this fact, same as the VM subsystem or memory allocators.
> >>> 
> >>> In fact, the ELF spec relies on this as well.
> >>> Our arches are either ILP32 or LP64.
> >>> 
> >>>> 
> >>>> Then CHERI makes it worse because a_ptr and a_val do not have the same
> >>>> representation, although in practice I think passing a_ptr and nothing
> >>>> further does end up working on CHERI-RISC-V and Morello, just not
> >>>> CHERI-MIPS due to being big-endian.
> >> 
> >> Ok, the following should be enough for CHERI, right?
> >> 
> >> diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c
> >> index 0475134b0d96..cf467ae7aacd 100644
> >> --- a/libexec/rtld-elf/rtld.c
> >> +++ b/libexec/rtld-elf/rtld.c
> >> @@ -6071,33 +6071,33 @@ static const struct auxfmt {
> >> } auxfmts[] = {
> >>    AUXFMT(AT_NULL, NULL),
> >>    AUXFMT(AT_IGNORE, NULL),
> >> -  AUXFMT(AT_EXECFD, "%d"),
> >> +  AUXFMT(AT_EXECFD, "%ld"),
> >>    AUXFMT(AT_PHDR, "%p"),
> >> -  AUXFMT(AT_PHENT, "%u"),
> >> -  AUXFMT(AT_PHNUM, "%u"),
> >> -  AUXFMT(AT_PAGESZ, "%u"),
> >> +  AUXFMT(AT_PHENT, "%lu"),
> >> +  AUXFMT(AT_PHNUM, "%lu"),
> >> +  AUXFMT(AT_PAGESZ, "%lu"),
> >>    AUXFMT(AT_BASE, "%#lx"),
> >>    AUXFMT(AT_FLAGS, "%#lx"),
> >>    AUXFMT(AT_ENTRY, "%p"),
> >>    AUXFMT(AT_NOTELF, NULL),
> >> -  AUXFMT(AT_UID, "%d"),
> >> -  AUXFMT(AT_EUID, "%d"),
> >> -  AUXFMT(AT_GID, "%d"),
> >> -  AUXFMT(AT_EGID, "%d"),
> >> +  AUXFMT(AT_UID, "%ld"),
> >> +  AUXFMT(AT_EUID, "%ld"),
> >> +  AUXFMT(AT_GID, "%ld"),
> >> +  AUXFMT(AT_EGID, "%ld"),
> >>    AUXFMT(AT_EXECPATH, "%s"),
> >>    AUXFMT(AT_CANARY, "%p"),
> >> -  AUXFMT(AT_CANARYLEN, "%u"),
> >> -  AUXFMT(AT_OSRELDATE, "%u"),
> >> -  AUXFMT(AT_NCPUS, "%u"),
> >> +  AUXFMT(AT_CANARYLEN, "%lu"),
> >> +  AUXFMT(AT_OSRELDATE, "%lu"),
> >> +  AUXFMT(AT_NCPUS, "%lu"),
> >>    AUXFMT(AT_PAGESIZES, "%p"),
> >> -  AUXFMT(AT_PAGESIZESLEN, "%u"),
> >> +  AUXFMT(AT_PAGESIZESLEN, "%lu"),
> >>    AUXFMT(AT_TIMEKEEP, "%p"),
> >> -  AUXFMT(AT_STACKPROT, "%#x"),
> >> +  AUXFMT(AT_STACKPROT, "%#lx"),
> >>    AUXFMT(AT_EHDRFLAGS, "%#lx"),
> >>    AUXFMT(AT_HWCAP, "%#lx"),
> >>    AUXFMT(AT_HWCAP2, "%#lx"),
> >>    AUXFMT(AT_BSDFLAGS, "%#lx"),
> >> -  AUXFMT(AT_ARGC, "%u"),
> >> +  AUXFMT(AT_ARGC, "%lu"),
> >>    AUXFMT(AT_ARGV, "%p"),
> >>    AUXFMT(AT_ENVC, "%p"),
> >>    AUXFMT(AT_ENVV, "%p"),
> >> @@ -6105,6 +6105,15 @@ static const struct auxfmt {
> >>    AUXFMT(AT_FXRNG, "%p"),
> >> };
> >> 
> >> +static bool
> >> +is_ptr_fmt(const char *fmt)
> >> +{
> >> +  char last;
> >> +
> >> +  last = fmt[strlen(fmt) - 1];
> >> +  return (last == 'p' || last == 's');
> >> +}
> >> +
> >> static void
> >> dump_auxv(Elf_Auxinfo **aux_info)
> >> {
> >> @@ -6120,7 +6129,8 @@ dump_auxv(Elf_Auxinfo **aux_info)
> >>            if (fmt->fmt == NULL)
> >>                    continue;
> >>            rtld_fdprintf(STDOUT_FILENO, "%s:\t", fmt->name);
> >> -          rtld_fdprintfx(STDOUT_FILENO, fmt->fmt, auxp->a_un.a_ptr);
> >> +          rtld_fdprintfx(STDOUT_FILENO, fmt->fmt, is_ptr_fmt(fmt->fmt) ?
> >> +              auxp->a_un.a_ptr : auxp->a_un.a_val);
> >>            rtld_fdprintf(STDOUT_FILENO, "\n");
> >>    }
> >> }
> > 
> > That should indeed work, though I’d argue it’s still not as nice as
> > avoiding rtld_fdprintfx entirely.
> 
> Wait, no, it doesn’t, the ternary means both operands need to have the
> same type, so you end up implicitly casting the long to a pointer. You
> need a real if and two different rtld_fdprintfx calls (or just do it as
> I’ve suggested).
Ok, real if() then.

diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c
index 0475134b0d96..d5c3d2893582 100644
--- a/libexec/rtld-elf/rtld.c
+++ b/libexec/rtld-elf/rtld.c
@@ -6071,33 +6071,33 @@ static const struct auxfmt {
 } auxfmts[] = {
        AUXFMT(AT_NULL, NULL),
        AUXFMT(AT_IGNORE, NULL),
-       AUXFMT(AT_EXECFD, "%d"),
+       AUXFMT(AT_EXECFD, "%ld"),
        AUXFMT(AT_PHDR, "%p"),
-       AUXFMT(AT_PHENT, "%u"),
-       AUXFMT(AT_PHNUM, "%u"),
-       AUXFMT(AT_PAGESZ, "%u"),
+       AUXFMT(AT_PHENT, "%lu"),
+       AUXFMT(AT_PHNUM, "%lu"),
+       AUXFMT(AT_PAGESZ, "%lu"),
        AUXFMT(AT_BASE, "%#lx"),
        AUXFMT(AT_FLAGS, "%#lx"),
        AUXFMT(AT_ENTRY, "%p"),
        AUXFMT(AT_NOTELF, NULL),
-       AUXFMT(AT_UID, "%d"),
-       AUXFMT(AT_EUID, "%d"),
-       AUXFMT(AT_GID, "%d"),
-       AUXFMT(AT_EGID, "%d"),
+       AUXFMT(AT_UID, "%ld"),
+       AUXFMT(AT_EUID, "%ld"),
+       AUXFMT(AT_GID, "%ld"),
+       AUXFMT(AT_EGID, "%ld"),
        AUXFMT(AT_EXECPATH, "%s"),
        AUXFMT(AT_CANARY, "%p"),
-       AUXFMT(AT_CANARYLEN, "%u"),
-       AUXFMT(AT_OSRELDATE, "%u"),
-       AUXFMT(AT_NCPUS, "%u"),
+       AUXFMT(AT_CANARYLEN, "%lu"),
+       AUXFMT(AT_OSRELDATE, "%lu"),
+       AUXFMT(AT_NCPUS, "%lu"),
        AUXFMT(AT_PAGESIZES, "%p"),
-       AUXFMT(AT_PAGESIZESLEN, "%u"),
+       AUXFMT(AT_PAGESIZESLEN, "%lu"),
        AUXFMT(AT_TIMEKEEP, "%p"),
-       AUXFMT(AT_STACKPROT, "%#x"),
+       AUXFMT(AT_STACKPROT, "%#lx"),
        AUXFMT(AT_EHDRFLAGS, "%#lx"),
        AUXFMT(AT_HWCAP, "%#lx"),
        AUXFMT(AT_HWCAP2, "%#lx"),
        AUXFMT(AT_BSDFLAGS, "%#lx"),
-       AUXFMT(AT_ARGC, "%u"),
+       AUXFMT(AT_ARGC, "%lu"),
        AUXFMT(AT_ARGV, "%p"),
        AUXFMT(AT_ENVC, "%p"),
        AUXFMT(AT_ENVV, "%p"),
@@ -6105,6 +6105,15 @@ static const struct auxfmt {
        AUXFMT(AT_FXRNG, "%p"),
 };
 
+static bool
+is_ptr_fmt(const char *fmt)
+{
+       char last;
+
+       last = fmt[strlen(fmt) - 1];
+       return (last == 'p' || last == 's');
+}
+
 static void
 dump_auxv(Elf_Auxinfo **aux_info)
 {
@@ -6120,7 +6129,13 @@ dump_auxv(Elf_Auxinfo **aux_info)
                if (fmt->fmt == NULL)
                        continue;
                rtld_fdprintf(STDOUT_FILENO, "%s:\t", fmt->name);
-               rtld_fdprintfx(STDOUT_FILENO, fmt->fmt, auxp->a_un.a_ptr);
+               if (is_ptr_fmt(fmt->fmt)) {
+                       rtld_fdprintfx(STDOUT_FILENO, fmt->fmt,
+                           auxp->a_un.a_ptr);
+               } else {
+                       rtld_fdprintfx(STDOUT_FILENO, fmt->fmt,
+                           auxp->a_un.a_val);
+               }
                rtld_fdprintf(STDOUT_FILENO, "\n");
        }
 }

Reply via email to