Re: [PATCH v2] Fix leb128 reading

Mark Wielaard Thu, 29 Oct 2020 16:06:31 -0700

Hi Tom,

On Wed, 2020-10-28 at 17:26 -0600, Tom Tromey wrote:
> PR 26773 points out that some sleb128 values are decoded incorrectly.
> 
> This version of the fix only examines the sleb128 conversion.
> Overlong encodings are not handled, and the uleb128 decoders are not
> touched.  The approach taken here is to do the work in an unsigned
> type, and then rely on an implementation-defined cast to convert to
> signed.


I like this variant, it still handles longer than necessary encodings,
just not those that use more than 10 bytes. It also removes all the
tricky cases I had questions about. And keeps the parts I already acked
last time.

> Signed-off-by: Tom Tromey <t...@tromey.com>

> diff --git a/.gitignore b/.gitignore
> index c9790941..d737b14d 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -151,6 +151,7 @@ Makefile.in
>  /tests/get-units-invalid
>  /tests/get-units-split
>  /tests/hash
> +/tests/leb128
>  /tests/line2addr
>  /tests/low_high_pc
>  /tests/msg_tst
> diff --git a/ChangeLog b/ChangeLog
> index 72e8397c..4c8699f8 100644
> --- a/ChangeLog
> +++ b/ChangeLog
> @@ -1,3 +1,7 @@
> +2020-10-28  Tom Tromey  <t...@tromey.com>
> +
> +     * .gitignore: Add /tests/leb128.
> +
>  2020-10-01  Frank Ch. Eigler  <f...@redhat.com>

Ack.

> diff --git a/libdw/ChangeLog b/libdw/ChangeLog
> index 731c7e79..289bb4c9 100644
> --- a/libdw/ChangeLog
> +++ b/libdw/ChangeLog
> @@ -1,3 +1,13 @@
> +2020-10-28  Tom Tromey  <t...@tromey.com>
> +
> +     PR26773
> +     * dwarf_getlocation.c (store_implicit_value): Use
> +     __libdw_get_uleb128_unchecked.
> +     * memory-access.h (get_sleb128_step): Assume unsigned type for
> +     'var'.
> +     (__libdw_get_sleb128, __libdw_get_sleb128_unchecked): Work in
> +     unsigned type.  Handle final byte.
> +
>  2020-10-19  Mark Wielaard  <m...@klomp.org>
>  
>       * dwarf_frame_register.c (dwarf_frame_register): Declare ops_mem
> diff --git a/libdw/dwarf_getlocation.c b/libdw/dwarf_getlocation.c
> index 4617f9e9..f2bad5a9 100644
> --- a/libdw/dwarf_getlocation.c
> +++ b/libdw/dwarf_getlocation.c
> @@ -130,9 +130,8 @@ store_implicit_value (Dwarf *dbg, void **cache, Dwarf_Op 
> *op)
>    struct loc_block_s *block = libdw_alloc (dbg, struct loc_block_s,
>                                          sizeof (struct loc_block_s), 1);
>    const unsigned char *data = (const unsigned char *) (uintptr_t) 
> op->number2;
> -  uint64_t len = __libdw_get_uleb128 (&data, data + len_leb128 (Dwarf_Word));
> -  if (unlikely (len != op->number))
> -    return -1;
> +  /* Skip the block length.  */
> +  __libdw_get_uleb128_unchecked (&data);
>    block->addr = op;
>    block->data = (unsigned char *) data;
>    block->length = op->number;

Ack.

> diff --git a/libdw/memory-access.h b/libdw/memory-access.h
> index 14436a71..8b2386ee 100644
> --- a/libdw/memory-access.h
> +++ b/libdw/memory-access.h
> @@ -113,19 +113,22 @@ __libdw_get_uleb128_unchecked (const unsigned char 
> **addrp)
>  #define get_sleb128_step(var, addr, nth)                                   \
>    do {                                                                       
>       \
>      unsigned char __b = *(addr)++;                                         \
> +    (var) |= (typeof (var)) (__b & 0x7f) << ((nth) * 7);                   \
>      if (likely ((__b & 0x80) == 0))                                        \
>        {                                                                      
>       \
> -     struct { signed int i:7; } __s = { .i = __b };                        \
> -     (var) |= (typeof (var)) __s.i * ((typeof (var)) 1 << ((nth) * 7));    \
> +     if ((__b & 0x40) != 0)                                                \
> +       (var) |= - ((typeof (var)) 1 << (((nth) + 1) * 7));                 \
>       return (var);                                                         \
>        }                                                                      
>       \
> -    (var) |= (typeof (var)) (__b & 0x7f) << ((nth) * 7);                   \
>    } while (0)

Here it should be noted that var is already the unsigned acc from
__libdw_get_sleb, so the left shift is always fine. The value
extraction is now the same for both the continuation path and the last
byte path.

>  static inline int64_t
>  __libdw_get_sleb128 (const unsigned char **addrp, const unsigned char *end)
>  {
> -  int64_t acc = 0;
> +  /* Do the work in an unsigned type, but use implementation-defined
> +     behavior to cast to signed on return.  This avoids some undefined
> +     behavior when shifting.  */
> +  uint64_t acc = 0;
>  
>    /* Unroll the first step to help the compiler optimize
>       for the common single-byte case.  */
> @@ -134,6 +137,20 @@ __libdw_get_sleb128 (const unsigned char **addrp, const 
> unsigned char *end)
>    const size_t max = __libdw_max_len_sleb128 (*addrp - 1, end);
>    for (size_t i = 1; i < max; ++i)
>      get_sleb128_step (acc, *addrp, i);
> +  if (*addrp == end)
> +    return INT64_MAX;

OK, we keep the __libdw_max_len_sleb128 as is, so it is one smaller
than the actual max number of bytes, but reaching the end of data is
still overflow.

> +  /* There might be one extra byte.  */
> +  unsigned char b = **addrp;
> +  ++*addrp;
> +  if (likely ((b & 0x80) == 0))
> +    {
> +      /* We only need the low bit of the final byte, and as it is the
> +      sign bit, we don't need to do anything else here.  */
> +      acc |= ((typeof (acc)) b) << 7 * max;
> +      return acc;
> +    }
> +
>    /* Other implementations set VALUE to INT_MAX in this
>       case.  So we better do this as well.  */
>    return INT64_MAX;

Nice and simple.

> @@ -142,7 +159,10 @@ __libdw_get_sleb128 (const unsigned char **addrp, const 
> unsigned char *end)
>  static inline int64_t
>  __libdw_get_sleb128_unchecked (const unsigned char **addrp)
>  {
> -  int64_t acc = 0;
> +  /* Do the work in an unsigned type, but use implementation-defined
> +     behavior to cast to signed on return.  This avoids some undefined
> +     behavior when shifting.  */
> +  uint64_t acc = 0;
>  
>    /* Unroll the first step to help the compiler optimize
>       for the common single-byte case.  */
> @@ -152,6 +172,18 @@ __libdw_get_sleb128_unchecked (const unsigned char 
> **addrp)
>    const size_t max = len_leb128 (int64_t) - 1;
>    for (size_t i = 1; i < max; ++i)
>      get_sleb128_step (acc, *addrp, i);

Same as above, we just substract one here when using len_leb128 instead
of using __libdw_max_len_sleb128.

> +  /* There might be one extra byte.  */
> +  unsigned char b = **addrp;
> +  ++*addrp;
> +  if (likely ((b & 0x80) == 0))
> +    {
> +      /* We only need the low bit of the final byte, and as it is the
> +      sign bit, we don't need to do anything else here.  */
> +      acc |= ((typeof (acc)) b) << 7 * max;
> +      return acc;
> +    }

OK, as above.

>    /* Other implementations set VALUE to INT_MAX in this
>       case.  So we better do this as well.  */
>    return INT64_MAX;
> diff --git a/tests/ChangeLog b/tests/ChangeLog
> index e9d1e260..91aeadaf 100644
> --- a/tests/ChangeLog
> +++ b/tests/ChangeLog
> @@ -1,3 +1,10 @@
> +2020-10-28  Tom Tromey  <t...@tromey.com>
> +
> +     PR26773
> +     * Makefile.am (check_PROGRAMS, TESTS): Add leb128.
> +     (leb128_LDADD): New variable.
> +     * leb128.c: New file.
> +
>  2020-10-19  Mark Wielaard  <m...@klomp.org>
>  
>       * addrcfi.c (print_register): Make ops_mem 3 elements.
> diff --git a/tests/Makefile.am b/tests/Makefile.am
> index bc5d034f..1b51ab8d 100644
> --- a/tests/Makefile.am
> +++ b/tests/Makefile.am
> @@ -63,7 +63,7 @@ check_PROGRAMS = arextract arsymtest newfile saridx 
> scnnames sectiondump \
>                 all-dwarf-ranges unit-info next_cfi \
>                 elfcopy addsections xlate_notes elfrdwrnop \
>                 dwelf_elf_e_machine_string \
> -               getphdrnum
> +               getphdrnum leb128
>  
>  asm_TESTS = asm-tst1 asm-tst2 asm-tst3 asm-tst4 asm-tst5 \
>           asm-tst6 asm-tst7 asm-tst8 asm-tst9
> @@ -185,7 +185,8 @@ TESTS = run-arextract.sh run-arsymtest.sh run-ar.sh 
> newfile test-nlist \
>       run-elfclassify.sh run-elfclassify-self.sh \
>       run-disasm-riscv64.sh \
>       run-pt_gnu_prop-tests.sh \
> -     run-getphdrnum.sh run-test-includes.sh
> +     run-getphdrnum.sh run-test-includes.sh \
> +     leb128
>  
>  if !BIARCH
>  export ELFUTILS_DISABLE_BIARCH = 1
> @@ -694,6 +695,7 @@ xlate_notes_LDADD = $(libelf)
>  elfrdwrnop_LDADD = $(libelf)
>  dwelf_elf_e_machine_string_LDADD = $(libelf) $(libdw)
>  getphdrnum_LDADD = $(libelf) $(libdw)
> +leb128_LDADD = $(libelf) $(libdw)

ack

>  # We want to test the libelf header against the system elf.h header.
>  # Don't include any -I CPPFLAGS. Except when we install our own elf.h.
> diff --git a/tests/leb128.c b/tests/leb128.c
> new file mode 100644
> index 00000000..6994a436
> --- /dev/null
> +++ b/tests/leb128.c
> @@ -0,0 +1,140 @@
> +/* Test program for leb128
> +   Copyright (C) 2020 Tom Tromey
> +   This file is part of elfutils.
> +
> +   This file is free software; you can redistribute it and/or modify
> +   it under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3 of the License, or
> +   (at your option) any later version.
> +
> +   elfutils is distributed in the hope that it will be useful, but
> +   WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +   GNU General Public License for more details.
> +
> +   You should have received a copy of the GNU General Public License
> +   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
> +
> +#include <config.h>
> +#include <stddef.h>
> +#include <stdbool.h>
> +#include <libdw.h>
> +#include "../libdw/libdwP.h"
> +#include "../libdw/memory-access.h"
> +
> +#define OK 0
> +#define FAIL 1
> +
> +static const unsigned char v0[] = { 0x0 };
> +static const unsigned char v23[] = { 23 };
> +static const unsigned char vm_2[] = { 0x7e };
> +static const unsigned char v128[] = { 0x80, 0x01 };
> +static const unsigned char vm_128[] = { 0x80, 0x7f };
> +static const unsigned char vhuge[] =
> +  {
> +    0xff, 0xff, 0xff, 0xff, 0xff,
> +    0xff, 0xff, 0xff, 0xff, 0x0
> +  };
> +static const unsigned char most_positive[] =
> +  {
> +    0xff, 0xff, 0xff, 0xff, 0xff,
> +    0xff, 0xff, 0xff, 0x3f
> +  };
> +static const unsigned char most_negative[] =
> +  {
> +    0x80, 0x80, 0x80, 0x80, 0x80,
> +    0x80, 0x80, 0x80, 0x40
> +  };
> +static const unsigned char minus_one[] =
> +  {
> +    0xff, 0xff, 0xff, 0xff, 0xff,
> +    0xff, 0xff, 0xff, 0x7f
> +  };
> +
> +static int
> +test_one_sleb (const unsigned char *data, size_t len, int64_t expect)
> +{
> +  int64_t value;
> +  const unsigned char *p;
> +
> +  p = data;
> +  get_sleb128 (value, p, p + len);
> +  if (value != expect || p != data + len)
> +    return FAIL;
> +
> +  p = data;
> +  get_sleb128_unchecked (value, p);
> +  if (value != expect || p != data + len)
> +    return FAIL;
> +
> +  return OK;
> +}
> +
> +static int
> +test_sleb (void)
> +{
> +#define TEST(ARRAY, V)                                     \
> +  if (test_one_sleb (ARRAY, sizeof (ARRAY), V) != OK) \
> +    return FAIL;
> +
> +  TEST (v0, 0);
> +  TEST (v23, 23);
> +  TEST (vm_2, -2);
> +  TEST (v128, 128);
> +  TEST (vm_128, -128);
> +  TEST (vhuge, 9223372036854775807ll);
> +  TEST (most_positive, 4611686018427387903ll);
> +  TEST (most_negative, -4611686018427387904ll);
> +  TEST (minus_one, -1);
> +
> +#undef TEST
> +
> +  return OK;
> +}
> +
> +static int
> +test_one_uleb (const unsigned char *data, size_t len, uint64_t expect)
> +{
> +  uint64_t value;
> +  const unsigned char *p;
> +
> +  p = data;
> +  get_uleb128 (value, p, p + len);
> +  if (value != expect || p != data + len)
> +    return FAIL;
> +
> +  p = data;
> +  get_uleb128_unchecked (value, p);
> +  if (value != expect || p != data + len)
> +    return FAIL;
> +
> +  return OK;
> +}
> +
> +static int
> +test_uleb (void)
> +{
> +#define TEST(ARRAY, V)                                     \
> +  if (test_one_uleb (ARRAY, sizeof (ARRAY), V) != OK) \
> +    return FAIL;
> +
> +  TEST (v0, 0);
> +  TEST (v23, 23);
> +  TEST (vm_2, 126);
> +  TEST (v128, 128);
> +  TEST (vm_128, 16256);
> +  TEST (vhuge, 9223372036854775807ull);
> +  TEST (most_positive, 4611686018427387903ull);
> +  TEST (most_negative, 4611686018427387904ull);
> +  TEST (minus_one, 9223372036854775807ull);
> +
> +#undef TEST
> +
> +  return OK;
> +}
> +
> +int
> +main (void)
> +{
> +  return test_sleb () || test_uleb ();
> +}

Looks good. While reviewing I did try out some extra corner cases that
I added (see attached). It also shows that some values can have
multiple representations. I added them to your commit before I pushed
it. Hope you don't mind.

Thanks,

Mark

diff --git a/tests/leb128.c b/tests/leb128.c
index 6994a436..47b57c0d 100644
--- a/tests/leb128.c
+++ b/tests/leb128.c
@@ -18,6 +18,7 @@
 #include <config.h>
 #include <stddef.h>
 #include <stdbool.h>
+#include <stdint.h>
 #include <libdw.h>
 #include "../libdw/libdwP.h"
 #include "../libdw/memory-access.h"
@@ -26,10 +27,16 @@
 #define FAIL 1
 
 static const unsigned char v0[] = { 0x0 };
+static const unsigned char v1[] = { 0x1 };
 static const unsigned char v23[] = { 23 };
+static const unsigned char vm_1[] = { 0x7f };
 static const unsigned char vm_2[] = { 0x7e };
+static const unsigned char s127[] = { 0xff, 0x00 };
 static const unsigned char v128[] = { 0x80, 0x01 };
+static const unsigned char v129[] = { 0x81, 0x01 };
+static const unsigned char vm_127[] = { 0x81, 0x7f };
 static const unsigned char vm_128[] = { 0x80, 0x7f };
+static const unsigned char vm_129[] = { 0xff, 0x7e };
 static const unsigned char vhuge[] =
   {
     0xff, 0xff, 0xff, 0xff, 0xff,
@@ -50,6 +57,16 @@ static const unsigned char minus_one[] =
     0xff, 0xff, 0xff, 0xff, 0xff,
     0xff, 0xff, 0xff, 0x7f
   };
+static const unsigned char int64_max_m1[] =
+  {
+    0xfe, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0x00
+  };
+static const unsigned char int64_min_p1[] =
+  {
+    0x81, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x7f
+  };
 
 static int
 test_one_sleb (const unsigned char *data, size_t len, int64_t expect)
@@ -78,14 +95,22 @@ test_sleb (void)
     return FAIL;
 
   TEST (v0, 0);
+  TEST (v1, 1);
   TEST (v23, 23);
+  TEST (vm_1, -1);
   TEST (vm_2, -2);
+  TEST (s127, 127);
   TEST (v128, 128);
+  TEST (v129, 129);
+  TEST (vm_127, -127);
   TEST (vm_128, -128);
+  TEST (vm_129, -129);
   TEST (vhuge, 9223372036854775807ll);
   TEST (most_positive, 4611686018427387903ll);
   TEST (most_negative, -4611686018427387904ll);
   TEST (minus_one, -1);
+  TEST (int64_max_m1, INT64_MAX - 1);
+  TEST (int64_min_p1, INT64_MIN + 1);
 
 #undef TEST
 
@@ -119,14 +144,22 @@ test_uleb (void)
     return FAIL;
 
   TEST (v0, 0);
+  TEST (v1, 1);
   TEST (v23, 23);
+  TEST (vm_1, 127);
   TEST (vm_2, 126);
+  TEST (s127, 127);
   TEST (v128, 128);
+  TEST (v129, 129);
+  TEST (vm_127, 16257);
   TEST (vm_128, 16256);
+  TEST (vm_129, 16255);
   TEST (vhuge, 9223372036854775807ull);
   TEST (most_positive, 4611686018427387903ull);
   TEST (most_negative, 4611686018427387904ull);
   TEST (minus_one, 9223372036854775807ull);
+  TEST (int64_max_m1, INT64_MAX - 1);
+  TEST (int64_min_p1, INT64_MIN + 1);
 
 #undef TEST

Re: [PATCH v2] Fix leb128 reading

Reply via email to