Hi Tom, On Wed, 2020-10-28 at 17:26 -0600, Tom Tromey wrote: > PR 26773 points out that some sleb128 values are decoded incorrectly. > > This version of the fix only examines the sleb128 conversion. > Overlong encodings are not handled, and the uleb128 decoders are not > touched. The approach taken here is to do the work in an unsigned > type, and then rely on an implementation-defined cast to convert to > signed.
I like this variant, it still handles longer than necessary encodings, just not those that use more than 10 bytes. It also removes all the tricky cases I had questions about. And keeps the parts I already acked last time. > Signed-off-by: Tom Tromey <t...@tromey.com> > diff --git a/.gitignore b/.gitignore > index c9790941..d737b14d 100644 > --- a/.gitignore > +++ b/.gitignore > @@ -151,6 +151,7 @@ Makefile.in > /tests/get-units-invalid > /tests/get-units-split > /tests/hash > +/tests/leb128 > /tests/line2addr > /tests/low_high_pc > /tests/msg_tst > diff --git a/ChangeLog b/ChangeLog > index 72e8397c..4c8699f8 100644 > --- a/ChangeLog > +++ b/ChangeLog > @@ -1,3 +1,7 @@ > +2020-10-28 Tom Tromey <t...@tromey.com> > + > + * .gitignore: Add /tests/leb128. > + > 2020-10-01 Frank Ch. Eigler <f...@redhat.com> Ack. > diff --git a/libdw/ChangeLog b/libdw/ChangeLog > index 731c7e79..289bb4c9 100644 > --- a/libdw/ChangeLog > +++ b/libdw/ChangeLog > @@ -1,3 +1,13 @@ > +2020-10-28 Tom Tromey <t...@tromey.com> > + > + PR26773 > + * dwarf_getlocation.c (store_implicit_value): Use > + __libdw_get_uleb128_unchecked. > + * memory-access.h (get_sleb128_step): Assume unsigned type for > + 'var'. > + (__libdw_get_sleb128, __libdw_get_sleb128_unchecked): Work in > + unsigned type. Handle final byte. > + > 2020-10-19 Mark Wielaard <m...@klomp.org> > > * dwarf_frame_register.c (dwarf_frame_register): Declare ops_mem > diff --git a/libdw/dwarf_getlocation.c b/libdw/dwarf_getlocation.c > index 4617f9e9..f2bad5a9 100644 > --- a/libdw/dwarf_getlocation.c > +++ b/libdw/dwarf_getlocation.c > @@ -130,9 +130,8 @@ store_implicit_value (Dwarf *dbg, void **cache, Dwarf_Op > *op) > struct loc_block_s *block = libdw_alloc (dbg, struct loc_block_s, > sizeof (struct loc_block_s), 1); > const unsigned char *data = (const unsigned char *) (uintptr_t) > op->number2; > - uint64_t len = __libdw_get_uleb128 (&data, data + len_leb128 (Dwarf_Word)); > - if (unlikely (len != op->number)) > - return -1; > + /* Skip the block length. */ > + __libdw_get_uleb128_unchecked (&data); > block->addr = op; > block->data = (unsigned char *) data; > block->length = op->number; Ack. > diff --git a/libdw/memory-access.h b/libdw/memory-access.h > index 14436a71..8b2386ee 100644 > --- a/libdw/memory-access.h > +++ b/libdw/memory-access.h > @@ -113,19 +113,22 @@ __libdw_get_uleb128_unchecked (const unsigned char > **addrp) > #define get_sleb128_step(var, addr, nth) \ > do { > \ > unsigned char __b = *(addr)++; \ > + (var) |= (typeof (var)) (__b & 0x7f) << ((nth) * 7); \ > if (likely ((__b & 0x80) == 0)) \ > { > \ > - struct { signed int i:7; } __s = { .i = __b }; \ > - (var) |= (typeof (var)) __s.i * ((typeof (var)) 1 << ((nth) * 7)); \ > + if ((__b & 0x40) != 0) \ > + (var) |= - ((typeof (var)) 1 << (((nth) + 1) * 7)); \ > return (var); \ > } > \ > - (var) |= (typeof (var)) (__b & 0x7f) << ((nth) * 7); \ > } while (0) Here it should be noted that var is already the unsigned acc from __libdw_get_sleb, so the left shift is always fine. The value extraction is now the same for both the continuation path and the last byte path. > static inline int64_t > __libdw_get_sleb128 (const unsigned char **addrp, const unsigned char *end) > { > - int64_t acc = 0; > + /* Do the work in an unsigned type, but use implementation-defined > + behavior to cast to signed on return. This avoids some undefined > + behavior when shifting. */ > + uint64_t acc = 0; > > /* Unroll the first step to help the compiler optimize > for the common single-byte case. */ > @@ -134,6 +137,20 @@ __libdw_get_sleb128 (const unsigned char **addrp, const > unsigned char *end) > const size_t max = __libdw_max_len_sleb128 (*addrp - 1, end); > for (size_t i = 1; i < max; ++i) > get_sleb128_step (acc, *addrp, i); > + if (*addrp == end) > + return INT64_MAX; OK, we keep the __libdw_max_len_sleb128 as is, so it is one smaller than the actual max number of bytes, but reaching the end of data is still overflow. > + /* There might be one extra byte. */ > + unsigned char b = **addrp; > + ++*addrp; > + if (likely ((b & 0x80) == 0)) > + { > + /* We only need the low bit of the final byte, and as it is the > + sign bit, we don't need to do anything else here. */ > + acc |= ((typeof (acc)) b) << 7 * max; > + return acc; > + } > + > /* Other implementations set VALUE to INT_MAX in this > case. So we better do this as well. */ > return INT64_MAX; Nice and simple. > @@ -142,7 +159,10 @@ __libdw_get_sleb128 (const unsigned char **addrp, const > unsigned char *end) > static inline int64_t > __libdw_get_sleb128_unchecked (const unsigned char **addrp) > { > - int64_t acc = 0; > + /* Do the work in an unsigned type, but use implementation-defined > + behavior to cast to signed on return. This avoids some undefined > + behavior when shifting. */ > + uint64_t acc = 0; > > /* Unroll the first step to help the compiler optimize > for the common single-byte case. */ > @@ -152,6 +172,18 @@ __libdw_get_sleb128_unchecked (const unsigned char > **addrp) > const size_t max = len_leb128 (int64_t) - 1; > for (size_t i = 1; i < max; ++i) > get_sleb128_step (acc, *addrp, i); Same as above, we just substract one here when using len_leb128 instead of using __libdw_max_len_sleb128. > + /* There might be one extra byte. */ > + unsigned char b = **addrp; > + ++*addrp; > + if (likely ((b & 0x80) == 0)) > + { > + /* We only need the low bit of the final byte, and as it is the > + sign bit, we don't need to do anything else here. */ > + acc |= ((typeof (acc)) b) << 7 * max; > + return acc; > + } OK, as above. > /* Other implementations set VALUE to INT_MAX in this > case. So we better do this as well. */ > return INT64_MAX; > diff --git a/tests/ChangeLog b/tests/ChangeLog > index e9d1e260..91aeadaf 100644 > --- a/tests/ChangeLog > +++ b/tests/ChangeLog > @@ -1,3 +1,10 @@ > +2020-10-28 Tom Tromey <t...@tromey.com> > + > + PR26773 > + * Makefile.am (check_PROGRAMS, TESTS): Add leb128. > + (leb128_LDADD): New variable. > + * leb128.c: New file. > + > 2020-10-19 Mark Wielaard <m...@klomp.org> > > * addrcfi.c (print_register): Make ops_mem 3 elements. > diff --git a/tests/Makefile.am b/tests/Makefile.am > index bc5d034f..1b51ab8d 100644 > --- a/tests/Makefile.am > +++ b/tests/Makefile.am > @@ -63,7 +63,7 @@ check_PROGRAMS = arextract arsymtest newfile saridx > scnnames sectiondump \ > all-dwarf-ranges unit-info next_cfi \ > elfcopy addsections xlate_notes elfrdwrnop \ > dwelf_elf_e_machine_string \ > - getphdrnum > + getphdrnum leb128 > > asm_TESTS = asm-tst1 asm-tst2 asm-tst3 asm-tst4 asm-tst5 \ > asm-tst6 asm-tst7 asm-tst8 asm-tst9 > @@ -185,7 +185,8 @@ TESTS = run-arextract.sh run-arsymtest.sh run-ar.sh > newfile test-nlist \ > run-elfclassify.sh run-elfclassify-self.sh \ > run-disasm-riscv64.sh \ > run-pt_gnu_prop-tests.sh \ > - run-getphdrnum.sh run-test-includes.sh > + run-getphdrnum.sh run-test-includes.sh \ > + leb128 > > if !BIARCH > export ELFUTILS_DISABLE_BIARCH = 1 > @@ -694,6 +695,7 @@ xlate_notes_LDADD = $(libelf) > elfrdwrnop_LDADD = $(libelf) > dwelf_elf_e_machine_string_LDADD = $(libelf) $(libdw) > getphdrnum_LDADD = $(libelf) $(libdw) > +leb128_LDADD = $(libelf) $(libdw) ack > # We want to test the libelf header against the system elf.h header. > # Don't include any -I CPPFLAGS. Except when we install our own elf.h. > diff --git a/tests/leb128.c b/tests/leb128.c > new file mode 100644 > index 00000000..6994a436 > --- /dev/null > +++ b/tests/leb128.c > @@ -0,0 +1,140 @@ > +/* Test program for leb128 > + Copyright (C) 2020 Tom Tromey > + This file is part of elfutils. > + > + This file is free software; you can redistribute it and/or modify > + it under the terms of the GNU General Public License as published by > + the Free Software Foundation; either version 3 of the License, or > + (at your option) any later version. > + > + elfutils is distributed in the hope that it will be useful, but > + WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + GNU General Public License for more details. > + > + You should have received a copy of the GNU General Public License > + along with this program. If not, see <http://www.gnu.org/licenses/>. */ > + > +#include <config.h> > +#include <stddef.h> > +#include <stdbool.h> > +#include <libdw.h> > +#include "../libdw/libdwP.h" > +#include "../libdw/memory-access.h" > + > +#define OK 0 > +#define FAIL 1 > + > +static const unsigned char v0[] = { 0x0 }; > +static const unsigned char v23[] = { 23 }; > +static const unsigned char vm_2[] = { 0x7e }; > +static const unsigned char v128[] = { 0x80, 0x01 }; > +static const unsigned char vm_128[] = { 0x80, 0x7f }; > +static const unsigned char vhuge[] = > + { > + 0xff, 0xff, 0xff, 0xff, 0xff, > + 0xff, 0xff, 0xff, 0xff, 0x0 > + }; > +static const unsigned char most_positive[] = > + { > + 0xff, 0xff, 0xff, 0xff, 0xff, > + 0xff, 0xff, 0xff, 0x3f > + }; > +static const unsigned char most_negative[] = > + { > + 0x80, 0x80, 0x80, 0x80, 0x80, > + 0x80, 0x80, 0x80, 0x40 > + }; > +static const unsigned char minus_one[] = > + { > + 0xff, 0xff, 0xff, 0xff, 0xff, > + 0xff, 0xff, 0xff, 0x7f > + }; > + > +static int > +test_one_sleb (const unsigned char *data, size_t len, int64_t expect) > +{ > + int64_t value; > + const unsigned char *p; > + > + p = data; > + get_sleb128 (value, p, p + len); > + if (value != expect || p != data + len) > + return FAIL; > + > + p = data; > + get_sleb128_unchecked (value, p); > + if (value != expect || p != data + len) > + return FAIL; > + > + return OK; > +} > + > +static int > +test_sleb (void) > +{ > +#define TEST(ARRAY, V) \ > + if (test_one_sleb (ARRAY, sizeof (ARRAY), V) != OK) \ > + return FAIL; > + > + TEST (v0, 0); > + TEST (v23, 23); > + TEST (vm_2, -2); > + TEST (v128, 128); > + TEST (vm_128, -128); > + TEST (vhuge, 9223372036854775807ll); > + TEST (most_positive, 4611686018427387903ll); > + TEST (most_negative, -4611686018427387904ll); > + TEST (minus_one, -1); > + > +#undef TEST > + > + return OK; > +} > + > +static int > +test_one_uleb (const unsigned char *data, size_t len, uint64_t expect) > +{ > + uint64_t value; > + const unsigned char *p; > + > + p = data; > + get_uleb128 (value, p, p + len); > + if (value != expect || p != data + len) > + return FAIL; > + > + p = data; > + get_uleb128_unchecked (value, p); > + if (value != expect || p != data + len) > + return FAIL; > + > + return OK; > +} > + > +static int > +test_uleb (void) > +{ > +#define TEST(ARRAY, V) \ > + if (test_one_uleb (ARRAY, sizeof (ARRAY), V) != OK) \ > + return FAIL; > + > + TEST (v0, 0); > + TEST (v23, 23); > + TEST (vm_2, 126); > + TEST (v128, 128); > + TEST (vm_128, 16256); > + TEST (vhuge, 9223372036854775807ull); > + TEST (most_positive, 4611686018427387903ull); > + TEST (most_negative, 4611686018427387904ull); > + TEST (minus_one, 9223372036854775807ull); > + > +#undef TEST > + > + return OK; > +} > + > +int > +main (void) > +{ > + return test_sleb () || test_uleb (); > +} Looks good. While reviewing I did try out some extra corner cases that I added (see attached). It also shows that some values can have multiple representations. I added them to your commit before I pushed it. Hope you don't mind. Thanks, Mark
diff --git a/tests/leb128.c b/tests/leb128.c index 6994a436..47b57c0d 100644 --- a/tests/leb128.c +++ b/tests/leb128.c @@ -18,6 +18,7 @@ #include <config.h> #include <stddef.h> #include <stdbool.h> +#include <stdint.h> #include <libdw.h> #include "../libdw/libdwP.h" #include "../libdw/memory-access.h" @@ -26,10 +27,16 @@ #define FAIL 1 static const unsigned char v0[] = { 0x0 }; +static const unsigned char v1[] = { 0x1 }; static const unsigned char v23[] = { 23 }; +static const unsigned char vm_1[] = { 0x7f }; static const unsigned char vm_2[] = { 0x7e }; +static const unsigned char s127[] = { 0xff, 0x00 }; static const unsigned char v128[] = { 0x80, 0x01 }; +static const unsigned char v129[] = { 0x81, 0x01 }; +static const unsigned char vm_127[] = { 0x81, 0x7f }; static const unsigned char vm_128[] = { 0x80, 0x7f }; +static const unsigned char vm_129[] = { 0xff, 0x7e }; static const unsigned char vhuge[] = { 0xff, 0xff, 0xff, 0xff, 0xff, @@ -50,6 +57,16 @@ static const unsigned char minus_one[] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }; +static const unsigned char int64_max_m1[] = + { + 0xfe, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x00 + }; +static const unsigned char int64_min_p1[] = + { + 0x81, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x7f + }; static int test_one_sleb (const unsigned char *data, size_t len, int64_t expect) @@ -78,14 +95,22 @@ test_sleb (void) return FAIL; TEST (v0, 0); + TEST (v1, 1); TEST (v23, 23); + TEST (vm_1, -1); TEST (vm_2, -2); + TEST (s127, 127); TEST (v128, 128); + TEST (v129, 129); + TEST (vm_127, -127); TEST (vm_128, -128); + TEST (vm_129, -129); TEST (vhuge, 9223372036854775807ll); TEST (most_positive, 4611686018427387903ll); TEST (most_negative, -4611686018427387904ll); TEST (minus_one, -1); + TEST (int64_max_m1, INT64_MAX - 1); + TEST (int64_min_p1, INT64_MIN + 1); #undef TEST @@ -119,14 +144,22 @@ test_uleb (void) return FAIL; TEST (v0, 0); + TEST (v1, 1); TEST (v23, 23); + TEST (vm_1, 127); TEST (vm_2, 126); + TEST (s127, 127); TEST (v128, 128); + TEST (v129, 129); + TEST (vm_127, 16257); TEST (vm_128, 16256); + TEST (vm_129, 16255); TEST (vhuge, 9223372036854775807ull); TEST (most_positive, 4611686018427387903ull); TEST (most_negative, 4611686018427387904ull); TEST (minus_one, 9223372036854775807ull); + TEST (int64_max_m1, INT64_MAX - 1); + TEST (int64_min_p1, INT64_MIN + 1); #undef TEST