On 11/1/20 6:57 AM, Eduard-Mihai Burtescu wrote:
> Reading the diff patch, the v0 changes look great. I wouldn't be too worried about the "printable character" aspect; there are similar Unicode-related issues elsewhere, e.g. the "non-control ASCII" comment in decode_legacy_escape (I suppose we could make it also go through the "print a non-control ASCII character or some escape sequence" logic you added, if you think that helps).
No, it's entirely fine with me! I just wasn't sure if the small deviations in output were acceptable. It sounds like they are.
> However, I'm not sure about the legacy changes. Or rather, about the .llvm. one: it's not really Rust-specific (it's only in the rustc-demangle crate for convenience), and C++ code compiled with Clang could run into the same problem. Ideally, stripping that suffix would be done uniformly in cplus-dem.c, but I decided against making that change myself, for now. I'm especially not comfortable removing the fast path, since that was the condition under which I was able to make Rust demangling be attempted first, before C++, so that the Rust legacy demangling could be implemented standalone, separately from C++ demangling, and live together with the v0 one.
> It should be possible to keep the fast path if stripping .llvm.* suffixes is done before either Rust or C++ demangling is attempted, but even if that would be nice to have, IMO it should be a separate patch and not block v0 demangling.
That makes sense. I've attached updated patches (again generating a diff against both your original patch and trunk) without the changes to the legacy code. I did preserve one small hunk regarding the unescaping of a single '.' character in idents, as I believe that is just a straightforward bug in the existing code.
> I can test the patch and upload the dataset tomorrow, but if you want to get something committed sooner (is there a deadline for the next release?), feel free to land the v0 changes (snprintf + const values) without the legacy ones.
My understanding is that the GCC tree closes to new features on November 16 (for "GCC 11 Stage 3"), but I'm not sure whether that applies to libiberty or whether this patch would be classified as a feature or a bugfix.
I don't have commit rights (nor am I even a GCC developer). Just wanted to tee things up for you and Ian this week. I'm very much looking forward to the new demangling scheme and didn't want to be just another +1 on the GitHub issue.
So certainly no time pressure from me. But perhaps someone from the GCC side can confirm whether we are under a bit of time pressure here given the GCC 11 release.
Cheers, Nikhil
diff --git a/rust-demangle.c b/rust-demangle.c index d604b3c..9cd8f99 100644 --- a/rust-demangle.c +++ b/rust-demangle.c @@ -143,6 +143,35 @@ parse_disambiguator (struct rust_demangler *rdm) return parse_opt_integer_62 (rdm, 's'); } +static size_t +parse_hex_nibbles (struct rust_demangler *rdm, uint64_t *value) +{ + char c; + size_t hex_len; + + hex_len = 0; + *value = 0; + + while (!eat (rdm, '_')) + { + *value <<= 4; + + c = next (rdm); + if (ISDIGIT (c)) + *value |= c - '0'; + else if (c >= 'a' && c <= 'f') + *value |= 10 + (c - 'a'); + else + { + rdm->errored = 1; + return 0; + } + hex_len++; + } + + return hex_len; +} + struct rust_mangled_ident { /* ASCII part of the identifier. */ @@ -240,7 +269,7 @@ static void print_uint64 (struct rust_demangler *rdm, uint64_t x) { char s[21]; - sprintf (s, "%" PRIu64, x); + snprintf (s, 21, "%" PRIu64, x); PRINT (s); } @@ -248,7 +277,7 @@ static void print_uint64_hex (struct rust_demangler *rdm, uint64_t x) { char s[17]; - sprintf (s, "%" PRIx64, x); + snprintf (s, 17, "%" PRIx64, x); PRINT (s); } @@ -380,8 +409,7 @@ print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident) } else { - /* "." becomes "-" */ - PRINT ("-"); + PRINT ("."); len = 1; } } @@ -591,6 +619,9 @@ static int demangle_path_maybe_open_generics (struct rust_demangler *rdm); static void demangle_dyn_trait (struct rust_demangler *rdm); static void demangle_const (struct rust_demangler *rdm); static void demangle_const_uint (struct rust_demangler *rdm); +static void demangle_const_int (struct rust_demangler *rdm); +static void demangle_const_bool (struct rust_demangler *rdm); +static void demangle_const_char (struct rust_demangler *rdm); /* Optionally enter a binder ('G') for late-bound lifetimes, printing e.g. `for<'a, 'b> `, and make those lifetimes visible @@ -1089,6 +1120,11 @@ demangle_const (struct rust_demangler *rdm) ty_tag = next (rdm); switch (ty_tag) { + /* Placeholder. 
*/ + case 'p': + PRINT ("_"); + return; + /* Unsigned integer types. */ case 'h': case 't': @@ -1096,6 +1132,27 @@ demangle_const (struct rust_demangler *rdm) case 'y': case 'o': case 'j': + demangle_const_uint (rdm); + break; + + /* Signed integer types. */ + case 'a': + case 's': + case 'l': + case 'x': + case 'n': + case 'i': + demangle_const_int (rdm); + break; + + /* Boolean. */ + case 'b': + demangle_const_bool (rdm); + break; + + /* Character. */ + case 'c': + demangle_const_char (rdm); break; default: @@ -1103,10 +1160,8 @@ demangle_const (struct rust_demangler *rdm) return; } - if (eat (rdm, 'p')) - PRINT ("_"); - else - demangle_const_uint (rdm); + if (rdm->errored) + return; if (rdm->verbose) { @@ -1118,41 +1173,86 @@ demangle_const (struct rust_demangler *rdm) static void demangle_const_uint (struct rust_demangler *rdm) { - char c; size_t hex_len; uint64_t value; if (rdm->errored) return; - value = 0; - hex_len = 0; - while (!eat (rdm, '_')) - { - value <<= 4; + hex_len = parse_hex_nibbles (rdm, &value); - c = next (rdm); - if (ISDIGIT (c)) - value |= c - '0'; - else if (c >= 'a' && c <= 'f') - value |= 10 + (c - 'a'); - else - { - rdm->errored = 1; - return; - } - hex_len++; - } - - /* Print anything that doesn't fit in `uint64_t` verbatim. */ if (hex_len > 16) { + /* Print anything that doesn't fit in `uint64_t` verbatim. 
*/ PRINT ("0x"); print_str (rdm, rdm->sym + (rdm->next - hex_len), hex_len); + } + else if (hex_len > 0) + print_uint64 (rdm, value); + else + rdm->errored = 1; +} + +static void +demangle_const_int (struct rust_demangler *rdm) +{ + if (eat (rdm, 'n')) + PRINT ("-"); + demangle_const_uint (rdm); +} + +static void +demangle_const_bool (struct rust_demangler *rdm) +{ + uint64_t value; + + if (parse_hex_nibbles (rdm, &value) != 1) + { + rdm->errored = 1; return; } - print_uint64 (rdm, value); + if (value == 0) + PRINT ("false"); + else if (value == 1) + PRINT ("true"); + else + rdm->errored = 1; +} + +static void +demangle_const_char (struct rust_demangler *rdm) +{ + size_t hex_len; + uint64_t value; + + hex_len = parse_hex_nibbles (rdm, &value); + + if (hex_len == 0 || hex_len > 8) + { + rdm->errored = 1; + return; + } + + /* Match Rust's character "debug" output as best as we can. */ + PRINT ("'"); + if (value == '\t') + PRINT ("\\t"); + else if (value == '\r') + PRINT ("\\r"); + else if (value == '\n') + PRINT ("\\n"); + else if (value > ' ' && value < '~') + /* Rust also considers many non-ASCII codepoints to be printable, but + that logic is not easily ported to C. */ + print_str (rdm, (char *) &value, 1); + else + { + PRINT ("\\u{"); + print_uint64_hex (rdm, value); + PRINT ("}"); + } + PRINT ("'"); } /* A legacy hash is the prefix "h" followed by 16 lowercase hex digits.
diff --git a/libiberty/rust-demangle.c b/libiberty/rust-demangle.c index b87365c85fe..f6b63a60652 100644 --- a/libiberty/rust-demangle.c +++ b/libiberty/rust-demangle.c @@ -64,11 +64,16 @@ struct rust_demangler /* Non-zero if any error occurred. */ int errored; + /* Non-zero if nothing should be printed. */ + int skipping_printing; + /* Non-zero if printing should be verbose (e.g. include hashes). */ int verbose; /* Rust mangling version, with legacy mangling being -1. */ int version; + + uint64_t bound_lifetime_depth; }; /* Parsing functions. */ @@ -81,6 +86,18 @@ peek (const struct rust_demangler *rdm) return 0; } +static int +eat (struct rust_demangler *rdm, char c) +{ + if (peek (rdm) == c) + { + rdm->next++; + return 1; + } + else + return 0; +} + static char next (struct rust_demangler *rdm) { @@ -92,11 +109,87 @@ next (struct rust_demangler *rdm) return c; } +static uint64_t +parse_integer_62 (struct rust_demangler *rdm) +{ + char c; + uint64_t x; + + if (eat (rdm, '_')) + return 0; + + x = 0; + while (!eat (rdm, '_')) + { + c = next (rdm); + x *= 62; + if (ISDIGIT (c)) + x += c - '0'; + else if (ISLOWER (c)) + x += 10 + (c - 'a'); + else if (ISUPPER (c)) + x += 10 + 26 + (c - 'A'); + else + { + rdm->errored = 1; + return 0; + } + } + return x + 1; +} + +static uint64_t +parse_opt_integer_62 (struct rust_demangler *rdm, char tag) +{ + if (!eat (rdm, tag)) + return 0; + return 1 + parse_integer_62 (rdm); +} + +static uint64_t +parse_disambiguator (struct rust_demangler *rdm) +{ + return parse_opt_integer_62 (rdm, 's'); +} + +static size_t +parse_hex_nibbles (struct rust_demangler *rdm, uint64_t *value) +{ + char c; + size_t hex_len; + + hex_len = 0; + *value = 0; + + while (!eat (rdm, '_')) + { + *value <<= 4; + + c = next (rdm); + if (ISDIGIT (c)) + *value |= c - '0'; + else if (c >= 'a' && c <= 'f') + *value |= 10 + (c - 'a'); + else + { + rdm->errored = 1; + return 0; + } + hex_len++; + } + + return hex_len; +} + struct rust_mangled_ident { /* ASCII part 
of the identifier. */ const char *ascii; size_t ascii_len; + + /* Punycode insertion codes for Unicode codepoints, if any. */ + const char *punycode; + size_t punycode_len; }; static struct rust_mangled_ident @@ -104,10 +197,16 @@ parse_ident (struct rust_demangler *rdm) { char c; size_t start, len; + int is_punycode = 0; struct rust_mangled_ident ident; ident.ascii = NULL; ident.ascii_len = 0; + ident.punycode = NULL; + ident.punycode_len = 0; + + if (rdm->version != -1) + is_punycode = eat (rdm, 'u'); c = next (rdm); if (!ISDIGIT (c)) @@ -121,6 +220,10 @@ parse_ident (struct rust_demangler *rdm) while (ISDIGIT (peek (rdm))) len = len * 10 + (next (rdm) - '0'); + /* Skip past the optional `_` separator (v0). */ + if (rdm->version != -1) + eat (rdm, '_'); + start = rdm->next; rdm->next += len; /* Check for overflows. */ @@ -133,6 +236,27 @@ parse_ident (struct rust_demangler *rdm) ident.ascii = rdm->sym + start; ident.ascii_len = len; + if (is_punycode) + { + ident.punycode_len = 0; + while (ident.ascii_len > 0) + { + ident.ascii_len--; + + /* The last '_' is a separator between ascii & punycode. 
*/ + if (ident.ascii[ident.ascii_len] == '_') + break; + + ident.punycode_len++; + } + if (!ident.punycode_len) + { + rdm->errored = 1; + return ident; + } + ident.punycode = ident.ascii + (len - ident.punycode_len); + } + if (ident.ascii_len == 0) ident.ascii = NULL; @@ -144,12 +268,28 @@ parse_ident (struct rust_demangler *rdm) static void print_str (struct rust_demangler *rdm, const char *data, size_t len) { - if (!rdm->errored) + if (!rdm->errored && !rdm->skipping_printing) rdm->callback (data, len, rdm->callback_opaque); } #define PRINT(s) print_str (rdm, s, strlen (s)) +static void +print_uint64 (struct rust_demangler *rdm, uint64_t x) +{ + char s[21]; + snprintf (s, 21, "%" PRIu64, x); + PRINT (s); +} + +static void +print_uint64_hex (struct rust_demangler *rdm, uint64_t x) +{ + char s[17]; + snprintf (s, 17, "%" PRIx64, x); + PRINT (s); +} + /* Return a 0x0-0xf value if the char is 0-9a-f, and -1 otherwise. */ static int decode_lower_hex_nibble (char nibble) @@ -230,9 +370,14 @@ static void print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident) { char unescaped; - size_t len; + uint8_t *out, *p, d; + size_t len, cap, punycode_pos, j; + /* Punycode parameters and state. */ + uint32_t c; + size_t base, t_min, t_max, skew, damp, bias, i; + size_t delta, w, k, t; - if (rdm->errored) + if (rdm->errored || rdm->skipping_printing) return; if (rdm->version == -1) @@ -273,8 +418,7 @@ print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident) } else { - /* "." becomes "-" */ - PRINT ("-"); + PRINT ("."); len = 1; } } @@ -294,6 +438,830 @@ print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident) return; } + + if (!ident.punycode) + { + print_str (rdm, ident.ascii, ident.ascii_len); + return; + } + + len = 0; + cap = 4; + while (cap < ident.ascii_len) + { + cap *= 2; + /* Check for overflows. 
*/ + if ((cap * 4) / 4 != cap) + { + rdm->errored = 1; + return; + } + } + + /* Store the output codepoints as groups of 4 UTF-8 bytes. */ + out = (uint8_t *)malloc (cap * 4); + if (!out) + { + rdm->errored = 1; + return; + } + + /* Populate initial output from ASCII fragment. */ + for (len = 0; len < ident.ascii_len; len++) + { + p = out + 4 * len; + p[0] = 0; + p[1] = 0; + p[2] = 0; + p[3] = ident.ascii[len]; + } + + /* Punycode parameters and initial state. */ + base = 36; + t_min = 1; + t_max = 26; + skew = 38; + damp = 700; + bias = 72; + i = 0; + c = 0x80; + + punycode_pos = 0; + while (punycode_pos < ident.punycode_len) + { + /* Read one delta value. */ + delta = 0; + w = 1; + k = 0; + do + { + k += base; + t = k < bias ? 0 : (k - bias); + if (t < t_min) + t = t_min; + if (t > t_max) + t = t_max; + + if (punycode_pos >= ident.punycode_len) + goto cleanup; + d = ident.punycode[punycode_pos++]; + + if (ISLOWER (d)) + d = d - 'a'; + else if (ISDIGIT (d)) + d = 26 + (d - '0'); + else + { + rdm->errored = 1; + goto cleanup; + } + + delta += d * w; + w *= base - t; + } + while (d >= t); + + /* Compute the new insert position and character. */ + len++; + i += delta; + c += i / len; + i %= len; + + /* Ensure enough space is available. */ + if (cap < len) + { + cap *= 2; + /* Check for overflows. */ + if ((cap * 4) / 4 != cap || cap < len) + { + rdm->errored = 1; + goto cleanup; + } + } + p = (uint8_t *)realloc (out, cap * 4); + if (!p) + { + rdm->errored = 1; + goto cleanup; + } + out = p; + + /* Move the characters after the insert position. */ + p = out + i * 4; + memmove (p + 4, p, (len - i - 1) * 4); + + /* Insert the new character, as UTF-8 bytes. */ + p[0] = c >= 0x10000 ? 0xf0 | (c >> 18) : 0; + p[1] = c >= 0x800 ? (c < 0x10000 ? 0xe0 : 0x80) | ((c >> 12) & 0x3f) : 0; + p[2] = (c < 0x800 ? 0xc0 : 0x80) | ((c >> 6) & 0x3f); + p[3] = 0x80 | (c & 0x3f); + + /* If there are no more deltas, decoding is complete. 
*/ + if (punycode_pos == ident.punycode_len) + break; + + i++; + + /* Perform bias adaptation. */ + delta /= damp; + damp = 2; + + delta += delta / len; + k = 0; + while (delta > ((base - t_min) * t_max) / 2) + { + delta /= base - t_min; + k += base; + } + bias = k + ((base - t_min + 1) * delta) / (delta + skew); + } + + /* Remove all the 0 bytes to leave behind an UTF-8 string. */ + for (i = 0, j = 0; i < len * 4; i++) + if (out[i] != 0) + out[j++] = out[i]; + + print_str (rdm, (const char *)out, j); + +cleanup: + free (out); +} + +/* Print the lifetime according to the previously decoded index. + An index of `0` always refers to `'_`, but starting with `1`, + indices refer to late-bound lifetimes introduced by a binder. */ +static void +print_lifetime_from_index (struct rust_demangler *rdm, uint64_t lt) +{ + char c; + uint64_t depth; + + PRINT ("'"); + if (lt == 0) + { + PRINT ("_"); + return; + } + + depth = rdm->bound_lifetime_depth - lt; + /* Try to print lifetimes alphabetically first. */ + if (depth < 26) + { + c = 'a' + depth; + print_str (rdm, &c, 1); + } + else + { + /* Use `'_123` after running out of letters. */ + PRINT ("_"); + print_uint64 (rdm, depth); + } +} + +/* Demangling functions. 
*/ + +static void demangle_binder (struct rust_demangler *rdm); +static void demangle_path (struct rust_demangler *rdm, int in_value); +static void demangle_generic_arg (struct rust_demangler *rdm); +static void demangle_type (struct rust_demangler *rdm); +static int demangle_path_maybe_open_generics (struct rust_demangler *rdm); +static void demangle_dyn_trait (struct rust_demangler *rdm); +static void demangle_const (struct rust_demangler *rdm); +static void demangle_const_uint (struct rust_demangler *rdm); +static void demangle_const_int (struct rust_demangler *rdm); +static void demangle_const_bool (struct rust_demangler *rdm); +static void demangle_const_char (struct rust_demangler *rdm); + +/* Optionally enter a binder ('G') for late-bound lifetimes, + printing e.g. `for<'a, 'b> `, and make those lifetimes visible + to the caller (via depth level, which the caller should reset). */ +static void +demangle_binder (struct rust_demangler *rdm) +{ + uint64_t i, bound_lifetimes; + + if (rdm->errored) + return; + + bound_lifetimes = parse_opt_integer_62 (rdm, 'G'); + if (bound_lifetimes > 0) + { + PRINT ("for<"); + for (i = 0; i < bound_lifetimes; i++) + { + if (i > 0) + PRINT (", "); + rdm->bound_lifetime_depth++; + print_lifetime_from_index (rdm, 1); + } + PRINT ("> "); + } +} + +static void +demangle_path (struct rust_demangler *rdm, int in_value) +{ + char tag, ns; + int was_skipping_printing; + size_t i, backref, old_next; + uint64_t dis; + struct rust_mangled_ident name; + + if (rdm->errored) + return; + + switch (tag = next (rdm)) + { + case 'C': + dis = parse_disambiguator (rdm); + name = parse_ident (rdm); + + print_ident (rdm, name); + if (rdm->verbose) + { + PRINT ("["); + print_uint64_hex (rdm, dis); + PRINT ("]"); + } + break; + case 'N': + ns = next (rdm); + if (!ISLOWER (ns) && !ISUPPER (ns)) + { + rdm->errored = 1; + return; + } + + demangle_path (rdm, in_value); + + dis = parse_disambiguator (rdm); + name = parse_ident (rdm); + + if (ISUPPER (ns)) + 
{ + /* Special namespaces, like closures and shims. */ + PRINT ("::{"); + switch (ns) + { + case 'C': + PRINT ("closure"); + break; + case 'S': + PRINT ("shim"); + break; + default: + print_str (rdm, &ns, 1); + } + if (name.ascii || name.punycode) + { + PRINT (":"); + print_ident (rdm, name); + } + PRINT ("#"); + print_uint64 (rdm, dis); + PRINT ("}"); + } + else + { + /* Implementation-specific/unspecified namespaces. */ + + if (name.ascii || name.punycode) + { + PRINT ("::"); + print_ident (rdm, name); + } + } + break; + case 'M': + case 'X': + /* Ignore the `impl`'s own path.*/ + parse_disambiguator (rdm); + was_skipping_printing = rdm->skipping_printing; + rdm->skipping_printing = 1; + demangle_path (rdm, in_value); + rdm->skipping_printing = was_skipping_printing; + /* fallthrough */ + case 'Y': + PRINT ("<"); + demangle_type (rdm); + if (tag != 'M') + { + PRINT (" as "); + demangle_path (rdm, 0); + } + PRINT (">"); + break; + case 'I': + demangle_path (rdm, in_value); + if (in_value) + PRINT ("::"); + PRINT ("<"); + for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++) + { + if (i > 0) + PRINT (", "); + demangle_generic_arg (rdm); + } + PRINT (">"); + break; + case 'B': + backref = parse_integer_62 (rdm); + if (!rdm->skipping_printing) + { + old_next = rdm->next; + rdm->next = backref; + demangle_path (rdm, in_value); + rdm->next = old_next; + } + break; + default: + rdm->errored = 1; + return; + } +} + +static void +demangle_generic_arg (struct rust_demangler *rdm) +{ + uint64_t lt; + if (eat (rdm, 'L')) + { + lt = parse_integer_62 (rdm); + print_lifetime_from_index (rdm, lt); + } + else if (eat (rdm, 'K')) + demangle_const (rdm); + else + demangle_type (rdm); +} + +static const char * +basic_type (char tag) +{ + switch (tag) + { + case 'b': + return "bool"; + case 'c': + return "char"; + case 'e': + return "str"; + case 'u': + return "()"; + case 'a': + return "i8"; + case 's': + return "i16"; + case 'l': + return "i32"; + case 'x': + return "i64"; + case 'n': 
+ return "i128"; + case 'i': + return "isize"; + case 'h': + return "u8"; + case 't': + return "u16"; + case 'm': + return "u32"; + case 'y': + return "u64"; + case 'o': + return "u128"; + case 'j': + return "usize"; + case 'f': + return "f32"; + case 'd': + return "f64"; + case 'z': + return "!"; + case 'p': + return "_"; + case 'v': + return "..."; + + default: + return NULL; + } +} + +static void +demangle_type (struct rust_demangler *rdm) +{ + char tag; + size_t i, old_next, backref; + uint64_t lt, old_bound_lifetime_depth; + const char *basic; + struct rust_mangled_ident abi; + + if (rdm->errored) + return; + + tag = next (rdm); + + basic = basic_type (tag); + if (basic) + { + PRINT (basic); + return; + } + + switch (tag) + { + case 'R': + case 'Q': + PRINT ("&"); + if (eat (rdm, 'L')) + { + lt = parse_integer_62 (rdm); + if (lt) + { + print_lifetime_from_index (rdm, lt); + PRINT (" "); + } + } + if (tag != 'R') + PRINT ("mut "); + demangle_type (rdm); + break; + case 'P': + case 'O': + PRINT ("*"); + if (tag != 'P') + PRINT ("mut "); + else + PRINT ("const "); + demangle_type (rdm); + break; + case 'A': + case 'S': + PRINT ("["); + demangle_type (rdm); + if (tag == 'A') + { + PRINT ("; "); + demangle_const (rdm); + } + PRINT ("]"); + break; + case 'T': + PRINT ("("); + for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++) + { + if (i > 0) + PRINT (", "); + demangle_type (rdm); + } + if (i == 1) + PRINT (","); + PRINT (")"); + break; + case 'F': + old_bound_lifetime_depth = rdm->bound_lifetime_depth; + demangle_binder (rdm); + + if (eat (rdm, 'U')) + PRINT ("unsafe "); + + if (eat (rdm, 'K')) + { + if (eat (rdm, 'C')) + { + abi.ascii = "C"; + abi.ascii_len = 1; + } + else + { + abi = parse_ident (rdm); + if (!abi.ascii || abi.punycode) + { + rdm->errored = 1; + goto restore; + } + } + + PRINT ("extern \""); + + /* If the ABI had any `-`, they were replaced with `_`, + so the parts between `_` have to be re-joined with `-`. 
*/ + for (i = 0; i < abi.ascii_len; i++) + { + if (abi.ascii[i] == '_') + { + print_str (rdm, abi.ascii, i); + PRINT ("-"); + abi.ascii += i + 1; + abi.ascii_len -= i + 1; + i = 0; + } + } + print_str (rdm, abi.ascii, abi.ascii_len); + + PRINT ("\" "); + } + + PRINT ("fn("); + for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++) + { + if (i > 0) + PRINT (", "); + demangle_type (rdm); + } + PRINT (")"); + + if (eat (rdm, 'u')) + { + /* Skip printing the return type if it's 'u', i.e. `()`. */ + } + else + { + PRINT (" -> "); + demangle_type (rdm); + } + + /* Restore `bound_lifetime_depth` to outside the binder. */ + restore: + rdm->bound_lifetime_depth = old_bound_lifetime_depth; + break; + case 'D': + PRINT ("dyn "); + + old_bound_lifetime_depth = rdm->bound_lifetime_depth; + demangle_binder (rdm); + + for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++) + { + if (i > 0) + PRINT (" + "); + demangle_dyn_trait (rdm); + } + + /* Restore `bound_lifetime_depth` to outside the binder. */ + rdm->bound_lifetime_depth = old_bound_lifetime_depth; + + if (!eat (rdm, 'L')) + { + rdm->errored = 1; + return; + } + lt = parse_integer_62 (rdm); + if (lt) + { + PRINT (" + "); + print_lifetime_from_index (rdm, lt); + } + break; + case 'B': + backref = parse_integer_62 (rdm); + if (!rdm->skipping_printing) + { + old_next = rdm->next; + rdm->next = backref; + demangle_type (rdm); + rdm->next = old_next; + } + break; + default: + /* Go back to the tag, so `demangle_path` also sees it. */ + rdm->next--; + demangle_path (rdm, 0); + } +} + +/* A trait in a trait object may have some "existential projections" + (i.e. associated type bindings) after it, which should be printed + in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`. + To this end, this method will keep the `<...>` of an 'I' path + open, by omitting the `>`, and return `Ok(true)` in that case. 
*/ +static int +demangle_path_maybe_open_generics (struct rust_demangler *rdm) +{ + int open; + size_t i, old_next, backref; + + open = 0; + + if (rdm->errored) + return open; + + if (eat (rdm, 'B')) + { + backref = parse_integer_62 (rdm); + if (!rdm->skipping_printing) + { + old_next = rdm->next; + rdm->next = backref; + open = demangle_path_maybe_open_generics (rdm); + rdm->next = old_next; + } + } + else if (eat (rdm, 'I')) + { + demangle_path (rdm, 0); + PRINT ("<"); + open = 1; + for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++) + { + if (i > 0) + PRINT (", "); + demangle_generic_arg (rdm); + } + } + else + demangle_path (rdm, 0); + return open; +} + +static void +demangle_dyn_trait (struct rust_demangler *rdm) +{ + int open; + struct rust_mangled_ident name; + + if (rdm->errored) + return; + + open = demangle_path_maybe_open_generics (rdm); + + while (eat (rdm, 'p')) + { + if (!open) + PRINT ("<"); + else + PRINT (", "); + open = 1; + + name = parse_ident (rdm); + print_ident (rdm, name); + PRINT (" = "); + demangle_type (rdm); + } + + if (open) + PRINT (">"); +} + +static void +demangle_const (struct rust_demangler *rdm) +{ + char ty_tag; + size_t old_next, backref; + + if (rdm->errored) + return; + + if (eat (rdm, 'B')) + { + backref = parse_integer_62 (rdm); + if (!rdm->skipping_printing) + { + old_next = rdm->next; + rdm->next = backref; + demangle_const (rdm); + rdm->next = old_next; + } + return; + } + + ty_tag = next (rdm); + switch (ty_tag) + { + /* Placeholder. */ + case 'p': + PRINT ("_"); + return; + + /* Unsigned integer types. */ + case 'h': + case 't': + case 'm': + case 'y': + case 'o': + case 'j': + demangle_const_uint (rdm); + break; + + /* Signed integer types. */ + case 'a': + case 's': + case 'l': + case 'x': + case 'n': + case 'i': + demangle_const_int (rdm); + break; + + /* Boolean. */ + case 'b': + demangle_const_bool (rdm); + break; + + /* Character. 
*/ + case 'c': + demangle_const_char (rdm); + break; + + default: + rdm->errored = 1; + return; + } + + if (rdm->errored) + return; + + if (rdm->verbose) + { + PRINT (": "); + PRINT (basic_type (ty_tag)); + } +} + +static void +demangle_const_uint (struct rust_demangler *rdm) +{ + size_t hex_len; + uint64_t value; + + if (rdm->errored) + return; + + hex_len = parse_hex_nibbles (rdm, &value); + + if (hex_len > 16) + { + /* Print anything that doesn't fit in `uint64_t` verbatim. */ + PRINT ("0x"); + print_str (rdm, rdm->sym + (rdm->next - hex_len), hex_len); + } + else if (hex_len > 0) + print_uint64 (rdm, value); + else + rdm->errored = 1; +} + +static void +demangle_const_int (struct rust_demangler *rdm) +{ + if (eat (rdm, 'n')) + PRINT ("-"); + demangle_const_uint (rdm); +} + +static void +demangle_const_bool (struct rust_demangler *rdm) +{ + uint64_t value; + + if (parse_hex_nibbles (rdm, &value) != 1) + { + rdm->errored = 1; + return; + } + + if (value == 0) + PRINT ("false"); + else if (value == 1) + PRINT ("true"); + else + rdm->errored = 1; +} + +static void +demangle_const_char (struct rust_demangler *rdm) +{ + size_t hex_len; + uint64_t value; + + hex_len = parse_hex_nibbles (rdm, &value); + + if (hex_len == 0 || hex_len > 8) + { + rdm->errored = 1; + return; + } + + /* Match Rust's character "debug" output as best as we can. */ + PRINT ("'"); + if (value == '\t') + PRINT ("\\t"); + else if (value == '\r') + PRINT ("\\r"); + else if (value == '\n') + PRINT ("\\n"); + else if (value > ' ' && value < '~') + /* Rust also considers many non-ASCII codepoints to be printable, but + that logic is not easily ported to C. */ + print_str (rdm, (char *) &value, 1); + else + { + PRINT ("\\u{"); + print_uint64_hex (rdm, value); + PRINT ("}"); + } + PRINT ("'"); } /* A legacy hash is the prefix "h" followed by 16 lowercase hex digits. 
@@ -345,11 +1313,15 @@ rust_demangle_callback (const char *mangled, int options, rdm.next = 0; rdm.errored = 0; + rdm.skipping_printing = 0; rdm.verbose = (options & DMGL_VERBOSE) != 0; rdm.version = 0; + rdm.bound_lifetime_depth = 0; - /* Rust symbols always start with _ZN (legacy). */ - if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N') + /* Rust symbols always start with _R (v0) or _ZN (legacy). */ + if (rdm.sym[0] == '_' && rdm.sym[1] == 'R') + rdm.sym += 2; + else if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N') { rdm.sym += 3; rdm.version = -1; @@ -357,7 +1329,11 @@ rust_demangle_callback (const char *mangled, int options, else return 0; - /* Legacy Rust symbols use only [_0-9a-zA-Z.:$] characters. */ + /* Paths (v0) always start with uppercase characters. */ + if (rdm.version != -1 && !ISUPPER (rdm.sym[0])) + return 0; + + /* Rust symbols (v0) use only [_0-9a-zA-Z] characters. */ for (p = rdm.sym; *p; p++) { rdm.sym_len++; @@ -365,6 +1341,7 @@ rust_demangle_callback (const char *mangled, int options, if (*p == '_' || ISALNUM (*p)) continue; + /* Legacy Rust symbols can also contain [.:$] characters. */ if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':')) continue; @@ -418,7 +1395,19 @@ rust_demangle_callback (const char *mangled, int options, while (rdm.next < rdm.sym_len); } else - return 0; + { + demangle_path (&rdm, 1); + + /* Skip instantiating crate. */ + if (!rdm.errored && rdm.next < rdm.sym_len) + { + rdm.skipping_printing = 1; + demangle_path (&rdm, 0); + } + + /* It's an error to not reach the end. 
*/ + rdm.errored |= rdm.next != rdm.sym_len; + } return !rdm.errored; } diff --git a/libiberty/testsuite/rust-demangle-expected b/libiberty/testsuite/rust-demangle-expected index 74774794736..1351b9a2bd4 100644 --- a/libiberty/testsuite/rust-demangle-expected +++ b/libiberty/testsuite/rust-demangle-expected @@ -163,3 +163,51 @@ _ZN63_$LT$core..ptr..Unique$LT$T$GT$$u20$as$u20$core..ops..Deref$GT$5deref17h19f --format=rust _ZN11issue_609253foo37Foo$LT$issue_60925..llv$u6d$..Foo$GT$3foo17h059a991a004536adE issue_60925::foo::Foo<issue_60925::llvm::Foo>::foo +# +--format=rust +_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h000b1ad6c4f30bd6E +core::ops::function::FnOnce::call_once{{vtable.shim}} +# UTF8 idents. +--format=rust +_RNqCs4fqI2P2rA04_11utf8_identsu30____7hkackfecea1cbdathfdh9hlq6y +utf8_idents::საჭმელად_გემრიელი_სადილი +# Crate with leading digit. +--format=rust +_RNvC6_123foo3bar +123foo::bar +# +--format=rust +_RMCs4fqI2P2rA04_13const_genericINtB0_8UnsignedKhb_E +<const_generic::Unsigned<11>> +# +--format=rust +_RMCs4fqI2P2rA04_13const_genericINtB0_6SignedKs98_E +<const_generic::Signed<152>> +# +--format=rust +_RMCs4fqI2P2rA04_13const_genericINtB0_6SignedKanb_E +<const_generic::Signed<-11>> +# +--format=rust +_RMCs4fqI2P2rA04_13const_genericINtB0_4BoolKb0_E +<const_generic::Bool<false>> +# +--format=rust +_RMCs4fqI2P2rA04_13const_genericINtB0_4BoolKb1_E +<const_generic::Bool<true>> +# +--format=rust +_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKc76_E +<const_generic::Char<'v'>> +# +--format=rust +_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKca_E +<const_generic::Char<'\n'>> +# +--format=rust +_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKc2202_E +<const_generic::Char<'\u{2202}'>> +# +--format=rust +_RNvNvMCs4fqI2P2rA04_13const_genericINtB4_3FooKpE3foo3FOO +<const_generic::Foo<_>>::foo::FOO