On 11/6/20 12:09 PM, Jeff Law wrote:
So I think the best path forward is to let you and Eduard-Mihai make the
technical decisions about what bits are ready for the trunk.  When y'all
think something is ready, let's go ahead and get it installed and
iterate on things that aren't quite ready yet.


For bits y'all think are ready, ISTM that Eduard-Mihai should commit the
changes.

I've attached an updated version of the patch that contains some
additional unit tests that eddyb noticed I lost. From my perspective,
this is now ready for commit.

Neither eddyb nor I have write access, so someone else will need to
commit. (But please wait for eddyb to sign off too.)

It's better to get it in sooner, but there is some degree of freedom
depending on the impact of the changes.  Changes in the rust demangler
aren't likely to trigger codegen or ABI breakages in the compiler itself
-- so with that in mind I think we should give this code a higher degree
of freedom to land after the stage1 close deadline.

Got it. Thanks. That's very helpful context.

Nikhil
diff --git a/libiberty/rust-demangle.c b/libiberty/rust-demangle.c
index b87365c85fe..08c615f6d8b 100644
--- a/libiberty/rust-demangle.c
+++ b/libiberty/rust-demangle.c
@@ -1,6 +1,7 @@
 /* Demangler for the Rust programming language
    Copyright (C) 2016-2020 Free Software Foundation, Inc.
    Written by David Tolnay (dtol...@gmail.com).
+   Rewritten by Eduard-Mihai Burtescu (ed...@lyken.rs) for v0 support.
 
 This file is part of the libiberty library.
 Libiberty is free software; you can redistribute it and/or
@@ -64,11 +65,16 @@ struct rust_demangler
   /* Non-zero if any error occurred. */
   int errored;
 
+  /* Non-zero if nothing should be printed. */
+  int skipping_printing;
+
   /* Non-zero if printing should be verbose (e.g. include hashes). */
   int verbose;
 
   /* Rust mangling version, with legacy mangling being -1. */
   int version;
+
+  uint64_t bound_lifetime_depth;
 };
 
 /* Parsing functions. */
@@ -81,6 +87,18 @@ peek (const struct rust_demangler *rdm)
   return 0;
 }
 
+static int
+eat (struct rust_demangler *rdm, char c)
+{
+  if (peek (rdm) == c)
+    {
+      rdm->next++;
+      return 1;
+    }
+  else
+    return 0;
+}
+
 static char
 next (struct rust_demangler *rdm)
 {
@@ -92,11 +110,87 @@ next (struct rust_demangler *rdm)
   return c;
 }
 
+static uint64_t
+parse_integer_62 (struct rust_demangler *rdm)
+{
+  char c;
+  uint64_t x;
+
+  if (eat (rdm, '_'))
+    return 0;
+
+  x = 0;
+  while (!eat (rdm, '_'))
+    {
+      c = next (rdm);
+      x *= 62;
+      if (ISDIGIT (c))
+        x += c - '0';
+      else if (ISLOWER (c))
+        x += 10 + (c - 'a');
+      else if (ISUPPER (c))
+        x += 10 + 26 + (c - 'A');
+      else
+        {
+          rdm->errored = 1;
+          return 0;
+        }
+    }
+  return x + 1;
+}
+
+static uint64_t
+parse_opt_integer_62 (struct rust_demangler *rdm, char tag)
+{
+  if (!eat (rdm, tag))
+    return 0;
+  return 1 + parse_integer_62 (rdm);
+}
+
+static uint64_t
+parse_disambiguator (struct rust_demangler *rdm)
+{
+  return parse_opt_integer_62 (rdm, 's');
+}
+
+static size_t
+parse_hex_nibbles (struct rust_demangler *rdm, uint64_t *value)
+{
+  char c;
+  size_t hex_len;
+
+  hex_len = 0;
+  *value = 0;
+
+  while (!eat (rdm, '_'))
+    {
+      *value <<= 4;
+
+      c = next (rdm);
+      if (ISDIGIT (c))
+        *value |= c - '0';
+      else if (c >= 'a' && c <= 'f')
+        *value |= 10 + (c - 'a');
+      else
+        {
+          rdm->errored = 1;
+          return 0;
+        }
+      hex_len++;
+    }
+
+  return hex_len;
+}
+
 struct rust_mangled_ident
 {
   /* ASCII part of the identifier. */
   const char *ascii;
   size_t ascii_len;
+
+  /* Punycode insertion codes for Unicode codepoints, if any. */
+  const char *punycode;
+  size_t punycode_len;
 };
 
 static struct rust_mangled_ident
@@ -104,10 +198,16 @@ parse_ident (struct rust_demangler *rdm)
 {
   char c;
   size_t start, len;
+  int is_punycode = 0;
   struct rust_mangled_ident ident;
 
   ident.ascii = NULL;
   ident.ascii_len = 0;
+  ident.punycode = NULL;
+  ident.punycode_len = 0;
+
+  if (rdm->version != -1)
+    is_punycode = eat (rdm, 'u');
 
   c = next (rdm);
   if (!ISDIGIT (c))
@@ -121,6 +221,10 @@ parse_ident (struct rust_demangler *rdm)
     while (ISDIGIT (peek (rdm)))
       len = len * 10 + (next (rdm) - '0');
 
+  /* Skip past the optional `_` separator (v0). */
+  if (rdm->version != -1)
+    eat (rdm, '_');
+
   start = rdm->next;
   rdm->next += len;
   /* Check for overflows. */
@@ -133,6 +237,27 @@ parse_ident (struct rust_demangler *rdm)
   ident.ascii = rdm->sym + start;
   ident.ascii_len = len;
 
+  if (is_punycode)
+    {
+      ident.punycode_len = 0;
+      while (ident.ascii_len > 0)
+        {
+          ident.ascii_len--;
+
+          /* The last '_' is a separator between ascii & punycode. */
+          if (ident.ascii[ident.ascii_len] == '_')
+            break;
+
+          ident.punycode_len++;
+        }
+      if (!ident.punycode_len)
+        {
+          rdm->errored = 1;
+          return ident;
+        }
+      ident.punycode = ident.ascii + (len - ident.punycode_len);
+    }
+
   if (ident.ascii_len == 0)
     ident.ascii = NULL;
 
@@ -144,12 +269,28 @@ parse_ident (struct rust_demangler *rdm)
 static void
 print_str (struct rust_demangler *rdm, const char *data, size_t len)
 {
-  if (!rdm->errored)
+  if (!rdm->errored && !rdm->skipping_printing)
     rdm->callback (data, len, rdm->callback_opaque);
 }
 
 #define PRINT(s) print_str (rdm, s, strlen (s))
 
+static void
+print_uint64 (struct rust_demangler *rdm, uint64_t x)
+{
+  char s[21];
+  snprintf (s, 21, "%" PRIu64, x);
+  PRINT (s);
+}
+
+static void
+print_uint64_hex (struct rust_demangler *rdm, uint64_t x)
+{
+  char s[17];
+  snprintf (s, 17, "%" PRIx64, x);
+  PRINT (s);
+}
+
 /* Return a 0x0-0xf value if the char is 0-9a-f, and -1 otherwise. */
 static int
 decode_lower_hex_nibble (char nibble)
@@ -230,9 +371,14 @@ static void
 print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident)
 {
   char unescaped;
-  size_t len;
+  uint8_t *out, *p, d;
+  size_t len, cap, punycode_pos, j;
+  /* Punycode parameters and state. */
+  uint32_t c;
+  size_t base, t_min, t_max, skew, damp, bias, i;
+  size_t delta, w, k, t;
 
-  if (rdm->errored)
+  if (rdm->errored || rdm->skipping_printing)
     return;
 
   if (rdm->version == -1)
@@ -273,8 +419,7 @@ print_ident (struct rust_demangler *rdm, struct 
rust_mangled_ident ident)
                 }
               else
                 {
-                  /* "." becomes "-" */
-                  PRINT ("-");
+                  PRINT (".");
                   len = 1;
                 }
             }
@@ -294,6 +439,830 @@ print_ident (struct rust_demangler *rdm, struct 
rust_mangled_ident ident)
 
       return;
     }
+
+  if (!ident.punycode)
+    {
+      print_str (rdm, ident.ascii, ident.ascii_len);
+      return;
+    }
+
+  len = 0;
+  cap = 4;
+  while (cap < ident.ascii_len)
+    {
+      cap *= 2;
+      /* Check for overflows. */
+      if ((cap * 4) / 4 != cap)
+        {
+          rdm->errored = 1;
+          return;
+        }
+    }
+
+  /* Store the output codepoints as groups of 4 UTF-8 bytes. */
+  out = (uint8_t *)malloc (cap * 4);
+  if (!out)
+    {
+      rdm->errored = 1;
+      return;
+    }
+
+  /* Populate initial output from ASCII fragment. */
+  for (len = 0; len < ident.ascii_len; len++)
+    {
+      p = out + 4 * len;
+      p[0] = 0;
+      p[1] = 0;
+      p[2] = 0;
+      p[3] = ident.ascii[len];
+    }
+
+  /* Punycode parameters and initial state. */
+  base = 36;
+  t_min = 1;
+  t_max = 26;
+  skew = 38;
+  damp = 700;
+  bias = 72;
+  i = 0;
+  c = 0x80;
+
+  punycode_pos = 0;
+  while (punycode_pos < ident.punycode_len)
+    {
+      /* Read one delta value. */
+      delta = 0;
+      w = 1;
+      k = 0;
+      do
+        {
+          k += base;
+          t = k < bias ? 0 : (k - bias);
+          if (t < t_min)
+            t = t_min;
+          if (t > t_max)
+            t = t_max;
+
+          if (punycode_pos >= ident.punycode_len)
+            goto cleanup;
+          d = ident.punycode[punycode_pos++];
+
+          if (ISLOWER (d))
+            d = d - 'a';
+          else if (ISDIGIT (d))
+            d = 26 + (d - '0');
+          else
+            {
+              rdm->errored = 1;
+              goto cleanup;
+            }
+
+          delta += d * w;
+          w *= base - t;
+        }
+      while (d >= t);
+
+      /* Compute the new insert position and character. */
+      len++;
+      i += delta;
+      c += i / len;
+      i %= len;
+
+      /* Ensure enough space is available. */
+      if (cap < len)
+        {
+          cap *= 2;
+          /* Check for overflows. */
+          if ((cap * 4) / 4 != cap || cap < len)
+            {
+              rdm->errored = 1;
+              goto cleanup;
+            }
+        }
+      p = (uint8_t *)realloc (out, cap * 4);
+      if (!p)
+        {
+          rdm->errored = 1;
+          goto cleanup;
+        }
+      out = p;
+
+      /* Move the characters after the insert position. */
+      p = out + i * 4;
+      memmove (p + 4, p, (len - i - 1) * 4);
+
+      /* Insert the new character, as UTF-8 bytes. */
+      p[0] = c >= 0x10000 ? 0xf0 | (c >> 18) : 0;
+      p[1] = c >= 0x800 ? (c < 0x10000 ? 0xe0 : 0x80) | ((c >> 12) & 0x3f) : 0;
+      p[2] = (c < 0x800 ? 0xc0 : 0x80) | ((c >> 6) & 0x3f);
+      p[3] = 0x80 | (c & 0x3f);
+
+      /* If there are no more deltas, decoding is complete. */
+      if (punycode_pos == ident.punycode_len)
+        break;
+
+      i++;
+
+      /* Perform bias adaptation. */
+      delta /= damp;
+      damp = 2;
+
+      delta += delta / len;
+      k = 0;
+      while (delta > ((base - t_min) * t_max) / 2)
+        {
+          delta /= base - t_min;
+          k += base;
+        }
+      bias = k + ((base - t_min + 1) * delta) / (delta + skew);
+    }
+
+  /* Remove all the 0 bytes to leave behind an UTF-8 string. */
+  for (i = 0, j = 0; i < len * 4; i++)
+    if (out[i] != 0)
+      out[j++] = out[i];
+
+  print_str (rdm, (const char *)out, j);
+
+cleanup:
+  free (out);
+}
+
+/* Print the lifetime according to the previously decoded index.
+   An index of `0` always refers to `'_`, but starting with `1`,
+   indices refer to late-bound lifetimes introduced by a binder. */
+static void
+print_lifetime_from_index (struct rust_demangler *rdm, uint64_t lt)
+{
+  char c;
+  uint64_t depth;
+
+  PRINT ("'");
+  if (lt == 0)
+    {
+      PRINT ("_");
+      return;
+    }
+
+  depth = rdm->bound_lifetime_depth - lt;
+  /* Try to print lifetimes alphabetically first. */
+  if (depth < 26)
+    {
+      c = 'a' + depth;
+      print_str (rdm, &c, 1);
+    }
+  else
+    {
+      /* Use `'_123` after running out of letters. */
+      PRINT ("_");
+      print_uint64 (rdm, depth);
+    }
+}
+
+/* Demangling functions. */
+
+static void demangle_binder (struct rust_demangler *rdm);
+static void demangle_path (struct rust_demangler *rdm, int in_value);
+static void demangle_generic_arg (struct rust_demangler *rdm);
+static void demangle_type (struct rust_demangler *rdm);
+static int demangle_path_maybe_open_generics (struct rust_demangler *rdm);
+static void demangle_dyn_trait (struct rust_demangler *rdm);
+static void demangle_const (struct rust_demangler *rdm);
+static void demangle_const_uint (struct rust_demangler *rdm);
+static void demangle_const_int (struct rust_demangler *rdm);
+static void demangle_const_bool (struct rust_demangler *rdm);
+static void demangle_const_char (struct rust_demangler *rdm);
+
+/* Optionally enter a binder ('G') for late-bound lifetimes,
+   printing e.g. `for<'a, 'b> `, and make those lifetimes visible
+   to the caller (via depth level, which the caller should reset). */
+static void
+demangle_binder (struct rust_demangler *rdm)
+{
+  uint64_t i, bound_lifetimes;
+
+  if (rdm->errored)
+    return;
+
+  bound_lifetimes = parse_opt_integer_62 (rdm, 'G');
+  if (bound_lifetimes > 0)
+    {
+      PRINT ("for<");
+      for (i = 0; i < bound_lifetimes; i++)
+        {
+          if (i > 0)
+            PRINT (", ");
+          rdm->bound_lifetime_depth++;
+          print_lifetime_from_index (rdm, 1);
+        }
+      PRINT ("> ");
+    }
+}
+
+static void
+demangle_path (struct rust_demangler *rdm, int in_value)
+{
+  char tag, ns;
+  int was_skipping_printing;
+  size_t i, backref, old_next;
+  uint64_t dis;
+  struct rust_mangled_ident name;
+
+  if (rdm->errored)
+    return;
+
+  switch (tag = next (rdm))
+    {
+    case 'C':
+      dis = parse_disambiguator (rdm);
+      name = parse_ident (rdm);
+
+      print_ident (rdm, name);
+      if (rdm->verbose)
+        {
+          PRINT ("[");
+          print_uint64_hex (rdm, dis);
+          PRINT ("]");
+        }
+      break;
+    case 'N':
+      ns = next (rdm);
+      if (!ISLOWER (ns) && !ISUPPER (ns))
+        {
+          rdm->errored = 1;
+          return;
+        }
+
+      demangle_path (rdm, in_value);
+
+      dis = parse_disambiguator (rdm);
+      name = parse_ident (rdm);
+
+      if (ISUPPER (ns))
+        {
+          /* Special namespaces, like closures and shims. */
+          PRINT ("::{");
+          switch (ns)
+            {
+            case 'C':
+              PRINT ("closure");
+              break;
+            case 'S':
+              PRINT ("shim");
+              break;
+            default:
+              print_str (rdm, &ns, 1);
+            }
+          if (name.ascii || name.punycode)
+            {
+              PRINT (":");
+              print_ident (rdm, name);
+            }
+          PRINT ("#");
+          print_uint64 (rdm, dis);
+          PRINT ("}");
+        }
+      else
+        {
+          /* Implementation-specific/unspecified namespaces. */
+
+          if (name.ascii || name.punycode)
+            {
+              PRINT ("::");
+              print_ident (rdm, name);
+            }
+        }
+      break;
+    case 'M':
+    case 'X':
+      /* Ignore the `impl`'s own path.*/
+      parse_disambiguator (rdm);
+      was_skipping_printing = rdm->skipping_printing;
+      rdm->skipping_printing = 1;
+      demangle_path (rdm, in_value);
+      rdm->skipping_printing = was_skipping_printing;
+      /* fallthrough */
+    case 'Y':
+      PRINT ("<");
+      demangle_type (rdm);
+      if (tag != 'M')
+        {
+          PRINT (" as ");
+          demangle_path (rdm, 0);
+        }
+      PRINT (">");
+      break;
+    case 'I':
+      demangle_path (rdm, in_value);
+      if (in_value)
+        PRINT ("::");
+      PRINT ("<");
+      for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
+        {
+          if (i > 0)
+            PRINT (", ");
+          demangle_generic_arg (rdm);
+        }
+      PRINT (">");
+      break;
+    case 'B':
+      backref = parse_integer_62 (rdm);
+      if (!rdm->skipping_printing)
+        {
+          old_next = rdm->next;
+          rdm->next = backref;
+          demangle_path (rdm, in_value);
+          rdm->next = old_next;
+        }
+      break;
+    default:
+      rdm->errored = 1;
+      return;
+    }
+}
+
+static void
+demangle_generic_arg (struct rust_demangler *rdm)
+{
+  uint64_t lt;
+  if (eat (rdm, 'L'))
+    {
+      lt = parse_integer_62 (rdm);
+      print_lifetime_from_index (rdm, lt);
+    }
+  else if (eat (rdm, 'K'))
+    demangle_const (rdm);
+  else
+    demangle_type (rdm);
+}
+
+static const char *
+basic_type (char tag)
+{
+  switch (tag)
+    {
+    case 'b':
+      return "bool";
+    case 'c':
+      return "char";
+    case 'e':
+      return "str";
+    case 'u':
+      return "()";
+    case 'a':
+      return "i8";
+    case 's':
+      return "i16";
+    case 'l':
+      return "i32";
+    case 'x':
+      return "i64";
+    case 'n':
+      return "i128";
+    case 'i':
+      return "isize";
+    case 'h':
+      return "u8";
+    case 't':
+      return "u16";
+    case 'm':
+      return "u32";
+    case 'y':
+      return "u64";
+    case 'o':
+      return "u128";
+    case 'j':
+      return "usize";
+    case 'f':
+      return "f32";
+    case 'd':
+      return "f64";
+    case 'z':
+      return "!";
+    case 'p':
+      return "_";
+    case 'v':
+      return "...";
+
+    default:
+      return NULL;
+    }
+}
+
+static void
+demangle_type (struct rust_demangler *rdm)
+{
+  char tag;
+  size_t i, old_next, backref;
+  uint64_t lt, old_bound_lifetime_depth;
+  const char *basic;
+  struct rust_mangled_ident abi;
+
+  if (rdm->errored)
+    return;
+
+  tag = next (rdm);
+
+  basic = basic_type (tag);
+  if (basic)
+    {
+      PRINT (basic);
+      return;
+    }
+
+  switch (tag)
+    {
+    case 'R':
+    case 'Q':
+      PRINT ("&");
+      if (eat (rdm, 'L'))
+        {
+          lt = parse_integer_62 (rdm);
+          if (lt)
+            {
+              print_lifetime_from_index (rdm, lt);
+              PRINT (" ");
+            }
+        }
+      if (tag != 'R')
+        PRINT ("mut ");
+      demangle_type (rdm);
+      break;
+    case 'P':
+    case 'O':
+      PRINT ("*");
+      if (tag != 'P')
+        PRINT ("mut ");
+      else
+        PRINT ("const ");
+      demangle_type (rdm);
+      break;
+    case 'A':
+    case 'S':
+      PRINT ("[");
+      demangle_type (rdm);
+      if (tag == 'A')
+        {
+          PRINT ("; ");
+          demangle_const (rdm);
+        }
+      PRINT ("]");
+      break;
+    case 'T':
+      PRINT ("(");
+      for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
+        {
+          if (i > 0)
+            PRINT (", ");
+          demangle_type (rdm);
+        }
+      if (i == 1)
+        PRINT (",");
+      PRINT (")");
+      break;
+    case 'F':
+      old_bound_lifetime_depth = rdm->bound_lifetime_depth;
+      demangle_binder (rdm);
+
+      if (eat (rdm, 'U'))
+        PRINT ("unsafe ");
+
+      if (eat (rdm, 'K'))
+        {
+          if (eat (rdm, 'C'))
+            {
+              abi.ascii = "C";
+              abi.ascii_len = 1;
+            }
+          else
+            {
+              abi = parse_ident (rdm);
+              if (!abi.ascii || abi.punycode)
+                {
+                  rdm->errored = 1;
+                  goto restore;
+                }
+            }
+
+          PRINT ("extern \"");
+
+          /* If the ABI had any `-`, they were replaced with `_`,
+             so the parts between `_` have to be re-joined with `-`. */
+          for (i = 0; i < abi.ascii_len; i++)
+            {
+              if (abi.ascii[i] == '_')
+                {
+                  print_str (rdm, abi.ascii, i);
+                  PRINT ("-");
+                  abi.ascii += i + 1;
+                  abi.ascii_len -= i + 1;
+                  i = 0;
+                }
+            }
+          print_str (rdm, abi.ascii, abi.ascii_len);
+
+          PRINT ("\" ");
+        }
+
+      PRINT ("fn(");
+      for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
+        {
+          if (i > 0)
+            PRINT (", ");
+          demangle_type (rdm);
+        }
+      PRINT (")");
+
+      if (eat (rdm, 'u'))
+        {
+          /* Skip printing the return type if it's 'u', i.e. `()`. */
+        }
+      else
+        {
+          PRINT (" -> ");
+          demangle_type (rdm);
+        }
+
+    /* Restore `bound_lifetime_depth` to outside the binder. */
+    restore:
+      rdm->bound_lifetime_depth = old_bound_lifetime_depth;
+      break;
+    case 'D':
+      PRINT ("dyn ");
+
+      old_bound_lifetime_depth = rdm->bound_lifetime_depth;
+      demangle_binder (rdm);
+
+      for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
+        {
+          if (i > 0)
+            PRINT (" + ");
+          demangle_dyn_trait (rdm);
+        }
+
+      /* Restore `bound_lifetime_depth` to outside the binder. */
+      rdm->bound_lifetime_depth = old_bound_lifetime_depth;
+
+      if (!eat (rdm, 'L'))
+        {
+          rdm->errored = 1;
+          return;
+        }
+      lt = parse_integer_62 (rdm);
+      if (lt)
+        {
+          PRINT (" + ");
+          print_lifetime_from_index (rdm, lt);
+        }
+      break;
+    case 'B':
+      backref = parse_integer_62 (rdm);
+      if (!rdm->skipping_printing)
+        {
+          old_next = rdm->next;
+          rdm->next = backref;
+          demangle_type (rdm);
+          rdm->next = old_next;
+        }
+      break;
+    default:
+      /* Go back to the tag, so `demangle_path` also sees it. */
+      rdm->next--;
+      demangle_path (rdm, 0);
+    }
+}
+
+/* A trait in a trait object may have some "existential projections"
+   (i.e. associated type bindings) after it, which should be printed
+   in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`.
+   To this end, this method will keep the `<...>` of an 'I' path
+   open, by omitting the `>`, and return `Ok(true)` in that case. */
+static int
+demangle_path_maybe_open_generics (struct rust_demangler *rdm)
+{
+  int open;
+  size_t i, old_next, backref;
+
+  open = 0;
+
+  if (rdm->errored)
+    return open;
+
+  if (eat (rdm, 'B'))
+    {
+      backref = parse_integer_62 (rdm);
+      if (!rdm->skipping_printing)
+        {
+          old_next = rdm->next;
+          rdm->next = backref;
+          open = demangle_path_maybe_open_generics (rdm);
+          rdm->next = old_next;
+        }
+    }
+  else if (eat (rdm, 'I'))
+    {
+      demangle_path (rdm, 0);
+      PRINT ("<");
+      open = 1;
+      for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
+        {
+          if (i > 0)
+            PRINT (", ");
+          demangle_generic_arg (rdm);
+        }
+    }
+  else
+    demangle_path (rdm, 0);
+  return open;
+}
+
+static void
+demangle_dyn_trait (struct rust_demangler *rdm)
+{
+  int open;
+  struct rust_mangled_ident name;
+
+  if (rdm->errored)
+    return;
+
+  open = demangle_path_maybe_open_generics (rdm);
+
+  while (eat (rdm, 'p'))
+    {
+      if (!open)
+        PRINT ("<");
+      else
+        PRINT (", ");
+      open = 1;
+
+      name = parse_ident (rdm);
+      print_ident (rdm, name);
+      PRINT (" = ");
+      demangle_type (rdm);
+    }
+
+  if (open)
+    PRINT (">");
+}
+
+static void
+demangle_const (struct rust_demangler *rdm)
+{
+  char ty_tag;
+  size_t old_next, backref;
+
+  if (rdm->errored)
+    return;
+
+  if (eat (rdm, 'B'))
+    {
+      backref = parse_integer_62 (rdm);
+      if (!rdm->skipping_printing)
+        {
+          old_next = rdm->next;
+          rdm->next = backref;
+          demangle_const (rdm);
+          rdm->next = old_next;
+        }
+      return;
+    }
+
+  ty_tag = next (rdm);
+  switch (ty_tag)
+    {
+    /* Placeholder. */
+    case 'p':
+      PRINT ("_");
+      return;
+
+    /* Unsigned integer types. */
+    case 'h':
+    case 't':
+    case 'm':
+    case 'y':
+    case 'o':
+    case 'j':
+      demangle_const_uint (rdm);
+      break;
+
+    /* Signed integer types. */
+    case 'a':
+    case 's':
+    case 'l':
+    case 'x':
+    case 'n':
+    case 'i':
+      demangle_const_int (rdm);
+      break;
+
+    /* Boolean. */
+    case 'b':
+      demangle_const_bool (rdm);
+      break;
+
+    /* Character. */
+    case 'c':
+      demangle_const_char (rdm);
+      break;
+
+    default:
+      rdm->errored = 1;
+      return;
+    }
+
+  if (rdm->errored)
+    return;
+
+  if (rdm->verbose)
+    {
+      PRINT (": ");
+      PRINT (basic_type (ty_tag));
+    }
+}
+
+static void
+demangle_const_uint (struct rust_demangler *rdm)
+{
+  size_t hex_len;
+  uint64_t value;
+
+  if (rdm->errored)
+    return;
+
+  hex_len = parse_hex_nibbles (rdm, &value);
+
+  if (hex_len > 16)
+    {
+      /* Print anything that doesn't fit in `uint64_t` verbatim. */
+      PRINT ("0x");
+      print_str (rdm, rdm->sym + (rdm->next - hex_len), hex_len);
+    }
+  else if (hex_len > 0)
+    print_uint64 (rdm, value);
+  else
+    rdm->errored = 1;
+}
+
+static void
+demangle_const_int (struct rust_demangler *rdm)
+{
+  if (eat (rdm, 'n'))
+    PRINT ("-");
+  demangle_const_uint (rdm);
+}
+
+static void
+demangle_const_bool (struct rust_demangler *rdm)
+{
+  uint64_t value;
+
+  if (parse_hex_nibbles (rdm, &value) != 1)
+    {
+      rdm->errored = 1;
+      return;
+    }
+
+  if (value == 0)
+    PRINT ("false");
+  else if (value == 1)
+    PRINT ("true");
+  else
+    rdm->errored = 1;
+}
+
+static void
+demangle_const_char (struct rust_demangler *rdm)
+{
+  size_t hex_len;
+  uint64_t value;
+
+  hex_len = parse_hex_nibbles (rdm, &value);
+
+  if (hex_len == 0 || hex_len > 8)
+    {
+      rdm->errored = 1;
+      return;
+    }
+
+  /* Match Rust's character "debug" output as best as we can. */
+  PRINT ("'");
+  if (value == '\t')
+    PRINT ("\\t");
+  else if (value == '\r')
+    PRINT ("\\r");
+  else if (value == '\n')
+    PRINT ("\\n");
+  else if (value > ' ' && value < '~')
+    /* Rust also considers many non-ASCII codepoints to be printable, but
+       that logic is not easily ported to C. */
+    print_str (rdm, (char *) &value, 1);
+  else
+    {
+      PRINT ("\\u{");
+      print_uint64_hex (rdm, value);
+      PRINT ("}");
+    }
+  PRINT ("'");
 }
 
 /* A legacy hash is the prefix "h" followed by 16 lowercase hex digits.
@@ -345,11 +1314,15 @@ rust_demangle_callback (const char *mangled, int options,
 
   rdm.next = 0;
   rdm.errored = 0;
+  rdm.skipping_printing = 0;
   rdm.verbose = (options & DMGL_VERBOSE) != 0;
   rdm.version = 0;
+  rdm.bound_lifetime_depth = 0;
 
-  /* Rust symbols always start with _ZN (legacy). */
-  if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N')
+  /* Rust symbols always start with _R (v0) or _ZN (legacy). */
+  if (rdm.sym[0] == '_' && rdm.sym[1] == 'R')
+    rdm.sym += 2;
+  else if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N')
     {
       rdm.sym += 3;
       rdm.version = -1;
@@ -357,7 +1330,11 @@ rust_demangle_callback (const char *mangled, int options,
   else
     return 0;
 
-  /* Legacy Rust symbols use only [_0-9a-zA-Z.:$] characters. */
+  /* Paths (v0) always start with uppercase characters. */
+  if (rdm.version != -1 && !ISUPPER (rdm.sym[0]))
+    return 0;
+
+  /* Rust symbols (v0) use only [_0-9a-zA-Z] characters. */
   for (p = rdm.sym; *p; p++)
     {
       rdm.sym_len++;
@@ -365,6 +1342,7 @@ rust_demangle_callback (const char *mangled, int options,
       if (*p == '_' || ISALNUM (*p))
         continue;
 
+      /* Legacy Rust symbols can also contain [.:$] characters. */
       if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'))
         continue;
 
@@ -418,7 +1396,19 @@ rust_demangle_callback (const char *mangled, int options,
       while (rdm.next < rdm.sym_len);
     }
   else
-    return 0;
+    {
+      demangle_path (&rdm, 1);
+
+      /* Skip instantiating crate. */
+      if (!rdm.errored && rdm.next < rdm.sym_len)
+        {
+          rdm.skipping_printing = 1;
+          demangle_path (&rdm, 0);
+        }
+
+      /* It's an error to not reach the end. */
+      rdm.errored |= rdm.next != rdm.sym_len;
+    }
 
   return !rdm.errored;
 }
diff --git a/libiberty/testsuite/rust-demangle-expected 
b/libiberty/testsuite/rust-demangle-expected
index 74774794736..7dca315d005 100644
--- a/libiberty/testsuite/rust-demangle-expected
+++ b/libiberty/testsuite/rust-demangle-expected
@@ -11,7 +11,7 @@
 #
 ############
 #
-# Coverage Tests
+# Coverage Tests (legacy)
 #
 #
 # Demangles as rust symbol.
@@ -163,3 +163,135 @@ 
_ZN63_$LT$core..ptr..Unique$LT$T$GT$$u20$as$u20$core..ops..Deref$GT$5deref17h19f
 --format=rust
 
_ZN11issue_609253foo37Foo$LT$issue_60925..llv$u6d$..Foo$GT$3foo17h059a991a004536adE
 issue_60925::foo::Foo<issue_60925::llvm::Foo>::foo
+--format=rust
+_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h000b1ad6c4f30bd6E
+core::ops::function::FnOnce::call_once{{vtable.shim}}
+#
+############
+#
+# Coverage Tests (v0)
+#
+#
+# Crate with a leading digit.
+--format=rust
+_RNvC6_123foo3bar
+123foo::bar
+# UTF-8 identifiers.
+--format=rust
+_RNqCs4fqI2P2rA04_11utf8_identsu30____7hkackfecea1cbdathfdh9hlq6y
+utf8_idents::საჭმელად_გემრიელი_სადილი
+# Closure path elements.
+--format=rust
+_RNCNCNgCs6DXkGYLi8lr_2cc5spawn00B5_
+cc::spawn::{closure#0}::{closure#0}
+#
+--format=rust
+_RNCINkXs25_NgCsbmNqQUJIY6D_4core5sliceINyB9_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB9_6memchr7memrchrs_0E0Bb_
+<core::slice::Iter<u8> as 
core::iter::iterator::Iterator>::rposition::<core::slice::memchr::memrchr::{closure#1}>::{closure#0}
+# dyn Trait ("trait object") types.
+--format=rust
+_RINbNbCskIICzLVDPPb_5alloc5alloc8box_freeDINbNiB4_5boxed5FnBoxuEp6OutputuEL_ECs1iopQbuBiw2_3std
+alloc::alloc::box_free::<dyn alloc::boxed::FnBox<(), Output = ()>>
+# Types with const generics parameters.
+--format=rust
+_RNvMC0INtC8arrayvec8ArrayVechKj7b_E3new
+<arrayvec::ArrayVec<u8, 123>>::new
+#
+--format=rust
+_RMCs4fqI2P2rA04_13const_genericINtB0_8UnsignedKhb_E
+<const_generic::Unsigned<11>>
+#
+--format=rust
+_RMCs4fqI2P2rA04_13const_genericINtB0_6SignedKs98_E
+<const_generic::Signed<152>>
+#
+--format=rust
+_RMCs4fqI2P2rA04_13const_genericINtB0_6SignedKanb_E
+<const_generic::Signed<-11>>
+#
+--format=rust
+_RMCs4fqI2P2rA04_13const_genericINtB0_4BoolKb0_E
+<const_generic::Bool<false>>
+#
+--format=rust
+_RMCs4fqI2P2rA04_13const_genericINtB0_4BoolKb1_E
+<const_generic::Bool<true>>
+#
+--format=rust
+_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKc76_E
+<const_generic::Char<'v'>>
+#
+--format=rust
+_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKca_E
+<const_generic::Char<'\n'>>
+#
+--format=rust
+_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKc2202_E
+<const_generic::Char<'\u{2202}'>>
+#
+--format=rust
+_RNvNvMCs4fqI2P2rA04_13const_genericINtB4_3FooKpE3foo3FOO
+<const_generic::Foo<_>>::foo::FOO
+#
+# All of the tests above but in auto mode instead:
+#
+# Crate with a leading digit.
+--format=auto
+_RNvC6_123foo3bar
+123foo::bar
+# UTF-8 identifiers.
+--format=auto
+_RNqCs4fqI2P2rA04_11utf8_identsu30____7hkackfecea1cbdathfdh9hlq6y
+utf8_idents::საჭმელად_გემრიელი_სადილი
+# Closure path elements.
+--format=auto
+_RNCNCNgCs6DXkGYLi8lr_2cc5spawn00B5_
+cc::spawn::{closure#0}::{closure#0}
+#
+--format=auto
+_RNCINkXs25_NgCsbmNqQUJIY6D_4core5sliceINyB9_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB9_6memchr7memrchrs_0E0Bb_
+<core::slice::Iter<u8> as 
core::iter::iterator::Iterator>::rposition::<core::slice::memchr::memrchr::{closure#1}>::{closure#0}
+# dyn Trait ("trait object") types.
+--format=auto
+_RINbNbCskIICzLVDPPb_5alloc5alloc8box_freeDINbNiB4_5boxed5FnBoxuEp6OutputuEL_ECs1iopQbuBiw2_3std
+alloc::alloc::box_free::<dyn alloc::boxed::FnBox<(), Output = ()>>
+# Types with const generics parameters.
+--format=auto
+_RNvMC0INtC8arrayvec8ArrayVechKj7b_E3new
+<arrayvec::ArrayVec<u8, 123>>::new
+#
+--format=auto
+_RMCs4fqI2P2rA04_13const_genericINtB0_8UnsignedKhb_E
+<const_generic::Unsigned<11>>
+#
+--format=auto
+_RMCs4fqI2P2rA04_13const_genericINtB0_6SignedKs98_E
+<const_generic::Signed<152>>
+#
+--format=auto
+_RMCs4fqI2P2rA04_13const_genericINtB0_6SignedKanb_E
+<const_generic::Signed<-11>>
+#
+--format=auto
+_RMCs4fqI2P2rA04_13const_genericINtB0_4BoolKb0_E
+<const_generic::Bool<false>>
+#
+--format=auto
+_RMCs4fqI2P2rA04_13const_genericINtB0_4BoolKb1_E
+<const_generic::Bool<true>>
+#
+--format=auto
+_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKc76_E
+<const_generic::Char<'v'>>
+#
+--format=auto
+_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKca_E
+<const_generic::Char<'\n'>>
+#
+--format=auto
+_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKc2202_E
+<const_generic::Char<'\u{2202}'>>
+#
+--format=auto
+_RNvNvMCs4fqI2P2rA04_13const_genericINtB4_3FooKpE3foo3FOO
+<const_generic::Foo<_>>::foo::FOO

Reply via email to