Hello list,

These clean-ups and minor speed-ups complete some TODOs and half-finished changes I have accumulated in the ELF backend. In a nutshell:

I fixed the comment style; used INT_BITS_STRLEN_BOUND from gnulib to be future-proof about the string length of integer representations; replaced the long arguments of the fast printing functions with HOST_WIDE_INT, which is always at least as wide (this is now also asserted); and converted some never-negative ints to unsigned. I guarded the output.h:default_elf_asm_output_* declarations, mimicking varasm.c (I'm not exactly sure why this is guarded in the first place). I changed default_elf_asm_output_* to be clearer and faster: they now fwrite() line by line instead of putting char by char. I implemented fast octal output in default_elf_asm_output_*; this should give a good boost to -flto, but I haven't measured a big testcase for this one.

All in all I get a speed-up of ~30 M instr out of ~2 G instr for a -g3 compilation of reload.c. Actually, saving all the putc() calls gives a more significant gain, but I lost a tiny bit by converting the [sf]print_* functions from long to HOST_WIDE_INT, for PR 51094. So on i586, which has an 8-byte-wide HOST_WIDE_INT, I can see slow calls to __u{div,mod}di3 taking place. I don't know whether there is any point in writing LEB128 values greater than 2^31, but I could change all that to HOST_WIDEST_FAST_INT if you think so.

Time savings are minor too, about 10 ms out of 0.85 s. Memory usage is the same. Bootstrapped on x86, no regressions in the C and C++ testsuites.


Thanks Andreas, hp, and Mike for your comments. Mike, I'd appreciate it if you elaborated on how to speed up sprint_uw_rev(); I don't think I understood what you have in mind.

Thanks,
Dimitris
2012-08-07 Dimitrios Apostolou <ji...@gmx.net>

        * final.c: Assert that HOST_WIDE_INT is at least as wide as long.
        (output_addr_const): Use fprint_w() instead of fprintf() when
        CONST_INT or CONST_FIXED.
        (_sprint_uw): New static function.
        (sprint_ul_rev): Change to:
        (_sprint_uw_rev): Accept HOST_WIDE_INT arg instead of
        long. Changed i to unsigned.
        (INT_BITS_STRLEN_BOUND): Copied from gnulib.
        (HOST_WIDE_INT_STRING_LEN): Define.
        (fprint_ul, sprint_ul): Change to:
        (fprint_uw, sprint_uw): Accept HOST_WIDE_INT arg instead of
        long. Changed counter variables to unsigned.
        (fprint_uw_hex): Renamed from fprint_whex
        * output.h (fprint_ul, sprint_ul): Remove declarations.
        (fprint_w, fprint_uw, sprint_uw): Declare.
        (default_elf_asm_output_limited_string)
        (default_elf_asm_output_ascii): Wrap in #ifdef ELF_ASCII_ESCAPES.
        (fprint_uw_hex): Renamed from fprint_whex.
        * elfos.h (ASM_GENERATE_INTERNAL_LABEL): Use sprint_uw() instead
        of sprint_ul().
        (ASM_OUTPUT_ASCII): Removed semicolon at the end of macro.
        * i386.c (print_reg): Use fprint_uw() instead of fprint_ul().
        * dwarf2asm.c (asm_output_data_sleb128): Change fprintf() to
        fputs() plus fprint_w(). Change fputc() to putc() in hot path.
        (dw2_assemble_integer, dw2_asm_output_data)
        (dw2_asm_output_data_uleb128): fprint_whex() renamed to
        fprint_uw_hex().
        * dwarf2out.c (dwarf2out_source_line): Changed comment. Use
        fprint_uw() instead of fprint_ul().
        * varasm.c (_elf_escape_char): New static function that writes a
        char to a string according to ELF_ASCII_ESCAPES.
        (_elf_output_ascii_line): New static function that writes to file
        a single .ascii assembler declaration.
        (default_elf_asm_output_limited_string)
        (default_elf_asm_output_ascii): Rewrote functions so that they
        fwrite() a full assembler line instead of putting char by char.
=== modified file 'gcc/config/elfos.h'
--- gcc/config/elfos.h  2012-06-19 19:55:33 +0000
+++ gcc/config/elfos.h  2012-08-06 03:19:16 +0000
@@ -119,7 +119,7 @@ see the files COPYING3 and COPYING.RUNTI
       (LABEL)[0] = '*';                                                \
       (LABEL)[1] = '.';                                                \
       __p = stpcpy (&(LABEL)[2], PREFIX);                      \
-      sprint_ul (__p, (unsigned long) (NUM));                  \
+      sprint_uw (__p, (unsigned HOST_WIDE_INT) (NUM));         \
     }                                                          \
   while (0)
 
@@ -418,7 +418,7 @@ see the files COPYING3 and COPYING.RUNTI
 
 #undef  ASM_OUTPUT_ASCII
 #define ASM_OUTPUT_ASCII(FILE, STR, LENGTH)                    \
-  default_elf_asm_output_ascii ((FILE), (STR), (LENGTH));
+  default_elf_asm_output_ascii ((FILE), (STR), (LENGTH))
 
 /* Allow the use of the -frecord-gcc-switches switch via the
    elf_record_gcc_switches function defined in varasm.c.  */

=== modified file 'gcc/config/i386/i386.c'
--- gcc/config/i386/i386.c      2012-07-28 09:16:52 +0000
+++ gcc/config/i386/i386.c      2012-08-04 16:47:01 +0000
@@ -13995,7 +13995,7 @@ print_reg (rtx x, int code, FILE *file)
     {
       gcc_assert (TARGET_64BIT);
       putc ('r', file);
-      fprint_ul (file, REGNO (x) - FIRST_REX_INT_REG + 8);
+      fprint_uw (file, REGNO (x) - FIRST_REX_INT_REG + 8);
       switch (code)
        {
          case 0:

=== modified file 'gcc/dwarf2asm.c'
--- gcc/dwarf2asm.c     2012-05-29 14:14:06 +0000
+++ gcc/dwarf2asm.c     2012-08-06 23:52:37 +0000
@@ -47,7 +47,7 @@ dw2_assemble_integer (int size, rtx x)
     {
       fputs (op, asm_out_file);
       if (CONST_INT_P (x))
-       fprint_whex (asm_out_file, (unsigned HOST_WIDE_INT) INTVAL (x));
+       fprint_uw_hex (asm_out_file, (unsigned HOST_WIDE_INT) INTVAL (x));
       else
        output_addr_const (asm_out_file, x);
     }
@@ -101,7 +101,7 @@ dw2_asm_output_data (int size, unsigned
   if (op)
     {
       fputs (op, asm_out_file);
-      fprint_whex (asm_out_file, value);
+      fprint_uw_hex (asm_out_file, value);
     }
   else
     assemble_integer (GEN_INT (value), size, BITS_PER_UNIT, 1);
@@ -593,7 +593,7 @@ dw2_asm_output_data_uleb128 (unsigned HO
 
 #ifdef HAVE_AS_LEB128
   fputs ("\t.uleb128 ", asm_out_file);
-  fprint_whex (asm_out_file, value);
+  fprint_uw_hex (asm_out_file, value);
 
   if (flag_debug_asm && comment)
     {
@@ -677,7 +677,8 @@ dw2_asm_output_data_sleb128 (HOST_WIDE_I
   va_start (ap, comment);
 
 #ifdef HAVE_AS_LEB128
-  fprintf (asm_out_file, "\t.sleb128 " HOST_WIDE_INT_PRINT_DEC, value);
+  fputs ("\t.sleb128 ", asm_out_file);
+  fprint_w (asm_out_file, value);
 
   if (flag_debug_asm && comment)
     {
@@ -706,7 +707,7 @@ dw2_asm_output_data_sleb128 (HOST_WIDE_I
          {
            fprintf (asm_out_file, "%#x", byte);
            if (more)
-             fputc (',', asm_out_file);
+             putc (',', asm_out_file);
          }
        else
          assemble_integer (GEN_INT (byte), 1, BITS_PER_UNIT, 1);

=== modified file 'gcc/dwarf2out.c'
--- gcc/dwarf2out.c     2012-07-24 17:31:01 +0000
+++ gcc/dwarf2out.c     2012-08-04 16:47:01 +0000
@@ -20269,13 +20269,13 @@ dwarf2out_source_line (unsigned int line
 
   if (DWARF2_ASM_LINE_DEBUG_INFO)
     {
-      /* Emit the .loc directive understood by GNU as.  */
-      /* "\t.loc %u %u 0 is_stmt %u discriminator %u",
-        file_num, line, is_stmt, discriminator */
+      /* Emit the .loc directive understood by GNU as. Equivalent: */
+      /* printf ("\t.loc %u %u 0 is_stmt %u discriminator %u",
+               file_num, line, is_stmt, discriminator); */
       fputs ("\t.loc ", asm_out_file);
-      fprint_ul (asm_out_file, file_num);
+      fprint_uw (asm_out_file, file_num);
       putc (' ', asm_out_file);
-      fprint_ul (asm_out_file, line);
+      fprint_uw (asm_out_file, line);
       putc (' ', asm_out_file);
       putc ('0', asm_out_file);
 
@@ -20288,7 +20288,7 @@ dwarf2out_source_line (unsigned int line
        {
          gcc_assert (discriminator > 0);
          fputs (" discriminator ", asm_out_file);
-         fprint_ul (asm_out_file, (unsigned long) discriminator);
+         fprint_uw (asm_out_file, discriminator);
        }
       putc ('\n', asm_out_file);
     }

=== modified file 'gcc/final.c'
--- gcc/final.c 2012-07-25 16:01:17 +0000
+++ gcc/final.c 2012-08-06 23:42:10 +0000
@@ -3711,7 +3711,7 @@ output_addr_const (FILE *file, rtx x)
       break;
 
     case CONST_INT:
-      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+      fprint_w (file, INTVAL (x));
       break;
 
     case CONST:
@@ -3741,7 +3741,7 @@ output_addr_const (FILE *file, rtx x)
       break;
 
     case CONST_FIXED:
-      fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_FIXED_VALUE_LOW (x));
+      fprint_w (file, CONST_FIXED_VALUE_LOW (x));
       break;
 
     case PLUS:
@@ -3825,10 +3825,17 @@ output_quoted_string (FILE *asm_file, co
 #endif
 }
 
-/* Write a HOST_WIDE_INT number in hex form 0x1234, fast. */
+/* The following functions need to work correctly with both long and
+   HOST_WIDE_INT.  */
+
+#if HOST_BITS_PER_LONG > HOST_BITS_PER_WIDE_INT
+  #error "HOST_WIDE_INT is smaller than long!"
+#endif
+
+/* Write a HOST_WIDE_INT number in hex form 0x1234, fast.  */
 
 void
-fprint_whex (FILE *f, unsigned HOST_WIDE_INT value)
+fprint_uw_hex (FILE *f, unsigned HOST_WIDE_INT value)
 {
   char buf[2 + CHAR_BIT * sizeof (value) / 4];
   if (value == 0)
@@ -3845,13 +3852,13 @@ fprint_whex (FILE *f, unsigned HOST_WIDE
     }
 }
 
-/* Internal function that prints an unsigned long in decimal in reverse.
-   The output string IS NOT null-terminated. */
+/* Write to a string an unsigned HOST_WIDE_INT in decimal in reverse.  The
+   output string IS NOT null-terminated.  */
 
-static int
-sprint_ul_rev (char *s, unsigned long value)
+static unsigned int
+_sprint_uw_rev (char *s, unsigned HOST_WIDE_INT value)
 {
-  int i = 0;
+  unsigned int i = 0;
   do
     {
       s[i] = "0123456789"[value % 10];
@@ -3867,42 +3874,78 @@ sprint_ul_rev (char *s, unsigned long va
   return i;
 }
 
-/* Write an unsigned long as decimal to a file, fast. */
+/* From gnulib:  */
+/* Bound on length of the string representing an unsigned integer
+   value representable in B bits.  log10 (2.0) < 146/485.  The
+   smallest value of B where this bound is not tight is 2621.  */
 
-void
-fprint_ul (FILE *f, unsigned long value)
-{
-  /* python says: len(str(2**64)) == 20 */
-  char s[20];
-  int i;
+#define INT_BITS_STRLEN_BOUND(b) (((b) * 146 + 484) / 485)
+
+#define HOST_WIDE_INT_STRING_LEN (INT_BITS_STRLEN_BOUND (HOST_BITS_PER_WIDE_INT))
 
-  i = sprint_ul_rev (s, value);
+/* Return a statically allocated string with the decimal representation of
+   VALUE. String IS NOT null-terminated.  */
+
+static char *
+_sprint_uw (unsigned HOST_WIDE_INT value, unsigned int *len)
+{
+  static char s[HOST_WIDE_INT_STRING_LEN];
+  char *s2 = &s[HOST_WIDE_INT_STRING_LEN];
 
-  /* It's probably too small to bother with string reversal and fputs. */
   do
     {
-      i--;
-      putc (s[i], f);
+      s2--;
+      *s2 = "0123456789"[value % 10];
+      value /= 10;
     }
-  while (i != 0);
+  while (value != 0);
+
+  *len = &s[HOST_WIDE_INT_STRING_LEN] - s2;
+  return s2;
+}
+
+/* Write a signed HOST_WIDE_INT as decimal to a file, fast.  */
+
+void
+fprint_w (FILE *f, HOST_WIDE_INT value)
+{
+  char *s;
+  unsigned int len;
+
+  if (value >= 0)
+    s = _sprint_uw (value, &len);
+  else
+    {
+      s = _sprint_uw ((unsigned HOST_WIDE_INT) (~value) + 1, &len);
+      putc('-', f);
+    }
+  fwrite (s, 1, len, f);
+}
+
+/* Write an unsigned HOST_WIDE_INT as decimal to a file, fast.  */
+
+void
+fprint_uw (FILE *f, unsigned HOST_WIDE_INT value)
+{
+  unsigned int len;
+  char *s = _sprint_uw (value, &len);
+  fwrite (s, 1, len, f);
 }
 
-/* Write an unsigned long as decimal to a string, fast.
+/* Write an unsigned HOST_WIDE_INT as decimal to a string, fast.
    s must be wide enough to not overflow, at least 21 chars.
-   Returns the length of the string (without terminating '\0'). */
+   Return the length of the string (without terminating '\0').  */
 
-int
-sprint_ul (char *s, unsigned long value)
+unsigned int
+sprint_uw (char *s, unsigned HOST_WIDE_INT value)
 {
-  int len;
+  unsigned int len, i, j;
   char tmp_c;
-  int i;
-  int j;
 
-  len = sprint_ul_rev (s, value);
+  len = _sprint_uw_rev (s, value);
   s[len] = '\0';
 
-  /* Reverse the string. */
+  /* Reverse the string.  */
   i = 0;
   j = len - 1;
   while (i < j)

=== modified file 'gcc/output.h'
--- gcc/output.h        2012-06-24 17:58:46 +0000
+++ gcc/output.h        2012-08-06 23:53:37 +0000
@@ -125,9 +125,11 @@ extern void output_addr_const (FILE *, r
 #define ATTRIBUTE_ASM_FPRINTF(m, n) ATTRIBUTE_NONNULL(m)
 #endif
 
-extern void fprint_whex (FILE *, unsigned HOST_WIDE_INT);
-extern void fprint_ul (FILE *, unsigned long);
-extern int sprint_ul (char *, unsigned long);
+/* Fast functions for writing numbers, fastest is to write in hex.  */
+extern void fprint_uw_hex (FILE *, unsigned HOST_WIDE_INT);
+extern void fprint_w (FILE *, HOST_WIDE_INT);
+extern void fprint_uw (FILE *, unsigned HOST_WIDE_INT);
+extern unsigned int sprint_uw (char *, unsigned HOST_WIDE_INT);
 
 extern void asm_fprintf (FILE *file, const char *p, ...)
      ATTRIBUTE_ASM_FPRINTF(2, 3);
@@ -598,8 +600,10 @@ extern void file_end_indicate_split_stac
 
 extern void default_elf_asm_output_external (FILE *file, tree,
                                             const char *);
-extern void default_elf_asm_output_limited_string (FILE *, const char *);
+#ifdef ELF_ASCII_ESCAPES
+extern const char *default_elf_asm_output_limited_string (FILE *, const char *);
 extern void default_elf_asm_output_ascii (FILE *, const char *, unsigned int);
+#endif
 extern void default_elf_internal_label (FILE *, const char *, unsigned long);
 
 extern void default_elf_init_array_asm_out_constructor (rtx, int);

=== modified file 'gcc/varasm.c'
--- gcc/varasm.c        2012-06-19 19:55:33 +0000
+++ gcc/varasm.c        2012-08-06 23:07:18 +0000
@@ -7242,38 +7242,83 @@ make_debug_expr_from_rtl (const_rtx exp)
 }
 
 #ifdef ELF_ASCII_ESCAPES
-/* Default ASM_OUTPUT_LIMITED_STRING for ELF targets.  */
 
-void
+/* Write a character to a string according to ELF_ASCII_ESCAPES. Assume there
+   is enough space in P, we need max 4 bytes in case we escape the char in
+   octal.  */
+
+static inline char *
+_elf_escape_char (char *p, unsigned char c)
+{
+  char escape = ELF_ASCII_ESCAPES[c];
+  switch (escape)
+    {
+    case 0:
+      *(p++) = c;
+      break;
+    case 1:
+      /* Escape char in octal. */
+      *(p++) = '\\';
+      *(p++) = "01234567" [(c >> 6) & 7];
+      *(p++) = "01234567" [(c >> 3) & 7];
+      *(p++) = "01234567" [c & 7];
+      break;
+    default:
+      *(p++) = '\\';
+      *(p++) = escape;
+      break;
+    }
+
+  return p;
+}
+
+/* Default ASM_OUTPUT_LIMITED_STRING for ELF targets. Returns pointer in s
+   after last consumed character.  */
+
+const char *
 default_elf_asm_output_limited_string (FILE *f, const char *s)
 {
-  int escape;
-  unsigned char c;
+  /* Worst case size if we escape all characters in string.  */
+  char buf[sizeof (STRING_ASM_OP) + 3 + ELF_STRING_LIMIT * 4];
+  char *p;
 
-  fputs (STRING_ASM_OP, f);
-  putc ('"', f);
+  p = stpcpy (buf, STRING_ASM_OP "\"");                /* Optimised out */
   while (*s != '\0')
-    {
-      c = *s;
-      escape = ELF_ASCII_ESCAPES[c];
-      switch (escape)
-       {
-       case 0:
-         putc (c, f);
-         break;
-       case 1:
-         /* TODO: Print in hex with fast function, important for -flto. */
-         fprintf (f, "\\%03o", c);
-         break;
-       default:
-         putc ('\\', f);
-         putc (escape, f);
-         break;
-       }
-      s++;
-    }
-  putc ('\"', f);
-  putc ('\n', f);
+    p = _elf_escape_char (p, *(s++));
+  *(p++) = '\"';
+  *(p++) = '\n';
+
+  gcc_checking_assert (sizeof (buf) >= (unsigned long) (p - buf));
+  fwrite (buf, 1, p - buf, f);
+
+  return ++s;                  /* Bypass NULL and return */
+}
+
+#define ELF_ASCII_BYTE_LIMIT 64
+
+/* Output up to NBYTES characters from S as one ".ascii" line, stopping early
+   if the buffer fills.  Return pointer in S after last consumed character.  */
+
+static const char *
+_elf_output_ascii_line (FILE *f, const char *s, unsigned int nbytes)
+{
+  char buf[sizeof (ASCII_DATA_ASM_OP) + ELF_ASCII_BYTE_LIMIT + 3];
+  char *p;
+  const char *limit = s + nbytes;
+
+  p = stpcpy (buf, ASCII_DATA_ASM_OP "\"");    /* Optimised out */
+  while (((unsigned long) (p - buf)
+          < sizeof (buf) - 4 - 2)              /* while buffer is not full */
+        && (s < limit))                        /* and there are more chars */
+    /* _elf_escape_char() adds at most 4 characters */
+    p = _elf_escape_char (p, *(s++));
+  *(p++) = '\"';
+  *(p++) = '\n';
+
+  gcc_checking_assert (sizeof (buf) >= (unsigned long) (p - buf));
+  fwrite (buf, 1, p - buf, f);
+
+  return s;
 }
 
 /* Default ASM_OUTPUT_ASCII for ELF targets.  */
@@ -7281,78 +7326,45 @@ default_elf_asm_output_limited_string (F
 void
 default_elf_asm_output_ascii (FILE *f, const char *s, unsigned int len)
 {
+  const char *next_null = s - 1;
   const char *limit = s + len;
-  const char *last_null = NULL;
-  unsigned bytes_in_chunk = 0;
-  unsigned char c;
-  int escape;
 
-  for (; s < limit; s++)
+  do
     {
-      const char *p;
+      next_null += strnlen (next_null + 1, limit - next_null - 1) + 1;
 
-      if (bytes_in_chunk >= 60)
+      if (next_null != limit)                  /* NULL found */
        {
-         putc ('\"', f);
-         putc ('\n', f);
-         bytes_in_chunk = 0;
-       }
 
-      if (s > last_null)
-       {
-         for (p = s; p < limit && *p != '\0'; p++)
-           continue;
-         last_null = p;
-       }
-      else
-       p = last_null;
-
-      if (p < limit && (p - s) <= (long) ELF_STRING_LIMIT)
-       {
-         if (bytes_in_chunk > 0)
-           {
-             putc ('\"', f);
-             putc ('\n', f);
-             bytes_in_chunk = 0;
-           }
-
-         default_elf_asm_output_limited_string (f, s);
-         s = p;
-       }
-      else
-       {
-         if (bytes_in_chunk == 0)
-           fputs (ASCII_DATA_ASM_OP "\"", f);
-
-         c = *s;
-         escape = ELF_ASCII_ESCAPES[c];
-         switch (escape)
-           {
-           case 0:
-             putc (c, f);
-             bytes_in_chunk++;
-             break;
-           case 1:
-             /* TODO: Print in hex with fast function, important for -flto. */
-             fprintf (f, "\\%03o", c);
-             bytes_in_chunk += 4;
-             break;
-           default:
-             putc ('\\', f);
-             putc (escape, f);
-             bytes_in_chunk += 2;
-             break;
-           }
+         /* If just a NULL byte at start, search for more NULLs */
+         if (next_null == s)
+           while ((next_null + 1) < limit && *(next_null + 1) == '\0')
+             next_null++;
+
+         /* If short enough */
+         if (((unsigned long) (next_null - s) < ELF_STRING_LIMIT)
+             /* and it either does not start with NUL or consists
+                of just a single NUL (empty string) */
+             && ((*s != '\0') || (s == next_null)))
+           /* then output as .string */
+           s = default_elf_asm_output_limited_string (f, s);
+         else
+           /* long string or many NULLs, output as .ascii */
+           while (s < next_null + 1)
+             s = _elf_output_ascii_line (f, s, next_null - s + 1);
 
+         /* We are finished with this string including its NULL byte */
+         gcc_checking_assert (s == next_null + 1);
        }
     }
+  while (next_null < limit);
 
-  if (bytes_in_chunk > 0)
-    {
-      putc ('\"', f);
-      putc ('\n', f);
-    }
+  /* No NULL found until end of s, output as .ascii */
+  gcc_checking_assert (next_null == limit);
+  while (s < next_null)
+    s = _elf_output_ascii_line (f, s, next_null - s);
 }
+
 #endif
 
 static GTY(()) section *elf_init_array_section;

Reply via email to