Hello list, these clean-ups and minor speed-ups complete some TODOs and semi-finished changes I have gathered in the ELF backend. In a nutshell:
Fixed comment style; used INT_BITS_STRLEN_BOUND from gnulib to be future-proof with respect to integer representation string length; replaced long arguments in the fast printing functions with HOST_WIDE_INT, which is always at least as wide a type (and asserted that); converted some never-negative ints to unsigned. Guarded the output.h:default_elf_asm_output_* declarations, mimicking varasm.c (I'm not exactly sure why this is guarded in the first place). Changed default_elf_asm_output_* to be clearer and faster: they now fwrite() line by line instead of calling putc() char by char. Implemented fast octal output in default_elf_asm_output_*; this should give a good boost to -flto, but I haven't measured a big testcase for this one.
All in all I get a speed-up of ~30 M instr out of ~2 G instr, for -g3 compilation of reload.c. Actually, saving all the putc() calls gives a more significant gain, but I lost a tiny bit because of converting the [sf]print_* functions from long to HOST_WIDE_INT, for PR 51094. So on i586, where HOST_WIDE_INT is 8 bytes wide, I can see slow calls to __u{div,mod}di3 taking place. I don't know whether there is any point in writing LEB128 values greater than 2^31, but I could change all that to HOST_WIDEST_FAST_INT if you think so.
Time savings are minor too, about 10 ms out of 0.85 s. Memory usage is the same. Bootstrapped on x86, no regressions in the C and C++ testsuites.
Thanks Andreas, hp, Mike, for your comments. Mike, I'd appreciate it if you elaborated on how to speed up sprint_uw_rev(); I don't think I understood what you have in mind.
Thanks, Dimitris
2012-08-07 Dimitrios Apostolou <ji...@gmx.net> * final.c: Assert that HOST_WIDE_INT is at least as wide as long. (output_addr_const): Use fprint_w() instead of fprintf() when CONST_INT or CONST_FIXED. (_sprint_uw): New static function. (sprint_ul_rev): Change to: (_sprint_uw_rev): Accept HOST_WIDE_INT arg instead of long. Changed i to unsigned. (INT_BITS_STRLEN_BOUND): Copied from gnulib. (HOST_WIDE_INT_STRING_LEN): Define. (fprint_ul, sprint_ul): Change to: (fprint_uw, sprint_uw): Accept HOST_WIDE_INT arg instead of long. Changed counter variables to unsigned. (fprint_uw_hex): Renamed from fprint_whex. * output.h (fprint_ul, sprint_ul): Remove declarations. (fprint_w, fprint_uw, sprint_uw): Declare. (default_elf_asm_output_limited_string) (default_elf_asm_output_ascii): Wrap in #ifdef ELF_ASCII_ESCAPES. (fprint_uw_hex): Renamed from fprint_whex. * elfos.h (ASM_GENERATE_INTERNAL_LABEL): Use sprint_uw() instead of sprint_ul(). (ASM_OUTPUT_ASCII): Removed semicolon at the end of macro. * i386.c (print_reg): Use fprint_uw() instead of fprint_ul(). * dwarf2asm.c (dw2_asm_output_data_sleb128): Change fprintf() to fputs() plus fprint_w(). Change fputc() to putc() in hot path. (dw2_assemble_integer, dw2_asm_output_data) (dw2_asm_output_data_uleb128): fprint_whex() renamed to fprint_uw_hex(). * dwarf2out.c (dwarf2out_source_line): Changed comment. Use fprint_uw() instead of fprint_ul(). * varasm.c (_elf_escape_char): New static function that writes a char to a string according to ELF_ASCII_ESCAPES. (_elf_output_ascii_line): New static function that writes to file a single .ascii assembler declaration. (default_elf_asm_output_limited_string) (default_elf_asm_output_ascii): Rewrote functions so that they fwrite() a full assembler line instead of calling putc() char by char.
=== modified file 'gcc/config/elfos.h' --- gcc/config/elfos.h 2012-06-19 19:55:33 +0000 +++ gcc/config/elfos.h 2012-08-06 03:19:16 +0000 @@ -119,7 +119,7 @@ see the files COPYING3 and COPYING.RUNTI (LABEL)[0] = '*'; \ (LABEL)[1] = '.'; \ __p = stpcpy (&(LABEL)[2], PREFIX); \ - sprint_ul (__p, (unsigned long) (NUM)); \ + sprint_uw (__p, (unsigned HOST_WIDE_INT) (NUM)); \ } \ while (0) @@ -418,7 +418,7 @@ see the files COPYING3 and COPYING.RUNTI #undef ASM_OUTPUT_ASCII #define ASM_OUTPUT_ASCII(FILE, STR, LENGTH) \ - default_elf_asm_output_ascii ((FILE), (STR), (LENGTH)); + default_elf_asm_output_ascii ((FILE), (STR), (LENGTH)) /* Allow the use of the -frecord-gcc-switches switch via the elf_record_gcc_switches function defined in varasm.c. */ === modified file 'gcc/config/i386/i386.c' --- gcc/config/i386/i386.c 2012-07-28 09:16:52 +0000 +++ gcc/config/i386/i386.c 2012-08-04 16:47:01 +0000 @@ -13995,7 +13995,7 @@ print_reg (rtx x, int code, FILE *file) { gcc_assert (TARGET_64BIT); putc ('r', file); - fprint_ul (file, REGNO (x) - FIRST_REX_INT_REG + 8); + fprint_uw (file, REGNO (x) - FIRST_REX_INT_REG + 8); switch (code) { case 0: === modified file 'gcc/dwarf2asm.c' --- gcc/dwarf2asm.c 2012-05-29 14:14:06 +0000 +++ gcc/dwarf2asm.c 2012-08-06 23:52:37 +0000 @@ -47,7 +47,7 @@ dw2_assemble_integer (int size, rtx x) { fputs (op, asm_out_file); if (CONST_INT_P (x)) - fprint_whex (asm_out_file, (unsigned HOST_WIDE_INT) INTVAL (x)); + fprint_uw_hex (asm_out_file, (unsigned HOST_WIDE_INT) INTVAL (x)); else output_addr_const (asm_out_file, x); } @@ -101,7 +101,7 @@ dw2_asm_output_data (int size, unsigned if (op) { fputs (op, asm_out_file); - fprint_whex (asm_out_file, value); + fprint_uw_hex (asm_out_file, value); } else assemble_integer (GEN_INT (value), size, BITS_PER_UNIT, 1); @@ -593,7 +593,7 @@ dw2_asm_output_data_uleb128 (unsigned HO #ifdef HAVE_AS_LEB128 fputs ("\t.uleb128 ", asm_out_file); - fprint_whex (asm_out_file, value); + fprint_uw_hex (asm_out_file, value); if 
(flag_debug_asm && comment) { @@ -677,7 +677,8 @@ dw2_asm_output_data_sleb128 (HOST_WIDE_I va_start (ap, comment); #ifdef HAVE_AS_LEB128 - fprintf (asm_out_file, "\t.sleb128 " HOST_WIDE_INT_PRINT_DEC, value); + fputs ("\t.sleb128 ", asm_out_file); + fprint_w (asm_out_file, value); if (flag_debug_asm && comment) { @@ -706,7 +707,7 @@ dw2_asm_output_data_sleb128 (HOST_WIDE_I { fprintf (asm_out_file, "%#x", byte); if (more) - fputc (',', asm_out_file); + putc (',', asm_out_file); } else assemble_integer (GEN_INT (byte), 1, BITS_PER_UNIT, 1); === modified file 'gcc/dwarf2out.c' --- gcc/dwarf2out.c 2012-07-24 17:31:01 +0000 +++ gcc/dwarf2out.c 2012-08-04 16:47:01 +0000 @@ -20269,13 +20269,13 @@ dwarf2out_source_line (unsigned int line if (DWARF2_ASM_LINE_DEBUG_INFO) { - /* Emit the .loc directive understood by GNU as. */ - /* "\t.loc %u %u 0 is_stmt %u discriminator %u", - file_num, line, is_stmt, discriminator */ + /* Emit the .loc directive understood by GNU as. Equivalent: */ + /* printf ("\t.loc %u %u 0 is_stmt %u discriminator %u", + file_num, line, is_stmt, discriminator); */ fputs ("\t.loc ", asm_out_file); - fprint_ul (asm_out_file, file_num); + fprint_uw (asm_out_file, file_num); putc (' ', asm_out_file); - fprint_ul (asm_out_file, line); + fprint_uw (asm_out_file, line); putc (' ', asm_out_file); putc ('0', asm_out_file); @@ -20288,7 +20288,7 @@ dwarf2out_source_line (unsigned int line { gcc_assert (discriminator > 0); fputs (" discriminator ", asm_out_file); - fprint_ul (asm_out_file, (unsigned long) discriminator); + fprint_uw (asm_out_file, discriminator); } putc ('\n', asm_out_file); } === modified file 'gcc/final.c' --- gcc/final.c 2012-07-25 16:01:17 +0000 +++ gcc/final.c 2012-08-06 23:42:10 +0000 @@ -3711,7 +3711,7 @@ output_addr_const (FILE *file, rtx x) break; case CONST_INT: - fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + fprint_w (file, INTVAL (x)); break; case CONST: @@ -3741,7 +3741,7 @@ output_addr_const (FILE *file, rtx x) break; case 
CONST_FIXED: - fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_FIXED_VALUE_LOW (x)); + fprint_w (file, CONST_FIXED_VALUE_LOW (x)); break; case PLUS: @@ -3825,10 +3825,17 @@ output_quoted_string (FILE *asm_file, co #endif } -/* Write a HOST_WIDE_INT number in hex form 0x1234, fast. */ +/* The following functions need to work correctly with both long and + HOST_WIDE_INT. */ + +#if HOST_BITS_PER_LONG > HOST_BITS_PER_WIDE_INT + #error "HOST_WIDE_INT is smaller than long!" +#endif + +/* Write a HOST_WIDE_INT number in hex form 0x1234, fast. */ void -fprint_whex (FILE *f, unsigned HOST_WIDE_INT value) +fprint_uw_hex (FILE *f, unsigned HOST_WIDE_INT value) { char buf[2 + CHAR_BIT * sizeof (value) / 4]; if (value == 0) @@ -3845,13 +3852,13 @@ fprint_whex (FILE *f, unsigned HOST_WIDE } } -/* Internal function that prints an unsigned long in decimal in reverse. - The output string IS NOT null-terminated. */ +/* Write to a string an unsigned HOST_WIDE_INT in decimal in reverse. The + output string IS NOT null-terminated. */ -static int -sprint_ul_rev (char *s, unsigned long value) +static unsigned int +_sprint_uw_rev (char *s, unsigned HOST_WIDE_INT value) { - int i = 0; + unsigned int i = 0; do { s[i] = "0123456789"[value % 10]; @@ -3867,42 +3874,78 @@ sprint_ul_rev (char *s, unsigned long va return i; } -/* Write an unsigned long as decimal to a file, fast. */ +/* From gnulib: */ +/* Bound on length of the string representing an unsigned integer + value representable in B bits. log10 (2.0) < 146/485. The + smallest value of B where this bound is not tight is 2621. */ -void -fprint_ul (FILE *f, unsigned long value) -{ - /* python says: len(str(2**64)) == 20 */ - char s[20]; - int i; +#define INT_BITS_STRLEN_BOUND(b) (((b) * 146 + 484) / 485) + +#define HOST_WIDE_INT_STRING_LEN (INT_BITS_STRLEN_BOUND (HOST_BITS_PER_WIDE_INT)) - i = sprint_ul_rev (s, value); +/* Return a statically allocated string with the decimal representation of + VALUE. String IS NOT null-terminated. 
*/ + +static char * +_sprint_uw (unsigned HOST_WIDE_INT value, unsigned int *len) +{ + static char s[HOST_WIDE_INT_STRING_LEN]; + char *s2 = &s[HOST_WIDE_INT_STRING_LEN]; - /* It's probably too small to bother with string reversal and fputs. */ do { - i--; - putc (s[i], f); + s2--; + *s2 = "0123456789"[value % 10]; + value /= 10; } - while (i != 0); + while (value != 0); + + *len = &s[HOST_WIDE_INT_STRING_LEN] - s2; + return s2; +} + +/* Write a signed HOST_WIDE_INT as decimal to a file, fast. */ + +void +fprint_w (FILE *f, HOST_WIDE_INT value) +{ + char *s; + unsigned int len; + + if (value >= 0) + s = _sprint_uw (value, &len); + else + { + s = _sprint_uw ((unsigned HOST_WIDE_INT) (~value) + 1, &len); + putc('-', f); + } + fwrite (s, 1, len, f); +} + +/* Write an unsigned HOST_WIDE_INT as decimal to a file, fast. */ + +void +fprint_uw (FILE *f, unsigned HOST_WIDE_INT value) +{ + unsigned int len; + char *s = _sprint_uw (value, &len); + fwrite (s, 1, len, f); } -/* Write an unsigned long as decimal to a string, fast. +/* Write an unsigned HOST_WIDE_INT as decimal to a string, fast. s must be wide enough to not overflow, at least 21 chars. - Returns the length of the string (without terminating '\0'). */ + Return the length of the string (without terminating '\0'). */ -int -sprint_ul (char *s, unsigned long value) +unsigned int +sprint_uw (char *s, unsigned HOST_WIDE_INT value) { - int len; + unsigned int len, i, j; char tmp_c; - int i; - int j; - len = sprint_ul_rev (s, value); + len = _sprint_uw_rev (s, value); s[len] = '\0'; - /* Reverse the string. */ + /* Reverse the string. 
*/ i = 0; j = len - 1; while (i < j) === modified file 'gcc/output.h' --- gcc/output.h 2012-06-24 17:58:46 +0000 +++ gcc/output.h 2012-08-06 23:53:37 +0000 @@ -125,9 +125,11 @@ extern void output_addr_const (FILE *, r #define ATTRIBUTE_ASM_FPRINTF(m, n) ATTRIBUTE_NONNULL(m) #endif -extern void fprint_whex (FILE *, unsigned HOST_WIDE_INT); -extern void fprint_ul (FILE *, unsigned long); -extern int sprint_ul (char *, unsigned long); +/* Fast functions for writing numbers, fastest is to write in hex. */ +extern void fprint_uw_hex (FILE *, unsigned HOST_WIDE_INT); +extern void fprint_w (FILE *, HOST_WIDE_INT); +extern void fprint_uw (FILE *, unsigned HOST_WIDE_INT); +extern unsigned int sprint_uw (char *, unsigned HOST_WIDE_INT); extern void asm_fprintf (FILE *file, const char *p, ...) ATTRIBUTE_ASM_FPRINTF(2, 3); @@ -598,8 +600,10 @@ extern void file_end_indicate_split_stac extern void default_elf_asm_output_external (FILE *file, tree, const char *); -extern void default_elf_asm_output_limited_string (FILE *, const char *); +#ifdef ELF_ASCII_ESCAPES +extern const char *default_elf_asm_output_limited_string (FILE *, const char *); extern void default_elf_asm_output_ascii (FILE *, const char *, unsigned int); +#endif extern void default_elf_internal_label (FILE *, const char *, unsigned long); extern void default_elf_init_array_asm_out_constructor (rtx, int); === modified file 'gcc/varasm.c' --- gcc/varasm.c 2012-06-19 19:55:33 +0000 +++ gcc/varasm.c 2012-08-06 23:07:18 +0000 @@ -7242,38 +7242,83 @@ make_debug_expr_from_rtl (const_rtx exp) } #ifdef ELF_ASCII_ESCAPES -/* Default ASM_OUTPUT_LIMITED_STRING for ELF targets. */ -void +/* Write a character to a string according to ELF_ASCII_ESCAPES. Assume there + is enough space in P, we need max 4 bytes in case we escape the char in + octal. 
*/ + +static inline char * +_elf_escape_char (char *p, unsigned char c) +{ + char escape = ELF_ASCII_ESCAPES[c]; + switch (escape) + { + case 0: + *(p++) = c; + break; + case 1: + /* Escape char in octal. */ + *(p++) = '\\'; + *(p++) = "01234567" [(c >> 6) & 7]; + *(p++) = "01234567" [(c >> 3) & 7]; + *(p++) = "01234567" [c & 7]; + break; + default: + *(p++) = '\\'; + *(p++) = escape; + break; + } + + return p; +} + +/* Default ASM_OUTPUT_LIMITED_STRING for ELF targets. Returns pointer in s + after last consumed character. */ + +const char * default_elf_asm_output_limited_string (FILE *f, const char *s) { - int escape; - unsigned char c; + /* Worst case size if we escape all characters in string. */ + char buf[sizeof (STRING_ASM_OP) + 3 + ELF_STRING_LIMIT * 4]; + char *p; - fputs (STRING_ASM_OP, f); - putc ('"', f); + p = stpcpy (buf, STRING_ASM_OP "\""); /* Optimised out */ while (*s != '\0') - { - c = *s; - escape = ELF_ASCII_ESCAPES[c]; - switch (escape) - { - case 0: - putc (c, f); - break; - case 1: - /* TODO: Print in hex with fast function, important for -flto. */ - fprintf (f, "\\%03o", c); - break; - default: - putc ('\\', f); - putc (escape, f); - break; - } - s++; - } - putc ('\"', f); - putc ('\n', f); + p = _elf_escape_char (p, *(s++)); + *(p++) = '\"'; + *(p++) = '\n'; + + gcc_checking_assert (sizeof (buf) >= (unsigned long) (p - buf)); + fwrite (buf, 1, p - buf, f); + + return ++s; /* Bypass NULL and return */ +} + +#define ELF_ASCII_BYTE_LIMIT 64 + +/* Output max(ELF_ASCII_BYTE_LIMIT, NBYTES) characters from S as + ".ascii". Return pointer in S after last consumed character. 
*/ + +static const char * +_elf_output_ascii_line (FILE *f, const char *s, unsigned int nbytes) +{ + char buf[sizeof (ASCII_DATA_ASM_OP) + ELF_ASCII_BYTE_LIMIT + 3]; + char *p; + const char *limit = s + nbytes; + + p = stpcpy (buf, ASCII_DATA_ASM_OP "\""); /* Optimised out */ + while (((unsigned long) (p - buf) + < sizeof (buf) - 4 - 2) /* while buffer is not full */ + && (s < limit)) /* and there are more chars */ + /* _elf_escape_char() adds at most 4 characters */ + p = _elf_escape_char (p, *(s++)); + *(p++) = '\"'; + *(p++) = '\n'; + + gcc_checking_assert (sizeof (buf) >= (unsigned long) (p - buf)); + fwrite (buf, 1, p - buf, f); + + return s; } /* Default ASM_OUTPUT_ASCII for ELF targets. */ @@ -7281,78 +7326,45 @@ default_elf_asm_output_limited_string (F void default_elf_asm_output_ascii (FILE *f, const char *s, unsigned int len) { + const char *next_null = s - 1; const char *limit = s + len; - const char *last_null = NULL; - unsigned bytes_in_chunk = 0; - unsigned char c; - int escape; - for (; s < limit; s++) + do { - const char *p; + next_null += strnlen (next_null + 1, limit - next_null - 1) + 1; - if (bytes_in_chunk >= 60) + if (next_null != limit) /* NULL found */ { - putc ('\"', f); - putc ('\n', f); - bytes_in_chunk = 0; - } - if (s > last_null) - { - for (p = s; p < limit && *p != '\0'; p++) - continue; - last_null = p; - } - else - p = last_null; - - if (p < limit && (p - s) <= (long) ELF_STRING_LIMIT) - { - if (bytes_in_chunk > 0) - { - putc ('\"', f); - putc ('\n', f); - bytes_in_chunk = 0; - } - - default_elf_asm_output_limited_string (f, s); - s = p; - } - else - { - if (bytes_in_chunk == 0) - fputs (ASCII_DATA_ASM_OP "\"", f); - - c = *s; - escape = ELF_ASCII_ESCAPES[c]; - switch (escape) - { - case 0: - putc (c, f); - bytes_in_chunk++; - break; - case 1: - /* TODO: Print in hex with fast function, important for -flto. 
*/ - fprintf (f, "\\%03o", c); - bytes_in_chunk += 4; - break; - default: - putc ('\\', f); - putc (escape, f); - bytes_in_chunk += 2; - break; - } + /* If just a NULL byte at start, search for more NULLs */ + if (next_null == s) + while ((next_null + 1) < limit && *(next_null + 1) == '\0') + next_null++; + + /* If short enough */ + if (((unsigned long) (next_null - s) < ELF_STRING_LIMIT) + /* and if it starts with NULL and it is only a + single NULL (empty string) */ + && ((*s != '\0') || (s == next_null))) + /* then output as .string */ + s = default_elf_asm_output_limited_string (f, s); + else + /* long string or many NULLs, output as .ascii */ + while (s < next_null + 1) + s = _elf_output_ascii_line (f, s, next_null - s + 1); + /* We are finished with this string including its NULL byte */ + gcc_checking_assert (s == next_null + 1); } } + while (next_null < limit); - if (bytes_in_chunk > 0) - { - putc ('\"', f); - putc ('\n', f); - } + /* No NULL found until end of s, output as .ascii */ + gcc_checking_assert (next_null == limit); + while (s < next_null) + s = _elf_output_ascii_line (f, s, next_null - s); } + #endif static GTY(()) section *elf_init_array_section;