/usr/local/gcc42/bin/gcc -v
Using built-in specs.
Target: i386-apple-darwin8.8.1
Configured with: ../gcc/configure --prefix=/usr/local/gcc42 --enable-threads
--with-arch=nocona --with-gmp=/sw --with-mpfr=/sw --with-tune=nocona
--disable-nls --enable-languages=c,c++,objc,obj-c++
Thread model: posix
gcc version 4.3.0 20070120 (experimental)

This is svn r120997.

The attached file has three differently-coded but equivalent sets of array
accesses, none of which are compiled to the smallest possible form at -Os.

                        case '<':
                                htmled[i2] = '&'; 
                                htmled[i2+1] = 'l'; 
                                htmled[i2+2] = 't'; 
                                htmled[i2+3] = ';';
                                i2 += 4;
                                break;
                        case '>':
                                htmled[i2++] = '&'; 
                                htmled[i2++] = 'g'; 
                                htmled[i2++] = 't'; 
                                htmled[i2++] = ';';
The third is the same as the second, with i2 declared as unsigned char instead
of int.

gcc with -Os -fno-PIC generates:
        movb    $38, (%ebx,%edx)        # 45    *movqi_1/7      [length = 4]
        leal    (%ebx,%edx), %eax       # 122   *lea_1  [length = 3]
        movb    $108, 1(%eax)   # 48    *movqi_1/7      [length = 4]
        movb    $116, 2(%eax)   # 50    *movqi_1/7      [length = 4]
        movb    $59, 3(%eax)    # 52    *movqi_1/7      [length = 4]
        addl    $4, %edx        # 54    *addsi_1/1      [length = 3]

        movb    $38, (%ebx,%edx)        # 61    *movqi_1/7      [length = 4]
        movb    $103, 1(%edx,%ebx)      # 64    *movqi_1/7      [length = 5]
        movb    $116, 2(%edx,%ebx)      # 67    *movqi_1/7      [length = 5]
        movb    $59, 3(%edx,%ebx)       # 70    *movqi_1/7      [length = 5]
        addl    $4, %edx        # 71    *addsi_1/1      [length = 3]

        movzbl  %dl, %eax       # 129   *zero_extendqisi2_movzbw        [length = 3]
        movb    $38, (%ebx,%eax)        # 61    *movqi_1/7      [length = 4]
        leal    1(%edx), %eax   # 130   *lea_1  [length = 3]
        movzbl  %al, %eax       # 131   *zero_extendqisi2_movzbw        [length = 3]
        movb    $103, (%ebx,%eax)       # 65    *movqi_1/7      [length = 4]
        leal    2(%edx), %eax   # 132   *lea_1  [length = 3]
        movzbl  %al, %eax       # 133   *zero_extendqisi2_movzbw        [length = 3]
        movb    $116, (%ebx,%eax)       # 69    *movqi_1/7      [length = 4]
        leal    3(%edx), %eax   # 134   *lea_1  [length = 3]
        movzbl  %al, %eax       # 135   *zero_extendqisi2_movzbw        [length = 3]
        movb    $59, (%ebx,%eax)        # 73    *movqi_1/7      [length = 4]
        addl    $4, %edx        # 136   *addsi_1/1      [length = 3]

The first is almost perfect, but the lea should be emitted first so that all
four movb instructions address through %eax, instead of the first one using
(%ebx,%edx).

The second is currently the same size as the first (22 bytes), but it should
be transformed into the same lea-based sequence.

The third has a lot of useless instructions, apparently to account for the
unsigned char index wrapping around.

With -m64 added, the second becomes much worse:
        movslq  %ecx,%rax       # 83    extendsidi2_rex64/2     [length = 3]
        movb    $38, (%rsi,%rax)        # 84    *movqi_1/7      [length = 4]
        leal    1(%rcx), %eax   # 152   *lea_1_rex64    [length = 2]
        cltq    # 87    extendsidi2_rex64/1     [length = 2]
        movb    $103, (%rsi,%rax)       # 88    *movqi_1/7      [length = 4]
        leal    2(%rcx), %eax   # 153   *lea_1_rex64    [length = 2]
        cltq    # 91    extendsidi2_rex64/1     [length = 2]
        movb    $116, (%rsi,%rax)       # 92    *movqi_1/7      [length = 4]
        leal    3(%rcx), %eax   # 154   *lea_1_rex64    [length = 2]
        cltq    # 95    extendsidi2_rex64/1     [length = 2]
        movb    $59, (%rsi,%rax)        # 96    *movqi_1/7      [length = 4]
        addl    $4, %ecx        # 97    *addsi_1/1      [length = 3]

Since the compiler doesn't generate all these cltqs for the first version
(which is exactly the same apart from register names), I assume they are
useless.

Note that i2 will never increase beyond 32 (max of ilen * 4), so i2 will never
wrap around even if declared as unsigned char.


-- 
           Summary: Inefficient address calculation on i386
           Product: gcc
           Version: 4.3.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: rtl-optimization
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: astrange at ithinksw dot com
 GCC build triplet: i386-apple-darwin8.8.1
  GCC host triplet: i386-apple-darwin8.8.1
GCC target triplet: i386-apple-darwin8.8.1


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=30517

Reply via email to