http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45980

           Summary: Use bitwise not instead of add to generate a new constant
           Product: gcc
           Version: 4.6.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
        AssignedTo: unassig...@gcc.gnu.org
        ReportedBy: car...@google.com
                CC: car...@google.com
              Host: i686-linux
            Target: arm-eabi
             Build: i686-linux


Compile the following code:

/* Context structure filled in by SHA1Init(): five state words followed by
   two count words. */
typedef struct {
      unsigned long state[5];   /* receives the five constants stored by SHA1Init() */
      unsigned long count[2];   /* both elements are set to 0 by SHA1Init() */
} SHA1_CTX;

/*
 * Initialize *context: store the five SHA1 initialization constants into
 * state[0..4] and zero both count words.  context is dereferenced
 * unconditionally, so it must point to a valid SHA1_CTX.
 *
 * The constants pair up as bitwise complements in 32-bit arithmetic:
 *   0x67452301 + 0x98BADCFE == 0xFFFFFFFF   (state[2] == ~state[0])
 *   0xEFCDAB89 + 0x10325476 == 0xFFFFFFFF   (state[3] == ~state[1])
 * Additionally state[1] == state[0] + 0x88888888 (mod 2^32).
 */
void SHA1Init(SHA1_CTX* context)
{
      /* SHA1 initialization constants */
      context->state[0] = 0x67452301;
      context->state[1] = 0xEFCDAB89;
      context->state[2] = 0x98BADCFE;
      context->state[3] = 0x10325476;
      context->state[4] = 0xC3D2E1F0;
      /* Chained assignment: count[1] is set to 0 first, then count[0]. */
      context->count[0] = context->count[1] = 0;
}

With options -march=armv7-a -mthumb -Os, gcc generates:

@ Code gcc currently generates for SHA1Init(); r0 carries the context pointer.
@ Three constants come from the literal pool, two are derived with add/sub.
SHA1Init:
        ldr     r3, .L2                 @ r3 = 0x67452301 (first pool word)
        str     r3, [r0, #0]            @ context->state[0]
        add     r3, r3, #-2004318072    @ -2004318072 == 0x88888888 mod 2^32; r3 = 0xEFCDAB89
        str     r3, [r0, #4]            @ context->state[1]
        ldr     r3, .L2+4               @ r3 = 0x98BADCFE (second pool word)
        str     r3, [r0, #8]            @ context->state[2]
        sub     r3, r3, #-2004318072     @ r3 = 0x98BADCFE - 0x88888888 = 0x10325476
        str     r3, [r0, #12]           @ context->state[3]
        ldr     r3, .L2+8               @ r3 = 0xC3D2E1F0 (third pool word)
        str     r3, [r0, #16]           @ context->state[4]
        movs    r3, #0
        str     r3, [r0, #24]           @ context->count[1] = 0
        str     r3, [r0, #20]           @ context->count[0] = 0
        bx      lr
.L3:
        .align  2
.L2:                                    @ literal pool: three 32-bit words
        .word   1732584193              @ 0x67452301
        .word   -1732584194             @ 0x98BADCFE (== ~0x67452301)
        .word   -1009589776             @ 0xC3D2E1F0

This function needs to store 5 large constants to memory. Instead of loading
all 5 constants from the constant pool, gcc found that two of them can be
computed from an already-loaded constant by a single add/sub immediate
instruction. But we can do better. Notice that

0x67452301 + 0x98BADCFE = 0xFFFFFFFF
0xEFCDAB89 + 0x10325476 = 0xFFFFFFFF

So if we have one constant of such a pair, the other one can be computed by a
bitwise not. So a shorter result could be:

@ Proposed shorter code for SHA1Init(); r0 carries the context pointer.
@ state[0] and state[1] are kept live in r3/r2 so that state[2] and state[3]
@ can be produced as their bitwise complements, which shrinks the literal
@ pool from three words to two.
SHA1Init:
        ldr     r3, .L2                 @ r3 = 0x67452301 (first pool word)
        str     r3, [r0, #0]            @ context->state[0]
        add     r2, r3, #-2004318072    @ -2004318072 == 0x88888888 mod 2^32; r2 = 0xEFCDAB89
        str     r2, [r0, #4]            @ context->state[1]
        mvns    r3, r3                  @ r3 = ~0x67452301 = 0x98BADCFE
        str     r3, [r0, #8]            @ context->state[2]
        mvns    r2, r2                  @ r2 = ~0xEFCDAB89 = 0x10325476
        str     r2, [r0, #12]           @ context->state[3]
        ldr     r3, .L2+4               @ r3 = 0xC3D2E1F0 (second pool word)
        str     r3, [r0, #16]           @ context->state[4]
        movs    r3, #0
        str     r3, [r0, #24]           @ context->count[1] = 0
        str     r3, [r0, #20]           @ context->count[0] = 0
        bx      lr
.L3:
        .align  2
.L2:                                    @ literal pool: two 32-bit words
        .word   1732584193              @ 0x67452301
        .word   -1009589776             @ 0xC3D2E1F0

Reply via email to