https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108695

--- Comment #9 from Martin Liška <marxin at gcc dot gnu.org> ---
Actually, looking at the tree dumps before and after the revision, it's leading
to a different place:

The first difference happens in:
test_aes.ltrans0.ltrans.116t.dse2

   <bb 4> [local count: 8687547526]:
-  _118 = MEM[(ulong *)iv_4(D)];
-  _120 = MEM[(ulong *)input_19];
-  _121 = _118 ^ _120;
-  MEM[(ulong *)iv_4(D)] = _121;
-  _129 = MEM[(ulong *)iv_4(D) + 8B];
-  _131 = MEM[(ulong *)input_19 + 8B];
-  _132 = _129 ^ _131;
-  MEM[(ulong *)iv_4(D) + 8B] = _132;

(There's one more optimized-out block like this.) This maps to:

/*
 * Process len bytes from input into output in 16-byte blocks, chaining
 * each block through iv.
 *
 * For every full 16-byte block: XOR16(iv, input, iv) is applied, cryptfn
 * is called with iv as both its third and fourth argument, and the 16
 * bytes at iv are copied to output.
 *
 * If bytes remain after the loop, or pad == PAD_ALWAYS, one more block is
 * prepared by fill_blk() in crypto->blkbuf2, combined with iv via XOR16,
 * passed through cryptfn into output, and copied back into iv.
 *
 * NOTE(review): XOR16, cryptfn, fill_blk, crypto and PAD_ALWAYS are defined
 * outside this excerpt; presumably XOR16(a, b, out) stores a ^ b into out
 * and cryptfn(rkeys, rounds, in, out) encrypts one block -- confirm.
 *
 * cryptfn: per-block callback, invoked as cryptfn(rkeys, rounds, in, out).
 * rkeys:   passed through to cryptfn unchanged.
 * rounds:  passed through to cryptfn unchanged.
 * iv:      16-byte chaining buffer; read and overwritten on every block.
 * pad:     compared against PAD_ALWAYS to force a final block.
 * input:   source bytes.
 * output:  destination; receives 16 bytes per processed block.
 * len:     number of input bytes.
 * olen:    set to len, plus 16-(len&15) when the final block is emitted
 *          (using the residual len left after the loop).
 *
 * Returns 16-(len&15), computed on the residual len, when
 * pad == PAD_ALWAYS or the residual len has any of its low four bits set;
 * otherwise 0.
 */
int  AES_Gen_CBC_Enc(AES_Crypt_Blk_fn *cryptfn,
                     const uchar* rkeys, uint rounds,
                     uchar *iv, uint pad,
                     const uchar *input, uchar *output,
                     ssize_t len, ssize_t *olen)
{
        *olen = len;
        /* Full blocks: iv is updated in place, then copied out. */
        while (len >= 16) {
                XOR16(iv, input, iv);
                cryptfn(rkeys, rounds, iv, iv);
                memcpy(output, iv, 16);
                len -= 16; input += 16; output += 16;
        }
        /* Trailing partial block, or a forced extra block under PAD_ALWAYS. */
        if (len || pad == PAD_ALWAYS) {
                uchar *in = crypto->blkbuf2;
                fill_blk(input, in, len, pad);
                XOR16(iv, in, iv);
                cryptfn(rkeys, rounds, iv, output);
                /* Store last IV */
                memcpy(iv, output, 16);
                /* len is the residual count here, not the original length. */
                *olen += 16-(len&15);
                //memset(in, 0, 16);
                //LFENCE;
        }
        return (pad == PAD_ALWAYS || (len&15))? 16-(len&15): 0;
}

where the XOR16 is implemented as:

/*
 * XORN(in1, in2, out, len): XOR the buffers in1 and in2 into out, one
 * ulong-sized word at a time, for (len)/sizeof(ulong) words.  Any trailing
 * len % sizeof(ulong) bytes are not processed.
 *
 * The arguments are re-evaluated on each loop iteration, so pass only
 * side-effect-free expressions.  out may be the same buffer as in1 or in2.
 *
 * NOTE(review): all three buffers are accessed through ulong lvalues.  If
 * callers pass pointers to objects of another type (e.g. uchar arrays),
 * that is a strict-aliasing and alignment concern -- confirm against the
 * callers.
 */
#define XORN(in1,in2,out,len)   \
do {                            \
        uint _i;                \
        for (_i = 0; _i < (len)/sizeof(ulong); ++_i)      \
                *((ulong*)(out)+_i) = *((ulong*)(in1)+_i) ^ *((ulong*)(in2)+_i);        \
} while(0)

Reply via email to