Hi Paul, > On 05/04/2017 03:24 PM, Bruno Haible wrote: > > https://dev.gnupg.org/rCf17d50bbd31b1faa24af1e46c10bba845becf585 > > https://dev.gnupg.org/rCdfb4673da8ee52d95e0a62c9f49ca8599943f22e > > > > But this fix is only effective for GCC. How could a compiler-independent fix > > look like? > > Why do we need a compiler-independent fix? The code itself is portable > as-is, right? So the only reason the change is needed is to pacify GCC > when its warnings are cranked up too high.
I'm not so sure about it. This is the code: char block[16]; ... ((uint32_t *) block)[0] = ((uint32_t *) input)[0] ^ ((uint32_t *) iv)[0]; ((uint32_t *) block)[1] = ((uint32_t *) input)[1] ^ ((uint32_t *) iv)[1]; ((uint32_t *) block)[2] = ((uint32_t *) input)[2] ^ ((uint32_t *) iv)[2]; ((uint32_t *) block)[3] = ((uint32_t *) input)[3] ^ ((uint32_t *) iv)[3]; rijndaelEncrypt (key->rk, key->Nr, block, outBuffer); I think GCC is trying to tell us that, if the rijndaelEncrypt function got inlined, the 4 assignments to '((uint32_t *) block)' might be reordered to occur _after_ the read accesses to 'block' inside the function. This danger is the same for all compilers that do alias-based analysis - at least GCC, xlc [1][2], and SUNWspro C [3]. This appears to not be a mistake in GCC's analysis, because [4] says "It's always assumed that char* aliases other types. However this won't work the other way: there's no assumption that your struct aliases a buffer of chars." To disable this possible optimization and the associated warning - without changing compiler options globally - there appear to be three approaches: (1) Not use ((uint32_t *) block); use the 'char *' based gnulib module 'memxor' instead. Drawback: This is a de-optimization, because memxor does not have a fast path for when the alignment of both source and destination is 4. (2) Use a union. (3) Use the '__may_alias__' attribute.[5][6] The drawbacks of this approach are: - It may trigger internal compiler errors in GCC. [7] - It has no effect on SUNWspro C, since this compiler does not support this attribute. - In the present form, it has no effect on xlc, since Werner Koch's patch [8] did not enable it for xlc. Therefore I would propose to go with the union approach: union { char bytes[16]; uint32_t words[4]; } block; ... block.words[0] = ((uint32_t *) input)[0] ^ ((uint32_t *) iv)[0]; block.words[1] = ((uint32_t *) input)[1] ^ ((uint32_t *) iv)[1]; block.words[2] = ((uint32_t *) input)[2] ^ ((uint32_t *) iv)[2]; block.words[3] = ((uint32_t *) input)[3] ^ ((uint32_t *) iv)[3]; rijndaelEncrypt (key->rk, key->Nr, block.bytes, outBuffer); This code style should still work in 10 years, when compilers have attained even better inlining and type propagation capabilities. Bruno [1] https://www.ibm.com/support/knowledgecenter/en/SSXVZZ_13.1.1/com.ibm.xlcpp1311.lelinux.doc/compiler_ref/opt_alias.html [2] https://www.ibm.com/support/knowledgecenter/SSGH2K_13.1.0/com.ibm.xlc131.aix.doc/compiler_ref/opt_alias.html [3] https://docs.oracle.com/cd/E19205-01/819-5265/bjaso/index.html [4] http://stackoverflow.com/questions/98650/what-is-the-strict-aliasing-rule [5] https://gcc.gnu.org/onlinedocs/gcc-7.1.0/gcc/Common-Type-Attributes.html [6] https://www.ibm.com/support/knowledgecenter/SSGH3R_13.1.2/com.ibm.xlcpp1312.aix.doc/language_ref/type_attr_may_alias.html [7] http://www.liblfds.org/wordpress/?p=520 [8] https://dev.gnupg.org/rCf17d50bbd31b1faa24af1e46c10bba845becf585 2017-05-05 Bruno Haible <br...@clisp.org> crypto/rijndael: Fix "strict-aliasing rules" warnings. * lib/rijndael-api-fst.c (rijndaelBlockEncrypt): Declare 'block' as a union. (rijndaelPadEncrypt, rijndaelBlockDecrypt): Likewise. (rijndaelPadDecrypt): Likewise. Use local variable 'iv' to cache the value of cipher->IV. diff --git a/lib/rijndael-api-fst.c b/lib/rijndael-api-fst.c index b416601..c4972dc 100644 --- a/lib/rijndael-api-fst.c +++ b/lib/rijndael-api-fst.c @@ -203,7 +203,8 @@ rijndaelBlockEncrypt (rijndaelCipherInstance *cipher, size_t inputLen, char *outBuffer) { size_t i, k, t, numBlocks; - char block[16], *iv; + union { char bytes[16]; uint32_t words[4]; } block; + char *iv; if (cipher == NULL || key == NULL || key->direction == RIJNDAEL_DIR_DECRYPT) { @@ -231,15 +232,11 @@ rijndaelBlockEncrypt (rijndaelCipherInstance *cipher, iv = cipher->IV; for (i = numBlocks; i > 0; i--) { - ((uint32_t *) block)[0] = ((uint32_t *) input)[0] ^ - ((uint32_t *) iv)[0]; - ((uint32_t *) block)[1] = ((uint32_t *) input)[1] ^ - ((uint32_t *) iv)[1]; - ((uint32_t *) block)[2] = ((uint32_t *) input)[2] ^ - ((uint32_t *) iv)[2]; - ((uint32_t *) block)[3] = ((uint32_t *) input)[3] ^ - ((uint32_t *) iv)[3]; - rijndaelEncrypt (key->rk, key->Nr, block, outBuffer); + block.words[0] = ((uint32_t *) input)[0] ^ ((uint32_t *) iv)[0]; + block.words[1] = ((uint32_t *) input)[1] ^ ((uint32_t *) iv)[1]; + block.words[2] = ((uint32_t *) input)[2] ^ ((uint32_t *) iv)[2]; + block.words[3] = ((uint32_t *) input)[3] ^ ((uint32_t *) iv)[3]; + rijndaelEncrypt (key->rk, key->Nr, block.bytes, outBuffer); memcpy (cipher->IV, outBuffer, 16); input += 16; outBuffer += 16; @@ -253,8 +250,8 @@ rijndaelBlockEncrypt (rijndaelCipherInstance *cipher, memcpy (outBuffer, input, 16); for (k = 0; k < 128; k++) { - rijndaelEncrypt (key->ek, key->Nr, iv, block); - outBuffer[k >> 3] ^= (block[0] & 0x80U) >> (k & 7); + rijndaelEncrypt (key->ek, key->Nr, iv, block.bytes); + outBuffer[k >> 3] ^= (block.bytes[0] & 0x80U) >> (k & 7); for (t = 0; t < 15; t++) { iv[t] = (iv[t] << 1) | (iv[t + 1] >> 7); @@ -281,7 +278,8 @@ rijndaelPadEncrypt (rijndaelCipherInstance *cipher, size_t inputOctets, char *outBuffer) { size_t i, numBlocks, padLen; - char block[16], *iv; + union { char bytes[16]; uint32_t words[4]; } block; + char *iv; if (cipher == NULL || key == NULL || key->direction == RIJNDAEL_DIR_DECRYPT) { @@ -305,24 +303,20 @@ rijndaelPadEncrypt (rijndaelCipherInstance *cipher, } padLen = 16 - (inputOctets - 16 * numBlocks); assert (padLen > 0 && padLen <= 16); - memcpy (block, input, 16 - padLen); - memset (block + 16 - padLen, padLen, padLen); - rijndaelEncrypt (key->rk, key->Nr, block, outBuffer); + memcpy (block.bytes, input, 16 - padLen); + memset (block.bytes + 16 - padLen, padLen, padLen); + rijndaelEncrypt (key->rk, key->Nr, block.bytes, outBuffer); break; case RIJNDAEL_MODE_CBC: iv = cipher->IV; for (i = numBlocks; i > 0; i--) { - ((uint32_t *) block)[0] = ((uint32_t *) input)[0] ^ - ((uint32_t *) iv)[0]; - ((uint32_t *) block)[1] = ((uint32_t *) input)[1] ^ - ((uint32_t *) iv)[1]; - ((uint32_t *) block)[2] = ((uint32_t *) input)[2] ^ - ((uint32_t *) iv)[2]; - ((uint32_t *) block)[3] = ((uint32_t *) input)[3] ^ - ((uint32_t *) iv)[3]; - rijndaelEncrypt (key->rk, key->Nr, block, outBuffer); + block.words[0] = ((uint32_t *) input)[0] ^ ((uint32_t *) iv)[0]; + block.words[1] = ((uint32_t *) input)[1] ^ ((uint32_t *) iv)[1]; + block.words[2] = ((uint32_t *) input)[2] ^ ((uint32_t *) iv)[2]; + block.words[3] = ((uint32_t *) input)[3] ^ ((uint32_t *) iv)[3]; + rijndaelEncrypt (key->rk, key->Nr, block.bytes, outBuffer); memcpy (cipher->IV, outBuffer, 16); input += 16; outBuffer += 16; @@ -331,13 +325,13 @@ rijndaelPadEncrypt (rijndaelCipherInstance *cipher, assert (padLen > 0 && padLen <= 16); for (i = 0; i < 16 - padLen; i++) { - block[i] = input[i] ^ iv[i]; + block.bytes[i] = input[i] ^ iv[i]; } for (i = 16 - padLen; i < 16; i++) { - block[i] = (char) padLen ^ iv[i]; + block.bytes[i] = (char) padLen ^ iv[i]; } - rijndaelEncrypt (key->rk, key->Nr, block, outBuffer); + rijndaelEncrypt (key->rk, key->Nr, block.bytes, outBuffer); memcpy (cipher->IV, outBuffer, 16); break; @@ -355,7 +349,8 @@ rijndaelBlockDecrypt (rijndaelCipherInstance *cipher, size_t inputLen, char *outBuffer) { size_t i, k, t, numBlocks; - char block[16], *iv; + union { char bytes[16]; uint32_t words[4]; } block; + char *iv; if (cipher == NULL || key == NULL @@ -386,13 +381,13 @@ rijndaelBlockDecrypt (rijndaelCipherInstance *cipher, iv = cipher->IV; for (i = numBlocks; i > 0; i--) { - rijndaelDecrypt (key->rk, key->Nr, input, block); - ((uint32_t *) block)[0] ^= ((uint32_t *) iv)[0]; - ((uint32_t *) block)[1] ^= ((uint32_t *) iv)[1]; - ((uint32_t *) block)[2] ^= ((uint32_t *) iv)[2]; - ((uint32_t *) block)[3] ^= ((uint32_t *) iv)[3]; + rijndaelDecrypt (key->rk, key->Nr, input, block.bytes); + block.words[0] ^= ((uint32_t *) iv)[0]; + block.words[1] ^= ((uint32_t *) iv)[1]; + block.words[2] ^= ((uint32_t *) iv)[2]; + block.words[3] ^= ((uint32_t *) iv)[3]; memcpy (cipher->IV, input, 16); - memcpy (outBuffer, block, 16); + memcpy (outBuffer, block.bytes, 16); input += 16; outBuffer += 16; } @@ -405,13 +400,13 @@ rijndaelBlockDecrypt (rijndaelCipherInstance *cipher, memcpy (outBuffer, input, 16); for (k = 0; k < 128; k++) { - rijndaelEncrypt (key->ek, key->Nr, iv, block); + rijndaelEncrypt (key->ek, key->Nr, iv, block.bytes); for (t = 0; t < 15; t++) { iv[t] = (iv[t] << 1) | (iv[t + 1] >> 7); } iv[15] = (iv[15] << 1) | ((input[k >> 3] >> (7 - (k & 7))) & 1); - outBuffer[k >> 3] ^= (block[0] & 0x80U) >> (k & 7); + outBuffer[k >> 3] ^= (block.bytes[0] & 0x80U) >> (k & 7); } outBuffer += 16; input += 16; @@ -432,7 +427,8 @@ rijndaelPadDecrypt (rijndaelCipherInstance *cipher, size_t inputOctets, char *outBuffer) { size_t i, numBlocks, padLen; - char block[16]; + union { char bytes[16]; uint32_t words[4]; } block; + char *iv; if (cipher == NULL || key == NULL || key->direction == RIJNDAEL_DIR_ENCRYPT) { @@ -460,55 +456,56 @@ rijndaelPadDecrypt (rijndaelCipherInstance *cipher, outBuffer += 16; } /* last block */ - rijndaelDecrypt (key->rk, key->Nr, input, block); - padLen = block[15]; + rijndaelDecrypt (key->rk, key->Nr, input, block.bytes); + padLen = block.bytes[15]; if (padLen >= 16) { return RIJNDAEL_BAD_DATA; } for (i = 16 - padLen; i < 16; i++) { - if (block[i] != padLen) + if (block.bytes[i] != padLen) { return RIJNDAEL_BAD_DATA; } } - memcpy (outBuffer, block, 16 - padLen); + memcpy (outBuffer, block.bytes, 16 - padLen); break; case RIJNDAEL_MODE_CBC: + iv = cipher->IV; /* all blocks but last */ for (i = numBlocks - 1; i > 0; i--) { - rijndaelDecrypt (key->rk, key->Nr, input, block); - ((uint32_t *) block)[0] ^= ((uint32_t *) cipher->IV)[0]; - ((uint32_t *) block)[1] ^= ((uint32_t *) cipher->IV)[1]; - ((uint32_t *) block)[2] ^= ((uint32_t *) cipher->IV)[2]; - ((uint32_t *) block)[3] ^= ((uint32_t *) cipher->IV)[3]; - memcpy (cipher->IV, input, 16); - memcpy (outBuffer, block, 16); + rijndaelDecrypt (key->rk, key->Nr, input, block.bytes); + block.words[0] ^= ((uint32_t *) iv)[0]; + block.words[1] ^= ((uint32_t *) iv)[1]; + block.words[2] ^= ((uint32_t *) iv)[2]; + block.words[3] ^= ((uint32_t *) iv)[3]; + memcpy (iv, input, 16); + memcpy (outBuffer, block.bytes, 16); input += 16; outBuffer += 16; } /* last block */ - rijndaelDecrypt (key->rk, key->Nr, input, block); - ((uint32_t *) block)[0] ^= ((uint32_t *) cipher->IV)[0]; - ((uint32_t *) block)[1] ^= ((uint32_t *) cipher->IV)[1]; - ((uint32_t *) block)[2] ^= ((uint32_t *) cipher->IV)[2]; - ((uint32_t *) block)[3] ^= ((uint32_t *) cipher->IV)[3]; - padLen = block[15]; + rijndaelDecrypt (key->rk, key->Nr, input, block.bytes); + block.words[0] ^= ((uint32_t *) iv)[0]; + block.words[1] ^= ((uint32_t *) iv)[1]; + block.words[2] ^= ((uint32_t *) iv)[2]; + block.words[3] ^= ((uint32_t *) iv)[3]; + padLen = block.bytes[15]; if (padLen <= 0 || padLen > 16) { return RIJNDAEL_BAD_DATA; } for (i = 16 - padLen; i < 16; i++) { - if (block[i] != padLen) + if (block.bytes[i] != padLen) { return RIJNDAEL_BAD_DATA; } } - memcpy (outBuffer, block, 16 - padLen); + memcpy (outBuffer, block.bytes, 16 - padLen); break; default: