------- Comment #20 from jakub at gcc dot gnu dot org 2007-12-03 13:09 ------- Andrew, do you have any testcases to back up the #c17 claim?
Anyway, some things can be handled already without VCE (and it is undesirable to generate VCE), like the #c11 testcase. Here are 2 alternatives I've been playing with: --- fold-const.c.jj99 2007-11-29 19:38:34.000000000 +0100 +++ fold-const.c 2007-12-03 13:28:02.000000000 +0100 @@ -14939,6 +14939,23 @@ fold_indirect_ref_1 (tree type, tree op0 tree index = bitsize_int (0); return fold_build3 (BIT_FIELD_REF, type, op, part_width, index); } + else if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type)) + && (INTEGRAL_TYPE_P (optype) || POINTER_TYPE_P (optype)) + && TYPE_PRECISION (type) == GET_MODE_BITSIZE (TYPE_MODE (type))) + { + /* Handle e.g. unsigned short int x; *(short int *) &x; */ + if (TYPE_MODE (type) == TYPE_MODE (optype) + && TYPE_PRECISION (type) == TYPE_PRECISION (optype)) + return fold_convert (type, op); + + /* Handle e.g. unsigned long int x; *(char *) &x; */ + if (TYPE_PRECISION (type) == BITS_PER_UNIT + && TYPE_PRECISION (optype) > TYPE_PRECISION (type) + && TYPE_PRECISION (optype) + == GET_MODE_BITSIZE (TYPE_MODE (optype))) + return fold_build3 (BIT_FIELD_REF, type, op, + bitsize_int (BITS_PER_UNIT), bitsize_int (0)); + } } /* ((foo*)&complexfoo)[1] => __imag__ complexfoo */ vs. --- fold-const.c.jj99 2007-11-29 19:38:34.000000000 +0100 +++ fold-const.c 2007-12-03 13:05:13.000000000 +0100 @@ -14939,6 +14939,46 @@ fold_indirect_ref_1 (tree type, tree op0 tree index = bitsize_int (0); return fold_build3 (BIT_FIELD_REF, type, op, part_width, index); } + else if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type)) + && (INTEGRAL_TYPE_P (optype) || POINTER_TYPE_P (optype)) + && TYPE_PRECISION (type) == GET_MODE_BITSIZE (TYPE_MODE (type))) + { + /* Handle e.g. unsigned short int x; *(short int *) &x; */ + if (TYPE_MODE (type) == TYPE_MODE (optype) + && TYPE_PRECISION (type) == TYPE_PRECISION (optype)) + return fold_convert (type, op); + + /* Handle e.g. 
unsigned long int x; *(char *) &x; */ + if (TYPE_PRECISION (type) == BITS_PER_UNIT + && TYPE_PRECISION (optype) > TYPE_PRECISION (type) + && TYPE_PRECISION (optype) + == GET_MODE_BITSIZE (TYPE_MODE (optype))) + { + int shift = 0; + tree itype; + + if (TYPE_PRECISION (optype) > BITS_PER_WORD) + { + if (WORDS_BIG_ENDIAN) + shift = (TYPE_PRECISION (optype) / BITS_PER_WORD - 1) + * BITS_PER_WORD; + if (BYTES_BIG_ENDIAN) + shift += BITS_PER_WORD - BITS_PER_UNIT; + } + else if (BYTES_BIG_ENDIAN) + shift = TYPE_PRECISION (optype) - BITS_PER_UNIT; + + if (shift == 0) + return fold_convert (type, op); + + itype = lang_hooks.types.type_for_size (TYPE_PRECISION (optype), + TYPE_UNSIGNED (type)); + return fold_convert (type, + fold_build2 (RSHIFT_EXPR, itype, + fold_convert (itype, op), + build_int_cst (NULL, shift))); + } + } } /* ((foo*)&complexfoo)[1] => __imag__ complexfoo */ The handling of integral types with the same size, just different signedness or cv qualification, is valid C, and so is accessing the object through a char pointer. It handles even things which violate strict aliasing, but we have already warned at this point. The same-size case is, I guess, not very controversial; probably it just shouldn't be done if volatile types are involved. The above patches only differ in how they deal with the *(char *) &var case. Both are able to properly optimize out the tell_endian routine, but in other cases they e.g. result in bigger code, even with -Os. 
Say on: struct S { int i; } s; int foo (void) { return *(char *) &s.i; } int bar (int x) { int ret = *(char *) &x; baz (&x); return ret; } on ppc64-linux with -O2 the diff for the first one is: @@ -41,10 +41,11 @@ bar: stdu 1,-144(1) stw 3,192(1) addi 3,1,144 - lbzu 29,48(3) + ldu 29,48(3) bl .baz nop addi 1,1,144 + srdi 29,29,56 mr 3,29 ld 0,16(1) ld 29,-24(1) and for the second patch is: @@ -37,11 +37,12 @@ bar: .bar: mflr 0 std 29,-24(1) + rldicl 29,3,40,56 std 0,16(1) stdu 1,-144(1) - stw 3,192(1) - addi 3,1,144 - lbzu 29,48(3) + addi 9,1,144 + stwu 3,48(9) + mr 3,9 bl .baz nop addi 1,1,144 I guess the latter is probably even faster, but the same difference is there even for -Os. On the other side, on various testcases it will create even shorter code. The important thing is whether this optimization will remove all reasons why something had to be TREE_ADDRESSABLE or not, something the folder really doesn't know. Anyway, would a patch like the second one be appropriate for 4.3 stage 3 with added volatile guards and perhaps also another case to handle ((char *) &i) [1] or other offsetted cases? If VCE can work (I don't see why it wouldn't, but haven't found the reasoning for #c17), we can also handle any type to any other type iff they have the same size, or any optype with char type. -- jakub at gcc dot gnu dot org changed: What |Removed |Added ---------------------------------------------------------------------------- CC| |rguenth at gcc dot gnu dot | |org http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26069