On Fri, May 10, 2013 at 12:03 PM, Marc Glisse <marc.gli...@inria.fr> wrote: > On Fri, 10 May 2013, Richard Biener wrote: > >> On Thu, May 9, 2013 at 12:55 AM, Marc Glisse <marc.gli...@inria.fr> wrote: >>> >>> Hello, >>> >>> here are a few more changes to fold-const.c so vectors can use the >>> existing >>> optimizations. Note that I made fold_truth_not_expr safe for use with >>> vector >>> BIT_NOT_EXPR. >>> >>> Passes bootstrap+testsuite on x86_64-linux-gnu. >>> >>> 2013-05-09 Marc Glisse <marc.gli...@inria.fr> >>> >>> gcc/ >>> * fold-const.c (fold_negate_expr): Handle vectors. >>> (fold_truth_not_expr): Likewise. >>> (invert_truthvalue_loc): Likewise. >>> (fold_single_bit_test): Likewise. >>> (fold_comparison): Likewise. >>> (fold_ternary_loc): Likewise. >>> >>> gcc/testsuite/ >>> * g++.dg/ext/vector22.C: New testcase. >>> >>> -- >>> Marc Glisse >>> Index: gcc/testsuite/g++.dg/ext/vector22.C >>> =================================================================== >>> --- gcc/testsuite/g++.dg/ext/vector22.C (revision 0) >>> +++ gcc/testsuite/g++.dg/ext/vector22.C (revision 0) >>> @@ -0,0 +1,20 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-options "-O -fdump-tree-gimple" } */ >>> + >>> +typedef unsigned vec __attribute__((vector_size(4*sizeof(int)))); >>> + >>> +void f(vec*a,vec*b){ >>> + *a=(*a)?-1:(*b<10); >>> + *b=(*b)?(*a<10):0; >>> +} >>> +void g(vec*a,vec*b){ >>> + *a=(*a)?(*a<*a):-1; >>> + *b=(*b)?-1:(*b<*b); >>> +} >>> +void h(vec*a){ >>> + *a=(~*a==5); >>> +} >>> + >>> +/* { dg-final { scan-tree-dump-not "~" "gimple" } } */ >>> +/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR" "gimple" } } */ >>> +/* { dg-final { cleanup-tree-dump "gimple" } } */ >>> >>> Property changes on: gcc/testsuite/g++.dg/ext/vector22.C >>> ___________________________________________________________________ >>> Added: svn:eol-style >>> + native >>> Added: svn:keywords >>> + Author Date Id Revision URL >>> >>> Index: gcc/fold-const.c >>> =================================================================== >>> --- gcc/fold-const.c (revision 198726) >>> +++ gcc/fold-const.c (working copy) >>> @@ -519,21 +519,21 @@ fold_negate_expr (location_t loc, tree t >>> { >>> tree type = TREE_TYPE (t); >>> tree tem; >>> >>> switch (TREE_CODE (t)) >>> { >>> /* Convert - (~A) to A + 1. */ >>> case BIT_NOT_EXPR: >>> if (INTEGRAL_TYPE_P (type)) >>> return fold_build2_loc (loc, PLUS_EXPR, type, TREE_OPERAND (t, >>> 0), >>> - build_int_cst (type, 1)); >>> + build_one_cst (type)); >>> break; >>> >>> case INTEGER_CST: >>> tem = fold_negate_const (t, type); >>> if (TREE_OVERFLOW (tem) == TREE_OVERFLOW (t) >>> || !TYPE_OVERFLOW_TRAPS (type)) >>> return tem; >>> break; >>> >>> case REAL_CST: >>> @@ -3110,20 +3110,23 @@ fold_truth_not_expr (location_t loc, tre >>> >>> return build2_loc (loc, code, type, TREE_OPERAND (arg, 0), >>> TREE_OPERAND (arg, 1)); >>> } >>> >>> switch (code) >>> { >>> case INTEGER_CST: >>> return constant_boolean_node (integer_zerop (arg), type); >>> >>> + case VECTOR_CST: >>> + return fold_unary_loc (loc, BIT_NOT_EXPR, type, arg); >>> + >>> case TRUTH_AND_EXPR: >>> loc1 = expr_location_or (TREE_OPERAND (arg, 0), loc); >>> loc2 = expr_location_or (TREE_OPERAND (arg, 1), loc); >>> return build2_loc (loc, TRUTH_OR_EXPR, type, >>> invert_truthvalue_loc (loc1, TREE_OPERAND (arg, >>> 0)), >>> invert_truthvalue_loc (loc2, TREE_OPERAND (arg, >>> 1))); >>> >>> case TRUTH_OR_EXPR: >>> loc1 = expr_location_or (TREE_OPERAND (arg, 0), loc); >>> loc2 = expr_location_or (TREE_OPERAND (arg, 1), loc); >>> @@ -3195,26 +3198,28 @@ fold_truth_not_expr (location_t loc, tre >>> return build1_loc (loc, TRUTH_NOT_EXPR, type, arg); >>> >>> /* ... fall through ... */ >>> >>> case FLOAT_EXPR: >>> loc1 = expr_location_or (TREE_OPERAND (arg, 0), loc); >>> return build1_loc (loc, TREE_CODE (arg), type, >>> invert_truthvalue_loc (loc1, TREE_OPERAND (arg, >>> 0))); >>> >>> case BIT_AND_EXPR: >>> - if (!integer_onep (TREE_OPERAND (arg, 1))) >>> + if (VECTOR_TYPE_P (type) || !integer_onep (TREE_OPERAND (arg, 1))) >>> return NULL_TREE; >>> return build2_loc (loc, EQ_EXPR, type, arg, build_int_cst (type, >>> 0)); >>> >>> case SAVE_EXPR: >>> - return build1_loc (loc, TRUTH_NOT_EXPR, type, arg); >>> + return build1_loc (loc, VECTOR_TYPE_P (type) ? BIT_NOT_EXPR >>> + : TRUTH_NOT_EXPR, >>> + type, arg); >>> >>> case CLEANUP_POINT_EXPR: >>> loc1 = expr_location_or (TREE_OPERAND (arg, 0), loc); >>> return build1_loc (loc, CLEANUP_POINT_EXPR, type, >>> invert_truthvalue_loc (loc1, TREE_OPERAND (arg, >>> 0))); >>> >>> default: >>> return NULL_TREE; >>> } >>> } >>> @@ -3222,28 +3227,28 @@ fold_truth_not_expr (location_t loc, tre >>> /* Return a simplified tree node for the truth-negation of ARG. This >>> never alters ARG itself. We assume that ARG is an operation that >>> returns a truth value (0 or 1). >>> >>> FIXME: one would think we would fold the result, but it causes >>> problems with the dominator optimizer. */ >>> >>> tree >>> invert_truthvalue_loc (location_t loc, tree arg) >>> { >>> - tree tem; >>> - >>> if (TREE_CODE (arg) == ERROR_MARK) >>> return arg; >>> >>> - tem = fold_truth_not_expr (loc, arg); >>> + tree tem = fold_truth_not_expr (loc, arg); >>> + tree type = TREE_TYPE (arg); >>> if (!tem) >>> - tem = build1_loc (loc, TRUTH_NOT_EXPR, TREE_TYPE (arg), arg); >>> + tem = build1_loc (loc, VECTOR_TYPE_P (type) ? BIT_NOT_EXPR : >>> TRUTH_NOT_EXPR, >>> + type, arg); >> >> >> I'd rather avoid changing fold_truth_not_expr for vector types (we >> shouldn't >> ever truth-negate vectors) but use fold_unary_loc (BIT_NOT_EXPR... here. >> And add a comment that for vectors a truth-value is either all-zeros or >> all-ones. > > > So invert_truthvalue_loc would be ok for vectors but not > fold_truth_not_expr?
Oddly yes ;) vectors do have truthvalues but there is never a TRUTH_NOT_EXPR of a vector ... > Ok, but I'll have to duplicate some code from > fold_truth_not_expr into the BIT_NOT_EXPR part of fold_unary_loc (or extract > it into its own function). There were other pieces of existing code that > called fold_truth_not_expr on vectors, but only after checking that they > wouldn't go further than the first case in fold_truth_not_expr, which is > safe. I guess I should make them call > fold_unary_loc(vec?BIT_NOT_EXPR:TRUTH_NOT_EXPR,...) as well, if I can find > them (I'll try to add an assert in fold_truth_not_expr). > > >>> return tem; >>> } >>> >>> /* Given a bit-wise operation CODE applied to ARG0 and ARG1, see if both >>> operands are another bit-wise operation with a common input. If so, >>> distribute the bit operations to save an operation and possibly two >>> if >>> constants are involved. For example, convert >>> (A | B) & (A | C) into A | (B & C) >>> Further simplification will occur if B and C are constants. >>> @@ -6602,21 +6607,21 @@ fold_single_bit_test (location_t loc, en >>> >>> signed_type = lang_hooks.types.type_for_mode (operand_mode, 0); >>> unsigned_type = lang_hooks.types.type_for_mode (operand_mode, 1); >>> intermediate_type = ops_unsigned ? unsigned_type : signed_type; >>> inner = fold_convert_loc (loc, intermediate_type, inner); >>> >>> if (bitnum != 0) >>> inner = build2 (RSHIFT_EXPR, intermediate_type, >>> inner, size_int (bitnum)); >>> >>> - one = build_int_cst (intermediate_type, 1); >>> + one = build_one_cst (intermediate_type); >> >> >> Are you sure that the langhooks even know the proper vector types? Or >> that >> the transforms that fold_single_bit_test does are even profitable for >> vectors? >> (the idea is that instructions modify flags accordingly) >> >> The rest looks ok to me. >> >> Richard. >> >>> if (code == EQ_EXPR) >>> inner = fold_build2_loc (loc, BIT_XOR_EXPR, intermediate_type, >>> inner, one); >>> >>> /* Put the AND last so it can combine with more things. */ >>> inner = build2 (BIT_AND_EXPR, intermediate_type, inner, one); >>> >>> /* Make sure to return the proper type. */ >>> inner = fold_convert_loc (loc, result_type, inner); >>> >>> @@ -9578,21 +9583,21 @@ fold_comparison (location_t loc, enum tr >>> { >>> tree cmp_type = TREE_TYPE (TREE_OPERAND (arg0, 0)); >>> return fold_build2_loc (loc, code, type, >>> fold_convert_loc (loc, cmp_type, >>> TREE_OPERAND (arg1, 0)), >>> TREE_OPERAND (arg0, 0)); >>> } >>> >>> /* Fold ~X op C as X op' ~C, where op' is the swapped comparison. */ >>> if (TREE_CODE (arg0) == BIT_NOT_EXPR >>> - && TREE_CODE (arg1) == INTEGER_CST) >>> + && (TREE_CODE (arg1) == INTEGER_CST || TREE_CODE (arg1) == >>> VECTOR_CST)) >>> { >>> tree cmp_type = TREE_TYPE (TREE_OPERAND (arg0, 0)); >>> return fold_build2_loc (loc, swap_tree_comparison (code), type, >>> TREE_OPERAND (arg0, 0), >>> fold_build1_loc (loc, BIT_NOT_EXPR, cmp_type, >>> fold_convert_loc (loc, cmp_type, >>> arg1))); >>> } >>> >>> return NULL_TREE; >>> } >>> @@ -14038,52 +14043,52 @@ fold_ternary_loc (location_t loc, enum t >>> location_t loc0 = expr_location_or (arg0, loc); >>> tem = fold_truth_not_expr (loc0, arg0); >>> if (tem && COMPARISON_CLASS_P (tem)) >>> { >>> tem = fold_cond_expr_with_comparison (loc, type, tem, op2, >>> op1); >>> if (tem) >>> return tem; >>> } >>> } >>> >>> - /* ??? Fixup the code below for VEC_COND_EXPR. */ >>> - if (code == VEC_COND_EXPR) >>> - return NULL_TREE; >>> - >>> /* If the second operand is simpler than the third, swap them >>> since that produces better jump optimization results. */ >>> if (truth_value_p (TREE_CODE (arg0)) >>> && tree_swap_operands_p (op1, op2, false)) >>> { >>> location_t loc0 = expr_location_or (arg0, loc); >>> /* See if this can be inverted. If it can't, possibly because >>> it was a floating-point inequality comparison, don't do >>> anything. */ >>> tem = fold_truth_not_expr (loc0, arg0); >>> if (tem) >>> return fold_build3_loc (loc, code, type, tem, op2, op1); >>> } >>> >>> /* Convert A ? 1 : 0 to simply A. */ >>> - if (integer_onep (op1) >>> + if ((code == VEC_COND_EXPR ? integer_all_onesp (op1) >>> + : (integer_onep (op1) >>> + && !VECTOR_TYPE_P (type))) >>> && integer_zerop (op2) >>> /* If we try to convert OP0 to our type, the >>> call to fold will try to move the conversion inside >>> a COND, which will recurse. In that case, the COND_EXPR >>> is probably the best choice, so leave it alone. */ >>> && type == TREE_TYPE (arg0))) >>> return pedantic_non_lvalue_loc (loc, arg0); >>> >>> /* Convert A ? 0 : 1 to !A. This prefers the use of NOT_EXPR >>> over COND_EXPR in cases such as floating point comparisons. */ >>> if (integer_zerop (op1) >>> - && integer_onep (op2) >>> + && (code == VEC_COND_EXPR ? integer_all_onesp (op2) >>> + : (integer_onep (op2) >>> + && !VECTOR_TYPE_P (type))) >>> && truth_value_p (TREE_CODE (arg0))) >>> return pedantic_non_lvalue_loc (loc, >>> fold_convert_loc (loc, type, >>> invert_truthvalue_loc (loc, >>> >>> arg0))); >>> >>> /* A < 0 ? <sign bit of A> : 0 is simply (A & <sign bit of A>). >>> */ >>> if (TREE_CODE (arg0) == LT_EXPR >>> && integer_zerop (TREE_OPERAND (arg0, 1)) >>> && integer_zerop (op2) >>> @@ -14186,60 +14191,67 @@ fold_ternary_loc (location_t loc, enum t >>> && TREE_CODE (TREE_OPERAND (arg0, 0)) == BIT_AND_EXPR >>> && operand_equal_p (TREE_OPERAND (TREE_OPERAND (arg0, 0), 1), >>> arg1, OEP_ONLY_CONST)) >>> return pedantic_non_lvalue_loc (loc, >>> fold_convert_loc (loc, type, >>> TREE_OPERAND (arg0, >>> 0))); >>> >>> /* Convert A ? B : 0 into A && B if A and B are truth values. */ >>> if (integer_zerop (op2) >>> && truth_value_p (TREE_CODE (arg0)) >>> - && truth_value_p (TREE_CODE (arg1))) >>> - return fold_build2_loc (loc, TRUTH_ANDIF_EXPR, type, >>> - fold_convert_loc (loc, type, arg0), >>> - arg1); >>> + && truth_value_p (TREE_CODE (arg1)) >>> + && (code == VEC_COND_EXPR || !VECTOR_TYPE_P (type))) >>> + return fold_build2_loc (loc, code == VEC_COND_EXPR ? BIT_AND_EXPR >>> + : >>> TRUTH_ANDIF_EXPR, >>> + type, fold_convert_loc (loc, type, arg0), >>> arg1); >>> >>> /* Convert A ? B : 1 into !A || B if A and B are truth values. */ >>> - if (integer_onep (op2) >>> + if (code == VEC_COND_EXPR ? integer_all_onesp (op2) : integer_onep >>> (op2) >>> && truth_value_p (TREE_CODE (arg0)) >>> - && truth_value_p (TREE_CODE (arg1))) >>> + && truth_value_p (TREE_CODE (arg1)) >>> + && (code == VEC_COND_EXPR || !VECTOR_TYPE_P (type))) >>> { >>> location_t loc0 = expr_location_or (arg0, loc); >>> /* Only perform transformation if ARG0 is easily inverted. */ >>> tem = fold_truth_not_expr (loc0, arg0); >>> if (tem) >>> - return fold_build2_loc (loc, TRUTH_ORIF_EXPR, type, >>> - fold_convert_loc (loc, type, tem), >>> - arg1); >>> + return fold_build2_loc (loc, code == VEC_COND_EXPR >>> + ? BIT_IOR_EXPR >>> + : TRUTH_ORIF_EXPR, >>> + type, fold_convert_loc (loc, type, >>> tem), >>> + arg1); >>> } >>> >>> /* Convert A ? 0 : B into !A && B if A and B are truth values. */ >>> if (integer_zerop (arg1) >>> && truth_value_p (TREE_CODE (arg0)) >>> - && truth_value_p (TREE_CODE (op2))) >>> + && truth_value_p (TREE_CODE (op2)) >>> + && (code == VEC_COND_EXPR || !VECTOR_TYPE_P (type))) >>> { >>> location_t loc0 = expr_location_or (arg0, loc); >>> /* Only perform transformation if ARG0 is easily inverted. */ >>> tem = fold_truth_not_expr (loc0, arg0); >>> if (tem) >>> - return fold_build2_loc (loc, TRUTH_ANDIF_EXPR, type, >>> - fold_convert_loc (loc, type, tem), >>> - op2); >>> + return fold_build2_loc (loc, code == VEC_COND_EXPR >>> + ? BIT_AND_EXPR : >>> TRUTH_ANDIF_EXPR, >>> + type, fold_convert_loc (loc, type, >>> tem), >>> + op2); >>> } >>> >>> /* Convert A ? 1 : B into A || B if A and B are truth values. */ >>> - if (integer_onep (arg1) >>> + if (code == VEC_COND_EXPR ? integer_all_onesp (arg1) : >>> integer_onep >>> (arg1) >>> && truth_value_p (TREE_CODE (arg0)) >>> - && truth_value_p (TREE_CODE (op2))) >>> - return fold_build2_loc (loc, TRUTH_ORIF_EXPR, type, >>> - fold_convert_loc (loc, type, arg0), >>> - op2); >>> + && truth_value_p (TREE_CODE (op2)) >>> + && (code == VEC_COND_EXPR || !VECTOR_TYPE_P (type))) >>> + return fold_build2_loc (loc, code == VEC_COND_EXPR >>> + ? BIT_IOR_EXPR : TRUTH_ORIF_EXPR, >>> + type, fold_convert_loc (loc, type, arg0), >>> op2); >>> >>> return NULL_TREE; >>> >>> case CALL_EXPR: >>> /* CALL_EXPRs used to be ternary exprs. Catch any mistaken uses >>> of fold_ternary on them. */ >>> gcc_unreachable (); >>> >>> case BIT_FIELD_REF: >>> if ((TREE_CODE (arg0) == VECTOR_CST >>> > > -- > Marc Glisse