w.r.t. the PR48052, here is the patch which finds out if scev would wrap or not. The patch symbolically evaluates if valid_niter>= loop->nb_iterations is true. In that case the scev would not wrap (??). Currently, we only look for two special 'patterns', which are sufficient to analyze the simple test cases.
valid_niter = ~s (= UNIT_MAX - s) We have to prove that valid_niter>= loop->nb_iterations Pattern1 loop->nb_iterations: s>= e ? s - e : 0 Pattern2 loop->nb_iterations: (e - s) -1 In the first case we prove that valid_niter>= loop->nb_iterations in both the cases i.e., when s>=e and when not. In the second case we prove valid_niter>= loop->nb_iterations, by simple analysis that UINT_MAX>= e is true in all cases. I haven't tested this patch completely. I'm looking for feedback and any scope for improvement. hth, -Aditya Vectorize loops which has typecast. 2015-05-19 hiraditya <hiradi...@msn.com> * gcc.dg/vect/pr48052.c: New test. gcc/ChangeLog: 2015-05-19 hiraditya <hiradi...@msn.com> * tree-ssa-loop-niter.c (fold_binary_cond_p): Fold a conditional operation when additional constraints are available. (fold_binary_minus_p): Fold a subtraction operations of the form (A - B -1) when additional constraints are available. (scev_probably_wraps_p): Use the above two functions to find whether valid_niter>= loop->nb_iterations. diff --git a/gcc/testsuite/gcc.dg/vect/pr48052.c b/gcc/testsuite/gcc.dg/vect/pr48052.c new file mode 100644 index 0000000..8e406d7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr48052.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3" } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + +int foo(int* A, int* B, unsigned start, unsigned BS) +{ + int s; + for (unsigned k = start; k < start + BS; k++) + { + s += A[k] * B[k]; + } + + return s; +} + +int bar(int* A, int* B, unsigned BS) +{ + int s; + for (unsigned k = 0; k < BS; k++) + { + s += A[k] * B[k]; + } + + return s; +} + diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c index 042f8df..ddc00cc 100644 --- a/gcc/tree-ssa-loop-niter.c +++ b/gcc/tree-ssa-loop-niter.c @@ -3773,6 +3773,117 @@ nowrap_type_p (tree type) return false; } +/* Return true when op0>= op1. + For example: + Where, op0 = ~start_3(D); + op1 = start_3(D) <= stop_6(D) ? stop_6(D) - start_3(D) : 0; + In this case op0 = UINT_MAX - start_3(D); + So, op0>= op1 in all cases because UINT_MAX>= stop_6(D), + when TREE_TYPE(stop_6(D)) == unsigned int; */ +bool +fold_binary_cond_p (enum tree_code code, tree type, tree op0, tree op1) +{ + gcc_assert (type == boolean_type_node); + + if (TREE_TYPE (op0) != TREE_TYPE (op1)) + return false; + + /* TODO: Handle other operations. */ + if (code != GE_EXPR) + return false; + // The type of op0 and op1 should be unsigned. + if (!TYPE_UNSIGNED (TREE_TYPE(op0))) + return false; + if ((TREE_CODE (op0) != BIT_NOT_EXPR) || (TREE_CODE (op1) != COND_EXPR)) + return false; + + /* We have to show that in both the cases, + (when cond is true and when cond is false) op (op0, op1) is true. */ + tree neg_op0 = TREE_OPERAND (op0, 0); + tree cond_op1 = TREE_OPERAND (op1, 0); + tree true_op1 = TREE_OPERAND (op1, 1); + tree false_op1 = TREE_OPERAND (op1, 2); + gcc_assert(neg_op0 && cond_op1 && true_op1 && false_op1); + + /* When cond is false. Evaluate op (op0, false_op1). */ + tree running_exp = fold_binary (code, boolean_type_node, op0, false_op1); + if (running_exp == NULL || integer_zerop (running_exp)) + /* TODO: Handle more cases here. */ + return false; + + /* When cond is true. Evaluate op (op0, true_op1). */ + running_exp = fold_binary (code, boolean_type_node, op0, true_op1); + if (running_exp != NULL && integer_nonzerop (running_exp)) + return true; + + tree smaller, bigger; + if (TREE_CODE (cond_op1) == LE_EXPR) + { + smaller = TREE_OPERAND (cond_op1, 0); + bigger = TREE_OPERAND (cond_op1, 1); + } else return false; + + if (TREE_CODE (true_op1) == MINUS_EXPR) + { + tree minuend = TREE_OPERAND (true_op1, 0); + tree subtrahend = TREE_OPERAND (true_op1, 1); + if (subtrahend == neg_op0 && subtrahend == smaller && minuend == bigger) + { + tree extreme = upper_bound_in_type (TREE_TYPE (neg_op0), + TREE_TYPE (neg_op0)); + running_exp = fold_binary (code, boolean_type_node, extreme, minuend); + return running_exp != NULL && integer_nonzerop(running_exp); + } else return false; + } else return false; +} + +/* Return true when op0>= op1 and + op0 is ~start3(D) or, UINT_MAX - start3(D) + op1 is (_21 - start_3(D)) - 1; */ +bool +fold_binary_minus_p (enum tree_code code, tree type, tree op0, tree op1) +{ + gcc_assert (type == boolean_type_node); + + if (TREE_TYPE (op0) != TREE_TYPE (op1)) + return false; + /* TODO: Handle other operations. */ + if (code != GE_EXPR) + return false; + + // The type of op0 and op1 should be unsigned. + if (!TYPE_UNSIGNED (TREE_TYPE(op0))) + return false; + if ((TREE_CODE (op0) != BIT_NOT_EXPR) || (TREE_CODE (op1) != MINUS_EXPR)) + return false; + + /* We have to show that op (op0, op1) is true. */ + tree neg_op0 = TREE_OPERAND (op0, 0); + tree minuend_op1 = TREE_OPERAND (op1, 0); + tree subtrahend_op1 = TREE_OPERAND (op1, 1); + gcc_assert(neg_op0 && subtrahend_op1 && minuend_op1); + + /* TODO: Also check that the integer_cst is positive. */ + if (TREE_CODE (minuend_op1) != MINUS_EXPR || + TREE_CODE (subtrahend_op1) != INTEGER_CST) + return false; + + tree minuend_minuend_op1 = TREE_OPERAND (minuend_op1, 0); + tree subtrahend_minuend_op1 = TREE_OPERAND (minuend_op1, 1); + + /* TODO: Extend this to evaluate the subtrahends. + i.e., when there are complicated operations in the subtrahend. */ + if (subtrahend_minuend_op1 != neg_op0) + return false; + + tree extreme = upper_bound_in_type (TREE_TYPE (neg_op0), TREE_TYPE (neg_op0)); + tree compare_minuend = fold_binary (GE_EXPR, boolean_type_node, + extreme, minuend_minuend_op1); + if (compare_minuend != NULL && integer_nonzerop (compare_minuend)) + return true; + return false; +} + /* Return false only when the induction variable BASE + STEP * I is known to not overflow: i.e. when the number of iterations is small enough with respect to the step and initial condition in order to @@ -3867,6 +3978,17 @@ scev_probably_wraps_p (tree base, tree step, fold_undefer_and_ignore_overflow_warnings (); return false; } + + if (loop->nb_iterations && at_stmt + && (fold_binary_cond_p (GE_EXPR, boolean_type_node, + valid_niter, loop->nb_iterations) || + fold_binary_minus_p (GE_EXPR, boolean_type_node, + valid_niter, loop->nb_iterations))) + { + fold_undefer_and_ignore_overflow_warnings (); + return false; + } + if (at_stmt) for (bound = loop->bounds; bound; bound = bound->next) {