Hi! The PR50482 testcase shows that we don't optimize logical vector ops with all ones vectors very well.
The following patch implements that optimization. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2011-09-22 Jakub Jelinek <ja...@redhat.com> * rtlanal.c (all_ones_cst): New function. * rtl.h (all_ones_cst): New prototype. * simplify-rtx.c (simplify_binary_operation_1) <case IOR, XOR, AND>: Optimize if one operand is all_ones_cst. * config/i386/i386.c (ix86_expand_sse_movcc): Optimize mask ? -1 : x into mask | x. --- gcc/rtlanal.c.jj 2011-08-26 18:41:44.000000000 +0200 +++ gcc/rtlanal.c 2011-09-22 18:43:21.000000000 +0200 @@ -5209,3 +5209,24 @@ low_bitmask_len (enum machine_mode mode, return exact_log2 (m + 1); } + +bool +all_ones_cst (const_rtx x) +{ + + if (x == constm1_rtx) + return true; + + if (GET_CODE (x) == CONST_VECTOR + && CONST_VECTOR_NUNITS (x) == GET_MODE_NUNITS (GET_MODE (x))) + { + int i, units = CONST_VECTOR_NUNITS (x); + + for (i = 0; i < units; i++) + if (CONST_VECTOR_ELT (x, i) != constm1_rtx) + return false; + return true; + } + + return false; +} --- gcc/rtl.h.jj 2011-08-28 12:36:57.000000000 +0200 +++ gcc/rtl.h 2011-09-22 18:43:41.000000000 +0200 @@ -1218,6 +1218,7 @@ extern unsigned int num_sign_bit_copies extern bool constant_pool_constant_p (rtx); extern bool truncated_to_mode (enum machine_mode, const_rtx); extern int low_bitmask_len (enum machine_mode, unsigned HOST_WIDE_INT); +extern bool all_ones_cst (const_rtx); #ifndef GENERATOR_FILE /* Return the cost of SET X. 
SPEED_P is true if optimizing for speed --- gcc/simplify-rtx.c.jj 2011-09-15 12:18:54.000000000 +0200 +++ gcc/simplify-rtx.c 2011-09-22 18:48:39.000000000 +0200 @@ -2431,9 +2431,10 @@ simplify_binary_operation_1 (enum rtx_co case IOR: if (trueop1 == CONST0_RTX (mode)) return op0; - if (CONST_INT_P (trueop1) - && ((UINTVAL (trueop1) & GET_MODE_MASK (mode)) - == GET_MODE_MASK (mode))) + if ((CONST_INT_P (trueop1) + && ((UINTVAL (trueop1) & GET_MODE_MASK (mode)) + == GET_MODE_MASK (mode))) + || all_ones_cst (trueop1)) return op1; if (rtx_equal_p (trueop0, trueop1) && ! side_effects_p (op0)) return op0; @@ -2573,9 +2574,10 @@ simplify_binary_operation_1 (enum rtx_co case XOR: if (trueop1 == CONST0_RTX (mode)) return op0; - if (CONST_INT_P (trueop1) - && ((UINTVAL (trueop1) & GET_MODE_MASK (mode)) - == GET_MODE_MASK (mode))) + if ((CONST_INT_P (trueop1) + && ((UINTVAL (trueop1) & GET_MODE_MASK (mode)) + == GET_MODE_MASK (mode))) + || all_ones_cst (trueop1)) return simplify_gen_unary (NOT, mode, op0, mode); if (rtx_equal_p (trueop0, trueop1) && ! side_effects_p (op0) @@ -2721,6 +2723,8 @@ simplify_binary_operation_1 (enum rtx_co case AND: if (trueop1 == CONST0_RTX (mode) && ! side_effects_p (op0)) return trueop1; + if (all_ones_cst (trueop1)) + return op0; if (HWI_COMPUTABLE_MODE_P (mode)) { HOST_WIDE_INT nzop0 = nonzero_bits (trueop0, mode); --- gcc/config/i386/i386.c.jj 2011-09-22 18:37:00.000000000 +0200 +++ gcc/config/i386/i386.c 2011-09-22 18:55:45.000000000 +0200 @@ -18899,6 +18899,12 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp x = gen_rtx_AND (mode, x, op_false); emit_insn (gen_rtx_SET (VOIDmode, dest, x)); } + else if (all_ones_cst (op_true)) + { + op_false = force_reg (mode, op_false); + x = gen_rtx_IOR (mode, cmp, op_false); + emit_insn (gen_rtx_SET (VOIDmode, dest, x)); + } else if (TARGET_XOP) { op_true = force_reg (mode, op_true); Jakub