Hi, I had the problem with passing information about single variable
from expand_vec_cond_expr optab into ix86_expand_*_vcond.

I looked into it this problem for quite a while and found a solution.
Now the question if it could be done better.

First of all the problem:

If we represent any vector comparison with VEC_COND_EXPR < v0 <OP> v1
? {-1,...} : {0,...} >, then in the assembler we do not want to see
this useless comparison with {-1...}.

Now it is easy to fix the problem about excessive masking. The real
challenge starts when the comparison inside vcond is expressed as a
variable. In that case in order to construct correct vector expression
we need to adjust cond in cond ? v0 : v1 to  cond == {-1...} or as we
agreed recently cond != {0,..}. But hat we need to do only to make
vec_cond_expr happy. On the level of assembler we don't want this
condition.

Now, if I just construct the tree, then in x86, rtx_equal_p, does not
know that this is a constant vector full of -1, because the comparison
operands are not immediate. So I need somehow to mark the fact in
optabs, and then check the information in the x86.

At the moment I do something like this:

optabs:

if (!COMPARISON_CLASS_P (op0))
  ops[3] = gen_rtx_EQ (mode, NULL_RTX, NULL_RTX);

This expression is preserved while checking and verifying.

ix86:
if (GET_CODE (comp) == EQ && XEXP (comp, 0) == NULL_RTX
      && XEXP (comp, 1) == NULL_RTX)

See the patch attached for more details. The patch is just to give you
an idea of the way I am doing it and it seems to work. Please don't
criticise the patch itself, better help me to understand if there is a
better way to pass the information from optabs to ix86.


Thanks,
Artem.

On Thu, Aug 18, 2011 at 3:31 PM, Richard Henderson <r...@redhat.com> wrote:
> On 08/18/2011 02:23 AM, Richard Guenther wrote:
>>>> >> The first one (inefficient) is vec0 > vec1 ? {-1,...} : {0,...}
>>>> >> The second is vec0 > vec1. expand_vec_cond_expr is stupid, which is
>>>> >> fine, but it means that we need to construct it carefully.
>>> >
>>> > This is still important.
>> Yes.  I think the backends need to handle optimizing this case,
>> esp. considering targets that do not have instructions to produce
>> a {-1,...}/{0,...} bitmask from a comparison but produce a vector
>> of condition codes.  With using vec0 > vec1 ? {-1...} : {0,...} for
>> mask = vec0 > vec1; we avoid exposing the result kind of
>> vector comparisons.
>>
>> It should be easily possible for x86 for example to recognize
>> the -1 : 0 case.
>>
>
> I think you've been glossing over the hard part with "..." up there.
> I challenge you to actually fill that in with something meaningful
> in rtl.
>
> I suspect that you simply have to add another named pattern that
> will Do What You Want on mips and suchlike that produce a CCmode.
>
>
>
> r~
>
Index: gcc/optabs.c
===================================================================
--- gcc/optabs.c        (revision 177665)
+++ gcc/optabs.c        (working copy)
@@ -6557,6 +6557,8 @@ expand_vec_cond_expr_p (tree type, enum
 
 /* Generate insns for a VEC_COND_EXPR, given its TYPE and its
    three operands.  */
+rtx rtx_build_vector_from_val (enum machine_mode, HOST_WIDE_INT);
+rtx gen_const_vector1 (enum machine_mode, int);
 
 rtx
 expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
@@ -6572,16 +6574,39 @@ expand_vec_cond_expr (tree vec_cond_type
   if (icode == CODE_FOR_nothing)
     return 0;
 
-  comparison = vector_compare_rtx (op0, unsignedp, icode);
   rtx_op1 = expand_normal (op1);
   rtx_op2 = expand_normal (op2);
+  
+  if (COMPARISON_CLASS_P (op0))
+    {
+      comparison = vector_compare_rtx (op0, unsignedp, icode);
+      create_output_operand (&ops[0], target, mode);
+      create_input_operand (&ops[1], rtx_op1, mode);
+      create_input_operand (&ops[2], rtx_op2, mode);
+      create_fixed_operand (&ops[3], comparison);
+      create_fixed_operand (&ops[4], XEXP (comparison, 0));
+      create_fixed_operand (&ops[5], XEXP (comparison, 1));
+
+    }
+  else
+    {
+      enum rtx_code rcode;
+      rtx rtx_op0;
+      rtx vec; 
+    
+      rtx_op0 = expand_normal (op0);
+      rcode = get_rtx_code (EQ_EXPR, unsignedp);
+      comparison = gen_rtx_EQ (mode, NULL_RTX, NULL_RTX); 
+      vec = rtx_build_vector_from_val (mode, -1);
+
+      create_output_operand (&ops[0], target, mode);
+      create_input_operand (&ops[1], rtx_op1, mode);
+      create_input_operand (&ops[2], rtx_op2, mode);
+      create_input_operand (&ops[3], comparison, mode);
+      create_input_operand (&ops[4], rtx_op0, mode);
+      create_input_operand (&ops[5], vec, mode);
+    }
 
-  create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], rtx_op1, mode);
-  create_input_operand (&ops[2], rtx_op2, mode);
-  create_fixed_operand (&ops[3], comparison);
-  create_fixed_operand (&ops[4], XEXP (comparison, 0));
-  create_fixed_operand (&ops[5], XEXP (comparison, 1));
   expand_insn (icode, 6, ops);
   return ops[0].value;
 }
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c      (revision 177665)
+++ gcc/config/i386/i386.c      (working copy)
@@ -25,6 +25,7 @@ along with GCC; see the file COPYING3.
 #include "tm.h"
 #include "rtl.h"
 #include "tree.h"
+#include "tree-flow.h"
 #include "tm_p.h"
 #include "regs.h"
 #include "hard-reg-set.h"
@@ -18402,6 +18403,23 @@ ix86_expand_sse_fp_minmax (rtx dest, enu
   return true;
 }
 
+/* Returns a vector of mode MODE where all the elements are ARG.  */
+rtx
+rtx_build_vector_from_val (enum machine_mode mode, HOST_WIDE_INT arg)
+{
+  rtvec v;
+  int units, i;
+  enum machine_mode inner;
+  
+  units = GET_MODE_NUNITS (mode);
+  inner = GET_MODE_INNER (mode);
+  v = rtvec_alloc (units);
+  for (i = 0; i < units; ++i)
+    RTVEC_ELT (v, i) = gen_rtx_CONST_INT (inner, arg);
+  
+  return gen_rtx_raw_CONST_VECTOR (mode, v);
+}
+
 /* Expand an sse vector comparison.  Return the register with the result.  */
 
 static rtx
@@ -18411,18 +18429,28 @@ ix86_expand_sse_cmp (rtx dest, enum rtx_
   enum machine_mode mode = GET_MODE (dest);
   rtx x;
 
-  cmp_op0 = force_reg (mode, cmp_op0);
-  if (!nonimmediate_operand (cmp_op1, mode))
-    cmp_op1 = force_reg (mode, cmp_op1);
+  /* Avoid useless comparison.  */
+  if (code == EQ 
+      && rtx_equal_p (cmp_op1, rtx_build_vector_from_val (mode, -1)))
+    {
+      cmp_op0 = force_reg (mode, cmp_op0);
+      x = cmp_op0;
+    }
+  else
+    {
+      cmp_op0 = force_reg (mode, cmp_op0);
+      if (!nonimmediate_operand (cmp_op1, mode))
+       cmp_op1 = force_reg (mode, cmp_op1);
+
+      x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
+    }
 
   if (optimize
       || reg_overlap_mentioned_p (dest, op_true)
       || reg_overlap_mentioned_p (dest, op_false))
     dest = gen_reg_rtx (mode);
 
-  x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
-
   return dest;
 }
 
@@ -18434,8 +18462,14 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp
 {
   enum machine_mode mode = GET_MODE (dest);
   rtx t2, t3, x;
-
-  if (op_false == CONST0_RTX (mode))
+  rtx mask_true;
+  
+  if (rtx_equal_p (op_true, rtx_build_vector_from_val (mode, -1))
+      && rtx_equal_p (op_false, CONST0_RTX (mode)))
+    {
+      emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
+    }
+  else if (op_false == CONST0_RTX (mode))
     {
       op_true = force_reg (mode, op_true);
       x = gen_rtx_AND (mode, cmp, op_true);
@@ -18569,7 +18603,9 @@ ix86_expand_int_vcond (rtx operands[])
   enum rtx_code code = GET_CODE (operands[3]);
   bool negate = false;
   rtx x, cop0, cop1;
+  rtx comp;
 
+  comp = operands[3];
   cop0 = operands[4];
   cop1 = operands[5];
 
@@ -18681,8 +18717,18 @@ ix86_expand_int_vcond (rtx operands[])
        }
     }
 
-  x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
-                          operands[1+negate], operands[2-negate]);
+  if (GET_CODE (comp) == EQ && XEXP (comp, 0) == NULL_RTX 
+      && XEXP (comp, 1) == NULL_RTX)
+    {
+      rtx vec = rtx_build_vector_from_val (mode, -1);
+      x = ix86_expand_sse_cmp (operands[0], code, cop0, vec,
+                              operands[1+negate], operands[2-negate]);
+    }
+  else
+    {
+      x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
+                              operands[1+negate], operands[2-negate]);
+    }
 
   ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
                         operands[2-negate]);

Reply via email to