Hi Richard(s),

I'm just looking to see if I'm going about this the right way, based on the
discussion we had on IRC. I've managed to hack something together and have
attached a (very) WIP patch which gives the correct codegen for the testcase in
question (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98772). It would
obviously need to support other widening patterns and differentiate between
big and little endian, among other things.

I added a backend pattern because I wasn't quite clear which changes to make in
order to allow the existing backend patterns to be used with a V8QI, or how to
represent V16QI where we don't care about the top/bottom 8 elements. I made an
attempt at this in optabs.c, which is commented out in the patch, but I'm not
sure if I'm going about this the right way.

Joel
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index be2a5a865172bdd7848be4082abb0fdfb0b35937..c66b8a367623c8daf4423677d292e292feee3606 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3498,6 +3498,23 @@
   DONE;
 })
 
+;; Unsigned widening subtract of two V8QI vectors into a single V8HI result,
+;; for the case where the input is only half the full vector width.  The
+;; template spells out the zero-extends and the subtraction so the insn has
+;; proper RTL semantics, and the "w" constraints place the operands in
+;; FP/SIMD registers.  The arrangement specifiers are written out because
+;; this pattern uses no mode iterator from which <Vtype>/<Vwtype> could be
+;; substituted.
+(define_insn "vec_widen_usubl_half_v8qi"
+  [(set (match_operand:V8HI 0 "register_operand" "=w")
+	(minus:V8HI
+	  (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "w"))
+	  (zero_extend:V8HI (match_operand:V8QI 2 "register_operand" "w"))))]
+  "TARGET_SIMD"
+  "usubl\t%0.8h, %1.8b, %2.8b"
+  [(set_attr "type" "neon_sub_long")]
+)
+
 (define_expand "vec_widen_<su>subl_hi_<mode>"
   [(match_operand:<VWIDE> 0 "register_operand")
    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
diff --git a/gcc/expr.c b/gcc/expr.c
index 04ef5ad114d0662948c896cdbf58e67737b39c7e..0939a156deef63f1cf2fa7e29c2c94925820f2ba 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -9785,6 +9785,7 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
 
     case VEC_WIDEN_PLUS_HI_EXPR:
     case VEC_WIDEN_PLUS_LO_EXPR:
+    case VEC_WIDEN_MINUS_HALF_EXPR:
     case VEC_WIDEN_MINUS_HI_EXPR:
     case VEC_WIDEN_MINUS_LO_EXPR:
     case VEC_WIDEN_MULT_HI_EXPR:
diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
index 876a3a6f348de122e5a52e6dd70d7946bc810162..10aa21d07595325fd8ef3057444853fc946385de 100644
--- a/gcc/optabs-query.h
+++ b/gcc/optabs-query.h
@@ -186,6 +186,9 @@ bool can_vec_perm_const_p (machine_mode, const vec_perm_indices &,
 enum insn_code find_widening_optab_handler_and_mode (optab, machine_mode,
 						     machine_mode,
 						     machine_mode *);
+enum insn_code find_half_mode_optab_and_mode (optab, machine_mode,
+						     machine_mode,
+						     machine_mode *);
 int can_mult_highpart_p (machine_mode, bool);
 bool can_vec_mask_load_store_p (machine_mode, machine_mode, bool);
 opt_machine_mode get_len_load_store_mode (machine_mode, bool);
diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c
index 3248ce2c06e65c9c0366757907ab057407f7c594..7abfc04aa18b7ee5b734a1b1f4378b4615ee31fd 100644
--- a/gcc/optabs-query.c
+++ b/gcc/optabs-query.c
@@ -462,6 +462,22 @@ can_vec_perm_const_p (machine_mode mode, const vec_perm_indices &sel,
   return false;
 }
 
+/* Look up OP in the mode that is twice as wide as FROM_MODE.  If such a
+   mode exists, store it in *FOUND_MODE and return whatever optab_handler
+   reports for OP in that mode; otherwise return CODE_FOR_nothing.
+   TO_MODE is currently unused.  */
+
+enum insn_code
+find_half_mode_optab_and_mode (optab op, machine_mode to_mode,
+			       machine_mode from_mode,
+			       machine_mode *found_mode)
+{
+  if (!GET_MODE_2XWIDER_MODE (from_mode).exists (found_mode))
+    return CODE_FOR_nothing;
+
+  return optab_handler (op, *found_mode);
+}
+
 /* Find a widening optab even if it doesn't widen as much as we want.
    E.g. if from_mode is HImode, and to_mode is DImode, and there is no
    direct HI->SI insn, then return SI->DI, if that exists.  */
diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c
index c94073e3ed98f8c4cab65891f65dedebdb1ec274..eb52dc15f8094594c4aa22d5fc1c442886e4ebf6 100644
--- a/gcc/optabs-tree.c
+++ b/gcc/optabs-tree.c
@@ -185,6 +185,9 @@ optab_for_tree_code (enum tree_code code, const_tree type,
     case VEC_WIDEN_MINUS_HI_EXPR:
       return (TYPE_UNSIGNED (type)
 	      ? vec_widen_usubl_hi_optab : vec_widen_ssubl_hi_optab);
+    
+    case VEC_WIDEN_MINUS_HALF_EXPR:
+      return vec_widen_usubl_half_optab;
 
     case VEC_UNPACK_HI_EXPR:
       return (TYPE_UNSIGNED (type)
@@ -308,6 +311,26 @@ supportable_convert_operation (enum tree_code code,
   if (!VECTOR_MODE_P (m1) || !VECTOR_MODE_P (m2))
     return false;
 
+  /* The case where vectype_in is half the vector width, as opposed to the
+     normal case for widening patterns of vector width input, with output in
+     multiple registers.  Only an unsigned pattern exists so far, so reject
+     signed inputs, and only report support when the target provides the
+     half-width pattern for the input mode: expansion asserts that a
+     handler exists.  */
+  if (code == WIDEN_MINUS_EXPR
+      && known_eq (TYPE_VECTOR_SUBPARTS (vectype_in),
+		   TYPE_VECTOR_SUBPARTS (vectype_out)))
+    {
+      if (!TYPE_UNSIGNED (vectype_in)
+	  || (optab_handler (vec_widen_usubl_half_optab,
+			     TYPE_MODE (vectype_in))
+	      == CODE_FOR_nothing))
+	return false;
+
+      *code1 = VEC_WIDEN_MINUS_HALF_EXPR;
+      return true;
+    }
+
   /* First check if we can done conversion directly.  */
   if ((code == FIX_TRUNC_EXPR
        && can_fix_p (m1,m2,TYPE_UNSIGNED (vectype_out), &truncp)
diff --git a/gcc/optabs.c b/gcc/optabs.c
index f4614a394587787293dc8b680a38901f7906f61c..1252097be9893d7d65ea844fc0eda9bad70b9256 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -293,6 +293,13 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op,
     icode = find_widening_optab_handler (widen_pattern_optab,
 					 TYPE_MODE (TREE_TYPE (ops->op2)),
 					 tmode0);
+  // Perhaps something like this can eliminate the need for an additional backend pattern?
+  //else if (ops->code == VEC_WIDEN_MINUS_HI_EXPR)
+  //{
+  //  icode = find_half_mode_optab_and_mode (widen_pattern_optab, tmode0,
+  //      				       tmode0,
+  //      				       &tmode1);
+  //}
   else
     icode = optab_handler (widen_pattern_optab, tmode0);
   gcc_assert (icode != CODE_FOR_nothing);
diff --git a/gcc/optabs.def b/gcc/optabs.def
index b192a9d070b8aa72e5676b2eaa020b5bdd7ffcc8..43fccfa29127d99ce0131a21c2dc58fcb247bd25 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -403,6 +403,7 @@ OPTAB_D (vec_widen_umult_lo_optab, "vec_widen_umult_lo_$a")
 OPTAB_D (vec_widen_umult_odd_optab, "vec_widen_umult_odd_$a")
 OPTAB_D (vec_widen_ushiftl_hi_optab, "vec_widen_ushiftl_hi_$a")
 OPTAB_D (vec_widen_ushiftl_lo_optab, "vec_widen_ushiftl_lo_$a")
+OPTAB_D (vec_widen_usubl_half_optab, "vec_widen_usubl_half_$a")
 OPTAB_D (vec_widen_usubl_hi_optab, "vec_widen_usubl_hi_$a")
 OPTAB_D (vec_widen_usubl_lo_optab, "vec_widen_usubl_lo_$a")
 OPTAB_D (vec_widen_uaddl_hi_optab, "vec_widen_uaddl_hi_$a")
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index 75d814bd121f40c6a430f33f4c7d6395642f6c33..0e2313009d39c17d998c2285b9a9938e616dc35c 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -4007,6 +4007,7 @@ verify_gimple_assign_binary (gassign *stmt)
         return false;
       }
 
+    case VEC_WIDEN_MINUS_HALF_EXPR:
     case VEC_WIDEN_MINUS_HI_EXPR:
     case VEC_WIDEN_MINUS_LO_EXPR:
     case VEC_WIDEN_PLUS_HI_EXPR:
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index a710fa590279234e5e8062a87bac68eb324df3cb..2c415abaf6091693c31d636644e54e18a90650b1 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -4242,6 +4242,7 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights,
 
     case VEC_WIDEN_PLUS_HI_EXPR:
     case VEC_WIDEN_PLUS_LO_EXPR:
+    case VEC_WIDEN_MINUS_HALF_EXPR:
     case VEC_WIDEN_MINUS_HI_EXPR:
     case VEC_WIDEN_MINUS_LO_EXPR:
     case VEC_WIDEN_MULT_HI_EXPR:
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index c8d8493e6eaefca589ff73bcae4dc014140a1c5c..1911d2b0d637e058affadb21dac93e6880376eae 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -2121,6 +2121,7 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi,
       || code == VEC_WIDEN_PLUS_HI_EXPR
       || code == VEC_WIDEN_PLUS_LO_EXPR
       || code == VEC_WIDEN_MINUS_HI_EXPR
+      || code == VEC_WIDEN_MINUS_HALF_EXPR
       || code == VEC_WIDEN_MINUS_LO_EXPR
       || code == VEC_WIDEN_MULT_HI_EXPR
       || code == VEC_WIDEN_MULT_LO_EXPR
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index f180ced312443ba1e698932d5e8362208690b3fc..0a31c7b004eaa6fba7bbbaca0ef39a265774093f 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -4545,6 +4545,51 @@ vect_create_vectorized_promotion_stmts (vec_info *vinfo,
   *vec_oprnds0 = vec_tmp;
 }
 
+/* Create vectorized promotion stmts for widening operations whose inputs
+   use only half of the full vector width, so that each input operand
+   yields a single result vector.
+
+   VEC_OPRNDS0 holds the first operands; when OP_TYPE is binary_op,
+   VEC_OPRNDS1 holds the matching second operands, otherwise it is not
+   read.  One stmt with code CODE1 and destination VEC_DEST is generated
+   per element of VEC_OPRNDS0 via vect_gen_widened_results_half.  On
+   return, VEC_OPRNDS0 has been released and refilled with the new
+   results.  */
+static void
+vect_create_vectorized_promotion_stmts (vec_info *vinfo,
+					vec<tree> *vec_oprnds0,
+					vec<tree> *vec_oprnds1,
+					stmt_vec_info stmt_info, tree vec_dest,
+					gimple_stmt_iterator *gsi,
+					enum tree_code code1,
+					int op_type)
+{
+  int i;
+  tree vop0;
+  vec<tree> results = vNULL;
+
+  /* Exactly one result is pushed per input operand.  */
+  results.create (vec_oprnds0->length ());
+  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
+    {
+      tree vop1 = op_type == binary_op ? (*vec_oprnds1)[i] : NULL_TREE;
+
+      /* Generate the single widened result for this operand.  */
+      gimple *new_stmt
+	= vect_gen_widened_results_half (vinfo, code1, vop0, vop1, op_type,
+					 vec_dest, gsi, stmt_info);
+      tree new_tmp = (is_gimple_call (new_stmt)
+		      ? gimple_call_lhs (new_stmt)
+		      : gimple_assign_lhs (new_stmt));
+
+      /* Store the results for the next step.  */
+      results.quick_push (new_tmp);
+    }
+
+  vec_oprnds0->release ();
+  *vec_oprnds0 = results;
+}
+
 
 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
@@ -4731,7 +4776,8 @@ vectorizable_conversion (vec_info *vinfo,
     case NONE:
       if (code != FIX_TRUNC_EXPR
 	  && code != FLOAT_EXPR
-	  && !CONVERT_EXPR_CODE_P (code))
+	  && !CONVERT_EXPR_CODE_P (code)
+	  && code != WIDEN_MINUS_EXPR)
 	return false;
       if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
 	break;
@@ -4937,22 +4983,55 @@ vectorizable_conversion (vec_info *vinfo,
   switch (modifier)
     {
     case NONE:
-      vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
-			 op0, &vec_oprnds0);
-      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
-	{
-	  /* Arguments are ready, create the new vector stmt.  */
-	  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
-	  gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
-	  new_temp = make_ssa_name (vec_dest, new_stmt);
-	  gimple_assign_set_lhs (new_stmt, new_temp);
-	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+      if (code == WIDEN_MINUS_EXPR)
+      {
+	vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
+			   op0, &vec_oprnds0,
+			   op1,
+			   &vec_oprnds1);
+	vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
+						&vec_oprnds1, stmt_info,
+						vec_dsts[0], gsi,
+						code1, op_type);
 
-	  if (slp_node)
-	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
-	  else
-	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
-	}
+	FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
+	  {
+	    gimple *new_stmt;
+	    if (cvt_type)
+	      {
+		gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
+		new_temp = make_ssa_name (vec_dest);
+		new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
+		vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+	      }
+	    else
+	      new_stmt = SSA_NAME_DEF_STMT (vop0);
+
+	    if (slp_node)
+	      SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+	    else
+	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+	  }
+      }
+      else
+      {
+	vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
+			   op0, &vec_oprnds0);
+	FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
+	  {
+	    /* Arguments are ready, create the new vector stmt.  */
+	    gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
+	    gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
+	    new_temp = make_ssa_name (vec_dest, new_stmt);
+	    gimple_assign_set_lhs (new_stmt, new_temp);
+	    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+
+	    if (slp_node)
+	      SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+	    else
+	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+	  }
+      }
       break;
 
     case WIDEN:
diff --git a/gcc/tree.def b/gcc/tree.def
index eda050bdc55c68fa11ac5526e3a3f618aad0df4b..5b2c4e74a85be18738eb6fc36bbaedd036acf89a 100644
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -1433,6 +1433,7 @@ DEFTREECODE (VEC_WIDEN_LSHIFT_HI_EXPR, "widen_lshift_hi_expr", tcc_binary, 2)
 DEFTREECODE (VEC_WIDEN_LSHIFT_LO_EXPR, "widen_lshift_lo_expr", tcc_binary, 2)
 DEFTREECODE (VEC_WIDEN_PLUS_HI_EXPR, "widen_plus_hi_expr", tcc_binary, 2)
 DEFTREECODE (VEC_WIDEN_PLUS_LO_EXPR, "widen_plus_lo_expr", tcc_binary, 2)
+DEFTREECODE (VEC_WIDEN_MINUS_HALF_EXPR, "widen_minus_half_expr", tcc_binary, 2)
 DEFTREECODE (VEC_WIDEN_MINUS_HI_EXPR, "widen_minus_hi_expr", tcc_binary, 2)
 DEFTREECODE (VEC_WIDEN_MINUS_LO_EXPR, "widen_minus_lo_expr", tcc_binary, 2)
 

Reply via email to