[Bug middle-end/117542] Missed loop vectorization for truncate from float to __bf16.

liuhongt at gcc dot gnu.org via Gcc-bugs Wed, 20 Nov 2024 22:33:10 -0800

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117542


--- Comment #3 from Hongtao Liu <liuhongt at gcc dot gnu.org> ---
(In reply to Hongtao Liu from comment #2)
> (In reply to Richard Biener from comment #1)
> > It doesn't even unambiguously specify whether the mode is that of the source
> > or the destination.  The original idea was of course that the size
> > unambiguously specifies the destination mode and thus specifying it would be
> > redundant. Making
> > all those optabs conversion optabs has some overhead and is useless in 99%
> > of the cases.
> > 
> > Can you combine both destination mode variants in vec_pack_trunc_VnSF and
> > use predicates to select? 
> Then the mode of operand[0] will be hidden in the predicates; I suspect it
> would fail the check below in supportable_narrowing_operation:
> 4739  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype)) 
> 

diff --git a/gcc/expr.cc b/gcc/expr.cc
index aa6ee85e719..f935d0e7767 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -10900,6 +10900,30 @@ expand_expr_real_2 (const_sepops ops, rtx target,
machine_mode tmode,
          expand_insn (icode, 4, eops);
          return eops[0].value;
        }
+      /* There are 2 kinds of half-precision floating point, and vec_pack_trunc_m
+        can't be overloaded.  Making all those optabs conversion optabs has
+       some overhead and is useless in 99% of the cases.  So the mode could
+       be hidden in the predicate, and the mode of the type is the real tmode.  */
+      if (VECTOR_FLOAT_TYPE_P (type)
+         && VECTOR_FLOAT_TYPE_P (TREE_TYPE (treeop0))
+         && GET_MODE_SIZE (GET_MODE_INNER (TYPE_MODE (type))) == 2
+         && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (treeop0)) * 2,
+                      TYPE_VECTOR_SUBPARTS (type))
+         && tmode == E_VOIDmode)
+       {
+         mode = TYPE_MODE (TREE_TYPE (treeop0));
+         tmode = TYPE_MODE (type);
+         class expand_operand eops[3];
+         expand_operands (treeop0, treeop1,
+                          subtarget, &op0, &op1, EXPAND_NORMAL);
+         this_optab = vec_pack_trunc_optab;
+         enum insn_code icode = optab_handler (this_optab, mode);
+         create_output_operand (&eops[0], target, tmode);
+         create_input_operand (&eops[1], op0, mode);
+         create_input_operand (&eops[2], op1, mode);
+         expand_insn (icode, 3, eops);
+         return eops[0].value;
+       }
       mode = TYPE_MODE (TREE_TYPE (treeop0));
       subtarget = NULL_RTX;
       goto binop;
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 7a92da00f7d..d4744063045 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -15010,6 +15010,24 @@ supportable_narrowing_operation (code_helper code,
        return true;
     }

+  /* There are 2 kinds of half-precision floating point, and vec_pack_trunc_m
+     can't be overloaded.  Making all those optabs conversion optabs has
+     some overhead and is useless in 99% of the cases.
+     So check the predicate here.  */
+  if (c1 == VEC_PACK_TRUNC_EXPR
+      && VECTOR_FLOAT_TYPE_P (narrow_vectype)
+      && VECTOR_FLOAT_TYPE_P (vectype)
+      && GET_MODE_SIZE (GET_MODE_INNER (TYPE_MODE (narrow_vectype))) == 2
+      && known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
+                  TYPE_VECTOR_SUBPARTS (narrow_vectype))
+      && insn_data[icode1].operand[0].predicate)
+    {
+      machine_mode dpmode = insn_data[icode1].operand[0].mode;
+      machine_mode dmode = TYPE_MODE (narrow_vectype);
+      if (insn_data[icode1].operand[0].predicate (gen_reg_rtx (dmode),
dpmode))
+       return true;
+    }
+

[Bug middle-end/117542] Missed loop vectorization for truncate from float to __bf16.

Reply via email to