Hi,

On 2020/9/3 18:29, Richard Biener wrote:
> On Thu, Sep 3, 2020 at 11:20 AM luoxhu <luo...@linux.ibm.com> wrote:
>>
>>
>>
>> On 2020/9/2 17:30, Richard Biener wrote:
>>>> so maybe bypass convert_vector_to_array_for_subscript for special 
>>>> circumstance
>>>> like "i = v[n%4]" or "v[n&3]=i" to generate vec_extract or vec_insert 
>>>> builtin
>>>> call a relative simpler method?
>>> I think you have it backward.  You need to work with what
>>> convert_vector_to_array_for_subscript
>>> gives and deal with it during RTL expansion / optimization to generate
>>> more optimal
>>> code for power.  The goal is to have as little target specific
>>> builtins during the GIMPLE
>>> optimization phase (because we cannot work out its semantics in optimizers).
>>
>> OK, got it, will add optabs vec_insert and expand 
>> "VIEW_CONVERT_EXPR<int[4]>(v1)[_1] = i_6(D);"
>> expressions to rs6000_expand_vector_insert instead of builtin call.
>> vec_extract already has optabs and "i = v[n%4]" should be in another patch
>> after this.
> 
> There is already vec_set and vec_extract - the question is whether the 
> expander
> tries those for variable index.
> 

Yes, I checked and found that neither vec_set nor vec_extract supports a
variable index on most targets: store_bit_field_1 and extract_bit_field_1
only consider using the optabs when the index is a constant integer.  Anyway,
it shouldn't be hard to extend them depending on target requirements.

Another problem is that v[n&3]=i and vec_insert(v, i, n) generate
different GIMPLE code:

{
_1 = n & 3;
VIEW_CONVERT_EXPR<int[4]>(v1)[_1] = i;
}

vs:

{
  __vector signed int v1;
  __vector signed int D.3192;
  long unsigned int _1;
  long unsigned int _2;
  int * _3;

  <bb 2> [local count: 1073741824]:
  D.3192 = v_4(D);
  _1 = n_7(D) & 3;
  _2 = _1 * 4;
  _3 = &D.3192 + _2; 
  *_3 = i_8(D);
  v1_10 = D.3192;
  return v1_10;
}

If we don't use a builtin for "vec_insert(v, i, n)", the pointer is "int *"
instead of a vector type.  Will it be difficult for the expander to match so
many statements and then call the optabs?  So shall we keep the builtin style
for "vec_insert(v, i, n)" and expand only "v[n&3]=i" with optabs, or expand
both with optabs?

I drafted a quick patch to expand "v[n&3]=i" with optabs, as below.  Sorry that
it does not use the existing vec_set yet, as I am not quite sure about it.
Together with the first patch, both cases are handled as expected:


[PATCH] Expander: expand VIEW_CONVERT_EXPR to vec_insert with variable index

v[n%4] = i has the same semantics as vec_insert (i, v, n), but it is
rewritten to "VIEW_CONVERT_EXPR<int[4]>(v1)[_1] = i;" in GIMPLE.  This
patch recognizes that pattern in the expander and uses optabs to
expand it to fast instructions, as vec_insert does: lvsl+xxperm+xxsel.

gcc/ChangeLog:

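        * config/rs6000/vector.md (vec_insert<VEC_base_l><mode>): New
        define_expand.
        * expr.c (expand_assignment): Expand a store to an element of a
        VIEW_CONVERT_EXPR'd vector through vec_insert_optab.
        * optabs.def (vec_insert_optab): New optab.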
---
 gcc/config/rs6000/vector.md | 13 +++++++++++
 gcc/expr.c                  | 46 +++++++++++++++++++++++++++++++++++++
 gcc/optabs.def              |  1 +
 3 files changed, 60 insertions(+)

diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index 796345c80d3..46d21271e17 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -1244,6 +1244,19 @@ (define_expand "vec_extract<mode><VEC_base_l>"
   DONE;
 })
 
+(define_expand "vec_insert<VEC_base_l><mode>"
+  [(match_operand:VEC_E 0 "vlogical_operand")
+   (match_operand:<VEC_base> 1 "register_operand")
+   (match_operand 2 "register_operand")]
+  "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+{
+  rtx target = gen_reg_rtx (V16QImode);
+  rs6000_expand_vector_insert (target, operands[0], operands[1], operands[2]);
+  rtx sub_target = simplify_gen_subreg (GET_MODE(operands[0]), target, 
V16QImode, 0);
+  emit_insn (gen_rtx_SET (operands[0], sub_target));
+  DONE;
+})
+
 ;; Convert double word types to single word types
 (define_expand "vec_pack_trunc_v2df"
   [(match_operand:V4SF 0 "vfloat_operand")
diff --git a/gcc/expr.c b/gcc/expr.c
index dd2200ddea8..ce2890c1a2d 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -5237,6 +5237,52 @@ expand_assignment (tree to, tree from, bool nontemporal)
 
       to_rtx = expand_expr (tem, NULL_RTX, VOIDmode, EXPAND_WRITE);
 
+      tree type = TREE_TYPE (to);
+      if (TREE_CODE (to) == ARRAY_REF && tree_fits_uhwi_p (TYPE_SIZE (type))
+         && tree_fits_uhwi_p (TYPE_SIZE_UNIT (type))
+         && tree_to_uhwi (TYPE_SIZE (type))
+                * tree_to_uhwi (TYPE_SIZE_UNIT (type))
+              == 128)
+       {
+         tree op0 = TREE_OPERAND (to, 0);
+         tree op1 = TREE_OPERAND (to, 1);
+         if (TREE_CODE (op0) == VIEW_CONVERT_EXPR)
+           {
+             tree view_op0 = TREE_OPERAND (op0, 0);
+             mode = TYPE_MODE (TREE_TYPE (view_op0));
+             if (TREE_CODE (TREE_TYPE (view_op0)) == VECTOR_TYPE)
+               {
+                 rtx value
+                   = expand_expr (from, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+                 rtx pos
+                   = expand_expr (op1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+                 rtx temp_target = gen_reg_rtx (mode);
+                 emit_move_insn (temp_target, to_rtx);
+
+                 machine_mode outermode = mode;
+                 scalar_mode innermode = GET_MODE_INNER (outermode);
+                 class expand_operand ops[3];
+                 enum insn_code icode
+                   = convert_optab_handler (vec_insert_optab, innermode,
+                                            outermode);
+
+                 if (icode != CODE_FOR_nothing)
+                   {
+                     pos = convert_to_mode (E_SImode, pos, 0);
+
+                     create_fixed_operand (&ops[0], temp_target);
+                     create_input_operand (&ops[1], value, innermode);
+                     create_input_operand (&ops[2], pos, GET_MODE (pos));
+                     if (maybe_expand_insn (icode, 3, ops))
+                       {
+                         emit_move_insn (to_rtx, temp_target);
+                         pop_temp_slots ();
+                         return;
+                       }
+                   }
+               }
+           }
+       }
       /* If the field has a mode, we want to access it in the
         field's mode, not the computed mode.
         If a MEM has VOIDmode (external with incomplete type),
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 78409aa1453..21b163a969e 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -96,6 +96,7 @@ OPTAB_CD(mask_gather_load_optab, "mask_gather_load$a$b")
 OPTAB_CD(scatter_store_optab, "scatter_store$a$b")
 OPTAB_CD(mask_scatter_store_optab, "mask_scatter_store$a$b")
 OPTAB_CD(vec_extract_optab, "vec_extract$a$b")
+OPTAB_CD(vec_insert_optab, "vec_insert$a$b")
 OPTAB_CD(vec_init_optab, "vec_init$a$b")
 
 OPTAB_CD (while_ult_optab, "while_ult$a$b")
-- 
2.27.0.90.geebb51ba8c




Reply via email to