Hi,
When adding else operands to maskload and friends we didn't bother to do
the same for len_load (as we never use the residual elements anyway).
In order to simplify handling in gimple-fold, this patch adds the else
operand now. Both power and s390 zero out inactive elements.
This patch adds what is needed but doesn't change the larger structure.
Ideally we should treat len_load/store more like other load/store types,
e.g. determine the "pun type" and store it in e.g. ls.ls_type, set the else
value type early, and so on. But I didn't want to change more than necessary
now.
Bootstrapped and regtested on x86, power10. Regtested on riscv64 and s390
(qemu).
Regards
Robin
gcc/ChangeLog:
* config/rs6000/predicates.md (lxvl_else_operand): New
predicate.
* config/rs6000/vsx.md: Add else operand.
* config/s390/predicates.md (vll_else_operand): New predicate.
* config/s390/vector.md: Add else operand.
* doc/md.texi: Document else operand.
* internal-fn.cc (internal_fn_len_index): Adjust IFN_LEN_LOAD.
(internal_fn_else_index): Add IFN_LEN_LOAD.
(internal_len_load_store_bias): Derive the bias operand index
from internal_fn_len_index.
* optabs-tree.cc (target_supports_len_load_store_p): Get else
value for len_load.
* tree-vect-stmts.cc (vectorizable_load): Pun the else value
type.
---
gcc/config/rs6000/predicates.md | 5 +++++
gcc/config/rs6000/vsx.md | 7 ++++---
gcc/config/s390/predicates.md | 5 +++++
gcc/config/s390/vector.md | 7 ++++---
gcc/doc/md.texi | 20 +++++++++++---------
gcc/internal-fn.cc | 13 +++++++++----
gcc/optabs-tree.cc | 31 +++++++++++++++----------------
gcc/tree-vect-stmts.cc | 17 +++++++++++++----
8 files changed, 66 insertions(+), 39 deletions(-)
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 647e89afb6a..5133dacd794 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -2166,3 +2166,8 @@ (define_predicate "lowpart_subreg_operator"
(and (match_code "subreg")
(match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op)))
== SUBREG_BYTE (op)")))
+
+; Else operand for LEN_LOAD.
+(define_predicate "lxvl_else_operand"
+ (and (match_code "const_vector")
+ (match_test "op == CONST0_RTX (GET_MODE (op))")))
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index dd3573b8086..4d47833c944 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5798,13 +5798,14 @@ (define_expand "stxvl"
(define_expand "len_load_v16qi"
[(match_operand:V16QI 0 "vlogical_operand")
(match_operand:V16QI 1 "memory_operand")
- (match_operand:QI 2 "gpc_reg_operand")
- (match_operand:QI 3 "zero_constant")]
+ (match_operand:V16QI 2 "lxvl_else_operand")
+ (match_operand:QI 3 "gpc_reg_operand")
+ (match_operand:QI 4 "zero_constant")]
"TARGET_P9_VECTOR && TARGET_64BIT"
{
rtx mem = XEXP (operands[1], 0);
mem = force_reg (DImode, mem);
- rtx len = gen_lowpart (DImode, operands[2]);
+ rtx len = gen_lowpart (DImode, operands[3]);
emit_insn (gen_lxvl (operands[0], mem, len));
DONE;
})
diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md
index c7b93bd7fcb..e181399e91b 100644
--- a/gcc/config/s390/predicates.md
+++ b/gcc/config/s390/predicates.md
@@ -614,3 +614,8 @@ (define_predicate "subreg_register_operand"
(define_predicate "vll_bias_operand"
(and (match_code "const_int")
(match_test "op == CONSTM1_RTX (QImode)")))
+
+; Else operand for LEN_LOAD.
+(define_predicate "vll_else_operand"
+ (and (match_code "const_vector")
+ (match_test "op == CONST0_RTX (GET_MODE (op))")))
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 375e3e81ae1..367389c3e58 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -3557,15 +3557,16 @@ (define_insn "*vstbr<mode>"
(define_expand "len_load_v16qi"
[(match_operand:V16QI 0 "register_operand")
(match_operand:V16QI 1 "memory_operand")
- (match_operand:QI 2 "register_operand")
- (match_operand:QI 3 "vll_bias_operand")
+ (match_operand:V16QI 2 "vll_else_operand")
+ (match_operand:QI 3 "register_operand")
+ (match_operand:QI 4 "vll_bias_operand")
]
"TARGET_VX && TARGET_64BIT"
{
rtx mem = adjust_address (operands[1], BLKmode, 0);
rtx len = gen_reg_rtx (SImode);
- emit_move_insn (len, gen_rtx_ZERO_EXTEND (SImode, operands[2]));
+ emit_move_insn (len, gen_rtx_ZERO_EXTEND (SImode, operands[3]));
emit_insn (gen_vllv16qi (operands[0], len, mem));
DONE;
})
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 7bf2cc0aa1b..86ed4ffe1e8 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5408,18 +5408,20 @@ This pattern is not allowed to @code{FAIL}.
@cindex @code{len_load_@var{m}} instruction pattern
@item @samp{len_load_@var{m}}
-Load (operand 2 + operand 3) elements from memory operand 1
-into vector register operand 0, setting the other elements of
-operand 0 to undefined values. Operands 0 and 1 have mode @var{m},
-which must be a vector mode. Operand 2 has whichever integer mode the
-target prefers. Operand 3 conceptually has mode @code{QI}.
-
-Operand 2 can be a variable or a constant amount. Operand 3 specifies a
+Load (operand 3 + operand 4) elements from memory operand 1
+into vector register operand 0. Operands 0 and 1 have mode @var{m},
+which must be a vector mode. Operand 3 has whichever integer mode the
+target prefers.  Operand 2 (the ``else value'') is of mode @var{m} and
+specifies which value is loaded for the remaining elements. The predicate
+of operand 2 must only accept the else values that the target actually
+supports. Operand 4 conceptually has mode @code{QI}.
+
+Operand 3 can be a variable or a constant amount. Operand 4 specifies a
constant bias: it is either a constant 0 or a constant -1. The predicate on
-operand 3 must only accept the bias values that the target actually supports.
+operand 4 must only accept the bias values that the target actually supports.
GCC handles a bias of 0 more efficiently than a bias of -1.
-If (operand 2 + operand 3) exceeds the number of elements in mode
+If (operand 3 + operand 4) exceeds the number of elements in mode
@var{m}, the behavior is undefined.
If the target prefers the length to be measured in bytes rather than
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 13fbd2ce788..8df4f5008a1 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -5006,6 +5006,7 @@ internal_fn_len_index (internal_fn fn)
switch (fn)
{
case IFN_LEN_LOAD:
+ return 3;
case IFN_LEN_STORE:
return 2;
@@ -5071,6 +5072,9 @@ internal_fn_else_index (internal_fn fn)
case IFN_COND_LEN_NOT:
return 2;
+ case IFN_LEN_LOAD:
+ return 2;
+
case IFN_COND_ADD:
case IFN_COND_SUB:
case IFN_COND_MUL:
@@ -5401,7 +5405,7 @@ internal_len_load_store_bias (internal_fn ifn,
machine_mode mode)
{
optab optab = direct_internal_fn_optab (ifn);
insn_code icode = direct_optab_handler (optab, mode);
- int bias_no = 3;
+ int bias_idx = internal_fn_len_index (ifn) + 1;
if (icode == CODE_FOR_nothing)
{
@@ -5412,22 +5416,23 @@ internal_len_load_store_bias (internal_fn ifn,
machine_mode mode)
{
/* Try MASK_LEN_LOAD. */
optab = direct_internal_fn_optab (IFN_MASK_LEN_LOAD);
+ bias_idx = internal_fn_len_index (IFN_MASK_LEN_LOAD) + 1;
}
else
{
/* Try MASK_LEN_STORE. */
optab = direct_internal_fn_optab (IFN_MASK_LEN_STORE);
+ bias_idx = internal_fn_len_index (IFN_MASK_LEN_STORE) + 1;
}
icode = convert_optab_handler (optab, mode, mask_mode);
- bias_no = 4;
}
if (icode != CODE_FOR_nothing)
{
/* For now we only support biases of 0 or -1. Try both of them. */
- if (insn_operand_matches (icode, bias_no, GEN_INT (0)))
+ if (insn_operand_matches (icode, bias_idx, GEN_INT (0)))
return 0;
- if (insn_operand_matches (icode, bias_no, GEN_INT (-1)))
+ if (insn_operand_matches (icode, bias_idx, GEN_INT (-1)))
return -1;
}
diff --git a/gcc/optabs-tree.cc b/gcc/optabs-tree.cc
index 0de74c7966a..53788b9259b 100644
--- a/gcc/optabs-tree.cc
+++ b/gcc/optabs-tree.cc
@@ -615,28 +615,27 @@ target_supports_len_load_store_p (machine_mode mode, bool
is_load,
{
optab op = is_load ? len_load_optab : len_store_optab;
optab masked_op = is_load ? mask_len_load_optab : mask_len_store_optab;
+ internal_fn which_ifn;
- if (direct_optab_handler (op, mode))
+ enum insn_code icode;
+ if ((icode = direct_optab_handler (op, mode)) != CODE_FOR_nothing)
{
- if (ifn)
- *ifn = is_load ? IFN_LEN_LOAD : IFN_LEN_STORE;
- return true;
+ which_ifn = is_load ? IFN_LEN_LOAD : IFN_LEN_STORE;
}
machine_mode mask_mode;
- enum insn_code icode;
- if (targetm.vectorize.get_mask_mode (mode).exists (&mask_mode)
+ if (!icode
+ && targetm.vectorize.get_mask_mode (mode).exists (&mask_mode)
&& ((icode = convert_optab_handler (masked_op, mode, mask_mode))
!= CODE_FOR_nothing))
- {
- if (ifn)
- *ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE;
- if (elsvals && is_load)
- get_supported_else_vals (icode,
- internal_fn_else_index (IFN_MASK_LEN_LOAD),
- *elsvals);
- return true;
- }
- return false;
+ which_ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE;
+
+ if (icode && elsvals && is_load)
+ get_supported_else_vals (icode, internal_fn_else_index (which_ifn),
+ *elsvals);
+
+ if (icode && ifn)
+ *ifn = which_ifn;
+ return icode;
}
/* If target supports vector load/store with length for vector mode MODE,
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index de28316ddc6..7279ce1def6 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -11347,10 +11347,18 @@ vectorizable_load (vec_info *vinfo,
{
tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
gcall *call;
+
+ /* Need conversion if the vectype is punned by VnQI. */
+ els_vectype = vectype;
+ if (vmode != new_vmode)
+ els_vectype
+ = build_vector_type_for_mode (unsigned_intQI_type_node,
+ new_vmode);
+ vec_els = vect_get_mask_load_else (maskload_elsval,
+ els_vectype);
+
if (partial_ifn == IFN_MASK_LEN_LOAD)
{
- vec_els = vect_get_mask_load_else (maskload_elsval,
- vectype);
if (type_mode_padding_p
&& maskload_elsval != MASK_LOAD_ELSE_ZERO)
need_zeroing = true;
@@ -11360,9 +11368,10 @@ vectorizable_load (vec_info *vinfo,
final_len, bias);
}
else
- call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
+ call = gimple_build_call_internal (IFN_LEN_LOAD, 5,
dataref_ptr, ptr,
- final_len, bias);
+ vec_els, final_len,
+ bias);
gimple_call_set_nothrow (call, true);
new_stmt = call;
data_ref = NULL_TREE;
--
2.51.1