gcc/ChangeLog:
* config/loongarch/lasx.md (vec_extract<mode><lasxhalf>): New
define_expand.
(vec_extract_lo_<mode>): New define_insn_and_split.
(vec_extract_hi_<mode>): New define_insn.
(@vec_extract_lo_<mode>): New define_insn_and_split.
(@vec_extract_hi_<mode>): New define_insn.
(vec_extract_lo_v16hi): New define_insn_and_split.
(vec_extract_hi_v16hi): New define_insn.
(vec_extract_lo_v32qi): New define_insn_and_split.
(vec_extract_hi_v32qi): New define_insn.
* config/loongarch/loongarch.cc (loongarch_split_reduction): New
function implementing TARGET_VECTORIZE_SPLIT_REDUCTION.
(TARGET_VECTORIZE_SPLIT_REDUCTION): Define.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/lasx-reduc-1.c: New test.
---
gcc/config/loongarch/lasx.md | 119 ++++++++++++++++++
gcc/config/loongarch/loongarch.cc | 29 +++++
.../gcc.target/loongarch/lasx-reduc-1.c | 11 ++
3 files changed, 159 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/loongarch/lasx-reduc-1.c
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 3d71f30a54b..b8f881ddcd6 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -633,6 +633,125 @@ (define_insn_and_split "vec_extract<mode>_0"
[(set_attr "move_type" "fmove")
(set_attr "mode" "<UNITMODE>")])
+;; Expander for vec_extract<mode><lasxhalf>: copy one half of the LASX
+;; vector in operand 1 into operand 0, which has mode <VHMODE256_ALL>.
+;; Operand 2 must satisfy const_0_or_1_operand; a nonzero value emits
+;; vec_extract_hi_<mode> and zero emits vec_extract_lo_<mode>.
+(define_expand "vec_extract<mode><lasxhalf>"
+ [(match_operand:<VHMODE256_ALL> 0 "register_operand")
+ (match_operand:LASX 1 "register_operand")
+ (match_operand 2 "const_0_or_1_operand")]
+ "ISA_HAS_LASX"
+{
+ if (INTVAL (operands[2]))
+ emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
+ else
+ emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
+ DONE;
+})
+
+;; Select elements 0 and 1 of operand 1 (a LASX_D mode) into operand 0.
+;; Nothing is output for the insn itself ("#"); once reload has completed
+;; it is split into a plain set of operand 0 from the <VHMODE256_ALL>
+;; lowpart of operand 1.
+(define_insn_and_split "vec_extract_lo_<mode>"
+ [(set (match_operand:<VHMODE256_ALL> 0 "register_operand" "=f")
+ (vec_select:<VHMODE256_ALL>
+ (match_operand:LASX_D 1 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 1)])))]
+ "ISA_HAS_LASX"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+ "operands[1] = gen_lowpart (<VHMODE256_ALL>mode, operands[1]);")
+
+;; Select elements 2 and 3 of operand 1 (a LASX_D mode) into operand 0
+;; with a single xvpermi.d using immediate 0xe.
+;; NOTE(review): 0xe presumably places the two upper doublewords in the
+;; low half of the destination -- confirm against the xvpermi.d encoding.
+(define_insn "vec_extract_hi_<mode>"
+ [(set (match_operand:<VHMODE256_ALL> 0 "register_operand" "=f")
+ (vec_select:<VHMODE256_ALL>
+ (match_operand:LASX_D 1 "register_operand" "f")
+ (parallel [(const_int 2) (const_int 3)])))]
+ "ISA_HAS_LASX"
+ "xvpermi.d\t%u0,%u1,0xe"
+ [(set_attr "move_type" "fmove")
+ (set_attr "mode" "<MODE>")])
+
+;; Select elements 0 through 3 of operand 1 (a LASX_W mode) into operand 0.
+;; Nothing is output for the insn itself ("#"); once reload has completed
+;; it is split into a plain set of operand 0 from the <VHMODE256_ALL>
+;; lowpart of operand 1.
+(define_insn_and_split "@vec_extract_lo_<mode>"
+ [(set (match_operand:<VHMODE256_ALL> 0 "register_operand" "=f")
+ (vec_select:<VHMODE256_ALL>
+ (match_operand:LASX_W 1 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+ "ISA_HAS_LASX"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+ "operands[1] = gen_lowpart (<VHMODE256_ALL>mode, operands[1]);")
+
+;; Select elements 4 through 7 of operand 1 (a LASX_W mode) into operand 0
+;; with a single xvpermi.d using immediate 0xe.
+;; NOTE(review): 0xe presumably places the two upper doublewords in the
+;; low half of the destination -- confirm against the xvpermi.d encoding.
+(define_insn "@vec_extract_hi_<mode>"
+ [(set (match_operand:<VHMODE256_ALL> 0 "register_operand" "=f")
+ (vec_select:<VHMODE256_ALL>
+ (match_operand:LASX_W 1 "register_operand" "f")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "ISA_HAS_LASX"
+ "xvpermi.d\t%u0,%u1,0xe"
+ [(set_attr "move_type" "fmove")
+ (set_attr "mode" "<MODE>")])
+
+;; Select elements 0 through 7 of the V16HI operand 1 into the V8HI
+;; operand 0.  Nothing is output for the insn itself ("#"); once reload
+;; has completed it is split into a plain set of operand 0 from the V8HI
+;; lowpart of operand 1.
+(define_insn_and_split "vec_extract_lo_v16hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=f")
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "ISA_HAS_LASX"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+ "operands[1] = gen_lowpart (V8HImode, operands[1]);")
+
+;; Select elements 8 through 15 of the V16HI operand 1 into the V8HI
+;; operand 0 with a single xvpermi.d using immediate 0xe.
+;; NOTE(review): 0xe presumably places the two upper doublewords in the
+;; low half of the destination -- confirm against the xvpermi.d encoding.
+(define_insn "vec_extract_hi_v16hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=f")
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "f")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)])))]
+ "ISA_HAS_LASX"
+ "xvpermi.d\t%u0,%u1,0xe"
+ [(set_attr "move_type" "fmove")
+ (set_attr "mode" "V16HI")])
+
+;; Select elements 0 through 15 of the V32QI operand 1 into the V16QI
+;; operand 0.  Nothing is output for the insn itself ("#"); once reload
+;; has completed it is split into a plain set of operand 0 from the V16QI
+;; lowpart of operand 1.
+(define_insn_and_split "vec_extract_lo_v32qi"
+ [(set (match_operand:V16QI 0 "register_operand" "=f")
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)])))]
+ "ISA_HAS_LASX"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+ "operands[1] = gen_lowpart (V16QImode, operands[1]);")
+
+;; Select elements 16 through 31 of the V32QI operand 1 into the V16QI
+;; operand 0 with a single xvpermi.d using immediate 0xe.
+;; NOTE(review): 0xe presumably places the two upper doublewords in the
+;; low half of the destination -- confirm against the xvpermi.d encoding.
+(define_insn "vec_extract_hi_v32qi"
+ [(set (match_operand:V16QI 0 "register_operand" "=f")
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "f")
+ (parallel [(const_int 16) (const_int 17)
+ (const_int 18) (const_int 19)
+ (const_int 20) (const_int 21)
+ (const_int 22) (const_int 23)
+ (const_int 24) (const_int 25)
+ (const_int 26) (const_int 27)
+ (const_int 28) (const_int 29)
+ (const_int 30) (const_int 31)])))]
+ "ISA_HAS_LASX"
+ "xvpermi.d\t%u0,%u1,0xe"
+ [(set_attr "move_type" "fmove")
+ (set_attr "mode" "V32QI")])
+
(define_expand "vec_perm<mode>"
[(match_operand:LASX 0 "register_operand")
(match_operand:LASX 1 "register_operand")
diff --git a/gcc/config/loongarch/loongarch.cc
b/gcc/config/loongarch/loongarch.cc
index 3fe8c766cc7..7bc856b6c60 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4143,6 +4143,31 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int
outer_code,
}
}
+/* Implement TARGET_VECTORIZE_SPLIT_REDUCTION.
+
+   All CPUs prefer to avoid cross-lane operations, so perform reductions
+   upper against lower halves up to LSX reg size.  MODE is the vector mode
+   a reduction would otherwise be carried out in; return the half-width
+   mode paired with it below, or MODE itself when it is not one of the
+   listed modes.  */
+
+static machine_mode
+loongarch_split_reduction (machine_mode mode)
+{
+  switch (mode)
+    {
+    case E_V4DImode:
+      return V2DImode;
+    case E_V8SImode:
+      return V4SImode;
+    case E_V16HImode:
+      return V8HImode;
+    case E_V32QImode:
+      return V16QImode;
+    case E_V8SFmode:
+      return V4SFmode;
+    case E_V4DFmode:
+      return V2DFmode;
+    default:
+      /* Not a mode we split: keep reducing in the requested mode.  */
+      return mode;
+    }
+}
+
/* Implement targetm.vectorize.builtin_vectorization_cost. */
static int
@@ -11397,6 +11422,10 @@ loongarch_can_inline_p (tree caller, tree callee)
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
loongarch_autovectorize_vector_modes
+#undef TARGET_VECTORIZE_SPLIT_REDUCTION
+#define TARGET_VECTORIZE_SPLIT_REDUCTION loongarch_split_reduction
+
#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p
diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-reduc-1.c
b/gcc/testsuite/gcc.target/loongarch/lasx-reduc-1.c
new file mode 100644
index 00000000000..eb3933b7079
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/lasx-reduc-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mlasx -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "\\.REDUC_PLUS" 1 "optimized" } } */
+
+/* Sum 1040 ints from an array asserted to be 64-byte aligned.  The
+   optimized dump is expected to contain exactly one .REDUC_PLUS.  */
+int sumint(const int arr[]) {
+  arr = __builtin_assume_aligned (arr, 64);
+  int sum = 0;
+  for (int i = 0 ; i < 1040; i++)
+    sum += arr[i];
+  return sum;
+}
--
2.20.1