Hi, Please find attached the patch that adds the "neon_pairwise_add" and "neon_pairwise_add_q" instruction types for AArch64.
The patch doesn't change SPEC results but improves other benchmarks. Bootstrapped and regression tested on aarch64-thunder-linux. Please review the patch and let us know if it's okay for Stage 1. Thanks, Naveen 2017-03-06 Julian Brown <jul...@codesourcery.com> Naveen H.S <naveen.hurugalaw...@cavium.com> * config/aarch64/aarch64-simd.md (aarch64_reduc_plus_internal<mode>) (aarch64_reduc_plus_internalv2si, aarch64_addp<mode>, aarch64_addpdi): Use neon_pairwise_add/neon_pairwise_add_q as appropriate. * config/aarch64/iterators.md (reduc_pairwise): New mode attribute. * config/aarch64/thunderx.md (thunderx_neon_add, thunderx_neon_add_q): Tweak for neon_pairwise_add split. * config/aarch64/thunderx2t99.md (thunderx2t99_asimd_int): Add neon_pairwise_add/neon_pairwise_add_q types. * config/arm/cortex-a15-neon.md (cortex_a15_neon_type): Likewise. * config/arm/cortex-a17-neon.md (cortex_a17_neon_type): Likewise. * config/arm/cortex-a57.md (cortex_a57_neon_type): Likewise. * config/arm/cortex-a8-neon.md (cortex_a8_neon_type): Likewise. * config/arm/cortex-a9-neon.md (cortex_a9_neon_type): Likewise. * config/arm/xgene1.md (xgene1_neon_arith): Likewise. * config/arm/types.md (neon_pairwise_add, neon_pairwise_add_q): Add.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 338b9f8..878f86a 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -2101,7 +2101,7 @@ UNSPEC_ADDV))] "TARGET_SIMD" "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>" - [(set_attr "type" "neon_reduc_add<q>")] + [(set_attr "type" "neon_<reduc_pairwise>_add<q>")] ) (define_insn "aarch64_reduc_plus_internalv2si" @@ -2110,7 +2110,7 @@ UNSPEC_ADDV))] "TARGET_SIMD" "addp\\t%0.2s, %1.2s, %1.2s" - [(set_attr "type" "neon_reduc_add")] + [(set_attr "type" "neon_pairwise_add")] ) (define_insn "reduc_plus_scal_<mode>" @@ -4405,7 +4405,7 @@ UNSPEC_ADDP))] "TARGET_SIMD" "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" - [(set_attr "type" "neon_reduc_add<q>")] + [(set_attr "type" "neon_pairwise_add<q>")] ) (define_insn "aarch64_addpdi" @@ -4415,7 +4415,7 @@ UNSPEC_ADDP))] "TARGET_SIMD" "addp\t%d0, %1.2d" - [(set_attr "type" "neon_reduc_add")] + [(set_attr "type" "neon_pairwise_add")] ) ;; sqrt diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index c59d31e..c829cb5 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -790,6 +790,12 @@ (V2SF "p") (V4SF "v") (V4HF "v") (V8HF "v")]) +(define_mode_attr reduc_pairwise [(V8QI "reduc") (V16QI "reduc") + (V4HI "reduc") (V8HI "reduc") + (V2SI "pairwise") (V4SI "reduc") + (V2DI "pairwise") (V2DF "pairwise") + (V2SF "pairwise") (V4SF "reduc")]) + (define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")]) (define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")]) diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md index b67671d..95bfad4 100644 --- a/gcc/config/aarch64/thunderx.md +++ b/gcc/config/aarch64/thunderx.md @@ -266,7 +266,8 @@ (define_insn_reservation "thunderx_neon_add" 4 (and (eq_attr "tune" "thunderx") - (eq_attr "type" "neon_reduc_add, neon_reduc_minmax, neon_fp_reduc_add_s, \ + (eq_attr "type" "neon_reduc_add, 
neon_pairwise_add, neon_reduc_minmax,\ + neon_fp_reduc_add_s, \ neon_fp_reduc_add_d, neon_fp_to_int_s, neon_fp_to_int_d, \ neon_add_halve, neon_sub_halve, neon_qadd, neon_compare, \ neon_compare_zero, neon_minmax, neon_abd, neon_add, neon_sub, \ @@ -280,7 +281,8 @@ (define_insn_reservation "thunderx_neon_add_q" 5 (and (eq_attr "tune" "thunderx") - (eq_attr "type" "neon_reduc_add_q, neon_reduc_minmax_q, neon_fp_reduc_add_s_q, \ + (eq_attr "type" "neon_reduc_add_q, neon_pairwise_add_q,\ + neon_reduc_minmax_q, neon_fp_reduc_add_s_q, \ neon_fp_reduc_add_d_q, neon_fp_to_int_s_q, neon_fp_to_int_d_q, \ neon_add_halve_q, neon_sub_halve_q, neon_qadd_q, neon_compare_q, \ neon_compare_zero_q, neon_minmax_q, neon_abd_q, neon_add_q, neon_sub_q, \ diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md index 67011ac..f807547 100644 --- a/gcc/config/aarch64/thunderx2t99.md +++ b/gcc/config/aarch64/thunderx2t99.md @@ -231,6 +231,7 @@ neon_abs,neon_abs_q,\ neon_add,neon_add_q,\ neon_neg,neon_neg_q,\ + neon_pairwise_add,neon_pairwise_add_q,\ neon_add_long,neon_add_widen,\ neon_add_halve,neon_add_halve_q,\ neon_sub_long,neon_sub_widen,\ diff --git a/gcc/config/arm/cortex-a15-neon.md b/gcc/config/arm/cortex-a15-neon.md index 73ee84c..1a02fa2 100644 --- a/gcc/config/arm/cortex-a15-neon.md +++ b/gcc/config/arm/cortex-a15-neon.md @@ -48,6 +48,7 @@ (eq_attr "type" "neon_add, neon_add_q, neon_add_long,\ neon_add_widen, neon_neg, neon_neg_q,\ neon_reduc_add, neon_reduc_add_q,\ + neon_pairwise_add, neon_pairwise_add_q,\ neon_reduc_add_long, neon_sub, neon_sub_q,\ neon_sub_long, neon_sub_widen, neon_logic,\ neon_logic_q, neon_tst, neon_tst_q") diff --git a/gcc/config/arm/cortex-a17-neon.md b/gcc/config/arm/cortex-a17-neon.md index 29d08de..d4f9e03 100644 --- a/gcc/config/arm/cortex-a17-neon.md +++ b/gcc/config/arm/cortex-a17-neon.md @@ -47,6 +47,7 @@ (eq_attr "type" "neon_add, neon_add_q, neon_add_long,\ neon_add_widen, neon_neg, neon_neg_q,\ neon_reduc_add, 
neon_reduc_add_q,\ + neon_pairwise_add, neon_pairwise_add_q,\ neon_reduc_add_long, neon_sub, neon_sub_q,\ neon_sub_long, neon_sub_widen, neon_logic,\ neon_logic_q, neon_tst, neon_tst_q") diff --git a/gcc/config/arm/cortex-a57.md b/gcc/config/arm/cortex-a57.md index ebf4a49..cb5e181 100644 --- a/gcc/config/arm/cortex-a57.md +++ b/gcc/config/arm/cortex-a57.md @@ -50,6 +50,7 @@ (eq_attr "type" "neon_add, neon_add_q, neon_add_long,\ neon_add_widen, neon_neg, neon_neg_q,\ neon_reduc_add, neon_reduc_add_q,\ + neon_pairwise_add, neon_pairwise_add_q,\ neon_reduc_add_long, neon_sub, neon_sub_q,\ neon_sub_long, neon_sub_widen, neon_logic,\ neon_logic_q, neon_tst, neon_tst_q") diff --git a/gcc/config/arm/cortex-a8-neon.md b/gcc/config/arm/cortex-a8-neon.md index baa9180..652ba4e 100644 --- a/gcc/config/arm/cortex-a8-neon.md +++ b/gcc/config/arm/cortex-a8-neon.md @@ -51,6 +51,7 @@ (const_string "neon_int_2") (eq_attr "type" "neon_neg, neon_neg_q,\ neon_reduc_add, neon_reduc_add_q,\ + neon_pairwise_add, neon_pairwise_add_q,\ neon_reduc_add_long,\ neon_add_long, neon_sub_long") (const_string "neon_int_3") diff --git a/gcc/config/arm/cortex-a9-neon.md b/gcc/config/arm/cortex-a9-neon.md index 9e9827f..bb1e300 100644 --- a/gcc/config/arm/cortex-a9-neon.md +++ b/gcc/config/arm/cortex-a9-neon.md @@ -52,6 +52,7 @@ (const_string "neon_int_2") (eq_attr "type" "neon_neg, neon_neg_q,\ neon_reduc_add, neon_reduc_add_q,\ + neon_pairwise_add, neon_pairwise_add_q,\ neon_reduc_add_long,\ neon_add_long, neon_sub_long") (const_string "neon_int_3") diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md index 253f496..ce1b6bc 100644 --- a/gcc/config/arm/types.md +++ b/gcc/config/arm/types.md @@ -274,6 +274,8 @@ ; neon_reduc_add_long ; neon_reduc_add_acc ; neon_reduc_add_acc_q +; neon_pairwise_add +; neon_pairwise_add_q ; neon_reduc_minmax ; neon_reduc_minmax_q ; neon_logic @@ -788,6 +790,8 @@ neon_reduc_add_long,\ neon_reduc_add_acc,\ neon_reduc_add_acc_q,\ + neon_pairwise_add,\ + 
neon_pairwise_add_q,\ neon_reduc_minmax,\ neon_reduc_minmax_q,\ neon_logic,\ @@ -1116,6 +1120,7 @@ neon_compare_q, neon_compare_zero, neon_compare_zero_q,\ neon_arith_acc, neon_arith_acc_q, neon_reduc_add,\ neon_reduc_add_q, neon_reduc_add_long, neon_reduc_add_acc,\ + neon_pairwise_add, neon_pairwise_add_q,\ neon_reduc_add_acc_q, neon_reduc_minmax, neon_reduc_minmax_q,\ neon_logic, neon_logic_q, neon_tst, neon_tst_q,\ neon_shift_imm, neon_shift_imm_q, neon_shift_imm_narrow_q,\ diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md index 34a13f4..1f68305 100644 --- a/gcc/config/arm/xgene1.md +++ b/gcc/config/arm/xgene1.md @@ -288,6 +288,8 @@ neon_arith_acc_q,\ neon_reduc_add,\ neon_reduc_add_q,\ + neon_pairwise_add,\ + neon_pairwise_add_q,\ neon_add_halve,\ neon_add_halve_q,\ neon_sub_halve,\