From: Lingling Kong <lingling.k...@intel.com> gcc/ChangeLog:
* config/i386/i386-expand.cc (ix86_expand_int_cfmovcc): Expand to cfcmov pattern. * config/i386/i386-opts.h (enum apx_features): New. * config/i386/i386-protos.h (ix86_expand_int_cfmovcc): Define. * config/i386/i386.cc (ix86_rtx_costs): Add UNSPEC_APX_CFCMOV cost. * config/i386/i386.h (TARGET_APX_CFCMOV): Define. * config/i386/i386.md (maskload<mode><mode>): New define_expand. (maskstore<mode><mode>): Ditto. (*cfmov<mode>cc): New define_insn. (*cfmov<mode>cc_2): Ditto. (*cfmov<mode>ccz): Ditto. (UNSPEC_APX_CFCMOV): New unspec for cfcmov. * config/i386/i386.opt: Add enum value for cfcmov. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-cfcmov-1.c: New test. * gcc.target/i386/apx-cfcmov-2.c: Ditto. --- gcc/config/i386/i386-expand.cc | 46 ++++++++++++ gcc/config/i386/i386-opts.h | 4 +- gcc/config/i386/i386-protos.h | 1 + gcc/config/i386/i386.cc | 16 +++-- gcc/config/i386/i386.h | 1 + gcc/config/i386/i386.md | 76 +++++++++++++++++++- gcc/config/i386/i386.opt | 3 + gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c | 73 +++++++++++++++++++ gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c | 40 +++++++++++ 9 files changed, 254 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c create mode 100644 gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 2ab57874234..48809b5b289 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -3536,6 +3536,52 @@ ix86_expand_int_addcc (rtx operands[]) return true; } +void +ix86_expand_int_cfmovcc (rtx dest, rtx compare_op, rtx vtrue, rtx vfalse) +{ + machine_mode mode = GET_MODE(dest); + enum rtx_code code = GET_CODE (compare_op); + rtx_insn *compare_seq; + rtx op0 = XEXP (compare_op, 0); + rtx op1 = XEXP (compare_op, 1); + rtx op2 = vtrue; + rtx op3 = vfalse; + + gcc_assert (may_trap_or_fault_p (op2) || may_trap_or_fault_p (op3)); + /* For Conditional store only handle "if (test) *x = a; else skip;". 
*/ + if (MEM_P (dest)) + gcc_assert (rtx_equal_p (dest, op3)); + + start_sequence (); + compare_op = ix86_expand_compare (code, op0, op1); + compare_seq = get_insns (); + end_sequence (); + + if (may_trap_or_fault_p (op2)) + op2 = gen_rtx_UNSPEC (mode, gen_rtvec (1, op2), + UNSPEC_APX_CFCMOV); + if (may_trap_or_fault_p (op3)) + op3 = gen_rtx_UNSPEC (mode, gen_rtvec (1, op3), + UNSPEC_APX_CFCMOV); + emit_insn (compare_seq); + /* For "if (test) x = *a; else x = *b", generate two cfcmovs. */ + if (may_trap_or_fault_p (op2) && may_trap_or_fault_p (op3)) + { + emit_insn (gen_rtx_SET (dest, + gen_rtx_IF_THEN_ELSE (mode, compare_op, + op2, dest))); + emit_insn (gen_rtx_SET (dest, + gen_rtx_IF_THEN_ELSE (mode, compare_op, + dest, op3))); + } + /* With a single memory operand ("if (test) x = *a; else x = b" or the + mirrored form, b being a register or zero) one cfcmov is enough. */ + else + emit_insn (gen_rtx_SET (dest, + gen_rtx_IF_THEN_ELSE (mode, compare_op, + op2, op3))); +} + bool ix86_expand_int_movcc (rtx operands[]) { diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h index d47184e2879..899873dfeca 100644 --- a/gcc/config/i386/i386-opts.h +++ b/gcc/config/i386/i386-opts.h @@ -144,8 +144,10 @@ enum apx_features { apx_nf = 1 << 4, apx_ccmp = 1 << 5, apx_zu = 1 << 6, + apx_cfcmov = 1 << 7, apx_all = apx_egpr | apx_push2pop2 | apx_ndd - | apx_ppx | apx_nf | apx_ccmp | apx_zu, + | apx_ppx | apx_nf | apx_ccmp | apx_zu + | apx_cfcmov, }; #endif diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index f122fd8a0a3..e98fcc8c857 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -153,6 +153,7 @@ extern bool ix86_match_ccmode (rtx, machine_mode); extern bool ix86_match_ptest_ccmode (rtx); extern void ix86_expand_branch (enum rtx_code, rtx, rtx, rtx); extern void ix86_expand_setcc (rtx, enum rtx_code, rtx, rtx); +extern void ix86_expand_int_cfmovcc (rtx, rtx, rtx, rtx); extern bool ix86_expand_int_movcc (rtx[]); extern bool 
ix86_expand_fp_movcc (rtx[]); extern bool ix86_expand_fp_vcond (rtx[]); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 11770aa8a50..85af9347421 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -22621,10 +22621,18 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, *total = COSTS_N_INSNS (1); if (!COMPARISON_P (XEXP (x, 0)) && !REG_P (XEXP (x, 0))) *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); - if (!REG_P (XEXP (x, 1))) - *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); - if (!REG_P (XEXP (x, 2))) - *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed); + rtx op1, op2; + op1 = XEXP (x, 1); + op2 = XEXP (x, 2); + /* Handle UNSPEC_APX_CFCMOV for cfcmov. */ + if (GET_CODE (op1) == UNSPEC && XINT (op1, 1) == UNSPEC_APX_CFCMOV) + op1 = XVECEXP (op1, 0, 0); + if (GET_CODE (op2) == UNSPEC && XINT (op2, 1) == UNSPEC_APX_CFCMOV) + op2 = XVECEXP (op2, 0, 0); + if (!REG_P (op1)) + *total += rtx_cost (op1, mode, code, 1, speed); + if (!REG_P (op2)) + *total += rtx_cost (op2, mode, code, 2, speed); return true; } return false; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index e8e528c7811..51f2cea5f84 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -58,6 +58,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see #define TARGET_APX_NF (ix86_apx_features & apx_nf) #define TARGET_APX_CCMP (ix86_apx_features & apx_ccmp) #define TARGET_APX_ZU (ix86_apx_features & apx_zu) +#define TARGET_APX_CFCMOV (ix86_apx_features & apx_cfcmov) #include "config/vxworks-dummy.h" diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 362b0ddcf40..e89cc0153db 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -223,6 +223,9 @@ (define_c_enum "unspec" [ ;; For APX CCMP support ;; DFV = default flag value UNSPEC_APX_DFV + + ;; For APX CFCMOV support + UNSPEC_APX_CFCMOV ]) (define_c_enum "unspecv" [ @@ -584,7 +587,7 @@ (define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx, noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni, avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert, avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl, - vaes_avx512vl,noapx_nf,avx10_2" + vaes_avx512vl,noapx_nf,avx10_2,apx_cfcmov" (const_string "base")) ;; The (bounding maximum) length of an instruction immediate. 
@@ -995,6 +998,7 @@ (define_attr "enabled" "" (eq_attr "mmx_isa" "avx") (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX") (eq_attr "isa" "noapx_nf") (symbol_ref "!TARGET_APX_NF") + (eq_attr "isa" "apx_cfcmov") (symbol_ref "TARGET_APX_CFCMOV") ] (const_int 1))) @@ -26088,6 +26092,30 @@ (define_expand "mov<mode>cc" "" "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") +(define_expand "maskload<mode>void" + [(set (match_operand:SWI248 0 "register_operand") + (if_then_else:SWI248 (match_operand 2 "comparison_operator") + (match_operand:SWI248 1 "nonimm_or_0_operand") + (match_operand:SWI248 3 "nonimm_or_0_operand")))] + "TARGET_APX_CFCMOV" +{ + ix86_expand_int_cfmovcc (operands[0], operands[2], + operands[1], operands[3]); + DONE; +}) + +(define_expand "maskstore<mode>void" + [(set (match_operand:SWI248 0 "memory_operand") + (if_then_else:SWI248 (match_operand 2 "comparison_operator") + (match_operand:SWI248 1 "register_operand") + (match_dup 0)))] + "TARGET_APX_CFCMOV" +{ + ix86_expand_int_cfmovcc (operands[0], operands[2], + operands[1], operands[0]); + DONE; +}) + ;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing ;; the register first winds up with `sbbl $0,reg', which is also weird. ;; So just document what we're doing explicitly. 
@@ -26189,6 +26217,52 @@ (define_split (set (match_dup 0) (neg:SWI (ltu:SWI (reg:CCC FLAGS_REG) (const_int 0))))]) +(define_insn "*cfmov<mode>cc" + [(set (match_operand:SWI248 0 "register_operand" "=r,r") + (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (unspec:SWI248 + [(match_operand:SWI248 2 "memory_operand" "m,m")] + UNSPEC_APX_CFCMOV) + (match_operand:SWI248 3 "reg_or_0_operand" "C,r")))] + "TARGET_CMOVE && TARGET_APX_CFCMOV" + "@ + cfcmov%O2%C1\t{%2, %0|%0, %2} + cfcmov%O2%C1\t{%2, %3, %0|%0, %3, %2}" + [(set_attr "isa" "*,apx_ndd") + (set_attr "type" "icmov") + (set_attr "prefix" "evex") + (set_attr "mode" "<MODE>")]) + +(define_insn "*cfmov<mode>cc_2" + [(set (match_operand:SWI248 0 "nonimmediate_operand" "=r,r,m") + (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:SWI248 2 "reg_or_0_operand" "r,C,r") + (unspec:SWI248 + [(match_operand:SWI248 3 "memory_operand" "m,m,0")] + UNSPEC_APX_CFCMOV)))] + "TARGET_CMOVE && TARGET_APX_CFCMOV" + "@ + cfcmov%O2%c1\t{%3, %2, %0|%0, %2, %3} + cfcmov%O2%c1\t{%3, %0|%0, %3} + cfcmov%O2%C1\t{%2, %0|%0, %2}" + [(set_attr "isa" "apx_ndd,*,*") + (set_attr "type" "icmov") + (set_attr "prefix" "evex") + (set_attr "mode" "<MODE>")]) + +(define_insn "*cfmov<mode>ccz" + [(set (match_operand:SWI248 0 "register_operand" "=r") + (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:SWI248 2 "register_operand" "r") + (match_operand:SWI248 3 "const0_operand" "C")))] + "TARGET_CMOVE && TARGET_APX_CFCMOV" + "cfcmov%O2%C1\t{%2, %0|%0, %2}" + [(set_attr "type" "icmov") + (set_attr "mode" "<MODE>")]) + (define_insn "*mov<mode>cc_noc" [(set (match_operand:SWI248 0 "register_operand" "=r,r,r,r") (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator" diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 5c889b72cc5..1bfe372724c 
100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -1348,6 +1348,9 @@ Enum(apx_features) String(ccmp) Value(apx_ccmp) Set(7) EnumValue Enum(apx_features) String(zu) Value(apx_zu) Set(8) +EnumValue +Enum(apx_features) String(cfcmov) Value(apx_cfcmov) Set(9) + EnumValue Enum(apx_features) String(all) Value(apx_all) Set(1) diff --git a/gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c b/gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c new file mode 100644 index 00000000000..4a1fb91b24c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c @@ -0,0 +1,73 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O3 -mapxf" } */ + +/* { dg-final { scan-assembler-times "cfcmovne" 1 } } */ +/* { dg-final { scan-assembler-times "cfcmovg" 2} } */ +/* { dg-final { scan-assembler-times "cfcmove" 1 } } */ +/* { dg-final { scan-assembler-times "cfcmovl" 2 } } */ +/* { dg-final { scan-assembler-times "cfcmovle" 1 } } */ + +__attribute__((noinline, noclone, target("apxf"))) +int cfc_store (int a, int b, int c, int d, int *arr) +{ + if (a != b) + *arr = c; + return d; + +} + +__attribute__((noinline, noclone, target("apxf"))) +int cfc_load_ndd (int a, int b, int c, int *p) +{ + if (a > b) + return *p; + return c; +} + +__attribute__((noinline, noclone, target("apxf"))) +int cfc_load_2_trap (int a, int b, int *c, int *p) +{ + if (a > b) + return *p; + return *c; +} + +__attribute__((noinline, noclone, target("apxf"))) +int cfc_load_zero (int a, int b, int c) +{ + int sum = 0; + if (a == b) + return c; + return sum; +} + +__attribute__((noinline, noclone, target("apxf"))) +int cfc_load_mem (int a, int b, int *p) +{ + int sum = 0; + if (a < b ) + sum = *p; + return sum; +} + +__attribute__((noinline, noclone, target("apxf"))) +int cfc_load_arith_1 (int a, int b, int c, int *p) +{ + int sum = 0; + if (a > b) + sum = *p; + else + sum = a + c; + return sum + 1; +} + +__attribute__((noinline, noclone, target("apxf"))) +int cfc_load_arith_2 (int a, int b, 
int c, int *p) +{ + int sum = 0; + if (a > b) + sum = a + c; + else + sum = *p; + return sum + 1; +} diff --git a/gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c b/gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c new file mode 100644 index 00000000000..2b1660f64fa --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c @@ -0,0 +1,40 @@ +/* { dg-do run { target { ! ia32 } } } */ +/* { dg-require-effective-target apxf } */ +/* { dg-options "-mapxf -march=x86-64 -O3" } */ + +#include "apx-cfcmov-1.c" + +extern void abort (void); + +int main () +{ + if (!__builtin_cpu_supports ("apxf")) + return 0; + + int arr = 6; + int arr1 = 5; + int res = cfc_store (1, 2, 3, 4, &arr); + if (arr != 3 || res != 4) + abort (); + res = cfc_load_ndd (2, 1, 2, &arr); + if (res != 3) + abort (); + res = cfc_load_2_trap (1, 2, &arr1, &arr); + if (res != 5) + abort (); + res = cfc_load_zero (1, 2, 3); + res = cfc_load_zero (1, 2, 3); + if (res != 0) + abort (); + res = cfc_load_mem (2, 1, &arr); + if (res != 0) + abort (); + res = cfc_load_arith_1 (1, 2, 3, &arr); + if (res != 5) + abort (); + res = cfc_load_arith_2 (2, 1, 3, &arr); + if (res != 6) + abort (); + return 0; +} + -- 2.31.1