Hello Uroš, On 13 Feb 18:25, Uros Bizjak wrote: > On Thu, Feb 13, 2014 at 1:55 PM, Uros Bizjak <ubiz...@gmail.com> wrote: > > >> > >> Please don't change srcp pattern, it should be defined similar to > >> vrcpss (aka sse_vmrcpv4sf). You need to switch operand order > >> elsewhere. > > > > No, you are correct. Operands should be swapped as in your patch. > > Eh, sorry that after some more thinking, I have to again revert this decision. > > The srcp pattern should remain as is, and you should swap operands in > avx512fintrin.h instead:
The updated patch is at the bottom. I added the "sse" type attribute, made the memory operand the second operand, and fixed the built-ins and tests. Testing is in progress. Is it OK for mainline if testing passes? -- Thanks, K --- gcc/config/i386/sse.md | 19 ++++++++++++------- gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c | 11 ++++++----- gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c | 11 ++++++----- gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c | 11 ++++++----- gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c | 11 ++++++----- gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c | 4 ++-- gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c | 8 ++++---- 7 files changed, 42 insertions(+), 33 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 5595767..3d360a0 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1456,12 +1456,12 @@ [(set (match_operand:VF_128 0 "register_operand" "=v") (vec_merge:VF_128 (unspec:VF_128 - [(match_operand:VF_128 1 "nonimmediate_operand" "vm")] + [(match_operand:VF_128 2 "nonimmediate_operand" "vm")] UNSPEC_RCP14) - (match_operand:VF_128 2 "register_operand" "v") + (match_operand:VF_128 1 "register_operand" "v") (const_int 1)))] "TARGET_AVX512F" - "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}" + "vrcp14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sse") (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) @@ -12804,6 +12804,7 @@ "TARGET_AVX512ER" "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" [(set_attr "prefix" "evex") + (set_attr "type" "sse") (set_attr "mode" "<MODE>")]) (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>" @@ -12814,20 +12815,22 @@ "TARGET_AVX512ER" "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" [(set_attr "prefix" "evex") + (set_attr "type" "sse") (set_attr "mode" "<MODE>")]) (define_insn 
"avx512er_vmrcp28<mode><round_saeonly_name>" [(set (match_operand:VF_128 0 "register_operand" "=v") (vec_merge:VF_128 (unspec:VF_128 - [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] + [(match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] UNSPEC_RCP28) - (match_operand:VF_128 2 "register_operand" "v") + (match_operand:VF_128 1 "register_operand" "v") (const_int 1)))] "TARGET_AVX512ER" "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}" [(set_attr "length_immediate" "1") (set_attr "prefix" "evex") + (set_attr "type" "sse") (set_attr "mode" "<MODE>")]) (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>" @@ -12838,19 +12841,21 @@ "TARGET_AVX512ER" "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" [(set_attr "prefix" "evex") + (set_attr "type" "sse") (set_attr "mode" "<MODE>")]) (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>" [(set (match_operand:VF_128 0 "register_operand" "=v") (vec_merge:VF_128 (unspec:VF_128 - [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] + [(match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] UNSPEC_RSQRT28) - (match_operand:VF_128 2 "register_operand" "v") + (match_operand:VF_128 1 "register_operand" "v") (const_int 1)))] "TARGET_AVX512ER" "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}" [(set_attr "length_immediate" "1") + (set_attr "type" "sse") (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c index d30f088..889f990 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c @@ -10,19 +10,20 @@ void static 
avx512er_test (void) { - union128d src, res; + union128d src1, src2, res; double res_ref[2]; int i; for (i = 0; i < 2; i++) { - src.a[i] = 179.345 - 6.5645 * i; - res_ref[i] = src.a[i]; + src1.a[i] = 179.345 - 6.5645 * i; + src2.a[i] = 204179.345 + 6.5645 * i; + res_ref[i] = src1.a[i]; } - res_ref[0] = 1.0 / src.a[0]; + res_ref[0] = 1.0 / src2.a[0]; - res.x = _mm_rcp28_round_sd (src.x, src.x, _MM_FROUND_NO_EXC); + res.x = _mm_rcp28_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC); if (checkVd (res.a, res_ref, 2)) abort (); diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c index 499a977..3280879 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c @@ -10,19 +10,20 @@ void static avx512er_test (void) { - union128 src, res; + union128 src1, src2, res; float res_ref[4]; int i; for (i = 0; i < 4; i++) { - src.a[i] = 179.345 - 6.5645 * i; - res_ref[i] = src.a[i]; + src1.a[i] = 179.345 - 6.5645 * i; + src2.a[i] = 179345.006 + 6.5645 * i; + res_ref[i] = src1.a[i]; } - res_ref[0] = 1.0 / src.a[0]; + res_ref[0] = 1.0 / src2.a[0]; - res.x = _mm_rsqrt28_round_ss (src.x, src.x, _MM_FROUND_NO_EXC); + res.x = _mm_rcp28_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC); if (checkVf (res.a, res_ref, 4)) abort (); diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c index 1537a59..bd217e8 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c @@ -10,19 +10,20 @@ void static avx512er_test (void) { - union128d src, res; + union128d src1, src2, res; double res_ref[2]; int i; for (i = 0; i < 2; i++) { - src.a[i] = 179.345 - 6.5645 * i; - res_ref[i] = src.a[i]; + src1.a[i] = 179.345 - 6.5645 * i; + src2.a[i] = 45 - 6.5645 * i; + res_ref[i] = src1.a[i]; } - res_ref[0] = 1.0 / sqrt (src.a[0]); + res_ref[0] = 1.0 / sqrt 
(src2.a[0]); - res.x = _mm_rsqrt28_round_sd (src.x, src.x, _MM_FROUND_NO_EXC); + res.x = _mm_rsqrt28_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC); if (checkVd (res.a, res_ref, 2)) abort (); diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c index f88422e..f7bfff5 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c @@ -10,19 +10,20 @@ void static avx512er_test (void) { - union128 src, res; + union128 src1, src2, res; float res_ref[4]; int i; for (i = 0; i < 4; i++) { - src.a[i] = 179.345 - 6.5645 * i; - res_ref[i] = src.a[i]; + src1.a[i] = 179.345 - 6.5645 * i; + src2.a[i] = 179221345 + 6.5645 * i; + res_ref[i] = src1.a[i]; } - res_ref[0] = 1.0 / sqrt (src.a[0]); + res_ref[0] = 1.0 / sqrt (src2.a[0]); - res.x = _mm_rsqrt28_round_ss (src.x, src.x, _MM_FROUND_NO_EXC); + res.x = _mm_rsqrt28_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC); if (checkVf (res.a, res_ref, 4)) abort (); diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c index 0c9211a..f944600 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c @@ -8,8 +8,8 @@ static void compute_vrcp14sd (double *s1, double *s2, double *r) { - r[0] = 1.0 / s1[0]; - r[1] = s2[1]; + r[0] = 1.0 / s2[0]; + r[1] = s1[1]; } static void diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c index 3344dad..7aca591 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c @@ -8,10 +8,10 @@ static void compute_vrcp14ss (float *s1, float *s2, float *r) { - r[0] = 1.0 / s1[0]; - r[1] = s2[1]; - r[2] = s2[2]; - r[3] = s2[3]; + r[0] = 1.0 / s2[0]; + r[1] = s1[1]; + r[2] = s1[2]; + r[3] = s1[3]; } static void