LA664 introduces new instructions for reciprocal approximation and reciprocal square root approximation. It includes the scalar instructions frecipe and frsqrte, as well as their corresponding vector instructions [x]vfrecipe and [x]vfrsqrte. This patch adds define_insn/builtins/intrinsics for these instructions.
gcc/ChangeLog: * config/loongarch/lasx.md (lasx_xvfrecipe_<flasxfmt>): New insn pattern. (lasx_xvfrsqrte_<flasxfmt>): Ditto. * config/loongarch/lasxintrin.h (__lasx_xvfrecipe_s): New intrinsic. (__lasx_xvfrecipe_d): Ditto. (__lasx_xvfrsqrte_s): Ditto. (__lasx_xvfrsqrte_d): Ditto. * config/loongarch/loongarch-builtins.cc: Add new builtin functions. * config/loongarch/loongarch.md (recipe<mode>2): New insn pattern. (rsqrte<mode>): Ditto. * config/loongarch/lsx.md (lsx_vfrecipe_<flsxfmt>): Ditto. (lsx_vfrsqrte_<flsxfmt>): Ditto. * config/loongarch/lsxintrin.h (__lsx_vfrecipe_s): New intrinsic. (__lsx_vfrecipe_d): Ditto. (__lsx_vfrsqrte_s): Ditto. (__lsx_vfrsqrte_d): Ditto. diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md index 2e11f061202..dd60d2bfed3 100644 --- a/gcc/config/loongarch/lasx.md +++ b/gcc/config/loongarch/lasx.md @@ -40,8 +40,10 @@ (define_c_enum "unspec" [ UNSPEC_LASX_XVFCVTL UNSPEC_LASX_XVFLOGB UNSPEC_LASX_XVFRECIP + UNSPEC_LASX_XVFRECIPE UNSPEC_LASX_XVFRINT UNSPEC_LASX_XVFRSQRT + UNSPEC_LASX_XVFRSQRTE UNSPEC_LASX_XVFCMP_SAF UNSPEC_LASX_XVFCMP_SEQ UNSPEC_LASX_XVFCMP_SLE @@ -1688,6 +1690,17 @@ (define_insn "lasx_xvfrecip_<flasxfmt>" [(set_attr "type" "simd_fdiv") (set_attr "mode" "<MODE>")]) +;; Approximate Reciprocal Instructions. + +(define_insn "lasx_xvfrecipe_<flasxfmt>" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] + UNSPEC_LASX_XVFRECIPE))] + "ISA_HAS_LASX" + "xvfrecipe.<flasxfmt>\t%u0,%u1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")]) + (define_insn "lasx_xvfrint_<flasxfmt>" [(set (match_operand:FLASX 0 "register_operand" "=f") (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] @@ -1706,6 +1719,17 @@ (define_insn "lasx_xvfrsqrt_<flasxfmt>" [(set_attr "type" "simd_fdiv") (set_attr "mode" "<MODE>")]) +;; Approximate Reciprocal Square Root Instructions. 
+ +(define_insn "lasx_xvfrsqrte_<flasxfmt>" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] + UNSPEC_LASX_XVFRSQRTE))] + "ISA_HAS_LASX" + "xvfrsqrte.<flasxfmt>\t%u0,%u1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")]) + (define_insn "lasx_xvftint_s_<ilasxfmt>_<flasxfmt>" [(set (match_operand:<VIMODE256> 0 "register_operand" "=f") (unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")] diff --git a/gcc/config/loongarch/lasxintrin.h b/gcc/config/loongarch/lasxintrin.h index 7bce2c757f1..3017361a924 100644 --- a/gcc/config/loongarch/lasxintrin.h +++ b/gcc/config/loongarch/lasxintrin.h @@ -2399,6 +2399,22 @@ __m256d __lasx_xvfrecip_d (__m256d _1) return (__m256d)__builtin_lasx_xvfrecip_d ((v4f64)_1); } +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfrecipe_s (__m256 _1) +{ + return (__m256)__builtin_lasx_xvfrecipe_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfrecipe_d (__m256d _1) +{ + return (__m256d)__builtin_lasx_xvfrecipe_d ((v4f64)_1); +} + /* Assembly instruction format: xd, xj. */ /* Data types in instruction templates: V8SF, V8SF. */ extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -2431,6 +2447,22 @@ __m256d __lasx_xvfrsqrt_d (__m256d _1) return (__m256d)__builtin_lasx_xvfrsqrt_d ((v4f64)_1); } +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfrsqrte_s (__m256 _1) +{ + return (__m256)__builtin_lasx_xvfrsqrte_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfrsqrte_d (__m256d _1) +{ + return (__m256d)__builtin_lasx_xvfrsqrte_d ((v4f64)_1); +} + /* Assembly instruction format: xd, xj. */ /* Data types in instruction templates: V8SF, V8SF. */ extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc index db02aacdc3f..47f658d6ab5 100644 --- a/gcc/config/loongarch/loongarch-builtins.cc +++ b/gcc/config/loongarch/loongarch-builtins.cc @@ -1195,10 +1195,14 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { LSX_BUILTIN (vfsqrt_d, LARCH_V2DF_FTYPE_V2DF), LSX_BUILTIN (vfrecip_s, LARCH_V4SF_FTYPE_V4SF), LSX_BUILTIN (vfrecip_d, LARCH_V2DF_FTYPE_V2DF), + LSX_BUILTIN (vfrecipe_s, LARCH_V4SF_FTYPE_V4SF), + LSX_BUILTIN (vfrecipe_d, LARCH_V2DF_FTYPE_V2DF), LSX_BUILTIN (vfrint_s, LARCH_V4SF_FTYPE_V4SF), LSX_BUILTIN (vfrint_d, LARCH_V2DF_FTYPE_V2DF), LSX_BUILTIN (vfrsqrt_s, LARCH_V4SF_FTYPE_V4SF), LSX_BUILTIN (vfrsqrt_d, LARCH_V2DF_FTYPE_V2DF), + LSX_BUILTIN (vfrsqrte_s, LARCH_V4SF_FTYPE_V4SF), + LSX_BUILTIN (vfrsqrte_d, LARCH_V2DF_FTYPE_V2DF), LSX_BUILTIN (vflogb_s, LARCH_V4SF_FTYPE_V4SF), LSX_BUILTIN (vflogb_d, LARCH_V2DF_FTYPE_V2DF), LSX_BUILTIN (vfcvth_s_h, LARCH_V4SF_FTYPE_V8HI), @@ -1901,10 +1905,14 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { LASX_BUILTIN (xvfsqrt_d, LARCH_V4DF_FTYPE_V4DF), LASX_BUILTIN (xvfrecip_s, LARCH_V8SF_FTYPE_V8SF), LASX_BUILTIN (xvfrecip_d, LARCH_V4DF_FTYPE_V4DF), + LASX_BUILTIN (xvfrecipe_s, LARCH_V8SF_FTYPE_V8SF), + LASX_BUILTIN 
(xvfrecipe_d, LARCH_V4DF_FTYPE_V4DF), LASX_BUILTIN (xvfrint_s, LARCH_V8SF_FTYPE_V8SF), LASX_BUILTIN (xvfrint_d, LARCH_V4DF_FTYPE_V4DF), LASX_BUILTIN (xvfrsqrt_s, LARCH_V8SF_FTYPE_V8SF), LASX_BUILTIN (xvfrsqrt_d, LARCH_V4DF_FTYPE_V4DF), + LASX_BUILTIN (xvfrsqrte_s, LARCH_V8SF_FTYPE_V8SF), + LASX_BUILTIN (xvfrsqrte_d, LARCH_V4DF_FTYPE_V4DF), LASX_BUILTIN (xvflogb_s, LARCH_V8SF_FTYPE_V8SF), LASX_BUILTIN (xvflogb_d, LARCH_V4DF_FTYPE_V4DF), LASX_BUILTIN (xvfcvth_s_h, LARCH_V8SF_FTYPE_V16HI), diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index cd4ed495697..7b09926d1a7 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -59,6 +59,12 @@ (define_c_enum "unspec" [ ;; Stack tie UNSPEC_TIE + ;; RSQRT + UNSPEC_RSQRTE + + ;; RECIP + UNSPEC_RECIPE + ;; CRC UNSPEC_CRC UNSPEC_CRCC @@ -220,6 +226,7 @@ (define_attr "qword_mode" "no,yes" ;; fmadd floating point multiply-add ;; fdiv floating point divide ;; frdiv floating point reciprocal divide +;; frecipe floating point approximate reciprocal ;; fabs floating point absolute value ;; flogb floating point exponent extract ;; fneg floating point negation @@ -229,6 +236,7 @@ (define_attr "qword_mode" "no,yes" ;; fscaleb floating point scale ;; fsqrt floating point square root ;; frsqrt floating point reciprocal square root +;; frsqrte floating point approximate reciprocal square root ;; multi multiword sequence (or user asm statements) ;; atomic atomic memory update instruction ;; syncloop memory atomic operation implemented as a sync loop @@ -238,8 +246,8 @@ (define_attr "type" "unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore, prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical, shift,slt,signext,clz,trap,imul,idiv,move, - fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,flogb,fneg,fcmp,fcopysign,fcvt, - fscaleb,fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost, + 
fmove,fadd,fmul,fmadd,fdiv,frdiv,frecipe,fabs,flogb,fneg,fcmp,fcopysign,fcvt, + fscaleb,fsqrt,frsqrt,frsqrte,accext,accmod,multi,atomic,syncloop,nop,ghost, simd_div,simd_fclass,simd_flog2,simd_fadd,simd_fcvt,simd_fmul,simd_fmadd, simd_fdiv,simd_bitins,simd_bitmov,simd_insert,simd_sld,simd_mul,simd_fcmp, simd_fexp2,simd_int_arith,simd_bit,simd_shift,simd_splat,simd_fill, @@ -911,6 +919,18 @@ (define_insn "*recip<mode>3" [(set_attr "type" "frdiv") (set_attr "mode" "<UNITMODE>")]) +;; Approximate Reciprocal Instructions. + +(define_insn "recipe<mode>2" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] + UNSPEC_RECIPE))] + "TARGET_HARD_FLOAT" + "frecipe.<fmt>\t%0,%1" + [(set_attr "type" "frecipe") + (set_attr "mode" "<UNITMODE>") + (set_attr "insn_count" "1")]) + ;; Integer division and modulus. (define_expand "<optab><mode>3" [(set (match_operand:GPR 0 "register_operand") @@ -1136,6 +1156,17 @@ (define_insn "*rsqrt<mode>b" [(set_attr "type" "frsqrt") (set_attr "mode" "<UNITMODE>") (set_attr "insn_count" "1")]) + +;; Approximate Reciprocal Square Root Instructions. + +(define_insn "rsqrte<mode>" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] + UNSPEC_RSQRTE))] + "TARGET_HARD_FLOAT" + "frsqrte.<fmt>\t%0,%1" + [(set_attr "type" "frsqrte") + (set_attr "mode" "<UNITMODE>")]) ;; ;; .................... 
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md index 5e8d8d74b43..391e84f8d1d 100644 --- a/gcc/config/loongarch/lsx.md +++ b/gcc/config/loongarch/lsx.md @@ -42,8 +42,10 @@ (define_c_enum "unspec" [ UNSPEC_LSX_VFCVTL UNSPEC_LSX_VFLOGB UNSPEC_LSX_VFRECIP + UNSPEC_LSX_VFRECIPE UNSPEC_LSX_VFRINT UNSPEC_LSX_VFRSQRT + UNSPEC_LSX_VFRSQRTE UNSPEC_LSX_VFCMP_SAF UNSPEC_LSX_VFCMP_SEQ UNSPEC_LSX_VFCMP_SLE @@ -1616,6 +1618,17 @@ (define_insn "lsx_vfrecip_<flsxfmt>" [(set_attr "type" "simd_fdiv") (set_attr "mode" "<MODE>")]) +;; Approximate Reciprocal Instructions. + +(define_insn "lsx_vfrecipe_<flsxfmt>" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] + UNSPEC_LSX_VFRECIPE))] + "ISA_HAS_LSX" + "vfrecipe.<flsxfmt>\t%w0,%w1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")]) + (define_insn "lsx_vfrint_<flsxfmt>" [(set (match_operand:FLSX 0 "register_operand" "=f") (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] @@ -1634,6 +1647,17 @@ (define_insn "lsx_vfrsqrt_<flsxfmt>" [(set_attr "type" "simd_fdiv") (set_attr "mode" "<MODE>")]) +;; Approximate Reciprocal Square Root Instructions. 
+ +(define_insn "lsx_vfrsqrte_<flsxfmt>" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] + UNSPEC_LSX_VFRSQRTE))] + "ISA_HAS_LSX" + "vfrsqrte.<flsxfmt>\t%w0,%w1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "<MODE>")]) + (define_insn "lsx_vftint_s_<ilsxfmt>_<flsxfmt>" [(set (match_operand:<VIMODE> 0 "register_operand" "=f") (unspec:<VIMODE> [(match_operand:FLSX 1 "register_operand" "f")] diff --git a/gcc/config/loongarch/lsxintrin.h b/gcc/config/loongarch/lsxintrin.h index 29553c093fa..e1e0df2971c 100644 --- a/gcc/config/loongarch/lsxintrin.h +++ b/gcc/config/loongarch/lsxintrin.h @@ -2480,6 +2480,22 @@ __m128d __lsx_vfrecip_d (__m128d _1) return (__m128d)__builtin_lsx_vfrecip_d ((v2f64)_1); } +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfrecipe_s (__m128 _1) +{ + return (__m128)__builtin_lsx_vfrecipe_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfrecipe_d (__m128d _1) +{ + return (__m128d)__builtin_lsx_vfrecipe_d ((v2f64)_1); +} + /* Assembly instruction format: vd, vj. */ /* Data types in instruction templates: V4SF, V4SF. */ extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -2512,6 +2528,22 @@ __m128d __lsx_vfrsqrt_d (__m128d _1) return (__m128d)__builtin_lsx_vfrsqrt_d ((v2f64)_1); } +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfrsqrte_s (__m128 _1) +{ + return (__m128)__builtin_lsx_vfrsqrte_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. 
*/ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfrsqrte_d (__m128d _1) +{ + return (__m128d)__builtin_lsx_vfrsqrte_d ((v2f64)_1); +} + /* Assembly instruction format: vd, vj. */ /* Data types in instruction templates: V4SF, V4SF. */ extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -- 2.20.1