--- mingw-w64-crt/math/_chgsignl.S | 3 +++ mingw-w64-crt/math/ceil.S | 3 +++ mingw-w64-crt/math/ceilf.S | 3 +++ mingw-w64-crt/math/ceill.S | 3 +++ mingw-w64-crt/math/copysignl.S | 2 ++ mingw-w64-crt/math/floor.S | 3 +++ mingw-w64-crt/math/floorf.S | 3 +++ mingw-w64-crt/math/floorl.S | 3 +++ mingw-w64-crt/math/fma.c | 12 ++++++++++++ mingw-w64-crt/math/fmaf.c | 12 ++++++++++++ mingw-w64-crt/math/lrint.c | 5 +++++ mingw-w64-crt/math/lrintf.c | 5 +++++ mingw-w64-crt/math/nearbyint.S | 5 +++++ mingw-w64-crt/math/nearbyintf.S | 5 +++++ mingw-w64-crt/math/nearbyintl.S | 5 +++++ mingw-w64-crt/math/rint.c | 2 ++ mingw-w64-crt/math/rintf.c | 2 ++ mingw-w64-crt/math/sqrt.def.h | 6 ++++++ mingw-w64-crt/math/trunc.S | 3 +++ mingw-w64-crt/math/truncf.S | 3 +++ 20 files changed, 88 insertions(+)
diff --git a/mingw-w64-crt/math/_chgsignl.S b/mingw-w64-crt/math/_chgsignl.S index 2eb7de8..2f8b906 100644 --- a/mingw-w64-crt/math/_chgsignl.S +++ b/mingw-w64-crt/math/_chgsignl.S @@ -39,6 +39,9 @@ __MINGW_USYMBOL(_chgsignl): #elif defined(_ARM_) || defined(__arm__) vneg.f64 d0, d0 bx lr +#elif defined(_ARM64_) || defined(__aarch64__) + fneg d0, d0 + ret #elif defined(_X86_) || defined(__i386__) movl 12(%esp),%eax xorl $0x8000,%eax diff --git a/mingw-w64-crt/math/ceil.S b/mingw-w64-crt/math/ceil.S index 6458cec..eac4cb2 100644 --- a/mingw-w64-crt/math/ceil.S +++ b/mingw-w64-crt/math/ceil.S @@ -107,6 +107,9 @@ __MINGW_USYMBOL(ceil): vcvt.f64.s32 d0, s0 vmsr fpscr, r1 bx lr +#elif defined(_ARM64_) || defined(__aarch64__) + frintp d0, d0 + ret #elif defined(_X86_) || defined(__i386__) fldl 4(%esp) subl $8,%esp diff --git a/mingw-w64-crt/math/ceilf.S b/mingw-w64-crt/math/ceilf.S index 31078e6..c63c629 100644 --- a/mingw-w64-crt/math/ceilf.S +++ b/mingw-w64-crt/math/ceilf.S @@ -102,6 +102,9 @@ __MINGW_USYMBOL(ceilf): vcvt.f32.s32 s0, s0 vmsr fpscr, r1 bx lr +#elif defined(_ARM64_) || defined(__aarch64__) + frintp s0, s0 + ret #elif defined(_X86_) || defined(__i386__) flds 4(%esp) subl $8,%esp diff --git a/mingw-w64-crt/math/ceill.S b/mingw-w64-crt/math/ceill.S index 0152fb1..2beee4c 100644 --- a/mingw-w64-crt/math/ceill.S +++ b/mingw-w64-crt/math/ceill.S @@ -55,6 +55,9 @@ __MINGW_USYMBOL(ceill): vcvt.f64.s32 d0, s0 vmsr fpscr, r1 bx lr +#elif defined(_ARM64_) || defined(__aarch64__) + frintp d0, d0 + ret #elif defined(_X86_) || defined(__i386__) fldt 4(%esp) subl $8,%esp diff --git a/mingw-w64-crt/math/copysignl.S b/mingw-w64-crt/math/copysignl.S index 67b46d0..1c0dbab 100644 --- a/mingw-w64-crt/math/copysignl.S +++ b/mingw-w64-crt/math/copysignl.S @@ -34,6 +34,8 @@ __MINGW_USYMBOL(copysignl): ret #elif defined(_ARM_) || defined(__arm__) b copysign +#elif defined(_ARM64_) || defined(__aarch64__) + b copysign #elif defined(_X86_) || defined(__i386__) movl 24(%esp),%edx 
movl 12(%esp),%eax diff --git a/mingw-w64-crt/math/floor.S b/mingw-w64-crt/math/floor.S index 932df0f..e8f59c6 100644 --- a/mingw-w64-crt/math/floor.S +++ b/mingw-w64-crt/math/floor.S @@ -159,6 +159,9 @@ __MINGW_USYMBOL(floor): vcvt.f64.s32 d0, s0 vmsr fpscr, r1 bx lr +#elif defined(_ARM64_) || defined(__aarch64__) + frintm d0, d0 + ret #elif defined(_X86_) || defined(__i386__) fldl 4(%esp) subl $8,%esp diff --git a/mingw-w64-crt/math/floorf.S b/mingw-w64-crt/math/floorf.S index cb182a3..d08205b 100644 --- a/mingw-w64-crt/math/floorf.S +++ b/mingw-w64-crt/math/floorf.S @@ -42,6 +42,9 @@ __MINGW_USYMBOL(floorf): vcvt.f32.s32 s0, s0 vmsr fpscr, r1 bx lr +#elif defined(_ARM64_) || defined(__aarch64__) + frintm s0, s0 + ret #elif defined(_X86_) || defined(__i386__) flds 4(%esp) subl $8,%esp diff --git a/mingw-w64-crt/math/floorl.S b/mingw-w64-crt/math/floorl.S index ec99b92..80838e8 100644 --- a/mingw-w64-crt/math/floorl.S +++ b/mingw-w64-crt/math/floorl.S @@ -55,6 +55,9 @@ __MINGW_USYMBOL(floorl): vcvt.f64.s32 d0, s0 vmsr fpscr, r1 bx lr +#elif defined(_ARM64_) || defined(__aarch64__) + frintm d0, d0 + ret #elif defined(_X86_) || defined(__i386__) fldt 4(%esp) subl $8,%esp diff --git a/mingw-w64-crt/math/fma.c b/mingw-w64-crt/math/fma.c index 645a3d1..c4ce738 100644 --- a/mingw-w64-crt/math/fma.c +++ b/mingw-w64-crt/math/fma.c @@ -17,6 +17,18 @@ double fma(double x, double y, double z){ return z; } +#elif defined(_ARM64_) || defined(__aarch64__) + +/* Use hardware FMA on ARM64. 
*/ +double fma(double x, double y, double z){ + __asm__ ( + "fmadd %d0, %d1, %d2, %d0 \n" + : "+w"(z) + : "w"(x), "w"(y) + ); + return z; +} + #else long double fmal(long double x, long double y, long double z); diff --git a/mingw-w64-crt/math/fmaf.c b/mingw-w64-crt/math/fmaf.c index 9a0971d..b3f58a8 100644 --- a/mingw-w64-crt/math/fmaf.c +++ b/mingw-w64-crt/math/fmaf.c @@ -17,6 +17,18 @@ float fmaf(float x, float y, float z){ return z; } +#elif defined(_ARM64_) || defined(__aarch64__) + +/* Use hardware FMA on ARM64. */ +float fmaf(float x, float y, float z){ + __asm__ ( + "fmadd %s0, %s1, %s2, %s0 \n" + : "+w"(z) + : "w"(x), "w"(y) + ); + return z; +} + #else long double fmal(long double x, long double y, long double z); diff --git a/mingw-w64-crt/math/lrint.c b/mingw-w64-crt/math/lrint.c index c97e036..ec80e4e 100644 --- a/mingw-w64-crt/math/lrint.c +++ b/mingw-w64-crt/math/lrint.c @@ -16,6 +16,11 @@ long lrint (double x) "vcvtr.s32.f64 %[tmp], %[src]\n\t" "fmrs %[dst], %[tmp]\n\t" : [dst] "=r" (retval), [tmp] "=t" (temp) : [src] "w" (x)); +#elif defined(__aarch64__) || defined(_ARM64_) + __asm__ __volatile__ ( + "frintx %d1, %d1\n\t" + "fcvtzs %w0, %d1\n\t" + : "=r" (retval), "+w" (x)); #endif return retval; } diff --git a/mingw-w64-crt/math/lrintf.c b/mingw-w64-crt/math/lrintf.c index fadc29d..91fc5e1 100644 --- a/mingw-w64-crt/math/lrintf.c +++ b/mingw-w64-crt/math/lrintf.c @@ -15,6 +15,11 @@ long lrintf (float x) "vcvtr.s32.f32 %[src], %[src]\n\t" "fmrs %[dst], %[src]\n\t" : [dst] "=r" (retval), [src] "+w" (x)); +#elif defined(__aarch64__) || defined(_ARM64_) + __asm__ __volatile__ ( + "frintx %s1, %s1\n\t" + "fcvtzs %w0, %s1\n\t" + : "=r" (retval), "+w" (x)); #endif return retval; } diff --git a/mingw-w64-crt/math/nearbyint.S b/mingw-w64-crt/math/nearbyint.S index 5c38570..6c89e9b 100644 --- a/mingw-w64-crt/math/nearbyint.S +++ b/mingw-w64-crt/math/nearbyint.S @@ -46,6 +46,11 @@ __MINGW_USYMBOL(nearbyint): vcvt.f64.s32 d0, s0 vmsr fpscr, r1 bx lr +#elif 
defined(_ARM64_) || defined(__aarch64__) + /* frinti: round in the current FPCR mode without raising Inexact. */ + /* (frintx sets FPSR.IXC; saving/restoring FPCR cannot undo that.) */ + frinti d0, d0 + ret #elif defined(_X86_) || defined(__i386__) fldl 4(%esp) pushl %eax diff --git a/mingw-w64-crt/math/nearbyintf.S b/mingw-w64-crt/math/nearbyintf.S index 146ea47..b1ea50f 100644 --- a/mingw-w64-crt/math/nearbyintf.S +++ b/mingw-w64-crt/math/nearbyintf.S @@ -46,6 +46,11 @@ __MINGW_USYMBOL(nearbyintf): vcvt.f32.s32 s0, s0 vmsr fpscr, r1 bx lr +#elif defined(_ARM64_) || defined(__aarch64__) + /* frinti: round in the current FPCR mode without raising Inexact. */ + /* (frintx sets FPSR.IXC; saving/restoring FPCR cannot undo that.) */ + frinti s0, s0 + ret #elif defined(_X86_) || defined(__i386__) flds 4(%esp) pushl %eax diff --git a/mingw-w64-crt/math/nearbyintl.S b/mingw-w64-crt/math/nearbyintl.S index 8f0e539..e4a1edb 100644 --- a/mingw-w64-crt/math/nearbyintl.S +++ b/mingw-w64-crt/math/nearbyintl.S @@ -47,6 +47,11 @@ __MINGW_USYMBOL(nearbyintl): vcvt.f64.s32 d0, s0 vmsr fpscr, r1 bx lr +#elif defined(_ARM64_) || defined(__aarch64__) + /* frinti: round in the current FPCR mode without raising Inexact. */ + /* (frintx sets FPSR.IXC; saving/restoring FPCR cannot undo that.) */ + frinti d0, d0 + ret #elif defined(_X86_) || defined(__i386__) fldt 4(%esp) pushl %eax diff --git a/mingw-w64-crt/math/rint.c b/mingw-w64-crt/math/rint.c index 8d69075..d883314 100644 --- a/mingw-w64-crt/math/rint.c +++ b/mingw-w64-crt/math/rint.c @@ -17,6 +17,8 @@ double rint (double x) { "vcvtr.s32.f64 %[tmp], %[src]\n\t" "vcvt.f64.s32 %[dst], %[tmp]\n\t" : [dst] "=w" (retval), [tmp] "=t" (temp) : [src] "w" (x)); +#elif defined(__aarch64__) || defined(_ARM64_) + __asm__ __volatile__ ("frintx %d0, %d1\n\t" : "=w" (retval) : "w" (x)); #endif return retval; } diff --git a/mingw-w64-crt/math/rintf.c b/mingw-w64-crt/math/rintf.c index 46b3a47..bfae515 100644 --- a/mingw-w64-crt/math/rintf.c +++ b/mingw-w64-crt/math/rintf.c @@ -16,6 +16,8 @@ float rintf (float x) { "vcvtr.s32.f32 %[dst], %[src]\n\t" "vcvt.f32.s32 %[dst], %[dst]\n\t" : [dst] "=t" (retval) : [src] "w" (x)); +#elif defined(__aarch64__) || defined(_ARM64_) + __asm__ __volatile__ ("frintx %s0, %s1\n\t" : "=w" (retval) : "w" (x)); #endif return retval; } diff --git 
a/mingw-w64-crt/math/sqrt.def.h b/mingw-w64-crt/math/sqrt.def.h index eaba8e0..041bc82 100644 --- a/mingw-w64-crt/math/sqrt.def.h +++ b/mingw-w64-crt/math/sqrt.def.h @@ -77,6 +77,12 @@ __FLT_ABI (sqrt) (__FLT_TYPE x) #else asm ("fsqrtd %[dst], %[src];\n" : [dst] "=w" (res) : [src] "w" (x)); #endif +#elif defined(__aarch64__) || defined(_ARM64_) +#if _NEW_COMPLEX_FLOAT + asm ("fsqrt %s[dst], %s[src]\n" : [dst] "=w" (res) : [src] "w" (x)); +#else + asm ("fsqrt %d[dst], %d[src]\n" : [dst] "=w" (res) : [src] "w" (x)); +#endif #elif defined(_X86_) || defined(__i386__) || defined(_AMD64_) || defined(__x86_64__) asm ("fsqrt" : "=t" (res) : "0" (x)); #else diff --git a/mingw-w64-crt/math/trunc.S b/mingw-w64-crt/math/trunc.S index 86f61e9..2f14a18 100755 --- a/mingw-w64-crt/math/trunc.S +++ b/mingw-w64-crt/math/trunc.S @@ -90,6 +90,9 @@ __MINGW_USYMBOL(trunc): vcvt.f64.s32 d0, s0 vmsr fpscr, r1 bx lr +#elif defined(_ARM64_) || defined(__aarch64__) + frintz d0, d0 + ret #elif defined(_X86_) || defined(__i386__) fldl 4(%esp) subl $8, %esp diff --git a/mingw-w64-crt/math/truncf.S b/mingw-w64-crt/math/truncf.S index 4b399da..afb63a8 100755 --- a/mingw-w64-crt/math/truncf.S +++ b/mingw-w64-crt/math/truncf.S @@ -70,6 +70,9 @@ __MINGW_USYMBOL(truncf): vcvt.f32.s32 s0, s0 vmsr fpscr, r1 bx lr +#elif defined(_ARM64_) || defined(__aarch64__) + frintz s0, s0 + ret #elif defined(_X86_) || defined(__i386__) flds 4(%esp) subl $8, %esp -- 2.7.4 ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot _______________________________________________ Mingw-w64-public mailing list Mingw-w64-public@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/mingw-w64-public