Re: [PATCH] ppc: Use hard-float in ppc fp_helper as early as possible. This improves performance more than enabling hard-float in soft-float.c. Using fadd, fsub, fmul and fdiv as a simple benchmark demo, this patch makes performance 2x better than the original, and 1.3x better than the version that enables hard-float in soft-float.c. Neither version takes the inexact fp exception into account yet.

Yonggang Luo Mon, 04 May 2020 13:03:10 -0700

Bench results:
original:
-> FLOPS 3.00
GCC version: 4.3.3
Ops count: 1073217024
Time spent: 27.768 sec
MFLOPS: 38.65
FLOPS 3.00
GCC version: 4.3.3
Ops count: 1073217024
Time spent: 28.359 sec
MFLOPS: 37.84



soft-hard-float:

GCC version: 4.3.3
Ops count: 1073217024
Time spent: 14.874 sec
MFLOPS: 72.15
FLOPS 3.00
GCC version: 4.3.3
Ops count: 1073217024
Time spent: 14.249 sec
MFLOPS: 75.32

direct-hard-float:

-> FLOPS 3.00
GCC version: 4.3.3
Ops count: 1073217024
Time spent: 13.021 sec
MFLOPS: 82.42
FLOPS 3.00
GCC version: 4.3.3
Ops count: 1073217024
Time spent: 12.472 sec
MFLOPS: 86.05
FLOPS 3.00
GCC version: 4.3.3
Ops count: 1073217024
Time spent: 11.803 sec
MFLOPS: 90.93
FLOPS 3.00
GCC version: 4.3.3
Ops count: 1073217024
Time spent: 11.945 sec
MFLOPS: 89.85

bench program:

```
#include <stdio.h>
#include <stdlib.h>
#ifdef __vxworks
#include <sys/resource.h>
#include <vxworks.h>
#include <timers.h>
#include <time.h>
#elif defined(_MSC_VER)
#include <Windows.h>
#include <time.h>
#else
#include <time.h>
#endif
/*
cl -O2 test_flops.c
gcc -O2 test_flops.c -o test_flops

*/
/*
 * Matrix dimension and floating-point operation counter.
 *
 * Default build (DIM not supplied): DIM is 1024 and nop is a preset
 * per-run operation count. 1073217024 is what dge()'s COUNT accounting
 * yields for DIM == 1024 (523776 row updates, 1 + 2 * 1024 ops each).
 *
 * Custom build (DIM supplied on the command line): COUNT is defined, nop
 * starts at 0 and dge() increments it at run time.
 */
#ifndef DIM
#define DIM 1024
const long long int nop = 1073217024;
#else
#define COUNT
long long int nop = 0;
#endif

/*
 * Dump the DIM x DIM matrix to stdout: every element is printed with the
 * "%6.3f" format, and each row is terminated by a newline.
 */
void printm(double A[DIM][DIM])
{
    int row = 0;

    while (row < DIM) {
        const double *cell = A[row];
        const double *row_end = cell + DIM;

        for (; cell != row_end; cell++) {
            printf("%6.3f", *cell);
        }
        printf("\n");
        row++;
    }
}

/*
 * Fill the DIM x DIM matrix with pseudo-random values in [-0.5, 0.5].
 * The generator is re-seeded with a fixed seed on every call, and cells
 * are filled in row-major order.
 */
void initm(double A[DIM][DIM])
{
    int row, col;

    srand(38741);
    for (row = 0; row < DIM; row++) {
        double *cells = A[row];

        for (col = 0; col < DIM; col++) {
            cells[col] = (double)rand() / (double)RAND_MAX - 0.5;
        }
    }
}

/*
 * In-place elimination pass over the DIM x DIM matrix (the benchmark
 * kernel). For each pivot row p, every row below it has
 * (row[p] / pivot[p]) * pivot subtracted from it across all DIM columns.
 * No pivoting or zero-pivot check is performed.
 *
 * When COUNT is defined, nop is increased by 1 for each division and by 2
 * for each multiply-subtract.
 */
void dge(double A[DIM][DIM])
{
    int pivot, row, col;

    for (pivot = 0; pivot + 1 < DIM; pivot++) {
        const double *pivot_row = A[pivot];

        for (row = pivot + 1; row < DIM; row++) {
            double *target = A[row];
            const double factor = target[pivot] / pivot_row[pivot];
#ifdef COUNT
            nop += 1;
#endif
            for (col = 0; col < DIM; col++) {
                target[col] -= factor * pivot_row[col];
#ifdef COUNT
                nop += 2;
#endif
            }
        }
    }
}

/* Matrix the benchmark works on: filled by initm() and reduced by dge() in main(). */
double X[DIM][DIM];

/*
 * Return a timestamp in seconds with sub-second precision.
 *
 * Only the difference between two return values is meaningful: the
 * underlying counters (QueryPerformanceCounter, clock_gettime) have an
 * undefined starting point (zero) and can wrap around.
 *
 * On non-MSVC builds a monotonic clock is preferred, so that wall-clock
 * adjustments made while the benchmark is running cannot distort the
 * measured interval. CLOCK_REALTIME is used only when CLOCK_MONOTONIC is
 * not defined or the monotonic query fails.
 */
double get_seconds(void)
{
#ifdef _MSC_VER
    static LARGE_INTEGER frequency;
    LARGE_INTEGER now;

    /* Query the counter frequency only on the first call and cache it. */
    if (frequency.QuadPart == 0) {
        QueryPerformanceFrequency(&frequency);
    }
    QueryPerformanceCounter(&now);
    return (now.QuadPart * 1.0) / frequency.QuadPart;
#else
    struct timespec now = {0};
    int rc = -1;

#ifdef CLOCK_MONOTONIC
    rc = clock_gettime(CLOCK_MONOTONIC, &now);
#endif
    if (rc != 0) {
        /* Fallback; "now" stays zeroed if this query fails as well. */
        clock_gettime(CLOCK_REALTIME, &now);
    }
    return now.tv_sec + now.tv_nsec * 1e-9;
#endif
}

/*
 * Benchmark driver.
 *
 * Prints a trivial floating-point result and the compiler version, fills
 * X with initm(), runs dge(X) "count" times while timing the loop, then
 * reports the operation count, elapsed time and MFLOPS.
 *
 * argv[1] (ignored on VxWorks) is an optional repeat count; if it is
 * missing or cannot be parsed as an integer, one run is performed.
 * If PRINTM is defined, the resulting matrix is printed at the end.
 */
int main (int argc, char **argv)
{
    double a = 1.0;
    double b = 2.0;
    double c = a + b;
    double t;
    long long int total_ops;
    int count = 1;
    int i;

    printf("FLOPS %.2lf\n", c);
#ifdef _MSC_VER
    printf("MSC_VER version: %d\n", _MSC_VER);
#else
    printf("GCC version: " __VERSION__ "\n");
#endif
    initm(X);

    /* Parse the repeat count before starting the clock. */
#ifndef __vxworks
    if (argc > 1 && sscanf(argv[1], "%d", &count) != 1) {
        count = 1;
    }
#else
    (void)argc;
    (void)argv;
#endif

    t = get_seconds();
    for (i = 0; i < count; i += 1) {
        dge(X);
    }
    t = get_seconds() - t;

#ifdef COUNT
    /* dge() already accumulated nop across every run; do not scale it again. */
    total_ops = nop;
#else
    /* nop is the preset per-run count, so scale it by the number of runs. */
    total_ops = nop * count;
#endif

    /* total_ops is signed, so it must be printed with %lld rather than %llu. */
    printf("Ops count: %lld\n", total_ops);
    printf("Time spent: %.3lf sec\n", t);
    printf("MFLOPS: %.2f\n", 1e-6 * total_ops / t);
#ifdef PRINTM
    printm(X);
#endif
    return 0;
}
```

On Tue, May 5, 2020 at 3:30 AM <luoyongg...@gmail.com> wrote:

> From: Yonggang Luo <luoyongg...@gmail.com>
>
> Just post as an idea to improve PPC fp performance.
> With this idea, we have no need to adjust the helper orders.
>
> Signed-off-by: Yonggang Luo <luoyongg...@gmail.com>
> ---
>  target/ppc/fpu_helper.c | 44 +++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 44 insertions(+)
>
> diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
> index 2bd49a2cdf..79051e4540 100644
> --- a/target/ppc/fpu_helper.c
> +++ b/target/ppc/fpu_helper.c
> @@ -926,6 +926,17 @@ static void float_invalid_op_addsub(CPUPPCState *env,
> bool set_fpcc,
>  /* fadd - fadd. */
>  float64 helper_fadd(CPUPPCState *env, float64 arg1, float64 arg2)
>  {
> +    CPU_DoubleU u1, u2;
> +
> +    u1.d = arg1;
> +    u2.d = arg2;
> +    CPU_DoubleU retDouble;
> +    retDouble.nd = u1.nd + u2.nd;
> +    if (likely(float64_is_zero_or_normal(retDouble.d)))
> +    {
> +        /* TODO: Handling inexact */
> +        return retDouble.d;
> +    }
>      float64 ret = float64_add(arg1, arg2, &env->fp_status);
>      int status = get_float_exception_flags(&env->fp_status);
>
> @@ -941,6 +952,17 @@ float64 helper_fadd(CPUPPCState *env, float64 arg1,
> float64 arg2)
>  /* fsub - fsub. */
>  float64 helper_fsub(CPUPPCState *env, float64 arg1, float64 arg2)
>  {
> +    CPU_DoubleU u1, u2;
> +
> +    u1.d = arg1;
> +    u2.d = arg2;
> +    CPU_DoubleU retDouble;
> +    retDouble.nd = u1.nd - u2.nd;
> +    if (likely(float64_is_zero_or_normal(retDouble.d)))
> +    {
> +        /* TODO: Handling inexact */
> +        return retDouble.d;
> +    }
>      float64 ret = float64_sub(arg1, arg2, &env->fp_status);
>      int status = get_float_exception_flags(&env->fp_status);
>
> @@ -967,6 +989,17 @@ static void float_invalid_op_mul(CPUPPCState *env,
> bool set_fprc,
>  /* fmul - fmul. */
>  float64 helper_fmul(CPUPPCState *env, float64 arg1, float64 arg2)
>  {
> +    CPU_DoubleU u1, u2;
> +
> +    u1.d = arg1;
> +    u2.d = arg2;
> +    CPU_DoubleU retDouble;
> +    retDouble.nd = u1.nd * u2.nd;
> +    if (likely(float64_is_zero_or_normal(retDouble.d)))
> +    {
> +        /* TODO: Handling inexact */
> +        return retDouble.d;
> +    }
>      float64 ret = float64_mul(arg1, arg2, &env->fp_status);
>      int status = get_float_exception_flags(&env->fp_status);
>
> @@ -997,6 +1030,17 @@ static void float_invalid_op_div(CPUPPCState *env,
> bool set_fprc,
>  /* fdiv - fdiv. */
>  float64 helper_fdiv(CPUPPCState *env, float64 arg1, float64 arg2)
>  {
> +    CPU_DoubleU u1, u2;
> +
> +    u1.d = arg1;
> +    u2.d = arg2;
> +    CPU_DoubleU retDouble;
> +    retDouble.nd = u1.nd / u2.nd;
> +    if (likely(float64_is_zero_or_normal(retDouble.d)))
> +    {
> +        /* TODO: Handling inexact */
> +        return retDouble.d;
> +    }
>      float64 ret = float64_div(arg1, arg2, &env->fp_status);
>      int status = get_float_exception_flags(&env->fp_status);
>
> --
> 2.23.0.windows.1
>
>

-- 
         此致
礼
罗勇刚
Yours
    sincerely,
Yonggang Luo

Reply via email to