On Fri, Dec 24, 2021 at 4:51 PM Haochen Jiang via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi all,
>
> This patch fixes the test case amxbf16-dpbf16ps-2.c. Previously the type
> conversion had some issues.
>
> Ok for trunk?
Ok.
>
> BRs,
> Haochen
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/amx-check.h (check_float_tile_register):
>         New check function for float to prevent precision loss.
>         * gcc.target/i386/amxbf16-dpbf16ps-2.c: Correct the type
>         conversion and byte offset. Use the new check function.
> ---
>  gcc/testsuite/gcc.target/i386/amx-check.h     | 23 ++++++++++++--
>  .../gcc.target/i386/amxbf16-dpbf16ps-2.c      | 30 ++++++++++++-------
>  2 files changed, 41 insertions(+), 12 deletions(-)
>
> diff --git a/gcc/testsuite/gcc.target/i386/amx-check.h 
> b/gcc/testsuite/gcc.target/i386/amx-check.h
> index 03616ff0b8e..434b0e59703 100644
> --- a/gcc/testsuite/gcc.target/i386/amx-check.h
> +++ b/gcc/testsuite/gcc.target/i386/amx-check.h
> @@ -139,8 +139,27 @@ int check_tile_register (__tile* ref, __tile* target)
>
>    for (i = 0; i < rows; i++)
>      for (j = 0; j < colsb; j++)
> -       if (ref->buf[i * colsb + j] != target->buf[i * colsb + j])
> -           return 0;
> +      if (ref->buf[i * colsb + j] != target->buf[i * colsb + j])
> +       return 0;
> +
> +  return 1;
> +}
> +
> +/* Compare float tile register value with __tile variable */
> +int check_float_tile_register (__tile* ref, __tile* target)
> +{
> +  /* Tile register should be stored from tmm to
> +     memory and compare with emulation results. */
> +  int rows = target->rows;
> +  int colsb = target->colsb / 4;
> +  int i, j;
> +  uint32_t *ref_buf = (uint32_t *) ref->buf;
> +  uint32_t *target_buf = (uint32_t *) target->buf;
> +
> +  for (i = 0; i < rows; i++)
> +    for (j = 0; j < colsb; j++)
> +      if (abs(ref_buf[i * colsb + j] - target_buf[i * colsb + j]) > 1)
> +       return 0;
>
>    return 1;
>  }
> diff --git a/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c 
> b/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c
> index f7002ca5ea5..b00bc13ec78 100644
> --- a/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c
> +++ b/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c
> @@ -12,15 +12,25 @@ void test_amx_bf16_dpbf16ps ();
>  /* Transformation functions between bf16/float */
>  static uint16_t make_bf16 (float f)
>  {
> -  uint32_t u = (uint32_t)f;
> -  u = (u >> 16) & 0xffff;
> -  return (uint16_t)u;
> +  union
> +  {
> +    float f;
> +    uint32_t u;
> +  } fu;
> +  fu.f = f;
> +  fu.u = (fu.u >> 16) & 0xffff;
> +  return (uint16_t) fu.u;
>  }
>
>  static float make_f32 (uint16_t bf)
>  {
> -  uint32_t u = (uint32_t)(bf << 16);
> -  return (float)u;
> +  union
> +  {
> +    float f;
> +    uint32_t u;
> +  } fu;
> +  fu.u = (uint32_t) bf << 16;
> +  return fu.f;
>  }
>
>  /* Init tile buffer with bf16 pairs */
> @@ -54,10 +64,10 @@ void calc_matrix_dpbf16ps (__tile *dst, __tile *src1, 
> __tile *src2)
>         for (t = 0; t < 2; t+=2)
>           {
>             dst_buf[i * N + k] +=
> -             (make_f32(src1_buf[i * 4 * N + 4 * j + t]) *
> -             make_f32(src2_buf[j * 4 * K + 4 * k + t])) +
> -             (make_f32(src1_buf[i * 4 * N + 4 * j + t + 1]) *
> -             make_f32(src2_buf[j * 4 * K + 4 * k + t + 1]));
> +             (make_f32(src1_buf[i * 2 * N + 2 * j + t]) *
> +             make_f32(src2_buf[j * 2 * K + 2 * k + t])) +
> +             (make_f32(src1_buf[i * 2 * N + 2 * j + t + 1]) *
> +             make_f32(src2_buf[j * 2 * K + 2 * k + t + 1]));
>           }
>
>  }
> @@ -80,6 +90,6 @@ void test_amx_bf16_dpbf16ps ()
>    _tile_dpbf16ps (1, 2, 3);
>    _tile_stored (1, dst_ref.buf, _STRIDE);
>
> -  if (!check_tile_register (&dst_ref, &dst))
> +  if (!check_float_tile_register (&dst_ref, &dst))
>          abort();
>  }
> --
> 2.18.1
>


-- 
BR,
Hongtao

Reply via email to