On Fri, Dec 24, 2021 at 4:51 PM Haochen Jiang via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > Hi all, > > This patch fixes the amxbf16-dpbf16ps-2.c testcase. Previously the type > conversion had some issues. > > Ok for trunk? Ok. > > BRs, > Haochen > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/amx-check.h (check_float_tile_register): > New check function for float to prevent precision loss. > * gcc.target/i386/amxbf16-dpbf16ps-2.c: Correct the type convert > and byte offset. Use the new check function. > --- > gcc/testsuite/gcc.target/i386/amx-check.h | 23 ++++++++++++-- > .../gcc.target/i386/amxbf16-dpbf16ps-2.c | 30 ++++++++++++------- > 2 files changed, 41 insertions(+), 12 deletions(-) > > diff --git a/gcc/testsuite/gcc.target/i386/amx-check.h > b/gcc/testsuite/gcc.target/i386/amx-check.h > index 03616ff0b8e..434b0e59703 100644 > --- a/gcc/testsuite/gcc.target/i386/amx-check.h > +++ b/gcc/testsuite/gcc.target/i386/amx-check.h > @@ -139,8 +139,27 @@ int check_tile_register (__tile* ref, __tile* target) > > for (i = 0; i < rows; i++) > for (j = 0; j < colsb; j++) > - if (ref->buf[i * colsb + j] != target->buf[i * colsb + j]) > - return 0; > + if (ref->buf[i * colsb + j] != target->buf[i * colsb + j]) > + return 0; > + > + return 1; > +} > + > +/* Compare float tile register value with __tile variable */ > +int check_float_tile_register (__tile* ref, __tile* target) > +{ > + /* Tile register should be stored from tmm to > + memory and compare with emulation results. 
*/ > + int rows = target->rows; > + int colsb = target->colsb / 4; > + int i, j; > + uint32_t *ref_buf = (uint32_t *) ref->buf; > + uint32_t *target_buf = (uint32_t *) target->buf; > + > + for (i = 0; i < rows; i++) > + for (j = 0; j < colsb; j++) > + if (abs(ref_buf[i * colsb + j] - target_buf[i * colsb + j]) > 1) > + return 0; > > return 1; > } > diff --git a/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c > b/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c > index f7002ca5ea5..b00bc13ec78 100644 > --- a/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c > +++ b/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c > @@ -12,15 +12,25 @@ void test_amx_bf16_dpbf16ps (); > /* Transformation functions between bf16/float */ > static uint16_t make_bf16 (float f) > { > - uint32_t u = (uint32_t)f; > - u = (u >> 16) & 0xffff; > - return (uint16_t)u; > + union > + { > + float f; > + uint32_t u; > + } fu; > + fu.f = f; > + fu.u = (fu.u >> 16) & 0xffff; > + return (uint16_t) fu.u; > } > > static float make_f32 (uint16_t bf) > { > - uint32_t u = (uint32_t)(bf << 16); > - return (float)u; > + union > + { > + float f; > + uint32_t u; > + } fu; > + fu.u = (uint32_t) bf << 16; > + return fu.f; > } > > /* Init tile buffer with bf16 pairs */ > @@ -54,10 +64,10 @@ void calc_matrix_dpbf16ps (__tile *dst, __tile *src1, > __tile *src2) > for (t = 0; t < 2; t+=2) > { > dst_buf[i * N + k] += > - (make_f32(src1_buf[i * 4 * N + 4 * j + t]) * > - make_f32(src2_buf[j * 4 * K + 4 * k + t])) + > - (make_f32(src1_buf[i * 4 * N + 4 * j + t + 1]) * > - make_f32(src2_buf[j * 4 * K + 4 * k + t + 1])); > + (make_f32(src1_buf[i * 2 * N + 2 * j + t]) * > + make_f32(src2_buf[j * 2 * K + 2 * k + t])) + > + (make_f32(src1_buf[i * 2 * N + 2 * j + t + 1]) * > + make_f32(src2_buf[j * 2 * K + 2 * k + t + 1])); > } > > } > @@ -80,6 +90,6 @@ void test_amx_bf16_dpbf16ps () > _tile_dpbf16ps (1, 2, 3); > _tile_stored (1, dst_ref.buf, _STRIDE); > > - if (!check_tile_register (&dst_ref, &dst)) > + if 
(!check_float_tile_register (&dst_ref, &dst)) > abort(); > } > -- > 2.18.1 >
-- BR, Hongtao