Hello, On 03 сен 08:17, H.J. Lu wrote: > On Thu, Sep 3, 2020 at 8:08 AM Kirill Yukhin via Gcc-patches > <gcc-patches@gcc.gnu.org> wrote: > > > > Hello, > > > > On 06 июл 09:58, Hongyu Wang via Gcc-patches wrote: > > > Hi: > > > > > > This patch is about to support Intel Advanced Matrix Extensions (AMX) > > > which will be enabled in GLC. > > > > > > AMX is a new 64-bit programming paradigm consisting of two > > > components: a set of 2-dimensional registers (tiles) representing > > > sub-arrays from a larger 2-dimensional memory image, > > > and an accelerator able to operate on tiles > > > > > > Supported instructions are > > > > > > AMX-TILE:ldtilecfg/sttilecfg/tileloadd/tileloaddt1/tilezero/tilerelease > > > AMX-INT8:tdpbssd/tdpbsud/tdpbusd/tdpbuud > > > AMX-BF16:tdpbf16ps > > > > > > The intrinsics adopts constant tile register number as its input > > > parameters. > > > > > > For detailed information, please refer to > > > https://software.intel.com/content/dam/develop/public/us/en/documents/architecture-instruction-set-extensions-programming-reference.pdf > > > > > > Bootstrap ok, regression test on i386/x86 backend is ok. > > > > > > OK for master? > > > > I was trying to apply your patch to recent master and got > > compilation error: > > > > g++ -std=gnu++11 -fno-PIE -c -g -O2 -DIN_GCC -fno-exceptions > > -fno-rtti -fasynchronous-unwind-tables -W -Wall -Wno-narrowi > > ng -Wwrite-strings -Wcast-qual -Wmissing-format-attribute > > -Woverloaded-virtual -pedantic -Wno-long-long -Wno-variadic-macros -Wn > > o-overlength-strings -fno-common -DHAVE_CONFIG_H -I. -I. > > -I/export/kyukhin/gcc/src/gcc -I/export/kyukhin/gcc/src/gcc/. 
-I/expor > > t/kyukhin/gcc/src/gcc/../include > > -I/export/kyukhin/gcc/src/gcc/../libcpp/include > > -I/export/kyukhin/gcc/src/gcc/../libdecnumber > > -I/export/kyukhin/gcc/src/gcc/../libdecnumber/bid -I../libdecnumber > > -I/export/kyukhin/gcc/src/gcc/../libbacktrace -o i386-opti > > ons.o -MT i386-options.o -MMD -MP -MF ./.deps/i386-options.TPo > > /export/kyukhin/gcc/src/gcc/config/i386/i386-options.c > > /export/kyukhin/gcc/src/gcc/config/i386/i386-options.c: In function ‘bool > > ix86_option_override_internal(bool, gcc_options*, gcc_ > > options*)’: > > /export/kyukhin/gcc/src/gcc/config/i386/i386-options.c:2263:41: error: > > ‘PTA_AMX_TILE’ was not declared in this scope > > if (((processor_alias_table[i].flags & PTA_AMX_TILE) != 0) > > ^ > > /export/kyukhin/gcc/src/gcc/config/i386/i386-options.c:2267:41: error: > > ‘PTA_AMX_INT8’ was not declared in this scope > > if (((processor_alias_table[i].flags & PTA_AMX_INT8) != 0) > > ^ > > /export/kyukhin/gcc/src/gcc/config/i386/i386-options.c:2271:41: error: > > ‘PTA_AMX_BF16’ was not declared in this scope > > if (((processor_alias_table[i].flags & PTA_AMX_BF16) != 0) > > > > Could you please fix that? > > Here is the rebased patch against > > commit 3c219134152f645103f2fcd50735b177ccd76cde > Author: Jonathan Wakely <jwak...@redhat.com> > Date: Thu Sep 3 12:38:50 2020 +0100 > > libstdc++: Optimise GCD algorithms > > Thanks. > > -- > H.J.
> diff --git a/gcc/config.gcc b/gcc/config.gcc > index 797f0ad5edd..d0e59e86a5c 100644 > --- a/gcc/config.gcc > +++ b/gcc/config.gcc > @@ -412,7 +412,7 @@ i[34567]86-*-*) > waitpkgintrin.h cldemoteintrin.h avx512bf16vlintrin.h > avx512bf16intrin.h enqcmdintrin.h serializeintrin.h > avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h > - tsxldtrkintrin.h" > + tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h > amxbf16intrin.h" Line more than 80 chars. > ;; > x86_64-*-*) > cpu_type=i386 > @@ -447,7 +447,7 @@ x86_64-*-*) > waitpkgintrin.h cldemoteintrin.h avx512bf16vlintrin.h > avx512bf16intrin.h enqcmdintrin.h serializeintrin.h > avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h > - tsxldtrkintrin.h" > + tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h > amxbf16intrin.h" Ditto. > diff --git a/gcc/config/i386/amxbf16intrin.h b/gcc/config/i386/amxbf16intrin.h > new file mode 100644 > index 00000000000..df0e2262d50 > --- /dev/null > +++ b/gcc/config/i386/amxbf16intrin.h > @@ -0,0 +1,25 @@ > +#if !defined _IMMINTRIN_H_INCLUDED > +#error "Never use <amxbf16intrin.h> directly; include <immintrin.h> instead." > +#endif > + > +#ifndef _AMXBF16INTRIN_H_INCLUDED > +#define _AMXBF16INTRIN_H_INCLUDED > + > +#if !defined(__AMX_BF16__) > +#pragma GCC push_options > +#pragma GCC target("amx-bf16") > +#define __DISABLE_AMX_BF16__ > +#endif /* __AMX_BF16__ */ > + > +#if defined(__x86_64__) && defined(__AMX_BF16__) > +#define _tile_dpbf16ps(dst,src1,src2) > \ > + __asm__ volatile\ > + ("{tdpbf16ps\t%%tmm"#src2", %%tmm"#src1", > %%tmm"#dst"|tdpbf16ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::) > +#endif I hope in future we'll replace it with unspecs at least... > diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt > index c9f7195d423..9389dc24948 100644 > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index bca8c856dc8..a46e31f5862 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -1357,6 +1357,7 @@ See RS/6000 and PowerPC Options. 
> -mvpclmulqdq -mavx512bitalg -mmovdiri -mmovdir64b -mavx512vpopcntdq @gol > -mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol > -mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol > +-mamx-tile -mamx-int8 -mamx-bf16@gol Please add a space before @gol. > diff --git a/gcc/testsuite/gcc.target/i386/amxbf16-asmintel-2.c > b/gcc/testsuite/gcc.target/i386/amxbf16-asmintel-2.c > new file mode 100644 > index 00000000000..605a44df3f8 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/amxbf16-asmintel-2.c > @@ -0,0 +1,4 @@ > +/* { dg-do assemble { target { ! ia32 } } } */ > +/* { dg-options "-O2 -mamx-bf16 -masm=intel" } */ > +/* { dg-require-effective-target amx_bf16 } */ > +#include"amxbf16-asmintel-1.c" I didn't get it. We usually use the second testcase to actually execute the code and (well, a little) verify that the semantics are OK, e.g. that the operand order is correct. Could you please do that? This applies to all *-2.c cases. I've checked, and it looks like the public SDE simulator supports AMX. -- K