On 08/31/2018 04:14 PM, Cesar Philippidis wrote: > Attached is an nvptx patch that adds support for a new, albeit rarely > used, compiler option -misa. At present, there are only two valid ISA > arguments, SM_30 and SM_35. Without that flag, GCC will default to > SM_30. The major advantage of using the SM_35 ISA is to enable the use > PTX atom instructions for __atomic_fetch_{add,and,or,xor} for DI > integers. Without -misa, GCC would use an atomic CAS loop for them.
> As > an aside, this patch also enables PTX atom instructions for those > aforementioned functions for SI integers. Make the SI part a separate patch, and make sure that one has a testcase. Pre-approved. > > Is this patch OK for trunk? > Well, how did you test this ( https://gcc.gnu.org/contribute.html#patches : "Bootstrapping and testing. State the host and target combinations you used to do proper testing as described above, and the results of your testing.") ? > Thanks, > Cesar > > > 0001-Basic-misa-support-for-nvptx.patch > > > Basic -misa support for nvptx > > 2018-XX-YY Cesar Philippidis <ce...@codesourcery.com> > Bernd Schmidt <bernds_...@t-online.de> > > gcc/ > * config/nvptx/nvptx-opts.h: New file. > * config/nvptx/nvptx.c (nvptx_file_start): Print the correct .target. > * config/nvptx/nvptx.h: Include "nvptx-opts.h". > (ASM_SPEC): Define. > (TARGET_SM35): New macro. > * config/nvptx/nvptx.md (atomic_fetch_<logic><mode>): Enable with the > correct predicate. > * config/nvptx/nvptx.opt (ptx_isa, sm_30, sm_35): New enum and its > values. > (misa=): New option. > * doc/invoke.texi (Nvidia PTX Options): Document -misa. > > gcc/testsuite/ > * gcc.target/nvptx/atomic_fetch-1.c: New test. > * gcc.target/nvptx/atomic_fetch-1.c: New test. > > > diff --git a/gcc/config/nvptx/nvptx-opts.h b/gcc/config/nvptx/nvptx-opts.h > new file mode 100644 > index 00000000000..55d9599917e > --- /dev/null > +++ b/gcc/config/nvptx/nvptx-opts.h > @@ -0,0 +1,30 @@ > +/* Definitions for the NVPTX port needed for option handling. > + Copyright (C) 2015-2018 Free Software Foundation, Inc. > + > + This file is part of GCC. > + > + GCC is free software; you can redistribute it and/or modify it > + under the terms of the GNU General Public License as published > + by the Free Software Foundation; either version 3, or (at your > + option) any later version. > + > + GCC is distributed in the hope that it will be useful, but WITHOUT > + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY > + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public > + License for more details. > + > + You should have received a copy of the GNU General Public License > + along with GCC; see the file COPYING3. If not see > + <http://www.gnu.org/licenses/>. */ > + > +#ifndef NVPTX_OPTS_H > +#define NVPTX_OPTS_H > + > +enum ptx_isa > +{ > + PTX_ISA_SM30, > + PTX_ISA_SM35 > +}; > + > +#endif > + > diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c > index c0b0a2ec3ab..9903a273863 100644 > --- a/gcc/config/nvptx/nvptx.c > +++ b/gcc/config/nvptx/nvptx.c > @@ -4931,7 +4931,10 @@ nvptx_file_start (void) > { > fputs ("// BEGIN PREAMBLE\n", asm_out_file); > fputs ("\t.version\t3.1\n", asm_out_file); > - fputs ("\t.target\tsm_30\n", asm_out_file); > + if (TARGET_SM35) > + fputs ("\t.target\tsm_35\n", asm_out_file); > + else > + fputs ("\t.target\tsm_30\n", asm_out_file); > fprintf (asm_out_file, "\t.address_size %d\n", GET_MODE_BITSIZE (Pmode)); > fputs ("// END PREAMBLE\n", asm_out_file); > } > diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h > index dfa1e9aa859..a2fe8b68b22 100644 > --- a/gcc/config/nvptx/nvptx.h > +++ b/gcc/config/nvptx/nvptx.h > @@ -21,10 +21,16 @@ > #ifndef GCC_NVPTX_H > #define GCC_NVPTX_H > > +#ifndef NVPTX_OPTS_H > +#include "config/nvptx/nvptx-opts.h" > +#endif > + > /* Run-time Target. */ > > #define STARTFILE_SPEC "%{mmainkernel:crt0.o}" > > +#define ASM_SPEC "%{misa=*:-m %*}" > + > #define TARGET_CPU_CPP_BUILTINS() \ > do \ > { \ > @@ -87,6 +93,8 @@ > #define Pmode (TARGET_ABI64 ? DImode : SImode) > #define STACK_SIZE_MODE Pmode > > +#define TARGET_SM35 (ptx_isa_option >= PTX_ISA_SM35) > + > /* Registers. Since ptx is a virtual target, we just define a few > hard registers for special purposes and leave pseudos unallocated. > We have to have some available hard registers, to keep gcc setup > diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md > index 2988f5dfa91..ca00b1d8073 100644 > --- a/gcc/config/nvptx/nvptx.md > +++ b/gcc/config/nvptx/nvptx.md > @@ -1440,7 +1440,6 @@ > (define_code_iterator any_logic [and ior xor]) > (define_code_attr logic [(and "and") (ior "or") (xor "xor")]) > > -;; Currently disabled until we add better subtarget support - requires sm_32. > (define_insn "atomic_fetch_<logic><mode>" > [(set (match_operand:SDIM 1 "memory_operand" "+m") > (unspec_volatile:SDIM > @@ -1450,7 +1449,7 @@ > UNSPECV_LOCK)) > (set (match_operand:SDIM 0 "nvptx_register_operand" "=R") > (match_dup 1))] > - "0" > + "<MODE>mode == SImode || TARGET_SM35" > "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;" > [(set_attr "atomic" "true")]) > > diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt > index 04277d1d98e..8194c0324d6 100644 > --- a/gcc/config/nvptx/nvptx.opt > +++ b/gcc/config/nvptx/nvptx.opt > @@ -48,3 +48,17 @@ Generate code that can keep local state uniform across all > lanes. > mgomp > Target Report Mask(GOMP) > Generate code for OpenMP offloading: enables -msoft-stack and -muniform-simt. > + > +Enum > +Name(ptx_isa) Type(int) > +Known PTX ISA versions (for use with the -misa= option): > + > +EnumValue > +Enum(ptx_isa) String(sm_30) Value(PTX_ISA_SM30) > + > +EnumValue > +Enum(ptx_isa) String(sm_35) Value(PTX_ISA_SM35) > + > +misa= > +Target RejectNegative ToLower Joined Enum(ptx_isa) Var(ptx_isa_option) > Init(PTX_ISA_SM30) > +Specify the version of the ptx ISA to use. > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index e37233d6ed4..95bc7d9e509 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -22456,6 +22456,12 @@ These options are defined for Nvidia PTX: > @opindex m64 > Generate code for 32-bit or 64-bit ABI. > > +@item -misa=@var{ISA-string} > +@opindex march > +Generate code for given the specified PTX ISA (e.g.@ @samp{sm_35}). ISA > +strings must be lower-case. Valid ISA strings include @samp{sm_30} and > +@samp{sm_35}. The default ISA is sm_30. > + > @item -mmainkernel > @opindex mmainkernel > Link in code for a __main kernel. This is for stand-alone instead of > diff --git a/gcc/testsuite/gcc.target/nvptx/atomic-fetch-2.c > b/gcc/testsuite/gcc.target/nvptx/atomic-fetch-2.c > new file mode 100644 > index 00000000000..1d35a176a62 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/nvptx/atomic-fetch-2.c > @@ -0,0 +1,24 @@ > +/* Test the nvptx atomic instructions for __atomic_fetch_OP for SM_30 > + targets. */ > + > +/* { dg-do compile } */ > +/* { dg-options "-O2 -misa=sm_30" } */ > + > +int > +main() > +{ > + unsigned long long a = ~0; > + unsigned b = 0xa; > + > + __atomic_fetch_add (&a, b, 0); > + __atomic_fetch_and (&a, b, 0); > + __atomic_fetch_or (&a, b, 0); > + __atomic_fetch_xor (&a, b, 0); > + > + return a; > +} > + > +/* { dg-final { scan-assembler-not "atom.b64.add" } } */ > +/* { dg-final { scan-assembler-not "atom.b64.and" } } */ > +/* { dg-final { scan-assembler-not "atom.b64.or" } } */ > +/* { dg-final { scan-assembler-not "atom.b64.xor" } } */ > diff --git a/gcc/testsuite/gcc.target/nvptx/atomic_fetch-1.c > b/gcc/testsuite/gcc.target/nvptx/atomic_fetch-1.c > new file mode 100644 > index 00000000000..c637caa79a0 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/nvptx/atomic_fetch-1.c > @@ -0,0 +1,24 @@ > +/* Test the nvptx atomic instructions for __atomic_fetch_OP for SM_35 > + targets. */ > + > +/* { dg-do compile } */ > +/* { dg-options "-O2 -misa=sm_35" } */ > + > +int > +main() > +{ > + unsigned long long a = ~0; > + unsigned b = 0xa; > + > + __atomic_fetch_add (&a, b, 0); > + __atomic_fetch_and (&a, b, 0); > + __atomic_fetch_or (&a, b, 0); > + __atomic_fetch_xor (&a, b, 0); > + > + return a; > +} > + > +/* { dg-final { scan-assembler "atom.add.u64" } } */ > +/* { dg-final { scan-assembler "atom.b64.and" } } */ > +/* { dg-final { scan-assembler "atom.b64.or" } } */ > +/* { dg-final { scan-assembler "atom.b64.xor" } } */ > -- 2.17.1 > Hmm, the add.u64 vs b64.and looks odd (and the scan-assembler-not testcase does not use this difference, so that needs to be fixed, or for bonus points, changed into a scan-assembler testcase). The documentation uses "op.type", we should fix the compiler to emit that consistently. Separate patch that fixes that pre-approved. This is ok (with, as I mentioned above, the SI part split off into a separate patch), on the condition that you test libgomp with -foffload=-misa=sm_35. Thanks, - Tom