On 08/31/2018 04:14 PM, Cesar Philippidis wrote:
> Attached is an nvptx patch that adds support for a new, albeit rarely
> used, compiler option -misa. At present, there are only two valid ISA
> arguments, SM_30 and SM_35. Without that flag, GCC will default to
> SM_30. The major advantage of using the SM_35 ISA is to enable the use
> PTX atom instructions for __atomic_fetch_{add,and,or,xor} for DI
> integers. Without -misa, GCC would use an atomic CAS loop for them.


> As
> an aside, this patch also enables PTX atom instructions for those
> aforementioned functions for SI integers.

Make the SI part a separate patch, and make sure that one has a
testcase. Pre-approved.

> 
> Is this patch OK for trunk?
> 

Well, how did you test this (
https://gcc.gnu.org/contribute.html#patches : "Bootstrapping and
testing. State the host and target combinations you used to do proper
testing as described above, and the results of your testing.") ?

> Thanks,
> Cesar
> 
> 
> 0001-Basic-misa-support-for-nvptx.patch
> 
> 
> Basic -misa support for nvptx
> 
> 2018-XX-YY  Cesar Philippidis  <ce...@codesourcery.com>
>           Bernd Schmidt  <bernds_...@t-online.de>
> 
>       gcc/
>       * config/nvptx/nvptx-opts.h: New file.
>       * config/nvptx/nvptx.c (nvptx_file_start): Print the correct .target.
>       * config/nvptx/nvptx.h: Include "nvptx-opts.h".
>       (ASM_SPEC): Define.
>       (TARGET_SM35): New macro.
>       * config/nvptx/nvptx.md (atomic_fetch_<logic><mode>): Enable with the
>       correct predicate.
>       * config/nvptx/nvptx.opt (ptx_isa, sm_30, sm_35): New enum and its
>       values.
>       (misa=): New option.
>       * doc/invoke.texi (Nvidia PTX Options): Document -misa.
> 
>       gcc/testsuite/
>       * gcc.target/nvptx/atomic_fetch-1.c: New test.
>       * gcc.target/nvptx/atomic_fetch-1.c: New test.
> 
> 
> diff --git a/gcc/config/nvptx/nvptx-opts.h b/gcc/config/nvptx/nvptx-opts.h
> new file mode 100644
> index 00000000000..55d9599917e
> --- /dev/null
> +++ b/gcc/config/nvptx/nvptx-opts.h
> @@ -0,0 +1,30 @@
> +/* Definitions for the NVPTX port needed for option handling.
> +   Copyright (C) 2015-2018 Free Software Foundation, Inc.
> +
> +   This file is part of GCC.
> +
> +   GCC is free software; you can redistribute it and/or modify it
> +   under the terms of the GNU General Public License as published
> +   by the Free Software Foundation; either version 3, or (at your
> +   option) any later version.
> +
> +   GCC is distributed in the hope that it will be useful, but WITHOUT
> +   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
> +   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
> +   License for more details.
> +
> +   You should have received a copy of the GNU General Public License
> +   along with GCC; see the file COPYING3.  If not see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#ifndef NVPTX_OPTS_H
> +#define NVPTX_OPTS_H
> +
> +enum ptx_isa
> +{
> +  PTX_ISA_SM30,
> +  PTX_ISA_SM35
> +};
> +
> +#endif
> +
> diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
> index c0b0a2ec3ab..9903a273863 100644
> --- a/gcc/config/nvptx/nvptx.c
> +++ b/gcc/config/nvptx/nvptx.c
> @@ -4931,7 +4931,10 @@ nvptx_file_start (void)
>  {
>    fputs ("// BEGIN PREAMBLE\n", asm_out_file);
>    fputs ("\t.version\t3.1\n", asm_out_file);
> -  fputs ("\t.target\tsm_30\n", asm_out_file);
> +  if (TARGET_SM35)
> +    fputs ("\t.target\tsm_35\n", asm_out_file);
> +  else
> +    fputs ("\t.target\tsm_30\n", asm_out_file);
>    fprintf (asm_out_file, "\t.address_size %d\n", GET_MODE_BITSIZE (Pmode));
>    fputs ("// END PREAMBLE\n", asm_out_file);
>  }
> diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h
> index dfa1e9aa859..a2fe8b68b22 100644
> --- a/gcc/config/nvptx/nvptx.h
> +++ b/gcc/config/nvptx/nvptx.h
> @@ -21,10 +21,16 @@
>  #ifndef GCC_NVPTX_H
>  #define GCC_NVPTX_H
>  
> +#ifndef NVPTX_OPTS_H
> +#include "config/nvptx/nvptx-opts.h"
> +#endif
> +
>  /* Run-time Target.  */
>  
>  #define STARTFILE_SPEC "%{mmainkernel:crt0.o}"
>  
> +#define ASM_SPEC "%{misa=*:-m %*}"
> +
>  #define TARGET_CPU_CPP_BUILTINS()            \
>    do                                         \
>      {                                                \
> @@ -87,6 +93,8 @@
>  #define Pmode (TARGET_ABI64 ? DImode : SImode)
>  #define STACK_SIZE_MODE Pmode
>  
> +#define TARGET_SM35 (ptx_isa_option >= PTX_ISA_SM35)
> +
>  /* Registers.  Since ptx is a virtual target, we just define a few
>     hard registers for special purposes and leave pseudos unallocated.
>     We have to have some available hard registers, to keep gcc setup
> diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
> index 2988f5dfa91..ca00b1d8073 100644
> --- a/gcc/config/nvptx/nvptx.md
> +++ b/gcc/config/nvptx/nvptx.md
> @@ -1440,7 +1440,6 @@
>  (define_code_iterator any_logic [and ior xor])
>  (define_code_attr logic [(and "and") (ior "or") (xor "xor")])
>  
> -;; Currently disabled until we add better subtarget support - requires sm_32.
>  (define_insn "atomic_fetch_<logic><mode>"
>    [(set (match_operand:SDIM 1 "memory_operand" "+m")
>       (unspec_volatile:SDIM
> @@ -1450,7 +1449,7 @@
>         UNSPECV_LOCK))
>     (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
>       (match_dup 1))]
> -  "0"
> +  "<MODE>mode == SImode || TARGET_SM35"
>    "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;"
>    [(set_attr "atomic" "true")])
>  
> diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt
> index 04277d1d98e..8194c0324d6 100644
> --- a/gcc/config/nvptx/nvptx.opt
> +++ b/gcc/config/nvptx/nvptx.opt
> @@ -48,3 +48,17 @@ Generate code that can keep local state uniform across all 
> lanes.
>  mgomp
>  Target Report Mask(GOMP)
>  Generate code for OpenMP offloading: enables -msoft-stack and -muniform-simt.
> +
> +Enum
> +Name(ptx_isa) Type(int)
> +Known PTX ISA versions (for use with the -misa= option):
> +
> +EnumValue
> +Enum(ptx_isa) String(sm_30) Value(PTX_ISA_SM30)
> +
> +EnumValue
> +Enum(ptx_isa) String(sm_35) Value(PTX_ISA_SM35)
> +
> +misa=
> +Target RejectNegative ToLower Joined Enum(ptx_isa) Var(ptx_isa_option) 
> Init(PTX_ISA_SM30)
> +Specify the version of the ptx ISA to use.
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index e37233d6ed4..95bc7d9e509 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -22456,6 +22456,12 @@ These options are defined for Nvidia PTX:
>  @opindex m64
>  Generate code for 32-bit or 64-bit ABI.
>  
> +@item -misa=@var{ISA-string}
> +@opindex march
> +Generate code for given the specified PTX ISA (e.g.@ @samp{sm_35}).  ISA
> +strings must be lower-case.  Valid ISA strings include @samp{sm_30} and
> +@samp{sm_35}.  The default ISA is sm_30.
> +
>  @item -mmainkernel
>  @opindex mmainkernel
>  Link in code for a __main kernel.  This is for stand-alone instead of
> diff --git a/gcc/testsuite/gcc.target/nvptx/atomic-fetch-2.c 
> b/gcc/testsuite/gcc.target/nvptx/atomic-fetch-2.c
> new file mode 100644
> index 00000000000..1d35a176a62
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/nvptx/atomic-fetch-2.c
> @@ -0,0 +1,24 @@
> +/* Test the nvptx atomic instructions for __atomic_fetch_OP for SM_30
> +   targets.  */
> +
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -misa=sm_30" } */
> +
> +int
> +main()
> +{
> +  unsigned long long a = ~0;
> +  unsigned b = 0xa;
> +
> +  __atomic_fetch_add (&a, b, 0);
> +  __atomic_fetch_and (&a, b, 0);
> +  __atomic_fetch_or (&a, b, 0);
> +  __atomic_fetch_xor (&a, b, 0);
> +  
> +  return a;
> +}
> +
> +/* { dg-final { scan-assembler-not "atom.b64.add" } } */
> +/* { dg-final { scan-assembler-not "atom.b64.and" } } */
> +/* { dg-final { scan-assembler-not "atom.b64.or" } } */
> +/* { dg-final { scan-assembler-not "atom.b64.xor" } } */
> diff --git a/gcc/testsuite/gcc.target/nvptx/atomic_fetch-1.c 
> b/gcc/testsuite/gcc.target/nvptx/atomic_fetch-1.c
> new file mode 100644
> index 00000000000..c637caa79a0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/nvptx/atomic_fetch-1.c
> @@ -0,0 +1,24 @@
> +/* Test the nvptx atomic instructions for __atomic_fetch_OP for SM_35
> +   targets.  */
> +
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -misa=sm_35" } */
> +
> +int
> +main()
> +{
> +  unsigned long long a = ~0;
> +  unsigned b = 0xa;
> +
> +  __atomic_fetch_add (&a, b, 0);
> +  __atomic_fetch_and (&a, b, 0);
> +  __atomic_fetch_or (&a, b, 0);
> +  __atomic_fetch_xor (&a, b, 0);
> +  
> +  return a;
> +}
> +
> +/* { dg-final { scan-assembler "atom.add.u64" } } */
> +/* { dg-final { scan-assembler "atom.b64.and" } } */
> +/* { dg-final { scan-assembler "atom.b64.or" } } */
> +/* { dg-final { scan-assembler "atom.b64.xor" } } */
> -- 2.17.1
> 

Hmm, the add.u64 vs b64.and looks odd (and the scan-assembler-not
testcase does not use this difference, so that needs to be fixed, or for
bonus points, changed into a scan-assembler testcase).

The documentation uses "op.type", we should fix the compiler to emit
that consistently. Separate patch that fixes that pre-approved.

This is ok (with, as I mentioned above, the SI part split off into a
separate patch), on the condition that you test libgomp with
-foffload=-misa=sm_35.

Thanks,
- Tom

Reply via email to