https://gcc.gnu.org/g:5deeae29dab2af64e3342daf7a30000e424c64ea

commit r15-9190-g5deeae29dab2af64e3342daf7a30000e424c64ea
Author: Thomas Schwinge <tschwi...@baylibre.com>
Date:   Wed Apr 2 10:25:17 2025 +0200

    nvptx: Don't use PTX '.const', constant state space [PR119573]
    
    This avoids cases where a "File uses too much global constant data" error is
    raised (final executable, or single object file), and avoids cases of wrong
    code generation: "error : State space incorrect for instruction 'st'"
    ('st.const'), or another case where an "illegal instruction was encountered",
    or a lot of cases where for two compilation units (such as a library linked
    with user code) we ran into "error : Memory space doesn't match" due to
    differences in '.const' usage between definition and use of a variable.
    
    We progress:
    
        ptxas error   : File uses too much global constant data (0x1f01a bytes, 0x10000 max)
        nvptx-run: cuLinkAddData failed: a PTX JIT compilation failed (CUDA_ERROR_INVALID_PTX, 218)
    
    ... into:
    
        PASS: 20_util/to_chars/103955.cc  -std=gnu++17 (test for excess errors)
        [-FAIL:-]{+PASS:+} 20_util/to_chars/103955.cc  -std=gnu++17 execution 
test
    
    We progress:
    
        ptxas error   : File uses too much global constant data (0x36c65 bytes, 0x10000 max)
        nvptx-as: ptxas returned 255 exit status
    
    ... into:
    
        [-UNSUPPORTED:-]{+PASS:+} gcc.c-torture/compile/pr46534.c   -O0  
{+(test for excess errors)+}
        [-UNSUPPORTED:-]{+PASS:+} gcc.c-torture/compile/pr46534.c   -O1  
{+(test for excess errors)+}
        [-UNSUPPORTED:-]{+PASS:+} gcc.c-torture/compile/pr46534.c   -O2  
{+(test for excess errors)+}
        [-UNSUPPORTED:-]{+PASS:+} gcc.c-torture/compile/pr46534.c   -O3 -g  
{+(test for excess errors)+}
        [-UNSUPPORTED:-]{+PASS:+} gcc.c-torture/compile/pr46534.c   -Os  
{+(test for excess errors)+}
    
        [-FAIL:-]{+PASS:+} g++.dg/torture/pr31863.C   -O0  (test for excess 
errors)
        [-FAIL:-]{+PASS:+} g++.dg/torture/pr31863.C   -O1  (test for excess 
errors)
        [-FAIL:-]{+PASS:+} g++.dg/torture/pr31863.C   -O2  (test for excess 
errors)
        [-FAIL:-]{+PASS:+} g++.dg/torture/pr31863.C   -O3 -g  (test for excess 
errors)
        [-FAIL:-]{+PASS:+} g++.dg/torture/pr31863.C   -Os  (test for excess 
errors)
    
        [-FAIL:-]{+PASS:+} gfortran.dg/bind-c-contiguous-1.f90   -O0  (test for 
excess errors)
        [-UNRESOLVED:-]{+PASS:+} gfortran.dg/bind-c-contiguous-1.f90   -O0  
[-compilation failed to produce executable-]{+execution test+}
    
        [-FAIL:-]{+PASS:+} gfortran.dg/bind-c-contiguous-4.f90   -O0  (test for 
excess errors)
        [-UNRESOLVED:-]{+PASS:+} gfortran.dg/bind-c-contiguous-4.f90   -O0  
[-compilation failed to produce executable-]{+execution test+}
    
        [-FAIL:-]{+PASS:+} gfortran.dg/bind-c-contiguous-5.f90   -O0  (test for 
excess errors)
        [-UNRESOLVED:-]{+PASS:+} gfortran.dg/bind-c-contiguous-5.f90   -O0  
[-compilation failed to produce executable-]{+execution test+}
    
        [-FAIL:-]{+PASS:+} 20_util/to_chars/double.cc  -std=gnu++17 (test for 
excess errors)
        [-UNRESOLVED:-]{+PASS:+} 20_util/to_chars/double.cc  -std=gnu++17 
[-compilation failed to produce executable-]{+execution test+}
    
        [-FAIL:-]{+PASS:+} 20_util/to_chars/float.cc  -std=gnu++17 (test for 
excess errors)
        [-UNRESOLVED:-]{+PASS:+} 20_util/to_chars/float.cc  -std=gnu++17 
[-compilation failed to produce executable-]{+execution test+}
    
        [-FAIL:-]{+PASS:+} special_functions/13_ellint_3/check_value.cc  
-std=gnu++17 (test for excess errors)
        [-UNRESOLVED:-]{+PASS:+} special_functions/13_ellint_3/check_value.cc  
-std=gnu++17 [-compilation failed to produce executable-]{+execution test+}
    
        [-FAIL:-]{+PASS:+} 
tr1/5_numerical_facilities/special_functions/14_ellint_3/check_value.cc  
-std=gnu++17 (test for excess errors)
        [-UNRESOLVED:-]{+PASS:+} 
tr1/5_numerical_facilities/special_functions/14_ellint_3/check_value.cc  
-std=gnu++17 [-compilation failed to produce executable-]{+execution test+}
    
    ..., and progress likewise, but fail later with an unrelated error:
    
        [-FAIL:-]{+PASS:+} ext/special_functions/hyperg/check_value.cc  
-std=gnu++17 (test for excess errors)
        [-UNRESOLVED:-]{+FAIL:+} ext/special_functions/hyperg/check_value.cc  
-std=gnu++17 [-compilation failed to produce executable-]{+execution test+}
    
        
[...]/libstdc++-v3/testsuite/ext/special_functions/hyperg/check_value.cc:12317: 
void test(const testcase_hyperg<Ret> (&)[Num], Ret) [with Ret = double; 
unsigned int Num = 19]: Assertion 'max_abs_frac < toler' failed.
    
    ..., and:
    
        [-FAIL:-]{+PASS:+} 
tr1/5_numerical_facilities/special_functions/17_hyperg/check_value.cc  
-std=gnu++17 (test for excess errors)
        [-UNRESOLVED:-]{+FAIL:+} 
tr1/5_numerical_facilities/special_functions/17_hyperg/check_value.cc  
-std=gnu++17 [-compilation failed to produce executable-]{+execution test+}
    
        
[...]/libstdc++-v3/testsuite/tr1/5_numerical_facilities/special_functions/17_hyperg/check_value.cc:12316:
 void test(const testcase_hyperg<Ret> (&)[Num], Ret) [with Ret = double; 
unsigned int Num = 19]: Assertion 'max_abs_frac < toler' failed.
    
    We progress:
    
        nvptx-run: error getting kernel result: an illegal instruction was encountered (CUDA_ERROR_ILLEGAL_INSTRUCTION, 715)
    
    ... into:
    
        PASS: g++.dg/cpp1z/inline-var1.C  -std=gnu++17 (test for excess errors)
        [-FAIL:-]{+PASS:+} g++.dg/cpp1z/inline-var1.C  -std=gnu++17 execution 
test
        PASS: g++.dg/cpp1z/inline-var1.C  -std=gnu++20 (test for excess errors)
        [-FAIL:-]{+PASS:+} g++.dg/cpp1z/inline-var1.C  -std=gnu++20 execution 
test
        PASS: g++.dg/cpp1z/inline-var1.C  -std=gnu++26 (test for excess errors)
        [-FAIL:-]{+PASS:+} g++.dg/cpp1z/inline-var1.C  -std=gnu++26 execution 
test
    
    (A lot of '.const' -> '.global' etc.  Haven't researched what the actual
    problem was.)
    
    We progress:
    
        ptxas /tmp/cc5TSZZp.o, line 142; error   : State space incorrect for instruction 'st'
        ptxas /tmp/cc5TSZZp.o, line 174; error   : State space incorrect for instruction 'st'
        ptxas fatal   : Ptx assembly aborted due to errors
        nvptx-as: ptxas returned 255 exit status
    
    ... into:
    
        [-FAIL:-]{+PASS:+} g++.dg/torture/builtin-clear-padding-1.C   -O0  
(test for excess errors)
        [-UNRESOLVED:-]{+PASS:+} g++.dg/torture/builtin-clear-padding-1.C   -O0 
 [-compilation failed to produce executable-]{+execution test+}
        PASS: g++.dg/torture/builtin-clear-padding-1.C   -O1  (test for excess 
errors)
        PASS: g++.dg/torture/builtin-clear-padding-1.C   -O1  execution test
        [-FAIL:-]{+PASS:+} g++.dg/torture/builtin-clear-padding-1.C   -O2  
(test for excess errors)
        [-UNRESOLVED:-]{+PASS:+} g++.dg/torture/builtin-clear-padding-1.C   -O2 
 [-compilation failed to produce executable-]{+execution test+}
        [-FAIL:-]{+PASS:+} g++.dg/torture/builtin-clear-padding-1.C   -O3 -g  
(test for excess errors)
        [-UNRESOLVED:-]{+PASS:+} g++.dg/torture/builtin-clear-padding-1.C   -O3 
-g  [-compilation failed to produce executable-]{+execution test+}
        [-FAIL:-]{+PASS:+} g++.dg/torture/builtin-clear-padding-1.C   -Os  
(test for excess errors)
        [-UNRESOLVED:-]{+PASS:+} g++.dg/torture/builtin-clear-padding-1.C   -Os 
 [-compilation failed to produce executable-]{+execution test+}
    
    This indeed tried to write ('st.const') into 's2', which was '.const'
    (also: 's1' was '.const') -- even though there is no explicit 'const' in
    'g++.dg/torture/builtin-clear-padding-1.C'; "interesting".
    
    We progress:
    
        error   : Memory space doesn't match for 
'_ZNSt3tr18__detail12__prime_listE' in 'input file 3 at offset 53085', first 
specified in 'input file 1 at offset 1924'
        nvptx-run: cuLinkAddData failed: device kernel image is invalid 
(CUDA_ERROR_INVALID_SOURCE, 300)
    
    ... into execution test PASS for a few dozen libstdc++ test cases.
    
    We progress:
    
        error   : Memory space doesn't match for 
'_ZNSt6locale17_S_twinned_facetsE' in 'input file 11 at offset 479903', first 
specified in 'input file 9 at offset 59300'
        nvptx-run: cuLinkAddData failed: device kernel image is invalid 
(CUDA_ERROR_INVALID_SOURCE, 300)
    
    ... into:
    
        PASS: g++.dg/tree-ssa/pr20458.C  -std=gnu++17 (test for excess errors)
        [-FAIL:-]{+PASS:+} g++.dg/tree-ssa/pr20458.C  -std=gnu++17 execution 
test
        PASS: g++.dg/tree-ssa/pr20458.C  -std=gnu++26 (test for excess errors)
        [-FAIL:-]{+PASS:+} g++.dg/tree-ssa/pr20458.C  -std=gnu++26 execution 
test
    
    ..., and likewise for a few hundred libstdc++ test cases.
    
    We progress:
    
        error   : Memory space doesn't match for 
'_ZNSt6locale5_Impl19_S_facet_categoriesE' in 'input file 11 at offset 821962', 
first specified in 'input file 10 at offset 676317'
        nvptx-run: cuLinkAddData failed: device kernel image is invalid 
(CUDA_ERROR_INVALID_SOURCE, 300)
    
    ... into execution test PASS for a hundred libstdc++ test cases.
    
    We progress:
    
        error   : Memory space doesn't match for '_ctype_' in 'input file 22 at 
offset 1698331', first specified in 'input file 9 at offset 57095'
        nvptx-run: cuLinkAddData failed: device kernel image is invalid 
(CUDA_ERROR_INVALID_SOURCE, 300)
    
    ... into execution test PASS for another few libstdc++ test cases.
    
            PR target/119573
            gcc/
            * config/nvptx/nvptx.cc (nvptx_encode_section_info): Don't set
            'DATA_AREA_CONST' for 'TREE_CONSTANT', or 'TREE_READONLY'.
            (nvptx_asm_declare_constant_name): Use '.global' instead of
            '.const'.
            gcc/testsuite/
            * gcc.c-torture/compile/pr46534.c: Don't 'dg-skip-if' nvptx.
            * gcc.target/nvptx/decl.c: Adjust.
            libstdc++-v3/
            * config/cpu/nvptx/t-nvptx (AM_MAKEFLAGS): Don't amend.

Diff:
---
 gcc/config/nvptx/nvptx.cc                     | 8 +++-----
 gcc/testsuite/gcc.c-torture/compile/pr46534.c | 1 -
 gcc/testsuite/gcc.target/nvptx/decl.c         | 6 +++---
 libstdc++-v3/config/cpu/nvptx/t-nvptx         | 6 ------
 4 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc
index 022037f13627..de0ce5d91645 100644
--- a/gcc/config/nvptx/nvptx.cc
+++ b/gcc/config/nvptx/nvptx.cc
@@ -470,9 +470,7 @@ nvptx_encode_section_info (tree decl, rtx rtl, int first)
     {
       nvptx_data_area area = DATA_AREA_GENERIC;
 
-      if (TREE_CONSTANT (decl))
-       area = DATA_AREA_CONST;
-      else if (VAR_P (decl))
+      if (VAR_P (decl))
        {
          if (lookup_attribute ("shared", DECL_ATTRIBUTES (decl)))
            {
@@ -482,7 +480,7 @@ nvptx_encode_section_info (tree decl, rtx rtl, int first)
                       " memory is not supported", decl);
            }
          else
-           area = TREE_READONLY (decl) ? DATA_AREA_CONST : DATA_AREA_GLOBAL;
+           area = DATA_AREA_GLOBAL;
        }
 
       SET_SYMBOL_DATA_AREA (XEXP (rtl, 0), area);
@@ -2597,7 +2595,7 @@ nvptx_asm_declare_constant_name (FILE *file, const char *name,
   fprintf (file, "\t");
 
   tree type = TREE_TYPE (exp);
-  nvptx_assemble_decl_begin (file, name, ".const", type, obj_size,
+  nvptx_assemble_decl_begin (file, name, ".global", type, obj_size,
                             TYPE_ALIGN (type));
 }
 
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr46534.c 
b/gcc/testsuite/gcc.c-torture/compile/pr46534.c
index 1894636f0ead..7f10bc097eba 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr46534.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr46534.c
@@ -1,4 +1,3 @@
-/* { dg-skip-if "too big" { nvptx-*-* } } */
 /* PR middle-end/46534 */
 
 extern int printf (const char *, ...);
diff --git a/gcc/testsuite/gcc.target/nvptx/decl.c 
b/gcc/testsuite/gcc.target/nvptx/decl.c
index 190a64d5679a..45dd699f420f 100644
--- a/gcc/testsuite/gcc.target/nvptx/decl.c
+++ b/gcc/testsuite/gcc.target/nvptx/decl.c
@@ -13,8 +13,8 @@ int Foo ()
 }
 
 /* { dg-final { scan-assembler "\[\r\n\]\[\t \]*.visible .global 
\[^,\r\n\]*glob_export" } } */
-/* { dg-final { scan-assembler "\[\r\n\]\[\t \]*.visible .const 
\[^,\r\n\]*cst_export" } } */
+/* { dg-final { scan-assembler "\[\r\n\]\[\t \]*.visible .global 
\[^,\r\n\]*cst_export" } } */
 /* { dg-final { scan-assembler "\[\r\n\]\[\t \]*.global \[^,\r\n\]*glob_local" 
} } */
-/* { dg-final { scan-assembler "\[\r\n\]\[\t \]*.const \[^,\r\n\]*cst_local" } 
} */
+/* { dg-final { scan-assembler "\[\r\n\]\[\t \]*.global \[^,\r\n\]*cst_local" 
} } */
 /* { dg-final { scan-assembler "\[\r\n\]\[\t \]*.extern .global 
\[^,\r\n\]*glob_import" } } */
-/* { dg-final { scan-assembler "\[\r\n\]\[\t \]*.extern .const 
\[^,\r\n\]*cst_import" } } */
+/* { dg-final { scan-assembler "\[\r\n\]\[\t \]*.extern .global 
\[^,\r\n\]*cst_import" } } */
diff --git a/libstdc++-v3/config/cpu/nvptx/t-nvptx 
b/libstdc++-v3/config/cpu/nvptx/t-nvptx
index a2f0f2dcb609..eacc5468d627 100644
--- a/libstdc++-v3/config/cpu/nvptx/t-nvptx
+++ b/libstdc++-v3/config/cpu/nvptx/t-nvptx
@@ -1,7 +1 @@
 # Per-file flags, see '../../../configure.host', "inject per-file flags".
-
-# 'ptxas'/CUDA Driver rejects objects with a lot of global constant data:
-#     ptxas error   : File uses too much global constant data ([...])
-# Cut short the assembly-time check; defer to actual use of the object file.
-AM_MAKEFLAGS += CXXFLAGS-src/c++17/floating_to_chars.lo=-Wa,--no-verify
-AM_MAKEFLAGS += CXXFLAGS-src/c++20/tzdb.lo=-Wa,--no-verify

Reply via email to