[gomp-nvptx 6/7] nvptx backend: change mul.u32 to mul.lo.u32

Alexander Monakov Sat, 19 Mar 2016 01:01:15 -0700

Recent testing uncovered that the PTX JIT may reject 'mul.u32' when it is used
as a non-widening 32-bit multiply instruction.  Use 'mul.lo.u32' instead to fix
32-bit code generation and to conform better to the PTX specification.


        * config/nvptx/nvptx.c (nvptx_init_unisimt_predicate): Emit
        'mul.lo.u32' instead of 'mul.u32' for 32-bit ABI target.
        (nvptx_declare_function_name): Ditto.
---
 gcc/ChangeLog.gomp-nvptx | 6 ++++++
 gcc/config/nvptx/nvptx.c | 4 ++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 93bf781..bc187ea 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -955,7 +955,7 @@ nvptx_init_unisimt_predicate (FILE *file)
   fprintf (file, "\t\t.reg.u%d %%ustmp2;\n", bits);
   fprintf (file, "\t\tmov.u32 %%ustmp0, %%tid.y;\n");
   fprintf (file, "\t\tmul%s.u32 %%ustmp1, %%ustmp0, 4;\n",
-          bits == 64 ? ".wide" : "");
+          bits == 64 ? ".wide" : ".lo");
   fprintf (file, "\t\tmov.u%d %%ustmp2, __nvptx_uni;\n", bits);
   fprintf (file, "\t\tadd.u%d %%ustmp2, %%ustmp2, %%ustmp1;\n", bits);
   fprintf (file, "\t\tld.shared.u32 %%r%d, [%%ustmp2];\n", master);
@@ -1115,7 +1115,7 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
       fprintf (file, "\t.reg.u%d %%fstmp2;\n", bits);
       fprintf (file, "\tmov.u32 %%fstmp0, %%tid.y;\n");
       fprintf (file, "\tmul%s.u32 %%fstmp1, %%fstmp0, %d;\n",
-              bits == 64 ? ".wide" : "", bits / 8);
+              bits == 64 ? ".wide" : ".lo", bits / 8);
       fprintf (file, "\tmov.u%d %%fstmp2, __nvptx_stacks;\n", bits);
       /* fstmp2 = &__nvptx_stacks[tid.y];  */
       fprintf (file, "\tadd.u%d %%fstmp2, %%fstmp2, %%fstmp1;\n", bits);

[gomp-nvptx 6/7] nvptx backend: change mul.u32 to mul.lo.u32

Reply via email to