llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-nvptx Author: Youngsuk Kim (JOE1994) <details> <summary>Changes</summary> Remove the extraneous '+0' immediate offset part in PTX load/stores, to improve readability of output PTX code. --- Patch is 474.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113017.diff 66 Files Affected: - (modified) clang/test/CodeGenCUDA/bf16.cu (+4-4) - (modified) llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp (+8) - (modified) llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h (+2) - (modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (+59-55) - (modified) llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll (+6-6) - (modified) llvm/test/CodeGen/NVPTX/activemask.ll (+2-2) - (modified) llvm/test/CodeGen/NVPTX/addr-mode.ll (+5-5) - (modified) llvm/test/CodeGen/NVPTX/aggregate-return.ll (+4-4) - (modified) llvm/test/CodeGen/NVPTX/bf16-instructions.ll (+95-95) - (modified) llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll (+2-2) - (modified) llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll (+30-30) - (modified) llvm/test/CodeGen/NVPTX/bswap.ll (+4-4) - (modified) llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll (+2-2) - (modified) llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll (+2-2) - (modified) llvm/test/CodeGen/NVPTX/chain-different-as.ll (+1-1) - (modified) llvm/test/CodeGen/NVPTX/cmpxchg.ll (+8-8) - (modified) llvm/test/CodeGen/NVPTX/combine-mad.ll (+8-8) - (modified) llvm/test/CodeGen/NVPTX/compute-ptx-value-vts.ll (+4-4) - (modified) llvm/test/CodeGen/NVPTX/convert-int-sm20.ll (+6-6) - (modified) llvm/test/CodeGen/NVPTX/copysign.ll (+6-6) - (modified) llvm/test/CodeGen/NVPTX/dot-product.ll (+13-13) - (modified) llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll (+2-2) - (modified) llvm/test/CodeGen/NVPTX/elect.ll (+3-3) - (modified) llvm/test/CodeGen/NVPTX/extractelement.ll (+6-6) - (modified) llvm/test/CodeGen/NVPTX/f16-instructions.ll (+79-79) - (modified) llvm/test/CodeGen/NVPTX/f16x2-instructions.ll (+79-79) - (modified) llvm/test/CodeGen/NVPTX/i128-param.ll (+4-4) - (modified) llvm/test/CodeGen/NVPTX/i128-retval.ll (+2-2) - (modified) llvm/test/CodeGen/NVPTX/i128-struct.ll (+1-1) - (modified) llvm/test/CodeGen/NVPTX/i128.ll (+9-9) - (modified) llvm/test/CodeGen/NVPTX/i16x2-instructions.ll (+50-50) - (modified) llvm/test/CodeGen/NVPTX/i8x4-instructions.ll (+59-59) - (modified) llvm/test/CodeGen/NVPTX/indirect_byval.ll (+8-8) - (modified) llvm/test/CodeGen/NVPTX/jump-table.ll (+7-7) - (modified) llvm/test/CodeGen/NVPTX/ldparam-v4.ll (+1-1) - (modified) llvm/test/CodeGen/NVPTX/local-stack-frame.ll (+2-2) - (modified) llvm/test/CodeGen/NVPTX/lower-alloca.ll (+1-1) - (modified) llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll (+14-14) - (modified) llvm/test/CodeGen/NVPTX/lower-args.ll (+3-3) - (modified) llvm/test/CodeGen/NVPTX/math-intrins.ll (+87-87) - (modified) llvm/test/CodeGen/NVPTX/mulhi-intrins.ll (+6-6) - (modified) llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll (+15-15) - (modified) llvm/test/CodeGen/NVPTX/param-load-store.ll (+172-172) - (modified) llvm/test/CodeGen/NVPTX/param-overalign.ll (+8-8) - (modified) llvm/test/CodeGen/NVPTX/param-vectorize-device.ll (+38-38) - (modified) llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll (+31-31) - (modified) llvm/test/CodeGen/NVPTX/rcp-opt.ll (+3-3) - (modified) llvm/test/CodeGen/NVPTX/rotate.ll (+24-24) - (modified) llvm/test/CodeGen/NVPTX/rotate_64.ll (+2-2) - (modified) llvm/test/CodeGen/NVPTX/sad-intrins.ll (+6-6) - (modified) llvm/test/CodeGen/NVPTX/sext-setcc.ll (+2-2) - (modified) llvm/test/CodeGen/NVPTX/st-param-imm.ll (+83-83) - (modified) llvm/test/CodeGen/NVPTX/store-undef.ll (+1-1) - (modified) llvm/test/CodeGen/NVPTX/tex-read-cuda.ll (+2-2) - (modified) llvm/test/CodeGen/NVPTX/tid-range.ll (+1-1) - (modified) llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll (+21-21) - (modified) llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll (+29-29) - (modified) llvm/test/CodeGen/NVPTX/vaargs.ll (+11-11) - (modified) llvm/test/CodeGen/NVPTX/variadics-backend.ll (+24-24) - (modified) llvm/test/CodeGen/NVPTX/vec-param-load.ll (+7-7) - (modified) llvm/test/CodeGen/NVPTX/vector-args.ll (+1-1) - (modified) llvm/test/CodeGen/NVPTX/vector-call.ll (+2-2) - (modified) llvm/test/CodeGen/NVPTX/vector-returns.ll (+38-38) - (modified) llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll (+1-1) - (modified) llvm/test/Transforms/NaryReassociate/NVPTX/nary-slsr.ll (+3-3) - (modified) llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected (+4-4) ``````````diff diff --git a/clang/test/CodeGenCUDA/bf16.cu b/clang/test/CodeGenCUDA/bf16.cu index 3c443420dbd36a..f794b83239f14a 100644 --- a/clang/test/CodeGenCUDA/bf16.cu +++ b/clang/test/CodeGenCUDA/bf16.cu @@ -25,7 +25,7 @@ __device__ void test_arg(__bf16 *out, __bf16 in) { __device__ __bf16 test_ret( __bf16 in) { // CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z8test_retDF16b_param_0]; return in; -// CHECK: st.param.b16 [func_retval0+0], %[[R]] +// CHECK: st.param.b16 [func_retval0], %[[R]] // CHECK: ret; } @@ -35,15 +35,15 @@ __device__ __bf16 external_func( __bf16 in); // CHECK: .param .align 2 .b8 _Z9test_callDF16b_param_0[2] __device__ __bf16 test_call( __bf16 in) { // CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z9test_callDF16b_param_0]; -// CHECK: st.param.b16 [param0+0], %[[R]]; +// CHECK: st.param.b16 [param0], %[[R]]; // CHECK: .param .align 2 .b8 retval0[2]; // CHECK: call.uni (retval0), // CHECK-NEXT: _Z13external_funcDF16b, // CHECK-NEXT: ( // CHECK-NEXT: param0 // CHECK-NEXT ); -// CHECK: ld.param.b16 %[[RET:rs[0-9]+]], [retval0+0]; +// CHECK: ld.param.b16 %[[RET:rs[0-9]+]], [retval0]; return external_func(in); -// CHECK: st.param.b16 [func_retval0+0], %[[RET]] +// CHECK: st.param.b16 [func_retval0], %[[RET]] // CHECK: ret; } diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp index 7d6442a611125f..3bda3b72674276 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp @@ -363,6 +363,14 @@ void NVPTXInstPrinter::printMemOperand(const MCInst *MI, int OpNum, } } +void NVPTXInstPrinter::printOffseti32imm(const MCInst *MI, int OpNum, + raw_ostream &O, const char *Modifier) { + if (auto &Op = MI->getOperand(OpNum); Op.isImm() && Op.getImm() == 0) + return; // don't print '+0' + O << "+"; + printOperand(MI, OpNum, O); +} + void NVPTXInstPrinter::printProtoIdent(const MCInst *MI, int OpNum, raw_ostream &O, const char *Modifier) { const MCOperand &Op = MI->getOperand(OpNum); diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h index e6954f861cd10e..e8a4a6dbdd5324 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h @@ -45,6 +45,8 @@ class NVPTXInstPrinter : public MCInstPrinter { const char *Modifier = nullptr); void printMemOperand(const MCInst *MI, int OpNum, raw_ostream &O, const char *Modifier = nullptr); + void printOffseti32imm(const MCInst *MI, int OpNum, raw_ostream &O, + const char *Modifier = nullptr); void printProtoIdent(const MCInst *MI, int OpNum, raw_ostream &O, const char *Modifier = nullptr); void printPrmtMode(const MCInst *MI, int OpNum, raw_ostream &O, diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 8b34ce4f1001c1..b5478b8f09ceb4 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1934,6 +1934,10 @@ def MmaCode : Operand<i32> { let PrintMethod = "printMmaCode"; } +def Offseti32imm : Operand<i32> { + let PrintMethod = "printOffseti32imm"; +} + def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>; @@ -2482,21 +2486,21 @@ def ProxyReg : let mayLoad = true in { class LoadParamMemInst<NVPTXRegClass regclass, string opstr> : - NVPTXInst<(outs regclass:$dst), (ins i32imm:$b), - !strconcat("ld.param", opstr, " \t$dst, [retval0+$b];"), + NVPTXInst<(outs regclass:$dst), (ins Offseti32imm:$b), + !strconcat("ld.param", opstr, " \t$dst, [retval0$b];"), []>; class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> : - NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b), + NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins Offseti32imm:$b), !strconcat("ld.param.v2", opstr, - " \t{{$dst, $dst2}}, [retval0+$b];"), []>; + " \t{{$dst, $dst2}}, [retval0$b];"), []>; class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr> : NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3, regclass:$dst4), - (ins i32imm:$b), + (ins Offseti32imm:$b), !strconcat("ld.param.v4", opstr, - " \t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"), + " \t{{$dst, $dst2, $dst3, $dst4}}, [retval0$b];"), []>; } @@ -2512,8 +2516,8 @@ let mayStore = true in { if !or(support_imm, !isa<NVPTXRegClass>(op)) then def _ # !if(!isa<NVPTXRegClass>(op), "r", "i") : NVPTXInst<(outs), - (ins op:$val, i32imm:$a, i32imm:$b), - "st.param" # opstr # " \t[param$a+$b], $val;", + (ins op:$val, i32imm:$a, Offseti32imm:$b), + "st.param" # opstr # " \t[param$a$b], $val;", []>; } @@ -2524,8 +2528,8 @@ let mayStore = true in { # !if(!isa<NVPTXRegClass>(op2), "r", "i") : NVPTXInst<(outs), (ins op1:$val1, op2:$val2, - i32imm:$a, i32imm:$b), - "st.param.v2" # opstr # " \t[param$a+$b], {{$val1, $val2}};", + i32imm:$a, Offseti32imm:$b), + "st.param.v2" # opstr # " \t[param$a$b], {{$val1, $val2}};", []>; } @@ -2541,29 +2545,29 @@ let mayStore = true in { : NVPTXInst<(outs), (ins op1:$val1, op2:$val2, op3:$val3, op4:$val4, - i32imm:$a, i32imm:$b), + i32imm:$a, Offseti32imm:$b), "st.param.v4" # opstr # - " \t[param$a+$b], {{$val1, $val2, $val3, $val4}};", + " \t[param$a$b], {{$val1, $val2, $val3, $val4}};", []>; } class StoreRetvalInst<NVPTXRegClass regclass, string opstr> : - NVPTXInst<(outs), (ins regclass:$val, i32imm:$a), - !strconcat("st.param", opstr, " \t[func_retval0+$a], $val;"), + NVPTXInst<(outs), (ins regclass:$val, Offseti32imm:$a), + !strconcat("st.param", opstr, " \t[func_retval0$a], $val;"), []>; class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr> : - NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a), + NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, Offseti32imm:$a), !strconcat("st.param.v2", opstr, - " \t[func_retval0+$a], {{$val, $val2}};"), + " \t[func_retval0$a], {{$val, $val2}};"), []>; class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> : NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, regclass:$val3, - regclass:$val4, i32imm:$a), + regclass:$val4, Offseti32imm:$a), !strconcat("st.param.v4", opstr, - " \t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"), + " \t[func_retval0$a], {{$val, $val2, $val3, $val4}};"), []>; } @@ -2827,21 +2831,21 @@ multiclass LD<NVPTXRegClass regclass> { def _ari : NVPTXInst< (outs regclass:$dst), (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), + i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset), "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t$dst, [$addr+$offset];", []>; + "\t$dst, [$addr$offset];", []>; def _ari_64 : NVPTXInst< (outs regclass:$dst), (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset), "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t$dst, [$addr+$offset];", []>; + "\t$dst, [$addr$offset];", []>; def _asi : NVPTXInst< (outs regclass:$dst), (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset), + LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset), "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t$dst, [$addr+$offset];", []>; + "\t$dst, [$addr$offset];", []>; } let mayLoad=1, hasSideEffects=0 in { @@ -2876,23 +2880,23 @@ multiclass ST<NVPTXRegClass regclass> { (outs), (ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, - i32imm:$offset), + Offseti32imm:$offset), "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" - " \t[$addr+$offset], $src;", []>; + " \t[$addr$offset], $src;", []>; def _ari_64 : NVPTXInst< (outs), (ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, - i32imm:$offset), + Offseti32imm:$offset), "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" - " \t[$addr+$offset], $src;", []>; + " \t[$addr$offset], $src;", []>; def _asi : NVPTXInst< (outs), (ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, imem:$addr, - i32imm:$offset), + Offseti32imm:$offset), "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" - " \t[$addr+$offset], $src;", []>; + " \t[$addr$offset], $src;", []>; } let mayStore=1, hasSideEffects=0 in { @@ -2929,21 +2933,21 @@ multiclass LD_VEC<NVPTXRegClass regclass> { def _v2_ari : NVPTXInst< (outs regclass:$dst1, regclass:$dst2), (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), + LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset), "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2}}, [$addr+$offset];", []>; + "\t{{$dst1, $dst2}}, [$addr$offset];", []>; def _v2_ari_64 : NVPTXInst< (outs regclass:$dst1, regclass:$dst2), (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset), "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2}}, [$addr+$offset];", []>; + "\t{{$dst1, $dst2}}, [$addr$offset];", []>; def _v2_asi : NVPTXInst< (outs regclass:$dst1, regclass:$dst2), (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset), + LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset), "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2}}, [$addr+$offset];", []>; + "\t{{$dst1, $dst2}}, [$addr$offset];", []>; def _v4_avar : NVPTXInst< (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, @@ -2965,21 +2969,21 @@ multiclass LD_VEC<NVPTXRegClass regclass> { def _v4_ari : NVPTXInst< (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), + LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset), "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>; + "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>; def _v4_ari_64 : NVPTXInst< (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset), "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>; + "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>; def _v4_asi : NVPTXInst< (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset), + LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset), "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>; + "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>; } let mayLoad=1, hasSideEffects=0 in { defm LDV_i8 : LD_VEC<Int16Regs>; @@ -3016,23 +3020,23 @@ multiclass ST_VEC<NVPTXRegClass regclass> { (outs), (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, - Int32Regs:$addr, i32imm:$offset), + Int32Regs:$addr, Offseti32imm:$offset), "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr+$offset], {{$src1, $src2}};", []>; + "\t[$addr$offset], {{$src1, $src2}};", []>; def _v2_ari_64 : NVPTXInst< (outs), (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, - Int64Regs:$addr, i32imm:$offset), + Int64Regs:$addr, Offseti32imm:$offset), "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr+$offset], {{$src1, $src2}};", []>; + "\t[$addr$offset], {{$src1, $src2}};", []>; def _v2_asi : NVPTXInst< (outs), (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, - imem:$addr, i32imm:$offset), + imem:$addr, Offseti32imm:$offset), "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr+$offset], {{$src1, $src2}};", []>; + "\t[$addr$offset], {{$src1, $src2}};", []>; def _v4_avar : NVPTXInst< (outs), (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, @@ -3058,23 +3062,23 @@ multiclass ST_VEC<NVPTXRegClass regclass> { (outs), (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), + LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset), "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>; + "\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>; def _v4_ari_64 : NVPTXInst< (outs), (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset), "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>; + "\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>; def _v4_asi : NVPTXInst< (outs), (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset), + LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset), "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}" - "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>; + "$fromWidth \t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>; } let mayStore=1, hasSideEffects=0 in { @@ -3903,4 +3907,4 @@ def atomic_thread_fence_seq_cst_cta : Requires<[hasPTX<60>, hasSM<70>]>; def atomic_thread_fence_acq_rel_cta : NVPTXInst<(outs), (ins), "fence.acq_rel.cta;", []>, - Requires<[hasPTX<60>, hasSM<70>]>; \ No newline at end of file + Requires<[hasPTX<60>, hasSM<70>]>; diff --git a/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll b/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll index bc58a700cb9828..028fab7ae54d6a 100644 --- a/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll +++ b/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll @@ -19,7 +19,7 @@ define i32 @f(ptr %p) { ; ENABLED-NEXT: ld.param.u64 %rd1, [f_param_0]; ; ENABLED-NEXT: ld.v2.u32 {%r1, %r2}, [%rd1]; ; ENABLED-NEXT: add.s32 %r3, %r1, %r2; -; ENABLED-NEXT: st.param.b32 [func_retval0+0], %r3; +; ENABLED-NEXT: st.param.b32 [func_retval0], %r3; ; ENABLED-NEXT: ret; ; ; DISABLED-LABEL: f( @@ -32,7 +32,7 @@ define i32 @f(ptr %p) { ; DISABLED-NEXT: ld.u32 %r1, [%rd1]; ; DISABLED-NEXT: ld.u32 %r2, [%rd1+4]; ; DISABLED-NEXT: add.s32 %r3, %r1, %r2; -; DISABLED-NEXT: st.param.b32 [func_retval0+0], %r3; +; DISABLED-NEXT: st.param.b32 [func_retval0], %r3; ; DISABLED-NEXT: ret; %p.1 = getelementptr i32, ptr %p, i32 1 %v0 = load i32, ptr %p, align 8 @@ -68,7 +68,7 @@ define half @fh(ptr %p) { ; ENABLED-NEXT: cvt.f32.f16 %f11, %rs5; ; ENABLED-NEXT: add.rn.f32 %f12, %f10, %f11; ; ENABLED-NEXT: cvt.rn.f16.f32 %rs9, %f12; -; ENABLED-NEXT: st.param.b16 [func_retval0+0], %rs9; +; ENABLED-NEXT: st.param.b16 [func_retval0], %rs9; ; ENABLED-NEXT: ret; ; ; DISABLED-LABEL: fh( @@ -100,7 +100,7 @@ define half @fh(ptr %p) { ; DISABLED-NEXT: cvt.f32.f16 %f11, %rs5; ; DISABLED-NEXT: add.rn.f32 %f12, %f10, %f11; ; DISABLED-NEXT: cvt.rn.f16.f32 %rs9, %f12; -; DISABLED-NEXT: st.param.b16 [func_retval0+0], %rs9; +; DISABLED-NEXT: st.param.b16 [func_retval0], %rs9; ; DISABLED-NEXT: ret; %p.1 = getelementptr half, ptr %p, i32 1 %p.2 = getelementptr half, ptr %p, i32 2 @@ -132,7 +132,7 @@ define float @ff(ptr %p) { ; ENABLED-NEXT: add.rn.f32 %f7, %f3, %f4; ; ENABLED-NEXT: add.rn.f32 %f8, %f6, %f7; ; ENABLED-NEXT: add.rn.f32 %f9, %f8, %f5; -; ENABLED-NEXT: st.param.f32 [func_retval0+0], %f9; +; ENABLED-NEXT: st.param.f32 [func_retval0], %f9; ; ENABLED-NEXT: ret; ; ; DISABLED-LABEL: ff( @@ -151,7 +151,7 @@ define float @ff(ptr %p) { ; DISABLED-NEXT: add.rn.f32 %f7, %f3, %f4; ; DISABLED-NEXT: add.rn.f32 %f8, %f6, %f7; ; DISABLED-NEXT: add.rn.f32 %f9, %f8, %f5; -; DISABLED-NEXT: st.param.f32 [func_retval0+0], %f9; +; DISABLED-NEXT: st.param.f32 [func_retval0], %f9; ; DISABLED-NEXT: ret; %p.1 = getelementptr float, ptr %p, i32 1 %p.2 = getelementptr float, ptr %p, i32 2 diff --git a/llvm/test/CodeGen/NVPTX/activemask.ll b/llvm/test/CodeGen/NVPTX/activemask.ll index 1496b2ebdd4427..e1d169d17c60e9 100644 --- a/llvm/test/CodeGen/NVPTX/activemask.ll +++ b/llvm/test/CodeGen/NVPTX/activemask.ll @@ -6,7 +6,7 @@ declare i32 @llvm.nvvm.activemask() ; CHECK-LABEL: activemask( ; ; CHECK: ac... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/113017 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits