================
@@ -1506,3 +1506,69 @@ define void @local_volatile_4xdouble(ptr addrspace(5) %a, ptr addrspace(5) %b) {
   store volatile <4 x double> %a.load, ptr addrspace(5) %b
   ret void
 }
+
+; Copies one i256 between two global-address-space (addrspace(1)) pointers,
+; with both the load and the store marked align 32.
+; The SM90 prefix expects the copy split into two v2.b64 halves (offsets 0
+; and 16); the SM100 prefix expects a single v4.b64 load and a single v4.b64
+; store.
+; NOTE(review): the RUN lines that define these prefixes are outside this
+; hunk -- confirm which targets they map to.
+define void @test_i256_global(ptr addrspace(1) %a, ptr addrspace(1) %b) {
+; SM90-LABEL: test_i256_global(
+; SM90:       {
+; SM90-NEXT:    .reg .b64 %rd<7>;
+; SM90-EMPTY:
+; SM90-NEXT:  // %bb.0:
+; SM90-NEXT:    ld.param.b64 %rd1, [test_i256_global_param_0];
+; SM90-NEXT:    ld.global.v2.b64 {%rd2, %rd3}, [%rd1];
+; SM90-NEXT:    ld.global.v2.b64 {%rd4, %rd5}, [%rd1+16];
+; SM90-NEXT:    ld.param.b64 %rd6, [test_i256_global_param_1];
+; SM90-NEXT:    st.global.v2.b64 [%rd6+16], {%rd4, %rd5};
+; SM90-NEXT:    st.global.v2.b64 [%rd6], {%rd2, %rd3};
+; SM90-NEXT:    ret;
+;
+; SM100-LABEL: test_i256_global(
+; SM100:       {
+; SM100-NEXT:    .reg .b64 %rd<7>;
+; SM100-EMPTY:
+; SM100-NEXT:  // %bb.0:
+; SM100-NEXT:    ld.param.b64 %rd1, [test_i256_global_param_0];
+; SM100-NEXT:    ld.global.v4.b64 {%rd2, %rd3, %rd4, %rd5}, [%rd1];
+; SM100-NEXT:    ld.param.b64 %rd6, [test_i256_global_param_1];
+; SM100-NEXT:    st.global.v4.b64 [%rd6], {%rd2, %rd3, %rd4, %rd5};
+; SM100-NEXT:    ret;
+  %a.load = load i256, ptr addrspace(1) %a, align 32
+  store i256 %a.load, ptr addrspace(1) %b, align 32
+  ret void
+}
+
+
+; Copies one i256 between two addrspace(1) pointers where the load and the
+; store are only align 16, i.e. less than the 32-byte size of the value.
+; The shared CHECK prefix expects two v2.b64 global loads and two v2.b64
+; global stores (offsets 0 and 16) rather than a single v4.b64 access.
+; NOTE(review): which RUN lines feed the CHECK prefix is outside this hunk --
+; confirm it covers every target under test.
+define void @test_i256_global_unaligned(ptr addrspace(1) %a, ptr addrspace(1) 
%b) {
+; CHECK-LABEL: test_i256_global_unaligned(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [test_i256_global_unaligned_param_0];
+; CHECK-NEXT:    ld.global.v2.b64 {%rd2, %rd3}, [%rd1];
+; CHECK-NEXT:    ld.global.v2.b64 {%rd4, %rd5}, [%rd1+16];
+; CHECK-NEXT:    ld.param.b64 %rd6, [test_i256_global_unaligned_param_1];
+; CHECK-NEXT:    st.global.v2.b64 [%rd6+16], {%rd4, %rd5};
+; CHECK-NEXT:    st.global.v2.b64 [%rd6], {%rd2, %rd3};
+; CHECK-NEXT:    ret;
+  %a.load = load i256, ptr addrspace(1) %a, align 16
+  store i256 %a.load, ptr addrspace(1) %b, align 16
+  ret void
+}
+
+; Copies one align-32 i256 between two generic pointers (plain `ptr`, no
+; addrspace qualifier).
+; The shared CHECK prefix expects two ld.v2.b64 and two st.v2.b64 with no
+; state-space qualifier (offsets 0 and 16) rather than a single v4.b64
+; access, even though the alignment is 32.
+; NOTE(review): which RUN lines feed the CHECK prefix is outside this hunk --
+; confirm it covers every target under test.
+define void @test_i256_generic(ptr %a, ptr %b) {
+; CHECK-LABEL: test_i256_generic(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [test_i256_generic_param_0];
+; CHECK-NEXT:    ld.v2.b64 {%rd2, %rd3}, [%rd1];
+; CHECK-NEXT:    ld.v2.b64 {%rd4, %rd5}, [%rd1+16];
+; CHECK-NEXT:    ld.param.b64 %rd6, [test_i256_generic_param_1];
+; CHECK-NEXT:    st.v2.b64 [%rd6+16], {%rd4, %rd5};
+; CHECK-NEXT:    st.v2.b64 [%rd6], {%rd2, %rd3};
+; CHECK-NEXT:    ret;
+  %a.load = load i256, ptr %a, align 32
+  store i256 %a.load, ptr %b, align 32
+  ret void
+}
----------------
AlexMaclean wrote:

I've added tests for both `atomic` and `volatile`. We don't support any 
`atomic` loads/stores of size greater than 64 bits. 

https://github.com/llvm/llvm-project/pull/155198
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to