[PATCH] D146873: [2/11][POC][Clang][RISCV] Define RVV tuple types

Yueh-Ting (eop) Chen via Phabricator via cfe-commits Thu, 18 May 2023 10:39:50 -0700

eopXD updated this revision to Diff 523446.
eopXD marked 2 inline comments as done.
eopXD added a comment.


Address comments from Craig.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146873/new/

https://reviews.llvm.org/D146873

Files:
  clang/include/clang/Basic/RISCVVTypes.def
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CodeGenTypes.cpp
  clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c
  clang/test/Sema/riscv-types.c

Index: clang/test/Sema/riscv-types.c
===================================================================
--- clang/test/Sema/riscv-types.c
+++ clang/test/Sema/riscv-types.c
@@ -133,6 +133,9 @@
 
   // CHECK: __rvv_int8mf2_t x43;
   __rvv_int8mf2_t x43;
+
+  // CHECK: __rvv_int32m1x2_t x44;
+  __rvv_int32m1x2_t x44;
 }
 
 typedef __rvv_bool4_t vbool4_t;
Index: clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c
@@ -0,0 +1,90 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -O0 \
+// RUN:   -emit-llvm %s -o - | FileCheck %s --check-prefix=O0
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -disable-O0-optnone \
+// RUN:   -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s --check-prefix=AFTER_MEM2REG
+
+
+#include <riscv_vector.h>
+
+// Declare local variable
+// O0-LABEL: define dso_local void @foo
+// O0-SAME: () #[[ATTR0:[0-9]+]] {
+// O0-NEXT:  entry:
+// O0-NEXT:    [[V_TUPLE:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// O0-NEXT:    ret void
+//
+// AFTER_MEM2REG-LABEL: define dso_local void @foo
+// AFTER_MEM2REG-SAME: () #[[ATTR0:[0-9]+]] {
+// AFTER_MEM2REG-NEXT:  entry:
+// AFTER_MEM2REG-NEXT:    ret void
+//
+void foo() {
+  __rvv_int32m1x2_t v_tuple;
+}
+
+// Declare local variable and return
+// O0-LABEL: define dso_local { <vscale x 2 x i32>, <vscale x 2 x i32> } @bar
+// O0-SAME: () #[[ATTR0]] {
+// O0-NEXT:  entry:
+// O0-NEXT:    [[V_TUPLE:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// O0-NEXT:    [[TMP0:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE]], align 4
+// O0-NEXT:    ret { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]]
+//
+// AFTER_MEM2REG-LABEL: define dso_local { <vscale x 2 x i32>, <vscale x 2 x i32> } @bar
+// AFTER_MEM2REG-SAME: () #[[ATTR0]] {
+// AFTER_MEM2REG-NEXT:  entry:
+// AFTER_MEM2REG-NEXT:    ret { <vscale x 2 x i32>, <vscale x 2 x i32> } undef
+//
+__rvv_int32m1x2_t bar() {
+  __rvv_int32m1x2_t v_tuple;
+  return v_tuple;
+}
+
+// Pass as function parameter
+// O0-LABEL: define dso_local void @baz
+// O0-SAME: (<vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] {
+// O0-NEXT:  entry:
+// O0-NEXT:    [[V_TUPLE:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// O0-NEXT:    [[V_TUPLE_ADDR:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// O0-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[V_TUPLE_COERCE0]], 0
+// O0-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1]], 1
+// O0-NEXT:    store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], ptr [[V_TUPLE]], align 4
+// O0-NEXT:    [[V_TUPLE1:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE]], align 4
+// O0-NEXT:    store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[V_TUPLE1]], ptr [[V_TUPLE_ADDR]], align 4
+// O0-NEXT:    ret void
+//
+// AFTER_MEM2REG-LABEL: define dso_local void @baz
+// AFTER_MEM2REG-SAME: (<vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] {
+// AFTER_MEM2REG-NEXT:  entry:
+// AFTER_MEM2REG-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[V_TUPLE_COERCE0]], 0
+// AFTER_MEM2REG-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1]], 1
+// AFTER_MEM2REG-NEXT:    ret void
+//
+void baz(__rvv_int32m1x2_t v_tuple) {
+}
+
+// Pass as function parameter and return
+// O0-LABEL: define dso_local { <vscale x 2 x i32>, <vscale x 2 x i32> } @qux
+// O0-SAME: (<vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] {
+// O0-NEXT:  entry:
+// O0-NEXT:    [[V_TUPLE:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// O0-NEXT:    [[V_TUPLE_ADDR:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// O0-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[V_TUPLE_COERCE0]], 0
+// O0-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1]], 1
+// O0-NEXT:    store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], ptr [[V_TUPLE]], align 4
+// O0-NEXT:    [[V_TUPLE1:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE]], align 4
+// O0-NEXT:    store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[V_TUPLE1]], ptr [[V_TUPLE_ADDR]], align 4
+// O0-NEXT:    [[TMP2:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE_ADDR]], align 4
+// O0-NEXT:    ret { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP2]]
+//
+// AFTER_MEM2REG-LABEL: define dso_local { <vscale x 2 x i32>, <vscale x 2 x i32> } @qux
+// AFTER_MEM2REG-SAME: (<vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] {
+// AFTER_MEM2REG-NEXT:  entry:
+// AFTER_MEM2REG-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[V_TUPLE_COERCE0]], 0
+// AFTER_MEM2REG-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1]], 1
+// AFTER_MEM2REG-NEXT:    ret { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]]
+//
+__rvv_int32m1x2_t qux(__rvv_int32m1x2_t v_tuple) {
+  return v_tuple;
+}
Index: clang/lib/CodeGen/CodeGenTypes.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenTypes.cpp
+++ clang/lib/CodeGen/CodeGenTypes.cpp
@@ -630,13 +630,22 @@
 #include "clang/Basic/PPCTypes.def"
 #define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
 #include "clang/Basic/RISCVVTypes.def"
-    {
-      ASTContext::BuiltinVectorTypeInfo Info =
-          Context.getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty));
-      return llvm::ScalableVectorType::get(ConvertType(Info.ElementType),
-                                           Info.EC.getKnownMinValue() *
-                                           Info.NumVectors);
-    }
+      {
+        ASTContext::BuiltinVectorTypeInfo Info =
+            Context.getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty));
+        // Tuple types are expressed as aggregregate types of the same scalable
+        // vector type (e.g. vint32m1x2_t is two vint32m1_t, which is {<vscale x
+        // 2 x i32>, <vscale x 2 x i32>}).
+        if (Info.NumVectors != 1) {
+          llvm::Type *EltTy = llvm::ScalableVectorType::get(
+              ConvertType(Info.ElementType), Info.EC.getKnownMinValue());
+          llvm::SmallVector<llvm::Type *, 4> EltTys(Info.NumVectors, EltTy);
+          return llvm::StructType::get(getLLVMContext(), EltTys);
+        }
+        return llvm::ScalableVectorType::get(ConvertType(Info.ElementType),
+                                             Info.EC.getKnownMinValue() *
+                                                 Info.NumVectors);
+      }
 #define WASM_REF_TYPE(Name, MangledName, Id, SingletonId, AS)                  \
   case BuiltinType::Id: {                                                      \
     if (BuiltinType::Id == BuiltinType::WasmExternRef)                         \
Index: clang/lib/CodeGen/CGCall.cpp
===================================================================
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -3132,30 +3132,51 @@
       llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
       if (ArgI.isDirect() && ArgI.getCanBeFlattened() && STy &&
           STy->getNumElements() > 1) {
-        uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy);
-        llvm::Type *DstTy = Ptr.getElementType();
-        uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(DstTy);
+        if (CGM.getDataLayout().getTypeAllocSize(STy).isScalable()) {
+          assert(STy->containsHomogeneousScalableVectorTypes() &&
+                 "ABI only supports structure with homogeneous scalable vector "
+                 "type");
+          assert(
+              CGM.getDataLayout().getTypeAllocSize(STy) ==
+                  CGM.getDataLayout().getTypeAllocSize(Ptr.getElementType()) &&
+              "Only allow non-fractional movement of structure with"
+              "homogeneous scalable vector type");
+          assert(STy->getNumElements() == NumIRArgs);
+
+          llvm::Value *LoadedStructValue = llvm::PoisonValue::get(STy);
+          for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+            auto *AI = Fn->getArg(FirstIRArg + i);
+            AI->setName(Arg->getName() + ".coerce" + Twine(i));
+            LoadedStructValue =
+                Builder.CreateInsertValue(LoadedStructValue, AI, i);
+          }
 
-        Address AddrToStoreInto = Address::invalid();
-        if (SrcSize <= DstSize) {
-          AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy);
+          Builder.CreateStore(LoadedStructValue, Ptr);
         } else {
-          AddrToStoreInto =
-            CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");
-        }
+          uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy);
+          llvm::Type *DstTy = Ptr.getElementType();
+          uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(DstTy);
+
+          Address AddrToStoreInto = Address::invalid();
+          if (SrcSize <= DstSize) {
+            AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy);
+          } else {
+            AddrToStoreInto =
+                CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");
+          }
 
-        assert(STy->getNumElements() == NumIRArgs);
-        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
-          auto AI = Fn->getArg(FirstIRArg + i);
-          AI->setName(Arg->getName() + ".coerce" + Twine(i));
-          Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i);
-          Builder.CreateStore(AI, EltPtr);
-        }
+          assert(STy->getNumElements() == NumIRArgs);
+          for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+            auto AI = Fn->getArg(FirstIRArg + i);
+            AI->setName(Arg->getName() + ".coerce" + Twine(i));
+            Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i);
+            Builder.CreateStore(AI, EltPtr);
+          }
 
-        if (SrcSize > DstSize) {
-          Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize);
+          if (SrcSize > DstSize) {
+            Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize);
+          }
         }
-
       } else {
         // Simple case, just do a coerced store of the argument into the alloca.
         assert(NumIRArgs == 1);
Index: clang/include/clang/Basic/RISCVVTypes.def
===================================================================
--- clang/include/clang/Basic/RISCVVTypes.def
+++ clang/include/clang/Basic/RISCVVTypes.def
@@ -144,6 +144,10 @@
 RVV_PREDICATE_TYPE("__rvv_bool32_t", RvvBool32, RvvBool32Ty, 2)
 RVV_PREDICATE_TYPE("__rvv_bool64_t", RvvBool64, RvvBool64Ty, 1)
 
+//===- Tuple vector types -------------------------------------------------===//
+
+RVV_VECTOR_TYPE_INT("__rvv_int32m1x2_t", RvvInt32m1x2, RvvInt32m1x2Ty, 2,  32, 2, true)
+
 #undef RVV_VECTOR_TYPE_FLOAT
 #undef RVV_VECTOR_TYPE_INT
 #undef RVV_VECTOR_TYPE

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D146873: [2/11][POC][Clang][RISCV] Define RVV tuple types

Reply via email to