4vtomat created this revision.
Herald added subscribers: jobnoorman, luke, VincentWu, ormris, vkmr, 
frasercrmck, luismarques, apazos, sameer.abuasal, s.egerton, Jim, benna, 
psnobl, jocewei, PkmX, the_o, brucehoult, MartinMosbeck, rogfer01, steven_wu, 
edward-jones, zzheng, jrtc27, shiva0217, kito-cheng, niosHD, sabuasal, 
simoncook, johnrusso, rbar, asb, hiraditya, arichardson.
Herald added a reviewer: aaron.ballman.
Herald added a project: All.
4vtomat requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, wangpc, eopXD, MaskRay.
Herald added projects: clang, LLVM.

This patch implements the vector calling convention based on
https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/389.
The idea is to distinguish "scalar" callee-saved registers from
"vector" callee-saved registers: the "scalar" ones keep the
original save/restore strategy, whereas the "vector" ones are
handled together with RVV objects.

The stack layout would be:

  |--------------------------| <-- FP
  | callee-allocated save    |
  | area for register varargs|
  |--------------------------|
  | callee-saved registers   | <-- scalar callee-saved
  |        (scalar)          |
  |--------------------------|
  | RVV alignment padding    |
  |--------------------------|
  | callee-saved registers   | <-- vector callee-saved
  |        (vector)          |
  |--------------------------|
  | RVV objects              |
  |--------------------------|
  | padding before RVV       |
  |--------------------------|
  | scalar local variables   |
  |--------------------------| <-- BP
  | variable size objects    |
  |--------------------------| <-- SP

Note: This patch doesn't support "tuple" types, e.g. vint32m1x2.

  They will be handled in a future patch once PR389 is ready.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D154576

Files:
  clang/include/clang/Basic/Attr.td
  clang/include/clang/Basic/AttrDocs.td
  clang/include/clang/Basic/Specifiers.h
  clang/lib/AST/ItaniumMangle.cpp
  clang/lib/AST/Type.cpp
  clang/lib/AST/TypePrinter.cpp
  clang/lib/Basic/Targets/RISCV.cpp
  clang/lib/Basic/Targets/RISCV.h
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/Sema/SemaDeclAttr.cpp
  clang/lib/Sema/SemaType.cpp
  clang/test/CodeGen/RISCV/riscv-vector-cc-attr.c
  llvm/include/llvm/AsmParser/LLToken.h
  llvm/include/llvm/IR/CallingConv.h
  llvm/lib/AsmParser/LLLexer.cpp
  llvm/lib/AsmParser/LLParser.cpp
  llvm/lib/IR/AsmWriter.cpp
  llvm/lib/Target/RISCV/RISCVCallingConv.td
  llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
  llvm/lib/Target/RISCV/RISCVISelLowering.cpp
  llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
  llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll

Index: llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll
@@ -0,0 +1,222 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+m -mattr=+v -O2 < %s \
+; RUN:    | FileCheck --check-prefix=SPILL-O2 %s
+
+define <vscale x 1 x i32> @test_vector_std(<vscale x 1 x i32> %va) nounwind {
+; SPILL-O2-LABEL: test_vector_std:
+; SPILL-O2:       # %bb.0: # %entry
+; SPILL-O2-NEXT:    addi sp, sp, -16
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 1
+; SPILL-O2-NEXT:    sub sp, sp, a0
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    #APP
+; SPILL-O2-NEXT:    #NO_APP
+; SPILL-O2-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 1
+; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    addi sp, sp, 16
+; SPILL-O2-NEXT:    ret
+entry:
+  call void asm sideeffect "",
+  "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+
+  ret <vscale x 1 x i32> %va
+}
+
+define riscv_vector_cc <vscale x 1 x i32> @test_vector_callee(<vscale x 1 x i32> %va) nounwind {
+; SPILL-O2-LABEL: test_vector_callee:
+; SPILL-O2:       # %bb.0: # %entry
+; SPILL-O2-NEXT:    addi sp, sp, -16
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 4
+; SPILL-O2-NEXT:    sub sp, sp, a0
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 14
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v1, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 13
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v2, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 12
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v3, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 11
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v4, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 10
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v5, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a1, a0, 3
+; SPILL-O2-NEXT:    add a0, a1, a0
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v6, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 3
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v7, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a1, a0, 3
+; SPILL-O2-NEXT:    sub a0, a1, a0
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v24, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 6
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v25, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a1, a0, 2
+; SPILL-O2-NEXT:    add a0, a1, a0
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v26, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 2
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v27, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a1, a0, 1
+; SPILL-O2-NEXT:    add a0, a1, a0
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v28, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v29, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v30, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vs1r.v v31, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a1, a0, 4
+; SPILL-O2-NEXT:    sub a0, a1, a0
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    #APP
+; SPILL-O2-NEXT:    #NO_APP
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a1, a0, 4
+; SPILL-O2-NEXT:    sub a0, a1, a0
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 14
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v1, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 13
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v2, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 12
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v3, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 11
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v4, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 10
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v5, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a1, a0, 3
+; SPILL-O2-NEXT:    add a0, a1, a0
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 3
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a1, a0, 3
+; SPILL-O2-NEXT:    sub a0, a1, a0
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v24, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 6
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v25, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a1, a0, 2
+; SPILL-O2-NEXT:    add a0, a1, a0
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v26, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 2
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v27, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a1, a0, 1
+; SPILL-O2-NEXT:    add a0, a1, a0
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v28, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v29, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v30, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vl1r.v v31, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 4
+; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    addi sp, sp, 16
+; SPILL-O2-NEXT:    ret
+entry:
+  call void asm sideeffect "",
+  "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+
+  ret <vscale x 1 x i32> %va
+}
Index: llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -66,17 +66,26 @@
     return CSR_Interrupt_SaveList;
   }
 
+  bool HasVectorCSR = MF->getFunction().getCallingConv() ==
+                      CallingConv::RISCV_VectorCall;
+
   switch (Subtarget.getTargetABI()) {
   default:
     llvm_unreachable("Unrecognized ABI");
   case RISCVABI::ABI_ILP32:
   case RISCVABI::ABI_LP64:
+    if (HasVectorCSR)
+      return CSR_ILP32_LP64_V_SaveList;
     return CSR_ILP32_LP64_SaveList;
   case RISCVABI::ABI_ILP32F:
   case RISCVABI::ABI_LP64F:
+    if (HasVectorCSR)
+      return CSR_ILP32F_LP64F_V_SaveList;
     return CSR_ILP32F_LP64F_SaveList;
   case RISCVABI::ABI_ILP32D:
   case RISCVABI::ABI_LP64D:
+    if (HasVectorCSR)
+      return CSR_ILP32D_LP64D_V_SaveList;
     return CSR_ILP32D_LP64D_SaveList;
   }
 }
@@ -636,12 +645,18 @@
     llvm_unreachable("Unrecognized ABI");
   case RISCVABI::ABI_ILP32:
   case RISCVABI::ABI_LP64:
+    if (CC == CallingConv::RISCV_VectorCall)
+      return CSR_ILP32_LP64_V_RegMask;
     return CSR_ILP32_LP64_RegMask;
   case RISCVABI::ABI_ILP32F:
   case RISCVABI::ABI_LP64F:
+    if (CC == CallingConv::RISCV_VectorCall)
+      return CSR_ILP32F_LP64F_V_RegMask;
     return CSR_ILP32F_LP64F_RegMask;
   case RISCVABI::ABI_ILP32D:
   case RISCVABI::ABI_LP64D:
+    if (CC == CallingConv::RISCV_VectorCall)
+      return CSR_ILP32D_LP64D_V_RegMask;
     return CSR_ILP32D_LP64D_RegMask;
   }
 }
Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14890,6 +14890,7 @@
     report_fatal_error("Unsupported calling convention");
   case CallingConv::C:
   case CallingConv::Fast:
+  case CallingConv::RISCV_VectorCall:
     break;
   case CallingConv::GHC:
     if (!Subtarget.hasStdExtF() || !Subtarget.hasStdExtD())
Index: llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -302,7 +302,8 @@
 
   for (auto &CS : CSI) {
     int FI = CS.getFrameIdx();
-    if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::Default)
+    if (FI >= 0 && (MFI.getStackID(FI) == TargetStackID::Default ||
+                    MFI.getStackID(FI) == TargetStackID::ScalableVector))
       NonLibcallCSI.push_back(CS);
   }
 
@@ -479,7 +480,13 @@
       .addCFIIndex(CFIIndex)
       .setMIFlag(MachineInstr::FrameSetup);
 
-  const auto &CSI = MFI.getCalleeSavedInfo();
+  const auto &CSI = getNonLibcallCSI(MF, MFI.getCalleeSavedInfo());
+  size_t VectorCSISize = 0;
+  for (auto &CS : CSI) {
+    int FI = CS.getFrameIdx();
+    if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::ScalableVector)
+      VectorCSISize++;
+  }
 
   // The frame pointer is callee-saved, and code has been generated for us to
   // save it to the stack. We need to skip over the storing of callee-saved
@@ -487,7 +494,7 @@
   // to the stack, not before.
   // FIXME: assumes exactly one instruction is used to save each callee-saved
   // register.
-  std::advance(MBBI, getNonLibcallCSI(MF, CSI).size());
+  std::advance(MBBI, CSI.size() - VectorCSISize);
 
   // Iterate over list of callee-saved registers and emit .cfi_offset
   // directives.
@@ -640,8 +647,12 @@
   // FIXME: assumes exactly one instruction is used to restore each
   // callee-saved register.
   auto LastFrameDestroy = MBBI;
-  if (!CSI.empty())
-    LastFrameDestroy = std::prev(MBBI, CSI.size());
+  for (const auto &CS : CSI) {
+    int FI = CS.getFrameIdx();
+    if (FI < 0 || MFI.getStackID(FI) != TargetStackID::Default)
+      break;
+    LastFrameDestroy = std::prev(LastFrameDestroy);
+  }
 
   uint64_t StackSize = getStackSizeWithRVVPadding(MF);
   uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize();
@@ -727,7 +738,8 @@
     MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
   }
 
-  if (FI >= MinCSFI && FI <= MaxCSFI) {
+  if (FI >= MinCSFI && FI <= MaxCSFI &&
+      StackID != TargetStackID::ScalableVector) {
     FrameReg = RISCV::X2;
 
     if (FirstSPAdjustAmount)
@@ -1261,20 +1273,53 @@
     DL = MI->getDebugLoc();
 
   // Manually restore values not restored by libcall.
-  // Keep the same order as in the prologue. There is no need to reverse the
-  // order in the epilogue. In addition, the return address will be restored
-  // first in the epilogue. It increases the opportunity to avoid the
+  //
+  // We first change the restore order for scalar and vector
+  // callee-saved registers as the layout shown below:
+  //
+  // Epilog restore order (original):
+  //     ----------------------------
+  //      RVV objects
+  //     ----------------------------
+  //      Callee-saved regs(scalar)
+  //      Callee-saved regs(vector)
+  //     ----------------------------
+  //
+  // Epilog restore order (after):
+  //     ----------------------------
+  //      RVV objects
+  //     ----------------------------
+  //      Callee-saved regs(vector)
+  //      Callee-saved regs(scalar)
+  //     ----------------------------
+  //
+  // So that it is able to put all vector registers which need
+  // to be restored together. The return address will be restored
+  // first in the scalar regs. It increases the opportunity to avoid the
   // load-to-use data hazard between loading RA and return by RA.
   // loadRegFromStackSlot can insert multiple instructions.
   const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI);
-  for (auto &CS : NonLibcallCSI) {
-    Register Reg = CS.getReg();
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-    TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI,
-                             Register());
-    assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!");
+  const MachineFrameInfo &MFI = MF->getFrameInfo();
+  auto *It = NonLibcallCSI.begin();
+  while (It != NonLibcallCSI.end()) {
+    int FI = It->getFrameIdx();
+    if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
+      break;
+    It++;
   }
 
+  auto loadRegFromStackSlot = [&](decltype(It) CSBeg, decltype(It) CSEnd) {
+    for (auto CS = CSBeg; CS != CSEnd; CS++) {
+      Register Reg = CS->getReg();
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+      TII.loadRegFromStackSlot(MBB, MI, Reg, CS->getFrameIdx(), RC, TRI,
+                               Register());
+      assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!");
+    }
+  };
+  loadRegFromStackSlot(It, NonLibcallCSI.end());
+  loadRegFromStackSlot(NonLibcallCSI.begin(), It);
+
   const char *RestoreLibCall = getRestoreLibCallName(*MF, CSI);
   if (RestoreLibCall) {
     // Add restore libcall via tail call.
Index: llvm/lib/Target/RISCV/RISCVCallingConv.td
===================================================================
--- llvm/lib/Target/RISCV/RISCVCallingConv.td
+++ llvm/lib/Target/RISCV/RISCVCallingConv.td
@@ -24,6 +24,17 @@
     : CalleeSavedRegs<(add CSR_ILP32_LP64,
                        F8_D, F9_D, (sequence "F%u_D", 18, 27))>;
 
+defvar CSR_V = (add (sequence "V%u", 1, 7), (sequence "V%u", 24, 31));
+
+def CSR_ILP32_LP64_V
+    : CalleeSavedRegs<(add CSR_ILP32_LP64, CSR_V)>;
+
+def CSR_ILP32F_LP64F_V
+    : CalleeSavedRegs<(add CSR_ILP32F_LP64F, CSR_V)>;
+
+def CSR_ILP32D_LP64D_V
+    : CalleeSavedRegs<(add CSR_ILP32D_LP64D, CSR_V)>;
+
 // Needed for implementation of RISCVRegisterInfo::getNoPreservedMask()
 def CSR_NoRegs : CalleeSavedRegs<(add)>;
 
Index: llvm/lib/IR/AsmWriter.cpp
===================================================================
--- llvm/lib/IR/AsmWriter.cpp
+++ llvm/lib/IR/AsmWriter.cpp
@@ -350,6 +350,7 @@
     break;
   case CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break;
   case CallingConv::AMDGPU_Gfx:    Out << "amdgpu_gfx"; break;
+  case CallingConv::RISCV_VectorCall: Out << "riscv_vector_cc"; break;
   }
 }
 
Index: llvm/lib/AsmParser/LLParser.cpp
===================================================================
--- llvm/lib/AsmParser/LLParser.cpp
+++ llvm/lib/AsmParser/LLParser.cpp
@@ -2031,6 +2031,7 @@
 ///   ::= 'amdgpu_cs_chain_preserve'
 ///   ::= 'amdgpu_kernel'
 ///   ::= 'tailcc'
+///   ::= 'riscv_vector_cc'
 ///   ::= 'cc' UINT
 ///
 bool LLParser::parseOptionalCallingConv(unsigned &CC) {
@@ -2099,6 +2100,7 @@
     break;
   case lltok::kw_amdgpu_kernel:  CC = CallingConv::AMDGPU_KERNEL; break;
   case lltok::kw_tailcc:         CC = CallingConv::Tail; break;
+  case lltok::kw_riscv_vector_cc:CC = CallingConv::RISCV_VectorCall; break;
   case lltok::kw_cc: {
       Lex.Lex();
       return parseUInt32(CC);
Index: llvm/lib/AsmParser/LLLexer.cpp
===================================================================
--- llvm/lib/AsmParser/LLLexer.cpp
+++ llvm/lib/AsmParser/LLLexer.cpp
@@ -632,6 +632,7 @@
   KEYWORD(amdgpu_kernel);
   KEYWORD(amdgpu_gfx);
   KEYWORD(tailcc);
+  KEYWORD(riscv_vector_cc);
 
   KEYWORD(cc);
   KEYWORD(c);
Index: llvm/include/llvm/IR/CallingConv.h
===================================================================
--- llvm/include/llvm/IR/CallingConv.h
+++ llvm/include/llvm/IR/CallingConv.h
@@ -245,6 +245,9 @@
     /// placement. Preserves active lane values for input VGPRs.
     AMDGPU_CS_ChainPreserve = 105,
 
+    /// Calling convention used for RISC-V V-extension.
+    RISCV_VectorCall = 106,
+
     /// The highest possible ID. Must be some 2^k - 1.
     MaxID = 1023
   };
Index: llvm/include/llvm/AsmParser/LLToken.h
===================================================================
--- llvm/include/llvm/AsmParser/LLToken.h
+++ llvm/include/llvm/AsmParser/LLToken.h
@@ -175,6 +175,7 @@
   kw_amdgpu_kernel,
   kw_amdgpu_gfx,
   kw_tailcc,
+  kw_riscv_vector_cc,
 
   // Attributes:
   kw_attributes,
Index: clang/test/CodeGen/RISCV/riscv-vector-cc-attr.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/RISCV/riscv-vector-cc-attr.c
@@ -0,0 +1,30 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v \
+// RUN:   -emit-llvm %s -o - | FileCheck %s
+
+#include <riscv_vector.h>
+
+vint32m1_t __attribute__((riscv_vector_cc)) bar(vint32m1_t input);
+
+// CHECK: call riscv_vector_cc <vscale x 2 x i32> @bar
+vint32m1_t test_vector_cc_attr(vint32m1_t input, int32_t *base, size_t vl) {
+  vint32m1_t ret;
+  vint32m1_t val;
+  val = vle32_v_i32m1(base, vl);
+  ret = bar(input);
+  vse32_v_i32m1(base, val, vl);
+  return ret;
+}
+
+vint32m1_t baz(vint32m1_t input);
+
+// CHECK: call <vscale x 2 x i32> @baz
+vint32m1_t test_no_vector_cc_attr(vint32m1_t input, int32_t *base, size_t vl) {
+  vint32m1_t ret;
+  vint32m1_t val;
+  val = vle32_v_i32m1(base, vl);
+  ret = baz(input);
+  vse32_v_i32m1(base, val, vl);
+  return ret;
+}
Index: clang/lib/Sema/SemaType.cpp
===================================================================
--- clang/lib/Sema/SemaType.cpp
+++ clang/lib/Sema/SemaType.cpp
@@ -135,7 +135,8 @@
   case ParsedAttr::AT_Pcs:                                                     \
   case ParsedAttr::AT_IntelOclBicc:                                            \
   case ParsedAttr::AT_PreserveMost:                                            \
-  case ParsedAttr::AT_PreserveAll
+  case ParsedAttr::AT_PreserveAll:                                             \
+  case ParsedAttr::AT_RISCVVectorCC
 
 // Function type attributes.
 #define FUNCTION_TYPE_ATTRS_CASELIST                                           \
@@ -7768,6 +7769,8 @@
     return createSimpleAttr<PreserveMostAttr>(Ctx, Attr);
   case ParsedAttr::AT_PreserveAll:
     return createSimpleAttr<PreserveAllAttr>(Ctx, Attr);
+  case ParsedAttr::AT_RISCVVectorCC:
+    return createSimpleAttr<RISCVVectorCCAttr>(Ctx, Attr);
   }
   llvm_unreachable("unexpected attribute kind!");
 }
Index: clang/lib/Sema/SemaDeclAttr.cpp
===================================================================
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -5194,6 +5194,9 @@
   case ParsedAttr::AT_PreserveAll:
     D->addAttr(::new (S.Context) PreserveAllAttr(S.Context, AL));
     return;
+  case ParsedAttr::AT_RISCVVectorCC:
+    D->addAttr(::new (S.Context) RISCVVectorCCAttr(S.Context, AL));
+    return;
   default:
     llvm_unreachable("unexpected attribute kind");
   }
@@ -5393,6 +5396,9 @@
   case ParsedAttr::AT_PreserveAll:
     CC = CC_PreserveAll;
     break;
+  case ParsedAttr::AT_RISCVVectorCC:
+    CC = CC_RISCVVectorCall;
+    break;
   default: llvm_unreachable("unexpected attribute kind");
   }
 
@@ -9229,6 +9235,7 @@
   case ParsedAttr::AT_AArch64VectorPcs:
   case ParsedAttr::AT_AArch64SVEPcs:
   case ParsedAttr::AT_AMDGPUKernelCall:
+  case ParsedAttr::AT_RISCVVectorCC:
     handleCallConvAttr(S, D, AL);
     break;
   case ParsedAttr::AT_Suppress:
Index: clang/lib/CodeGen/CGCall.cpp
===================================================================
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -71,6 +71,7 @@
   case CC_PreserveAll: return llvm::CallingConv::PreserveAll;
   case CC_Swift: return llvm::CallingConv::Swift;
   case CC_SwiftAsync: return llvm::CallingConv::SwiftTail;
+  case CC_RISCVVectorCall: return llvm::CallingConv::RISCV_VectorCall;
   }
 }
 
@@ -252,6 +253,9 @@
   if (D->hasAttr<PreserveAllAttr>())
     return CC_PreserveAll;
 
+  if (D->hasAttr<RISCVVectorCCAttr>())
+    return CC_RISCVVectorCall;
+
   return CC_C;
 }
 
Index: clang/lib/Basic/Targets/RISCV.h
===================================================================
--- clang/lib/Basic/Targets/RISCV.h
+++ clang/lib/Basic/Targets/RISCV.h
@@ -101,6 +101,8 @@
 
   bool hasBitIntType() const override { return true; }
 
+  CallingConvCheckResult checkCallingConvention(CallingConv CC) const override;
+
   bool useFP16ConversionIntrinsics() const override {
     return false;
   }
Index: clang/lib/Basic/Targets/RISCV.cpp
===================================================================
--- clang/lib/Basic/Targets/RISCV.cpp
+++ clang/lib/Basic/Targets/RISCV.cpp
@@ -346,3 +346,14 @@
   bool Is64Bit = getTriple().isArch64Bit();
   llvm::RISCV::fillValidTuneCPUArchList(Values, Is64Bit);
 }
+
+TargetInfo::CallingConvCheckResult
+RISCVTargetInfo::checkCallingConvention(CallingConv CC) const {
+  switch (CC) {
+    default:
+      return CCCR_Warning;
+    case CC_C:
+    case CC_RISCVVectorCall:
+      return CCCR_OK;
+  }
+}
Index: clang/lib/AST/TypePrinter.cpp
===================================================================
--- clang/lib/AST/TypePrinter.cpp
+++ clang/lib/AST/TypePrinter.cpp
@@ -1035,6 +1035,9 @@
     case CC_PreserveAll:
       OS << " __attribute__((preserve_all))";
       break;
+    case CC_RISCVVectorCall:
+      OS << "__attribute__((riscv_vector_cc))";
+      break;
     }
   }
 
@@ -1855,6 +1858,9 @@
   case attr::PreserveAll:
     OS << "preserve_all";
     break;
+  case attr::RISCVVectorCC:
+    OS << "riscv_vector_cc";
+    break;
   case attr::NoDeref:
     OS << "noderef";
     break;
Index: clang/lib/AST/Type.cpp
===================================================================
--- clang/lib/AST/Type.cpp
+++ clang/lib/AST/Type.cpp
@@ -3350,6 +3350,7 @@
   case CC_SwiftAsync: return "swiftasynccall";
   case CC_PreserveMost: return "preserve_most";
   case CC_PreserveAll: return "preserve_all";
+  case CC_RISCVVectorCall: return "riscv_vector_cc";
   }
 
   llvm_unreachable("Invalid calling convention.");
@@ -3818,6 +3819,7 @@
   case attr::IntelOclBicc:
   case attr::PreserveMost:
   case attr::PreserveAll:
+  case attr::RISCVVectorCC:
     return true;
   }
   llvm_unreachable("invalid attr kind");
Index: clang/lib/AST/ItaniumMangle.cpp
===================================================================
--- clang/lib/AST/ItaniumMangle.cpp
+++ clang/lib/AST/ItaniumMangle.cpp
@@ -3264,6 +3264,7 @@
   case CC_OpenCLKernel:
   case CC_PreserveMost:
   case CC_PreserveAll:
+  case CC_RISCVVectorCall:
     // FIXME: we should be mangling all of the above.
     return "";
 
Index: clang/include/clang/Basic/Specifiers.h
===================================================================
--- clang/include/clang/Basic/Specifiers.h
+++ clang/include/clang/Basic/Specifiers.h
@@ -288,6 +288,7 @@
     CC_AArch64VectorCall, // __attribute__((aarch64_vector_pcs))
     CC_AArch64SVEPCS, // __attribute__((aarch64_sve_pcs))
     CC_AMDGPUKernelCall, // __attribute__((amdgpu_kernel))
+    CC_RISCVVectorCall, // __attribute__((riscv_vector_cc))
   };
 
   /// Checks whether the given calling convention supports variadic
Index: clang/include/clang/Basic/AttrDocs.td
===================================================================
--- clang/include/clang/Basic/AttrDocs.td
+++ clang/include/clang/Basic/AttrDocs.td
@@ -5193,6 +5193,16 @@
   }];
 }
 
+def RISCVVectorCCDocs : Documentation {
+ let Category = DocCatCallingConvs;
+ let Content = [{
+The ``riscv_vector_cc`` attribute can be applied to a function. It treats 15
+registers, namely v1-v7 and v24-v31, as callee-saved. Callers thus don't need
+to save these registers before function calls, and callees need to save them
+only if they use them.
+ }];
+}
+
 def PreferredNameDocs : Documentation {
   let Category = DocCatDecl;
   let Content = [{
Index: clang/include/clang/Basic/Attr.td
===================================================================
--- clang/include/clang/Basic/Attr.td
+++ clang/include/clang/Basic/Attr.td
@@ -2748,6 +2748,11 @@
   let Documentation = [PreserveAllDocs];
 }
 
+def RISCVVectorCC: DeclOrTypeAttr {
+ let Spellings = [Clang<"riscv_vector_cc">];
+ let Documentation = [RISCVVectorCCDocs];
+}
+
 def Target : InheritableAttr {
   let Spellings = [GCC<"target">];
   let Args = [StringArgument<"featuresStr">];
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to