llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-risc-v Author: Pengcheng Wang (wangpc-pp) <details> <summary>Changes</summary> --- Patch is 81.69 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/172615.diff 6 Files Affected: - (modified) llvm/lib/Target/RISCV/CMakeLists.txt (+1) - (modified) llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp (+15-1050) - (modified) llvm/lib/Target/RISCV/RISCVInstrInfo.cpp (+8) - (modified) llvm/lib/Target/RISCV/RISCVInstrInfo.h (+3) - (added) llvm/lib/Target/RISCV/RISCVVSETVLIInfoAnalysis.cpp (+516) - (added) llvm/lib/Target/RISCV/RISCVVSETVLIInfoAnalysis.h (+605) ``````````diff diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index 768c6c4ae3171..3b529c1471d54 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -72,6 +72,7 @@ add_llvm_target(RISCVCodeGen RISCVVectorPeephole.cpp RISCVVLOptimizer.cpp RISCVVMV0Elimination.cpp + RISCVVSETVLIInfoAnalysis.cpp RISCVZacasABIFix.cpp RISCVZilsdOptimizer.cpp GISel/RISCVCallLowering.cpp diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 1a043936383f8..e5c3dc66cb18b 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -26,6 +26,7 @@ #include "RISCV.h" #include "RISCVSubtarget.h" +#include "RISCVVSETVLIInfoAnalysis.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveDebugVariables.h" @@ -34,6 +35,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include <queue> using namespace llvm; +using namespace RISCV; #define DEBUG_TYPE "riscv-insert-vsetvli" #define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass" @@ -66,877 +68,13 @@ static unsigned getVLOpNum(const MachineInstr &MI) { return RISCVII::getVLOpNum(MI.getDesc()); } -static unsigned getSEWOpNum(const MachineInstr &MI) { - return 
RISCVII::getSEWOpNum(MI.getDesc()); -} - -static unsigned getVecPolicyOpNum(const MachineInstr &MI) { - return RISCVII::getVecPolicyOpNum(MI.getDesc()); -} - -/// Get the EEW for a load or store instruction. Return std::nullopt if MI is -/// not a load or store which ignores SEW. -static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) { - switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { - default: - return std::nullopt; - case RISCV::VLE8_V: - case RISCV::VLSE8_V: - case RISCV::VSE8_V: - case RISCV::VSSE8_V: - return 8; - case RISCV::VLE16_V: - case RISCV::VLSE16_V: - case RISCV::VSE16_V: - case RISCV::VSSE16_V: - return 16; - case RISCV::VLE32_V: - case RISCV::VLSE32_V: - case RISCV::VSE32_V: - case RISCV::VSSE32_V: - return 32; - case RISCV::VLE64_V: - case RISCV::VLSE64_V: - case RISCV::VSE64_V: - case RISCV::VSSE64_V: - return 64; - } -} - -/// Return true if this is an operation on mask registers. Note that -/// this includes both arithmetic/logical ops and load/store (vlm/vsm). -static bool isMaskRegOp(const MachineInstr &MI) { - if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags)) - return false; - const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm(); - // A Log2SEW of 0 is an operation on mask registers only. - return Log2SEW == 0; -} - -/// Return true if the inactive elements in the result are entirely undefined. -/// Note that this is different from "agnostic" as defined by the vector -/// specification. Agnostic requires each lane to either be undisturbed, or -/// take the value -1; no other value is allowed. -static bool hasUndefinedPassthru(const MachineInstr &MI) { - - unsigned UseOpIdx; - if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) - // If there is no passthrough operand, then the pass through - // lanes are undefined. 
- return true; - - // All undefined passthrus should be $noreg: see - // RISCVDAGToDAGISel::doPeepholeNoRegPassThru - const MachineOperand &UseMO = MI.getOperand(UseOpIdx); - return !UseMO.getReg().isValid() || UseMO.isUndef(); -} - -/// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs. -static bool isVectorCopy(const TargetRegisterInfo *TRI, - const MachineInstr &MI) { - return MI.isCopy() && MI.getOperand(0).getReg().isPhysical() && - RISCVRegisterInfo::isRVVRegClass( - TRI->getMinimalPhysRegClass(MI.getOperand(0).getReg())); -} - -/// Which subfields of VL or VTYPE have values we need to preserve? -struct DemandedFields { - // Some unknown property of VL is used. If demanded, must preserve entire - // value. - bool VLAny = false; - // Only zero vs non-zero is used. If demanded, can change non-zero values. - bool VLZeroness = false; - // What properties of SEW we need to preserve. - enum : uint8_t { - SEWEqual = 3, // The exact value of SEW needs to be preserved. - SEWGreaterThanOrEqualAndLessThan64 = - 2, // SEW can be changed as long as it's greater - // than or equal to the original value, but must be less - // than 64. - SEWGreaterThanOrEqual = 1, // SEW can be changed as long as it's greater - // than or equal to the original value. - SEWNone = 0 // We don't need to preserve SEW at all. - } SEW = SEWNone; - enum : uint8_t { - LMULEqual = 2, // The exact value of LMUL needs to be preserved. - LMULLessThanOrEqualToM1 = 1, // We can use any LMUL <= M1. - LMULNone = 0 // We don't need to preserve LMUL at all. - } LMUL = LMULNone; - bool SEWLMULRatio = false; - bool TailPolicy = false; - bool MaskPolicy = false; - // If this is true, we demand that VTYPE is set to some legal state, i.e. that - // vill is unset. 
- bool VILL = false; - bool TWiden = false; - bool AltFmt = false; - - // Return true if any part of VTYPE was used - bool usedVTYPE() const { - return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL || - TWiden || AltFmt; - } - - // Return true if any property of VL was used - bool usedVL() { - return VLAny || VLZeroness; - } - - // Mark all VTYPE subfields and properties as demanded - void demandVTYPE() { - SEW = SEWEqual; - LMUL = LMULEqual; - SEWLMULRatio = true; - TailPolicy = true; - MaskPolicy = true; - VILL = true; - TWiden = true; - AltFmt = true; - } - - // Mark all VL properties as demanded - void demandVL() { - VLAny = true; - VLZeroness = true; - } - - static DemandedFields all() { - DemandedFields DF; - DF.demandVTYPE(); - DF.demandVL(); - return DF; - } - - // Make this the result of demanding both the fields in this and B. - void doUnion(const DemandedFields &B) { - VLAny |= B.VLAny; - VLZeroness |= B.VLZeroness; - SEW = std::max(SEW, B.SEW); - LMUL = std::max(LMUL, B.LMUL); - SEWLMULRatio |= B.SEWLMULRatio; - TailPolicy |= B.TailPolicy; - MaskPolicy |= B.MaskPolicy; - VILL |= B.VILL; - AltFmt |= B.AltFmt; - TWiden |= B.TWiden; - } - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - /// Support for debugging, callable in GDB: V->dump() - LLVM_DUMP_METHOD void dump() const { - print(dbgs()); - dbgs() << "\n"; - } - - /// Implement operator<<. 
- void print(raw_ostream &OS) const { - OS << "{"; - OS << "VLAny=" << VLAny << ", "; - OS << "VLZeroness=" << VLZeroness << ", "; - OS << "SEW="; - switch (SEW) { - case SEWEqual: - OS << "SEWEqual"; - break; - case SEWGreaterThanOrEqual: - OS << "SEWGreaterThanOrEqual"; - break; - case SEWGreaterThanOrEqualAndLessThan64: - OS << "SEWGreaterThanOrEqualAndLessThan64"; - break; - case SEWNone: - OS << "SEWNone"; - break; - }; - OS << ", "; - OS << "LMUL="; - switch (LMUL) { - case LMULEqual: - OS << "LMULEqual"; - break; - case LMULLessThanOrEqualToM1: - OS << "LMULLessThanOrEqualToM1"; - break; - case LMULNone: - OS << "LMULNone"; - break; - }; - OS << ", "; - OS << "SEWLMULRatio=" << SEWLMULRatio << ", "; - OS << "TailPolicy=" << TailPolicy << ", "; - OS << "MaskPolicy=" << MaskPolicy << ", "; - OS << "VILL=" << VILL << ", "; - OS << "AltFmt=" << AltFmt << ", "; - OS << "TWiden=" << TWiden; - OS << "}"; - } -#endif -}; - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_ATTRIBUTE_USED -inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) { - DF.print(OS); - return OS; -} -#endif - -static bool isLMUL1OrSmaller(RISCVVType::VLMUL LMUL) { - auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL); - return Fractional || LMul == 1; -} - -/// Return true if moving from CurVType to NewVType is -/// indistinguishable from the perspective of an instruction (or set -/// of instructions) which use only the Used subfields and properties. 
-static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType, - const DemandedFields &Used) { - switch (Used.SEW) { - case DemandedFields::SEWNone: - break; - case DemandedFields::SEWEqual: - if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType)) - return false; - break; - case DemandedFields::SEWGreaterThanOrEqual: - if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType)) - return false; - break; - case DemandedFields::SEWGreaterThanOrEqualAndLessThan64: - if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) || - RISCVVType::getSEW(NewVType) >= 64) - return false; - break; - } - - switch (Used.LMUL) { - case DemandedFields::LMULNone: - break; - case DemandedFields::LMULEqual: - if (RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType)) - return false; - break; - case DemandedFields::LMULLessThanOrEqualToM1: - if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(NewVType))) - return false; - break; - } - - if (Used.SEWLMULRatio) { - auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType), - RISCVVType::getVLMUL(CurVType)); - auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType), - RISCVVType::getVLMUL(NewVType)); - if (Ratio1 != Ratio2) - return false; - } - - if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) != - RISCVVType::isTailAgnostic(NewVType)) - return false; - if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) != - RISCVVType::isMaskAgnostic(NewVType)) - return false; - if (Used.TWiden && (RISCVVType::hasXSfmmWiden(CurVType) != - RISCVVType::hasXSfmmWiden(NewVType) || - (RISCVVType::hasXSfmmWiden(CurVType) && - RISCVVType::getXSfmmWiden(CurVType) != - RISCVVType::getXSfmmWiden(NewVType)))) - return false; - if (Used.AltFmt && - RISCVVType::isAltFmt(CurVType) != RISCVVType::isAltFmt(NewVType)) - return false; - return true; -} - -/// Return the fields and properties demanded by the provided instruction. 
-DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) { - // This function works in coalesceVSETVLI too. We can still use the value of a - // SEW, VL, or Policy operand even though it might not be the exact value in - // the VL or VTYPE, since we only care about what the instruction originally - // demanded. - - // Most instructions don't use any of these subfeilds. - DemandedFields Res; - // Start conservative if registers are used - if (MI.isCall() || MI.isInlineAsm() || - MI.readsRegister(RISCV::VL, /*TRI=*/nullptr)) - Res.demandVL(); - if (MI.isCall() || MI.isInlineAsm() || - MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr)) - Res.demandVTYPE(); - // Start conservative on the unlowered form too - uint64_t TSFlags = MI.getDesc().TSFlags; - if (RISCVII::hasSEWOp(TSFlags)) { - Res.demandVTYPE(); - if (RISCVII::hasVLOp(TSFlags)) - if (const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); - !VLOp.isReg() || !VLOp.isUndef()) - Res.demandVL(); - - // Behavior is independent of mask policy. - if (!RISCVII::usesMaskPolicy(TSFlags)) - Res.MaskPolicy = false; - } - - // Loads and stores with implicit EEW do not demand SEW or LMUL directly. - // They instead demand the ratio of the two which is used in computing - // EMUL, but which allows us the flexibility to change SEW and LMUL - // provided we don't change the ratio. - // Note: We assume that the instructions initial SEW is the EEW encoded - // in the opcode. This is asserted when constructing the VSETVLIInfo. - if (getEEWForLoadStore(MI)) { - Res.SEW = DemandedFields::SEWNone; - Res.LMUL = DemandedFields::LMULNone; - } - - // Store instructions don't use the policy fields. - if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) { - Res.TailPolicy = false; - Res.MaskPolicy = false; - } - - // If this is a mask reg operation, it only cares about VLMAX. 
- // TODO: Possible extensions to this logic - // * Probably ok if available VLMax is larger than demanded - // * The policy bits can probably be ignored.. - if (isMaskRegOp(MI)) { - Res.SEW = DemandedFields::SEWNone; - Res.LMUL = DemandedFields::LMULNone; - } - - // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0. - if (RISCVInstrInfo::isScalarInsertInstr(MI)) { - Res.LMUL = DemandedFields::LMULNone; - Res.SEWLMULRatio = false; - Res.VLAny = false; - // For vmv.s.x and vfmv.s.f, if the passthru is *undefined*, we don't - // need to preserve any other bits and are thus compatible with any larger, - // etype and can disregard policy bits. Warning: It's tempting to try doing - // this for any tail agnostic operation, but we can't as TA requires - // tail lanes to either be the original value or -1. We are writing - // unknown bits to the lanes here. - if (hasUndefinedPassthru(MI)) { - if (RISCVInstrInfo::isFloatScalarMoveOrScalarSplatInstr(MI) && - !ST->hasVInstructionsF64()) - Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64; - else - Res.SEW = DemandedFields::SEWGreaterThanOrEqual; - Res.TailPolicy = false; - } - } - - // vmv.x.s, and vfmv.f.s are unconditional and ignore everything except SEW. - if (RISCVInstrInfo::isScalarExtractInstr(MI)) { - assert(!RISCVII::hasVLOp(TSFlags)); - Res.LMUL = DemandedFields::LMULNone; - Res.SEWLMULRatio = false; - Res.TailPolicy = false; - Res.MaskPolicy = false; - } - - if (RISCVII::hasVLOp(MI.getDesc().TSFlags)) { - const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); - // A slidedown/slideup with an *undefined* passthru can freely clobber - // elements not copied from the source vector (e.g. masked off, tail, or - // slideup's prefix). Notes: - // * We can't modify SEW here since the slide amount is in units of SEW. - // * VL=1 is special only because we have existing support for zero vs - // non-zero VL. We could generalize this if we had a VL > C predicate. 
- // * The LMUL1 restriction is for machines whose latency may depend on LMUL. - // * As above, this is only legal for tail "undefined" not "agnostic". - // * We avoid increasing vl if the subtarget has +vl-dependent-latency - if (RISCVInstrInfo::isVSlideInstr(MI) && VLOp.isImm() && - VLOp.getImm() == 1 && hasUndefinedPassthru(MI) && - !ST->hasVLDependentLatency()) { - Res.VLAny = false; - Res.VLZeroness = true; - Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1; - Res.TailPolicy = false; - } - - // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated in the - // same semantically as vmv.s.x. This is particularly useful since we don't - // have an immediate form of vmv.s.x, and thus frequently use vmv.v.i in - // it's place. Since a splat is non-constant time in LMUL, we do need to be - // careful to not increase the number of active vector registers (unlike for - // vmv.s.x.) - if (RISCVInstrInfo::isScalarSplatInstr(MI) && VLOp.isImm() && - VLOp.getImm() == 1 && hasUndefinedPassthru(MI) && - !ST->hasVLDependentLatency()) { - Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1; - Res.SEWLMULRatio = false; - Res.VLAny = false; - if (RISCVInstrInfo::isFloatScalarMoveOrScalarSplatInstr(MI) && - !ST->hasVInstructionsF64()) - Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64; - else - Res.SEW = DemandedFields::SEWGreaterThanOrEqual; - Res.TailPolicy = false; - } - } - - // In §32.16.6, whole vector register moves have a dependency on SEW. At the - // MIR level though we don't encode the element type, and it gives the same - // result whatever the SEW may be. - // - // However it does need valid SEW, i.e. vill must be cleared. The entry to a - // function, calls and inline assembly may all set it, so make sure we clear - // it for whole register copies. Do this by leaving VILL demanded. 
- if (isVectorCopy(ST->getRegisterInfo(), MI)) { - Res.LMUL = DemandedFields::LMULNone; - Res.SEW = DemandedFields::SEWNone; - Res.SEWLMULRatio = false; - Res.TailPolicy = false; - Res.MaskPolicy = false; - } - - if (RISCVInstrInfo::isVExtractInstr(MI)) { - assert(!RISCVII::hasVLOp(TSFlags)); - // TODO: LMUL can be any larger value (without cost) - Res.TailPolicy = false; - } - - Res.AltFmt = RISCVII::getAltFmtType(MI.getDesc().TSFlags) != - RISCVII::AltFmtType::DontCare; - Res.TWiden = RISCVII::hasTWidenOp(MI.getDesc().TSFlags) || - RISCVInstrInfo::isXSfmmVectorConfigInstr(MI); - - return Res; -} - -/// Defines the abstract state with which the forward dataflow models the -/// values of the VL and VTYPE registers after insertion. -class VSETVLIInfo { - struct AVLDef { - // Every AVLDef should have a VNInfo, unless we're running without - // LiveIntervals in which case this will be nullptr. - const VNInfo *ValNo; - Register DefReg; - }; - union { - AVLDef AVLRegDef; - unsigned AVLImm; - }; - - enum class AVLState : uint8_t { - Uninitialized, - AVLIsReg, - AVLIsImm, - AVLIsVLMAX, - Unknown, // AVL and VTYPE are fully unknown - } State = AVLState::Uninitialized; - - // Fields from VTYPE. 
- RISCVVType::VLMUL VLMul = RISCVVType::LMUL_1; - uint8_t SEW = 0; - uint8_t TailAgnostic : 1; - uint8_t MaskAgnostic : 1; - uint8_t SEWLMULRatioOnly : 1; - uint8_t AltFmt : 1; - uint8_t TWiden : 3; - -public: - VSETVLIInfo() - : AVLImm(0), TailAgnostic(false), MaskAgnostic(false), - SEWLMULRatioOnly(false), AltFmt(false), TWiden(0) {} - - static VSETVLIInfo getUnknown() { - VSETVLIInfo Info; - Info.setUnknown(); - return Info; - } - - bool isValid() const { return State != AVLState::Uninitialized; } - void setUnknown() { State = AVLState::Unknown; } - bool isUnknown() const { return State == AVLState::Unknown; } - - void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) { - assert(AVLReg.isVirtual()); - AVLRegDef.ValNo = VNInfo; - AVLRegDef.DefReg = AVLReg; - State = AVLState::AVLIsReg; - } - - void setAVLImm(unsigned Imm) { - AVLImm = Imm; - State = AVLState::AVLIsImm; - } - - void setAVLVLMAX() { State = AVLState::AVLIsVLMAX; } - - bool hasAVLImm() const { return State == AVLState::AVLIsImm; } - bool hasAVLReg() const { return State == AVLState::AVLIsReg; } - bool hasAVLVLMAX() const { return State == AVLState::AVLIsVLMAX; } - Register getAVLReg() const { - assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual()); - return AVLRegDef.DefReg; - } - unsigned getAVLImm() const { - assert(hasAVLImm()); - return AVLImm; - } - const VNInfo *getAVLVNInfo() const { - assert(hasAVLReg()); - return AVLRegDef.ValNo; - } - // Most AVLIsReg infos will have a single defining MachineInstr, unless it was - // a PHI node. In that case getAVLVNInfo()->def will point to the block - // boundary slot and this will return nullptr. If LiveIntervals isn't - // available, nullptr is also returned. 
- const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const { - assert(hasAVLReg()); - if (!LIS || getAVLVNInfo()->isPHIDef()) - return nullptr; - auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def); - assert(MI); - return MI; - } - - void setAVL(const VSETVLIInfo &Info) { - assert(Info.isValid()); - if (Info.isUnknown()) - setUnknown(); - else if (Info.hasAVLReg()) - setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg()); - else if (Info.hasAVLVLMAX()) - setAVLVLMAX(); - else { - assert(Info.hasAVLImm()); - setAVLImm(Info.getAVLImm()); - } - } - - bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; } - - unsigned getS... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/172615 _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
