Author: dim
Date: Sun Jan 22 16:52:30 2017
New Revision: 312625
URL: https://svnweb.freebsd.org/changeset/base/312625
Log: Vendor import of llvm release_40 branch r292732: https://llvm.org/svn/llvm-project/llvm/branches/release_40@292732 Added: vendor/llvm/dist/test/Transforms/LoopStrengthReduce/pr31627.ll vendor/llvm/dist/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll vendor/llvm/dist/test/Transforms/NewGVN/pr31613.ll Modified: vendor/llvm/dist/cmake/modules/AddLLVM.cmake vendor/llvm/dist/docs/ReleaseNotes.rst vendor/llvm/dist/docs/index.rst vendor/llvm/dist/include/llvm/Analysis/AssumptionCache.h vendor/llvm/dist/lib/Analysis/AssumptionCache.cpp vendor/llvm/dist/lib/Analysis/ModuleSummaryAnalysis.cpp vendor/llvm/dist/lib/Bitcode/Reader/MetadataLoader.cpp vendor/llvm/dist/lib/LTO/ThinLTOCodeGenerator.cpp vendor/llvm/dist/lib/Target/X86/X86ISelLowering.cpp vendor/llvm/dist/lib/Target/X86/X86Subtarget.cpp vendor/llvm/dist/lib/Transforms/Scalar/LoopStrengthReduce.cpp vendor/llvm/dist/lib/Transforms/Scalar/NewGVN.cpp vendor/llvm/dist/lib/Transforms/Vectorize/LoopVectorize.cpp vendor/llvm/dist/test/CodeGen/X86/atomic-eflags-reuse.ll vendor/llvm/dist/test/CodeGen/X86/slow-pmulld.ll vendor/llvm/dist/test/ThinLTO/X86/lazyload_metadata.ll Modified: vendor/llvm/dist/cmake/modules/AddLLVM.cmake ============================================================================== --- vendor/llvm/dist/cmake/modules/AddLLVM.cmake Sun Jan 22 16:05:13 2017 (r312624) +++ vendor/llvm/dist/cmake/modules/AddLLVM.cmake Sun Jan 22 16:52:30 2017 (r312625) @@ -462,11 +462,9 @@ function(llvm_add_library name) if(UNIX AND NOT APPLE AND NOT ARG_SONAME) set_target_properties(${name} PROPERTIES - # Concatenate the version numbers since ldconfig expects exactly - # one component indicating the ABI version, while LLVM uses - # major+minor for that. - SOVERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_MINOR} - VERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}) + # Since 4.0.0, the ABI version is indicated by the major version + SOVERSION ${LLVM_VERSION_MAJOR} + VERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}) endif() endif() Modified: vendor/llvm/dist/docs/ReleaseNotes.rst ============================================================================== --- vendor/llvm/dist/docs/ReleaseNotes.rst Sun Jan 22 16:05:13 2017 (r312624) +++ vendor/llvm/dist/docs/ReleaseNotes.rst Sun Jan 22 16:52:30 2017 (r312625) @@ -67,13 +67,46 @@ Non-comprehensive list of changes in thi Makes programs 10x faster by doing Special New Thing. + Improvements to ThinLTO (-flto=thin) + ------------------------------------ + * Integration with profile data (PGO). When available, profile data + enables more accurate function importing decisions, as well as + cross-module indirect call promotion. + * Significant build-time and binary-size improvements when compiling with + debug info (-g). + Changes to the LLVM IR ---------------------- -Changes to the ARM Backend +Changes to the ARM Targets -------------------------- - During this release ... +**During this release the AArch64 target has:** + +* Gained support for ILP32 relocations. +* Gained support for XRay. +* Made even more progress on GlobalISel. There is still some work left before + it is production-ready though. +* Refined the support for Qualcomm's Falkor and Samsung's Exynos CPUs. +* Learned a few new tricks for lowering multiplications by constants, folding + spilled/refilled copies etc. 
+ +**During this release the ARM target has:** + +* Gained support for ROPI (read-only position independence) and RWPI + (read-write position independence), which can be used to remove the need for + a dynamic linker. +* Gained support for execute-only code, which is placed in pages without read + permissions. +* Gained a machine scheduler for Cortex-R52. +* Gained support for XRay. +* Gained Thumb1 implementations for several compiler-rt builtins. It also + has some support for building the builtins for HF targets. +* Started using the generic bitreverse intrinsic instead of rbit. +* Gained very basic support for GlobalISel. + +A lot of work has also been done in LLD for ARM, which now supports more +relocations and TLS. Changes to the MIPS Target Modified: vendor/llvm/dist/docs/index.rst ============================================================================== --- vendor/llvm/dist/docs/index.rst Sun Jan 22 16:05:13 2017 (r312624) +++ vendor/llvm/dist/docs/index.rst Sun Jan 22 16:52:30 2017 (r312625) @@ -1,11 +1,6 @@ Overview ======== -.. warning:: - - If you are using a released version of LLVM, see `the download page - <http://llvm.org/releases/>`_ to find your documentation. - The LLVM compiler infrastructure supports a wide range of projects, from industrial strength compilers to specialized JIT applications to small research projects. Modified: vendor/llvm/dist/include/llvm/Analysis/AssumptionCache.h ============================================================================== --- vendor/llvm/dist/include/llvm/Analysis/AssumptionCache.h Sun Jan 22 16:05:13 2017 (r312624) +++ vendor/llvm/dist/include/llvm/Analysis/AssumptionCache.h Sun Jan 22 16:52:30 2017 (r312625) @@ -68,7 +68,10 @@ class AssumptionCache { AffectedValuesMap AffectedValues; /// Get the vector of assumptions which affect a value from the cache. - SmallVector<WeakVH, 1> &getAffectedValues(Value *V); + SmallVector<WeakVH, 1> &getOrInsertAffectedValues(Value *V); + + /// Copy affected values in the cache for OV to be affected values for NV. + void copyAffectedValuesInCache(Value *OV, Value *NV); /// \brief Flag tracking whether we have scanned the function yet. /// Modified: vendor/llvm/dist/lib/Analysis/AssumptionCache.cpp ============================================================================== --- vendor/llvm/dist/lib/Analysis/AssumptionCache.cpp Sun Jan 22 16:05:13 2017 (r312624) +++ vendor/llvm/dist/lib/Analysis/AssumptionCache.cpp Sun Jan 22 16:52:30 2017 (r312625) @@ -24,7 +24,7 @@ using namespace llvm; using namespace llvm::PatternMatch; -SmallVector<WeakVH, 1> &AssumptionCache::getAffectedValues(Value *V) { +SmallVector<WeakVH, 1> &AssumptionCache::getOrInsertAffectedValues(Value *V) { // Try using find_as first to avoid creating extra value handles just for the // purpose of doing the lookup. auto AVI = AffectedValues.find_as(V); @@ -98,7 +98,7 @@ void AssumptionCache::updateAffectedValu } for (auto &AV : Affected) { - auto &AVV = getAffectedValues(AV); + auto &AVV = getOrInsertAffectedValues(AV); if (std::find(AVV.begin(), AVV.end(), CI) == AVV.end()) AVV.push_back(CI); } @@ -111,20 +111,27 @@ void AssumptionCache::AffectedValueCallb // 'this' now dangles! 
} +void AssumptionCache::copyAffectedValuesInCache(Value *OV, Value *NV) { + auto &NAVV = getOrInsertAffectedValues(NV); + auto AVI = AffectedValues.find(OV); + if (AVI == AffectedValues.end()) + return; + + for (auto &A : AVI->second) + if (std::find(NAVV.begin(), NAVV.end(), A) == NAVV.end()) + NAVV.push_back(A); +} + void AssumptionCache::AffectedValueCallbackVH::allUsesReplacedWith(Value *NV) { if (!isa<Instruction>(NV) && !isa<Argument>(NV)) return; // Any assumptions that affected this value now affect the new value. - auto &NAVV = AC->getAffectedValues(NV); - auto AVI = AC->AffectedValues.find(getValPtr()); - if (AVI == AC->AffectedValues.end()) - return; - - for (auto &A : AVI->second) - if (std::find(NAVV.begin(), NAVV.end(), A) == NAVV.end()) - NAVV.push_back(A); + AC->copyAffectedValuesInCache(getValPtr(), NV); + // 'this' now might dangle! If the AffectedValues map was resized to add an + // entry for NV then this object might have been destroyed in favor of some + // copy in the grown map. } void AssumptionCache::scanFunction() { Modified: vendor/llvm/dist/lib/Analysis/ModuleSummaryAnalysis.cpp ============================================================================== --- vendor/llvm/dist/lib/Analysis/ModuleSummaryAnalysis.cpp Sun Jan 22 16:05:13 2017 (r312624) +++ vendor/llvm/dist/lib/Analysis/ModuleSummaryAnalysis.cpp Sun Jan 22 16:52:30 2017 (r312625) @@ -405,6 +405,7 @@ char ModuleSummaryIndexWrapperPass::ID = INITIALIZE_PASS_BEGIN(ModuleSummaryIndexWrapperPass, "module-summary-analysis", "Module Summary Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(ModuleSummaryIndexWrapperPass, "module-summary-analysis", "Module Summary Analysis", false, true) Modified: vendor/llvm/dist/lib/Bitcode/Reader/MetadataLoader.cpp ============================================================================== --- vendor/llvm/dist/lib/Bitcode/Reader/MetadataLoader.cpp Sun Jan 22 16:05:13 2017 (r312624) +++ vendor/llvm/dist/lib/Bitcode/Reader/MetadataLoader.cpp Sun Jan 22 16:52:30 2017 (r312625) @@ -768,13 +768,12 @@ void MetadataLoader::MetadataLoaderImpl: unsigned ID, PlaceholderQueue &Placeholders) { assert(ID < (MDStringRef.size()) + GlobalMetadataBitPosIndex.size()); assert(ID >= MDStringRef.size() && "Unexpected lazy-loading of MDString"); -#ifndef NDEBUG // Lookup first if the metadata hasn't already been loaded. if (auto *MD = MetadataList.lookup(ID)) { auto *N = dyn_cast_or_null<MDNode>(MD); - assert(N && N->isTemporary() && "Lazy loading an already loaded metadata"); + if (!N->isTemporary()) + return; } -#endif SmallVector<uint64_t, 64> Record; StringRef Blob; IndexCursor.JumpToBit(GlobalMetadataBitPosIndex[ID - MDStringRef.size()]); @@ -827,8 +826,22 @@ Error MetadataLoader::MetadataLoaderImpl auto getMD = [&](unsigned ID) -> Metadata * { if (ID < MDStringRef.size()) return lazyLoadOneMDString(ID); - if (!IsDistinct) + if (!IsDistinct) { + if (auto *MD = MetadataList.lookup(ID)) + return MD; + // If lazy-loading is enabled, we try recursively to load the operand + // instead of creating a temporary. + if (ID < (MDStringRef.size() + GlobalMetadataBitPosIndex.size())) { + // Create a temporary for the node that is referencing the operand we + // will lazy-load. It is needed before recursing in case there are + // uniquing cycles. 
+ MetadataList.getMetadataFwdRef(NextMetadataNo); + lazyLoadOneMetadata(ID, Placeholders); + return MetadataList.lookup(ID); + } + // Return a temporary. return MetadataList.getMetadataFwdRef(ID); + } if (auto *MD = MetadataList.getMetadataIfResolved(ID)) return MD; return &Placeholders.getPlaceholderOp(ID); Modified: vendor/llvm/dist/lib/LTO/ThinLTOCodeGenerator.cpp ============================================================================== --- vendor/llvm/dist/lib/LTO/ThinLTOCodeGenerator.cpp Sun Jan 22 16:05:13 2017 (r312624) +++ vendor/llvm/dist/lib/LTO/ThinLTOCodeGenerator.cpp Sun Jan 22 16:52:30 2017 (r312625) @@ -829,11 +829,22 @@ static std::string writeGeneratedObject( // Main entry point for the ThinLTO processing void ThinLTOCodeGenerator::run() { + // Prepare the resulting object vector + assert(ProducedBinaries.empty() && "The generator should not be reused"); + if (SavedObjectsDirectoryPath.empty()) + ProducedBinaries.resize(Modules.size()); + else { + sys::fs::create_directories(SavedObjectsDirectoryPath); + bool IsDir; + sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir); + if (!IsDir) + report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'"); + ProducedBinaryFiles.resize(Modules.size()); + } + if (CodeGenOnly) { // Perform only parallel codegen and return. ThreadPool Pool; - assert(ProducedBinaries.empty() && "The generator should not be reused"); - ProducedBinaries.resize(Modules.size()); int count = 0; for (auto &ModuleBuffer : Modules) { Pool.async([&](int count) { @@ -845,7 +856,12 @@ void ThinLTOCodeGenerator::run() { /*IsImporting*/ false); // CodeGen - ProducedBinaries[count] = codegen(*TheModule); + auto OutputBuffer = codegen(*TheModule); + if (SavedObjectsDirectoryPath.empty()) + ProducedBinaries[count] = std::move(OutputBuffer); + else + ProducedBinaryFiles[count] = writeGeneratedObject( + count, "", SavedObjectsDirectoryPath, *OutputBuffer); }, count++); } @@ -866,18 +882,6 @@ void ThinLTOCodeGenerator::run() { WriteIndexToFile(*Index, OS); } - // Prepare the resulting object vector - assert(ProducedBinaries.empty() && "The generator should not be reused"); - if (SavedObjectsDirectoryPath.empty()) - ProducedBinaries.resize(Modules.size()); - else { - sys::fs::create_directories(SavedObjectsDirectoryPath); - bool IsDir; - sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir); - if (!IsDir) - report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'"); - ProducedBinaryFiles.resize(Modules.size()); - } // Prepare the module map. auto ModuleMap = generateModuleMap(Modules); Modified: vendor/llvm/dist/lib/Target/X86/X86ISelLowering.cpp ============================================================================== --- vendor/llvm/dist/lib/Target/X86/X86ISelLowering.cpp Sun Jan 22 16:05:13 2017 (r312624) +++ vendor/llvm/dist/lib/Target/X86/X86ISelLowering.cpp Sun Jan 22 16:52:30 2017 (r312625) @@ -29455,19 +29455,11 @@ static SDValue combineSelect(SDNode *N, return SDValue(); } -/// Combine brcond/cmov/setcc/.. based on comparing the result of -/// atomic_load_add to use EFLAGS produced by the addition -/// directly if possible. For example: -/// -/// (setcc (cmp (atomic_load_add x, -C) C), COND_E) -/// becomes: -/// (setcc (LADD x, -C), COND_E) -/// -/// and +/// Combine: /// (brcond/cmov/setcc .., (cmp (atomic_load_add x, 1), 0), COND_S) -/// becomes: +/// to: /// (brcond/cmov/setcc .., (LADD x, 1), COND_LE) -/// +/// i.e., reusing the EFLAGS produced by the LOCKed instruction. 
/// Note that this is only legal for some op/cc combinations. static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC, SelectionDAG &DAG) { @@ -29482,7 +29474,7 @@ static SDValue combineSetCCAtomicArith(S if (!Cmp.hasOneUse()) return SDValue(); - // This applies to variations of the common case: + // This only applies to variations of the common case: // (icmp slt x, 0) -> (icmp sle (add x, 1), 0) // (icmp sge x, 0) -> (icmp sgt (add x, 1), 0) // (icmp sle x, 0) -> (icmp slt (sub x, 1), 0) @@ -29501,9 +29493,8 @@ static SDValue combineSetCCAtomicArith(S return SDValue(); auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS); - if (!CmpRHSC) + if (!CmpRHSC || CmpRHSC->getZExtValue() != 0) return SDValue(); - APInt Comparand = CmpRHSC->getAPIntValue(); const unsigned Opc = CmpLHS.getOpcode(); @@ -29519,19 +29510,16 @@ static SDValue combineSetCCAtomicArith(S if (Opc == ISD::ATOMIC_LOAD_SUB) Addend = -Addend; - if (Comparand == -Addend) { - // No change to CC. - } else if (CC == X86::COND_S && Comparand == 0 && Addend == 1) { + if (CC == X86::COND_S && Addend == 1) CC = X86::COND_LE; - } else if (CC == X86::COND_NS && Comparand == 0 && Addend == 1) { + else if (CC == X86::COND_NS && Addend == 1) CC = X86::COND_G; - } else if (CC == X86::COND_G && Comparand == 0 && Addend == -1) { + else if (CC == X86::COND_G && Addend == -1) CC = X86::COND_GE; - } else if (CC == X86::COND_LE && Comparand == 0 && Addend == -1) { + else if (CC == X86::COND_LE && Addend == -1) CC = X86::COND_L; - } else { + else return SDValue(); - } SDValue LockOp = lowerAtomicArithWithLOCK(CmpLHS, DAG); DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0), Modified: vendor/llvm/dist/lib/Target/X86/X86Subtarget.cpp ============================================================================== --- vendor/llvm/dist/lib/Target/X86/X86Subtarget.cpp Sun Jan 22 16:05:13 2017 (r312624) +++ vendor/llvm/dist/lib/Target/X86/X86Subtarget.cpp Sun Jan 22 16:52:30 2017 (r312625) @@ -232,9 +232,6 @@ void X86Subtarget::initSubtargetFeatures else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() || isTargetKFreeBSD() || In64BitMode) stackAlignment = 16; - - assert((!isPMULLDSlow() || hasSSE41()) && - "Feature Slow PMULLD can only be set on a subtarget with SSE4.1"); } void X86Subtarget::initializeEnvironment() { Modified: vendor/llvm/dist/lib/Transforms/Scalar/LoopStrengthReduce.cpp ============================================================================== --- vendor/llvm/dist/lib/Transforms/Scalar/LoopStrengthReduce.cpp Sun Jan 22 16:05:13 2017 (r312624) +++ vendor/llvm/dist/lib/Transforms/Scalar/LoopStrengthReduce.cpp Sun Jan 22 16:52:30 2017 (r312625) @@ -3163,6 +3163,9 @@ LSRInstance::CollectLoopInvariantFixupsA // Don't bother if the instruction is in a BB which ends in an EHPad. if (UseBB->getTerminator()->isEHPad()) continue; + // Don't bother rewriting PHIs in catchswitch blocks. + if (isa<CatchSwitchInst>(UserInst->getParent()->getTerminator())) + continue; // Ignore uses which are part of other SCEV expressions, to avoid // analyzing them multiple times. if (SE.isSCEVable(UserInst->getType())) { @@ -4672,7 +4675,8 @@ void LSRInstance::RewriteForPHI(PHINode // is the canonical backedge for this loop, which complicates post-inc // users. 
if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 && - !isa<IndirectBrInst>(BB->getTerminator())) { + !isa<IndirectBrInst>(BB->getTerminator()) && + !isa<CatchSwitchInst>(BB->getTerminator())) { BasicBlock *Parent = PN->getParent(); Loop *PNLoop = LI.getLoopFor(Parent); if (!PNLoop || Parent != PNLoop->getHeader()) { Modified: vendor/llvm/dist/lib/Transforms/Scalar/NewGVN.cpp ============================================================================== --- vendor/llvm/dist/lib/Transforms/Scalar/NewGVN.cpp Sun Jan 22 16:05:13 2017 (r312624) +++ vendor/llvm/dist/lib/Transforms/Scalar/NewGVN.cpp Sun Jan 22 16:52:30 2017 (r312625) @@ -81,6 +81,10 @@ STATISTIC(NumGVNOpsSimplified, "Number o STATISTIC(NumGVNPhisAllSame, "Number of PHIs whos arguments are all the same"); STATISTIC(NumGVNMaxIterations, "Maximum Number of iterations it took to converge GVN"); +STATISTIC(NumGVNLeaderChanges, "Number of leader changes"); +STATISTIC(NumGVNSortedLeaderChanges, "Number of sorted leader changes"); +STATISTIC(NumGVNAvoidedSortedLeaderChanges, + "Number of avoided sorted leader changes"); //===----------------------------------------------------------------------===// // GVN Pass @@ -139,6 +143,10 @@ struct CongruenceClass { // This is used so we can detect store equivalence changes properly. int StoreCount = 0; + // The most dominating leader after our current leader, because the member set + // is not sorted and is expensive to keep sorted all the time. + std::pair<Value *, unsigned int> NextLeader = {nullptr, ~0U}; + explicit CongruenceClass(unsigned ID) : ID(ID) {} CongruenceClass(unsigned ID, Value *Leader, const Expression *E) : ID(ID), RepLeader(Leader), DefiningExpr(E) {} @@ -320,8 +328,8 @@ private: // Templated to allow them to work both on BB's and BB-edges. template <class T> Value *lookupOperandLeader(Value *, const User *, const T &) const; - void performCongruenceFinding(Value *, const Expression *); - void moveValueToNewCongruenceClass(Value *, CongruenceClass *, + void performCongruenceFinding(Instruction *, const Expression *); + void moveValueToNewCongruenceClass(Instruction *, CongruenceClass *, CongruenceClass *); // Reachability handling. void updateReachableEdge(BasicBlock *, BasicBlock *); @@ -1056,20 +1064,43 @@ void NewGVN::markLeaderChangeTouched(Con // Move a value, currently in OldClass, to be part of NewClass // Update OldClass for the move (including changing leaders, etc) -void NewGVN::moveValueToNewCongruenceClass(Value *V, CongruenceClass *OldClass, +void NewGVN::moveValueToNewCongruenceClass(Instruction *I, + CongruenceClass *OldClass, CongruenceClass *NewClass) { - DEBUG(dbgs() << "New congruence class for " << V << " is " << NewClass->ID + DEBUG(dbgs() << "New congruence class for " << I << " is " << NewClass->ID << "\n"); - OldClass->Members.erase(V); - NewClass->Members.insert(V); - if (isa<StoreInst>(V)) { + + if (I == OldClass->NextLeader.first) + OldClass->NextLeader = {nullptr, ~0U}; + + // The new instruction and new class leader may either be siblings in the + // dominator tree, or the new class leader should dominate the new member + // instruction. We simply check that the member instruction does not properly + // dominate the new class leader. 
+ assert( + !isa<Instruction>(NewClass->RepLeader) || !NewClass->RepLeader || + I == NewClass->RepLeader || + !DT->properlyDominates( + I->getParent(), + cast<Instruction>(NewClass->RepLeader)->getParent()) && + "New class for instruction should not be dominated by instruction"); + + if (NewClass->RepLeader != I) { + auto DFSNum = InstrDFS.lookup(I); + if (DFSNum < NewClass->NextLeader.second) + NewClass->NextLeader = {I, DFSNum}; + } + + OldClass->Members.erase(I); + NewClass->Members.insert(I); + if (isa<StoreInst>(I)) { --OldClass->StoreCount; assert(OldClass->StoreCount >= 0); ++NewClass->StoreCount; assert(NewClass->StoreCount > 0); } - ValueToClass[V] = NewClass; + ValueToClass[I] = NewClass; // See if we destroyed the class or need to swap leaders. if (OldClass->Members.empty() && OldClass != InitialClass) { if (OldClass->DefiningExpr) { @@ -1078,25 +1109,48 @@ void NewGVN::moveValueToNewCongruenceCla << " from table\n"); ExpressionToClass.erase(OldClass->DefiningExpr); } - } else if (OldClass->RepLeader == V) { + } else if (OldClass->RepLeader == I) { // When the leader changes, the value numbering of // everything may change due to symbolization changes, so we need to // reprocess. - OldClass->RepLeader = *(OldClass->Members.begin()); + DEBUG(dbgs() << "Leader change!\n"); + ++NumGVNLeaderChanges; + // We don't need to sort members if there is only 1, and we don't care about + // sorting the initial class because everything either gets out of it or is + // unreachable. + if (OldClass->Members.size() == 1 || OldClass == InitialClass) { + OldClass->RepLeader = *(OldClass->Members.begin()); + } else if (OldClass->NextLeader.first) { + ++NumGVNAvoidedSortedLeaderChanges; + OldClass->RepLeader = OldClass->NextLeader.first; + OldClass->NextLeader = {nullptr, ~0U}; + } else { + ++NumGVNSortedLeaderChanges; + // TODO: If this ends up to slow, we can maintain a dual structure for + // member testing/insertion, or keep things mostly sorted, and sort only + // here, or .... + std::pair<Value *, unsigned> MinDFS = {nullptr, ~0U}; + for (const auto X : OldClass->Members) { + auto DFSNum = InstrDFS.lookup(X); + if (DFSNum < MinDFS.second) + MinDFS = {X, DFSNum}; + } + OldClass->RepLeader = MinDFS.first; + } markLeaderChangeTouched(OldClass); } } // Perform congruence finding on a given value numbering expression. -void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { - ValueToExpression[V] = E; +void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) { + ValueToExpression[I] = E; // This is guaranteed to return something, since it will at least find // INITIAL. - CongruenceClass *VClass = ValueToClass[V]; - assert(VClass && "Should have found a vclass"); + CongruenceClass *IClass = ValueToClass[I]; + assert(IClass && "Should have found a IClass"); // Dead classes should have been eliminated from the mapping. 
- assert(!VClass->Dead && "Found a dead class"); + assert(!IClass->Dead && "Found a dead class"); CongruenceClass *EClass; if (const auto *VE = dyn_cast<VariableExpression>(E)) { @@ -1118,13 +1172,13 @@ void NewGVN::performCongruenceFinding(Va NewClass->RepLeader = lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent()); } else { - NewClass->RepLeader = V; + NewClass->RepLeader = I; } assert(!isa<VariableExpression>(E) && "VariableExpression should have been handled already"); EClass = NewClass; - DEBUG(dbgs() << "Created new congruence class for " << *V + DEBUG(dbgs() << "Created new congruence class for " << *I << " using expression " << *E << " at " << NewClass->ID << " and leader " << *(NewClass->RepLeader) << "\n"); DEBUG(dbgs() << "Hash value was " << E->getHashValue() << "\n"); @@ -1140,36 +1194,31 @@ void NewGVN::performCongruenceFinding(Va assert(!EClass->Dead && "We accidentally looked up a dead class"); } } - bool ClassChanged = VClass != EClass; - bool LeaderChanged = LeaderChanges.erase(V); + bool ClassChanged = IClass != EClass; + bool LeaderChanged = LeaderChanges.erase(I); if (ClassChanged || LeaderChanged) { DEBUG(dbgs() << "Found class " << EClass->ID << " for expression " << E << "\n"); if (ClassChanged) - - moveValueToNewCongruenceClass(V, VClass, EClass); - - - markUsersTouched(V); - if (auto *I = dyn_cast<Instruction>(V)) { - if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) { - // If this is a MemoryDef, we need to update the equivalence table. If - // we determined the expression is congruent to a different memory - // state, use that different memory state. If we determined it didn't, - // we update that as well. Right now, we only support store - // expressions. - if (!isa<MemoryUse>(MA) && isa<StoreExpression>(E) && - EClass->Members.size() != 1) { - auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess(); - setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr); - } else { - setMemoryAccessEquivTo(MA, nullptr); - } - markMemoryUsersTouched(MA); + moveValueToNewCongruenceClass(I, IClass, EClass); + markUsersTouched(I); + if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) { + // If this is a MemoryDef, we need to update the equivalence table. If + // we determined the expression is congruent to a different memory + // state, use that different memory state. If we determined it didn't, + // we update that as well. Right now, we only support store + // expressions. + if (!isa<MemoryUse>(MA) && isa<StoreExpression>(E) && + EClass->Members.size() != 1) { + auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess(); + setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr); + } else { + setMemoryAccessEquivTo(MA, nullptr); } + markMemoryUsersTouched(MA); } - } else if (StoreInst *SI = dyn_cast<StoreInst>(V)) { + } else if (auto *SI = dyn_cast<StoreInst>(I)) { // There is, sadly, one complicating thing for stores. Stores do not // produce values, only consume them. However, in order to make loads and // stores value number the same, we ignore the value operand of the store. 
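The leader-bookkeeping idea behind the NewGVN change above can be illustrated with a small standalone sketch (not LLVM code; the class, member ids, and DFS map below are all hypothetical). The point is the cached runner-up: because the member set is unsorted, promoting a new leader would otherwise require scanning every member for the lowest DFS number, so the class remembers the next-most-dominating member and only falls back to a full scan when that cache is empty. In the diff this corresponds to the NextLeader pair and the NumGVNAvoidedSortedLeaderChanges / NumGVNSortedLeaderChanges statistics.

// Standalone sketch: an unsorted "congruence class" that caches the
// next-most-dominating member (lowest DFS number after the leader) so that
// most leader removals avoid an O(n) rescan of the member set.
#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <unordered_set>
#include <utility>

using DFSMap = std::unordered_map<int, uint32_t>; // hypothetical stand-in for InstrDFS

struct ClassSketch {
  std::unordered_set<int> Members;                        // unsorted member ids
  int Leader = -1;                                        // current leader id
  std::pair<int, uint32_t> NextLeader = {-1, UINT32_MAX}; // cached runner-up

  // Add a member and keep the runner-up cache up to date.
  void add(int Id, const DFSMap &DFS) {
    Members.insert(Id);
    if (Id != Leader && DFS.at(Id) < NextLeader.second)
      NextLeader = {Id, DFS.at(Id)};
  }

  // Remove a member.  If it was the leader, promote the cached runner-up when
  // one is known; otherwise fall back to the full scan the cache is meant to avoid.
  void remove(int Id, const DFSMap &DFS) {
    Members.erase(Id);
    if (Id == NextLeader.first)
      NextLeader = {-1, UINT32_MAX};
    if (Id != Leader || Members.empty())
      return;
    if (NextLeader.first != -1) {
      Leader = NextLeader.first;                          // fast path
      NextLeader = {-1, UINT32_MAX};
      return;
    }
    std::pair<int, uint32_t> MinDFS = {-1, UINT32_MAX};   // slow path: rescan
    for (int M : Members)
      if (DFS.at(M) < MinDFS.second)
        MinDFS = {M, DFS.at(M)};
    Leader = MinDFS.first;
  }
};

int main() {
  DFSMap DFS = {{1, 10}, {2, 20}, {3, 30}};
  ClassSketch C;
  C.Leader = 1;
  C.add(1, DFS);
  C.add(2, DFS);
  C.add(3, DFS);
  C.remove(1, DFS); // leader leaves; cached runner-up (id 2) takes over
  std::cout << "new leader: " << C.Leader << "\n"; // prints "new leader: 2"
  return 0;
}
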
Modified: vendor/llvm/dist/lib/Transforms/Vectorize/LoopVectorize.cpp ============================================================================== --- vendor/llvm/dist/lib/Transforms/Vectorize/LoopVectorize.cpp Sun Jan 22 16:05:13 2017 (r312624) +++ vendor/llvm/dist/lib/Transforms/Vectorize/LoopVectorize.cpp Sun Jan 22 16:52:30 2017 (r312625) @@ -5602,6 +5602,13 @@ void LoopVectorizationLegality::collectL // is consecutive-like, the pointer operand should remain uniform. else if (hasConsecutiveLikePtrOperand(&I)) ConsecutiveLikePtrs.insert(Ptr); + + // Otherwise, if the memory instruction will be vectorized and its + // pointer operand is non-consecutive-like, the memory instruction should + // be a gather or scatter operation. Its pointer operand will be + // non-uniform. + else + PossibleNonUniformPtrs.insert(Ptr); } // Add to the Worklist all consecutive and consecutive-like pointers that Modified: vendor/llvm/dist/test/CodeGen/X86/atomic-eflags-reuse.ll ============================================================================== --- vendor/llvm/dist/test/CodeGen/X86/atomic-eflags-reuse.ll Sun Jan 22 16:05:13 2017 (r312624) +++ vendor/llvm/dist/test/CodeGen/X86/atomic-eflags-reuse.ll Sun Jan 22 16:52:30 2017 (r312625) @@ -192,68 +192,4 @@ entry: ret i8 %s2 } -define i8 @test_sub_1_setcc_eq(i64* %p) #0 { -; CHECK-LABEL: test_sub_1_setcc_eq: -; CHECK: # BB#0: # %entry -; CHECK-NEXT: lock decq (%rdi) -; CHECK-NEXT: sete %al -; CHECK-NEXT: retq -entry: - %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst - %tmp1 = icmp eq i64 %tmp0, 1 - %tmp2 = zext i1 %tmp1 to i8 - ret i8 %tmp2 -} - -define i8 @test_add_5_setcc_ne(i64* %p) #0 { -; CHECK-LABEL: test_add_5_setcc_ne: -; CHECK: # BB#0: # %entry -; CHECK-NEXT: lock addq $5, (%rdi) -; CHECK-NEXT: setne %al -; CHECK-NEXT: retq -entry: - %tmp0 = atomicrmw add i64* %p, i64 5 seq_cst - %tmp1 = icmp ne i64 %tmp0, -5 - %tmp2 = zext i1 %tmp1 to i8 - ret i8 %tmp2 -} - -define i8 @test_add_5_setcc_ne_comparand_mismatch(i64* %p) #0 { -; CHECK-LABEL: test_add_5_setcc_ne_comparand_mismatch: -; CHECK: # BB#0: # %entry -; CHECK-NEXT: movl $5, %eax -; CHECK-NEXT: lock xaddq %rax, (%rdi) -; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: setne %al -; CHECK-NEXT: retq -entry: - %tmp0 = atomicrmw add i64* %p, i64 5 seq_cst - %tmp1 = icmp ne i64 %tmp0, 0 - %tmp2 = zext i1 %tmp1 to i8 - ret i8 %tmp2 -} - -declare void @g() -define zeroext i1 @test_sub_1_setcc_jcc(i64* %p) local_unnamed_addr #0 { -; TODO: It's possible to use "lock dec" here, but both uses of the cmp need to -; be updated. 
-; CHECK-LABEL: test_sub_1_setcc_jcc: -; CHECK: # BB#0: # %entry -; CHECK: movq $-1, %rax -; CHECK-NEXT: lock xaddq %rax, (%rdi) -; CHECK-NEXT: cmpq $1, %rax -; CHECK-NEXT: sete %bl -; CHECK-NEXT: jne -entry: - %add = atomicrmw volatile add i64* %p, i64 -1 seq_cst - %cmp = icmp ne i64 %add, 1 - %not = xor i1 %cmp, true - br i1 %cmp, label %else, label %then -then: - tail call void @g() - br label %else -else: - ret i1 %not -} - attributes #0 = { nounwind } Modified: vendor/llvm/dist/test/CodeGen/X86/slow-pmulld.ll ============================================================================== --- vendor/llvm/dist/test/CodeGen/X86/slow-pmulld.ll Sun Jan 22 16:05:13 2017 (r312624) +++ vendor/llvm/dist/test/CodeGen/X86/slow-pmulld.ll Sun Jan 22 16:52:30 2017 (r312625) @@ -4,6 +4,9 @@ ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE4-32 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE4-64 +; Make sure that the slow-pmulld feature can be used without SSE4.1. +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont -mattr=-sse4.1 + define <4 x i32> @foo(<4 x i8> %A) { ; CHECK32-LABEL: foo: ; CHECK32: # BB#0: Modified: vendor/llvm/dist/test/ThinLTO/X86/lazyload_metadata.ll ============================================================================== --- vendor/llvm/dist/test/ThinLTO/X86/lazyload_metadata.ll Sun Jan 22 16:05:13 2017 (r312624) +++ vendor/llvm/dist/test/ThinLTO/X86/lazyload_metadata.ll Sun Jan 22 16:52:30 2017 (r312625) @@ -17,7 +17,7 @@ ; RUN: -o /dev/null -disable-ondemand-mds-loading -stats \ ; RUN: 2>&1 | FileCheck %s -check-prefix=NOTLAZY ; NOTLAZY: 58 bitcode-reader - Number of Metadata records loaded -; NOTLAZY: 8 bitcode-reader - Number of MDStrings loaded +; NOTLAZY: 6 bitcode-reader - Number of MDStrings loaded target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @@ -48,7 +48,7 @@ define void @globalfunc3(i32 %arg) { !3 = !{!"3"} !4 = !{!"4"} !5 = !{!"5"} -!6 = !{!"6"} +!6 = !{!9} !7 = !{!"7"} !8 = !{!"8"} -!9 = !{!"9"} +!9 = !{!6} Added: vendor/llvm/dist/test/Transforms/LoopStrengthReduce/pr31627.ll ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ vendor/llvm/dist/test/Transforms/LoopStrengthReduce/pr31627.ll Sun Jan 22 16:52:30 2017 (r312625) @@ -0,0 +1,58 @@ +; RUN: opt -S -loop-reduce < %s | FileCheck %s +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc19.0.24215" + +define void @fn3() personality i32 (...)* @__CxxFrameHandler3 { +entry: + %call = invoke i32 @fn2() + to label %for.cond.preheader unwind label %catch.dispatch2 + +for.cond.preheader: ; preds = %entry + br label %for.cond + +for.cond: ; preds = %for.cond.preheader, %for.cond + %b.0 = phi i32 [ %inc, %for.cond ], [ %call, %for.cond.preheader ] + %inc = add nsw i32 %b.0, 1 + invoke void @fn1(i32 %inc) + to label %for.cond unwind label %catch.dispatch + +; CHECK: %[[add:.*]] = add i32 %call, 1 +; CHECK: br label %for.cond + +; CHECK: for.cond: ; preds = %for.cond, %for.cond.preheader +; CHECK: %[[lsr_iv:.*]] = phi i32 [ %lsr.iv.next, %for.cond ], [ %[[add]], %for.cond.preheader ] +; CHECK: %[[lsr_iv_next:.*]] = add i32 %lsr.iv, 1 +; CHECK: invoke void @fn1(i32 %[[lsr_iv]]) + + +catch.dispatch: ; preds = %for.cond + %0 = catchswitch within none [label %catch] unwind label %catch.dispatch2 + +catch: ; preds = %catch.dispatch + %1 = catchpad within %0 [i8* 
null, i32 64, i8* null] + invoke void @_CxxThrowException(i8* null, i8* null) #2 [ "funclet"(token %1) ] + to label %unreachable unwind label %catch.dispatch2 + +catch.dispatch2: ; preds = %catch.dispatch, %catch, %entry + %a.0 = phi i32 [ undef, %entry ], [ %call, %catch ], [ %call, %catch.dispatch ] + %2 = catchswitch within none [label %catch3] unwind to caller + +catch3: ; preds = %catch.dispatch2 + %3 = catchpad within %2 [i8* null, i32 64, i8* null] + call void @fn1(i32 %a.0) [ "funclet"(token %3) ] + catchret from %3 to label %try.cont4 + +try.cont4: ; preds = %catch3 + ret void + +unreachable: ; preds = %catch + unreachable +} + +declare i32 @fn2() + +declare i32 @__CxxFrameHandler3(...) + +declare void @fn1(i32) + +declare void @_CxxThrowException(i8*, i8*) Added: vendor/llvm/dist/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ vendor/llvm/dist/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll Sun Jan 22 16:52:30 2017 (r312625) @@ -0,0 +1,56 @@ +; REQUIRES: asserts +; RUN: opt < %s -loop-vectorize -instcombine -S -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: PR31671 +; +; Check a pointer in which one of its uses is consecutive-like and another of +; its uses is non-consecutive-like. In the test case below, %tmp3 is the +; pointer operand of an interleaved load, making it consecutive-like. However, +; it is also the pointer operand of a non-interleaved store that will become a +; scatter operation. %tmp3 (and the induction variable) should not be marked +; uniform-after-vectorization. 
+; +; CHECK: LV: Found uniform instruction: %tmp0 = getelementptr inbounds %data, %data* %d, i64 0, i32 3, i64 %i +; CHECK-NOT: LV: Found uniform instruction: %tmp3 = getelementptr inbounds %data, %data* %d, i64 0, i32 0, i64 %i +; CHECK-NOT: LV: Found uniform instruction: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] +; CHECK-NOT: LV: Found uniform instruction: %i.next = add nuw nsw i64 %i, 5 +; CHECK: vector.body: +; CHECK: %vec.ind = phi <16 x i64> +; CHECK: %[[T0:.+]] = extractelement <16 x i64> %vec.ind, i32 0 +; CHECK: %[[T1:.+]] = getelementptr inbounds %data, %data* %d, i64 0, i32 3, i64 %[[T0]] +; CHECK: %[[T2:.+]] = bitcast float* %[[T1]] to <80 x float>* +; CHECK: load <80 x float>, <80 x float>* %[[T2]], align 4 +; CHECK: %[[T3:.+]] = getelementptr inbounds %data, %data* %d, i64 0, i32 0, i64 %[[T0]] +; CHECK: %[[T4:.+]] = bitcast float* %[[T3]] to <80 x float>* +; CHECK: load <80 x float>, <80 x float>* %[[T4]], align 4 +; CHECK: %VectorGep = getelementptr inbounds %data, %data* %d, i64 0, i32 0, <16 x i64> %vec.ind +; CHECK: call void @llvm.masked.scatter.v16f32({{.*}}, <16 x float*> %VectorGep, {{.*}}) +; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body + +%data = type { [32000 x float], [3 x i32], [4 x i8], [32000 x float] } + +define void @PR31671(float %x, %data* %d) #0 { +entry: + br label %for.body + +for.body: + %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] + %tmp0 = getelementptr inbounds %data, %data* %d, i64 0, i32 3, i64 %i + %tmp1 = load float, float* %tmp0, align 4 + %tmp2 = fmul float %x, %tmp1 + %tmp3 = getelementptr inbounds %data, %data* %d, i64 0, i32 0, i64 %i + %tmp4 = load float, float* %tmp3, align 4 + %tmp5 = fadd float %tmp4, %tmp2 + store float %tmp5, float* %tmp3, align 4 + %i.next = add nuw nsw i64 %i, 5 + %cond = icmp slt i64 %i.next, 32000 + br i1 %cond, label %for.body, label %for.end + +for.end: + ret void +} + +attributes #0 = { "target-cpu"="knl" } Added: vendor/llvm/dist/test/Transforms/NewGVN/pr31613.ll ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ vendor/llvm/dist/test/Transforms/NewGVN/pr31613.ll Sun Jan 22 16:52:30 2017 (r312625) @@ -0,0 +1,135 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +;; Both of these tests are tests of phi nodes that end up all equivalent to each other +;; Without proper leader ordering, we will end up cycling the leader between all of them and never converge. 
+ +define void @foo() { +; CHECK-LABEL: @foo( +; CHECK-NEXT: bb: +; CHECK-NEXT: br label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ 1, [[BB18:%.*]] ] +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: bb4: +; CHECK-NEXT: br i1 undef, label [[BB18]], label [[BB7:%.*]] +; CHECK: bb7: +; CHECK-NEXT: br label [[BB9:%.*]] +; CHECK: bb9: +; CHECK-NEXT: br i1 undef, label [[BB2]], label [[BB11:%.*]] +; CHECK: bb11: +; CHECK-NEXT: br i1 undef, label [[BB16:%.*]], label [[BB14:%.*]] +; CHECK: bb14: +; CHECK-NEXT: br label [[BB4]] +; CHECK: bb16: +; CHECK-NEXT: br label [[BB7]] +; CHECK: bb18: +; CHECK-NEXT: br label [[BB1]] +; +bb: + br label %bb1 + +bb1: ; preds = %bb18, %bb + %tmp = phi i32 [ 0, %bb ], [ 1, %bb18 ] + br label %bb2 + +bb2: ; preds = %bb9, %bb1 + %tmp3 = phi i32 [ %tmp, %bb1 ], [ %tmp8, %bb9 ] + br label %bb4 + +bb4: ; preds = %bb14, %bb2 + %tmp5 = phi i32 [ %tmp3, %bb2 ], [ %tmp15, %bb14 ] + br i1 undef, label %bb18, label %bb7 + +bb7: ; preds = %bb16, %bb4 + %tmp8 = phi i32 [ %tmp17, %bb16 ], [ %tmp5, %bb4 ] + br label %bb9 + +bb9: ; preds = %bb7 + br i1 undef, label %bb2, label %bb11 + +bb11: ; preds = %bb9 + br i1 undef, label %bb16, label %bb14 + +bb14: ; preds = %bb11 + %tmp15 = phi i32 [ %tmp8, %bb11 ] + br label %bb4 + +bb16: ; preds = %bb11 + %tmp17 = phi i32 [ %tmp8, %bb11 ] + br label %bb7 + +bb18: ; preds = %bb4 + br label %bb1 +} + +%struct.a = type {} +%struct.b = type {} + +declare void @c.d.p(i64, i8*) + +define void @e() { +; CHECK-LABEL: @e( +; CHECK-NEXT: [[F:%.*]] = alloca i32 +; CHECK-NEXT: store i32 undef, i32* [[F]], !g !0 +; CHECK-NEXT: br label [[H:%.*]] +; CHECK: h: +; CHECK-NEXT: call void @c.d.p(i64 8, i8* undef) +; CHECK-NEXT: [[I:%.*]] = load i32, i32* [[F]] +; CHECK-NEXT: [[J:%.*]] = load i32, i32* null +; CHECK-NEXT: [[K:%.*]] = icmp eq i32 [[I]], [[J]] +; CHECK-NEXT: br i1 [[K]], label [[L:%.*]], label [[Q:%.*]] +; CHECK: l: +; CHECK-NEXT: br label [[R:%.*]] +; CHECK: q: +; CHECK-NEXT: [[M:%.*]] = load %struct.a*, %struct.a** null +; CHECK-NEXT: br label [[R]] +; CHECK: r: +; CHECK-NEXT: switch i32 undef, label [[N:%.*]] [ +; CHECK-NEXT: i32 0, label [[S:%.*]] +; CHECK-NEXT: ] +; CHECK: s: +; CHECK-NEXT: store i32 undef, i32* [[F]], !g !0 +; CHECK-NEXT: br label [[H]] +; CHECK: n: +; CHECK-NEXT: [[O:%.*]] = load %struct.a*, %struct.a** null +; CHECK-NEXT: ret void +; + %f = alloca i32 + store i32 undef, i32* %f, !g !0 + br label %h + +h: ; preds = %s, %0 + call void @c.d.p(i64 8, i8* undef) + %i = load i32, i32* %f + %j = load i32, i32* null + %k = icmp eq i32 %i, %j + br i1 %k, label %l, label %q + +l: ; preds = %h + br label %r + +q: ; preds = %h + %m = load %struct.a*, %struct.a** null + %1 = bitcast %struct.a* %m to %struct.b* + br label %r + +r: ; preds = %q, %l + switch i32 undef, label %n [ + i32 0, label %s + ] + +s: ; preds = %r + store i32 undef, i32* %f, !g !0 + br label %h + +n: ; preds = %r + %o = load %struct.a*, %struct.a** null + %2 = bitcast %struct.a* %o to %struct.b* + ret void +} + +!0 = !{}
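As context for the combineSetCCAtomicArith change and the tests removed from atomic-eflags-reuse.ll above, the snippet below shows the kind of source pattern the combine targets. This is a standalone illustrative example, not taken from the commit; with this change the combine only fires when the fetched value is compared against 0, and whether a given compiler actually emits the flag-reusing form depends on the target and optimization level. The deleted tests covered comparisons against the negated addend (for example, an add of 5 compared with -5); with this change those cases go back to an xadd plus an explicit compare.

// Illustration only: testing the sign of an atomic fetch-and-add result.
// On x86-64 a backend with the combine above can branch on the EFLAGS set by
// the `lock add` itself (COND_S on the old value becomes COND_LE on the new
// value), instead of re-reading the value and issuing a separate cmp/test.
#include <atomic>
#include <cstdio>

std::atomic<long> Counter{-1};

bool wasNegativeBeforeIncrement() {
  // fetch_add returns the previous value; "previous < 0" is the
  // (setcc (cmp (atomic_load_add x, 1), 0), COND_S) shape from the diff.
  return Counter.fetch_add(1, std::memory_order_seq_cst) < 0;
}

int main() {
  std::printf("%d\n", wasNegativeBeforeIncrement() ? 1 : 0); // prints 1
  return 0;
}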