marcrasi created this revision. Herald added subscribers: cfe-commits, jfb, mgorny. Herald added a project: clang. marcrasi edited the summary of this revision.
This allows clients to invoke Clang without writing anything to disk. I'm not sure if this is the right design, so I'm sending this patch to start a discussion about the right way to do this. I'll be happy to make changes and add tests after talking about the right way to do this! Motivation: At Google, we run some Swift source tooling on diskless servers. The Swift source tooling calls Clang through the ClangImporter (https://github.com/apple/swift/blob/master/lib/ClangImporter/ClangImporter.cpp). Clang compiles modules into an on-disk module cache, which does not work on the diskless servers. This patch lets clients ask Clang to write the module cache to memory. Current design: - Adds `class InMemoryOutputFileSystem : public llvm::vfs::FileSystem` that supports write operations backed by memory. After a file is written, its contents are accessible through the `llvm::vfs::FileSystem` interface. - Adds an `InMemoryOutputFileSystem` field to `CompilerInstance`. When this field is set, the `CompilerInstance` writes to the `InMemoryOutputFileSystem` instead of the real file system. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D78058 Files: clang/include/clang/Basic/InMemoryOutputFileSystem.h clang/include/clang/Frontend/CompilerInstance.h clang/lib/Basic/CMakeLists.txt clang/lib/Basic/InMemoryOutputFileSystem.cpp clang/lib/Frontend/CompilerInstance.cpp
Index: clang/lib/Frontend/CompilerInstance.cpp =================================================================== --- clang/lib/Frontend/CompilerInstance.cpp +++ clang/lib/Frontend/CompilerInstance.cpp @@ -642,6 +642,18 @@ void CompilerInstance::clearOutputFiles(bool EraseFiles) { for (OutputFile &OF : OutputFiles) { + if (InMemoryOutputFileSystem) { + assert(!OF.TempFilename.empty() && + "InMemoryOutputFileSystem requires using temporary files"); + if (EraseFiles) { + InMemoryOutputFileSystem->DeleteTemporaryBuffer(OF.TempFilename); + } else { + InMemoryOutputFileSystem->FinalizeTemporaryBuffer(OF.Filename, + OF.TempFilename); + } + continue; + } + if (!OF.TempFilename.empty()) { if (EraseFiles) { llvm::sys::fs::remove(OF.TempFilename); @@ -728,6 +740,18 @@ OutFile = "-"; } + if (InMemoryOutputFileSystem) { + assert(UseTemporary && + "InMemoryOutputFileSystem requires using temporary files"); + auto stream = + InMemoryOutputFileSystem->CreateTemporaryBuffer(OutFile, &TempFile); + if (ResultPathName) + *ResultPathName = OutFile; + if (TempPathName) + *TempPathName = TempFile; + return stream; + } + std::unique_ptr<llvm::raw_fd_ostream> OS; std::string OSFile; @@ -1126,6 +1150,9 @@ ImportingInstance.getDiagnosticClient()), /*ShouldOwnClient=*/true); + Instance.setInMemoryOutputFileSystem( + ImportingInstance.getInMemoryOutputFileSystem()); + // Note that this module is part of the module build stack, so that we // can detect cycles in the module graph. Instance.setFileManager(&ImportingInstance.getFileManager()); @@ -1271,6 +1298,32 @@ << Module->Name << SourceRange(ImportLoc, ModuleNameLoc); }; + // If we're writing to an InMemoryOutputFileSystem, then immediately compile + // and read the module, rather than doing all the lockfile based locking logic + // below, because the InMemoryOutputFileSystem doesn't support lockfiles. This + // is okay because the locks are only necessary for performance, not + // correctness. 
+ if (ImportingInstance.getInMemoryOutputFileSystem()) { + if (!compileModule(ImportingInstance, ModuleNameLoc, Module, + ModuleFileName)) { + diagnoseBuildFailure(); + return false; + } + + // Try to read the module file, now that we've compiled it. + ASTReader::ASTReadResult ReadResult = + ImportingInstance.getASTReader()->ReadAST( + ModuleFileName, serialization::MK_ImplicitModule, ImportLoc, + ASTReader::ARR_None); + + if (ReadResult != ASTReader::Success) { + diagnoseBuildFailure(); + return false; + } + + return true; + } + // FIXME: have LockFileManager return an error_code so that we can // avoid the mkdir when the directory already exists. StringRef Dir = llvm::sys::path::parent_path(ModuleFileName); Index: clang/lib/Basic/InMemoryOutputFileSystem.cpp =================================================================== --- /dev/null +++ clang/lib/Basic/InMemoryOutputFileSystem.cpp @@ -0,0 +1,56 @@ +//=== InMemoryOutputFileSystem.cpp - Collects outputs in memory -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/InMemoryOutputFileSystem.h" + +namespace clang { + +std::unique_ptr<llvm::raw_pwrite_stream> +InMemoryOutputFileSystem::CreateTemporaryBuffer(llvm::StringRef OutputPath, + std::string *TemporaryPath) { + assert(TemporaryPath); + std::lock_guard<std::mutex> locked(Mu); + llvm::StringMap<llvm::SmallVector<char, 0>>::iterator it; + bool inserted = false; + unsigned suffix = 0; + while (!inserted) { + *TemporaryPath = ""; + llvm::raw_string_ostream TemporaryPathOS(*TemporaryPath); + TemporaryPathOS << OutputPath << "-" << suffix; + TemporaryPathOS.flush(); + auto result = TemporaryBuffers.try_emplace(*TemporaryPath); + it = result.first; + inserted = result.second; + suffix += 1; + } + return std::make_unique<llvm::raw_svector_ostream>(it->getValue()); +} + +void InMemoryOutputFileSystem::DeleteTemporaryBuffer( + llvm::StringRef TemporaryPath) { + std::lock_guard<std::mutex> locked(Mu); + auto it = TemporaryBuffers.find(TemporaryPath); + assert(it != TemporaryBuffers.end()); + TemporaryBuffers.erase(it); +} + +void InMemoryOutputFileSystem::FinalizeTemporaryBuffer( + llvm::StringRef OutputPath, llvm::StringRef TemporaryPath) { + std::lock_guard<std::mutex> locked(Mu); + auto it = TemporaryBuffers.find(TemporaryPath); + assert(it != TemporaryBuffers.end()); + auto memoryBuffer = llvm::MemoryBuffer::getMemBufferCopy( + llvm::StringRef{it->getValue().data(), it->getValue().size()}, + OutputPath); + OutputFiles->addFile(OutputPath, /*ModificationTime=*/0, + std::move(memoryBuffer)); + TemporaryBuffers.erase(it); +} + +} // namespace clang Index: clang/lib/Basic/CMakeLists.txt =================================================================== --- clang/lib/Basic/CMakeLists.txt +++ clang/lib/Basic/CMakeLists.txt @@ -49,6 +49,7 @@ FileSystemStatCache.cpp FixedPoint.cpp IdentifierTable.cpp + InMemoryOutputFileSystem.cpp LangOptions.cpp LangStandards.cpp Module.cpp 
Index: clang/include/clang/Frontend/CompilerInstance.h =================================================================== --- clang/include/clang/Frontend/CompilerInstance.h +++ clang/include/clang/Frontend/CompilerInstance.h @@ -11,6 +11,7 @@ #include "clang/AST/ASTConsumer.h" #include "clang/Basic/Diagnostic.h" +#include "clang/Basic/InMemoryOutputFileSystem.h" #include "clang/Basic/SourceManager.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/PCHContainerOperations.h" @@ -182,6 +183,10 @@ /// Force an output buffer. std::unique_ptr<llvm::raw_pwrite_stream> OutputStream; + /// If defined, outputs will be written here instead of to the real + /// filesystem. + IntrusiveRefCntPtr<InMemoryOutputFileSystem> InMemoryOutputFileSystem; + CompilerInstance(const CompilerInstance &) = delete; void operator=(const CompilerInstance &) = delete; public: @@ -392,6 +397,20 @@ llvm::vfs::FileSystem &getVirtualFileSystem() const; + /// } + /// @name In-Memory Output File System + /// { + + IntrusiveRefCntPtr<clang::InMemoryOutputFileSystem> + getInMemoryOutputFileSystem() const { + return InMemoryOutputFileSystem; + } + + void setInMemoryOutputFileSystem( + IntrusiveRefCntPtr<clang::InMemoryOutputFileSystem> FS) { + InMemoryOutputFileSystem = std::move(FS); + } + /// } /// @name File Manager /// { Index: clang/include/clang/Basic/InMemoryOutputFileSystem.h =================================================================== --- /dev/null +++ clang/include/clang/Basic/InMemoryOutputFileSystem.h @@ -0,0 +1,104 @@ +//===-- InMemoryOutputFileSystem.h - Collects outputs in memory -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_INMEMORYOUTPUTFILESYSTEM_H_ +#define LLVM_CLANG_BASIC_INMEMORYOUTPUTFILESYSTEM_H_ + +#include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/VirtualFileSystem.h" +#include "llvm/Support/raw_ostream.h" +#include <memory> + +namespace clang { + +/// Collects output files in memory, and provides a `llvm::vfs::FileSystem` +/// interface for accessing those files. +/// +/// This class is threadsafe. Unsynchronized calls from multiple threads will +/// not corrupt the internal state, and operations occur atomically and +/// sequentially consistently from the point of view of all threads. +class InMemoryOutputFileSystem : public llvm::vfs::FileSystem { +public: + InMemoryOutputFileSystem() + : OutputFiles(new llvm::vfs::InMemoryFileSystem()) {} + + /// Creates a temporary buffer that collects data for a file that may + /// eventually appear on the `llvm::vfs::FileSystem` interface. + /// `InMemoryOutputFileSystem` owns the buffer, which will not be released + /// until `DeleteTemporaryBuffer` or `FinalizeTemporaryBuffer` is called. + /// \param OutputPath the path of the file that may eventually be created. + /// \param TemporaryPath must be non-null. Pointee will be set to a unique + /// string identifying this particular temporary buffer. + /// \returns A stream that can be used to write to the buffer. + std::unique_ptr<llvm::raw_pwrite_stream> + CreateTemporaryBuffer(llvm::StringRef OutputPath, std::string *TemporaryPath); + + /// Releases the buffer underlying the temporary file. + /// \param TemporaryPath the unique string from `CreateTemporaryBuffer`.
+ void DeleteTemporaryBuffer(llvm::StringRef TemporaryPath); + + /// Makes the contents of the specified temporary buffer visible on the + /// `llvm::vfs::FileSystem` interface, and releases the temporary buffer. If + /// the file already exists on the `llvm::vfs::FileSystem` interface, then + /// the new contents are silently ignored. + /// \param OutputPath the path of the file to create. + /// \param TemporaryPath the unique string from `CreateTemporaryBuffer`. + void FinalizeTemporaryBuffer(llvm::StringRef OutputPath, + llvm::StringRef TemporaryPath); + + // MARK: - `llvm::vfs::FileSystem` overrides + + llvm::ErrorOr<llvm::vfs::Status> status(const llvm::Twine &relpath) override { + std::lock_guard<std::mutex> locked(Mu); + return OutputFiles->status(relpath); + } + + llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> + openFileForRead(const llvm::Twine &relpath) override { + std::lock_guard<std::mutex> locked(Mu); + return OutputFiles->openFileForRead(relpath); + } + + llvm::vfs::directory_iterator dir_begin(const llvm::Twine &reldir, + std::error_code &err) override { + std::lock_guard<std::mutex> locked(Mu); + return OutputFiles->dir_begin(reldir, err); + } + + std::error_code setCurrentWorkingDirectory(const llvm::Twine &path) override { + std::lock_guard<std::mutex> locked(Mu); + return OutputFiles->setCurrentWorkingDirectory(path); + } + + llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override { + std::lock_guard<std::mutex> locked(Mu); + return OutputFiles->getCurrentWorkingDirectory(); + } + + std::error_code + getRealPath(const llvm::Twine &path, + llvm::SmallVectorImpl<char> &output) const override { + std::lock_guard<std::mutex> locked(Mu); + return OutputFiles->getRealPath(path, output); + } + +private: + mutable std::mutex Mu; + llvm::StringMap<llvm::SmallVector<char, 0>> TemporaryBuffers; + llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> OutputFiles; +}; + +} // namespace clang + +#endif //
LLVM_CLANG_BASIC_INMEMORYOUTPUTFILESYSTEM_H_
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits