sammccall created this revision. sammccall added reviewers: bkramer, thakis. Herald added subscribers: cfe-commits, kadircet, ioeric, ilya-biryukov.
r347205 fixed a bug in FileManager: first calling getFile(shouldOpen=false) and then getFile(shouldOpen=true) results in the file not being open. Unfortunately, some code was (inadvertently?) relying on this bug: when building with a PCH, the file entries are obtained first by passing shouldOpen=false, and then later shouldOpen=true, without any intention of reading them. After r347205, they do get unneccesarily opened. Aside from extra operations, this means they need to be closed. Normally files are closed when their contents are read. As these files are never read, they stay open until clang exits. On platforms with a low open-files limit (e.g. Mac), this can lead to spurious file-not-found errors when building large projects with PCH enabled, e.g. https://bugs.chromium.org/p/chromium/issues/detail?id=924225 Fixing the callsites to pass shouldOpen=false when the file won't be read is not quite trivial (that info isn't available at the direct callsite), and passing shouldOpen=false is a performance regression (it results in open+fstat pairs being replaced by stat+open). So an ideal fix is going to be a little risky and we need some fix soon (especially for the llvm 8 branch). The problem addressed by r347205 is rare and has only been observed in clangd. It was present in llvm-7, so we can live with it for now. Repository: rC Clang https://reviews.llvm.org/D57165 Files: include/clang/Basic/FileManager.h lib/Basic/FileManager.cpp test/PCH/leakfiles unittests/Basic/FileManagerTest.cpp
Index: unittests/Basic/FileManagerTest.cpp =================================================================== --- unittests/Basic/FileManagerTest.cpp +++ unittests/Basic/FileManagerTest.cpp @@ -221,33 +221,6 @@ EXPECT_EQ(nullptr, file); } -// When calling getFile(OpenFile=false); getFile(OpenFile=true) the file is -// opened for the second call. -TEST_F(FileManagerTest, getFileDefersOpen) { - llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS( - new llvm::vfs::InMemoryFileSystem()); - FS->addFile("/tmp/test", 0, llvm::MemoryBuffer::getMemBufferCopy("test")); - FS->addFile("/tmp/testv", 0, llvm::MemoryBuffer::getMemBufferCopy("testv")); - FileManager manager(options, FS); - - const FileEntry *file = manager.getFile("/tmp/test", /*OpenFile=*/false); - ASSERT_TRUE(file != nullptr); - ASSERT_TRUE(file->isValid()); - // "real path name" reveals whether the file was actually opened. - EXPECT_FALSE(file->isOpenForTests()); - - file = manager.getFile("/tmp/test", /*OpenFile=*/true); - ASSERT_TRUE(file != nullptr); - ASSERT_TRUE(file->isValid()); - EXPECT_TRUE(file->isOpenForTests()); - - // However we should never try to open a file previously opened as virtual. - ASSERT_TRUE(manager.getVirtualFile("/tmp/testv", 5, 0)); - ASSERT_TRUE(manager.getFile("/tmp/testv", /*OpenFile=*/false)); - file = manager.getFile("/tmp/testv", /*OpenFile=*/true); - EXPECT_FALSE(file->isOpenForTests()); -} - // The following tests apply to Unix-like system only. #ifndef _WIN32 Index: test/PCH/leakfiles =================================================================== --- /dev/null +++ test/PCH/leakfiles @@ -0,0 +1,29 @@ +// Test that compiling using a PCH doesn't leak file descriptors. +// https://bugs.chromium.org/p/chromium/issues/detail?id=924225 + +// This test requires bash loops and ulimit. +// REQUIRES: shell +// UNSUPPORTED: win32 +// +// Set up source files. lib/lib.h includes lots of lib*.h files in that dir. +// client.c includes lib/lib.h, and also the individual files directly. +// +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: cd %t +// RUN: mkdir lib +// RUN: for i in {1..300}; do touch lib/lib$i.h; done +// RUN: for i in {1..300}; do echo "#include \"lib$i.h\"" >> lib/lib.h; done +// RUN: echo "#include \"lib/lib.h\"" > client.c +// RUN: for i in {1..300}; do echo "#include \"lib/lib$i.h\"" >> client.c; done +// +// We want to verify that we don't hold all the files open at the same time. +// This is important e.g. on mac, which has a low default FD limit. +// RUN: ulimit -n 100 +// +// Test without PCH. +// RUN: %clang_cc1 -fsyntax-only -Ilib/ client.c +// +// Test with PCH. +// RUN: %clang_cc1 -emit-pch -o pch -Ilib/ client.c +// RUN: %clang_cc1 -include-pch pch -Ilib/ client.c -fsyntax-only Index: lib/Basic/FileManager.cpp =================================================================== --- lib/Basic/FileManager.cpp +++ lib/Basic/FileManager.cpp @@ -188,21 +188,15 @@ *SeenFileEntries.insert(std::make_pair(Filename, nullptr)).first; // See if there is already an entry in the map. - if (NamedFileEnt.second) { - if (NamedFileEnt.second == NON_EXISTENT_FILE) - return nullptr; - // Entry exists: return it *unless* it wasn't opened and open is requested. - if (!(NamedFileEnt.second->DeferredOpen && openFile)) - return NamedFileEnt.second; - // We previously stat()ed the file, but didn't open it: do that below. - // FIXME: the below does other redundant work too (stats the dir and file). - } else { - // By default, initialize it to invalid. - NamedFileEnt.second = NON_EXISTENT_FILE; - } + if (NamedFileEnt.second) + return NamedFileEnt.second == NON_EXISTENT_FILE ? nullptr + : NamedFileEnt.second; ++NumFileCacheMisses; + // By default, initialize it to invalid. + NamedFileEnt.second = NON_EXISTENT_FILE; + // Get the null-terminated file name as stored as the key of the // SeenFileEntries map. StringRef InterndFileName = NamedFileEnt.first(); @@ -240,7 +234,6 @@ // It exists. See if we have already opened a file with the same inode. // This occurs when one dir is symlinked to another, for example. FileEntry &UFE = UniqueRealFiles[Data.UniqueID]; - UFE.DeferredOpen = !openFile; NamedFileEnt.second = &UFE; @@ -257,15 +250,6 @@ InterndFileName = NamedFileEnt.first().data(); } - // If we opened the file for the first time, record the resulting info. - // Do this even if the cache entry was valid, maybe we didn't previously open. - if (F && !UFE.File) { - if (auto PathName = F->getName()) - fillRealPathName(&UFE, *PathName); - UFE.File = std::move(F); - assert(!UFE.DeferredOpen && "we just opened it!"); - } - if (UFE.isValid()) { // Already have an entry with this inode, return it. // FIXME: this hack ensures that if we look up a file by a virtual path in @@ -296,9 +280,21 @@ UFE.UniqueID = Data.UniqueID; UFE.IsNamedPipe = Data.IsNamedPipe; UFE.InPCH = Data.InPCH; + UFE.File = std::move(F); UFE.IsValid = true; - // Note File and DeferredOpen were initialized above. + if (UFE.File) { + if (auto PathName = UFE.File->getName()) { + llvm::SmallString<128> AbsPath(*PathName); + // This is not the same as `VFS::getRealPath()`, which resolves symlinks + // but can be very expensive on real file systems. + // FIXME: the semantic of RealPathName is unclear, and the name might be + // misleading. We need to clean up the interface here. + makeAbsolutePath(AbsPath); + llvm::sys::path::remove_dots(AbsPath, /*remove_dot_dot=*/true); + UFE.RealPathName = AbsPath.str(); + } + } return &UFE; } @@ -370,7 +366,6 @@ UFE->UID = NextFileUID++; UFE->IsValid = true; UFE->File.reset(); - UFE->DeferredOpen = false; return UFE; } Index: include/clang/Basic/FileManager.h =================================================================== --- include/clang/Basic/FileManager.h +++ include/clang/Basic/FileManager.h @@ -69,15 +69,14 @@ bool IsNamedPipe; bool InPCH; bool IsValid; // Is this \c FileEntry initialized and valid? - bool DeferredOpen; // Created by getFile(OpenFile=0); may open later. /// The open file, if it is owned by the \p FileEntry. mutable std::unique_ptr<llvm::vfs::File> File; public: FileEntry() - : UniqueID(0, 0), IsNamedPipe(false), InPCH(false), IsValid(false), - DeferredOpen(false) {} + : UniqueID(0, 0), IsNamedPipe(false), InPCH(false), IsValid(false) + {} FileEntry(const FileEntry &) = delete; FileEntry &operator=(const FileEntry &) = delete;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits