On Wed, Mar 16, 2016 at 7:20 PM, Bruno Cardoso Lopes via cfe-commits < cfe-commits@lists.llvm.org> wrote:
> Author: bruno > Date: Wed Mar 16 21:20:43 2016 > New Revision: 263686 > > URL: http://llvm.org/viewvc/llvm-project?rev=263686&view=rev > Log: > Reapply [2]: [VFS] Add support for handling path traversals > > This was applied twice r261551 and 263617 and later reverted because: > > (1) Windows bot failing on unittests. Change the current behavior to do > not handle path traversals on windows. > > (2) Windows bot failed to include llvm/Config/config.h in order to use > HAVE_REALPATH. Use LLVM_ON_UNIX instead, as done in > lib/Basic/FileManager.cpp. > > Handle ".", ".." and "./" with trailing slashes while collecting files > to be dumped into the vfs overlay directory. > > Include the support for symlinks into components. Given the path: > > /install-dir/bin/../lib/clang/3.8.0/include/altivec.h, if "bin" > component is a symlink, it's not safe to use `path::remove_dots` here, > and `realpath` is used to get the right answer. Since `realpath` > is expensive, we only do it at collecting time (which only happens > during the crash reproducer) and cache the base directory for fast lookups. > > Overall, this makes the input to the VFS YAML file to be canonicalized > to never contain traversal components. > > Differential Revision: http://reviews.llvm.org/D17104 > > rdar://problem/24499339 > > Added: > cfe/trunk/test/Modules/crash-vfs-path-symlink-component.m > cfe/trunk/test/Modules/crash-vfs-path-traversal.m > Modified: > cfe/trunk/lib/Basic/VirtualFileSystem.cpp > cfe/trunk/lib/Frontend/ModuleDependencyCollector.cpp > > Modified: cfe/trunk/lib/Basic/VirtualFileSystem.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/VirtualFileSystem.cpp?rev=263686&r1=263685&r2=263686&view=diff > > ============================================================================== > --- cfe/trunk/lib/Basic/VirtualFileSystem.cpp (original) > +++ cfe/trunk/lib/Basic/VirtualFileSystem.cpp Wed Mar 16 21:20:43 2016 > @@ -112,6 +112,20 @@ bool FileSystem::exists(const Twine &Pat > return Status && Status->exists(); > } > > +#ifndef NDEBUG > +static bool isTraversalComponent(StringRef Component) { > + return Component.equals("..") || Component.equals("."); > +} > + > +static bool pathHasTraversal(StringRef Path) { > + using namespace llvm::sys; > + for (StringRef Comp : llvm::make_range(path::begin(Path), > path::end(Path))) > + if (isTraversalComponent(Comp)) > + return true; > + return false; > +} > +#endif > + > > > //===-----------------------------------------------------------------------===/ > // RealFileSystem implementation > > > //===-----------------------------------------------------------------------===/ > @@ -819,6 +833,16 @@ class RedirectingFileSystem : public vfs > bool UseExternalNames; > /// @} > > + /// Virtual file paths and external files could be canonicalized > without "..", > + /// "." and "./" in their paths. FIXME: some unittests currently fail on > + /// win32 when using remove_dots and remove_leading_dotslash on paths. > + bool UseCanonicalizedPaths = > +#ifdef LLVM_ON_WIN32 > + false; > +#else > + true; > +#endif > + > friend class RedirectingFileSystemParser; > > private: > @@ -954,7 +978,7 @@ class RedirectingFileSystemParser { > return true; > } > > - std::unique_ptr<Entry> parseEntry(yaml::Node *N) { > + std::unique_ptr<Entry> parseEntry(yaml::Node *N, RedirectingFileSystem > *FS) { > yaml::MappingNode *M = dyn_cast<yaml::MappingNode>(N); > if (!M) { > error(N, "expected mapping node for file or directory entry"); > @@ -994,7 +1018,17 @@ class RedirectingFileSystemParser { > if (Key == "name") { > if (!parseScalarString(I->getValue(), Value, Buffer)) > return nullptr; > - Name = Value; > + > + if (FS->UseCanonicalizedPaths) { > + SmallString<256> Path(Value); > + // Guarantee that old YAML files containing paths with ".." and > "." > + // are properly canonicalized before read into the VFS. > + Path = sys::path::remove_leading_dotslash(Path); > + sys::path::remove_dots(Path, /*remove_dot_dot=*/true); > + Name = Path.str(); > + } else { > + Name = Value; > + } > } else if (Key == "type") { > if (!parseScalarString(I->getValue(), Value, Buffer)) > return nullptr; > @@ -1024,7 +1058,7 @@ class RedirectingFileSystemParser { > for (yaml::SequenceNode::iterator I = Contents->begin(), > E = Contents->end(); > I != E; ++I) { > - if (std::unique_ptr<Entry> E = parseEntry(&*I)) > + if (std::unique_ptr<Entry> E = parseEntry(&*I, FS)) > EntryArrayContents.push_back(std::move(E)); > else > return nullptr; > @@ -1038,7 +1072,16 @@ class RedirectingFileSystemParser { > HasContents = true; > if (!parseScalarString(I->getValue(), Value, Buffer)) > return nullptr; > - ExternalContentsPath = Value; > + if (FS->UseCanonicalizedPaths) { > + SmallString<256> Path(Value); > + // Guarantee that old YAML files containing paths with ".." and > "." > + // are properly canonicalized before read into the VFS. > + Path = sys::path::remove_leading_dotslash(Path); > + sys::path::remove_dots(Path, /*remove_dot_dot=*/true); > + ExternalContentsPath = Path.str(); > + } else { > + ExternalContentsPath = Value; > + } > } else if (Key == "use-external-name") { > bool Val; > if (!parseScalarBool(I->getValue(), Val)) > @@ -1149,7 +1192,7 @@ public: > > for (yaml::SequenceNode::iterator I = Roots->begin(), E = > Roots->end(); > I != E; ++I) { > - if (std::unique_ptr<Entry> E = parseEntry(&*I)) > + if (std::unique_ptr<Entry> E = parseEntry(&*I, FS)) > FS->Roots.push_back(std::move(E)); > else > return false; > @@ -1228,6 +1271,14 @@ ErrorOr<Entry *> RedirectingFileSystem:: > if (std::error_code EC = makeAbsolute(Path)) > return EC; > > + // Canonicalize path by removing ".", "..", "./", etc components. This > is > + // a VFS request, do bot bother about symlinks in the path components > + // but canonicalize in order to perform the correct entry search. > + if (UseCanonicalizedPaths) { > + Path = sys::path::remove_leading_dotslash(Path); > + sys::path::remove_dots(Path, /*remove_dot_dot=*/true); > + } > + > if (Path.empty()) > return make_error_code(llvm::errc::invalid_argument); > > @@ -1244,10 +1295,17 @@ ErrorOr<Entry *> RedirectingFileSystem:: > ErrorOr<Entry *> > RedirectingFileSystem::lookupPath(sys::path::const_iterator Start, > sys::path::const_iterator End, Entry > *From) { > +#ifndef LLVM_ON_WIN32 > + assert(!isTraversalComponent(*Start) && > + !isTraversalComponent(From->getName()) && > + "Paths should not contain traversal components"); > +#else > + // FIXME: this is here to support windows, remove it once canonicalized > + // paths become globally default. > if (Start->equals(".")) > ++Start; > +#endif > > - // FIXME: handle .. > if (CaseSensitive ? !Start->equals(From->getName()) > : !Start->equals_lower(From->getName())) > // failure to match > @@ -1366,16 +1424,6 @@ UniqueID vfs::getNextVirtualUniqueID() { > return UniqueID(std::numeric_limits<uint64_t>::max(), ID); > } > > -#ifndef NDEBUG > -static bool pathHasTraversal(StringRef Path) { > - using namespace llvm::sys; > - for (StringRef Comp : llvm::make_range(path::begin(Path), > path::end(Path))) > - if (Comp == "." || Comp == "..") > - return true; > - return false; > -} > -#endif > - > void YAMLVFSWriter::addFileMapping(StringRef VirtualPath, StringRef > RealPath) { > assert(sys::path::is_absolute(VirtualPath) && "virtual path not > absolute"); > assert(sys::path::is_absolute(RealPath) && "real path not absolute"); > > Modified: cfe/trunk/lib/Frontend/ModuleDependencyCollector.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/ModuleDependencyCollector.cpp?rev=263686&r1=263685&r2=263686&view=diff > > ============================================================================== > --- cfe/trunk/lib/Frontend/ModuleDependencyCollector.cpp (original) > +++ cfe/trunk/lib/Frontend/ModuleDependencyCollector.cpp Wed Mar 16 > 21:20:43 2016 > @@ -13,8 +13,9 @@ > > #include "clang/Frontend/Utils.h" > #include "clang/Serialization/ASTReader.h" > -#include "llvm/ADT/StringSet.h" > +#include "llvm/ADT/StringMap.h" > #include "llvm/ADT/iterator_range.h" > +#include "llvm/Config/config.h" > #include "llvm/Support/FileSystem.h" > #include "llvm/Support/Path.h" > #include "llvm/Support/raw_ostream.h" > @@ -25,7 +26,9 @@ namespace { > /// Private implementation for ModuleDependencyCollector > class ModuleDependencyListener : public ASTReaderListener { > ModuleDependencyCollector &Collector; > + llvm::StringMap<std::string> SymLinkMap; > > + bool getRealPath(StringRef SrcPath, SmallVectorImpl<char> &Result); > std::error_code copyToRoot(StringRef Src); > public: > ModuleDependencyListener(ModuleDependencyCollector &Collector) > @@ -57,6 +60,48 @@ void ModuleDependencyCollector::writeFil > VFSWriter.write(OS); > } > > +// TODO: move this to Support/Path.h and check for HAVE_REALPATH? > +static bool real_path(StringRef SrcPath, SmallVectorImpl<char> &RealPath) > { > +#ifdef LLVM_ON_UNIX > + char CanonicalPath[PATH_MAX]; > + > + // TODO: emit a warning in case this fails...? > + if (!realpath(SrcPath.str().c_str(), CanonicalPath)) > + return false; > + > + SmallString<256> RPath(CanonicalPath); > + RealPath.swap(RPath); > + return true; > +#else > + // FIXME: Add support for systems without realpath. > + return false; > +#endif > +} > We already have a similar call to realpath in lib/Basic/FileManager.cpp (ugly #ifdef and all) Could you avoid duplicating it? -- Sean Silva > + > +bool ModuleDependencyListener::getRealPath(StringRef SrcPath, > + SmallVectorImpl<char> &Result) > { > + using namespace llvm::sys; > + SmallString<256> RealPath; > + StringRef FileName = path::filename(SrcPath); > + std::string Dir = path::parent_path(SrcPath).str(); > + auto DirWithSymLink = SymLinkMap.find(Dir); > + > + // Use real_path to fix any symbolic link component present in a path. > + // Computing the real path is expensive, cache the search through the > + // parent path directory. > + if (DirWithSymLink == SymLinkMap.end()) { > + if (!real_path(Dir, RealPath)) > + return false; > + SymLinkMap[Dir] = RealPath.str(); > + } else { > + RealPath = DirWithSymLink->second; > + } > + > + path::append(RealPath, FileName); > + Result.swap(RealPath); > + return true; > +} > + > std::error_code ModuleDependencyListener::copyToRoot(StringRef Src) { > using namespace llvm::sys; > > @@ -65,22 +110,42 @@ std::error_code ModuleDependencyListener > fs::make_absolute(AbsoluteSrc); > // Canonicalize to a native path to avoid mixed separator styles. > path::native(AbsoluteSrc); > - // TODO: We probably need to handle .. as well as . in order to have > valid > - // input to the YAMLVFSWriter. > - path::remove_dots(AbsoluteSrc); > + // Remove redundant leading "./" pieces and consecutive separators. > + AbsoluteSrc = path::remove_leading_dotslash(AbsoluteSrc); > + > + // Canonicalize path by removing "..", "." components. > + SmallString<256> CanonicalPath = AbsoluteSrc; > + path::remove_dots(CanonicalPath, /*remove_dot_dot=*/true); > + > + // If a ".." component is present after a symlink component, > remove_dots may > + // lead to the wrong real destination path. Let the source be > canonicalized > + // like that but make sure the destination uses the real path. > + bool HasDotDotInPath = > + std::count(path::begin(AbsoluteSrc), path::end(AbsoluteSrc), "..") > > 0; > + SmallString<256> RealPath; > + bool HasRemovedSymlinkComponent = HasDotDotInPath && > + getRealPath(AbsoluteSrc, RealPath) && > + !StringRef(CanonicalPath).equals(RealPath); > > // Build the destination path. > SmallString<256> Dest = Collector.getDest(); > - path::append(Dest, path::relative_path(AbsoluteSrc)); > + path::append(Dest, path::relative_path(HasRemovedSymlinkComponent ? > RealPath > + : > CanonicalPath)); > > // Copy the file into place. > if (std::error_code EC = fs::create_directories(path::parent_path(Dest), > > /*IgnoreExisting=*/true)) > return EC; > - if (std::error_code EC = fs::copy_file(AbsoluteSrc, Dest)) > + if (std::error_code EC = fs::copy_file( > + HasRemovedSymlinkComponent ? RealPath : CanonicalPath, Dest)) > return EC; > - // Use the absolute path under the root for the file mapping. > - Collector.addFileMapping(AbsoluteSrc, Dest); > + > + // Use the canonical path under the root for the file mapping. Also > create > + // an additional entry for the real path. > + Collector.addFileMapping(CanonicalPath, Dest); > + if (HasRemovedSymlinkComponent) > + Collector.addFileMapping(RealPath, Dest); > + > return std::error_code(); > } > > > Added: cfe/trunk/test/Modules/crash-vfs-path-symlink-component.m > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Modules/crash-vfs-path-symlink-component.m?rev=263686&view=auto > > ============================================================================== > --- cfe/trunk/test/Modules/crash-vfs-path-symlink-component.m (added) > +++ cfe/trunk/test/Modules/crash-vfs-path-symlink-component.m Wed Mar 16 > 21:20:43 2016 > @@ -0,0 +1,72 @@ > +// REQUIRES: crash-recovery, shell > + > +// FIXME: This XFAIL is cargo-culted from crash-report.c. Do we need it? > +// XFAIL: mingw32 > + > +// Test that clang is capable of collecting the right header files in the > +// crash reproducer if there's a symbolic link component in the path. > + > +// RUN: rm -rf %t > +// RUN: mkdir -p %t/i %t/m %t %t/sysroot > +// RUN: cp -a %S/Inputs/System/usr %t/i/ > +// RUN: ln -s include/tcl-private %t/i/usr/x > + > +// RUN: not env FORCE_CLANG_DIAGNOSTICS_CRASH= TMPDIR=%t TEMP=%t TMP=%t \ > +// RUN: %clang -fsyntax-only %s -I %/t/i -isysroot %/t/sysroot/ \ > +// RUN: -fmodules -fmodules-cache-path=%t/m/ 2>&1 | FileCheck %s > + > +// RUN: FileCheck --check-prefix=CHECKSRC %s -input-file %t/crash-vfs-*.m > +// RUN: FileCheck --check-prefix=CHECKSH %s -input-file %t/crash-vfs-*.sh > +// RUN: FileCheck --check-prefix=CHECKYAML %s -input-file \ > +// RUN: %t/crash-vfs-*.cache/vfs/vfs.yaml > +// RUN: find %t/crash-vfs-*.cache/vfs | \ > +// RUN: grep "usr/include/stdio.h" | count 1 > + > +#include "usr/x/../stdio.h" > + > +// CHECK: Preprocessed source(s) and associated run script(s) are located > at: > +// CHECK-NEXT: note: diagnostic msg: {{.*}}.m > +// CHECK-NEXT: note: diagnostic msg: {{.*}}.cache > + > +// CHECKSRC: @import cstd.stdio; > + > +// CHECKSH: # Crash reproducer > +// CHECKSH-NEXT: # Driver args: "-fsyntax-only" > +// CHECKSH-NEXT: # Original command: {{.*$}} > +// CHECKSH-NEXT: "-cc1" > +// CHECKSH: "-isysroot" "{{[^"]*}}/sysroot/" > +// CHECKSH-NOT: "-fmodules-cache-path=" > +// CHECKSH: "crash-vfs-{{[^ ]*}}.m" > +// CHECKSH: "-ivfsoverlay" "crash-vfs-{{[^ ]*}}.cache/vfs/vfs.yaml" > + > +// CHECKYAML: 'type': 'directory' > +// CHECKYAML: 'name': "{{[^ ]*}}/i/usr/include", > +// CHECKYAML-NEXT: 'contents': [ > +// CHECKYAML-NEXT: { > +// CHECKYAML-NEXT: 'type': 'file', > +// CHECKYAML-NEXT: 'name': "module.map", > +// CHECKYAML-NEXT: 'external-contents': "{{[^ ]*}}.cache/vfs/{{[^ > ]*}}/i/usr/include/module.map" > +// CHECKYAML-NEXT: }, > + > +// CHECKYAML: 'type': 'directory' > +// CHECKYAML: 'name': "{{[^ ]*}}/i/usr", > +// CHECKYAML-NEXT: 'contents': [ > +// CHECKYAML-NEXT: { > +// CHECKYAML-NEXT: 'type': 'file', > +// CHECKYAML-NEXT: 'name': "module.map", > +// CHECKYAML-NEXT: 'external-contents': "{{[^ ]*}}.cache/vfs/{{[^ > ]*}}/i/usr/include/module.map" > +// CHECKYAML-NEXT: }, > + > +// Test that by using the previous generated YAML file clang is able to > find the > +// right files inside the overlay and map the virtual request for a path > that > +// previously contained a symlink to work. To make sure of this, wipe out > the > +// %/t/i directory containing the symlink component. > + > +// RUN: rm -rf %/t/i > +// RUN: unset FORCE_CLANG_DIAGNOSTICS_CRASH > +// RUN: %clang -E %s -I %/t/i -isysroot %/t/sysroot/ \ > +// RUN: -ivfsoverlay %t/crash-vfs-*.cache/vfs/vfs.yaml -fmodules \ > +// RUN: -fmodules-cache-path=%t/m/ 2>&1 \ > +// RUN: | FileCheck %s --check-prefix=CHECKOVERLAY > + > +// CHECKOVERLAY: @import cstd.stdio; /* clang -E: implicit import for > "{{[^ ]*}}.cache/vfs/{{[^ ]*}}/i/usr/include/stdio.h" > > Added: cfe/trunk/test/Modules/crash-vfs-path-traversal.m > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Modules/crash-vfs-path-traversal.m?rev=263686&view=auto > > ============================================================================== > --- cfe/trunk/test/Modules/crash-vfs-path-traversal.m (added) > +++ cfe/trunk/test/Modules/crash-vfs-path-traversal.m Wed Mar 16 21:20:43 > 2016 > @@ -0,0 +1,60 @@ > +// REQUIRES: crash-recovery, shell, non-ms-sdk, non-ps4-sdk > + > +// FIXME: Canonicalizing paths to remove relative traversal components > +// currenty fails a unittest on windows and is disable by default. > +// FIXME: This XFAIL is cargo-culted from crash-report.c. Do we need it? > +// XFAIL: mingw32 > + > +// RUN: rm -rf %t > +// RUN: mkdir -p %t/i %t/m %t > + > +// RUN: not env FORCE_CLANG_DIAGNOSTICS_CRASH= TMPDIR=%t TEMP=%t TMP=%t \ > +// RUN: %clang -fsyntax-only %s -I %S/Inputs/System -isysroot %/t/i/ \ > +// RUN: -fmodules -fmodules-cache-path=%t/m/ 2>&1 | FileCheck %s > + > +// RUN: FileCheck --check-prefix=CHECKSRC %s -input-file %t/crash-vfs-*.m > +// RUN: FileCheck --check-prefix=CHECKSH %s -input-file %t/crash-vfs-*.sh > +// RUN: FileCheck --check-prefix=CHECKYAML %s -input-file \ > +// RUN: %t/crash-vfs-*.cache/vfs/vfs.yaml > +// RUN: find %t/crash-vfs-*.cache/vfs | \ > +// RUN: grep "Inputs/System/usr/include/stdio.h" | count 1 > + > +#include "usr/././//////include/../include/./././../include/stdio.h" > + > +// CHECK: Preprocessed source(s) and associated run script(s) are located > at: > +// CHECK-NEXT: note: diagnostic msg: {{.*}}.m > +// CHECK-NEXT: note: diagnostic msg: {{.*}}.cache > + > +// CHECKSRC: @import cstd.stdio; > + > +// CHECKSH: # Crash reproducer > +// CHECKSH-NEXT: # Driver args: "-fsyntax-only" > +// CHECKSH-NEXT: # Original command: {{.*$}} > +// CHECKSH-NEXT: "-cc1" > +// CHECKSH: "-isysroot" "{{[^"]*}}/i/" > +// CHECKSH-NOT: "-fmodules-cache-path=" > +// CHECKSH: "crash-vfs-{{[^ ]*}}.m" > +// CHECKSH: "-ivfsoverlay" "crash-vfs-{{[^ ]*}}.cache/vfs/vfs.yaml" > + > +// CHECKYAML: 'type': 'directory' > +// CHECKYAML: 'name': "{{[^ ]*}}/Inputs/System/usr/include", > +// CHECKYAML-NEXT: 'contents': [ > +// CHECKYAML-NEXT: { > +// CHECKYAML-NEXT: 'type': 'file', > +// CHECKYAML-NEXT: 'name': "module.map", > +// CHECKYAML-NEXT: 'external-contents': "{{[^ > ]*}}/Inputs/System/usr/include/module.map" > +// CHECKYAML-NEXT: }, > + > +// Replace the paths in the YAML files with relative ".." traversals > +// and fed into clang to test whether we're correctly representing them > +// in the VFS overlay. > + > +// RUN: sed -e "s@usr/include@usr/include/../include@g" \ > +// RUN: %t/crash-vfs-*.cache/vfs/vfs.yaml > %t/vfs.yaml > +// RUN: unset FORCE_CLANG_DIAGNOSTICS_CRASH > +// RUN: %clang -E %s -I %S/Inputs/System -isysroot %/t/i/ \ > +// RUN: -ivfsoverlay %t/vfs.yaml -fmodules \ > +// RUN: -fmodules-cache-path=%t/m/ 2>&1 \ > +// RUN: | FileCheck %s --check-prefix=CHECKOVERLAY > + > +// CHECKOVERLAY: @import cstd.stdio; /* clang -E: implicit import for > "{{[^ ]*}}.cache/vfs/{{[^ ]*}}/usr/include/stdio.h" > > > _______________________________________________ > cfe-commits mailing list > cfe-commits@lists.llvm.org > http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits >
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits