Author: sammccall
Date: Mon Jul 8 04:33:17 2019
New Revision: 365311

URL: http://llvm.org/viewvc/llvm-project?rev=365311&view=rev
Log:
[clangd] Use xxhash instead of SHA1 for background index file digests.
Summary: Currently SHA1 is about 10% of our CPU, this patch reduces it to ~1%. xxhash is a well-defined (stable) non-cryptographic hash optimized for fast checksums (like crc32). Collisions shouldn't be a problem, despite the reduced length: - for actual file content (used to invalidate bg index shards), there are only two versions that can collide (new shard and old shard). - for file paths in bg index shard filenames, we would need 2^32 files with the same filename to expect a collision. Imperfect hashing may reduce this a bit but it's well beyond what's plausible. This will invalidate shards on disk (as usual; I bumped the version), but this time the filenames are changing so the old files will stick around :-( So this is more expensive than the usual bump, but would be good to land before the v9 branch when everyone will start using bg index. Reviewers: kadircet Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64306 Modified: clang-tools-extra/trunk/clangd/SourceCode.cpp clang-tools-extra/trunk/clangd/SourceCode.h clang-tools-extra/trunk/clangd/index/Background.cpp clang-tools-extra/trunk/clangd/index/Background.h clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp clang-tools-extra/trunk/clangd/index/Serialization.cpp clang-tools-extra/trunk/clangd/unittests/SerializationTests.cpp Modified: clang-tools-extra/trunk/clangd/SourceCode.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/SourceCode.cpp?rev=365311&r1=365310&r2=365311&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/SourceCode.cpp (original) +++ clang-tools-extra/trunk/clangd/SourceCode.cpp Mon Jul 8 04:33:17 2019 @@ -25,6 +25,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Path.h" +#include "llvm/Support/xxhash.h" #include <algorithm> namespace clang { @@ 
-376,7 +377,13 @@ bool isRangeConsecutive(const Range &Lef } FileDigest digest(llvm::StringRef Content) { - return llvm::SHA1::hash({(const uint8_t *)Content.data(), Content.size()}); + uint64_t Hash{llvm::xxHash64(Content)}; + FileDigest Result; + for (unsigned I = 0; I < Result.size(); ++I) { + Result[I] = uint8_t(Hash); + Hash >>= 8; + } + return Result; } llvm::Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) { Modified: clang-tools-extra/trunk/clangd/SourceCode.h URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/SourceCode.h?rev=365311&r1=365310&r2=365311&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/SourceCode.h (original) +++ clang-tools-extra/trunk/clangd/SourceCode.h Mon Jul 8 04:33:17 2019 @@ -22,7 +22,6 @@ #include "clang/Tooling/Core/Replacement.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/SHA1.h" namespace clang { class SourceManager; @@ -32,7 +31,7 @@ namespace clangd { // We tend to generate digests for source codes in a lot of different places. // This represents the type for those digests to prevent us hard coding details // of hashing function at every place that needs to store this information. 
-using FileDigest = decltype(llvm::SHA1::hash({})); +using FileDigest = std::array<uint8_t, 8>; FileDigest digest(StringRef Content); Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID); Modified: clang-tools-extra/trunk/clangd/index/Background.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Background.cpp?rev=365311&r1=365310&r2=365311&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/index/Background.cpp (original) +++ clang-tools-extra/trunk/clangd/index/Background.cpp Mon Jul 8 04:33:17 2019 @@ -32,7 +32,6 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/Error.h" -#include "llvm/Support/SHA1.h" #include "llvm/Support/Threading.h" #include <atomic> Modified: clang-tools-extra/trunk/clangd/index/Background.h URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Background.h?rev=365311&r1=365310&r2=365311&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/index/Background.h (original) +++ clang-tools-extra/trunk/clangd/index/Background.h Mon Jul 8 04:33:17 2019 @@ -19,7 +19,6 @@ #include "index/Serialization.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/StringMap.h" -#include "llvm/Support/SHA1.h" #include "llvm/Support/Threading.h" #include <atomic> #include <condition_variable> Modified: clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp?rev=365311&r1=365310&r2=365311&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp (original) +++ clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp Mon Jul 8 04:33:17 2019 @@ -13,18 +13,11 @@ #include 
"llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" -#include "llvm/Support/SHA1.h" namespace clang { namespace clangd { namespace { -using FileDigest = decltype(llvm::SHA1::hash({})); - -static FileDigest digest(StringRef Content) { - return llvm::SHA1::hash({(const uint8_t *)Content.data(), Content.size()}); -} - std::string getShardPathFromFilePath(llvm::StringRef ShardRoot, llvm::StringRef FilePath) { llvm::SmallString<128> ShardRootSS(ShardRoot); Modified: clang-tools-extra/trunk/clangd/index/Serialization.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Serialization.cpp?rev=365311&r1=365310&r2=365311&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/index/Serialization.cpp (original) +++ clang-tools-extra/trunk/clangd/index/Serialization.cpp Mon Jul 8 04:33:17 2019 @@ -444,7 +444,7 @@ readCompileCommand(Reader CmdReader, llv // The current versioning scheme is simple - non-current versions are rejected. // If you make a breaking change, bump this version number to invalidate stored // data. Later we may want to support some backward compatibility. 
-constexpr static uint32_t Version = 11; +constexpr static uint32_t Version = 12; llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data) { auto RIFF = riff::readFile(Data); Modified: clang-tools-extra/trunk/clangd/unittests/SerializationTests.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/unittests/SerializationTests.cpp?rev=365311&r1=365310&r2=365311&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/unittests/SerializationTests.cpp (original) +++ clang-tools-extra/trunk/clangd/unittests/SerializationTests.cpp Mon Jul 8 04:33:17 2019 @@ -10,7 +10,6 @@ #include "index/Index.h" #include "index/Serialization.h" #include "clang/Tooling/CompilationDatabase.h" -#include "llvm/Support/SHA1.h" #include "llvm/Support/ScopedPrinter.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -208,9 +207,7 @@ TEST(SerializationTest, SrcsTest) { std::string TestContent("TestContent"); IncludeGraphNode IGN; - IGN.Digest = - llvm::SHA1::hash({reinterpret_cast<const uint8_t *>(TestContent.data()), - TestContent.size()}); + IGN.Digest = digest(TestContent); IGN.DirectIncludes = {"inc1", "inc2"}; IGN.URI = "URI"; IGN.Flags |= IncludeGraphNode::SourceFlag::IsTU; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits