Author: Fangrui Song Date: 2022-11-23T11:27:49-08:00 New Revision: fa7bc386ec7565ccfdfa75b272079ee03604e3ba
URL: https://github.com/llvm/llvm-project/commit/fa7bc386ec7565ccfdfa75b272079ee03604e3ba DIFF: https://github.com/llvm/llvm-project/commit/fa7bc386ec7565ccfdfa75b272079ee03604e3ba.diff LOG: [modules] Support zstd in .pcm file Extend SM_SLOC_BUFFER_BLOB_COMPRESSED to allow zstd, which is much faster (compression/decompression) than zlib with a similar compression ratio. An alternative is to add a value beside SM_SLOC_BUFFER_BLOB_COMPRESSED, but reusing SM_SLOC_BUFFER_BLOB_COMPRESSED slightly simplifies the implementation and leads to better diagnostics when a slightly older Clang consumes a zstd-compressed blob. Compressing AST takes a small portion of WriteAST, so we can pick a higher compression level. Compiling a relatively large .pcm (absl endian) with -fmodules-embed-all-files, zstd level 9 has comparable performance with zlib-chromium level 6 (default), but provides smaller output (5809156 => 5796016). Higher zstd levels will make "Compress AST" notably slower and do not provide significantly more size savings. 
``` 2.219345 Total ExecuteCompiler 0.746799 Total Frontend 0.736862 Total Source 0.339434 Total ReadAST 0.165452 Total WriteAST 0.043045 Total Compress AST 0.008236 Total ParseClass 0.00633 Total InstantiateClass 0.001887 Total isPotentialConstantExpr 0.001808 Total InstantiateFunction 0.001535 Total EvaluateForOverflow 0.000986 Total EvaluateAsRValue 0.000536 Total EvaluateAsBooleanCondition 0.000308 Total EvaluateAsConstantExpr 0.000156 Total EvaluateAsInt 3.4e-05 Total EvaluateKnownConstInt 8e-06 Total EvaluateAsInitializer 0 Total PerformPendingInstantiations ``` Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D137885 Added: Modified: clang/lib/Serialization/ASTReader.cpp clang/lib/Serialization/ASTWriter.cpp clang/test/Modules/embed-files-compressed.cpp Removed: ################################################################################ diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index cb418372ed589..367fbc06a8fe2 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1452,19 +1452,24 @@ bool ASTReader::ReadSLocEntry(int ID) { unsigned RecCode = MaybeRecCode.get(); if (RecCode == SM_SLOC_BUFFER_BLOB_COMPRESSED) { - if (!llvm::compression::zlib::isAvailable()) { - Error("zlib is not available"); + // Inspect the first two bytes to differentiate zlib (\x1f\x8b) and zstd. + const llvm::compression::Format F = + Blob.size() >= 2 && memcmp(Blob.data(), "\x1f\x8b", 2) == 0 + ? 
llvm::compression::Format::Zlib + : llvm::compression::Format::Zstd; + if (const char *Reason = llvm::compression::getReasonIfUnsupported(F)) { + Error(Reason); return nullptr; } - SmallVector<uint8_t, 0> Uncompressed; - if (llvm::Error E = llvm::compression::zlib::decompress( - llvm::arrayRefFromStringRef(Blob), Uncompressed, Record[0])) { + SmallVector<uint8_t, 0> Decompressed; + if (llvm::Error E = llvm::compression::decompress( + F, llvm::arrayRefFromStringRef(Blob), Decompressed, Record[0])) { Error("could not decompress embedded file contents: " + llvm::toString(std::move(E))); return nullptr; } return llvm::MemoryBuffer::getMemBufferCopy( - llvm::toStringRef(Uncompressed), Name); + llvm::toStringRef(Decompressed), Name); } else if (RecCode == SM_SLOC_BUFFER_BLOB) { return llvm::MemoryBuffer::getMemBuffer(Blob.drop_back(1), Name, true); } else { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index dd464f5507854..220b2b9aca3fb 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1979,6 +1979,14 @@ static void emitBlob(llvm::BitstreamWriter &Stream, StringRef Blob, // Compress the buffer if possible. We expect that almost all PCM // consumers will not want its contents. 
SmallVector<uint8_t, 0> CompressedBuffer; + if (llvm::compression::zstd::isAvailable()) { + llvm::compression::zstd::compress( + llvm::arrayRefFromStringRef(Blob.drop_back(1)), CompressedBuffer, 9); + RecordDataType Record[] = {SM_SLOC_BUFFER_BLOB_COMPRESSED, Blob.size() - 1}; + Stream.EmitRecordWithBlob(SLocBufferBlobCompressedAbbrv, Record, + llvm::toStringRef(CompressedBuffer)); + return; + } if (llvm::compression::zlib::isAvailable()) { llvm::compression::zlib::compress( llvm::arrayRefFromStringRef(Blob.drop_back(1)), CompressedBuffer); diff --git a/clang/test/Modules/embed-files-compressed.cpp b/clang/test/Modules/embed-files-compressed.cpp index cf33a662f91f7..ae016bc1f9630 100644 --- a/clang/test/Modules/embed-files-compressed.cpp +++ b/clang/test/Modules/embed-files-compressed.cpp @@ -1,4 +1,4 @@ -// REQUIRES: zlib +// REQUIRES: zlib || zstd // REQUIRES: shell // // RUN: rm -rf %t _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits