Hi. I've written a patch draft that replaces zlib with the zstd compression algorithm ([1]) in LTO. I'm also sending statistics that were collected for a couple of quite big C++ source files. Observations I made:
- LTO stream compression takes 3-4% of LGEN compile time - zstd at the default compression level (3) generates slightly smaller LTO ELF files - zstd compression is 4-8x faster - decompression time is quite negligible, but for a bigger project (godot) I can see a reduction from 1.37 to 0.53 seconds - the ZSTD API is much simpler to use Suggestions based on these observations: - I would suggest making zstd optional (--enable-zstd), and one would use #include <zstd.h> + -lzstd - I like the default level, as we mainly want to speed up LTO compilation - we can provide an option to control the algorithm (-flto-compression-algorithm), similarly to -flto-compression-level - we can discuss possible compression of the LTO bytecode that is distributed between the WPA stage and the individual LTRANS phases. Thoughts? Thanks, Martin [1] https://github.com/facebook/zstd
>From 4939e90b2a8051128b7b2b0214a5fad5183f3bca Mon Sep 17 00:00:00 2001 From: Martin Liska <mli...@suse.cz> Date: Wed, 19 Jun 2019 09:40:35 +0200 Subject: [PATCH] Replace zlib with zstd. --- gcc/Makefile.in | 2 +- gcc/common.opt | 2 +- gcc/lto-compress.c | 161 ++++++++------------------------------------- gcc/timevar.def | 4 +- 4 files changed, 33 insertions(+), 136 deletions(-) diff --git a/gcc/Makefile.in b/gcc/Makefile.in index d9e0885b96b..8aedcccb717 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -373,7 +373,7 @@ OUTPUT_OPTION = -o $@ # This is where we get zlib from. zlibdir is -L../zlib and zlibinc is # -I../zlib, unless we were configured with --with-system-zlib, in which # case both are empty. -ZLIB = @zlibdir@ -lz +ZLIB = @zlibdir@ -lzstd -lz ZLIBINC = @zlibinc@ # How to find GMP diff --git a/gcc/common.opt b/gcc/common.opt index a1544d06824..f15e21914f3 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1888,7 +1888,7 @@ Specify the algorithm to partition symbols and vars at linktime. ; The initial value of -1 comes from Z_DEFAULT_COMPRESSION in zlib.h. flto-compression-level= -Common Joined RejectNegative UInteger Var(flag_lto_compression_level) Init(-1) IntegerRange(0, 9) +Common Joined RejectNegative UInteger Var(flag_lto_compression_level) Init(-1) IntegerRange(0, 1111) -flto-compression-level=<number> Use zlib compression level <number> for IL. flto-odr-type-merging diff --git a/gcc/lto-compress.c b/gcc/lto-compress.c index 3287178f257..b24f30f956e 100644 --- a/gcc/lto-compress.c +++ b/gcc/lto-compress.c @@ -27,13 +27,9 @@ along with GCC; see the file COPYING3. If not see #include "gimple.h" #include "cgraph.h" #include "lto-streamer.h" -/* zlib.h includes other system headers. Those headers may test feature - test macros. config.h may define feature test macros. For this reason, - zlib.h needs to be included after, rather than before, config.h and - system.h. 
*/ -#include <zlib.h> #include "lto-compress.h" #include "timevar.h" +#include <zstd.h> /* Compression stream structure, holds the flush callback and opaque token, the buffered data, and a note of whether compressing or uncompressing. */ @@ -48,45 +44,23 @@ struct lto_compression_stream bool is_compression; }; -/* Overall compression constants for zlib. */ - -static const size_t Z_BUFFER_LENGTH = 4096; static const size_t MIN_STREAM_ALLOCATION = 1024; -/* For zlib, allocate SIZE count of ITEMS and return the address, OPAQUE - is unused. */ - -static void * -lto_zalloc (void *opaque, unsigned items, unsigned size) -{ - gcc_assert (opaque == Z_NULL); - return xmalloc (items * size); -} - -/* For zlib, free memory at ADDRESS, OPAQUE is unused. */ - -static void -lto_zfree (void *opaque, void *address) -{ - gcc_assert (opaque == Z_NULL); - free (address); -} - -/* Return a zlib compression level that zlib will not reject. Normalizes +/* Return a zstd compression level that zstd will not reject. Normalizes the compression level from the command line flag, clamping non-default values to the appropriate end of their valid range. 
*/ static int -lto_normalized_zlib_level (void) +lto_normalized_zstd_level (void) { int level = flag_lto_compression_level; - if (level != Z_DEFAULT_COMPRESSION) + if (level != ZSTD_CLEVEL_DEFAULT) { - if (level < Z_NO_COMPRESSION) - level = Z_NO_COMPRESSION; - else if (level > Z_BEST_COMPRESSION) - level = Z_BEST_COMPRESSION; + if (level < 1) + level = 1; + else if (level > ZSTD_maxCLevel ()) + level = ZSTD_maxCLevel (); } return level; @@ -169,57 +143,19 @@ void lto_end_compression (struct lto_compression_stream *stream) { unsigned char *cursor = (unsigned char *) stream->buffer; - size_t remaining = stream->bytes; - const size_t outbuf_length = Z_BUFFER_LENGTH; - unsigned char *outbuf = (unsigned char *) xmalloc (outbuf_length); - z_stream out_stream; - size_t compressed_bytes = 0; - int status; - - gcc_assert (stream->is_compression); + size_t size = stream->bytes; timevar_push (TV_IPA_LTO_COMPRESS); + size_t const outbuf_length = ZSTD_compressBound (size); + char *outbuf = (char *) xmalloc (outbuf_length); - out_stream.next_out = outbuf; - out_stream.avail_out = outbuf_length; - out_stream.next_in = cursor; - out_stream.avail_in = remaining; - out_stream.zalloc = lto_zalloc; - out_stream.zfree = lto_zfree; - out_stream.opaque = Z_NULL; + size_t const csize = ZSTD_compress (outbuf, outbuf_length, cursor, size, + lto_normalized_zstd_level ()); - status = deflateInit (&out_stream, lto_normalized_zlib_level ()); - if (status != Z_OK) - internal_error ("compressed stream: %s", zError (status)); + if (ZSTD_isError (csize)) + internal_error ("compressed stream: %s", ZSTD_getErrorName (csize)); - do - { - size_t in_bytes, out_bytes; - - status = deflate (&out_stream, Z_FINISH); - if (status != Z_OK && status != Z_STREAM_END) - internal_error ("compressed stream: %s", zError (status)); - - in_bytes = remaining - out_stream.avail_in; - out_bytes = outbuf_length - out_stream.avail_out; - - stream->callback ((const char *) outbuf, out_bytes, stream->opaque); - 
lto_stats.num_compressed_il_bytes += out_bytes; - compressed_bytes += out_bytes; - - cursor += in_bytes; - remaining -= in_bytes; - - out_stream.next_out = outbuf; - out_stream.avail_out = outbuf_length; - out_stream.next_in = cursor; - out_stream.avail_in = remaining; - } - while (status != Z_STREAM_END); - - status = deflateEnd (&out_stream); - if (status != Z_OK) - internal_error ("compressed stream: %s", zError (status)); + stream->callback (outbuf, csize, NULL); lto_destroy_compression_stream (stream); free (outbuf); @@ -258,61 +194,22 @@ void lto_end_uncompression (struct lto_compression_stream *stream) { unsigned char *cursor = (unsigned char *) stream->buffer; - size_t remaining = stream->bytes; - const size_t outbuf_length = Z_BUFFER_LENGTH; - unsigned char *outbuf = (unsigned char *) xmalloc (outbuf_length); - size_t uncompressed_bytes = 0; + size_t size = stream->bytes; - gcc_assert (!stream->is_compression); timevar_push (TV_IPA_LTO_DECOMPRESS); + unsigned long long const rsize = ZSTD_getFrameContentSize (cursor, size); + if (rsize == ZSTD_CONTENTSIZE_ERROR) + internal_error ("not compressed by zstd"); + else if (rsize == ZSTD_CONTENTSIZE_UNKNOWN) + internal_error ("original size unknown"); - while (remaining > 0) - { - z_stream in_stream; - size_t out_bytes; - int status; - - in_stream.next_out = outbuf; - in_stream.avail_out = outbuf_length; - in_stream.next_in = cursor; - in_stream.avail_in = remaining; - in_stream.zalloc = lto_zalloc; - in_stream.zfree = lto_zfree; - in_stream.opaque = Z_NULL; - - status = inflateInit (&in_stream); - if (status != Z_OK) - internal_error ("compressed stream: %s", zError (status)); - - do - { - size_t in_bytes; - - status = inflate (&in_stream, Z_SYNC_FLUSH); - if (status != Z_OK && status != Z_STREAM_END) - internal_error ("compressed stream: %s", zError (status)); - - in_bytes = remaining - in_stream.avail_in; - out_bytes = outbuf_length - in_stream.avail_out; - - stream->callback ((const char *) outbuf, out_bytes, 
stream->opaque); - lto_stats.num_uncompressed_il_bytes += out_bytes; - uncompressed_bytes += out_bytes; - - cursor += in_bytes; - remaining -= in_bytes; - - in_stream.next_out = outbuf; - in_stream.avail_out = outbuf_length; - in_stream.next_in = cursor; - in_stream.avail_in = remaining; - } - while (!(status == Z_STREAM_END && out_bytes == 0)); - - status = inflateEnd (&in_stream); - if (status != Z_OK) - internal_error ("compressed stream: %s", zError (status)); - } + char *outbuf = (char *) xmalloc (rsize); + size_t const dsize = ZSTD_decompress (outbuf, rsize, cursor, size); + + if (ZSTD_isError (dsize)) + internal_error ("decompressed stream: %s", ZSTD_getErrorName (dsize)); + + stream->callback (outbuf, dsize, stream->opaque); lto_destroy_compression_stream (stream); free (outbuf); diff --git a/gcc/timevar.def b/gcc/timevar.def index 13cb470b688..626ce493b76 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -78,8 +78,8 @@ DEFTIMEVAR (TV_IPA_INLINING , "ipa inlining heuristics") DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting") DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats") DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations") -DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS , "lto stream inflate") -DEFTIMEVAR (TV_IPA_LTO_COMPRESS , "lto stream deflate") +DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS , "lto stream decompression") +DEFTIMEVAR (TV_IPA_LTO_COMPRESS , "lto stream compression") DEFTIMEVAR (TV_IPA_LTO_OUTPUT , "lto stream output") DEFTIMEVAR (TV_IPA_LTO_GIMPLE_IN , "ipa lto gimple in") DEFTIMEVAR (TV_IPA_LTO_GIMPLE_OUT , "ipa lto gimple out") -- 2.21.0
zstd-vs-zlib.ods
Description: application/vnd.oasis.opendocument.spreadsheet