This reduces the cache size for Deus Ex from ~160M to ~30M for radeonsi. I'm also seeing the following improvements in minimum fps in the Shadow of Mordor benchmark:
no-cache: ~10fps with-cache-no-compression: ~15fps with-cache-and-compression: ~20fps Note that the with-cache results are from the second run after closing and opening the game to avoid the in-memory cache. Since we only really care about decompression, I went with Z_BEST_COMPRESSION as suggested on IRC by Steinar H. Gunderson who has benchmarked decompression speeds. --- configure.ac | 4 ++ src/util/Makefile.am | 2 + src/util/disk_cache.c | 173 +++++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 156 insertions(+), 23 deletions(-) diff --git a/configure.ac b/configure.ac index 890a379..9fde95f 100644 --- a/configure.ac +++ b/configure.ac @@ -92,20 +92,21 @@ LIBVA_REQUIRED=0.38.0 VDPAU_REQUIRED=1.1 WAYLAND_REQUIRED=1.11 XCB_REQUIRED=1.9.3 XCBDRI2_REQUIRED=1.8 XCBGLX_REQUIRED=1.8.1 XDAMAGE_REQUIRED=1.1 XSHMFENCE_REQUIRED=1.1 XVMC_REQUIRED=1.0.6 PYTHON_MAKO_REQUIRED=0.8.0 LIBSENSORS_REQUIRED=4.0.0 +ZLIB_REQUIRED=1.2.8 dnl LLVM versions LLVM_REQUIRED_GALLIUM=3.3.0 LLVM_REQUIRED_OPENCL=3.6.0 LLVM_REQUIRED_R600=3.6.0 LLVM_REQUIRED_RADEONSI=3.6.0 LLVM_REQUIRED_RADV=3.9.0 LLVM_REQUIRED_SWR=3.6.0 dnl Check for progs @@ -777,20 +778,23 @@ darwin*) AC_CHECK_FUNCS([clock_gettime], [CLOCK_LIB=], [AC_CHECK_LIB([rt], [clock_gettime], [CLOCK_LIB=-lrt], [AC_MSG_ERROR([Could not find clock_gettime])])]) AC_SUBST([CLOCK_LIB]) ;; esac dnl See if posix_memalign is available AC_CHECK_FUNC([posix_memalign], [DEFINES="$DEFINES -DHAVE_POSIX_MEMALIGN"]) +dnl Check for zlib +PKG_CHECK_MODULES([ZLIB], [zlib >= $ZLIB_REQUIRED]) + dnl Check for pthreads AX_PTHREAD if test "x$ax_pthread_ok" = xno; then AC_MSG_ERROR([Building mesa on this platform requires pthreads]) fi dnl AX_PTHREADS leaves PTHREAD_LIBS empty for gcc and sets PTHREAD_CFLAGS dnl to -pthread, which causes problems if we need -lpthread to appear in dnl pkgconfig files. Since Android doesn't have a pthread lib, this check dnl is not valid for that platform. 
if test "x$android" = xno; then diff --git a/src/util/Makefile.am b/src/util/Makefile.am index ae50a3b..e46d893 100644 --- a/src/util/Makefile.am +++ b/src/util/Makefile.am @@ -36,20 +36,22 @@ libmesautil_la_CPPFLAGS = \ -I$(top_srcdir)/src/mesa \ -I$(top_srcdir)/src/gallium/include \ -I$(top_srcdir)/src/gallium/auxiliary \ $(VISIBILITY_CFLAGS) \ $(MSVC2013_COMPAT_CFLAGS) libmesautil_la_SOURCES = \ $(MESA_UTIL_FILES) \ $(MESA_UTIL_GENERATED_FILES) +libmesautil_la_LIBADD = -lz + roundeven_test_LDADD = -lm check_PROGRAMS = u_atomic_test roundeven_test TESTS = $(check_PROGRAMS) BUILT_SOURCES = $(MESA_UTIL_GENERATED_FILES) CLEANFILES = $(BUILT_SOURCES) EXTRA_DIST = \ format_srgb.py \ SConscript \ diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c index 2a0edca..03aae02 100644 --- a/src/util/disk_cache.c +++ b/src/util/disk_cache.c @@ -30,20 +30,21 @@ #include <stdio.h> #include <sys/file.h> #include <sys/types.h> #include <sys/stat.h> #include <sys/mman.h> #include <unistd.h> #include <fcntl.h> #include <pwd.h> #include <errno.h> #include <dirent.h> +#include "zlib.h" #include "util/crc32.h" #include "util/u_atomic.h" #include "util/mesa-sha1.h" #include "util/ralloc.h" #include "main/errors.h" #include "disk_cache.h" /* Number of bits to mask off from a cache key to get an index. */ @@ -638,30 +639,106 @@ disk_cache_remove(struct disk_cache *cache, cache_key key) return; } unlink(filename); free(filename); if (sb.st_size) p_atomic_add(cache->size, - sb.st_size); } +/* From the zlib docs: + * "If the memory is available, buffers sizes on the order of 128K or 256K + * bytes should be used." + */ +#define BUFSIZE 256 * 1024 + +/** + * Compresses cache entry in memory and writes it to disk. Returns the size + * of the data written to disk. 
+ */ +static size_t +deflate_and_write_to_disk(const void *in_data, size_t in_data_size, int dest, + char *filename) +{ + unsigned char out[BUFSIZE]; + + /* allocate deflate state */ + z_stream strm; + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.next_in = (uint8_t *) in_data; + strm.avail_in = in_data_size; + + int ret = deflateInit(&strm, Z_BEST_COMPRESSION); + if (ret != Z_OK) + return 0; + + /* compress until end of in_data */ + size_t compressed_size = 0; + int flush; + do { + int remaining = in_data_size - BUFSIZE; + flush = remaining > 0 ? Z_NO_FLUSH : Z_FINISH; + in_data_size -= BUFSIZE; + + /* Run deflate() on input until the output buffer is not full (which + * means there is no more data to deflate). + */ + do { + strm.avail_out = BUFSIZE; + strm.next_out = out; + + ret = deflate(&strm, flush); /* no bad return value */ + assert(ret != Z_STREAM_ERROR); /* state not clobbered */ + + size_t have = BUFSIZE - strm.avail_out; + compressed_size += compressed_size + have; + + size_t written = 0; + for (size_t len = 0; len < have; len += written) { + written = write(dest, out + len, have - len); + if (written == -1) { + (void)deflateEnd(&strm); + return 0; + } + } + } while (strm.avail_out == 0); + + /* all input should be used */ + assert(strm.avail_in == 0); + + } while (flush != Z_FINISH); + + /* stream should be complete */ + assert(ret == Z_STREAM_END); + + /* clean up and return */ + (void)deflateEnd(&strm); + return compressed_size; +} + +struct cache_entry_file_data { + uint32_t crc32; + uint32_t uncompressed_size; +}; + void disk_cache_put(struct disk_cache *cache, cache_key key, const void *data, size_t size) { int fd = -1, fd_final = -1, err, ret; size_t len; char *filename = NULL, *filename_tmp = NULL; - const char *p = data; filename = get_cache_file(cache, key); if (filename == NULL) goto done; /* Write to a temporary file to allow for an atomic rename to the * final destination filename, (to prevent any readers from 
seeing * a partially written file). */ if (asprintf(&filename_tmp, "%s.tmp", filename) == -1) @@ -706,120 +783,170 @@ disk_cache_put(struct disk_cache *cache, * * Before we do that, if the cache is too large, evict something * else first. */ if (*cache->size + size > cache->max_size) evict_random_item(cache); /* Create CRC of the data and store at the start of the file. We will * read this when restoring the cache and use it to check for corruption. */ - uint32_t crc32 = util_hash_crc32(data, size); - size_t crc_size = sizeof(crc32); - for (len = 0; len < crc_size; len += ret) { - ret = write(fd, &crc32, crc_size - len); + struct cache_entry_file_data cf_data; + cf_data.crc32 = util_hash_crc32(data, size); + cf_data.uncompressed_size = size; + + size_t cf_data_size = sizeof(cf_data); + for (len = 0; len < cf_data_size; len += ret) { + ret = write(fd, &cf_data, cf_data_size - len); if (ret == -1) { unlink(filename_tmp); goto done; } } /* Now, finally, write out the contents to the temporary file, then * rename them atomically to the destination filename, and also * perform an atomic increment of the total cache size. */ - for (len = 0; len < size; len += ret) { - ret = write(fd, p + len, size - len); - if (ret == -1) { - unlink(filename_tmp); - goto done; - } + size_t file_size = deflate_and_write_to_disk(data, size, fd, filename_tmp); + if (file_size == 0) { + unlink(filename_tmp); + goto done; } - rename(filename_tmp, filename); - size += crc_size; - p_atomic_add(cache->size, size); + file_size += cf_data_size; + p_atomic_add(cache->size, file_size); done: if (fd_final != -1) close(fd_final); /* This close finally releases the flock, (now that the final dile * has been renamed into place and the size has been added). */ if (fd != -1) close(fd); if (filename_tmp) free(filename_tmp); if (filename) free(filename); } +/** + * Decompresses cache entry, returns true if successful. 
+ */ +static bool +inflate_cache_data(uint8_t *in_data, size_t in_data_size, + uint8_t *out_data, size_t out_data_size) +{ + z_stream strm; + + /* allocate inflate state */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.next_in = in_data; + strm.avail_in = in_data_size; + strm.next_out = out_data; + strm.avail_out = out_data_size; + + int ret = inflateInit(&strm); + if (ret != Z_OK) + return false; + + ret = inflate(&strm, Z_NO_FLUSH); + assert(ret != Z_STREAM_ERROR); /* state not clobbered */ + + /* Unless there was an error we should have decompressed everything in one + * go as we know the uncompressed file size. + */ + if (ret != Z_STREAM_END) { + (void)inflateEnd(&strm); + return false; + } + assert(strm.avail_out == 0); + + /* clean up and return */ + (void)inflateEnd(&strm); + return true; +} + void * disk_cache_get(struct disk_cache *cache, cache_key key, size_t *size) { int fd = -1, ret, len; struct stat sb; char *filename = NULL; uint8_t *data = NULL; + uint8_t *uncompressed_data = NULL; if (size) *size = 0; filename = get_cache_file(cache, key); if (filename == NULL) goto fail; fd = open(filename, O_RDONLY | O_CLOEXEC); if (fd == -1) goto fail; if (fstat(fd, &sb) == -1) goto fail; data = malloc(sb.st_size); if (data == NULL) goto fail; /* Load the CRC that was created when the file was written. */ - uint32_t crc32; - size_t crc_size = sizeof(crc32); - assert(sb.st_size > crc_size); - for (len = 0; len < crc_size; len += ret) { - ret = read(fd, &crc32 + len, crc_size - len); + struct cache_entry_file_data cf_data; + size_t cf_data_size = sizeof(cf_data); + assert(sb.st_size > cf_data_size); + for (len = 0; len < cf_data_size; len += ret) { + ret = read(fd, &cf_data + len, cf_data_size - len); if (ret == -1) goto fail; } /* Load the actual cache data. 
*/ - size_t cache_data_size = sb.st_size - crc_size; + size_t cache_data_size = sb.st_size - cf_data_size; for (len = 0; len < cache_data_size; len += ret) { ret = read(fd, data + len, cache_data_size - len); if (ret == -1) goto fail; } + /* Uncompress the cache data */ + uncompressed_data = malloc(cf_data.uncompressed_size); + if (!inflate_cache_data(data, cache_data_size, uncompressed_data, + cf_data.uncompressed_size)) + goto fail; + /* Check the data for corruption */ - if (crc32 != util_hash_crc32(data, cache_data_size)) + if (cf_data.crc32 != util_hash_crc32(uncompressed_data, + cf_data.uncompressed_size)) goto fail; + free(data); free(filename); close(fd); if (size) - *size = cache_data_size; + *size = cf_data.uncompressed_size; - return data; + return uncompressed_data; fail: if (data) free(data); + if (uncompressed_data) + free(uncompressed_data); if (filename) free(filename); if (fd != -1) close(fd); return NULL; } void disk_cache_put_key(struct disk_cache *cache, cache_key key) -- 2.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev