With Emil's suggestions applied: Acked-by: Marek Olšák <marek.ol...@amd.com>
Whoever wants a different compression algorithm can send a patch. Marek On Thu, Mar 2, 2017 at 2:36 AM, Timothy Arceri <tarc...@itsqueeze.com> wrote: > This reduces the cache size for Deus Ex from ~160M to ~30M for > radeonsi. > > I'm also seeing the following improvements in minimum fps in the > Shadow of Mordor benchmark: > > no-cache: ~10fps > with-cache-no-compression: ~15fps > with-cache-and-compression: ~20fps > > Note the with cache results are from the second run after closing > and opening the game to avoid the in-memory cache. > > Since we only really care about decompression I went with > Z_BEST_COMPRESSION as suggested on irc by Steinar H. Gunderson > who has benchmarked decompression speeds. > > V2: fix pointer increments for reading/writing cache entry > file data. > --- > configure.ac | 4 ++ > src/util/Makefile.am | 2 + > src/util/disk_cache.c | 173 > +++++++++++++++++++++++++++++++++++++++++++------- > 3 files changed, 156 insertions(+), 23 deletions(-) > > diff --git a/configure.ac b/configure.ac > index 890a379..9fde95f 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -92,20 +92,21 @@ LIBVA_REQUIRED=0.38.0 > VDPAU_REQUIRED=1.1 > WAYLAND_REQUIRED=1.11 > XCB_REQUIRED=1.9.3 > XCBDRI2_REQUIRED=1.8 > XCBGLX_REQUIRED=1.8.1 > XDAMAGE_REQUIRED=1.1 > XSHMFENCE_REQUIRED=1.1 > XVMC_REQUIRED=1.0.6 > PYTHON_MAKO_REQUIRED=0.8.0 > LIBSENSORS_REQUIRED=4.0.0 > +ZLIB_REQUIRED=1.2.8 > > dnl LLVM versions > LLVM_REQUIRED_GALLIUM=3.3.0 > LLVM_REQUIRED_OPENCL=3.6.0 > LLVM_REQUIRED_R600=3.6.0 > LLVM_REQUIRED_RADEONSI=3.6.0 > LLVM_REQUIRED_RADV=3.9.0 > LLVM_REQUIRED_SWR=3.6.0 > > dnl Check for progs > @@ -777,20 +778,23 @@ darwin*) > AC_CHECK_FUNCS([clock_gettime], [CLOCK_LIB=], > [AC_CHECK_LIB([rt], [clock_gettime], [CLOCK_LIB=-lrt], > [AC_MSG_ERROR([Could not find > clock_gettime])])]) > AC_SUBST([CLOCK_LIB]) > ;; > esac > > dnl See if posix_memalign is available > AC_CHECK_FUNC([posix_memalign], [DEFINES="$DEFINES -DHAVE_POSIX_MEMALIGN"]) > > +dnl Check for zlib > +PKG_CHECK_MODULES([ZLIB], [zlib >= $ZLIB_REQUIRED]) > + > dnl Check for pthreads > AX_PTHREAD > if test "x$ax_pthread_ok" = xno; then > AC_MSG_ERROR([Building mesa on this platform requires pthreads]) > fi > dnl AX_PTHREADS leaves PTHREAD_LIBS empty for gcc and sets PTHREAD_CFLAGS > dnl to -pthread, which causes problems if we need -lpthread to appear in > dnl pkgconfig files. Since Android doesn't have a pthread lib, this check > dnl is not valid for that platform. > if test "x$android" = xno; then > diff --git a/src/util/Makefile.am b/src/util/Makefile.am > index ae50a3b..e46d893 100644 > --- a/src/util/Makefile.am > +++ b/src/util/Makefile.am > @@ -36,20 +36,22 @@ libmesautil_la_CPPFLAGS = \ > -I$(top_srcdir)/src/mesa \ > -I$(top_srcdir)/src/gallium/include \ > -I$(top_srcdir)/src/gallium/auxiliary \ > $(VISIBILITY_CFLAGS) \ > $(MSVC2013_COMPAT_CFLAGS) > > libmesautil_la_SOURCES = \ > $(MESA_UTIL_FILES) \ > $(MESA_UTIL_GENERATED_FILES) > > +libmesautil_la_LIBADD = -lz > + > roundeven_test_LDADD = -lm > > check_PROGRAMS = u_atomic_test roundeven_test > TESTS = $(check_PROGRAMS) > > BUILT_SOURCES = $(MESA_UTIL_GENERATED_FILES) > CLEANFILES = $(BUILT_SOURCES) > EXTRA_DIST = \ > format_srgb.py \ > SConscript \ > diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c > index f8e9948..fafd329 100644 > --- a/src/util/disk_cache.c > +++ b/src/util/disk_cache.c > @@ -30,20 +30,21 @@ > #include <stdio.h> > #include <sys/file.h> > #include <sys/types.h> > #include <sys/stat.h> > #include <sys/mman.h> > #include <unistd.h> > #include <fcntl.h> > #include <pwd.h> > #include <errno.h> > #include <dirent.h> > +#include "zlib.h" > > #include "util/crc32.h" > #include "util/u_atomic.h" > #include "util/mesa-sha1.h" > #include "util/ralloc.h" > #include "main/errors.h" > > #include "disk_cache.h" > > /* Number of bits to mask off from a cache key to get an index. */ > @@ -638,30 +639,106 @@ disk_cache_remove(struct disk_cache *cache, cache_key > key) > return; > } > > unlink(filename); > free(filename); > > if (sb.st_size) > p_atomic_add(cache->size, - sb.st_size); > } > > +/* From the zlib docs: > + * "If the memory is available, buffers sizes on the order of 128K or 256K > + * bytes should be used." > + */ > +#define BUFSIZE 256 * 1024 > + > +/** > + * Compresses cache entry in memory and writes it to disk. Returns the size > + * of the data written to disk. > + */ > +static size_t > +deflate_and_write_to_disk(const void *in_data, size_t in_data_size, int dest, > + char *filename) > +{ > + unsigned char out[BUFSIZE]; > + > + /* allocate deflate state */ > + z_stream strm; > + strm.zalloc = Z_NULL; > + strm.zfree = Z_NULL; > + strm.opaque = Z_NULL; > + strm.next_in = (uint8_t *) in_data; > + strm.avail_in = in_data_size; > + > + int ret = deflateInit(&strm, Z_BEST_COMPRESSION); > + if (ret != Z_OK) > + return 0; > + > + /* compress until end of in_data */ > + size_t compressed_size = 0; > + int flush; > + do { > + int remaining = in_data_size - BUFSIZE; > + flush = remaining > 0 ? Z_NO_FLUSH : Z_FINISH; > + in_data_size -= BUFSIZE; > + > + /* Run deflate() on input until the output buffer is not full (which > + * means there is no more data to deflate). > + */ > + do { > + strm.avail_out = BUFSIZE; > + strm.next_out = out; > + > + ret = deflate(&strm, flush); /* no bad return value */ > + assert(ret != Z_STREAM_ERROR); /* state not clobbered */ > + > + size_t have = BUFSIZE - strm.avail_out; > + compressed_size += compressed_size + have; > + > + size_t written = 0; > + for (size_t len = 0; len < have; len += written) { > + written = write(dest, out + len, have - len); > + if (written == -1) { > + (void)deflateEnd(&strm); > + return 0; > + } > + } > + } while (strm.avail_out == 0); > + > + /* all input should be used */ > + assert(strm.avail_in == 0); > + > + } while (flush != Z_FINISH); > + > + /* stream should be complete */ > + assert(ret == Z_STREAM_END); > + > + /* clean up and return */ > + (void)deflateEnd(&strm); > + return compressed_size; > +} > + > +struct cache_entry_file_data { > + uint32_t crc32; > + uint32_t uncompressed_size; > +}; > + > void > disk_cache_put(struct disk_cache *cache, > cache_key key, > const void *data, > size_t size) > { > int fd = -1, fd_final = -1, err, ret; > size_t len; > char *filename = NULL, *filename_tmp = NULL; > - const char *p = data; > > filename = get_cache_file(cache, key); > if (filename == NULL) > goto done; > > /* Write to a temporary file to allow for an atomic rename to the > * final destination filename, (to prevent any readers from seeing > * a partially written file). > */ > if (asprintf(&filename_tmp, "%s.tmp", filename) == -1) > @@ -706,120 +783,170 @@ disk_cache_put(struct disk_cache *cache, > * > * Before we do that, if the cache is too large, evict something > * else first. > */ > if (*cache->size + size > cache->max_size) > evict_random_item(cache); > > /* Create CRC of the data and store at the start of the file. We will > * read this when restoring the cache and use it to check for corruption. > */ > - uint32_t crc32 = util_hash_crc32(data, size); > - size_t crc_size = sizeof(crc32); > - for (len = 0; len < crc_size; len += ret) { > - ret = write(fd, ((uint8_t *) &crc32) + len, crc_size - len); > + struct cache_entry_file_data cf_data; > + cf_data.crc32 = util_hash_crc32(data, size); > + cf_data.uncompressed_size = size; > + > + size_t cf_data_size = sizeof(cf_data); > + for (len = 0; len < cf_data_size; len += ret) { > + ret = write(fd, ((uint8_t *) &cf_data) + len, cf_data_size - len); > if (ret == -1) { > unlink(filename_tmp); > goto done; > } > } > > /* Now, finally, write out the contents to the temporary file, then > * rename them atomically to the destination filename, and also > * perform an atomic increment of the total cache size. > */ > - for (len = 0; len < size; len += ret) { > - ret = write(fd, p + len, size - len); > - if (ret == -1) { > - unlink(filename_tmp); > - goto done; > - } > + size_t file_size = deflate_and_write_to_disk(data, size, fd, > filename_tmp); > + if (file_size == 0) { > + unlink(filename_tmp); > + goto done; > } > - > rename(filename_tmp, filename); > > - size += crc_size; > - p_atomic_add(cache->size, size); > + file_size += cf_data_size; > + p_atomic_add(cache->size, file_size); > > done: > if (fd_final != -1) > close(fd_final); > /* This close finally releases the flock, (now that the final dile > * has been renamed into place and the size has been added). > */ > if (fd != -1) > close(fd); > if (filename_tmp) > free(filename_tmp); > if (filename) > free(filename); > } > > +/** > + * Decompresses cache entry, returns true if successful. > + */ > +static bool > +inflate_cache_data(uint8_t *in_data, size_t in_data_size, > + uint8_t *out_data, size_t out_data_size) > +{ > + z_stream strm; > + > + /* allocate inflate state */ > + strm.zalloc = Z_NULL; > + strm.zfree = Z_NULL; > + strm.opaque = Z_NULL; > + strm.next_in = in_data; > + strm.avail_in = in_data_size; > + strm.next_out = out_data; > + strm.avail_out = out_data_size; > + > + int ret = inflateInit(&strm); > + if (ret != Z_OK) > + return false; > + > + ret = inflate(&strm, Z_NO_FLUSH); > + assert(ret != Z_STREAM_ERROR); /* state not clobbered */ > + > + /* Unless there was an error we should have decompressed everything in one > + * go as we know the uncompressed file size. > + */ > + if (ret != Z_STREAM_END) { > + (void)inflateEnd(&strm); > + return false; > + } > + assert(strm.avail_out == 0); > + > + /* clean up and return */ > + (void)inflateEnd(&strm); > + return true; > +} > + > void * > disk_cache_get(struct disk_cache *cache, cache_key key, size_t *size) > { > int fd = -1, ret, len; > struct stat sb; > char *filename = NULL; > uint8_t *data = NULL; > + uint8_t *uncompressed_data = NULL; > > if (size) > *size = 0; > > filename = get_cache_file(cache, key); > if (filename == NULL) > goto fail; > > fd = open(filename, O_RDONLY | O_CLOEXEC); > if (fd == -1) > goto fail; > > if (fstat(fd, &sb) == -1) > goto fail; > > data = malloc(sb.st_size); > if (data == NULL) > goto fail; > > /* Load the CRC that was created when the file was written. */ > - uint32_t crc32; > - size_t crc_size = sizeof(crc32); > - assert(sb.st_size > crc_size); > - for (len = 0; len < crc_size; len += ret) { > - ret = read(fd, ((uint8_t *) &crc32) + len, crc_size - len); > + struct cache_entry_file_data cf_data; > + size_t cf_data_size = sizeof(cf_data); > + assert(sb.st_size > cf_data_size); > + for (len = 0; len < cf_data_size; len += ret) { > + ret = read(fd, ((uint8_t *) &cf_data) + len, cf_data_size - len); > if (ret == -1) > goto fail; > } > > /* Load the actual cache data. */ > - size_t cache_data_size = sb.st_size - crc_size; > + size_t cache_data_size = sb.st_size - cf_data_size; > for (len = 0; len < cache_data_size; len += ret) { > ret = read(fd, data + len, cache_data_size - len); > if (ret == -1) > goto fail; > } > > + /* Uncompress the cache data */ > + uncompressed_data = malloc(cf_data.uncompressed_size); > + if (!inflate_cache_data(data, cache_data_size, uncompressed_data, > + cf_data.uncompressed_size)) > + goto fail; > + > /* Check the data for corruption */ > - if (crc32 != util_hash_crc32(data, cache_data_size)) > + if (cf_data.crc32 != util_hash_crc32(uncompressed_data, > + cf_data.uncompressed_size)) > goto fail; > > + free(data); > free(filename); > close(fd); > > if (size) > - *size = cache_data_size; > + *size = cf_data.uncompressed_size; > > - return data; > + return uncompressed_data; > > fail: > if (data) > free(data); > + if (uncompressed_data) > + free(uncompressed_data); > if (filename) > free(filename); > if (fd != -1) > close(fd); > > return NULL; > } > > void > disk_cache_put_key(struct disk_cache *cache, cache_key key) > -- > 2.9.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev