In gzip I want to try HASH_BITS greater than 15, but deflate.c interferes: #if HASH_BITS > BITS-1 error: cannot overlay head with tab_prefix1
Investigation shows that the data space for LZW compress acts as the master, with zip deflate being a slave whose arrays never can be larger. Attached are two patches which make the two sides more equal. The first patch prepares the way by renaming BITS to LZW_BITS and creating new header file deflate.h similar to lzw.h. The second patch uses MAX() when overlaying arrays, which allows either to be larger. (Beware 'sizeof' any previous master.) Now I can vary HASH_BITS and LZW_BITS independently. --
>From 4a3d2c59a280f11202f6b59465fd76119bb3eb75 Mon Sep 17 00:00:00 2001 From: John Reiser <jrei...@bitwagon.com> Date: Mon, 17 Jun 2013 14:24:29 -0700 Subject: [PATCH 1/2] Prepare to overlay arrays in either order (zip deflate vs LZW compress) by renaming constant BITS ==> LZW_BITS and creating file deflate.h. --- deflate.c | 24 +++--------------------- deflate.h | 41 +++++++++++++++++++++++++++++++++++++++++ gzip.c | 14 +++++++------- lzw.h | 6 +++--- unlzh.c | 2 +- unlzw.c | 14 +++++++------- 6 files changed, 62 insertions(+), 39 deletions(-) create mode 100644 deflate.h diff --git a/deflate.c b/deflate.c index f0f2394..31c81db 100644 --- a/deflate.c +++ b/deflate.c @@ -85,32 +85,14 @@ * Configuration parameters */ -/* Compile with MEDIUM_MEM to reduce the memory requirements or - * with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the - * entire input file can be held in memory (not possible on 16 bit systems). - * Warning: defining these symbols affects HASH_BITS (see below) and thus - * affects the compression ratio. The compressed output - * is still correct, and might even be smaller in some cases. - */ - -#ifdef SMALL_MEM -# define HASH_BITS 13 /* Number of bits used to hash strings */ -#endif -#ifdef MEDIUM_MEM -# define HASH_BITS 14 -#endif -#ifndef HASH_BITS -# define HASH_BITS 15 - /* For portability to 16 bit machines, do not use values above 15. */ -#endif - +#include "deflate.h" /* To save space (see unlzw.c), we overlay prev+head with tab_prefix and * window with tab_suffix. Check that we can do this: */ -#if (WSIZE<<1) > (1<<BITS) +#if (WSIZE<<1) > (1<<LZW_BITS) error: cannot overlay window with tab_suffix and prev with tab_prefix0 #endif -#if HASH_BITS > BITS-1 +#if HASH_BITS > LZW_BITS-1 error: cannot overlay head with tab_prefix1 #endif diff --git a/deflate.h b/deflate.h new file mode 100644 index 0000000..43b02ea --- /dev/null +++ b/deflate.h @@ -0,0 +1,41 @@ +/* deflate.h -- factored from deflate.c; somewhat parallel to lzw.h + + Copyright (C) 1999, 2006, 2009-2013 Free Software Foundation, Inc. + Copyright (C) 1992-1993 Jean-loup Gailly + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +/* Compile with MEDIUM_MEM to reduce the memory requirements or + * with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the + * entire input file can be held in memory (not possible on 16 bit systems). + * Warning: defining these symbols affects HASH_BITS (see below) and thus + * affects the compression ratio. The compressed output + * is still correct, and might even be smaller in some cases. + */ + +#ifndef HASH_BITS + +#ifdef SMALL_MEM +# define HASH_BITS 13 /* Number of bits used to hash strings */ +#endif +#ifdef MEDIUM_MEM +# define HASH_BITS 14 +#endif +#ifndef HASH_BITS +# define HASH_BITS 15 + /* For portability to 16 bit machines, do not use values above 15. */ +#endif + +#endif diff --git a/gzip.c b/gzip.c index 93cc738..7da26ec 100644 --- a/gzip.c +++ b/gzip.c @@ -151,10 +151,10 @@ DECLARE(uch, outbuf, OUTBUFSIZ+OUTBUF_EXTRA); DECLARE(ush, d_buf, DIST_BUFSIZE); DECLARE(uch, window, 2L*WSIZE); #ifndef MAXSEG_64K - DECLARE(ush, tab_prefix, 1L<<BITS); + DECLARE(ush, tab_prefix, 1L<<LZW_BITS); #else - DECLARE(ush, tab_prefix0, 1L<<(BITS-1)); - DECLARE(ush, tab_prefix1, 1L<<(BITS-1)); + DECLARE(ush, tab_prefix0, 1L<<(LZW_BITS-1)); + DECLARE(ush, tab_prefix1, 1L<<(LZW_BITS-1)); #endif /* local variables */ @@ -178,7 +178,7 @@ static int do_lzw = 0; /* generate output compatible with old compress (-Z int test = 0; /* test .gz file integrity */ static int foreground = 0; /* set if program run in foreground */ char *program_name; /* program name */ - int maxbits = BITS; /* max bits per code for LZW */ + int maxbits = LZW_BITS; /* max bits per code for LZW */ int method = DEFLATED;/* compression method */ int level = 6; /* compression level */ int exit_code = OK; /* program exit code */ @@ -551,10 +551,10 @@ int main (int argc, char **argv) ALLOC(ush, d_buf, DIST_BUFSIZE); ALLOC(uch, window, 2L*WSIZE); #ifndef MAXSEG_64K - ALLOC(ush, tab_prefix, 1L<<BITS); + ALLOC(ush, tab_prefix, 1L<<LZW_BITS); #else - ALLOC(ush, tab_prefix0, 1L<<(BITS-1)); - ALLOC(ush, tab_prefix1, 1L<<(BITS-1)); + ALLOC(ush, tab_prefix0, 1L<<(LZW_BITS-1)); + ALLOC(ush, tab_prefix1, 1L<<(LZW_BITS-1)); #endif exiting_signal = quiet ? SIGPIPE : 0; diff --git a/lzw.h b/lzw.h index 7f78134..59c1a40 100644 --- a/lzw.h +++ b/lzw.h @@ -16,10 +16,10 @@ along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#ifndef BITS -# define BITS 16 +#ifndef LZW_BITS +# define LZW_BITS 16 #endif -#define INIT_BITS 9 /* Initial number of bits per code */ +#define INIT_LZW_BITS 9 /* Initial number of bits per code */ #define LZW_MAGIC "\037\235" /* Magic header for lzw files, 1F 9D */ diff --git a/unlzh.c b/unlzh.c index 37084fe..bb85e9b 100644 --- a/unlzh.c +++ b/unlzh.c @@ -71,7 +71,7 @@ local void make_table (int nchar, uch bitlen[], /* local ush right[2 * NC - 1]; */ #define left prev #define right head -#if NC > (1<<(BITS-2)) +#if NC > (1<<(LZW_BITS-2)) error cannot overlay left+right and prev #endif diff --git a/unlzw.c b/unlzw.c index 676d58c..3390440 100644 --- a/unlzw.c +++ b/unlzw.c @@ -70,12 +70,12 @@ union bytes { #endif #ifndef MAXSEG_64K - /* DECLARE(ush, tab_prefix, (1<<BITS)); -- prefix code */ + /* DECLARE(ush, tab_prefix, (1<<LZW_BITS)); -- prefix code */ # define tab_prefixof(i) tab_prefix[i] # define clear_tab_prefixof() memzero(tab_prefix, 256); #else - /* DECLARE(ush, tab_prefix0, (1<<(BITS-1)); -- prefix for even codes */ - /* DECLARE(ush, tab_prefix1, (1<<(BITS-1)); -- prefix for odd codes */ + /* DECLARE(ush, tab_prefix0, (1<<(LZW_BITS-1)); -- prefix for even codes */ + /* DECLARE(ush, tab_prefix1, (1<<(LZW_BITS-1)); -- prefix for odd codes */ ush *tab_prefix[2]; # define tab_prefixof(i) tab_prefix[(i)&1][(i)>>1] # define clear_tab_prefixof() \ @@ -128,15 +128,15 @@ int unlzw(in, out) maxbits &= BIT_MASK; maxmaxcode = MAXCODE(maxbits); - if (maxbits > BITS) { + if (maxbits > LZW_BITS) { fprintf(stderr, "\n%s: %s: compressed with %d bits, can only handle %d bits\n", - program_name, ifname, maxbits, BITS); + program_name, ifname, maxbits, LZW_BITS); exit_code = ERROR; return ERROR; } rsize = insize; - maxcode = MAXCODE(n_bits = INIT_BITS)-1; + maxcode = MAXCODE(n_bits = INIT_LZW_BITS)-1; bitmask = (1<<n_bits)-1; oldcode = -1; finchar = 0; @@ -203,7 +203,7 @@ int unlzw(in, out) free_ent = FIRST - 1; posbits = ((posbits-1) + ((n_bits<<3)-(posbits-1+(n_bits<<3))%(n_bits<<3))); - maxcode = MAXCODE(n_bits = INIT_BITS)-1; + maxcode = MAXCODE(n_bits = INIT_LZW_BITS)-1; bitmask = (1<<n_bits)-1; goto resetbuf; } -- 1.7.11.7
>From 5ed21000b29bda8ee6adbfdfea008ca9fba6cba8 Mon Sep 17 00:00:00 2001 From: John Reiser <jrei...@bitwagon.com> Date: Tue, 18 Jun 2013 07:59:48 -0700 Subject: [PATCH 2/2] Use MAX() for array overlays. Prefer {prev,head,window} as base names. --- deflate.c | 10 ---------- gzip.c | 29 ++++++++++++++++++----------- gzip.h | 34 ++++++++++++++++++++++------------ trees.c | 3 --- unlzw.c | 1 + 5 files changed, 41 insertions(+), 36 deletions(-) diff --git a/deflate.c b/deflate.c index 31c81db..7fb6ed2 100644 --- a/deflate.c +++ b/deflate.c @@ -86,16 +86,6 @@ */ #include "deflate.h" -/* To save space (see unlzw.c), we overlay prev+head with tab_prefix and - * window with tab_suffix. Check that we can do this: - */ -#if (WSIZE<<1) > (1<<LZW_BITS) - error: cannot overlay window with tab_suffix and prev with tab_prefix0 -#endif -#if HASH_BITS > LZW_BITS-1 - error: cannot overlay head with tab_prefix1 -#endif - #define HASH_SIZE (unsigned)(1<<HASH_BITS) #define HASH_MASK (HASH_SIZE-1) #define WMASK (WSIZE-1) diff --git a/gzip.c b/gzip.c index 7da26ec..7309e84 100644 --- a/gzip.c +++ b/gzip.c @@ -149,12 +149,19 @@ static char const *const license_msg[] = { DECLARE(uch, inbuf, INBUFSIZ +INBUF_EXTRA); DECLARE(uch, outbuf, OUTBUFSIZ+OUTBUF_EXTRA); DECLARE(ush, d_buf, DIST_BUFSIZE); -DECLARE(uch, window, 2L*WSIZE); +#define LEN_window (2L*WSIZE) +#define LEN_tab_suffix (1L<<LZW_BITS) +DECLARE(uch, window, MAX(LEN_window, LEN_tab_suffix)); +#define LEN_prev WSIZE +#define LEN_head (1L<<HASH_BITS) #ifndef MAXSEG_64K - DECLARE(ush, tab_prefix, 1L<<LZW_BITS); +# define LEN_tab_prefix (1L<< LZW_BITS ) + DECLARE(ush, prev, MAX((LEN_prev + LEN_head), LEN_tab_prefix)); #else - DECLARE(ush, tab_prefix0, 1L<<(LZW_BITS-1)); - DECLARE(ush, tab_prefix1, 1L<<(LZW_BITS-1)); +# define LEN_tab_prefix_0 (1L<<(LZW_BITS-1)) +# define LEN_tab_prefix_1 (1L<<(LZW_BITS-1)) + DECLARE(ush, prev, MAX(LEN_prev, LEN_tab_prefix0)); + DECLARE(ush, head, MAX(LEN_head, LEN_tab_prefix1)); #endif /* local variables */ @@ -549,12 +556,12 @@ int main (int argc, char **argv) ALLOC(uch, inbuf, INBUFSIZ +INBUF_EXTRA); ALLOC(uch, outbuf, OUTBUFSIZ+OUTBUF_EXTRA); ALLOC(ush, d_buf, DIST_BUFSIZE); - ALLOC(uch, window, 2L*WSIZE); + ALLOC(uch, window, MAX(LEN_window, LEN_tab_suffix)); #ifndef MAXSEG_64K - ALLOC(ush, tab_prefix, 1L<<LZW_BITS); + ALLOC(ush, prev, MAX((LEN_prev + LEN_head), LEN_tab_prefix)); #else - ALLOC(ush, tab_prefix0, 1L<<(LZW_BITS-1)); - ALLOC(ush, tab_prefix1, 1L<<(LZW_BITS-1)); + ALLOC(ush, prev, MAX(LEN_prev, LEN_tab_prefix0)); + ALLOC(ush, head, MAX(LEN_head, LEN_tab_prefix1)); #endif exiting_signal = quiet ? SIGPIPE : 0; @@ -1880,10 +1887,10 @@ local void do_exit(exitcode) FREE(d_buf); FREE(window); #ifndef MAXSEG_64K - FREE(tab_prefix); + FREE(prev); #else - FREE(tab_prefix0); - FREE(tab_prefix1); + FREE(prev); + FREE(head); #endif exit(exitcode); } diff --git a/gzip.h b/gzip.h index 648073e..acbaf55 100644 --- a/gzip.h +++ b/gzip.h @@ -124,17 +124,28 @@ extern int method; /* compression method */ EXTERN(uch, inbuf); /* input buffer */ EXTERN(uch, outbuf); /* output buffer */ EXTERN(ush, d_buf); /* buffer for distances, see trees.c */ -EXTERN(uch, window); /* Sliding window and suffix table (unlzw) */ -#define tab_suffix window + +#ifndef WSIZE +# define WSIZE 0x8000 /* window size--must be at least 32K for zip's deflate method */ +#endif /* Code is slightly faster if WSIZE is a power of 2. */ + +#include "deflate.h" +#ifndef LZW_BITS +# define LZW_BITS 16 +#endif + +EXTERN(uch, window); /* Sliding window */ +#define tab_suffix window /* Suffix table (unlzw) */ + #ifndef MAXSEG_64K -# define tab_prefix prev /* hash link (see deflate.c) */ + EXTERN(ush, prev); /* hash link (see deflate.c) */ +# define tab_prefix prev /* prefix code (see unlzw.c) */ # define head (prev+WSIZE) /* hash head (see deflate.c) */ - EXTERN(ush, tab_prefix); /* prefix code (see unlzw.c) */ #else -# define tab_prefix0 prev -# define head tab_prefix1 - EXTERN(ush, tab_prefix0); /* prefix for even codes */ - EXTERN(ush, tab_prefix1); /* prefix for odd codes */ + EXTERN(ush, prev); /* hash link (see deflate.c) */ +# define tab_prefix0 prev /* prefix for even codes */ + EXTERN(ush, head); +# define tab_prefix1 head /* prefix for odd codes */ #endif extern unsigned insize; /* valid bytes in inbuf */ @@ -178,10 +189,6 @@ typedef int file_t; /* Do not use stdio */ #define BINARY 0 #define ASCII 1 -#ifndef WSIZE -# define WSIZE 0x8000 /* window size--must be a power of two, and */ -#endif /* at least 32K for zip's deflate method */ - #define MIN_MATCH 3 #define MAX_MATCH 258 /* The minimum and maximum match lengths */ @@ -207,6 +214,9 @@ extern int save_orig_name; /* set if original name must be saved */ #define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf(0)) #define try_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf(1)) +#define MIN(a,b) ((a) <= (b) ? (a) : (b)) /* XXX bug if arg has side effect */ +#define MAX(a,b) ((a) <= (b) ? (b) : (a)) /* XXX bug if arg has side effect */ + /* put_byte is used for the compressed output, put_ubyte for the * uncompressed output. However unlzw() uses window for its * suffix table instead of its output buffer, so it does not use put_ubyte diff --git a/trees.c b/trees.c index f2ad360..d14348a 100644 --- a/trees.c +++ b/trees.c @@ -330,9 +330,6 @@ local void set_file_type (void); * used. */ -#define MAX(a,b) (a >= b ? a : b) -/* the arguments must not have side effects */ - /* =========================================================================== * Allocate the match buffer, initialize the various tables and save the * location of the internal file attribute (ascii/binary) and method diff --git a/unlzw.c b/unlzw.c index 3390440..7bdf417 100644 --- a/unlzw.c +++ b/unlzw.c @@ -83,6 +83,7 @@ union bytes { memzero(tab_prefix1, 128); #endif #define de_stack ((char_type *)(&d_buf[DIST_BUFSIZE-1])) + /* DECLARE(uch, tab_suffix, (1<<LZW_BITS)); -- suffix code */ #define tab_suffixof(i) tab_suffix[i] int block_mode = BLOCK_MODE; /* block compress mode -C compatible with 2.0 */ -- 1.7.11.7