In gzip I want to try HASH_BITS greater than 15, but deflate.c interferes:
   #if HASH_BITS > BITS-1
      error: cannot overlay head with tab_prefix1

Investigation shows that the data space for LZW compress acts as the master,
with zip deflate being a slave whose arrays never can be larger.

Attached are two patches which make the two sides more equal.
The first patch prepares the way by renaming BITS to LZW_BITS
and creating new header file deflate.h similar to lzw.h.
The second patch uses MAX() when overlaying arrays, which allows
either to be larger.  (Beware 'sizeof' any previous master.)

Now I can vary HASH_BITS and LZW_BITS independently.

-- 

>From 4a3d2c59a280f11202f6b59465fd76119bb3eb75 Mon Sep 17 00:00:00 2001
From: John Reiser <jrei...@bitwagon.com>
Date: Mon, 17 Jun 2013 14:24:29 -0700
Subject: [PATCH 1/2] Prepare to overlay arrays in either order (zip deflate
 vs LZW compress) by renaming constant BITS ==> LZW_BITS
 and creating file deflate.h.

---
 deflate.c | 24 +++---------------------
 deflate.h | 41 +++++++++++++++++++++++++++++++++++++++++
 gzip.c    | 14 +++++++-------
 lzw.h     |  6 +++---
 unlzh.c   |  2 +-
 unlzw.c   | 14 +++++++-------
 6 files changed, 62 insertions(+), 39 deletions(-)
 create mode 100644 deflate.h

diff --git a/deflate.c b/deflate.c
index f0f2394..31c81db 100644
--- a/deflate.c
+++ b/deflate.c
@@ -85,32 +85,14 @@
  * Configuration parameters
  */
 
-/* Compile with MEDIUM_MEM to reduce the memory requirements or
- * with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the
- * entire input file can be held in memory (not possible on 16 bit systems).
- * Warning: defining these symbols affects HASH_BITS (see below) and thus
- * affects the compression ratio. The compressed output
- * is still correct, and might even be smaller in some cases.
- */
-
-#ifdef SMALL_MEM
-#   define HASH_BITS  13  /* Number of bits used to hash strings */
-#endif
-#ifdef MEDIUM_MEM
-#   define HASH_BITS  14
-#endif
-#ifndef HASH_BITS
-#   define HASH_BITS  15
-   /* For portability to 16 bit machines, do not use values above 15. */
-#endif
-
+#include "deflate.h"
 /* To save space (see unlzw.c), we overlay prev+head with tab_prefix and
  * window with tab_suffix. Check that we can do this:
  */
-#if (WSIZE<<1) > (1<<BITS)
+#if (WSIZE<<1) > (1<<LZW_BITS)
    error: cannot overlay window with tab_suffix and prev with tab_prefix0
 #endif
-#if HASH_BITS > BITS-1
+#if HASH_BITS > LZW_BITS-1
    error: cannot overlay head with tab_prefix1
 #endif
 
diff --git a/deflate.h b/deflate.h
new file mode 100644
index 0000000..43b02ea
--- /dev/null
+++ b/deflate.h
@@ -0,0 +1,41 @@
+/* deflate.h -- factored from deflate.c; somewhat parallel to lzw.h
+
+   Copyright (C) 1999, 2006, 2009-2013 Free Software Foundation, Inc.
+   Copyright (C) 1992-1993 Jean-loup Gailly
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
+
+/* Compile with MEDIUM_MEM to reduce the memory requirements or
+ * with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the
+ * entire input file can be held in memory (not possible on 16 bit systems).
+ * Warning: defining these symbols affects HASH_BITS (see below) and thus
+ * affects the compression ratio. The compressed output
+ * is still correct, and might even be smaller in some cases.
+ */
+
+#ifndef HASH_BITS
+
+#ifdef SMALL_MEM
+#   define HASH_BITS  13  /* Number of bits used to hash strings */
+#endif
+#ifdef MEDIUM_MEM
+#   define HASH_BITS  14
+#endif
+#ifndef HASH_BITS
+#   define HASH_BITS  15
+   /* For portability to 16 bit machines, do not use values above 15. */
+#endif
+
+#endif
diff --git a/gzip.c b/gzip.c
index 93cc738..7da26ec 100644
--- a/gzip.c
+++ b/gzip.c
@@ -151,10 +151,10 @@ DECLARE(uch, outbuf, OUTBUFSIZ+OUTBUF_EXTRA);
 DECLARE(ush, d_buf,  DIST_BUFSIZE);
 DECLARE(uch, window, 2L*WSIZE);
 #ifndef MAXSEG_64K
-    DECLARE(ush, tab_prefix, 1L<<BITS);
+    DECLARE(ush, tab_prefix, 1L<<LZW_BITS);
 #else
-    DECLARE(ush, tab_prefix0, 1L<<(BITS-1));
-    DECLARE(ush, tab_prefix1, 1L<<(BITS-1));
+    DECLARE(ush, tab_prefix0, 1L<<(LZW_BITS-1));
+    DECLARE(ush, tab_prefix1, 1L<<(LZW_BITS-1));
 #endif
 
                 /* local variables */
@@ -178,7 +178,7 @@ static int do_lzw = 0;       /* generate output compatible with old compress (-Z
        int test = 0;         /* test .gz file integrity */
 static int foreground = 0;   /* set if program run in foreground */
        char *program_name;   /* program name */
-       int maxbits = BITS;   /* max bits per code for LZW */
+       int maxbits = LZW_BITS;   /* max bits per code for LZW */
        int method = DEFLATED;/* compression method */
        int level = 6;        /* compression level */
        int exit_code = OK;   /* program exit code */
@@ -551,10 +551,10 @@ int main (int argc, char **argv)
     ALLOC(ush, d_buf,  DIST_BUFSIZE);
     ALLOC(uch, window, 2L*WSIZE);
 #ifndef MAXSEG_64K
-    ALLOC(ush, tab_prefix, 1L<<BITS);
+    ALLOC(ush, tab_prefix, 1L<<LZW_BITS);
 #else
-    ALLOC(ush, tab_prefix0, 1L<<(BITS-1));
-    ALLOC(ush, tab_prefix1, 1L<<(BITS-1));
+    ALLOC(ush, tab_prefix0, 1L<<(LZW_BITS-1));
+    ALLOC(ush, tab_prefix1, 1L<<(LZW_BITS-1));
 #endif
 
     exiting_signal = quiet ? SIGPIPE : 0;
diff --git a/lzw.h b/lzw.h
index 7f78134..59c1a40 100644
--- a/lzw.h
+++ b/lzw.h
@@ -16,10 +16,10 @@
    along with this program; if not, write to the Free Software Foundation,
    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
 
-#ifndef BITS
-#  define BITS 16
+#ifndef LZW_BITS
+#  define LZW_BITS 16
 #endif
-#define INIT_BITS 9              /* Initial number of bits per code */
+#define INIT_LZW_BITS 9         /* Initial number of bits per code */
 
 #define	LZW_MAGIC  "\037\235"   /* Magic header for lzw files, 1F 9D */
 
diff --git a/unlzh.c b/unlzh.c
index 37084fe..bb85e9b 100644
--- a/unlzh.c
+++ b/unlzh.c
@@ -71,7 +71,7 @@ local void make_table (int nchar, uch bitlen[],
 /* local ush right[2 * NC - 1]; */
 #define left  prev
 #define right head
-#if NC > (1<<(BITS-2))
+#if NC > (1<<(LZW_BITS-2))
     error cannot overlay left+right and prev
 #endif
 
diff --git a/unlzw.c b/unlzw.c
index 676d58c..3390440 100644
--- a/unlzw.c
+++ b/unlzw.c
@@ -70,12 +70,12 @@ union	bytes {
 #endif
 
 #ifndef MAXSEG_64K
-   /* DECLARE(ush, tab_prefix, (1<<BITS)); -- prefix code */
+   /* DECLARE(ush, tab_prefix, (1<<LZW_BITS)); -- prefix code */
 #  define tab_prefixof(i) tab_prefix[i]
 #  define clear_tab_prefixof()	memzero(tab_prefix, 256);
 #else
-   /* DECLARE(ush, tab_prefix0, (1<<(BITS-1)); -- prefix for even codes */
-   /* DECLARE(ush, tab_prefix1, (1<<(BITS-1)); -- prefix for odd  codes */
+   /* DECLARE(ush, tab_prefix0, (1<<(LZW_BITS-1)); -- prefix for even codes */
+   /* DECLARE(ush, tab_prefix1, (1<<(LZW_BITS-1)); -- prefix for odd  codes */
    ush *tab_prefix[2];
 #  define tab_prefixof(i) tab_prefix[(i)&1][(i)>>1]
 #  define clear_tab_prefixof()	\
@@ -128,15 +128,15 @@ int unlzw(in, out)
     maxbits &= BIT_MASK;
     maxmaxcode = MAXCODE(maxbits);
 
-    if (maxbits > BITS) {
+    if (maxbits > LZW_BITS) {
         fprintf(stderr,
                 "\n%s: %s: compressed with %d bits, can only handle %d bits\n",
-                program_name, ifname, maxbits, BITS);
+                program_name, ifname, maxbits, LZW_BITS);
         exit_code = ERROR;
         return ERROR;
     }
     rsize = insize;
-    maxcode = MAXCODE(n_bits = INIT_BITS)-1;
+    maxcode = MAXCODE(n_bits = INIT_LZW_BITS)-1;
     bitmask = (1<<n_bits)-1;
     oldcode = -1;
     finchar = 0;
@@ -203,7 +203,7 @@ int unlzw(in, out)
                 free_ent = FIRST - 1;
                 posbits = ((posbits-1) +
                            ((n_bits<<3)-(posbits-1+(n_bits<<3))%(n_bits<<3)));
-                maxcode = MAXCODE(n_bits = INIT_BITS)-1;
+                maxcode = MAXCODE(n_bits = INIT_LZW_BITS)-1;
                 bitmask = (1<<n_bits)-1;
                 goto resetbuf;
             }
-- 
1.7.11.7

>From 5ed21000b29bda8ee6adbfdfea008ca9fba6cba8 Mon Sep 17 00:00:00 2001
From: John Reiser <jrei...@bitwagon.com>
Date: Tue, 18 Jun 2013 07:59:48 -0700
Subject: [PATCH 2/2] Use MAX() for array overlays.  Prefer {prev,head,window}
 as base names.

---
 deflate.c | 10 ----------
 gzip.c    | 29 ++++++++++++++++++-----------
 gzip.h    | 34 ++++++++++++++++++++++------------
 trees.c   |  3 ---
 unlzw.c   |  1 +
 5 files changed, 41 insertions(+), 36 deletions(-)

diff --git a/deflate.c b/deflate.c
index 31c81db..7fb6ed2 100644
--- a/deflate.c
+++ b/deflate.c
@@ -86,16 +86,6 @@
  */
 
 #include "deflate.h"
-/* To save space (see unlzw.c), we overlay prev+head with tab_prefix and
- * window with tab_suffix. Check that we can do this:
- */
-#if (WSIZE<<1) > (1<<LZW_BITS)
-   error: cannot overlay window with tab_suffix and prev with tab_prefix0
-#endif
-#if HASH_BITS > LZW_BITS-1
-   error: cannot overlay head with tab_prefix1
-#endif
-
 #define HASH_SIZE (unsigned)(1<<HASH_BITS)
 #define HASH_MASK (HASH_SIZE-1)
 #define WMASK     (WSIZE-1)
diff --git a/gzip.c b/gzip.c
index 7da26ec..7309e84 100644
--- a/gzip.c
+++ b/gzip.c
@@ -149,12 +149,19 @@ static char const *const license_msg[] = {
 DECLARE(uch, inbuf,  INBUFSIZ +INBUF_EXTRA);
 DECLARE(uch, outbuf, OUTBUFSIZ+OUTBUF_EXTRA);
 DECLARE(ush, d_buf,  DIST_BUFSIZE);
-DECLARE(uch, window, 2L*WSIZE);
+#define LEN_window (2L*WSIZE)
+#define LEN_tab_suffix (1L<<LZW_BITS)
+DECLARE(uch, window, MAX(LEN_window, LEN_tab_suffix));
+#define LEN_prev        WSIZE
+#define LEN_head       (1L<<HASH_BITS)
 #ifndef MAXSEG_64K
-    DECLARE(ush, tab_prefix, 1L<<LZW_BITS);
+#   define LEN_tab_prefix   (1L<< LZW_BITS   )
+    DECLARE(ush, prev, MAX((LEN_prev + LEN_head), LEN_tab_prefix));
 #else
-    DECLARE(ush, tab_prefix0, 1L<<(LZW_BITS-1));
-    DECLARE(ush, tab_prefix1, 1L<<(LZW_BITS-1));
+#   define LEN_tab_prefix_0 (1L<<(LZW_BITS-1))
+#   define LEN_tab_prefix_1 (1L<<(LZW_BITS-1))
+    DECLARE(ush, prev, MAX(LEN_prev, LEN_tab_prefix0));
+    DECLARE(ush, head, MAX(LEN_head, LEN_tab_prefix1));
 #endif
 
                 /* local variables */
@@ -549,12 +556,12 @@ int main (int argc, char **argv)
     ALLOC(uch, inbuf,  INBUFSIZ +INBUF_EXTRA);
     ALLOC(uch, outbuf, OUTBUFSIZ+OUTBUF_EXTRA);
     ALLOC(ush, d_buf,  DIST_BUFSIZE);
-    ALLOC(uch, window, 2L*WSIZE);
+    ALLOC(uch, window, MAX(LEN_window, LEN_tab_suffix));
 #ifndef MAXSEG_64K
-    ALLOC(ush, tab_prefix, 1L<<LZW_BITS);
+    ALLOC(ush, prev, MAX((LEN_prev + LEN_head), LEN_tab_prefix));
 #else
-    ALLOC(ush, tab_prefix0, 1L<<(LZW_BITS-1));
-    ALLOC(ush, tab_prefix1, 1L<<(LZW_BITS-1));
+    ALLOC(ush, prev, MAX(LEN_prev, LEN_tab_prefix0));
+    ALLOC(ush, head, MAX(LEN_head, LEN_tab_prefix1));
 #endif
 
     exiting_signal = quiet ? SIGPIPE : 0;
@@ -1880,10 +1887,10 @@ local void do_exit(exitcode)
     FREE(d_buf);
     FREE(window);
 #ifndef MAXSEG_64K
-    FREE(tab_prefix);
+    FREE(prev);
 #else
-    FREE(tab_prefix0);
-    FREE(tab_prefix1);
+    FREE(prev);
+    FREE(head);
 #endif
     exit(exitcode);
 }
diff --git a/gzip.h b/gzip.h
index 648073e..acbaf55 100644
--- a/gzip.h
+++ b/gzip.h
@@ -124,17 +124,28 @@ extern int method;         /* compression method */
 EXTERN(uch, inbuf);          /* input buffer */
 EXTERN(uch, outbuf);         /* output buffer */
 EXTERN(ush, d_buf);          /* buffer for distances, see trees.c */
-EXTERN(uch, window);         /* Sliding window and suffix table (unlzw) */
-#define tab_suffix window
+
+#ifndef WSIZE
+#  define WSIZE 0x8000     /* window size--must be at least 32K for zip's deflate method */
+#endif                     /* Code is slightly faster if WSIZE is a power of 2. */
+
+#include "deflate.h"
+#ifndef LZW_BITS
+#  define LZW_BITS 16
+#endif
+
+EXTERN(uch, window);         /* Sliding window */
+#define tab_suffix window    /* Suffix table (unlzw) */
+
 #ifndef MAXSEG_64K
-#  define tab_prefix prev    /* hash link (see deflate.c) */
+   EXTERN(ush, prev);        /* hash link (see deflate.c) */
+#  define tab_prefix prev    /* prefix code (see unlzw.c) */
 #  define head (prev+WSIZE)  /* hash head (see deflate.c) */
-   EXTERN(ush, tab_prefix);  /* prefix code (see unlzw.c) */
 #else
-#  define tab_prefix0 prev
-#  define head tab_prefix1
-   EXTERN(ush, tab_prefix0); /* prefix for even codes */
-   EXTERN(ush, tab_prefix1); /* prefix for odd  codes */
+   EXTERN(ush, prev);        /* hash link (see deflate.c) */
+#  define tab_prefix0 prev   /* prefix for even codes */
+   EXTERN(ush, head);
+#  define tab_prefix1 head   /* prefix for odd codes */
 #endif
 
 extern unsigned insize; /* valid bytes in inbuf */
@@ -178,10 +189,6 @@ typedef int file_t;     /* Do not use stdio */
 #define BINARY  0
 #define ASCII   1
 
-#ifndef WSIZE
-#  define WSIZE 0x8000     /* window size--must be a power of two, and */
-#endif                     /*  at least 32K for zip's deflate method */
-
 #define MIN_MATCH  3
 #define MAX_MATCH  258
 /* The minimum and maximum match lengths */
@@ -207,6 +214,9 @@ extern int save_orig_name; /* set if original name must be saved */
 #define get_byte()  (inptr < insize ? inbuf[inptr++] : fill_inbuf(0))
 #define try_byte()  (inptr < insize ? inbuf[inptr++] : fill_inbuf(1))
 
+#define MIN(a,b) ((a) <= (b) ? (a) : (b)) /* XXX bug if arg has side effect */
+#define MAX(a,b) ((a) <= (b) ? (b) : (a)) /* XXX bug if arg has side effect */
+
 /* put_byte is used for the compressed output, put_ubyte for the
  * uncompressed output. However unlzw() uses window for its
  * suffix table instead of its output buffer, so it does not use put_ubyte
diff --git a/trees.c b/trees.c
index f2ad360..d14348a 100644
--- a/trees.c
+++ b/trees.c
@@ -330,9 +330,6 @@ local void set_file_type  (void);
  * used.
  */
 
-#define MAX(a,b) (a >= b ? a : b)
-/* the arguments must not have side effects */
-
 /* ===========================================================================
  * Allocate the match buffer, initialize the various tables and save the
  * location of the internal file attribute (ascii/binary) and method
diff --git a/unlzw.c b/unlzw.c
index 3390440..7bdf417 100644
--- a/unlzw.c
+++ b/unlzw.c
@@ -83,6 +83,7 @@ union	bytes {
       memzero(tab_prefix1, 128);
 #endif
 #define de_stack        ((char_type *)(&d_buf[DIST_BUFSIZE-1]))
+   /* DECLARE(uch, tab_suffix, (1<<LZW_BITS)); -- suffix code */
 #define tab_suffixof(i) tab_suffix[i]
 
 int block_mode = BLOCK_MODE; /* block compress mode -C compatible with 2.0 */
-- 
1.7.11.7

Reply via email to