thanks, moved the xors to the front of the lines

On Sun, Oct 27, 2024, 13:41 Bruno Haible <br...@clisp.org> wrote:
> Sam Russell wrote:
> > > Also, in GNU coding style, when line breaking is needed within
> > expressions,
> > > we do the line break before the operator, not after the operator. [1]
> > > This affects lib/crc.c lines 73..80, 102..103.
> >
> > Done.
>
> I meant this expression:
>
>   crc = crc32_sliceby8_table[0][(local_buf >> 56) & 0xFF] ^
>         crc32_sliceby8_table[1][(local_buf >> 48) & 0xFF] ^
>         crc32_sliceby8_table[2][(local_buf >> 40) & 0xFF] ^
>         crc32_sliceby8_table[3][(local_buf >> 32) & 0xFF] ^
>         crc32_sliceby8_table[4][(local_buf >> 24) & 0xFF] ^
>         crc32_sliceby8_table[5][(local_buf >> 16) & 0xFF] ^
>         crc32_sliceby8_table[6][(local_buf >> 8) & 0xFF] ^
>         crc32_sliceby8_table[7][local_buf & 0xFF];
>
> which looks more GNU-style like this:
>
>   crc =   crc32_sliceby8_table[0][(local_buf >> 56) & 0xFF]
>         ^ crc32_sliceby8_table[1][(local_buf >> 48) & 0xFF]
>         ^ crc32_sliceby8_table[2][(local_buf >> 40) & 0xFF]
>         ^ crc32_sliceby8_table[3][(local_buf >> 32) & 0xFF]
>         ^ crc32_sliceby8_table[4][(local_buf >> 24) & 0xFF]
>         ^ crc32_sliceby8_table[5][(local_buf >> 16) & 0xFF]
>         ^ crc32_sliceby8_table[6][(local_buf >> 8) & 0xFF]
>         ^ crc32_sliceby8_table[7][local_buf & 0xFF];
>
> Other than that, it looks fine.
>
> Bruno
>
>
>
>
From af2cfb7111140268cc74597272bf22b2cc258061 Mon Sep 17 00:00:00 2001
From: Sam Russell <sam.h.russell@gmail.com>
Date: Sun, 27 Oct 2024 13:19:17 +0100
Subject: [PATCH] crc: New optimised slice-by-8 implementation

* lib/crc.c: Implementation of slice-by-8 algorithm
* lib/crc-generate-table.c: Generation code for CRC32 lookup tables
* modules/crc (Depends-on): Add endian.
(Makefile.am): Build slice-by-8 tables from crc-generate-table.c.
---
 ChangeLog                |   8 +++
 lib/crc-generate-table.c | 150 +++++++++++++++++++++++++++++++++++++++
 lib/crc.c                | 104 +++++++++++++++++++++++++++
 modules/crc              |  16 +++++
 4 files changed, 278 insertions(+)
 create mode 100644 lib/crc-generate-table.c

diff --git a/ChangeLog b/ChangeLog
index c70df89b2f..90a0256baf 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2024-10-27  Sam Russell  <sam.h.russell@gmail.com>
+
+	crc: New optimised slice-by-8 implementation
+	* lib/crc.c: Implementation of slice-by-8 algorithm
+	* lib/crc-generate-table.c: Generation code for CRC32 lookup tables
+	* modules/crc (Depends-on): Add endian.
+	(Makefile.am): Build slice-by-8 tables from crc-generate-table.c.
+
 2024-10-17  Sam Russell  <sam.h.russell@gmail.com>
 
 	crc: New tests for non-byte-aligned data.
diff --git a/lib/crc-generate-table.c b/lib/crc-generate-table.c
new file mode 100644
index 0000000000..01f9ff77ed
--- /dev/null
+++ b/lib/crc-generate-table.c
@@ -0,0 +1,150 @@
+/* crc-generate-table.c -- cyclic redundancy check table generator
+   Copyright (C) 2024 Free Software Foundation, Inc.
+
+   This file is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation, either version 3 of the
+   License, or (at your option) any later version.
+
+   This file is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Sam Russell. */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * The following function was extracted from RFC 1952 by Sam
+ * Russell.  It was modified to remove the reference to the
+ * crc_table_computed flag, to extend it to accept a variable number
+ * of bits (described below), and reformatted to follow GNU
+ * formatting guidelines.
+ *
+ * make_crc_table() takes a number of bits and generates a lookup
+ * table for the CRC32 algorithm. Usage is as follows:
+ *
+ * make_crc_table(8) : generate for CRC32(0x00) to CRC32(0xFF)
+ * make_crc_table(16) : generate for CRC32(0x0000) to CRC32 (0xFF00)
+ * in increments of 0x100
+ *
+ * This is used for the Sarwate algorithm specified in RFC 1952
+ * which uses a single lookup table of make_crc_table(8), and for
+ * the slice-by-8 algorithm which uses 8 tables from in 8-bit
+ * increments from make_crc_table(8) to make_crc_table(64)
+ */
+unsigned long crc_table[256];
+
+void
+make_crc_table (int bits)
+{
+  unsigned long c;
+
+  int n, k;
+  for (n = 0; n < 256; n++)
+    {
+      c = (unsigned long) n;
+      for (k = 0; k < bits; k++)
+        {
+          if (c & 1)
+            c = 0xedb88320L ^ (c >> 1);
+          else
+            c = c >> 1;
+        }
+      crc_table[n] = c;
+    }
+}
+
+void
+print_crc_table (FILE * stream, int bits)
+{
+  make_crc_table (bits);
+
+  fprintf (stream, "  {");
+
+  for (int i = 0; i < 256; i++)
+    {
+      if ((i % 6) == 0)
+        fprintf (stream, "\n   ");
+      if (i != 255)
+        fprintf (stream, " 0x%08lX,", crc_table[i]);
+      else
+        fprintf (stream, " 0x%08lX\n", crc_table[i]);
+    }
+
+  fprintf (stream, "  }");
+}
+
+void
+print_header (FILE * stream)
+{
+  fprintf (stream, "/* Slice-by-8 lookup tables */\n");
+  fprintf (stream, "static const uint32_t crc32_sliceby8_table[][256] = {\n");
+  for (int i = 8; i <= 64; i += 8)
+    {
+      print_crc_table (stream, i);
+      if (i < 64)
+        fprintf (stream, ",");
+      fprintf (stream, "\n");
+    }
+  fprintf (stream, "};\n\n");
+}
+
+void
+print_copyright_notice (FILE * stream)
+{
+  fprintf (stream, "/* crc.c -- cyclic redundancy checks\n");
+  fprintf (stream,
+           "Copyright (C) 2005-2006, 2009-2024 Free Software Foundation, Inc.\n");
+  fprintf (stream, "\n");
+  fprintf (stream,
+           "This file is free software: you can redistribute it and/or modify\n");
+  fprintf (stream,
+           "it under the terms of the GNU Lesser General Public License as\n");
+  fprintf (stream,
+           "published by the Free Software Foundation, either version 3 of the\n");
+  fprintf (stream, "License, or (at your option) any later version.\n");
+  fprintf (stream, "\n");
+  fprintf (stream,
+           "This file is distributed in the hope that it will be useful,\n");
+  fprintf (stream,
+           "but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
+  fprintf (stream,
+           "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n");
+  fprintf (stream, "GNU Lesser General Public License for more details.\n");
+  fprintf (stream, "\n");
+  fprintf (stream,
+           "You should have received a copy of the GNU Lesser General Public License\n");
+  fprintf (stream,
+           "along with this program.  If not, see <https://www.gnu.org/licenses/>.  */\n\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+  if (argc != 2)
+    {
+      fprintf (stderr, " Usage: %s crc-sliceby8.h\n", argv[0]);
+      exit (1);
+    }
+  FILE *stream;
+
+  stream = fopen (argv[1], "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", argv[1]);
+      exit (1);
+    }
+
+  print_copyright_notice (stream);
+  print_header (stream);
+
+  return 0;
+}
diff --git a/lib/crc.c b/lib/crc.c
index 0216932f28..10260e85ed 100644
--- a/lib/crc.c
+++ b/lib/crc.c
@@ -19,6 +19,108 @@
 #include <config.h>
 
 #include "crc.h"
+#include "endian.h"
+
+#include <string.h>
+
+#ifdef GL_CRC_SLICE_BY_8
+#include "crc-sliceby8.h"
+
+/*
+ * When we calculate the CRC32 byte-by-byte we take the checksum
+ * up to the previous byte, XOR it with our new byte, run CRC32
+ * and then XOR against the remaining bytes from the prior checksum:
+ *
+ * c0 = prior checksum
+ * d0,d1 = bytes 0 and 1 of our data
+ * c1 = (c0 >> 8)
+ * c2 = CRC32(c0 ^ d0) ^ c1
+ * c3 = (c2 >> 8)
+ * c4 = CRC32(c2 ^ d1) ^ c3
+ *
+ * The CRC32 function is "affine" which means that
+ * CRC32(a ^ b) = CRC32(a) ^ CRC32(b)
+ *
+ * and therefore we can move some of the terms around, e.g.
+ *
+ * c4 = CRC32(CRC32(c0 ^ d0) ^ c1 ^ d1) ^ c3
+ * c4 = CRC32(CRC32(c0 ^ d0)) ^ CRC32(c1 ^ d1) ^ c3
+ *
+ * Another way to look at this is
+ *
+ * CRC32(0x1200) ^ CRC32(0x34) = CRC32(0x1234)
+ *
+ * With the slice-by-8 method, we extend the table-lookup technique
+ * from a single table of 0x00 to 0xFF, but also every 0x100 from
+ * 0x0000 to 0xFF00, and for 0x000000 to 0xFF0000 etc.
+ *
+ * We can then calculate by taking 8 bytes of plaintext and then
+ * looking up each one individually and XORing them together
+ * at the end.
+ * 
+ * This approach has been mentioned by multiple sources but one of
+ * the common references is 10.1109/TC.2008.85 by Kounavis
+ * and Berry
+ */
+
+static uint32_t
+crc32_update_no_xor_slice_by_8 (uint32_t crc, const char *buf)
+{
+  uint64_t local_buf;
+  memcpy (&local_buf, buf, 8);
+  local_buf = le64toh (local_buf) ^ crc;
+  crc = crc32_sliceby8_table[0][(local_buf >> 56) & 0xFF]
+        ^ crc32_sliceby8_table[1][(local_buf >> 48) & 0xFF]
+        ^ crc32_sliceby8_table[2][(local_buf >> 40) & 0xFF]
+        ^ crc32_sliceby8_table[3][(local_buf >> 32) & 0xFF]
+        ^ crc32_sliceby8_table[4][(local_buf >> 24) & 0xFF]
+        ^ crc32_sliceby8_table[5][(local_buf >> 16) & 0xFF]
+        ^ crc32_sliceby8_table[6][(local_buf >> 8) & 0xFF]
+        ^ crc32_sliceby8_table[7][local_buf & 0xFF];
+
+  return crc;
+}
+
+uint32_t
+crc32_update_no_xor_slice_by_n (uint32_t crc, const char *buf,
+                                size_t num_bytes)
+{
+  uint64_t local_buf;
+  size_t i, shift_amount;
+
+  memcpy (&local_buf, buf, num_bytes);
+  local_buf = le64toh (local_buf) ^ crc;
+
+  if (num_bytes >= 4)
+    crc = 0;
+  else
+    crc = crc >> (num_bytes * 8);
+
+  for (i = 0; i < num_bytes; i++)
+    {
+      shift_amount = ((num_bytes - i - 1) * 8);
+      crc = crc ^ crc32_sliceby8_table[i][(local_buf >> shift_amount) & 0xFF];
+    }
+
+  return crc;
+}
+
+uint32_t
+crc32_update_no_xor (uint32_t crc, const char *buf, size_t len)
+{
+  size_t n, slice_alignment;
+
+  slice_alignment = (len & (-8));
+
+  for (n = 0; n < slice_alignment; n += 8)
+    crc = crc32_update_no_xor_slice_by_8 (crc, buf + n);
+
+  if (len > n)
+    crc = crc32_update_no_xor_slice_by_n (crc, buf + n, len - n);
+
+  return crc;
+}
+#else
 
 /* Table of CRCs of all 8-bit messages.  Generated by running code
    from RFC 1952 modified to print out the table. */
@@ -84,6 +186,8 @@ crc32_update_no_xor (uint32_t crc, const char *buf, size_t len)
   return crc;
 }
 
+#endif
+
 uint32_t
 crc32_no_xor (const char *buf, size_t len)
 {
diff --git a/modules/crc b/modules/crc
index bd7fe29dba..3c08e20b4f 100644
--- a/modules/crc
+++ b/modules/crc
@@ -4,15 +4,31 @@ Compute cyclic redundancy codes.
 Files:
 lib/crc.h
 lib/crc.c
+lib/crc-generate-table.c
 
 Depends-on:
 stdint
+endian
 
 configure.ac:
+	if test $gl_crc_slice_by_8 != no; then
+	  AC_DEFINE([GL_CRC_SLICE_BY_8], [1],
+	  [Define to get faster but larger CRC32 operation.])
+	fi
 
 Makefile.am:
 lib_SOURCES += crc.c
 
+$(srcdir)/crc-sliceby8.h: $(srcdir)/crc-generate-table.c
+	$(COMPILE) $(AM_LDFLAGS) $(LDFLAGS) -o crc-generate-table $(srcdir)/crc-generate-table.c \
+	&& ./crc-generate-table $(srcdir)/crc-sliceby8.h-t \
+	&& rm -f crc-generate-table \
+	&& mv $(srcdir)/crc-sliceby8.h-t $(srcdir)/crc-sliceby8.h
+BUILT_SOURCES        += crc-sliceby8.h
+MOSTLYCLEANFILES     += crc-sliceby8.h-t crc-generate-table
+MAINTAINERCLEANFILES += crc-sliceby8.h
+EXTRA_DIST           += crc-sliceby8.h
+
 Include:
 "crc.h"
 
-- 
2.34.1

Reply via email to