thanks, moved the xors to the front of the lines On Sun, Oct 27, 2024, 13:41 Bruno Haible <br...@clisp.org> wrote:
> Sam Russell wrote: > > > Also, in GNU coding style, when line breaking is needed within > > expressions, > > > we do the line break before the operator, not after the operator. [1] > > > This affects lib/crc.c lines 73..80, 102..103. > > > > Done. > > I meant this expression: > > crc = crc32_sliceby8_table[0][(local_buf >> 56) & 0xFF] ^ > crc32_sliceby8_table[1][(local_buf >> 48) & 0xFF] ^ > crc32_sliceby8_table[2][(local_buf >> 40) & 0xFF] ^ > crc32_sliceby8_table[3][(local_buf >> 32) & 0xFF] ^ > crc32_sliceby8_table[4][(local_buf >> 24) & 0xFF] ^ > crc32_sliceby8_table[5][(local_buf >> 16) & 0xFF] ^ > crc32_sliceby8_table[6][(local_buf >> 8) & 0xFF] ^ > crc32_sliceby8_table[7][local_buf & 0xFF]; > > which looks more GNU-style like this: > > crc = crc32_sliceby8_table[0][(local_buf >> 56) & 0xFF] > ^ crc32_sliceby8_table[1][(local_buf >> 48) & 0xFF] > ^ crc32_sliceby8_table[2][(local_buf >> 40) & 0xFF] > ^ crc32_sliceby8_table[3][(local_buf >> 32) & 0xFF] > ^ crc32_sliceby8_table[4][(local_buf >> 24) & 0xFF] > ^ crc32_sliceby8_table[5][(local_buf >> 16) & 0xFF] > ^ crc32_sliceby8_table[6][(local_buf >> 8) & 0xFF] > ^ crc32_sliceby8_table[7][local_buf & 0xFF]; > > Other than that, it looks fine. > > Bruno > > > >
From af2cfb7111140268cc74597272bf22b2cc258061 Mon Sep 17 00:00:00 2001 From: Sam Russell <sam.h.russell@gmail.com> Date: Sun, 27 Oct 2024 13:19:17 +0100 Subject: [PATCH] crc: New optimised slice-by-8 implementation * lib/crc.c: Implementation of slice-by-8 algorithm * lib/crc-generate-table.c: Generation code for CRC32 lookup tables * modules/crc (Depends-on): Add endian. (Makefile.am): Build slice-by-8 tables from crc-generate-table.c. --- ChangeLog | 8 +++ lib/crc-generate-table.c | 150 +++++++++++++++++++++++++++++++++++++++ lib/crc.c | 104 +++++++++++++++++++++++++++ modules/crc | 16 +++++ 4 files changed, 278 insertions(+) create mode 100644 lib/crc-generate-table.c diff --git a/ChangeLog b/ChangeLog index c70df89b2f..90a0256baf 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2024-10-27 Sam Russell <sam.h.russell@gmail.com> + + crc: New optimised slice-by-8 implementation + * lib/crc.c: Implementation of slice-by-8 algorithm + * lib/crc-generate-table.c: Generation code for CRC32 lookup tables + * modules/crc (Depends-on): Add endian. + (Makefile.am): Build slice-by-8 tables from crc-generate-table.c. + 2024-10-17 Sam Russell <sam.h.russell@gmail.com> crc: New tests for non-byte-aligned data. diff --git a/lib/crc-generate-table.c b/lib/crc-generate-table.c new file mode 100644 index 0000000000..01f9ff77ed --- /dev/null +++ b/lib/crc-generate-table.c @@ -0,0 +1,150 @@ +/* crc-generate-table.c -- cyclic redundancy check table generator + Copyright (C) 2024 Free Software Foundation, Inc. + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. + + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* Written by Sam Russell. */ + +#include <config.h> + +#include <stdio.h> +#include <stdlib.h> + +/* + * The following function was extracted from RFC 1952 by Sam + * Russell. It was modified to remove the reference to the + * crc_table_computed flag, to extend it to accept a variable number + * of bits (described below), and reformatted to follow GNU + * formatting guidelines. + * + * make_crc_table() takes a number of bits and generates a lookup + * table for the CRC32 algorithm. Usage is as follows: + * + * make_crc_table(8) : generate for CRC32(0x00) to CRC32(0xFF) + * make_crc_table(16) : generate for CRC32(0x0000) to CRC32 (0xFF00) + * in increments of 0x100 + * + * This is used for the Sarwate algorithm specified in RFC 1952 + * which uses a single lookup table of make_crc_table(8), and for + * the slice-by-8 algorithm which uses 8 tables from in 8-bit + * increments from make_crc_table(8) to make_crc_table(64) + */ +unsigned long crc_table[256]; + +void +make_crc_table (int bits) +{ + unsigned long c; + + int n, k; + for (n = 0; n < 256; n++) + { + c = (unsigned long) n; + for (k = 0; k < bits; k++) + { + if (c & 1) + c = 0xedb88320L ^ (c >> 1); + else + c = c >> 1; + } + crc_table[n] = c; + } +} + +void +print_crc_table (FILE * stream, int bits) +{ + make_crc_table (bits); + + fprintf (stream, " {"); + + for (int i = 0; i < 256; i++) + { + if ((i % 6) == 0) + fprintf (stream, "\n "); + if (i != 255) + fprintf (stream, " 0x%08lX,", crc_table[i]); + else + fprintf (stream, " 0x%08lX\n", crc_table[i]); + } + + fprintf (stream, " }"); +} + +void +print_header (FILE * stream) +{ + fprintf (stream, "/* Slice-by-8 lookup tables */\n"); + fprintf (stream, "static const uint32_t crc32_sliceby8_table[][256] = {\n"); + for (int i = 8; i <= 64; i += 8) + { + print_crc_table (stream, i); + if (i < 64) + fprintf (stream, ","); + fprintf (stream, "\n"); + } + fprintf (stream, "};\n\n"); +} + +void +print_copyright_notice (FILE * stream) +{ + fprintf (stream, "/* crc.c -- cyclic redundancy checks\n"); + fprintf (stream, + "Copyright (C) 2005-2006, 2009-2024 Free Software Foundation, Inc.\n"); + fprintf (stream, "\n"); + fprintf (stream, + "This file is free software: you can redistribute it and/or modify\n"); + fprintf (stream, + "it under the terms of the GNU Lesser General Public License as\n"); + fprintf (stream, + "published by the Free Software Foundation, either version 3 of the\n"); + fprintf (stream, "License, or (at your option) any later version.\n"); + fprintf (stream, "\n"); + fprintf (stream, + "This file is distributed in the hope that it will be useful,\n"); + fprintf (stream, + "but WITHOUT ANY WARRANTY; without even the implied warranty of\n"); + fprintf (stream, + "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"); + fprintf (stream, "GNU Lesser General Public License for more details.\n"); + fprintf (stream, "\n"); + fprintf (stream, + "You should have received a copy of the GNU Lesser General Public License\n"); + fprintf (stream, + "along with this program. If not, see <https://www.gnu.org/licenses/>. */\n\n"); +} + +int +main (int argc, char *argv[]) +{ + if (argc != 2) + { + fprintf (stderr, " Usage: %s crc-sliceby8.h\n", argv[0]); + exit (1); + } + FILE *stream; + + stream = fopen (argv[1], "w"); + if (stream == NULL) + { + fprintf (stderr, "cannot open '%s' for writing\n", argv[1]); + exit (1); + } + + print_copyright_notice (stream); + print_header (stream); + + return 0; +} diff --git a/lib/crc.c b/lib/crc.c index 0216932f28..10260e85ed 100644 --- a/lib/crc.c +++ b/lib/crc.c @@ -19,6 +19,108 @@ #include <config.h> #include "crc.h" +#include "endian.h" + +#include <string.h> + +#ifdef GL_CRC_SLICE_BY_8 +#include "crc-sliceby8.h" + +/* + * When we calculate the CRC32 byte-by-byte we take the checksum + * up to the previous byte, XOR it with our new byte, run CRC32 + * and then XOR against the remaining bytes from the prior checksum: + * + * c0 = prior checksum + * d0,d1 = bytes 0 and 1 of our data + * c1 = (c0 >> 8) + * c2 = CRC32(c0 ^ d0) ^ c1 + * c3 = (c2 >> 8) + * c4 = CRC32(c2 ^ d1) ^ c3 + * + * The CRC32 function is "affine" which means that + * CRC32(a ^ b) = CRC32(a) ^ CRC32(b) + * + * and therefore we can move some of the terms around, e.g. + * + * c4 = CRC32(CRC32(c0 ^ d0) ^ c1 ^ d1) ^ c3 + * c4 = CRC32(CRC32(c0 ^ d0)) ^ CRC32(c1 ^ d1) ^ c3 + * + * Another way to look at this is + * + * CRC32(0x1200) ^ CRC32(0x34) = CRC32(0x1234) + * + * With the slice-by-8 method, we extend the table-lookup technique + * from a single table of 0x00 to 0xFF, but also every 0x100 from + * 0x0000 to 0xFF00, and for 0x000000 to 0xFF0000 etc. + * + * We can then calculate by taking 8 bytes of plaintext and then + * looking up each one individually and XORing them together + * at the end. + * + * This approach has been mentioned by multiple sources but one of + * the common references is 10.1109/TC.2008.85 by Kounavis + * and Berry + */ + +static uint32_t +crc32_update_no_xor_slice_by_8 (uint32_t crc, const char *buf) +{ + uint64_t local_buf; + memcpy (&local_buf, buf, 8); + local_buf = le64toh (local_buf) ^ crc; + crc = crc32_sliceby8_table[0][(local_buf >> 56) & 0xFF] + ^ crc32_sliceby8_table[1][(local_buf >> 48) & 0xFF] + ^ crc32_sliceby8_table[2][(local_buf >> 40) & 0xFF] + ^ crc32_sliceby8_table[3][(local_buf >> 32) & 0xFF] + ^ crc32_sliceby8_table[4][(local_buf >> 24) & 0xFF] + ^ crc32_sliceby8_table[5][(local_buf >> 16) & 0xFF] + ^ crc32_sliceby8_table[6][(local_buf >> 8) & 0xFF] + ^ crc32_sliceby8_table[7][local_buf & 0xFF]; + + return crc; +} + +uint32_t +crc32_update_no_xor_slice_by_n (uint32_t crc, const char *buf, + size_t num_bytes) +{ + uint64_t local_buf; + size_t i, shift_amount; + + memcpy (&local_buf, buf, num_bytes); + local_buf = le64toh (local_buf) ^ crc; + + if (num_bytes >= 4) + crc = 0; + else + crc = crc >> (num_bytes * 8); + + for (i = 0; i < num_bytes; i++) + { + shift_amount = ((num_bytes - i - 1) * 8); + crc = crc ^ crc32_sliceby8_table[i][(local_buf >> shift_amount) & 0xFF]; + } + + return crc; +} + +uint32_t +crc32_update_no_xor (uint32_t crc, const char *buf, size_t len) +{ + size_t n, slice_alignment; + + slice_alignment = (len & (-8)); + + for (n = 0; n < slice_alignment; n += 8) + crc = crc32_update_no_xor_slice_by_8 (crc, buf + n); + + if (len > n) + crc = crc32_update_no_xor_slice_by_n (crc, buf + n, len - n); + + return crc; +} +#else /* Table of CRCs of all 8-bit messages. Generated by running code from RFC 1952 modified to print out the table. */ @@ -84,6 +186,8 @@ crc32_update_no_xor (uint32_t crc, const char *buf, size_t len) return crc; } +#endif + uint32_t crc32_no_xor (const char *buf, size_t len) { diff --git a/modules/crc b/modules/crc index bd7fe29dba..3c08e20b4f 100644 --- a/modules/crc +++ b/modules/crc @@ -4,15 +4,31 @@ Compute cyclic redundancy codes. Files: lib/crc.h lib/crc.c +lib/crc-generate-table.c Depends-on: stdint +endian configure.ac: + if test $gl_crc_slice_by_8 != no; then + AC_DEFINE([GL_CRC_SLICE_BY_8], [1], + [Define to get faster but larger CRC32 operation.]) + fi Makefile.am: lib_SOURCES += crc.c +$(srcdir)/crc-sliceby8.h: $(srcdir)/crc-generate-table.c + $(COMPILE) $(AM_LDFLAGS) $(LDFLAGS) -o crc-generate-table $(srcdir)/crc-generate-table.c \ + && ./crc-generate-table $(srcdir)/crc-sliceby8.h-t \ + && rm -f crc-generate-table \ + && mv $(srcdir)/crc-sliceby8.h-t $(srcdir)/crc-sliceby8.h +BUILT_SOURCES += crc-sliceby8.h +MOSTLYCLEANFILES += crc-sliceby8.h-t crc-generate-table +MAINTAINERCLEANFILES += crc-sliceby8.h +EXTRA_DIST += crc-sliceby8.h + Include: "crc.h" -- 2.34.1