On 08/10/2023 14:36, Pádraig Brady wrote:
On 07/10/2023 22:29, Paul Eggert wrote:
On 2023-10-07 04:42, Pádraig Brady wrote:
The auto linking is globally controlled with the --with-openssl
cofigure option, but you could build sort (and md5sum)
without that dependency with:
./configure ac_cv_lib_crypto_MD5=no
Thanks, I was thinking more along the lines that Bruno suggested, which
to continue to link to libcrypto, but do it with dlopen/dlsym in 'sort'
only when need_random is true.
It's not clear to me offhand whether this should be done entirely in
Coreutils, or whether we should add some Gnulib support to make it
easier to do this sort of lazier linking.
I was wondering if this was worth worrying about at all,
but it is a significant overhead that's worth improving.
To quantify the overhead I compared optimized builds,
with and without the above configure option, giving:
$ time seq 10000 | xargs -I'{}' src/sort /dev/null -k'{}'
real 0m7.009s
user 0m3.462s
sys 0m3.578s
$ time seq 10000 | xargs -I'{}' src/sort-lc /dev/null -k'{}'
real 0m12.950s
user 0m3.754s
sys 0m9.200s
So we should do something. Now dlopening libcrypto on demand
would work, but there may be better solutions.
sort doesn't have to use md5. It could use blake2 routines
already in coreutils to avoid the issue (and get some speed ups).
Alternatively it might use some other hash function.
For example see the other 128 bit functions compared at:
https://github.com/Cyan4973/xxHash
BTW there was mention of static linking as an option in this thread.
That's is an option to provide better speed an isolation for binaries,
however it's best left to the system builders to use this for their builds.
There can be security implications for prompt library updating,
and libcrypto is particularly sensitive in this regard.
Adding coreutils list...
So above we've demonstrated that sort dynamically loading libcrypto
does nearly double the startup time for the process.
Attached is a patch to use the coreutils reference blake2b hash instead
of the optimized libcrypto md5 routines.
$ seq 1000000 > 1.txt
$ time src/sort-md5-lc -R < 1.txt > /dev/null
real 0m6.734s
user 0m23.258s
sys 0m0.047s
$ time src/sort-blake2 -R < 1.txt > /dev/null
real 0m7.215s
user 0m25.683s
sys 0m0.043s
$ grep 'model name' /proc/cpuinfo | head -n1
model name : Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz
$ rpm -q openssl-libs
openssl-libs-3.0.9-2.fc38.x86_64
So while this avoids the startup overhead,
the reference blake2 routines are a little less efficient
than the optimized md5 libcrypto routines.
cheers,
Pádraig
From 7b6765771a235849f424cb6a884cb8237de8380d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com>
Date: Sun, 8 Oct 2023 21:17:09 +0100
Subject: [PATCH] sort: avoid linking with libcrypto
Use blake2b (128 bit) instead of md5
---
src/blake2/blake2.h | 2 +-
src/local.mk | 10 ++++++----
src/sort.c | 35 +++++++++++++++++++----------------
3 files changed, 26 insertions(+), 21 deletions(-)
diff --git a/src/blake2/blake2.h b/src/blake2/blake2.h
index be8b176d7..73bd171b7 100644
--- a/src/blake2/blake2.h
+++ b/src/blake2/blake2.h
@@ -23,7 +23,7 @@
#ifdef _MSC_VER
# define BLAKE2_PACKED(x) __pragma (pack (push, 1)) x __pragma (pack (pop))
#else
-# define BLAKE2_PACKED(x) x _GL_ATTRIBUTE_PACKED
+# define BLAKE2_PACKED(x) x /* _GL_ATTRIBUTE_PACKED */
#endif
#if defined(__cplusplus)
diff --git a/src/local.mk b/src/local.mk
index ed5d46ddb..35f4d0a35 100644
--- a/src/local.mk
+++ b/src/local.mk
@@ -302,7 +302,6 @@ src_printf_LDADD += $(LIBICONV)
# for libcrypto hash routines
src_md5sum_LDADD += $(LIB_CRYPTO)
-src_sort_LDADD += $(LIB_CRYPTO)
src_sha1sum_LDADD += $(LIB_CRYPTO)
src_sha224sum_LDADD += $(LIB_CRYPTO)
src_sha256sum_LDADD += $(LIB_CRYPTO)
@@ -409,6 +408,9 @@ src_tac_SOURCES = src/tac.c src/temp-stream.c
src_tail_SOURCES = src/tail.c src/iopoll.c
src_tee_SOURCES = src/tee.c src/iopoll.c
+src_sort_CPPFLAGS = -DHASH_ALGO_BLAKE2=1 -DHAVE_CONFIG_H $(AM_CPPFLAGS)
+src_sort_SOURCES = src/sort.c $(src_blake2_SOURCES)
+
src_sum_SOURCES = src/sum.c src/sum.h src/digest.c
src_sum_CPPFLAGS = -DHASH_ALGO_SUM=1 $(AM_CPPFLAGS)
@@ -425,9 +427,9 @@ src_sha384sum_CPPFLAGS = -DHASH_ALGO_SHA384=1 $(AM_CPPFLAGS)
src_sha512sum_SOURCES = src/digest.c
src_sha512sum_CPPFLAGS = -DHASH_ALGO_SHA512=1 $(AM_CPPFLAGS)
src_b2sum_CPPFLAGS = -DHASH_ALGO_BLAKE2=1 -DHAVE_CONFIG_H $(AM_CPPFLAGS)
-src_b2sum_SOURCES = src/digest.c \
- src/blake2/blake2.h src/blake2/blake2-impl.h \
- src/blake2/blake2b-ref.c \
+src_blake2_SOURCES = src/blake2/blake2.h src/blake2/blake2-impl.h \
+ src/blake2/blake2b-ref.c
+src_b2sum_SOURCES = src/digest.c $(src_blake2_SOURCES) \
src/blake2/b2sum.c src/blake2/b2sum.h
src_cksum_SOURCES = $(src_b2sum_SOURCES) src/sum.c src/sum.h \
diff --git a/src/sort.c b/src/sort.c
index 5c86b8332..14c3951ce 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -31,6 +31,7 @@
#include "system.h"
#include "argmatch.h"
#include "assure.h"
+#include "blake2/blake2.h"
#include "fadvise.h"
#include "filevercmp.h"
#include "flexmember.h"
@@ -38,7 +39,6 @@
#include "hash.h"
#include "heap.h"
#include "ignore-value.h"
-#include "md5.h"
#include "mbswidth.h"
#include "nproc.h"
#include "physmem.h"
@@ -2084,23 +2084,24 @@ getmonth (char const *month, char **ea)
return 0;
}
-/* A randomly chosen MD5 state, used for random comparison. */
-static struct md5_ctx random_md5_state;
+/* A randomly chosen state, used for random comparison. */
+static blake2b_state random_hash_state;
+#define RANDOM_HASH_BYTES 16
-/* Initialize the randomly chosen MD5 state. */
+/* Initialize the randomly chosen hash state. */
static void
-random_md5_state_init (char const *random_source)
+random_hash_state_init (char const *random_source)
{
- unsigned char buf[MD5_DIGEST_SIZE];
+ unsigned char buf[RANDOM_HASH_BYTES];
struct randread_source *r = randread_new (random_source, sizeof buf);
if (! r)
sort_die (_("open failed"), random_source ? random_source : "getrandom");
randread (r, buf, sizeof buf);
if (randread_free (r) != 0)
sort_die (_("close failed"), random_source);
- md5_init_ctx (&random_md5_state);
- md5_process_bytes (buf, sizeof buf, &random_md5_state);
+ blake2b_init (&random_hash_state, RANDOM_HASH_BYTES);
+ blake2b_update (&random_hash_state, buf, sizeof buf);
}
/* This is like strxfrm, except it reports any error and exits. */
@@ -2141,9 +2142,9 @@ compare_random (char *restrict texta, size_t lena,
char *buf = stackbuf;
size_t bufsize = sizeof stackbuf;
void *allocated = nullptr;
- uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
- struct md5_ctx s[2];
- s[0] = s[1] = random_md5_state;
+ uint32_t dig[2][RANDOM_HASH_BYTES / sizeof (uint32_t)];
+ blake2b_state s[2];
+ s[0] = s[1] = random_hash_state;
if (hard_LC_COLLATE)
{
@@ -2220,8 +2221,8 @@ compare_random (char *restrict texta, size_t lena,
/* Accumulate the transformed data in the corresponding
checksums. */
- md5_process_bytes (buf, sizea, &s[0]);
- md5_process_bytes (buf + sizea, sizeb, &s[1]);
+ blake2b_update (&s[0], buf, sizea);
+ blake2b_update (&s[1], buf + sizea, sizeb);
/* Update the tiebreaker comparison of the transformed data. */
if (! xfrm_diff)
@@ -2234,8 +2235,10 @@ compare_random (char *restrict texta, size_t lena,
}
/* Compute and compare the checksums. */
- md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
- md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
+ blake2b_update (&s[0], texta, lena);
+ blake2b_final (&s[0], dig[0], RANDOM_HASH_BYTES);
+ blake2b_update (&s[1], textb, lenb);
+ blake2b_final (&s[1], dig[1], RANDOM_HASH_BYTES);
int diff = memcmp (dig[0], dig[1], sizeof dig[0]);
/* Fall back on the tiebreaker if the checksums collide. */
@@ -4771,7 +4774,7 @@ main (int argc, char **argv)
reverse = gkey.reverse;
if (need_random)
- random_md5_state_init (random_source);
+ random_hash_state_init (random_source);
if (temp_dir_count == 0)
{
--
2.41.0