Now, here's a draft patch that adds AF_ALG support to the sha1_buffer etc. functions as well.
But I have a problem here: On 4 different systems, I don't get a speedup from this patch. To benchmark it, I use this set of commands:

$ ./gnulib-tool --create-testdir --dir=testdir --single-configure --symlink crypto/md5 crypto/sha1 crypto/sha256 crypto/sha512
$ cd testdir
$ mkdir without; (cd without; ../configure CPPFLAGS=-Wall CFLAGS=-O2 --without-linux-crypto; make && make check)
$ mkdir with; (cd with; ../configure CPPFLAGS=-Wall CFLAGS=-O2 --with-linux-crypto; make && make check)
$ without/gltests/bench-md5 100 1000000
real 0.391257 user 0.388 sys 0.004
$ with/gltests/bench-md5 100 1000000
real 9.800789 user 1.088 sys 8.648
$ without/gltests/bench-md5 1000 100000
real 0.289286 user 0.288 sys 0.000
$ with/gltests/bench-md5 1000 100000
real 1.220016 user 0.104 sys 1.116
$ without/gltests/bench-md5 10000 10000
real 0.270131 user 0.268 sys 0.000
$ with/gltests/bench-md5 10000 10000
real 0.375399 user 0.020 sys 0.352
$ without/gltests/bench-md5 100000 1000
real 0.280091 user 0.276 sys 0.000
$ with/gltests/bench-md5 100000 1000
real 0.295650 user 0.000 sys 0.292
$ without/gltests/bench-md5 100000 1000
real 0.276514 user 0.276 sys 0.000
$ with/gltests/bench-md5 100000 1000
real 0.292350 user 0.000 sys 0.292
$ without/gltests/bench-md5 1000000 100
real 0.261845 user 0.260 sys 0.004
$ with/gltests/bench-md5 1000000 100
real 0.265650 user 0.000 sys 0.260
[and similarly for sha1 etc.]

Tested this on
  - Intel Xeon X5450
  - Intel Xeon E5-2603 v3
  - Intel Core i7-2600
  - Intel Core m3-6Y30
On all four, no speedup is visible.

On machines without crypto instructions or crypto devices, I would expect that
  - sha1_stream gets slightly faster with linux-crypto than without (because the copy of data from the file to user-space is optimized away).
  - sha1_buffer is slightly slower with linux-crypto than without (because of the overhead of copying the data from user to kernel space).
Whereas on machines with crypto instructions or crypto devices, I would expect a significant benefit for both functions. You showed us significant benefits for sha1_stream, whereas I see no benefit for sha1_buffer. How is this possible? In <https://en.wikipedia.org/wiki/AES_instruction_set> I read that there are specialized instructions for AES. Does this mean that there are NO specialized instructions for MD5, SHA-1, SHA-224 ... SHA-512? In that case, is all the work we have done futile for Intel CPUs and only beneficial for embedded CPUs? Can you try this comparison on the Intel Xeon you have access to, please?

Bruno
diff --git a/lib/md5.c b/lib/md5.c index 13f6275..194e980 100644 --- a/lib/md5.c +++ b/lib/md5.c @@ -221,11 +221,13 @@ process_partial_block: void * md5_buffer (const char *buffer, size_t len, void *resblock) { +#if 0 /* TODO: Determine list of platforms and break-even point. */ if (len >= 1) { if (afalg_buffer (buffer, len, "md5", resblock, MD5_DIGEST_SIZE) == 0) return resblock; } +#endif struct md5_ctx ctx; diff --git a/lib/sha1.c b/lib/sha1.c index 735de64..1ba0afb 100644 --- a/lib/sha1.c +++ b/lib/sha1.c @@ -209,11 +209,13 @@ sha1_stream (FILE *stream, void *resblock) void * sha1_buffer (const char *buffer, size_t len, void *resblock) { +#if 0 /* TODO: Determine list of platforms and break-even point. */ if (len >= 1) { if (afalg_buffer (buffer, len, "sha1", resblock, SHA1_DIGEST_SIZE) == 0) return resblock; } +#endif struct sha1_ctx ctx; diff --git a/lib/sha256.c b/lib/sha256.c index 78fc96e..0f3bfc5 100644 --- a/lib/sha256.c +++ b/lib/sha256.c @@ -273,12 +273,14 @@ sha224_stream (FILE *stream, void *resblock) void * sha256_buffer (const char *buffer, size_t len, void *resblock) { +#if 0 /* TODO: Determine list of platforms and break-even point. */ if (len >= 1) { if (afalg_buffer (buffer, len, "sha256", resblock, SHA256_DIGEST_SIZE) == 0) return resblock; } +#endif struct sha256_ctx ctx; @@ -295,12 +297,14 @@ sha256_buffer (const char *buffer, size_t len, void *resblock) void * sha224_buffer (const char *buffer, size_t len, void *resblock) { +#if 0 /* TODO: Determine list of platforms and break-even point. */ if (len >= 1) { if (afalg_buffer (buffer, len, "sha224", resblock, SHA224_DIGEST_SIZE) == 0) return resblock; } +#endif struct sha256_ctx ctx; diff --git a/lib/sha512.c b/lib/sha512.c index 24cdd59..f73cbd6 100644 --- a/lib/sha512.c +++ b/lib/sha512.c @@ -281,12 +281,14 @@ sha384_stream (FILE *stream, void *resblock) void * sha512_buffer (const char *buffer, size_t len, void *resblock) { +#if 0 /* TODO: Determine list of platforms and break-even point. 
*/ if (len >= 1) { if (afalg_buffer (buffer, len, "sha512", resblock, SHA512_DIGEST_SIZE) == 0) return resblock; } +#endif struct sha512_ctx ctx; @@ -303,12 +305,14 @@ sha512_buffer (const char *buffer, size_t len, void *resblock) void * sha384_buffer (const char *buffer, size_t len, void *resblock) { +#if 0 /* TODO: Determine list of platforms and break-even point. */ if (len >= 1) { if (afalg_buffer (buffer, len, "sha384", resblock, SHA384_DIGEST_SIZE) == 0) return resblock; } +#endif struct sha512_ctx ctx;