Now, here's a draft patch that adds AF_ALG support to the sha1_buffer etc.
functions as well.

But I have a problem here: On 4 different systems, I don't get a speedup
from this patch.

To benchmark it, I use this set of commands:

$ ./gnulib-tool --create-testdir --dir=testdir --single-configure --symlink 
crypto/md5 crypto/sha1 crypto/sha256 crypto/sha512
$ cd testdir
$ mkdir without; (cd without; ../configure CPPFLAGS=-Wall CFLAGS=-O2 
--without-linux-crypto; make && make check)
$ mkdir with; (cd with; ../configure CPPFLAGS=-Wall CFLAGS=-O2 
--with-linux-crypto; make && make check)

$ without/gltests/bench-md5 100 1000000
real   0.391257
user   0.388
sys    0.004
$ with/gltests/bench-md5 100 1000000   
real   9.800789
user   1.088
sys    8.648
$ without/gltests/bench-md5 1000 100000
real   0.289286
user   0.288
sys    0.000
$ with/gltests/bench-md5 1000 100000   
real   1.220016
user   0.104
sys    1.116
$ without/gltests/bench-md5 10000 10000
real   0.270131
user   0.268
sys    0.000
$ with/gltests/bench-md5 10000 10000       
real   0.375399
user   0.020
sys    0.352
$ without/gltests/bench-md5 100000 1000
real   0.280091
user   0.276
sys    0.000
$ with/gltests/bench-md5 100000 1000
real   0.295650
user   0.000
sys    0.292
$ without/gltests/bench-md5 100000 1000
real   0.276514
user   0.276
sys    0.000
$ with/gltests/bench-md5 100000 1000
real   0.292350
user   0.000
sys    0.292
$ without/gltests/bench-md5 1000000 100
real   0.261845
user   0.260
sys    0.004
$ with/gltests/bench-md5 1000000 100
real   0.265650
user   0.000
sys    0.260
[and similarly for sha1 etc.]

Tested this on
  - Intel Xeon X5450
  - Intel Xeon E5-2603 v3
  - Intel Core i7-2600
  - Intel Core m3-6Y30
On all four, no speedup is visible.

On machines without crypto instructions or crypto devices, I would expect
that
  - sha1_stream is slightly faster with linux-crypto than without
    (because the copy of data from the file to user-space is optimized away).
  - sha1_buffer is slightly slower with linux-crypto than without
    (because of the overhead of copying the data from user to kernel space).

Whereas on machines with crypto instructions or crypto devices, I would
expect a significant benefit for both functions.

You showed us significant benefits for sha1_stream, whereas I see no benefit
for sha1_buffer. How is this possible?

In <https://en.wikipedia.org/wiki/AES_instruction_set> I read that there are
specialized instructions for AES. Does this mean that there are NO specialized
instructions for MD5, SHA-1, SHA-224 ... SHA-512? If so, all the work
we have done is futile for Intel CPUs and only beneficial for embedded CPUs?

Can you try this comparison on the Intel Xeon you have access to, please?

Bruno
diff --git a/lib/md5.c b/lib/md5.c
index 13f6275..194e980 100644
--- a/lib/md5.c
+++ b/lib/md5.c
@@ -221,11 +221,13 @@ process_partial_block:
 void *
 md5_buffer (const char *buffer, size_t len, void *resblock)
 {
+#if 0 /* TODO: Determine list of platforms and break-even point.  */
   if (len >= 1)
     {
       if (afalg_buffer (buffer, len, "md5", resblock, MD5_DIGEST_SIZE) == 0)
         return resblock;
     }
+#endif
 
   struct md5_ctx ctx;
 
diff --git a/lib/sha1.c b/lib/sha1.c
index 735de64..1ba0afb 100644
--- a/lib/sha1.c
+++ b/lib/sha1.c
@@ -209,11 +209,13 @@ sha1_stream (FILE *stream, void *resblock)
 void *
 sha1_buffer (const char *buffer, size_t len, void *resblock)
 {
+#if 0 /* TODO: Determine list of platforms and break-even point.  */
   if (len >= 1)
     {
       if (afalg_buffer (buffer, len, "sha1", resblock, SHA1_DIGEST_SIZE) == 0)
         return resblock;
     }
+#endif
 
   struct sha1_ctx ctx;
 
diff --git a/lib/sha256.c b/lib/sha256.c
index 78fc96e..0f3bfc5 100644
--- a/lib/sha256.c
+++ b/lib/sha256.c
@@ -273,12 +273,14 @@ sha224_stream (FILE *stream, void *resblock)
 void *
 sha256_buffer (const char *buffer, size_t len, void *resblock)
 {
+#if 0 /* TODO: Determine list of platforms and break-even point.  */
   if (len >= 1)
     {
       if (afalg_buffer (buffer, len, "sha256", resblock, SHA256_DIGEST_SIZE)
           == 0)
         return resblock;
     }
+#endif
 
   struct sha256_ctx ctx;
 
@@ -295,12 +297,14 @@ sha256_buffer (const char *buffer, size_t len, void *resblock)
 void *
 sha224_buffer (const char *buffer, size_t len, void *resblock)
 {
+#if 0 /* TODO: Determine list of platforms and break-even point.  */
   if (len >= 1)
     {
       if (afalg_buffer (buffer, len, "sha224", resblock, SHA224_DIGEST_SIZE)
           == 0)
         return resblock;
     }
+#endif
 
   struct sha256_ctx ctx;
 
diff --git a/lib/sha512.c b/lib/sha512.c
index 24cdd59..f73cbd6 100644
--- a/lib/sha512.c
+++ b/lib/sha512.c
@@ -281,12 +281,14 @@ sha384_stream (FILE *stream, void *resblock)
 void *
 sha512_buffer (const char *buffer, size_t len, void *resblock)
 {
+#if 0 /* TODO: Determine list of platforms and break-even point.  */
   if (len >= 1)
     {
       if (afalg_buffer (buffer, len, "sha512", resblock, SHA512_DIGEST_SIZE)
           == 0)
         return resblock;
     }
+#endif
 
   struct sha512_ctx ctx;
 
@@ -303,12 +305,14 @@ sha512_buffer (const char *buffer, size_t len, void *resblock)
 void *
 sha384_buffer (const char *buffer, size_t len, void *resblock)
 {
+#if 0 /* TODO: Determine list of platforms and break-even point.  */
   if (len >= 1)
     {
       if (afalg_buffer (buffer, len, "sha384", resblock, SHA384_DIGEST_SIZE)
           == 0)
         return resblock;
     }
+#endif
 
   struct sha512_ctx ctx;
 

Reply via email to