OpenSSL includes asm code for several platforms to speed up various
operations. Currently we don't build any of it; the attached patch
turns on the asm code for Pentiums and above (it relies on an uncommitted
patch to sys.mk which defines MACHINE_CPU ?= i386). Set MACHINE_CPU to
"i586" or "i686" (both are actually identical at present) and rebuild.

Typical speed improvements are between 2x and 5x for BigNum, Blowfish,
CAST, DES, MD5, RC4, RC5, RIPEMD-160, and SHA-1 operations, as measured by
'openssl speed' on my Pentium Pro 233. Speeds are in K/sec for packets of
the given size; for each algorithm the first table is the existing C build
and the second is the asm build:

=== BLOWFISH ===

type              8 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
blowfish cbc      3325.35k     4417.62k     4553.81k     4572.46k     4595.71k

type              8 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
blowfish cbc      7073.58k     8099.29k     8286.63k     8328.53k     8315.07k

=== CAST ===

type              8 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
cast cbc          3249.59k     4231.19k     4394.88k     4401.69k     4437.97k

type              8 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
cast cbc          6956.17k     7999.28k     8208.47k     8199.05k     8192.00k

=== DES ===

type              8 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
des cbc           2291.38k     2404.86k     2426.97k     2436.64k     2443.95k
des ede3           492.61k      516.87k      519.83k      521.18k      520.20k

type              8 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
des cbc           4830.90k     5301.97k     5412.18k     5419.02k     5355.72k
des ede3          1831.93k     1919.80k     1929.86k     1932.36k     1913.72k

=== MD5 ===

type              8 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
md5               1636.10k     7736.92k    13991.76k    17495.04k    18879.83k

type              8 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
md5               2609.84k    13585.19k    26511.38k    34934.97k    38629.09k

=== RC4 ===

type              8 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
rc4              12947.45k    16970.70k    18044.59k    18370.22k    18275.58k

type              8 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
rc4              18209.51k    25287.65k    26100.65k    26139.99k    26512.82k

=== RC5 ===

type              8 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
rc5-32/12 cbc     5115.15k     8124.08k     8766.29k     8814.59k     8928.03k

type              8 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
rc5-32/12 cbc    12462.59k    15226.54k    15804.76k    16218.86k    16447.82k

=== RIPEMD-160 ===

type              8 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
rmd160             849.94k     3680.04k     6326.22k     7626.07k     8123.73k

type              8 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
rmd160            1279.72k     5915.67k    10461.46k    12978.21k    13944.41k

=== SHA1 ===

type              8 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
sha1              1081.31k     2844.71k     5784.80k     7765.50k     8650.75k

type              8 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
sha1              1617.59k     7664.76k    13538.05k    17012.18k    18419.89k

Kris

----
In God we Trust -- all others must submit an X.509 certificate.
    -- Charles Forsythe <[EMAIL PROTECTED]>
Index: Makefile
===================================================================
RCS file: /home/ncvs/src/secure/lib/libcrypto/Makefile,v
retrieving revision 1.17
diff -u -r1.17 Makefile
--- Makefile    2000/04/13 07:36:09     1.17
+++ Makefile    2000/04/22 02:16:57
@@ -9,11 +9,11 @@
        ${LCRYPTO_SRC}/err ${LCRYPTO_SRC}/evp ${LCRYPTO_SRC}/hmac \
        ${LCRYPTO_SRC}/lhash ${LCRYPTO_SRC}/md2 ${LCRYPTO_SRC}/md5 \
        ${LCRYPTO_SRC}/mdc2 ${LCRYPTO_SRC}/objects ${LCRYPTO_SRC}/pem \
-       ${LCRYPTO_SRC}/pkcs7 ${LCRYPTO_SRC}/pkcs12 ${LCRYPTO_SRC}/rand \
-       ${LCRYPTO_SRC}/rc2 ${LCRYPTO_SRC}/rc4 ${LCRYPTO_SRC}/rc5 \
-       ${LCRYPTO_SRC}/ripemd ${LCRYPTO_SRC}/rsa ${LCRYPTO_SRC}/../rsaref \
-       ${LCRYPTO_SRC}/sha ${LCRYPTO_SRC}/stack ${LCRYPTO_SRC}/txt_db \
-       ${LCRYPTO_SRC}/x509 ${LCRYPTO_SRC}/x509v3
+       ${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/pkcs7 ${LCRYPTO_SRC}/pkcs12 \
+       ${LCRYPTO_SRC}/rand ${LCRYPTO_SRC}/rc2 ${LCRYPTO_SRC}/rc4 \
+       ${LCRYPTO_SRC}/rc5 ${LCRYPTO_SRC}/ripemd ${LCRYPTO_SRC}/rsa \
+       ${LCRYPTO_SRC}/../rsaref ${LCRYPTO_SRC}/sha ${LCRYPTO_SRC}/stack \
+       ${LCRYPTO_SRC}/txt_db ${LCRYPTO_SRC}/x509 ${LCRYPTO_SRC}/x509v3
 
 LIB=           crypto
 SHLIB_MAJOR=   1
@@ -41,24 +41,69 @@
        x_x509a.c \
 
 # blowfish
-SRCS+= bf_cfb64.c bf_ecb.c bf_enc.c bf_ofb64.c bf_skey.c
+SRCS+= bf_cfb64.c bf_ecb.c bf_ofb64.c bf_skey.c
 
+.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == "i686"))
+STATICOBJS+=   bx86-elf.o
+SOBJS+=                bx86-elf.o
+.else
+SRCS+= bf_enc.c 
+.endif
+
+bx86-elf.o: bx86unix.cpp
+       cpp -DELF -x c bx86unix.cpp | as -o bx86-elf.o
+
+bx86unix.cpp: asm/bf-586.pl x86asm.pl
+       perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/bf/asm/bf-586.pl cpp > bx86unix.cpp
+
 # bio
 SRCS+= b_dump.c b_print.c b_sock.c bf_buff.c bf_nbio.c bf_null.c \
        bio_cb.c bio_err.c bio_lib.c bss_acpt.c bss_bio.c bss_conn.c \
        bss_fd.c bss_file.c bss_log.c bss_mem.c bss_null.c bss_sock.c
 
 # bn
-SRCS+= bn_add.c bn_asm.c bn_blind.c bn_ctx.c bn_div.c bn_err.c bn_exp.c \
+SRCS+= bn_add.c bn_blind.c bn_ctx.c bn_div.c bn_err.c bn_exp.c \
        bn_exp2.c bn_gcd.c bn_lib.c bn_mont.c bn_mpi.c bn_mul.c bn_prime.c \
        bn_print.c bn_rand.c bn_recp.c bn_shift.c bn_sqr.c bn_word.c
 
+.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == "i686"))
+STATICOBJS+=   bn86-elf.o co86-elf.o
+SOBJS+=                bn86-elf.o co86-elf.o
+.else
+SRCS+= bn_asm.c 
+.endif
+
+bn86-elf.o: bn86unix.cpp
+       cpp -DELF -x c bn86unix.cpp | as -o bn86-elf.o
+
+bn86unix.cpp: asm/bn-586.pl x86asm.pl
+       perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/bn/asm/bn-586.pl cpp > bn86unix.cpp
+
+co86-elf.o: co86unix.cpp
+       cpp -DELF -x c co86unix.cpp | as -o co86-elf.o
+
+co86unix.cpp: asm/co-586.pl x86asm.pl
+       perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/bn/asm/co-586.pl cpp > co86unix.cpp
+
 # buffer
 SRCS+= buf_err.c buffer.c 
 
 # cast
-SRCS+= c_cfb64.c c_ecb.c c_enc.c c_ofb64.c c_skey.c
+SRCS+= c_cfb64.c c_ecb.c c_ofb64.c c_skey.c
 
+.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == "i686"))
+STATICOBJS+=   cx86-elf.o
+SOBJS+=                cx86-elf.o
+.else
+SRCS+= c_enc.c 
+.endif
+
+cx86-elf.o: cx86unix.cpp
+       cpp -DELF -x c cx86unix.cpp | as -o cx86-elf.o
+
+cx86unix.cpp: asm/cast-586.pl x86asm.pl cbc.pl
+       perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/cast/asm/cast-586.pl cpp > cx86unix.cpp
+
 # comp
 SRCS+= c_rle.c c_zlib.c comp_lib.c
 
@@ -66,12 +111,31 @@
 SRCS+= conf.c conf_err.c
 
 # des
-SRCS+= cbc_cksm.c cbc_enc.c cfb64ede.c cfb64enc.c cfb_enc.c des_enc.c \
-       ecb3_enc.c ecb_enc.c ede_cbcm_enc.c enc_read.c enc_writ.c fcrypt.c \
-       fcrypt_b.c ofb64ede.c ofb64enc.c ofb_enc.c pcbc_enc.c qud_cksm.c \
+SRCS+= cbc_cksm.c cbc_enc.c cfb64ede.c cfb64enc.c cfb_enc.c \
+       ecb3_enc.c ecb_enc.c ede_cbcm_enc.c enc_read.c enc_writ.c \
+       fcrypt.c ofb64ede.c ofb64enc.c ofb_enc.c pcbc_enc.c qud_cksm.c \
        rand_key.c read2pwd.c read_pwd.c rnd_keys.c rpc_enc.c set_key.c \
        str2key.c supp.c xcbc_enc.c 
 
+.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == "i686"))
+STATICOBJS+=   dx86-elf.o yx86-elf.o
+SOBJS+=                dx86-elf.o yx86-elf.o
+.else
+SRCS+= des_enc.c fcrypt_b.c
+.endif
+
+dx86-elf.o: dx86unix.cpp
+       cpp -DELF -x c dx86unix.cpp | as -o dx86-elf.o
+
+yx86-elf.o: yx86unix.cpp
+       cpp -DELF -x c yx86unix.cpp | as -o yx86-elf.o
+
+dx86unix.cpp: asm/des-586.pl x86asm.pl cbc.pl
+       perl -I${LCRYPTO_SRC}/perlasm -I${LCRYPTO_SRC}/des/asm ${LCRYPTO_SRC}/des/asm/des-586.pl cpp > dx86unix.cpp
+
+yx86unix.cpp: asm/crypt586.pl x86asm.pl
+       perl -I${LCRYPTO_SRC}/perlasm -I${LCRYPTO_SRC}/des/asm ${LCRYPTO_SRC}/des/asm/crypt586.pl cpp > yx86unix.cpp
+
 # dh
 SRCS+= dh_check.c dh_err.c dh_gen.c dh_key.c dh_lib.c
 
@@ -106,6 +170,17 @@
 
 # md5
 SRCS+= md5_dgst.c md5_one.c
+.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == "i686"))
+STATICOBJS+=   mx86-elf.o
+SOBJS+=        mx86-elf.o
+CFLAGS+= -DMD5_ASM
+.endif
+
+mx86-elf.o: mx86unix.cpp
+       cpp -DELF -x c mx86unix.cpp | as -o mx86-elf.o
+
+mx86unix.cpp: asm/md5-586.pl x86asm.pl
+       perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/md5/asm/md5-586.pl cpp > mx86unix.cpp
 
 # mdc2
 SRCS+= mdc2dgst.c mdc2_one.c
@@ -131,14 +206,52 @@
 SRCS+= rc2_cbc.c rc2cfb64.c rc2_ecb.c rc2ofb64.c rc2_skey.c 
 
 # rc4
-SRCS+= rc4_enc.c rc4_skey.c
+SRCS+= rc4_skey.c
+
+.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == "i686"))
+STATICOBJS+=   rx86-elf.o
+SOBJS+=                rx86-elf.o
+.else
+SRCS+= rc4_enc.c 
+.endif
 
+rx86-elf.o: rx86unix.cpp
+       cpp -DELF -x c rx86unix.cpp | as -o rx86-elf.o
+
+rx86unix.cpp: asm/rc4-586.pl x86asm.pl
+       perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/rc4/asm/rc4-586.pl cpp > rx86unix.cpp
+
 # rc5
-SRCS+= rc5cfb64.c rc5_ecb.c rc5_enc.c rc5ofb64.c rc5_skey.c 
+SRCS+= rc5cfb64.c rc5_ecb.c rc5ofb64.c rc5_skey.c 
+
+.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == "i686"))
+STATICOBJS+=   r586-elf.o
+SOBJS+=                r586-elf.o
+.else
+SRCS+= rc5_enc.c 
+.endif
+
+r586-elf.o: r586unix.cpp
+       cpp -DELF -x c r586unix.cpp | as -o r586-elf.o
 
+r586unix.cpp: asm/rc5-586.pl x86asm.pl
+       perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/rc5/asm/rc5-586.pl cpp > r586unix.cpp
+
 # ripemd
 SRCS+= rmd_dgst.c rmd_one.c
 
+.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == "i686"))
+STATICOBJS+=   rm86-elf.o
+SOBJS+=                rm86-elf.o
+CFLAGS+= -DRMD160_ASM
+.endif
+
+rm86-elf.o: rm86unix.cpp
+       cpp -DELF -x c rm86unix.cpp | as -o rm86-elf.o
+
+rm86unix.cpp: asm/rmd-586.pl x86asm.pl
+       perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/ripemd/asm/rmd-586.pl cpp > rm86unix.cpp
+
 # rsa
 .if defined(WITH_RSA) && ${WITH_RSA} == YES
 SRCS+= rsa_chk.c rsa_err.c rsa_gen.c rsa_lib.c rsa_none.c rsa_null.c \
@@ -147,6 +260,18 @@
 
 # sha
 SRCS+= sha_dgst.c sha_one.c sha1_one.c sha1dgst.c
+
+.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == "i686"))
+STATICOBJS+=   sx86-elf.o
+SOBJS+=                sx86-elf.o
+CFLAGS+= -DSHA1_ASM
+.endif
+
+sx86-elf.o: sx86unix.cpp
+       cpp -DELF -x c sx86unix.cpp | as -o sx86-elf.o
+
+sx86unix.cpp: asm/sha1-586.pl x86asm.pl
+       perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/sha/asm/sha1-586.pl cpp > sx86unix.cpp
 
 # stack
 SRCS+= stack.c

Reply via email to