OpenSSL includes asm code for several platforms to speed up various operations. Currently we don't build any of this; the attached patch turns on the asm code for Pentiums and above (it relies on an uncommitted patch to sys.mk which defines MACHINE_CPU ?= i386). Set MACHINE_CPU to "i586" or "i686" (both are actually identical at present) and rebuild.

Typical speed improvements are between 2x and 5x for BigNum, Blowfish, CAST, DES, MD5, RC4, RC5, RIPEMD-160, and SHA-1 operations, as measured by 'openssl speed' on my Pentium Pro 233 (speeds are in K/sec for packets of the given size). For each algorithm below, the first table is the current C-only build and the second is with the asm code enabled.

=== BLOWFISH ===
type               8 bytes    64 bytes   256 bytes  1024 bytes  8192 bytes
blowfish cbc      3325.35k    4417.62k    4553.81k    4572.46k    4595.71k

type               8 bytes    64 bytes   256 bytes  1024 bytes  8192 bytes
blowfish cbc      7073.58k    8099.29k    8286.63k    8328.53k    8315.07k

=== CAST ===
type               8 bytes    64 bytes   256 bytes  1024 bytes  8192 bytes
cast cbc          3249.59k    4231.19k    4394.88k    4401.69k    4437.97k

type               8 bytes    64 bytes   256 bytes  1024 bytes  8192 bytes
cast cbc          6956.17k    7999.28k    8208.47k    8199.05k    8192.00k

=== DES ===
type               8 bytes    64 bytes   256 bytes  1024 bytes  8192 bytes
des cbc           2291.38k    2404.86k    2426.97k    2436.64k    2443.95k
des ede3           492.61k     516.87k     519.83k     521.18k     520.20k

type               8 bytes    64 bytes   256 bytes  1024 bytes  8192 bytes
des cbc           4830.90k    5301.97k    5412.18k    5419.02k    5355.72k
des ede3          1831.93k    1919.80k    1929.86k    1932.36k    1913.72k

=== MD5 ===
type               8 bytes    64 bytes   256 bytes  1024 bytes  8192 bytes
md5               1636.10k    7736.92k   13991.76k   17495.04k   18879.83k

type               8 bytes    64 bytes   256 bytes  1024 bytes  8192 bytes
md5               2609.84k   13585.19k   26511.38k   34934.97k   38629.09k

=== RC4 ===
type               8 bytes    64 bytes   256 bytes  1024 bytes  8192 bytes
rc4              12947.45k   16970.70k   18044.59k   18370.22k   18275.58k

type               8 bytes    64 bytes   256 bytes  1024 bytes  8192 bytes
rc4              18209.51k   25287.65k   26100.65k   26139.99k   26512.82k

=== RC5 ===
type               8 bytes    64 bytes   256 bytes  1024 bytes  8192 bytes
rc5-32/12 cbc     5115.15k    8124.08k    8766.29k    8814.59k    8928.03k

type               8 bytes    64 bytes   256 bytes  1024 bytes  8192 bytes
rc5-32/12 cbc    12462.59k   15226.54k   15804.76k   16218.86k   16447.82k

=== RIPEMD-160 ===
type               8 bytes    64 bytes   256 bytes  1024 bytes  8192 bytes
rmd160             849.94k    3680.04k    6326.22k    7626.07k    8123.73k

type               8 bytes    64 bytes   256 bytes  1024 bytes  8192 bytes
rmd160            1279.72k    5915.67k   10461.46k   12978.21k   13944.41k

=== SHA1 ===
type               8 bytes    64 bytes   256 bytes  1024 bytes  8192 bytes
sha1              1081.31k    2844.71k    5784.80k    7765.50k    8650.75k

type               8 bytes    64 bytes   256 bytes  1024 bytes  8192 bytes
sha1              1617.59k    7664.76k   13538.05k   17012.18k   18419.89k

Kris

----
In God we Trust -- all others must submit an X.509 certificate.
    -- Charles Forsythe <[EMAIL PROTECTED]>
Index: Makefile =================================================================== RCS file: /home/ncvs/src/secure/lib/libcrypto/Makefile,v retrieving revision 1.17 diff -u -r1.17 Makefile --- Makefile 2000/04/13 07:36:09 1.17 +++ Makefile 2000/04/22 02:16:57 @@ -9,11 +9,11 @@ ${LCRYPTO_SRC}/err ${LCRYPTO_SRC}/evp ${LCRYPTO_SRC}/hmac \ ${LCRYPTO_SRC}/lhash ${LCRYPTO_SRC}/md2 ${LCRYPTO_SRC}/md5 \ ${LCRYPTO_SRC}/mdc2 ${LCRYPTO_SRC}/objects ${LCRYPTO_SRC}/pem \ - ${LCRYPTO_SRC}/pkcs7 ${LCRYPTO_SRC}/pkcs12 ${LCRYPTO_SRC}/rand \ - ${LCRYPTO_SRC}/rc2 ${LCRYPTO_SRC}/rc4 ${LCRYPTO_SRC}/rc5 \ - ${LCRYPTO_SRC}/ripemd ${LCRYPTO_SRC}/rsa ${LCRYPTO_SRC}/../rsaref \ - ${LCRYPTO_SRC}/sha ${LCRYPTO_SRC}/stack ${LCRYPTO_SRC}/txt_db \ - ${LCRYPTO_SRC}/x509 ${LCRYPTO_SRC}/x509v3 + ${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/pkcs7 ${LCRYPTO_SRC}/pkcs12 \ + ${LCRYPTO_SRC}/rand ${LCRYPTO_SRC}/rc2 ${LCRYPTO_SRC}/rc4 \ + ${LCRYPTO_SRC}/rc5 ${LCRYPTO_SRC}/ripemd ${LCRYPTO_SRC}/rsa \ + ${LCRYPTO_SRC}/../rsaref ${LCRYPTO_SRC}/sha ${LCRYPTO_SRC}/stack \ + ${LCRYPTO_SRC}/txt_db ${LCRYPTO_SRC}/x509 ${LCRYPTO_SRC}/x509v3 LIB= crypto SHLIB_MAJOR= 1 @@ -41,24 +41,69 @@ x_x509a.c \ # blowfish -SRCS+= bf_cfb64.c bf_ecb.c bf_enc.c bf_ofb64.c bf_skey.c +SRCS+= bf_cfb64.c bf_ecb.c bf_ofb64.c bf_skey.c +.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == +"i686")) +STATICOBJS+= bx86-elf.o +SOBJS+= bx86-elf.o +.else +SRCS+= bf_enc.c +.endif + +bx86-elf.o: bx86unix.cpp + cpp -DELF -x c bx86unix.cpp | as -o bx86-elf.o + +bx86unix.cpp: asm/bf-586.pl x86asm.pl + perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/bf/asm/bf-586.pl cpp > +bx86unix.cpp + # bio SRCS+= b_dump.c b_print.c b_sock.c bf_buff.c bf_nbio.c bf_null.c \ bio_cb.c bio_err.c bio_lib.c bss_acpt.c bss_bio.c bss_conn.c \ bss_fd.c bss_file.c bss_log.c bss_mem.c bss_null.c bss_sock.c # bn -SRCS+= bn_add.c bn_asm.c bn_blind.c bn_ctx.c bn_div.c bn_err.c bn_exp.c \ +SRCS+= bn_add.c bn_blind.c bn_ctx.c bn_div.c bn_err.c bn_exp.c \ 
bn_exp2.c bn_gcd.c bn_lib.c bn_mont.c bn_mpi.c bn_mul.c bn_prime.c \ bn_print.c bn_rand.c bn_recp.c bn_shift.c bn_sqr.c bn_word.c +.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == +"i686")) +STATICOBJS+= bn86-elf.o co86-elf.o +SOBJS+= bn86-elf.o co86-elf.o +.else +SRCS+= bn_asm.c +.endif + +bn86-elf.o: bn86unix.cpp + cpp -DELF -x c bn86unix.cpp | as -o bn86-elf.o + +bn86unix.cpp: asm/bn-586.pl x86asm.pl + perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/bn/asm/bn-586.pl cpp > +bn86unix.cpp + +co86-elf.o: co86unix.cpp + cpp -DELF -x c co86unix.cpp | as -o co86-elf.o + +co86unix.cpp: asm/co-586.pl x86asm.pl + perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/bn/asm/co-586.pl cpp > +co86unix.cpp + # buffer SRCS+= buf_err.c buffer.c # cast -SRCS+= c_cfb64.c c_ecb.c c_enc.c c_ofb64.c c_skey.c +SRCS+= c_cfb64.c c_ecb.c c_ofb64.c c_skey.c +.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == +"i686")) +STATICOBJS+= cx86-elf.o +SOBJS+= cx86-elf.o +.else +SRCS+= c_enc.c +.endif + +cx86-elf.o: cx86unix.cpp + cpp -DELF -x c cx86unix.cpp | as -o cx86-elf.o + +cx86unix.cpp: asm/cast-586.pl x86asm.pl cbc.pl + perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/cast/asm/cast-586.pl cpp > +cx86unix.cpp + # comp SRCS+= c_rle.c c_zlib.c comp_lib.c @@ -66,12 +111,31 @@ SRCS+= conf.c conf_err.c # des -SRCS+= cbc_cksm.c cbc_enc.c cfb64ede.c cfb64enc.c cfb_enc.c des_enc.c \ - ecb3_enc.c ecb_enc.c ede_cbcm_enc.c enc_read.c enc_writ.c fcrypt.c \ - fcrypt_b.c ofb64ede.c ofb64enc.c ofb_enc.c pcbc_enc.c qud_cksm.c \ +SRCS+= cbc_cksm.c cbc_enc.c cfb64ede.c cfb64enc.c cfb_enc.c \ + ecb3_enc.c ecb_enc.c ede_cbcm_enc.c enc_read.c enc_writ.c \ + fcrypt.c ofb64ede.c ofb64enc.c ofb_enc.c pcbc_enc.c qud_cksm.c \ rand_key.c read2pwd.c read_pwd.c rnd_keys.c rpc_enc.c set_key.c \ str2key.c supp.c xcbc_enc.c +.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == +"i686")) +STATICOBJS+= dx86-elf.o yx86-elf.o +SOBJS+= dx86-elf.o 
yx86-elf.o +.else +SRCS+= des_enc.c fcrypt_b.c +.endif + +dx86-elf.o: dx86unix.cpp + cpp -DELF -x c dx86unix.cpp | as -o dx86-elf.o + +yx86-elf.o: yx86unix.cpp + cpp -DELF -x c yx86unix.cpp | as -o yx86-elf.o + +dx86unix.cpp: asm/des-586.pl x86asm.pl cbc.pl + perl -I${LCRYPTO_SRC}/perlasm -I${LCRYPTO_SRC}/des/asm +${LCRYPTO_SRC}/des/asm/des-586.pl cpp > dx86unix.cpp + +yx86unix.cpp: asm/crypt586.pl x86asm.pl + perl -I${LCRYPTO_SRC}/perlasm -I${LCRYPTO_SRC}/des/asm +${LCRYPTO_SRC}/des/asm/crypt586.pl cpp > yx86unix.cpp + # dh SRCS+= dh_check.c dh_err.c dh_gen.c dh_key.c dh_lib.c @@ -106,6 +170,17 @@ # md5 SRCS+= md5_dgst.c md5_one.c +.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == +"i686")) +STATICOBJS+= mx86-elf.o +SOBJS+= mx86-elf.o +CFLAGS+= -DMD5_ASM +.endif + +mx86-elf.o: mx86unix.cpp + cpp -DELF -x c mx86unix.cpp | as -o mx86-elf.o + +mx86unix.cpp: asm/md5-586.pl x86asm.pl + perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/md5/asm/md5-586.pl cpp > +mx86unix.cpp # mdc2 SRCS+= mdc2dgst.c mdc2_one.c @@ -131,14 +206,52 @@ SRCS+= rc2_cbc.c rc2cfb64.c rc2_ecb.c rc2ofb64.c rc2_skey.c # rc4 -SRCS+= rc4_enc.c rc4_skey.c +SRCS+= rc4_skey.c + +.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == +"i686")) +STATICOBJS+= rx86-elf.o +SOBJS+= rx86-elf.o +.else +SRCS+= rc4_enc.c +.endif +rx86-elf.o: rx86unix.cpp + cpp -DELF -x c rx86unix.cpp | as -o rx86-elf.o + +rx86unix.cpp: asm/rc4-586.pl x86asm.pl + perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/rc4/asm/rc4-586.pl cpp > +rx86unix.cpp + # rc5 -SRCS+= rc5cfb64.c rc5_ecb.c rc5_enc.c rc5ofb64.c rc5_skey.c +SRCS+= rc5cfb64.c rc5_ecb.c rc5ofb64.c rc5_skey.c + +.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == +"i686")) +STATICOBJS+= r586-elf.o +SOBJS+= r586-elf.o +.else +SRCS+= rc5_enc.c +.endif + +r586-elf.o: r586unix.cpp + cpp -DELF -x c r586unix.cpp | as -o r586-elf.o +r586unix.cpp: asm/rc5-586.pl x86asm.pl + perl -I${LCRYPTO_SRC}/perlasm 
${LCRYPTO_SRC}/rc5/asm/rc5-586.pl cpp > +r586unix.cpp + # ripemd SRCS+= rmd_dgst.c rmd_one.c +.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == +"i686")) +STATICOBJS+= rm86-elf.o +SOBJS+= rm86-elf.o +CFLAGS+= -DRMD160_ASM +.endif + +rm86-elf.o: rm86unix.cpp + cpp -DELF -x c rm86unix.cpp | as -o rm86-elf.o + +rm86unix.cpp: asm/rmd-586.pl x86asm.pl + perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/ripemd/asm/rmd-586.pl cpp > +rm86unix.cpp + # rsa .if defined(WITH_RSA) && ${WITH_RSA} == YES SRCS+= rsa_chk.c rsa_err.c rsa_gen.c rsa_lib.c rsa_none.c rsa_null.c \ @@ -147,6 +260,18 @@ # sha SRCS+= sha_dgst.c sha_one.c sha1_one.c sha1dgst.c + +.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == +"i686")) +STATICOBJS+= sx86-elf.o +SOBJS+= sx86-elf.o +CFLAGS+= -DSHA1_ASM +.endif + +sx86-elf.o: sx86unix.cpp + cpp -DELF -x c sx86unix.cpp | as -o sx86-elf.o + +sx86unix.cpp: asm/sha1-586.pl x86asm.pl + perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/sha/asm/sha1-586.pl cpp > +sx86unix.cpp # stack SRCS+= stack.c