Andy Polyakov wrote:

Try the attached patch.
...
ACE is so fast that overall performance does depend on the additional time we spend calling functions and so on, and the only question is what percentage the key reload overhead is in comparison to the rest. 50 cycles can be noticeable on small blocks... Well, one will always be able to argue about this, but there was another reason why I chose to implement padlock_verify_context() that way: it was a way to learn something about the hardware :-) A.

This also works fine. Attached is the patch for openssl-0.9.7's padlock extension - yours seems to be an openssl-0.9.8 patch.
--- crypto/engine/hw_padlock.c.fks.fix	2005-04-07 09:10:33.000000000 +0200
+++ crypto/engine/hw_padlock.c	2005-04-08 22:09:33.755438172 +0200
@@ -394,10 +394,10 @@
 "	jnc	1f\n"
 "	cmp	%2,%1\n"
 "	je	1f\n"
-"	mov	%2,%0\n"
 "	popfl\n"
 "	sub	$4,%%esp\n"
-"1:	add	$4,%%esp"
+"1:	add	$4,%%esp\n"
+"	mov	%2,%0"
 	:"+m"(padlock_saved_context)
 	: "r"(padlock_saved_context), "r"(cdata) : "cc");
 }
@@ -520,10 +520,10 @@
 		jnc	skip
 		cmp	ecx,padlock_saved_context
 		je	skip
-		mov	padlock_saved_context,ecx
 		popfd
 		sub	esp,4
 	skip:	add	esp,4
+		mov	padlock_saved_context,ecx
 		}
 }
 

Reply via email to