padlock_verify_context() does pushfl but, IIRC, sometimes *skips* over
popfl to avoid an unnecessary key reload. Apparently that logic is not 100% accurate.

Try attached patch.

Originally I did padlock_reload_key() on each padlock_aes_cipher() call
and I still believe it is not an unacceptable overhead ;-)

ACE is so fast that overall performance does depend on the additional time we spend calling functions and so on, and the only question is what percentage the key reload overhead represents in comparison to the rest. 50 cycles can be noticeable on small blocks... Well, one will always be able to argue about this, but there was another reason why I chose to implement padlock_verify_context() that way: it was a way to learn something about the hardware :-) A.
--- ./eng_padlock.c.orig	Wed Jan 26 00:00:23 2005
+++ ./eng_padlock.c	Fri Apr  8 13:14:33 2005
@@ -394,10 +394,10 @@
 "	jnc	1f\n"
 "	cmp	%2,%1\n"
 "	je	1f\n"
-"	mov	%2,%0\n"
 "	popfl\n"
 "	sub	$4,%%esp\n"
-"1:	add	$4,%%esp"
+"1:	add	$4,%%esp\n"
+"	mov	%2,%0"
 	:"+m"(padlock_saved_context)
 	: "r"(padlock_saved_context), "r"(cdata) : "cc");
 }
@@ -520,10 +520,10 @@
 		jnc	skip
 		cmp	ecx,padlock_saved_context
 		je	skip
-		mov	padlock_saved_context,ecx
 		popfd
 		sub	esp,4
 	skip:	add	esp,4
+		mov	padlock_saved_context,ecx
 		}
 }
 

Reply via email to