The main reason I wrote this algorithm in ASM was for speed,
but after spending 3 days on it, it is still 2 times
slower than its C++ counterpart. Strange, huh? I don't
see why this should happen. 

Anyways, the code works fine and is below. If anyone
has a problem using it, let me know. The code is
tested with calls from VC++ .net 2003 in release mode.
If any of you know why the code gives me such poor
performance, let me know.

For those still interested in it, here is how to use
my code
1. Put this in your C++ code. After all your #includes

extern "C" bf_cbc_encrypt(BYTE *incoming_data,
unsigned int data_len, unsigned int *key_data1,
unsigned int *key_data2, unsigned char *chain);

Note: unsigned int *key_data1, unsigned int *key_data2
is the pre processed key data. This part has to be
done by c. I was going to implement this in ASM, but
after I saw the speed, I thought I will stick to C++.
unsigned int data_len is the length of the data in
bytes and must be a multiple of 8. This means that
padding has to be done by you in
whatever way you want it to be.
BYTE *incoming_data - Hmmm... I wonder what this is
???
Ok, NOW A VERY IMPORTANT NOTE regarding
incoming_data:- Unlike some of the other functions you
may have used, mine encrypts the data in the same
buffer. This means that after you run the function,
incoming_data will be overwritten with the encrypted
data. No separate buffer is needed for outgoing data.
chain - The 8 byte IV. The function does not modify it,
so it cannot be used to continue the chain into a second
buffer of data, although this can be changed easily.

2. The ASM code below, to be used with NASM
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; bf_cbc.asm starts here
;---------------------------------------------------------------------
; ProcessRound off_a, off_b
;
; Two consecutive cipher rounds on the block halves in ebx / ecx.
; %1 and %2 are byte offsets from edi of the two subkey dwords:
;     ecx ^= s_boxer_f (ebx) ^ [edi + %1]
;     ebx ^= s_boxer_f2(ecx) ^ [edi + %2]
; eax is overwritten; it leaves holding the second round's mask.
;---------------------------------------------------------------------
%macro ProcessRound 2
        call    s_boxer_f               ; eax = F(ebx)
        xor     eax, dword [edi + %1]
        xor     ecx, eax

        call    s_boxer_f2              ; eax = F(ecx)
        xor     eax, dword [edi + %2]
        xor     ebx, eax
%endmacro

section .data
; (no initialised data)

section .bss

; 8 bytes (two dwords) reserved for the CBC chaining value.
chain:          resb    8

; 4 bytes of scratch reserved for the F-function argument.
s_boxer_arg:    resd    1


section .text
global _bf_cbc_encrypt

;---------------------------------------------------------------------
; int bf_cbc_encrypt(BYTE *incoming_data, unsigned int data_len,
;                    unsigned int *key_data1,
;                    unsigned int *key_data2,
;                    unsigned char *chain)
;
; CBC-mode block encryption, in place. NASM syntax, 32-bit x86,
; arguments on the stack (cdecl).
;
; In:   [ebp+8]  incoming_data  buffer, overwritten with ciphertext
;       [ebp+12] data_len       length in bytes; only whole 8-byte
;                               blocks are processed, so 0 does
;                               nothing and a trailing partial
;                               block is left untouched
;       [ebp+16] key_data1      subkey dwords, read at offsets 0..68
;       [ebp+20] key_data2      S-box tables, read through edx by
;                               s_boxer_f / s_boxer_f2
;       [ebp+24] chain          8-byte IV, read only
; Out:  eax = 22 always; all other registers are preserved.
;
; Register roles inside the loop:
;       esi = current block     edi = key_data1    edx = key_data2
;       ebx / ecx = the two block halves; between blocks they carry
;                   the chaining value (no static storage is used)
;       [ebp-4]   = address one past the last whole block
;
; NOTE(review): plaintext and IV dwords are loaded without a byte
; swap, but the ciphertext is byte-swapped before it is stored.
; A reference Blowfish CBC presumably swaps on load as well -
; confirm against a known test vector before relying on interop.
; The original load/store behaviour is kept unchanged here.
;---------------------------------------------------------------------

%define BF_BLOCK_BYTES  8
%define BF_P_LAST       68              ; offset of the final subkey

_bf_cbc_encrypt:
        push    ebp
        mov     ebp, esp
        sub     esp, 4                  ; local: end-of-data pointer
        pushad

        mov     esi, [ebp + 8]          ; data
        mov     edi, [ebp + 16]         ; key_data1
        mov     edx, [ebp + 20]         ; key_data2

        ; Work out where the last whole block ends. A length below
        ; one block means there is nothing to encrypt.
        mov     eax, [ebp + 12]
        and     eax, -BF_BLOCK_BYTES
        jz      .done
        add     eax, esi
        mov     [ebp - 4], eax

        ; Seed the chaining value from the IV (the IV itself is
        ; never written back).
        mov     eax, [ebp + 24]
        mov     ebx, [eax]
        mov     ecx, [eax + 4]

.next_block:
        ; ebx:ecx hold the chaining value; fold in the plaintext.
        xor     ebx, [esi]
        xor     ecx, [esi + 4]

        xor     ebx, [edi]              ; initial subkey

        ProcessRound  4,  8
        ProcessRound 12, 16
        ProcessRound 20, 24
        ProcessRound 28, 32
        ProcessRound 36, 40
        ProcessRound 44, 48
        ProcessRound 52, 56
        ProcessRound 60, 64

        xor     ecx, [edi + BF_P_LAST]  ; final subkey

        ; Output is (ecx, ebx), each byte-swapped for storage.
        mov     eax, ecx
        bswap   eax
        mov     [esi], eax
        mov     eax, ebx
        bswap   eax
        mov     [esi + 4], eax

        ; The unswapped output is the next chaining value: its
        ; first dword (ecx) must meet the next block's first dword,
        ; which is accumulated in ebx, so swap the two halves.
        xchg    ebx, ecx

        add     esi, BF_BLOCK_BYTES
        cmp     esi, [ebp - 4]
        jne     .next_block

.done:
        popad
        mov     eax, 22                 ; fixed return value
        leave
        ret




;---------------------------------------------------------------------
; s_boxer_f - round function applied to ebx
;
; In:    ebx = 32-bit input x, split into bytes a,b,c,d (a = top)
;        edx = base of four consecutive 256-dword tables s[0..3]
; Out:   eax = ((s[0][a] + s[1][b]) ^ s[2][c]) + s[3][d]
; Clobb: eax, flags. ebx, ecx and edx are unchanged.
;
; Works purely in registers: no static scratch memory is touched,
; so the routine is reentrant, and the index register is always
; written with a full 32-bit result (no partial-register merge).
;---------------------------------------------------------------------

%define SBOX_BYTES      1024            ; 256 dwords per table

s_boxer_f:
        push    ecx                     ; ecx is the index scratch

        mov     ecx, ebx
        shr     ecx, 24                 ; a
        mov     eax, [edx + ecx*4]      ; y = s[0][a]

        mov     ecx, ebx
        shr     ecx, 16
        and     ecx, 0FFh               ; b
        add     eax, [edx + ecx*4 + SBOX_BYTES]     ; y += s[1][b]

        movzx   ecx, bh                 ; c
        xor     eax, [edx + ecx*4 + 2*SBOX_BYTES]   ; y ^= s[2][c]

        movzx   ecx, bl                 ; d
        add     eax, [edx + ecx*4 + 3*SBOX_BYTES]   ; y += s[3][d]

        pop     ecx
        ret


;---------------------------------------------------------------------
; s_boxer_f2 - round function applied to ecx
;
; In:    ecx = 32-bit input x, split into bytes a,b,c,d (a = top)
;        edx = base of four consecutive 256-dword tables s[0..3]
; Out:   eax = ((s[0][a] + s[1][b]) ^ s[2][c]) + s[3][d]
; Clobb: eax, flags. ebx, ecx and edx are unchanged.
;
; Works purely in registers: no static scratch memory is touched,
; so the routine is reentrant, and the index register is always
; written with a full 32-bit result (no partial-register merge).
;---------------------------------------------------------------------

%define SBOX_BYTES      1024            ; 256 dwords per table

s_boxer_f2:
        push    ebx                     ; ebx is the index scratch

        mov     ebx, ecx
        shr     ebx, 24                 ; a
        mov     eax, [edx + ebx*4]      ; y = s[0][a]

        mov     ebx, ecx
        shr     ebx, 16
        and     ebx, 0FFh               ; b
        add     eax, [edx + ebx*4 + SBOX_BYTES]     ; y += s[1][b]

        movzx   ebx, ch                 ; c
        xor     eax, [edx + ebx*4 + 2*SBOX_BYTES]   ; y ^= s[2][c]

        movzx   ebx, cl                 ; d
        add     eax, [edx + ebx*4 + 3*SBOX_BYTES]   ; y += s[3][d]

        pop     ebx
        ret
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; bf_cbc.asm ends here

I hope this helps someone.

- Sid


                
_______________________________
Do you Yahoo!?
Win 1 of 4,000 free domain names from Yahoo! Enter now.
http://promotions.yahoo.com/goldrush
______________________________________________________________________
OpenSSL Project                                 http://www.openssl.org
User Support Mailing List                    [EMAIL PROTECTED]
Automated List Manager                           [EMAIL PROTECTED]

Reply via email to