Hello, > + AV_WB64(y,KE); > > + > KE=SP[0][y[0]]^SP[1][y[1]]^SP[2][y[2]]^SP[3][y[3]]^SP[4][y[4]]^SP[5][y[5]]^SP[6][y[6]]^SP[7][y[7]]; > > + return KE; > > have you tried it without the y[] array ? > storing and loading from memory may (or may not) be slower
I have tried it without the y[] array, and there seems to be some improvement in the time: lavu CAMELLIA size: 1048576 runs: 1024 time: 21.549 +- 0.176. I have attached a patch with the corresponding changes. Would you like me to convert F() into a macro? Thanks, Supraja
From 95f089edee3cda76f7b5efe82869a42e841b7a3a Mon Sep 17 00:00:00 2001 From: Supraja Meedinti <supraja0...@gmail.com> Date: Tue, 10 Feb 2015 21:24:07 +0530 Subject: [PATCH] libavutil: camellia: remove unwanted memory loads Signed-off-by: Supraja Meedinti <supraja0...@gmail.com> --- libavutil/camellia.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/libavutil/camellia.c b/libavutil/camellia.c index 11e57a1..be2485a 100644 --- a/libavutil/camellia.c +++ b/libavutil/camellia.c @@ -142,11 +142,9 @@ static void LR128(uint64_t d[2], const uint64_t K[2], int x) static uint64_t F(uint64_t F_IN, uint64_t KE) { - uint8_t y[8]; KE ^= F_IN; - AV_WB64(y,KE); - KE=SP[0][y[0]]^SP[1][y[1]]^SP[2][y[2]]^SP[3][y[3]]^SP[4][y[4]]^SP[5][y[5]]^SP[6][y[6]]^SP[7][y[7]]; - return KE; + F_IN=SP[0][KE >> 56]^SP[1][(KE >> 48) & MASK8]^SP[2][(KE >> 40) & MASK8]^SP[3][(KE >> 32) & MASK8]^SP[4][(KE >> 24) & MASK8]^SP[5][(KE >> 16) & MASK8]^SP[6][(KE >> 8) & MASK8]^SP[7][KE & MASK8]; + return F_IN; } static uint64_t FL(uint64_t FL_IN, uint64_t KE) -- 1.8.3.2
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel