Well, my matlab code wasn't very good, I did a fully vectorized version,
and it's about the same speed than Julia.
I'm playing now with a problem that is easier to vectorize and there the
matlab version is 300 times faster than Julia, which is a bit crazy.
Maybe I messed something up again, that's really a big difference. I've
check that the output is identical, so at least I'm computing the same
thing.
Julia:
-
1. A = rand(50,40,30);
2. B = rand(50,50);
3. C = rand(40,40);
4. D = rand(30,30);
5. E = rand(50,40,30);
6. alpha = zeros(size(A))
7.
8. function testSum!(A::Array{Float64,3},B::Array{Float64,2},C::Array{
Float64,2},
9. D::Array{Float64,2},E::Array{Float64,3},alpha::
Array{Float64,3})
10.
11. tmp_1 = zero(Float64)
12. tmp_2 = zero(Float64)
13. tmp_3 = zero(Float64)
14.
15. @inbounds begin
16. for x_3p = 1:30
17. for x_2p = 1:40
18. for x_1p = 1:50
19. tmp_3 = zero(tmp_3)
20. for x_3 = 1:30
21. tmp_2 = zero(tmp_2)
22. for x_2 = 1:40
23. tmp_1 = zero(tmp_1)
24. @simd for x_1 = 1:50
25. tmp_1 += A[x_1,x_2,x_3] * B[x_1,x_1p]
26. end
27. tmp_2 += tmp_1 * C[x_2,x_2p]
28. end
29. tmp_3 += tmp_2 * D[x_3,x_3p]
30. end
31. alpha[x_1p,x_2p,x_3p] = E[x_1p,x_2p,x_3p] * tmp_3
32. end
33. end
34. end
35. end
36. end
Matlab:
function alpha = test2(A,B,C,D,E)
alpha = multiprod(multiprod(multiprod(B', A,[1 2],[1 2]),C,[1 2],[1
2]),D,[2 3],[1 2]).*E;
end