This replicates my "problem": f2 is much faster than f1. But, like I said,
isn't that because the loops are optimized away during compilation?
The weird thing I just noticed is that it still depends on the size of
A.
"""
    f1(A::Array{Float64,2})

Return the sum of all elements of `A`.

The sum is accumulated in a `Float64` starting from `0.0`, so an empty matrix
yields `0.0`. Elements are added column by column (first index fastest), which
matches the column-major memory layout of `Array`.
"""
function f1(A::Array{Float64,2})
    total = 0.0
    # In a multi-range `for`, the last range is the innermost loop: `i` (the
    # first index) varies fastest, so memory is read contiguously.
    for j in axes(A, 2), i in axes(A, 1)
        total += A[i, j]
    end
    return total
end
"""
    f2(A::Array{Float64,2})

Accumulate every element of `A` into a local `Float64`, discard the result,
and return the constant `1.0`.

The accumulated value is never used; the function exists to time a loop whose
result is thrown away. Elements are visited column by column (first index
fastest), which matches the column-major memory layout of `Array`.
"""
function f2(A::Array{Float64,2})
    total = 0.0
    # NOTE(review): the indexing below is bounds-checked (no `@inbounds`), which
    # is presumably why the loop is not removed entirely and the run time still
    # scales with the size of `A` — confirm with `@code_llvm f2(A)`.
    for j in axes(A, 2), i in axes(A, 1)
        total += A[i, j]
    end
    return 1.0
end
# Benchmark input: a 10^4 × 10^4 matrix of standard-normal Float64 values.
A = randn(10^4, 10^4);

# Call each function once before timing so that the `@time` results below do
# not include compilation.
for warmup in (f1, f2)
    warmup(A);
end

@time f1(A)
@time f2(A)