Coming back to my original problem, I wrote a simplified version of it,

which is about 10x slower than a vectorized MATLAB version. Have I missed
anything here?


# Benchmark inputs: constant-valued arrays sized to match the loops in testSum.
A = ones(50, 40, 40);

B = ones(50, 100) / 2;

C = ones(40, 100) / 3;

D = ones(40, 100) / 4;

E = ones(100, 100) / 5;

# Column of E to read for each t: 100, 99, ..., 1.
# `collect` materializes the range as a Vector{Int}; the older spelling
# `int([100:-1:1])` relies on `int` and on range concatenation inside `[...]`,
# neither of which exists in current Julia.
idx = collect(100:-1:1);

 

"""
    testSum(A, B, C, D, E, idx)

Return the vector `alpha` whose entry `t` is

    E[t, idx[t]] * (sum over i, j, k of A[i, j, k] * B[i, t] * C[j, t] * D[k, t])

The loop extents are taken from the arguments: `(n1, n2, n3) = size(A)` and
`nt = size(B, 2)`; the result has length `nt`.

# Arguments
- `A`: `n1 x n2 x n3` array.
- `B`: `n1 x nt` matrix.
- `C`: `n2 x nt` matrix.
- `D`: `n3 x nt` matrix.
- `E`: matrix read at `E[t, idx[t]]` for `t = 1:nt`.
- `idx`: column of `E` to use for each `t`.

# Throws
- `DimensionMismatch` if `B`, `C` or `D` does not have the shape listed above.
- `BoundsError` if `idx[t]` or `E[t, idx[t]]` is out of range for some `t`.
"""
function testSum(A::Array{Float64,3}, B::Array{Float64,2}, C::Array{Float64,2},
                 D::Array{Float64,2}, E::Array{Float64,2}, idx::Array{Int64,1})
    n1, n2, n3 = size(A)
    nt = size(B, 2)

    # The contraction loops below run under @inbounds, so the shapes have to be
    # verified up front; a mismatch would otherwise read out-of-bounds memory.
    size(B, 1) == n1 || throw(DimensionMismatch(
        "B has $(size(B, 1)) rows, expected $n1 to match size(A, 1)"))
    size(C) == (n2, nt) || throw(DimensionMismatch(
        "C has size $(size(C)), expected ($n2, $nt)"))
    size(D) == (n3, nt) || throw(DimensionMismatch(
        "D has size $(size(D)), expected ($n3, $nt)"))

    alpha = zeros(nt)

    # The value for a given t depends only on t, so it is computed exactly once
    # per t rather than being recomputed inside an extra outer loop.
    for t = 1:nt
        tmp_3 = 0.0
        @inbounds for x_3 = 1:n3
            tmp_2 = 0.0
            for x_2 = 1:n2
                tmp_1 = 0.0
                # x_1 is the first (fastest-varying) index of both A and B.
                @simd for x_1 = 1:n1
                    tmp_1 += A[x_1, x_2, x_3] * B[x_1, t]
                end
                tmp_2 += tmp_1 * C[x_2, t]
            end
            tmp_3 += tmp_2 * D[x_3, t]
        end
        # Deliberately outside @inbounds: idx holds caller-supplied indices.
        alpha[t] = E[t, idx[t]] * tmp_3
    end

    return alpha
end



Reply via email to