Hi all, 

Look at the following code:

using Benchmarks

# Number of faces generated for each benchmark vector (passed to both
# `make_abstract_faces` and `make_concrete_faces` below).
const Nf = 1_000_000

# A polygonal face parameterised by the type `T` of its stored area and by
# the number `N` of entries in `points`.
immutable PolygonalFace{T, N}
    # `N` unsigned 32-bit integers, one per corner of the face
    # (presumably indices into a point list defined elsewhere; confirm).
    points::NTuple{N, UInt32}
    # Area value stored with the face; it is not computed from `points` here.
    area::T
end

# Return the area stored in the face `p`, for any parameters of `PolygonalFace`.
@inline function area(p::PolygonalFace)
    return p.area
end

# Build a vector of `Nf` random faces whose number of points varies from
# element to element. The element type `PolygonalFace{Float64}` leaves the
# second parameter open, so the vector can hold faces with different `N`.
function make_abstract_faces(Nf)
    mixed = PolygonalFace{Float64}[]
    for _ in 1:Nf
        # triangular, quadrilateral or pentagonal face
        npoints = rand(3:5)
        corners = tuple(rand(1:1000, npoints)...)
        push!(mixed, PolygonalFace{Float64, npoints}(corners, rand()))
    end
    return mixed
end

# Build a vector of `Nf` random faces that all have exactly `N` points, so
# the element type `PolygonalFace{Float64, N}` has both parameters fixed.
function make_concrete_faces(Nf, N)
    FaceType = PolygonalFace{Float64, N}
    uniform = FaceType[]
    for _ in 1:Nf
        corners = tuple(rand(1:1000, N)...)
        push!(uniform, FaceType(corners, rand()))
    end
    return uniform
end

# homogeneous elements in both parameters
#
# Add up the stored areas of all faces, starting from `zero(T)` and
# accumulating in index order. Returns `zero(T)` for an empty vector.
function sum_areas{T, N}(faces::Vector{PolygonalFace{T, N}})
    acc = zero(T)
    for k in eachindex(faces)
        acc = acc + area(faces[k])
    end
    return acc
end

# homogeneous elements in the first parameter only
#
# Add up the stored areas of faces that share the area type `T` but may
# differ in their number of points `N`. Returns `zero(T)` for an empty vector.
function sum_areas{T}(faces::Vector{PolygonalFace{T}})
    tot = zero(T)
    for face in faces
        # use trick discussed in
        # https://groups.google.com/forum/#!topic/julia-users/OBs0fmNmjCU
        # i.e. test for each expected type explicitly and assert it at the
        # call site, so that every branch calls `area` on a fully
        # parameterised face type.
        if     isa(face, PolygonalFace{T, 3})
            tot += area(face::PolygonalFace{T, 3})
        elseif isa(face, PolygonalFace{T, 4})
            tot += area(face::PolygonalFace{T, 4})
        elseif isa(face, PolygonalFace{T, 5})
            tot += area(face::PolygonalFace{T, 5})
        else
            # Any other number of points: fall back to a plain call so that
            # no face is silently left out of the total.
            tot += area(face)
        end
    end
    tot
end

# create random faces
# `ab_faces` mixes faces with 3, 4 or 5 points; `co_faces` holds faces that
# all have 3 points. Both contain `Nf` elements.
const ab_faces = make_abstract_faces(Nf)
const co_faces = make_concrete_faces(Nf, 3) # triangles

# benchmark
# Print the `@benchmark` report for each of the two `sum_areas` methods.
println(@benchmark sum_areas(ab_faces))
println(@benchmark sum_areas(co_faces))

# Time each call three times in a row with `@time` (presumably so that any
# one-off overhead in the first measurement can be told apart from the
# repeated ones).
@time sum_areas(ab_faces)
@time sum_areas(ab_faces)
@time sum_areas(ab_faces)
@time sum_areas(co_faces)
@time sum_areas(co_faces)
@time sum_areas(co_faces)

which results in 

================ Benchmark Results ========================
     Time per evaluation: 10.91 ms [10.44 ms, 11.38 ms]
Proportion of time in GC: 0.00% [0.00%, 0.00%]
        Memory allocated: 0.00 bytes
   Number of allocations: 0 allocations
       Number of samples: 100
   Number of evaluations: 100
 Time spent benchmarking: 1.29 s

================ Benchmark Results ========================
     Time per evaluation: 1.09 ms [1.03 ms, 1.14 ms]
Proportion of time in GC: 0.00% [0.00%, 0.00%]
        Memory allocated: 0.00 bytes
   Number of allocations: 0 allocations
       Number of samples: 100
   Number of evaluations: 100
 Time spent benchmarking: 0.26 s

  0.012995 seconds (61 allocations: 3.672 KB)
  0.014940 seconds (5 allocations: 176 bytes)
  0.012015 seconds (5 allocations: 176 bytes)
  0.002574 seconds (5 allocations: 176 bytes)
  0.001170 seconds (5 allocations: 176 bytes)
  0.001200 seconds (5 allocations: 176 bytes)

i.e., when the elements of the input vector `faces` are homogeneous in the
first parameter only, the code is about 10 times slower. Are those `if`
branches and type assertions really that expensive?

Thanks,

Davide 





Reply via email to