On Sunday, 7 March 2021 at 22:54:32 UTC, tsbockman wrote:
...
    result = diffSq[0];
    static foreach(i; 0 .. 3)
        result += diffSq[i];
...

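Oops, that's supposed to say `i; 1 .. 3`: `result` is already initialized with `diffSq[0]`, so starting the loop at 0 adds that element twice. Fixed: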

import std.meta : Repeat;
/**
 * Lane-wise Euclidean distance between two 3-component points whose
 * components are each a SIMD vector of floats.
 *
 * For every lane, computes
 *     sqrt((a[0]-b[0])^2 + (a[1]-b[1])^2 + (a[2]-b[2])^2)
 * and stores the packed results in `result`.
 *
 * Params:
 *     a      = the three component vectors of the first point
 *     b      = the three component vectors of the second point
 *     result = receives the per-lane distances
 *
 * Only compiles with LDC targeting x86_64: the square root is issued through
 * LDC's inline-asm intrinsic, and the trailing `static assert` rejects every
 * other compiler/target combination.
 */
void euclideanDistanceFixedSizeArray(V)(ref Repeat!(3, const(V)) a, ref Repeat!(3, const(V)) b, ref V result)
    if(is(V : __vector(float[length]), size_t length))
{
    // Mutable working copies of the first point's components; each one is
    // turned into the squared per-axis difference below.
    Repeat!(3, V) delta = a;
    static foreach(axis; 0 .. 3) {
        delta[axis] -= b[axis];
        delta[axis] *= delta[axis];
    }

    // Sum the three squared differences directly in the output, seeding it
    // with axis 0 and then adding axes 1 and 2 in that order.
    result = delta[0];
    result += delta[1];
    result += delta[2];

    // Packed single-precision square root over all lanes. `x` constrains
    // both operands to SSE/AVX vector registers.
    version(LDC) version(X86_64) {
        enum isSupportedPlatform = true;
        import ldc.llvmasm : __asm;
        result = __asm!V(`vsqrtps $1, $0`, `=x, x`, result);
    }
    static assert(isSupportedPlatform);
}

Fixed asm:

pure nothrow @nogc void app.euclideanDistanceFixedSizeArray!(__vector(float[16])).euclideanDistanceFixedSizeArray(ref const(__vector(float[16])), ref const(__vector(float[16])), ref const(__vector(float[16])), ref const(__vector(float[16])), ref const(__vector(float[16])), ref const(__vector(float[16])), ref __vector(float[16])):
        mov     rax, qword ptr [rsp + 8]
        vmovaps zmm0, zmmword ptr [rax]
        vmovaps zmm1, zmmword ptr [r9]
        vmovaps zmm2, zmmword ptr [r8]
        vsubps  zmm0, zmm0, zmmword ptr [rcx]
        vsubps  zmm1, zmm1, zmmword ptr [rdx]
        vmulps  zmm1, zmm1, zmm1
        vsubps  zmm2, zmm2, zmmword ptr [rsi]
        vfmadd231ps     zmm1, zmm0, zmm0
        vfmadd231ps     zmm1, zmm2, zmm2
        vmovaps zmmword ptr [rdi], zmm1
        vsqrtps zmm0, zmm1
        vmovaps zmmword ptr [rdi], zmm0
        vzeroupper
        ret

(I really wish I could just edit my posts here...)

Reply via email to