Scorn:

> double min(double a, double b, double c)
> {
>     return a < b && a < c ? a : b < c ? b : c;
> }

Don't write code like that; add some parentheses, like this:

return (a < b && a < c) ? a : (b < c ? b : c);

because the compiler is able to sort out those operator precedences, but the 
programmer that comes after you and reads that code will have problems.
A compiler compiles that code with 3 FP tests, while I think two suffice, so 
there are better ways to write that.


> This is (and a little bit more) is running in a tight loop which runs
> about 10000000 times.
> With these "optimizations" i get a speed increase about 20% percent. 

---------------------

I have created a module named "mo" and a main module named "temp":

module mo;
// Returns x multiplied by itself.
// Kept as a single return expression in its own module so the test can
// show whether the compiler inlines it into a caller in another module.
int foo(int x) {
    return x * x;
}

// Returns the smallest of a, b and c.
// The nested conditionals perform exactly two comparisons on every path:
// first a against b, then the smaller of those two against c.
// Ties are resolved towards the earlier argument because the tests use <=.
// NOTE(review): behaviour with NaN arguments is not addressed here --
// presumably a failed <= selects the right-hand operand; confirm if it matters.
double min3(double a, double b, double c) {
    return (a <= b) ? (a <= c ? a : c) : (b <= c ? b : c);
}

---------------------

module temp; // main module
version (Tango) {
    import tango.stdc.stdio: printf;
    import tango.stdc.stdlib: atoi, atof;
} else {
    import std.c.stdio: printf;
    import std.c.stdlib: atoi, atof;
}
import mo: foo, min3;

// Test driver: calls foo and min3 from module mo and prints their results.
// The operands are parsed from string literals with atoi/atof instead of
// being written as numeric literals.
// NOTE(review): presumably this keeps the compiler from constant-folding the
// calls, so the generated code shows whether they were inlined -- confirm.
void main() {
    int x = atoi("12");
    printf("%d\n", foo(x)); // prints the result of foo(x)

    double x1 = atof("10");
    double x2 = atof("20");
    double x3 = atof("30");
    printf("%f\n", min3(x1, x2, x3)); // prints the minimum of the three values
}

---------------------

From my tests it seems LDC isn't able to inline those functions, while DMD is 
able to inline them :-)

ldc -O5 -release -output-s -inline temp.d mo.d

08049600 <_Dmain>:
 8049600:       83 ec 34                sub    $0x34,%esp
 8049603:       c7 04 24 e8 8c 05 08    movl   $0x8058ce8,(%esp)
 804960a:       e8 99 fd ff ff          call   80493a8 <atoi@plt>
 804960f:       e8 9c 00 00 00          call   80496b0 <_D2mo3fooFiZi>
 8049614:       89 44 24 04             mov    %eax,0x4(%esp)
 8049618:       c7 04 24 eb 8c 05 08    movl   $0x8058ceb,(%esp)
 804961f:       e8 64 fd ff ff          call   8049388 <printf@plt>
 8049624:       c7 04 24 ef 8c 05 08    movl   $0x8058cef,(%esp)
 804962b:       e8 98 fd ff ff          call   80493c8 <atof@plt>
 8049630:       db 7c 24 28             fstpt  0x28(%esp)
 8049634:       c7 04 24 f2 8c 05 08    movl   $0x8058cf2,(%esp)
 804963b:       e8 88 fd ff ff          call   80493c8 <atof@plt>
 8049640:       db 7c 24 1c             fstpt  0x1c(%esp)
 8049644:       c7 04 24 f5 8c 05 08    movl   $0x8058cf5,(%esp)
 804964b:       e8 78 fd ff ff          call   80493c8 <atof@plt>
 8049650:       db 6c 24 28             fldt   0x28(%esp)
 8049654:       dd 5c 24 10             fstpl  0x10(%esp)
 8049658:       db 6c 24 1c             fldt   0x1c(%esp)
 804965c:       dd 5c 24 08             fstpl  0x8(%esp)
 8049660:       dd 1c 24                fstpl  (%esp)
 8049663:       e8 58 00 00 00          call   80496c0 <_D2mo4min3FdddZd>
 8049668:       83 ec 18                sub    $0x18,%esp
 804966b:       dd 5c 24 04             fstpl  0x4(%esp)
 804966f:       c7 04 24 f8 8c 05 08    movl   $0x8058cf8,(%esp)
 8049676:       e8 0d fd ff ff          call   8049388 <printf@plt>
 804967b:       31 c0                   xor    %eax,%eax
 804967d:       83 c4 34                add    $0x34,%esp
 8049680:       c2 08 00                ret    $0x8
 8049683:       8d b6 00 00 00 00       lea    0x0(%esi),%esi
 8049689:       8d bc 27 00 00 00 00    lea    0x0(%edi,%eiz,1),%edi

-----------------

dmd -O -release -inline temp.d mo.d

__Dmain comdat
L0:     sub ESP,038h
        mov EAX,offset FLAT:_DATA
        push    EBX
        push    ESI
        push    EDI
        push    EAX
        call    near ptr _atoi
        add ESP,4
        mov EBX,EAX
        mov ECX,EAX
        imul    ECX,ECX
        mov EDX,offset FLAT:_DATA[4]
        push    ECX
        push    EDX
        call    near ptr _printf
        mov ESI,offset FLAT:_DATA[8]
        push    ESI
        call    near ptr _atof
        mov EDI,offset FLAT:_DATA[0Ch]
        fstp    qword ptr 018h[ESP]
        push    EDI
        call    near ptr _atof
        mov EAX,offset FLAT:_DATA[010h]
        fstp    qword ptr 024h[ESP]
        push    EAX
        call    near ptr _atof
        add ESP,4
        fld qword ptr 01Ch[ESP]
        fxch    ST1
        fstp    qword ptr 02Ch[ESP]
        fcomp   qword ptr 024h[ESP]
        fstsw   AX
        sahf
        ja  L83
        jp  L83
        fld qword ptr 01Ch[ESP]
        fcomp   qword ptr 02Ch[ESP]
        fstsw   AX
        sahf
        ja  L7D
        jp  L7D
        fld qword ptr 01Ch[ESP]
        jmp short   L9C
L7D:        fld qword ptr 02Ch[ESP]
        jmp short   L9C
L83:        fld qword ptr 024h[ESP]
        fcomp   qword ptr 02Ch[ESP]
        fstsw   AX
        sahf
        ja  L98
        jp  L98
        fld qword ptr 024h[ESP]
        jmp short   L9C
L98:        fld qword ptr 02Ch[ESP]
L9C:        sub ESP,8
        mov ECX,offset FLAT:_DATA[014h]
        fstp    qword ptr [ESP]
        push    ECX
        call    near ptr _printf
        add ESP,01Ch
        xor EAX,EAX
        pop EDI
        pop ESI
        pop EBX
        add ESP,038h
        ret

-----------------

LDC is able to inline those functions only when link-time optimization is used.
So here, in a normal build without it, LDC is worse than DMD :-(

Bye,
bearophile

Reply via email to