Hi,

I'm developing a Free C++ template library (1) in which it is very important 
that certain loops get unrolled, but at the same time I can't unroll them by 
hand, because they depend on template parameters.

My problem is that G++ 4.1.1 (Gentoo) doesn't unroll these loops.

I have written a simple standalone program that demonstrates this problem; I 
attach it (toto.cpp) and also paste it below. If UNROLL is not defined, the 
program runs a loop; if UNROLL is defined, it does the same thing with the 
loop unrolled by hand. One would therefore expect that with g++ -O3 the speed 
would be the same in both cases. Alas, it is not:

g++ -DUNROLL -O3 toto.cpp -o toto   ---> toto runs in 0.3 seconds
g++ -O3 toto.cpp -o toto            ---> toto runs in 1.9 seconds

So what can I do? Is this a bug in g++? If so, is there any hope of seeing it fixed soon?

Cheers,
Benoit

(1) : Eigen, see http://eigen.tuxfamily.org

----------------------------
file: toto.cpp
----------------------------

#include<iostream>

/**
 * Minimal matrix of doubles backed by a flat array of nine elements.
 */
class Matrix
{
public:
    /** Flat storage; element (i, j) is kept at index i + 3 * j. */
    double data[9];

    /**
     * Returns a writable reference to element (i, j).
     * No bounds checking is performed on either index.
     */
    double & operator()( int i, int j )
    {
        const int offset = 3 * j + i;
        return data[offset];
    }

    /** Declared here; the definition lives outside the class body. */
    void loadScaling( double factor );
};

// Sets every element of the matrix: entries whose two indices are equal
// receive `factor`, all other entries receive zero.
//
// Two implementations are selected at compile time by the UNROLL macro; for
// finite, non-negative `factor` they store the same values.
// NOTE(review): the loop form computes (i == j) * factor, so entries with
// unequal indices are 0 * factor rather than a literal 0. Under IEEE 754
// arithmetic that is NaN for an infinite or NaN factor and -0.0 for a
// negative factor, so the two branches are not strictly equivalent for such
// inputs.
void Matrix::loadScaling( double factor)
{
#ifdef UNROLL
    // Hand-unrolled form: nine explicit stores, one per element.
    (*this)( 0, 0 ) = factor;
    (*this)( 1, 0 ) = 0;
    (*this)( 2, 0 ) = 0;
    (*this)( 0, 1 ) = 0;
    (*this)( 1, 1 ) = factor;
    (*this)( 2, 1 ) = 0;
    (*this)( 0, 2 ) = 0;
    (*this)( 1, 2 ) = 0;
    (*this)( 2, 2 ) = factor;
#else
    // Loop form: (i == j) yields 1 when the indices match and 0 otherwise,
    // and that value is multiplied by factor before being stored.
    for( int i = 0; i < 3; i++ )
        for( int j = 0; j < 3; j++ )
            (*this)(i, j) = (i == j) * factor;
#endif
}

// Driver: calls Matrix::loadScaling 100000000 times on a single matrix,
// passing the current pass index as the factor, then prints element (0,0).
int main( int argc, char *argv[] )
{
    const int passes = 100000000;
    Matrix m;
    int pass = 0;
    while( pass < passes )
    {
        m.loadScaling( pass );
        ++pass;
    }
    std::cout << "m(0,0) = " << m(0,0) << std::endl;
}
#include<iostream>

/**
 * Minimal matrix of doubles backed by a flat array of nine elements.
 */
class Matrix
{
public:
    /** Flat storage; element (i, j) is kept at index i + 3 * j. */
    double data[9];

    /**
     * Returns a writable reference to element (i, j).
     * No bounds checking is performed on either index.
     */
    double & operator()( int i, int j )
    {
        const int offset = 3 * j + i;
        return data[offset];
    }

    /** Declared here; the definition lives outside the class body. */
    void loadScaling( double factor );
};

// Sets every element of the matrix: entries whose two indices are equal
// receive `factor`, all other entries receive zero.
//
// Two implementations are selected at compile time by the UNROLL macro; for
// finite, non-negative `factor` they store the same values.
// NOTE(review): the loop form computes (i == j) * factor, so entries with
// unequal indices are 0 * factor rather than a literal 0. Under IEEE 754
// arithmetic that is NaN for an infinite or NaN factor and -0.0 for a
// negative factor, so the two branches are not strictly equivalent for such
// inputs.
void Matrix::loadScaling( double factor)
{
#ifdef UNROLL
    // Hand-unrolled form: nine explicit stores, one per element.
    (*this)( 0, 0 ) = factor;
    (*this)( 1, 0 ) = 0;
    (*this)( 2, 0 ) = 0;
    (*this)( 0, 1 ) = 0;
    (*this)( 1, 1 ) = factor;
    (*this)( 2, 1 ) = 0;
    (*this)( 0, 2 ) = 0;
    (*this)( 1, 2 ) = 0;
    (*this)( 2, 2 ) = factor;
#else
    // Loop form: (i == j) yields 1 when the indices match and 0 otherwise,
    // and that value is multiplied by factor before being stored.
    for( int i = 0; i < 3; i++ )
        for( int j = 0; j < 3; j++ )
            (*this)(i, j) = (i == j) * factor;
#endif
}

// Driver: calls Matrix::loadScaling 100000000 times on a single matrix,
// passing the current pass index as the factor, then prints element (0,0).
int main( int argc, char *argv[] )
{
    const int passes = 100000000;
    Matrix m;
    int pass = 0;
    while( pass < passes )
    {
        m.loadScaling( pass );
        ++pass;
    }
    std::cout << "m(0,0) = " << m(0,0) << std::endl;
}

Attachment: pgpWZeXGqxnTe.pgp
Description: PGP signature

Reply via email to