The following is the code of vectorized sin computation which uses
a lookup table.

#define NUM_SAMPLES_IN_CIRCLE2 150*1024
extern const int sign_;
extern const int notsign_;
extern __vector float ONE_DIV_PI2_;
extern __vector float numSamples;
extern __vector float sign;
extern __vector float notsign;
extern __vector float vecZeros;
extern float lookup_samples2[NUM_SAMPLES_IN_CIRCLE2];

void vec_fastsin(
        __vector float x,
        __vector float& result
)
{
        __vector long int indexes;
        __vector long int temp;
        __vector float mask;

        mask = vec_and(x,sign);
        x = vec_and(x,notsign);
        x = vec_madd(x, ONE_DIV_PI2_, vecZeros);
        x = vec_sub(x, vec_ctf(vec_cts(x,0),0));
        x = vec_madd(x, numSamples, vecZeros);

        indexes = vec_cts(x,0);

        int *p=((int *) &indexes);
        ((float *)&result)[0] = lookup_samples2[p[0]];
        ((float *)&result)[1] = lookup_samples2[p[1]];
        ((float *)&result)[2] = lookup_samples2[p[2]];
        ((float *)&result)[3] = lookup_samples2[p[3]];

        result=vec_xor(result, mask);
}


I'm using gcc version 2.96 for PowerPC which is included in Tornado 2.2.1
suite:
it works fine when compiling with debug information but using -O2 option it
doesn't work.
The problem is that compiler performs an incorrect optimization moving the
last vec_xor
before the assignments to result. Doing that it doesn't work only for
negative numbers.
It follows the disassembly of the compiled code:

test1.o:     file format elf32-powerpc

Disassembly of section .text:

00000000 <vec_fastsin__FEfREf>:
   0:   54 2b 07 38     rlwinm  r11,r1,0,28,28
   4:   21 6b ff e0     subfic  r11,r11,-32
   8:   7c 21 59 6e     stwux   r1,r1,r11
   c:   3d 60 00 00     lis     r11,0
  10:   3d 40 00 00     lis     r10,0
  14:   7c 20 18 ce     lvx     v1,r0,r3
  18:   39 6b 00 00     addi    r11,r11,0
  1c:   3d 20 00 00     lis     r9,0
  20:   7d a0 58 ce     lvx     v13,r0,r11
  24:   39 4a 00 00     addi    r10,r10,0
  28:   39 29 00 00     addi    r9,r9,0
  2c:   7d 60 48 ce     lvx     v11,r0,r9
  30:   3d 60 00 00     lis     r11,0
  34:   39 01 00 10     addi    r8,r1,16
  38:   7d 80 50 ce     lvx     v12,r0,r10
  3c:   3d 20 00 00     lis     r9,0
  40:   39 6b 00 00     addi    r11,r11,0
  44:   39 29 00 00     addi    r9,r9,0
  48:   7d 40 58 ce     lvx     v10,r0,r11
  4c:   3d 40 00 00     lis     r10,0
  50:   11 a2 6c 04     vand    v13,v2,v13
  54:   7c 00 48 ce     lvx     v0,r0,r9
  58:   39 4a 00 00     addi    r10,r10,0
  5c:   11 ad 5b 2e     vmaddfp v13,v13,v12,v11
  60:   10 42 04 04     vand    v2,v2,v0
  64:   10 21 14 c4     vxor    v1,v1,v2          <----------VXOR is here!!
  68:   7c 20 19 ce     stvx    v1,r0,r3
  6c:   10 00 6b ca     vctsxs  v0,v13,0
  70:   10 00 03 4a     vcfsx   v0,v0,0
  74:   11 ad 00 4a     vsubfp  v13,v13,v0
  78:   11 ad 5a ae     vmaddfp v13,v13,v10,v11
  7c:   11 a0 6b ca     vctsxs  v13,v13,0
  80:   7d a0 41 ce     stvx    v13,r0,r8
  84:   80 01 00 10     lwz     r0,16(r1)
  88:   81 28 00 04     lwz     r9,4(r8)
  8c:   54 00 10 3a     rlwinm  r0,r0,2,0,29
  90:   81 68 00 08     lwz     r11,8(r8)
  94:   7c 0a 04 2e     lfsx    f0,r10,r0
  98:   55 29 10 3a     rlwinm  r9,r9,2,0,29
  9c:   d0 03 00 00     stfs    f0,0(r3)
  a0:   55 6b 10 3a     rlwinm  r11,r11,2,0,29
  a4:   7c 0a 4c 2e     lfsx    f0,r10,r9
  a8:   d0 03 00 04     stfs    f0,4(r3)
  ac:   80 08 00 0c     lwz     r0,12(r8)
  b0:   7c 0a 5c 2e     lfsx    f0,r10,r11
  b4:   d0 03 00 08     stfs    f0,8(r3)
  b8:   54 00 10 3a     rlwinm  r0,r0,2,0,29
  bc:   7c 0a 04 2e     lfsx    f0,r10,r0
  c0:   d0 03 00 0c     stfs    f0,12(r3)
  c4:   81 61 00 00     lwz     r11,0(r1)
  c8:   7d 61 5b 78     mr      r1,r11
  cc:   4e 80 00 20     blr



as you can note vec_xor is BEFORE the store of result. Do you think this is 
a compiler error?


-- 
View this message in context: 
http://www.nabble.com/GCC-strange-behavior-tp25157468p25157468.html
Sent from the gcc - bugs mailing list archive at Nabble.com.

Reply via email to