------- Additional Comments From canqun at nudt dot edu dot cn 2005-03-30 14:42 ------- (In reply to comment #7) > Waiting for a test case...
Part 1 ! Test case for address giv (general induction variable) optimization. subroutine dot_product (sum, a, b, n) real*8 a(n), b(n), sum sum = 0 do i = 1, n sum = sum + a (i) * b(i) end do end Part 2 // IA-64 Assembly code generated by GCC without address giv splitting mov ar.lc = r14 .L4: // Loop is unrolled, but the address givs are not split. // Registers r17 and r19 are used to calculate all the addresses // of the array elements. .mmb ldfd f7 = [r17] ldfd f6 = [r19] nop 0 .mmi add r17 = r15, r33 add r19 = r15, r34 shladd r15 = r18, 3, r0 ;; .mmf nop 0 nop 0 fma.d f8 = f7, f6, f9 .mmi ldfd f7 = [r17] ldfd f6 = [r19] add r17 = r15, r33 .mfi nop 0 shladd r15 = r16, 3, r0 ;; .mmf nop 0 nop 0 fma.d f8 = f7, f6, f8 .mmi ldfd f7 = [r17] ldfd f6 = [r19] add r17 = r15, r33 .mmb nop 0 add r19 = r15, r34 nop 0 ;; .mmf nop 0 nop 0 fma.d f8 = f7, f6, f8 .mmb ldfd f7 = [r17] ldfd f6 = [r19] nop 0 ;; .mmf nop 0 nop 0 fma.d f8 = f7, f6, f8 ;; .mfb nop 0 mov f9 = f8 br.cloop.sptk.few .L4 ... .endp dot_product__# .ident "GCC: (GNU) 4.1.0 20050302 (experimental)" Part 3 // IA-64 assembly code generated by GCC with address giv splitting mov ar.lc = r16 .L28: [.L3:] [.L2:] ... // The loop is unrolled, and the address givs are split. // Registers r14, r8, r3, r35, r33, r31, r28, r29 are used to // calculate the address of each array element respectively. .mmi ldfd f38 = [r14] ldfd f39 = [r8] add r31 = r34, r23 .mmi ldfd f35 = [r3] ldfd f37 = [r35] add r33 = r34, r24 ;; .mmb ldfd f33 = [r33] ldfd f34 = [r31] nop 0 .mmi add r28 = r30, r23 add r29 = r30, r24 adds r22 = 4, r22 ;; .mmf ldfd f32 = [r29] ldfd f15 = [r28] fma.d f36 = f38, f39, f14 ;; .mmf nop 0 nop 0 fma.d f13 = f35, f37, f36 ;; .mmf nop 0 nop 0 fma.d f12 = f33, f34, f13 ;; .mfb nop 0 fma.d f14 = f32, f15, f12 br.cloop.sptk.few .L28 ;; .L9: .mfb stfd [r32] = f14 nop 0 nop 0 ... .endp dot_product__# .ident "GCC: (GNU) 3.5-tree-ssa 20031221 (CCRG)" -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=20376