http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46928
--- Comment #4 from sebpop at gmail dot com <sebpop at gmail dot com> 2010-12-13 19:05:58 UTC ---

The code that is produced looks like this just after loop distribution, i.e., we generate memset zero only by distributing the innermost loop:

mad_synth_mute (struct mad_synth * synth)
{
  long unsigned int D.2739;
  long unsigned int D.2740;
  long unsigned int D.2741;
  long unsigned int D.2742;
  long unsigned int D.2743;
  struct mad_synth * D.2744;
  long unsigned int D.2730;
  long unsigned int D.2731;
  long unsigned int D.2732;
  long unsigned int D.2733;
  <unnamed-signed:64> D.2734;
  <unnamed-signed:64> D.2735;
  <unnamed-signed:64> D.2736;
  long unsigned int D.2737;
  struct mad_synth * D.2738;
  long unsigned int D.2721;
  long unsigned int D.2722;
  long unsigned int D.2723;
  long unsigned int D.2724;
  <unnamed-signed:64> D.2725;
  <unnamed-signed:64> D.2726;
  <unnamed-signed:64> D.2727;
  long unsigned int D.2728;
  struct mad_synth * D.2729;
  long unsigned int D.2712;
  long unsigned int D.2713;
  long unsigned int D.2714;
  long unsigned int D.2715;
  <unnamed-signed:64> D.2716;
  <unnamed-signed:64> D.2717;
  <unnamed-signed:64> D.2718;
  long unsigned int D.2719;
  struct mad_synth * D.2720;
  unsigned int pretmp.2;
  unsigned int v;
  unsigned int s;
  unsigned int ch;

<bb 2>:
  goto <bb 10>;

<bb 5>:
Invalid sum of incoming frequencies 139, should be 1111
  s_9 = s_29 + 1;
  if (s_9 != 16)
    goto <bb 6>;
  else
    goto <bb 8>;

<bb 6>:

<bb 7>:
Invalid sum of outgoing probabilities 12.5%
  # s_29 = PHI <0(10), s_9(6)>
  D.2712_25 = (long unsigned int) s_29;
  D.2713_22 = (long unsigned int) ch_28;
  D.2714_1 = D.2713_22 * 64;
  D.2715_2 = D.2712_25 + D.2714_1;
  D.2716_3 = (<unnamed-signed:64>) D.2715_2;
  D.2717_26 = D.2716_3 + 48;
  D.2718_27 = D.2717_26 * 32;
  D.2719_24 = (long unsigned int) D.2718_27;
  D.2720_23 = synth_7(D) + D.2719_24;
  __builtin_memset (D.2720_23, 0, 32);
  D.2721_13 = (long unsigned int) s_29;
  D.2722_12 = (long unsigned int) ch_28;
  D.2723_11 = D.2722_12 * 64;
  D.2724_6 = D.2721_13 + D.2723_11;
  D.2725_20 = (<unnamed-signed:64>) D.2724_6;
  D.2726_32 = D.2725_20 + 32;
  D.2727_33 = D.2726_32 * 32;
  D.2728_34 = (long unsigned int) D.2727_33;
  D.2729_35 = synth_7(D) + D.2728_34;
  __builtin_memset (D.2729_35, 0, 32);
  D.2730_36 = (long unsigned int) s_29;
  D.2731_37 = (long unsigned int) ch_28;
  D.2732_38 = D.2731_37 * 64;
  D.2733_39 = D.2730_36 + D.2732_38;
  D.2734_40 = (<unnamed-signed:64>) D.2733_39;
  D.2735_41 = D.2734_40 + 16;
  D.2736_42 = D.2735_41 * 32;
  D.2737_43 = (long unsigned int) D.2736_42;
  D.2738_44 = synth_7(D) + D.2737_43;
  __builtin_memset (D.2738_44, 0, 32);
  D.2739_45 = (long unsigned int) ch_28;
  D.2740_46 = D.2739_45 * 64;
  D.2741_47 = (long unsigned int) s_29;
  D.2742_48 = D.2740_46 + D.2741_47;
  D.2743_49 = D.2742_48 * 32;
  D.2744_50 = synth_7(D) + D.2743_49;
  __builtin_memset (D.2744_50, 0, 32);
  goto <bb 5>;

<bb 8>:
  ch_10 = ch_28 + 1;
  if (ch_10 != 2)
    goto <bb 9>;
  else
    goto <bb 11>;

<bb 9>:

<bb 10>:
  # ch_28 = PHI <0(2), ch_10(9)>
  goto <bb 7>;

<bb 11>:
  return;

}

and the assembler:

mad_synth_mute:
.LFB0:
	.cfi_startproc
	movq %rdi, %r9
	xorl %r8d, %r8d
.L2:
	leaq 16(%r8), %rsi
	movq %r9, %rdx
	movq %r8, %rax
	.p2align 4,,10
	.p2align 3
.L3:
	leaq 48(%rax), %rcx
	salq $5, %rcx
	addq %rdi, %rcx
	movq $0, (%rcx)
	movq $0, 8(%rcx)
	movq $0, 16(%rcx)
	movq $0, 24(%rcx)
	leaq 32(%rax), %rcx
	salq $5, %rcx
	addq %rdi, %rcx
	movq $0, (%rcx)
	movq $0, 8(%rcx)
	movq $0, 16(%rcx)
	movq $0, 24(%rcx)
	leaq 16(%rax), %rcx
	addq $1, %rax
	salq $5, %rcx
	addq %rdi, %rcx
	movq $0, (%rcx)
	movq $0, 8(%rcx)
	movq $0, 16(%rcx)
	movq $0, 24(%rcx)
	movq $0, (%rdx)
	movq $0, 8(%rdx)
	movq $0, 16(%rdx)
	movq $0, 24(%rdx)
	addq $32, %rdx
	cmpq %rsi, %rax
	jne .L3
	addq $64, %r8
	addq $2048, %r9
	cmpq $128, %r8
	jne .L2
	rep ret