https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69760
Bug ID: 69760 Summary: Wrong 64-bit memory address caused by an unneeded overflowing 32-bit integer multiplication on x86_64 under -O2 and -O3 code optimization Product: gcc Version: 5.3.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: c Assignee: unassigned at gcc dot gnu.org Reporter: dimitri_laikov at mail dot ru Target Milestone: --- This simplest code "test_func.c": $ cat test_func.c void test_func (double *a, int L, int m, int n, int N) { int i, k; for (i = 0; i < N; i++) { k = i - m; if ((k >= 0) && (k < n)) { a[L*k] = 0.0; } } return; } when compiled on x86_64 by GCC versions from at least 4.8.1 up to 5.3.0 is working right with -O0 or -O1 optimizations, but fails with -O2 or -O3, as can be seen by calling it from this main function "test_main.c": $ cat test_main.c #include <stdlib.h> void test_func (double *a, int L, int m, int n, int N); int main () { double *a; int L, m, n, N; L = 10000000; n = 4; N = 100*n; a = (double *) malloc (sizeof (double)*L*n); for (m = 0; m < N; m += n) { test_func (a, L, m, n, N); } free (a); return 0; } $ for n in 0 1 2 3 ; do gcc-5.3.0 -S -O$n test_func.c -o test_func.O$n.s ; wc test_func.O$n.s ; gcc -O0 test_func.O$n.s test_main.c -o test.x ; ./test.x ; done 51 119 881 test_func.O0.s 35 79 551 test_func.O1.s 45 94 696 test_func.O2.s Segmentation fault 45 94 696 test_func.O3.s Segmentation fault An even more _amazing_ thing is seen when printf is added: $ cat test_func_print.c #include <stdio.h> void test_func (double *a, int L, int m, int n, int N) { int i, k; for (i = 0; i < N; i++) { k = i - m; if ((k >= 0) && (k < n)) { printf ("k=%d a=%p a+L*k=%p\n", k, a, a + L*k); a[L*k] = 0.0; } } return; } $ gcc-5.3.0 -O2 test_func_print.c test_main.c -o test.x ; ./test.x k=0 a=0x7f66ddc42010 a+L*k=0x7f66ddc42010 ... k=0 a=0x7f66ddc42010 a+L*k=0x7f6eddc42010 Segmentation fault the corrupted pointer being 0x800000000 higher than it should! (Should anyone say that this innocent C code is wrong?) It can be understood by looking into the assembly file (with comments added by us): $ cat test_func.O2.S .file "test_func.c" .section .text.unlikely,"ax",@progbits .LCOLDB1: .text .LHOTB1: .p2align 4,,15 .globl test_func .type test_func, @function test_func: .LFB0: .cfi_startproc testl %r8d, %r8d jle .L1 # if (N <= 0) return movl %edx, %eax # %eax = m movslq %esi, %r9 # %r9 = (long) L subl %edx, %r8d # %r8d = N - m negl %eax # %eax = k = -m salq $3, %r9 # %r9 = 8*((long) L) imull %eax, %esi # %esi = L*(-m) movslq %esi, %rsi # %rsi = (long) (L*(-m)) leaq (%rdi,%rsi,8), %rsi # %rsi = ak = a + L*(-m) .p2align 4,,10 .p2align 3 .L4: testl %eax, %eax js .L3 cmpl %eax, %ecx jle .L3 movq $0, (%rsi) # if ((k >= 0) && (k < n)) *ak = 0.0 .L3: addl $1, %eax # %eax = k++ addq %r9, %rsi # %rsi = ak += L cmpl %r8d, %eax jne .L4 # if (k != N - m) goto L4 .L1: rep ret .cfi_endproc .LFE0: .size test_func, .-test_func .section .text.unlikely .LCOLDE1: .text .LHOTE1: .ident "GCC: (GNU) 5.3.0" .section .note.GNU-stack,"",@progbits The problem is that the optimizer rewrites the code as if it were: void test_func (double *a, int L, int m, int n, int N) { double *ak; int k; if (N <= 0) { return; } ak = a + L*(-m); for (k = -m; k < N - m; k++) { if ((k >= 0) && (k < n)) { *ak = 0.0; } ak += L; } return; } and because "L*(-m)" is computed in 32-bit precision, in which it does not fit, the pointer "double *ak" fails to reference "a[0]" after being m times incremented by L. The problem would be solved by teaching the compiler to use more bits when precomputing pointers for the optimized loops, in this example: ... ak = a + ((long) L)*(-m) ... or something like this test_func: .LFB0: .cfi_startproc testl %r8d, %r8d jle .L1 # if (N <= 0) return movslq %edx, %rax #|%rax = (long) m movslq %esi, %r9 # %r9 = (long) L subl %edx, %r8d # %r8d = N - m negq %rax #|%rax = k = -m salq $3, %r9 # %r9 = 8*((long) L) movslq %esi, %rsi #|%rsi = (long) L imulq %rax, %rsi # %rsi = ((long) L)*(-m) # movslq %esi, %rsi #- leaq (%rdi,%rsi,8), %rsi # %rsi = ak = a + ((long) L)*(-m) .p2align 4,,10 .p2align 3 .L4: testl %eax, %eax js .L3 cmpl %eax, %ecx jle .L3 movq $0, (%rsi) # if ((k >= 0) && (k < n)) *ak = 0.0 .L3: addl $1, %eax # %eax = k++ addq %r9, %rsi # %rsi = ak += L cmpl %r8d, %eax jne .L4 # if (k != N - m) goto L4 .L1: rep ret Many thanks for your kind attention.