Issue 131671
Summary VLA accessed below stack pointer
Labels new issue
Assignees
Reporter mark4o
    The source file below is miscompiled when compiled with "clang-20 -g -O2 bug.c" using Clang 20.1.0 from Debian Experimental x86_64.  Also reproduced with Clang 19.1.7 on Debian and macOS, and earlier versions back to Clang 9.  It is compiled correctly with -O0 or with GCC.

```
#define _FORTIFY_SOURCE 2
#include <assert.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>


static inline int silk_CLZ32(int in32)
{
    return in32 ? __builtin_clz(in32) : 32;
}

static inline int silk_DIV32_Q16(const int a32, const int b32)
{
    int a_headrm, b_headrm, lshift;
    int b32_inv, a32_nrm, b32_nrm, result;

    assert(b32 != 0);

    a_headrm = silk_CLZ32(a32 > 0 ? a32 : -a32) - 1;
    a32_nrm = (int)((unsigned int)a32 << a_headrm);
    b_headrm = silk_CLZ32(b32 > 0 ? b32 : -b32) - 1;
 b32_nrm = (int)((unsigned int)b32 << b_headrm);
    b32_inv = (int)((0x7FFFFFFF >> 2) / (b32_nrm >> 16));
    result = (int)((a32_nrm * (long long)((short)b32_inv)) >> 16);
    a32_nrm = (int)((unsigned int)a32_nrm - (unsigned int)((int)((unsigned int)((int)(((long long)b32_nrm * result) >> 32)) << 3)));
    result = (int)(result + ((a32_nrm * (long long)((short)b32_inv)) >> 16));

    lshift = 29 + a_headrm - b_headrm - 16;
    if (lshift < 0) {
        return result;
    } else if (lshift < 32) {
        return result >> lshift;
    } else {
        return 0;
 }
}

typedef struct {
    int prev_gain_Q16;
    int sLPC_Q14_buf[16];
 int nb_subfr;
    int subfr_length;
    int ltp_mem_length;
    int LPC_order;
} silk_decoder_state;

typedef struct {
    int Gains_Q16[4];
    short PredCoef_Q12[2][16];
} silk_decoder_control;

void silk_decode_core(silk_decoder_state *psDec, silk_decoder_control *psDecCtrl)
{
    int i, k;
    short *A_Q12, A_Q12_tmp[16];
    int gain_adj_Q16;
    int sLTP_Q15[psDec->ltp_mem_length + 16];
    int sLPC_Q14[psDec->subfr_length + 16];

    memcpy(sLPC_Q14, psDec->sLPC_Q14_buf, 16 * sizeof(int));

    for (k = 0; k < psDec->nb_subfr; k++) {
        A_Q12 = psDecCtrl->PredCoef_Q12[k >> 1];
 memcpy(A_Q12_tmp, A_Q12, psDec->LPC_order * sizeof(short));

 sLTP_Q15[k] = k == 0 ? 1 : sLTP_Q15[k-1] + k;

        if (psDecCtrl->Gains_Q16[k] != psDec->prev_gain_Q16) {
            gain_adj_Q16 = silk_DIV32_Q16(psDec->prev_gain_Q16, psDecCtrl->Gains_Q16[k]);

 for (i = 0; i < 16; i++) {
                sLPC_Q14[i] = (int)(((long long)gain_adj_Q16 * sLPC_Q14[i]) >> 16);
            }
        }

 for (i = 0; i < psDec->subfr_length; i++) {
            sLPC_Q14[16 + i] = (int)((unsigned int)sLPC_Q14[16 + i - 1] + 1);
        }

 memcpy(sLPC_Q14, &sLPC_Q14[psDec->subfr_length], 16 * sizeof(int));
    }
 /* llvm sets the stack pointer above the sLPC_Q14 VLA and therefore the VLA might be clobbered by the signal handler stack frame before it is read below */
    memcpy(psDec->sLPC_Q14_buf, sLPC_Q14, 16 * sizeof(int));
}

void sigalrm_handler(int signum)
{
    volatile int dummy[100];
    int i;
 for (i = 0; i < 100; i++) dummy[i] = 0xdeadbeef;
}

int
main(void)
{
 struct sigaction sa;
    struct itimerval itv;
    int i, j;

 sa.sa_handler = sigalrm_handler;
    sigemptyset(&sa.sa_mask);
 sa.sa_flags = 0;
    assert(sigaction(SIGALRM, &sa, NULL) == 0);

 itv.it_interval.tv_sec = 0;
    itv.it_interval.tv_usec = 1;
 itv.it_value.tv_sec = 0;
    itv.it_value.tv_usec = 1;
 assert(setitimer(ITIMER_REAL, &itv, NULL) == 0);

    for (i = 0; i < 100000; i++) {
        silk_decoder_state ds = { 65536, { 0 }, 2, 40, 20, 10 };
        silk_decoder_control dc = { { 32768, 32768, 32768, 32768 }, { 0 } };
        for (j = 0; j < 1000; j++) {
            silk_decode_core(&ds, &dc);
        }
        assert(ds.sLPC_Q14_buf[0] == 1796421289);
    }
 return 0;
}
```

A VLA is declared on line 60:
`    int sLPC_Q14[psDec->subfr_length + 16];`

At the end of the function, some of the contents of the VLA are copied to another location:
` memcpy(psDec->sLPC_Q14_buf, sLPC_Q14, 16 * sizeof(int));`

(Because `_FORTIFY_SOURCE=2`, `memcpy()` is actually an inline function that calls `__builtin___memcpy_chk()`.  The destination has a fixed size so the size can be checked at compile time.)

At the time of the `memcpy()`, the VLA is still in scope, however just before the data is copied, the compiler restores the stack pointer that it saved prior to allocating the VLA, which makes the VLA invalid.  It then copies the VLA content even though it is now below the stack pointer.  This is not valid, as the data below the stack pointer may be clobbered at any time by a signal handler.

At the beginning of the function, before allocating any VLAs, rsp is saved:
`        movq    %rsp, -72(%rbp)                 # 8-byte Spill`

At the end of the function, rsp is restored and then content is copied from the VLA (that is no longer on the stack) with an inline memcpy:
```
        .loc    0 0 9 is_stmt 0 # bug.c:0:9
        movq    -72(%rbp), %rsp                 # 8-byte Reload
        movq    -48(%rbp), %rax                 # 8-byte Reload
 addq    $4, %rax
.Ltmp81:
        #DEBUG_VALUE: memcpy:__dest <- $rax
 #DEBUG_VALUE: memcpy:__src <- $r14
        #DEBUG_VALUE: memcpy:__len <- 64
        .loc    2 29 10 is_stmt 1               # /usr/include/x86_64-linux-gnu/bits/string_fortified.h:29:10
        movups (%r14), %xmm0
        movups  16(%r14), %xmm1
        movups  32(%r14), %xmm2
        movups  48(%r14), %xmm3
        movups  %xmm3, 48(%rax)
 movups  %xmm2, 32(%rax)
        movups  %xmm1, 16(%rax)
        movups %xmm0, (%rax)
.Ltmp82:
        .loc    0 86 1                          # bug.c:86:1
        leaq    -40(%rbp), %rsp
        .loc    0 86 1 epilogue_begin is_stmt 0 # bug.c:86:1
        popq    %rbx
        popq %r12
        popq    %r13
        popq    %r14
.Ltmp83:
        popq %r15
        popq    %rbp
        .cfi_def_cfa %rsp, 8
 retq
```

In the test program, a signal handler is called periodically on a timer and the miscompiled function is called repeatedly.  Because the stack pointer is above the VLA, the VLA might be clobbered by the signal handler stack frame before it is read at the end of the function, causing incorrect data to be copied.  This causes a failed assertion and abort in the test program.  The test program repeats until it aborts due to detecting incorrect data or it reaches a maximum number of iterations.  It is expected that the test program will completely successfully without aborting.

```
$ clang-20 -g -O2 bug.c
$ ./a.out
a.out: bug.c:119: int main(void): Assertion `ds.sLPC_Q14_buf[0] == 1796421289' failed.
Aborted
$ clang-20 -g -O0 bug.c
$ ./a.out
$ gcc-14 -g -O2 bug.c
$ ./a.out
$
```

Valgrind reports the following:
```
$ clang-20 -g -O2 bug.c
$ valgrind ./a.out
==65085== Memcheck, a memory error detector
==65085== Copyright (C) 2002-2024, and GNU GPL'd, by Julian Seward et al.
==65085== Using Valgrind-3.24.0 and LibVEX; rerun with -h for copyright info
==65085== Command: ./a.out
==65085==
==65085== Invalid read of size 16
==65085== at 0x10953F: memcpy (string_fortified.h:29)
==65085==    by 0x10953F: silk_decode_core (???:85)
==65085==    by 0x10970A: main (bug.c:117)
==65085==  Address 0x1ffefffa80 is on thread 1's stack
==65085==  368 bytes below stack pointer
==65085==
==65085== Invalid read of size 16
==65085==    at 0x109543: memcpy (string_fortified.h:29)
==65085==    by 0x109543: silk_decode_core (???:85)
==65085==    by 0x10970A: main (bug.c:117)
==65085==  Address 0x1ffefffa90 is on thread 1's stack
==65085==  352 bytes below stack pointer
==65085==
==65085== Invalid read of size 16
==65085==    at 0x109548: memcpy (string_fortified.h:29)
==65085==    by 0x109548: silk_decode_core (???:85)
==65085==    by 0x10970A: main (bug.c:117)
==65085==  Address 0x1ffefffaa0 is on thread 1's stack
==65085==  336 bytes below stack pointer
==65085==
==65085== Invalid read of size 16
==65085==    at 0x10954D: memcpy (string_fortified.h:29)
==65085==    by 0x10954D: silk_decode_core (???:85)
==65085==    by 0x10970A: main (bug.c:117)
==65085==  Address 0x1ffefffab0 is on thread 1's stack
==65085==  320 bytes below stack pointer
==65085==
...
```
It is expected that the program will run with no valgrind errors.  Valgrind does not report any errors using -O0 or GCC. Also, compiling with -fsanitize=undefined, no errors are reported by either valgrind or ubsan.  The program does not abort under valgrind because of the way that valgrind processes asynchronous signals.

The issue was discovered using libopus.  The test program is a portion of the miscompiled function from libopus, cut down to a standalone test case that reproduces the issue without other dependencies.
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to