=?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>, =?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com> Message-ID: In-Reply-To: <llvm.org/llvm/llvm-project/pull/70...@github.com>
================ @@ -0,0 +1,149 @@ +// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-output=text \ +// RUN: -analyzer-checker=core,alpha.security.ArrayBoundV2,unix.Malloc,alpha.security.taint -verify %s + +int array[10]; + +void arrayUnderflow(void) { + array[-3] = 5; + // expected-warning@-1 {{Out of bound access to memory preceding 'array'}} + // expected-note@-2 {{Access of 'array' at negative byte offset -12}} +} + +int scanf(const char *restrict fmt, ...); + +void taintedIndex(void) { + int index; + scanf("%d", &index); + // expected-note@-1 {{Taint originated here}} + // expected-note@-2 {{Taint propagated to the 2nd argument}} + array[index] = 5; + // expected-warning@-1 {{Potential out of bound access to 'array' with tainted offset}} + // expected-note@-2 {{Access of 'array' with a tainted offset that may be too large}} +} + +void arrayOverflow(void) { + array[12] = 5; + // expected-warning@-1 {{Out of bound access to memory after the end of 'array'}} + // expected-note@-2 {{Access of 'array' at index 12, while it holds only 10 'int' elements}} +} + +int scalar; +int scalarOverflow(void) { + return (&scalar)[1]; + // expected-warning@-1 {{Out of bound access to memory after the end of 'scalar'}} + // expected-note@-2 {{Access of 'scalar' at index 1, while it holds only a single 'int' element}} +} + +int oneElementArray[1]; +int oneElementArrayOverflow(void) { + return oneElementArray[1]; + // expected-warning@-1 {{Out of bound access to memory after the end of 'oneElementArray'}} + // expected-note@-2 {{Access of 'oneElementArray' at index 1, while it holds only a single 'int' element}} +} + +short convertedArray(void) { + return ((short*)array)[47]; + // expected-warning@-1 {{Out of bound access to memory after the end of 'array'}} + // expected-note@-2 {{Access of 'array' at index 47, while it holds only 20 'short' elements}} +} + +struct vec { + int len; + double elems[64]; +} v; + +double arrayInStruct(void) { + return v.elems[64]; + // 
expected-warning@-1 {{Out of bound access to memory after the end of 'v.elems'}} + // expected-note@-2 {{Access of 'v.elems' at index 64, while it holds only 64 'double' elements}} +} + +double arrayInStructPtr(struct vec *pv) { + return pv->elems[64]; + // expected-warning@-1 {{Out of bound access to memory after the end of the field 'elems'}} + // expected-note@-2 {{Access of the field 'elems' at index 64, while it holds only 64 'double' elements}} +} + +struct two_bytes { + char lo, hi; +}; + +struct two_bytes convertedArray2(void) { + // We report this with byte offsets because the offset is not divisible by the element size. + struct two_bytes a = {0, 0}; + char *p = (char*)&a; + return *((struct two_bytes*)(p + 7)); + // expected-warning@-1 {{Out of bound access to memory after the end of 'a'}} + // expected-note@-2 {{Access of 'a' at byte offset 7, while it holds only 2 bytes}} +} + +int intFromString(void) { + // We report this with byte offsets because the extent is not divisible by the element size. + return ((const int*)"this is a string of 33 characters")[20]; + // expected-warning@-1 {{Out of bound access to memory after the end of the string literal}} + // expected-note@-2 {{Access of the string literal at byte offset 80, while it holds only 34 bytes}} +} + +int intFromStringDivisible(void) { + // However, this is reported with indices/elements, because the extent happens to be a multiple of 4. + return ((const int*)"abc")[20]; + // expected-warning@-1 {{Out of bound access to memory after the end of the string literal}} + // expected-note@-2 {{Access of the string literal at index 20, while it holds only a single 'int' element}} +} ---------------- DonatNagyE wrote: The note mentions `int` because it's using the array subscript operator on an `int *` pointer (to get an `int` value). This string literal memory area consists of the 4 bytes `abc\0`, which is equivalent to 'a single' 4-byte integer. 
I can add a special case to avoid messages like this, e.g. by always using byte offsets when reporting issues that come from string literals. https://github.com/llvm/llvm-project/pull/70056 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits