This is the first example semantic patch and shows how to capture and fix a common problem.
If you use an palloc() to allocate memory for an object (or an array of objects) and by mistake type something like: StringInfoData *info = palloc(sizeof(StringInfoData*)); You will not allocate enough memory for storing the object. This semantic patch catches any cases where you are either allocating an array of objects or a single object that do not have corret types in this sense, more precisely, it captures assignments to a variable of type T* where palloc() uses sizeof(T) either alone or with a single expression (assuming this is an array count). The semantic patch is overzealous in the sense that allocation to a "const char **" expects a "sizeof(const char *)" and it cannot deal with typedefs that introduce aliases (these two can be seen in the patch). Although the sizes of these are the same, and Coccinelle do not have a good system for comparing types, it might be better to just follow the convention of always using the type "T*" for any "palloc(sizeof(T))" since it makes automated checking easier and is a small inconvenience; especially considering that coccicheck can easily fix this for you. It also simplifies other automated checking to follow this convention. We don't really have any real bugs as a result from this, but we have one case where an allocation of "sizeof(LLVMBasicBlockRef*)" is allocated to an "LLVMBasicBlockRef*", which strictly speaking is not correct (it should be "sizeof(LLVMBasicBlockRef)"). However, since they are both pointers, there is no risk of incorrect allocation size. -- Best wishes, Mats Kindahl, Timescale
From eedf99b78a0e63b4de6db14617091f18adbd0d83 Mon Sep 17 00:00:00 2001 From: Mats Kindahl <m...@kindahl.net> Date: Sun, 5 Jan 2025 19:26:47 +0100 Subject: Add semantic patch for sizeof() using palloc() If palloc() is used to allocate elements of type T it should be assigned to a variable of type T* or risk indexes out of bounds. This semantic patch checks that allocations to variables of type T* are using sizeof(T) when allocating memory using palloc(). --- cocci/palloc_sizeof.cocci | 40 ++++++++++++++++++++++++++++ contrib/btree_gist/btree_utils_var.c | 2 +- src/backend/jit/llvm/llvmjit_expr.c | 5 ++-- src/backend/utils/adt/tsquery.c | 2 +- 4 files changed, 44 insertions(+), 5 deletions(-) create mode 100644 cocci/palloc_sizeof.cocci diff --git a/cocci/palloc_sizeof.cocci b/cocci/palloc_sizeof.cocci new file mode 100644 index 00000000000..7e79139fed4 --- /dev/null +++ b/cocci/palloc_sizeof.cocci @@ -0,0 +1,40 @@ +virtual report +virtual context +virtual patch + +@r1 depends on report || context@ +type T1 != {void}; +idexpression T1 *I; +type T2 != T1; +position p; +expression E; +identifier func = {palloc, palloc0}; +@@ +( +* I = func@p(sizeof(T2)) +| +* I = func@p(E * sizeof(T2)) +) + +@script:python depends on report@ +T1 << r1.T1; +T2 << r1.T2; +I << r1.I; +p << r1.p; +@@ +coccilib.report.print_report(p[0], f"'{I}' has type '{T1}*' but 'sizeof({T2})' is used to allocate memory") + +@depends on patch@ +type T1 != {void}; +idexpression T1 *I; +type T2 != T1; +expression E; +identifier func = {palloc, palloc0}; +@@ +( +- I = func(sizeof(T2)) ++ I = func(sizeof(T1)) +| +- I = func(E * sizeof(T2)) ++ I = func(E * sizeof(T1)) +) diff --git a/contrib/btree_gist/btree_utils_var.c b/contrib/btree_gist/btree_utils_var.c index d9df2356cd1..36937795e90 100644 --- a/contrib/btree_gist/btree_utils_var.c +++ b/contrib/btree_gist/btree_utils_var.c @@ -475,7 +475,7 @@ gbt_var_picksplit(const GistEntryVector *entryvec, GIST_SPLITVEC *v, v->spl_nleft = 0; v->spl_nright = 0; - sv = palloc(sizeof(bytea *) * (maxoff + 1)); + sv = palloc((maxoff + 1) * sizeof(GBT_VARKEY *)); /* Sort entries */ diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c index c533f552540..c582fd89c67 100644 --- a/src/backend/jit/llvm/llvmjit_expr.c +++ b/src/backend/jit/llvm/llvmjit_expr.c @@ -618,8 +618,7 @@ llvm_compile_expr(ExprState *state) LLVMBuildStore(b, l_sbool_const(1), v_resnullp); /* create blocks for checking args, one for each */ - b_checkargnulls = - palloc(sizeof(LLVMBasicBlockRef *) * op->d.func.nargs); + b_checkargnulls = palloc(op->d.func.nargs * sizeof(LLVMBasicBlockRef)); for (int argno = 0; argno < op->d.func.nargs; argno++) b_checkargnulls[argno] = l_bb_before_v(b_nonull, "b.%d.isnull.%d", opno, @@ -2409,7 +2408,7 @@ llvm_compile_expr(ExprState *state) v_nullsp = l_ptr_const(nulls, l_ptr(TypeStorageBool)); /* create blocks for checking args */ - b_checknulls = palloc(sizeof(LLVMBasicBlockRef *) * nargs); + b_checknulls = palloc(nargs * sizeof(LLVMBasicBlockRef)); for (int argno = 0; argno < nargs; argno++) { b_checknulls[argno] = diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c index 0366c2a2acd..aa7a296149c 100644 --- a/src/backend/utils/adt/tsquery.c +++ b/src/backend/utils/adt/tsquery.c @@ -1241,7 +1241,7 @@ tsqueryrecv(PG_FUNCTION_ARGS) elog(ERROR, "invalid size of tsquery"); /* Allocate space to temporarily hold operand strings */ - operands = palloc(size * sizeof(char *)); + operands = palloc(size * sizeof(const char *)); /* Allocate space for all the QueryItems. */ len = HDRSIZETQ + sizeof(QueryItem) * size; -- 2.43.0