This is the first example semantic patch and shows how to capture and fix a
common problem.

If you use an palloc() to allocate memory for an object (or an array of
objects) and by mistake type something like:

    StringInfoData *info = palloc(sizeof(StringInfoData*));

You will not allocate enough memory for storing the object. This semantic
patch catches any cases where you are either allocating an array of objects
or a single object that do not have corret types in this sense, more
precisely, it captures assignments to a variable of type T* where palloc()
uses sizeof(T) either alone or with a single expression (assuming this is
an array count).

The semantic patch is overzealous in the sense that allocation to a "const
char **" expects a "sizeof(const char *)" and it cannot deal with typedefs
that introduce aliases (these two can be seen in the patch). Although the
sizes of these are the same, and Coccinelle do not have a good system for
comparing types, it might be better to just follow the convention of always
using the type "T*" for any "palloc(sizeof(T))" since it makes automated
checking easier and is a small inconvenience; especially considering that
coccicheck can easily fix this for you. It also simplifies other automated
checking to follow this convention.

We don't really have any real bugs as a result from this, but we have one
case where an allocation of "sizeof(LLVMBasicBlockRef*)" is allocated to an
"LLVMBasicBlockRef*", which strictly speaking is not correct (it should be
"sizeof(LLVMBasicBlockRef)"). However, since they are both pointers, there
is no risk of incorrect allocation size.
-- 
Best wishes,
Mats Kindahl, Timescale
From eedf99b78a0e63b4de6db14617091f18adbd0d83 Mon Sep 17 00:00:00 2001
From: Mats Kindahl <m...@kindahl.net>
Date: Sun, 5 Jan 2025 19:26:47 +0100
Subject: Add semantic patch for sizeof() using palloc()

If palloc() is used to allocate elements of type T it should be assigned to a
variable of type T* or risk indexes out of bounds. This semantic patch checks
that allocations to variables of type T* are using sizeof(T) when allocating
memory using palloc().
---
 cocci/palloc_sizeof.cocci            | 40 ++++++++++++++++++++++++++++
 contrib/btree_gist/btree_utils_var.c |  2 +-
 src/backend/jit/llvm/llvmjit_expr.c  |  5 ++--
 src/backend/utils/adt/tsquery.c      |  2 +-
 4 files changed, 44 insertions(+), 5 deletions(-)
 create mode 100644 cocci/palloc_sizeof.cocci

diff --git a/cocci/palloc_sizeof.cocci b/cocci/palloc_sizeof.cocci
new file mode 100644
index 00000000000..7e79139fed4
--- /dev/null
+++ b/cocci/palloc_sizeof.cocci
@@ -0,0 +1,40 @@
+virtual report
+virtual context
+virtual patch
+
+@r1 depends on report || context@
+type T1 != {void};
+idexpression T1 *I;
+type T2 != T1;
+position p;
+expression E;
+identifier func = {palloc, palloc0};
+@@
+(
+* I = func@p(sizeof(T2))
+|
+* I = func@p(E * sizeof(T2))
+)
+
+@script:python depends on report@
+T1 << r1.T1;
+T2 << r1.T2;
+I << r1.I;
+p << r1.p;
+@@
+coccilib.report.print_report(p[0], f"'{I}' has type '{T1}*' but 'sizeof({T2})' is used to allocate memory")
+
+@depends on patch@
+type T1 != {void};
+idexpression T1 *I;
+type T2 != T1;
+expression E;
+identifier func = {palloc, palloc0};
+@@
+(
+- I = func(sizeof(T2))
++ I = func(sizeof(T1))
+|
+- I = func(E * sizeof(T2))
++ I = func(E * sizeof(T1))
+)
diff --git a/contrib/btree_gist/btree_utils_var.c b/contrib/btree_gist/btree_utils_var.c
index d9df2356cd1..36937795e90 100644
--- a/contrib/btree_gist/btree_utils_var.c
+++ b/contrib/btree_gist/btree_utils_var.c
@@ -475,7 +475,7 @@ gbt_var_picksplit(const GistEntryVector *entryvec, GIST_SPLITVEC *v,
 	v->spl_nleft = 0;
 	v->spl_nright = 0;
 
-	sv = palloc(sizeof(bytea *) * (maxoff + 1));
+	sv = palloc((maxoff + 1) * sizeof(GBT_VARKEY *));
 
 	/* Sort entries */
 
diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c
index c533f552540..c582fd89c67 100644
--- a/src/backend/jit/llvm/llvmjit_expr.c
+++ b/src/backend/jit/llvm/llvmjit_expr.c
@@ -618,8 +618,7 @@ llvm_compile_expr(ExprState *state)
 						LLVMBuildStore(b, l_sbool_const(1), v_resnullp);
 
 						/* create blocks for checking args, one for each */
-						b_checkargnulls =
-							palloc(sizeof(LLVMBasicBlockRef *) * op->d.func.nargs);
+						b_checkargnulls = palloc(op->d.func.nargs * sizeof(LLVMBasicBlockRef));
 						for (int argno = 0; argno < op->d.func.nargs; argno++)
 							b_checkargnulls[argno] =
 								l_bb_before_v(b_nonull, "b.%d.isnull.%d", opno,
@@ -2409,7 +2408,7 @@ llvm_compile_expr(ExprState *state)
 					v_nullsp = l_ptr_const(nulls, l_ptr(TypeStorageBool));
 
 					/* create blocks for checking args */
-					b_checknulls = palloc(sizeof(LLVMBasicBlockRef *) * nargs);
+					b_checknulls = palloc(nargs * sizeof(LLVMBasicBlockRef));
 					for (int argno = 0; argno < nargs; argno++)
 					{
 						b_checknulls[argno] =
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
index 0366c2a2acd..aa7a296149c 100644
--- a/src/backend/utils/adt/tsquery.c
+++ b/src/backend/utils/adt/tsquery.c
@@ -1241,7 +1241,7 @@ tsqueryrecv(PG_FUNCTION_ARGS)
 		elog(ERROR, "invalid size of tsquery");
 
 	/* Allocate space to temporarily hold operand strings */
-	operands = palloc(size * sizeof(char *));
+	operands = palloc(size * sizeof(const char *));
 
 	/* Allocate space for all the QueryItems. */
 	len = HDRSIZETQ + sizeof(QueryItem) * size;
-- 
2.43.0

Reply via email to