Re: Add new for_each macros for iterating over a List that do not require ListCell pointer

Nathan Bossart Wed, 03 Jan 2024 11:55:48 -0800

I spent some time preparing this for commit, which only amounted to some
light edits.  I am posting a new version of the patch in order to get one
more round of cfbot coverage and to make sure there is no remaining
feedback.


-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

>From f91cac27a322ee3f60c93354f03da2ccb3ef11e2 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nat...@postgresql.org>
Date: Wed, 3 Jan 2024 12:44:45 -0600
Subject: [PATCH v8 1/1] Add macros for looping through a List without a
 ListCell.

Many foreach loops only use the ListCell pointer to retrieve the
content of the cell, like so:

    ListCell   *lc;

    foreach(lc, mylist)
    {
        int         myint = lfirst_int(lc);

        ... et cetera ...
    }

This ListCell pointer serves little purpose, except perhaps to
check whether we actually completed the loop, in which case the
pointer will be set to NULL.  However, most existing foreach loops
don't do that.

This commit adds a few convenience macros that automatically
declare the loop variable and retrieve the current cell's contents.
This allows us to rewrite the previous loop like this:

    foreach_int(myint, mylist)
    {
        ... et cetera ...
    }

This commit also adjusts a few existing loops in order to add
coverage for the new/adjusted macros.  There is presently no plan
to bulk update all foreach loops in the code base, as that could
introduce significant back-patching pain.  Instead, these macros
are primarily intended for use in new code.

Author: Jelte Fennema-Nio
Reviewed-by: TODO
Discussion: https://postgr.es/m/CAGECzQSwXKnxGwW1_Q5JE%2B8Ja20kyAbhBHO04vVrQsLcDciwXA%40mail.gmail.com
---
 src/backend/executor/execExpr.c             |  9 +--
 src/backend/replication/logical/relation.c  |  4 +-
 src/backend/replication/logical/tablesync.c |  6 +-
 src/backend/replication/pgoutput/pgoutput.c |  7 +--
 src/include/nodes/pg_list.h                 | 68 ++++++++++++++++++---
 5 files changed, 70 insertions(+), 24 deletions(-)

diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index 2c62b0c9c8..e06886a733 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -216,7 +216,6 @@ ExecInitQual(List *qual, PlanState *parent)
 	ExprState  *state;
 	ExprEvalStep scratch = {0};
 	List	   *adjust_jumps = NIL;
-	ListCell   *lc;
 
 	/* short-circuit (here and in ExecQual) for empty restriction list */
 	if (qual == NIL)
@@ -250,10 +249,8 @@ ExecInitQual(List *qual, PlanState *parent)
 	scratch.resvalue = &state->resvalue;
 	scratch.resnull = &state->resnull;
 
-	foreach(lc, qual)
+	foreach_ptr(Expr, node, qual)
 	{
-		Expr	   *node = (Expr *) lfirst(lc);
-
 		/* first evaluate expression */
 		ExecInitExprRec(node, state, &state->resvalue, &state->resnull);
 
@@ -265,9 +262,9 @@ ExecInitQual(List *qual, PlanState *parent)
 	}
 
 	/* adjust jump targets */
-	foreach(lc, adjust_jumps)
+	foreach_int(jump, adjust_jumps)
 	{
-		ExprEvalStep *as = &state->steps[lfirst_int(lc)];
+		ExprEvalStep *as = &state->steps[jump];
 
 		Assert(as->opcode == EEOP_QUAL);
 		Assert(as->d.qualexpr.jumpdone == -1);
diff --git a/src/backend/replication/logical/relation.c b/src/backend/replication/logical/relation.c
index a581ac9a4d..41834a2554 100644
--- a/src/backend/replication/logical/relation.c
+++ b/src/backend/replication/logical/relation.c
@@ -746,11 +746,9 @@ static Oid
 FindUsableIndexForReplicaIdentityFull(Relation localrel, AttrMap *attrmap)
 {
 	List	   *idxlist = RelationGetIndexList(localrel);
-	ListCell   *lc;
 
-	foreach(lc, idxlist)
+	foreach_oid(idxoid, idxlist)
 	{
-		Oid			idxoid = lfirst_oid(lc);
 		bool		isUsableIdx;
 		Relation	idxRel;
 		IndexInfo  *idxInfo;
diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c
index 4d056c16c8..fdd674adeb 100644
--- a/src/backend/replication/logical/tablesync.c
+++ b/src/backend/replication/logical/tablesync.c
@@ -1036,11 +1036,11 @@ fetch_remote_table_info(char *nspname, char *relname,
 
 		/* Build the pubname list. */
 		initStringInfo(&pub_names);
-		foreach(lc, MySubscription->publications)
+		foreach_node(String, pubstr, MySubscription->publications)
 		{
-			char	   *pubname = strVal(lfirst(lc));
+			char	   *pubname = strVal(pubstr);
 
-			if (foreach_current_index(lc) > 0)
+			if (foreach_current_index(pubstr) > 0)
 				appendStringInfoString(&pub_names, ", ");
 
 			appendStringInfoString(&pub_names, quote_literal_cstr(pubname));
diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c
index 25a95076cf..37450b55f9 100644
--- a/src/backend/replication/pgoutput/pgoutput.c
+++ b/src/backend/replication/pgoutput/pgoutput.c
@@ -2234,7 +2234,6 @@ cleanup_rel_sync_cache(TransactionId xid, bool is_commit)
 {
 	HASH_SEQ_STATUS hash_seq;
 	RelationSyncEntry *entry;
-	ListCell   *lc;
 
 	Assert(RelationSyncCache != NULL);
 
@@ -2247,15 +2246,15 @@ cleanup_rel_sync_cache(TransactionId xid, bool is_commit)
 		 * corresponding schema and we don't need to send it unless there is
 		 * any invalidation for that relation.
 		 */
-		foreach(lc, entry->streamed_txns)
+		foreach_xid(streamed_txn, entry->streamed_txns)
 		{
-			if (xid == lfirst_xid(lc))
+			if (xid == streamed_txn)
 			{
 				if (is_commit)
 					entry->schema_sent = true;
 
 				entry->streamed_txns =
-					foreach_delete_current(entry->streamed_txns, lc);
+					foreach_delete_current(entry->streamed_txns, streamed_txn);
 				break;
 			}
 		}
diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h
index 529a382d28..5ecc693ac0 100644
--- a/src/include/nodes/pg_list.h
+++ b/src/include/nodes/pg_list.h
@@ -383,24 +383,25 @@ lnext(const List *l, const ListCell *c)
  *	  delete the current list element from the List associated with a
  *	  surrounding foreach() loop, returning the new List pointer.
  *
- * This is equivalent to list_delete_cell(), but it also adjusts the foreach
- * loop's state so that no list elements will be missed.  Do not delete
- * elements from an active foreach loop's list in any other way!
+ * This is similar to list_delete_cell(), but it also works when using
+ * foreach_xyz macros that don't require passing in a "ListCell *".
+ * Furthermore, it adjusts the foreach loop's state so that no list elements
+ * will be missed.  Do not delete elements from an active foreach loop's list
+ * in any other way!
  */
-#define foreach_delete_current(lst, cell)	\
-	(cell##__state.i--, \
-	 (List *) (cell##__state.l = list_delete_cell(lst, cell)))
+#define foreach_delete_current(lst, var_or_cell)	\
+	((List *) (var_or_cell##__state.l = list_delete_nth_cell(lst, var_or_cell##__state.i--)))
 
 /*
  * foreach_current_index -
  *	  get the zero-based list index of a surrounding foreach() loop's
- *	  current element; pass the name of the "ListCell *" iterator variable.
+ *	  current element; pass the name of the iterator variable.
  *
  * Beware of using this after foreach_delete_current(); the value will be
  * out of sync for the rest of the current loop iteration.  Anyway, since
  * you just deleted the current element, the value is pretty meaningless.
  */
-#define foreach_current_index(cell)  (cell##__state.i)
+#define foreach_current_index(var_or_cell)  (var_or_cell##__state.i)
 
 /*
  * for_each_from -
@@ -452,6 +453,57 @@ for_each_cell_setup(const List *lst, const ListCell *initcell)
 	return r;
 }
 
+/*
+ * Convenience macros that loop through a list without needing a separate
+ * "ListCell *" variable.  Instead, the macros declare a locally-scoped loop
+ * variable with the provided name.
+ *
+ * Since the loop variable is scoped to the loop, it's not possible to detect
+ * an early break by checking its value after the loop completes, as is common
+ * practice.  If you need to do this, you can either use foreach() instead or
+ * manually track early breaks with a separate variable declared outside of the
+ * loop.
+ *
+ * Note that the caveats described in the comment above the foreach() macro
+ * also apply to these convenience macros.
+ */
+#define foreach_ptr(type, var, lst) foreach_internal(type, *, var, lst, lfirst)
+#define foreach_int(var, lst)	foreach_internal(int, , var, lst, lfirst_int)
+#define foreach_oid(var, lst)	foreach_internal(Oid, , var, lst, lfirst_oid)
+#define foreach_xid(var, lst)	foreach_internal(TransactionId, , var, lst, lfirst_xid)
+
+/*
+ * The internal implementation of the above macros.  Do not use directly.
+ *
+ * This macro actually generates two loops in order to declare two variables of
+ * different types.  The outer loop only does a single iteration, so we expect
+ * optimizing compilers will unroll it, thereby optimizing it away.
+ */
+#define foreach_internal(type, pointer, var, lst, func) \
+	for (type pointer var = 0, pointer var##__outerloop = (type pointer) 1; \
+		 var##__outerloop; \
+		 var##__outerloop = 0) \
+		for (ForEachState var##__state = {(lst), 0}; \
+			 (var##__state.l != NIL && \
+			  var##__state.i < var##__state.l->length && \
+			 (var = func(&var##__state.l->elements[var##__state.i]), true)); \
+			 var##__state.i++)
+
+/*
+ * foreach_node -
+ *	  The same as foreach_ptr, but asserts that the element is of the specified
+ *	  node type.
+ */
+#define foreach_node(type, var, lst) \
+	for (type * var = 0, *var##__outerloop = (type *) 1; \
+		 var##__outerloop; \
+		 var##__outerloop = 0) \
+		for (ForEachState var##__state = {(lst), 0}; \
+			 (var##__state.l != NIL && \
+			  var##__state.i < var##__state.l->length && \
+			 (var = lfirst_node(type, &var##__state.l->elements[var##__state.i]), true));\
+			 var##__state.i++)
+
 /*
  * forboth -
  *	  a convenience macro for advancing through two linked lists
-- 
2.25.1

Re: Add new for_each macros for iterating over a List that do not require ListCell pointer

Reply via email to