Hello. This is my first patch to the project.
 
The patch adds support for system columns in JOIN USING clause.  The problem
can be demonstrated with this code:
 
```sql
CREATE TABLE t  (id int);
CREATE TABLE tt (id int);
 
-- Works:
SELECT * FROM t JOIN tt ON t.xmin = tt.xmin;
-- Doesn't work:
SELECT * FROM t JOIN tt USING (xmin);
```
 
Solution:
1. Use the scanNSItemForColumn() function instead of buildVarFromNSColumn() for
constructing Var objects, as it correctly handles system columns.
2. Remove extra calls to markVarForSelectPriv(), since this function is already
invoked in scanNSItemForColumn().
3. For system columns, collect their negative attribute numbers along with
user-defined column indices into l_colnos and r_colnos.
4. Create a buildVarFromSystemAttribute() function for rebuilding Var objects
with system attributes, analogous to buildVarFromNSColumn(), since
scanNSItemForColumn() is complex and has side effects.
5. Implement a fillNSColumnParametersFromVar() function for building NS columns
with system attributes.
6. Add SystemAttributeTotalNumber() function to heap.c to ensure memory for
res_nscolumns is allocated with system columns in mind.
 
Link to PR on GitHub: https://github.com/hilltracer/postgres/pull/3
-- 
Best regards,  
Denis Garsh,  
d.ga...@arenadata.io
 
From 295f9ec59b22cc3707fb8f168e0e3750e51724a0 Mon Sep 17 00:00:00 2001
From: Denis Garsh <d.ga...@arenadata.io>
Date: Wed, 7 Aug 2024 15:57:32 +0300
Subject: [PATCH] Add system column support to the USING clause

Problem: The parser can't handle USING joins with system columns, such as xmin.

Solution:
1. Use the scanNSItemForColumn() function instead of buildVarFromNSColumn() for
constructing Var objects, as it correctly handles system columns.
2. Remove extra calls to markVarForSelectPriv(), since this function is already
invoked in scanNSItemForColumn().
3. For system columns, collect their negative attribute numbers along with
user-defined column indices into l_colnos and r_colnos.
4. Create a buildVarFromSystemAttribute() function for rebuilding Var objects
with system attributes, analogous to buildVarFromNSColumn(), since
scanNSItemForColumn() is complex and has side effects.
5. Implement a fillNSColumnParametersFromVar() function for building NS columns
with system attributes.
6. Add SystemAttributeTotalNumber() function to heap.c to ensure memory for
res_nscolumns is allocated with system columns in mind.
---
 src/backend/catalog/heap.c         |   8 ++
 src/backend/parser/parse_clause.c  | 178 ++++++++++++++++++++++++++---
 src/include/catalog/heap.h         |   2 +
 src/test/regress/expected/join.out | 106 +++++++++++++++++
 src/test/regress/sql/join.sql      |  54 +++++++++
 5 files changed, 330 insertions(+), 18 deletions(-)

diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 01b43cc6a8..b44d0d0005 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -264,6 +264,14 @@ SystemAttributeByName(const char *attname)
 	return NULL;
 }
 
+/*
+ * Returns the total number of system attributes.
+ */
+size_t SystemAttributeTotalNumber(void)
+{
+	return lengthof(SysAtt);
+}
+
 
 /* ----------------------------------------------------------------
  *				XXX END OF UGLY HARD CODED BADNESS XXX
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 8118036495..8e99d78272 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -24,6 +24,7 @@
 #include "catalog/pg_amproc.h"
 #include "catalog/pg_constraint.h"
 #include "catalog/pg_type.h"
+#include "catalog/heap.h"
 #include "commands/defrem.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -76,6 +77,10 @@ static Node *transformFromClauseItem(ParseState *pstate, Node *n,
 									 List **namespace);
 static Var *buildVarFromNSColumn(ParseState *pstate,
 								 ParseNamespaceColumn *nscol);
+static Var *buildVarFromSystemAttribute(ParseState *pstate,
+										int rtindex, int attnum);
+static void fillNSColumnParametersFromVar(ParseNamespaceColumn *rescolumn,
+										  const Var *colvar);
 static Node *buildMergedJoinVar(ParseState *pstate, JoinType jointype,
 								Var *l_colvar, Var *r_colvar);
 static void markRelsAsNulledBy(ParseState *pstate, Node *n, int jindex);
@@ -273,7 +278,16 @@ extractRemainingColumns(ParseState *pstate,
 	prevcols = NULL;
 	foreach(lc, *src_colnos)
 	{
-		prevcols = bms_add_member(prevcols, lfirst_int(lc));
+		int col_index = lfirst_int(lc);
+		if (col_index < 0)
+		{
+			/*
+			 * Attribute number of system column. Don't collect the number
+			 * because the name of this column cannot be among src_colnames.
+			 */
+			continue;
+		}
+		prevcols = bms_add_member(prevcols, col_index);
 	}
 
 	attnum = 0;
@@ -327,10 +341,6 @@ transformJoinUsingClause(ParseState *pstate,
 		Var		   *rvar = (Var *) lfirst(rvars);
 		A_Expr	   *e;
 
-		/* Require read access to the join variables */
-		markVarForSelectPriv(pstate, lvar);
-		markVarForSelectPriv(pstate, rvar);
-
 		/* Now create the lvar = rvar join condition */
 		e = makeSimpleA_Expr(AEXPR_OP, "=",
 							 (Node *) copyObject(lvar), (Node *) copyObject(rvar),
@@ -1291,7 +1301,9 @@ transformFromClauseItem(ParseState *pstate, Node *n,
 
 		/* this may be larger than needed, but it's not worth being exact */
 		res_nscolumns = (ParseNamespaceColumn *)
-			palloc0((list_length(l_colnames) + list_length(r_colnames)) *
+			palloc0((list_length(l_colnames) +
+					 list_length(r_colnames) +
+					 SystemAttributeTotalNumber()) *
 					sizeof(ParseNamespaceColumn));
 		res_colindex = 0;
 
@@ -1332,6 +1344,14 @@ transformFromClauseItem(ParseState *pstate, Node *n,
 										u_colname)));
 				}
 
+				/*
+				 * It is important to understand that among the l_colnames or
+				 * r_colnames there may be a system column name. A system
+				 * column name may appear if it was added in the previous step.
+				 * This is because a query can contain multiple USING joins,
+				 * where each join uses the result of the previous one.
+				 */
+
 				/* Find it in left input */
 				ndx = 0;
 				foreach(col, l_colnames)
@@ -1349,12 +1369,25 @@ transformFromClauseItem(ParseState *pstate, Node *n,
 					}
 					ndx++;
 				}
-				if (l_index < 0)
+
+				l_colvar = (Var *) scanNSItemForColumn(pstate,
+													   l_nsitem,
+													   0, u_colname, -1);
+				if (l_colvar == NULL)
 					ereport(ERROR,
 							(errcode(ERRCODE_UNDEFINED_COLUMN),
 							 errmsg("column \"%s\" specified in USING clause does not exist in left table",
 									u_colname)));
-				l_colnos = lappend_int(l_colnos, l_index + 1);
+				if (l_index >= 0)
+				{
+					/* User-defined column */
+					l_colnos = lappend_int(l_colnos, l_index + 1);
+				}
+				else
+				{
+					/* System column */
+					l_colnos = lappend_int(l_colnos, l_colvar->varattno);
+				}
 
 				/* Find it in right input */
 				ndx = 0;
@@ -1373,17 +1406,30 @@ transformFromClauseItem(ParseState *pstate, Node *n,
 					}
 					ndx++;
 				}
-				if (r_index < 0)
+
+				r_colvar = (Var *) scanNSItemForColumn(pstate,
+													   r_nsitem,
+													   0, u_colname, -1);
+
+				if (r_colvar == NULL)
 					ereport(ERROR,
 							(errcode(ERRCODE_UNDEFINED_COLUMN),
 							 errmsg("column \"%s\" specified in USING clause does not exist in right table",
 									u_colname)));
-				r_colnos = lappend_int(r_colnos, r_index + 1);
+
+				if (r_index >= 0)
+				{
+					/* User-defined column */
+					r_colnos = lappend_int(r_colnos, r_index + 1);
+				}
+				else
+				{
+					/* System column */
+					r_colnos = lappend_int(r_colnos, r_colvar->varattno);
+				}
 
 				/* Build Vars to use in the generated JOIN ON clause */
-				l_colvar = buildVarFromNSColumn(pstate, l_nscolumns + l_index);
 				l_usingvars = lappend(l_usingvars, l_colvar);
-				r_colvar = buildVarFromNSColumn(pstate, r_nscolumns + r_index);
 				r_usingvars = lappend(r_usingvars, r_colvar);
 
 				/*
@@ -1461,8 +1507,8 @@ transformFromClauseItem(ParseState *pstate, Node *n,
 			/* Scan the colnos lists to recover info from the previous loop */
 			forboth(lc1, l_colnos, lc2, r_colnos)
 			{
-				int			l_index = lfirst_int(lc1) - 1;
-				int			r_index = lfirst_int(lc2) - 1;
+				int			l_index = lfirst_int(lc1);
+				int			r_index = lfirst_int(lc2);
 				Var		   *l_colvar,
 						   *r_colvar;
 				Node	   *u_colvar;
@@ -1472,8 +1518,33 @@ transformFromClauseItem(ParseState *pstate, Node *n,
 				 * Note we re-build these Vars: they might have different
 				 * varnullingrels than the ones made in the previous loop.
 				 */
-				l_colvar = buildVarFromNSColumn(pstate, l_nscolumns + l_index);
-				r_colvar = buildVarFromNSColumn(pstate, r_nscolumns + r_index);
+				if (l_index > 0)
+				{
+					/* User-defined column */
+					l_colvar = buildVarFromNSColumn(pstate,
+													l_nscolumns + l_index - 1);
+				}
+				else
+				{
+					/* System column, so l_index is attnum. */
+					l_colvar = buildVarFromSystemAttribute(pstate,
+														   l_nsitem->p_rtindex,
+														   l_index);
+				}
+
+				if (r_index > 0)
+				{
+					/* User-defined column */
+					r_colvar = buildVarFromNSColumn(pstate,
+													r_nscolumns + r_index - 1);
+				}
+				else
+				{
+					/* System column, so r_index is attnum. */
+					r_colvar = buildVarFromSystemAttribute(pstate,
+														   r_nsitem->p_rtindex,
+														   r_index);
+				}
 
 				/* Construct the join alias Var for this column */
 				u_colvar = buildMergedJoinVar(pstate,
@@ -1488,12 +1559,32 @@ transformFromClauseItem(ParseState *pstate, Node *n,
 				if (u_colvar == (Node *) l_colvar)
 				{
 					/* Merged column is equivalent to left input */
-					*res_nscolumn = l_nscolumns[l_index];
+					if (l_index > 0)
+					{
+						/* User-defined column, so just take it. */
+						*res_nscolumn = l_nscolumns[l_index - 1];
+					}
+					else
+					{
+						/* System column, so construct new column. */
+						fillNSColumnParametersFromVar(res_nscolumn,
+													  l_colvar);
+					}
 				}
 				else if (u_colvar == (Node *) r_colvar)
 				{
 					/* Merged column is equivalent to right input */
-					*res_nscolumn = r_nscolumns[r_index];
+					if (r_index > 0)
+					{
+						/* User-defined column, so just take it. */
+						*res_nscolumn = r_nscolumns[r_index - 1];
+					}
+					else
+					{
+						/* System column, so construct new column. */
+						fillNSColumnParametersFromVar(res_nscolumn,
+													  r_colvar);
+					}
 				}
 				else
 				{
@@ -1658,6 +1749,57 @@ buildVarFromNSColumn(ParseState *pstate, ParseNamespaceColumn *nscol)
 	return var;
 }
 
+/*
+ * buildVarFromSystemAttribute -
+ *	  build a Var node using system attribute, e.g. "xmin".
+ *
+ * rtindex is the relation's index in the rangetable.
+ * attnum is the number of system attribute.
+ * Returns NULL if there is no such system attribute.
+ */
+static Var *
+buildVarFromSystemAttribute(ParseState *pstate, int rtindex, int attnum)
+{
+	Var		   *var;
+	const FormData_pg_attribute *sysatt;
+
+	sysatt = SystemAttributeDefinition(attnum);
+
+	if (sysatt == NULL)
+		return NULL;
+
+	var = makeVar(rtindex,
+				  attnum,
+				  sysatt->atttypid,
+				  sysatt->atttypmod,
+				  sysatt->attcollation,
+				  0);
+
+	/* update varnullingrels */
+	markNullableIfNeeded(pstate, var);
+	return var;
+}
+
+/*
+ * fillNSColumnParametersFromVar -
+ *	  fill ParseNamespaceColumn data using Var data.
+ *
+ * rescolumn is pointer to the structure to be filled.
+ * colvar is the Var where to get data from.
+ */
+static void
+fillNSColumnParametersFromVar(ParseNamespaceColumn *rescolumn,
+							  const Var *colvar)
+{
+	rescolumn->p_varno = colvar->varno;
+	rescolumn->p_varattno = colvar->varattno;
+	rescolumn->p_vartype = colvar->vartype;
+	rescolumn->p_vartypmod = colvar->vartypmod;
+	rescolumn->p_varcollid = colvar->varcollid;
+	rescolumn->p_varnosyn = colvar->varnosyn;
+	rescolumn->p_varattnosyn = colvar->varattnosyn;
+}
+
 /*
  * buildMergedJoinVar -
  *	  generate a suitable replacement expression for a merged join column
diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h
index c512824cd1..960db152b5 100644
--- a/src/include/catalog/heap.h
+++ b/src/include/catalog/heap.h
@@ -136,6 +136,8 @@ extern const FormData_pg_attribute *SystemAttributeDefinition(AttrNumber attno);
 
 extern const FormData_pg_attribute *SystemAttributeByName(const char *attname);
 
+extern size_t SystemAttributeTotalNumber(void);
+
 extern void CheckAttributeNamesTypes(TupleDesc tupdesc, char relkind,
 									 int flags);
 
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 31fb7d142e..1ea9a1ab71 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -8017,3 +8017,109 @@ SELECT t1.a FROM skip_fetch t1 LEFT JOIN skip_fetch t2 ON t2.a = 1 WHERE t2.a IS
 
 RESET enable_indexonlyscan;
 RESET enable_seqscan;
+-- Test USING with system columns
+-- Generate 2 tables with 3 rows each.
+-- xmin (transaction number) is an integer value represented as T1, T2, etc.
+-- The xmin values of the first and second rows are equal to each other,
+-- respectively. The values T3 and T4 of the third rows are not equal.
+-- J1: id1 | xmin      J2: id2 | xmin
+--     ----+-------       -----+-------
+--       1 | T1            101 | T1
+--       2 | T2            102 | T2
+--       3 | T3            103 | T4
+CREATE TABLE j1 (id1 INT);
+CREATE TABLE j2 (id2 INT);
+BEGIN;
+  INSERT INTO j1 (id1) VALUES (1);
+  INSERT INTO j2 (id2) VALUES (101);
+COMMIT;
+BEGIN;
+  INSERT INTO j1 (id1) VALUES (2);
+  INSERT INTO j2 (id2) VALUES (102);
+COMMIT;
+BEGIN;
+  INSERT INTO j1 (id1) VALUES (3);
+COMMIT;
+BEGIN;
+  INSERT INTO j2 (id2) VALUES (103);
+COMMIT;
+-- The xmin values are not output, as they will differ with each run.
+-- So, only the column headers and the values of id1 and id2 are displayed.
+SELECT * FROM j1 JOIN j2 USING (xmin) LIMIT 0;
+ xmin | id1 | id2 
+------+-----+-----
+(0 rows)
+
+SELECT j1.id1, j2.id2 FROM j1 JOIN j2 USING (xmin);
+ id1 | id2 
+-----+-----
+   1 | 101
+   2 | 102
+(2 rows)
+
+SELECT * FROM j1 LEFT JOIN j2 USING (xmin) LIMIT 0;
+ xmin | id1 | id2 
+------+-----+-----
+(0 rows)
+
+SELECT j1.id1, j2.id2 FROM j1 LEFT JOIN j2 USING (xmin);
+ id1 | id2 
+-----+-----
+   1 | 101
+   2 | 102
+   3 |    
+(3 rows)
+
+SELECT * FROM j1 RIGHT JOIN j2 USING (xmin) LIMIT 0;
+ xmin | id1 | id2 
+------+-----+-----
+(0 rows)
+
+SELECT j1.id1, j2.id2 FROM j1 RIGHT JOIN j2 USING (xmin);
+ id1 | id2 
+-----+-----
+   1 | 101
+   2 | 102
+     | 103
+(3 rows)
+
+SELECT * FROM j1 FULL JOIN j2 USING (xmin) LIMIT 0;
+ xmin | id1 | id2 
+------+-----+-----
+(0 rows)
+
+SELECT j1.id1, j2.id2 FROM j1 FULL JOIN j2 USING (xmin);
+ id1 | id2 
+-----+-----
+   1 | 101
+   2 | 102
+   3 |    
+     | 103
+(4 rows)
+
+-- Test if USING can add all system columns at once
+SELECT *
+  FROM j1 JOIN j2 USING (tableoid, xmin, cmin, xmax, cmax, ctid);
+ tableoid | xmin | cmin | xmax | cmax | ctid | id1 | id2 
+----------+------+------+------+------+------+-----+-----
+(0 rows)
+
+-- Test USING exceptions
+SELECT * FROM j1 JOIN j2 USING (id2);
+ERROR:  column "id2" specified in USING clause does not exist in left table
+SELECT * FROM j1 JOIN j2 USING (id1);
+ERROR:  column "id1" specified in USING clause does not exist in right table
+WITH j1_dubbed AS (SELECT id1, id1 FROM j1)
+  SELECT * FROM j1_dubbed JOIN j1 USING (id1);
+ERROR:  common column name "id1" appears more than once in left table
+WITH j1_dubbed AS (SELECT id1, id1 FROM j1)
+  SELECT * FROM j1 JOIN j1_dubbed USING (id1);
+ERROR:  common column name "id1" appears more than once in right table
+-- Test multiple USING joins, each based on the prior result.
+CREATE TABLE j3 (id3 INT);
+SELECT * FROM j1 JOIN j2 USING (xmin) JOIN j3 USING (xmin) LIMIT 0;
+ xmin | id1 | id2 | id3 
+------+-----+-----+-----
+(0 rows)
+
+DROP TABLE j1, J2, J3;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index d81ff63be5..f49c730e56 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2952,3 +2952,57 @@ SELECT t1.a FROM skip_fetch t1 LEFT JOIN skip_fetch t2 ON t2.a = 1 WHERE t2.a IS
 
 RESET enable_indexonlyscan;
 RESET enable_seqscan;
+
+-- Test USING with system columns
+
+-- Generate 2 tables with 3 rows each.
+-- xmin (transaction number) is an integer value represented as T1, T2, etc.
+-- The xmin values of the first and second rows are equal to each other,
+-- respectively. The values T3 and T4 of the third rows are not equal.
+-- J1: id1 | xmin      J2: id2 | xmin
+--     ----+-------       -----+-------
+--       1 | T1            101 | T1
+--       2 | T2            102 | T2
+--       3 | T3            103 | T4
+CREATE TABLE j1 (id1 INT);
+CREATE TABLE j2 (id2 INT);
+BEGIN;
+  INSERT INTO j1 (id1) VALUES (1);
+  INSERT INTO j2 (id2) VALUES (101);
+COMMIT;
+BEGIN;
+  INSERT INTO j1 (id1) VALUES (2);
+  INSERT INTO j2 (id2) VALUES (102);
+COMMIT;
+BEGIN;
+  INSERT INTO j1 (id1) VALUES (3);
+COMMIT;
+BEGIN;
+  INSERT INTO j2 (id2) VALUES (103);
+COMMIT;
+
+-- The xmin values are not output, as they will differ with each run.
+-- So, only the column headers and the values of id1 and id2 are displayed.
+SELECT * FROM j1 JOIN j2 USING (xmin) LIMIT 0;
+SELECT j1.id1, j2.id2 FROM j1 JOIN j2 USING (xmin);
+SELECT * FROM j1 LEFT JOIN j2 USING (xmin) LIMIT 0;
+SELECT j1.id1, j2.id2 FROM j1 LEFT JOIN j2 USING (xmin);
+SELECT * FROM j1 RIGHT JOIN j2 USING (xmin) LIMIT 0;
+SELECT j1.id1, j2.id2 FROM j1 RIGHT JOIN j2 USING (xmin);
+SELECT * FROM j1 FULL JOIN j2 USING (xmin) LIMIT 0;
+SELECT j1.id1, j2.id2 FROM j1 FULL JOIN j2 USING (xmin);
+-- Test if USING can add all system columns at once
+SELECT *
+  FROM j1 JOIN j2 USING (tableoid, xmin, cmin, xmax, cmax, ctid);
+-- Test USING exceptions
+SELECT * FROM j1 JOIN j2 USING (id2);
+SELECT * FROM j1 JOIN j2 USING (id1);
+WITH j1_dubbed AS (SELECT id1, id1 FROM j1)
+  SELECT * FROM j1_dubbed JOIN j1 USING (id1);
+WITH j1_dubbed AS (SELECT id1, id1 FROM j1)
+  SELECT * FROM j1 JOIN j1_dubbed USING (id1);
+-- Test multiple USING joins, each based on the prior result.
+CREATE TABLE j3 (id3 INT);
+SELECT * FROM j1 JOIN j2 USING (xmin) JOIN j3 USING (xmin) LIMIT 0;
+
+DROP TABLE j1, J2, J3;
-- 
2.34.1

Reply via email to