Allow subfield references without parentheses

Peter Eisentraut Thu, 12 Dec 2024 04:24:36 -0800

This patch allows subfield references in column references withoutparentheses, subject to certain condition. This implements (hopes to,anyway) the rules from the SQL standard (since SQL99).

This has been requested a number of times over the years. [0] is arecent discussion that has mentioned it.

Specifically, identifier chains of three or more items now have anadditional possible interpretation.


Before:

A.B.C:    schema A, table B, column or function C
A.B.C.D:  database A, schema B, table C, column or function D

Now additionally:

A.B.C:    correlation A, column B, field C; like (A.B).C
A.B.C.D:  correlation A, column B, field C, field D; like (A.B).C.D

Also, identifier chains longer than four items now have an analogousinterpretation. They had no possible interpretation before.

(Note that single identifiers and two-part identifiers are not affectedat all.)


The "correlation A" above must be an explicit alias, not just a table name.

If both possible interpretations apply, then an error is raised. (Aworkaround is to change the alias used in the query.) Such errorsshould be very rare in practice.

In [0] there was some light discussion about other possible behaviors incase of conflicts. In any case, with this patch it's possible toexperiment with different possible behaviors, by just replacing theconditional that errors by another action. I also studied ruleutils.c abit to see if there are any tweaks needed to support this. So far itseems okay. I'm sure we can come up with some pathological cases, butso far I haven't done anything about it.

I left a couple of TODO notes in the patch such as where documentationshould be updated, and I didn't do anything about SQL and PL/pgSQLparameters so far. Also, I tried to weave the additional code intotransformColumnRef() in a way that doesn't move much existing codearound, but eventually this should probably be reorganized a bit toreduce duplication.

Another thing to think about would be the exact phrasing of any errormessages. Right now, transformColumnRef() assumes that a givenidentifier chain can only have one possible interpretation and if itdoesn't find the thing the error says "didn't find the thing". But nowif there are multiple possible interpretations, it should probably saysomething more like "didn't find this and also not that" or "didn't findanything that matches that" or some other variant. I mean, what it doesnow isn't bad, but given the amount of attention we have put into thefine-tuning of these specific errors in the past, some additionalchanges might be desired.

[0]:https://www.postgresql.org/message-id/flat/CAFiTN-uiwaogH-dbz-ARpUUQM%2BRQKdU2qmPh1WzM6gEyS8PVRA%40mail.gmail.com

From 9fea0127a51d1504658b7bab4d9ac505d6135ce3 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Thu, 12 Dec 2024 08:21:51 +0100
Subject: [PATCH v0] Allow subfield references without parentheses

This allows subfield references in column references without
parentheses, subject to certain condition.  This implements the rules
from the SQL standard (since SQL99).

Specifically, identifier chains of three or more items now have an
additional possible interpretation.

Before:

A.B.C:    schema A, table B, column or function C
A.B.C.D:  database A, schema B, table C, column or function D

Now additionally:

A.B.C:    correlation A, column B, field C; like (A.B).C
A.B.C.D:  correlation A, column B, field C, field D; like (A.B).C.D

Also, identifier chains longer than four items now have an analogous
interpretation.  They had no possible interpretation before.

The "correlation A" above must be an explicit alias, not just a table
name.

If both possible interpretations apply, then an error is raised.  (A
workaround is to change the alias used in the query.)  Such errors
should be very rare in practice.
---
 doc/src/sgml/syntax.sgml               |   2 +
 src/backend/executor/functions.c       |   3 +
 src/backend/parser/parse_expr.c        | 216 +++++++++++++++++++++----
 src/test/regress/expected/rowtypes.out |  19 +++
 src/test/regress/sql/rowtypes.sql      |   6 +
 5 files changed, 218 insertions(+), 28 deletions(-)

diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml
index 916189a7d68..37225c84758 100644
--- a/doc/src/sgml/syntax.sgml
+++ b/doc/src/sgml/syntax.sgml
@@ -1441,6 +1441,8 @@ <title>Field Selection</title>
     <primary>field selection</primary>
    </indexterm>
 
+   <!-- TODO -->
+
    <para>
     If an expression yields a value of a composite type (row type), then a
     specific field of the row can be extracted by writing
diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c
index 3b2f78b2197..1e17951f45f 100644
--- a/src/backend/executor/functions.c
+++ b/src/backend/executor/functions.c
@@ -302,6 +302,9 @@ sql_fn_post_column_ref(ParseState *pstate, ColumnRef *cref, 
Node *var)
         *                      (the first possibility takes precedence)
         * A.B.C        A = function name, B = record-typed parameter name,
         *                      C = field name
+        * A.B.C.D...
+        *                      A = function name, B = record-typed parameter 
name,
+        *                      C, D, etc. = field names TODO
         * A.*          Whole-row reference to composite parameter A.
         * A.B.*        Same, with A = function name, B = parameter name
         *
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index c2806297aa4..872b7d6be98 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -499,6 +499,12 @@ transformIndirection(ParseState *pstate, A_Indirection 
*ind)
        return result;
 }
 
+static bool
+type_is_subfieldable(Oid typeid)
+{
+       return ISCOMPLEX(typeid) || typeid == RECORDOID;
+}
+
 /*
  * Transform a ColumnRef.
  *
@@ -507,18 +513,19 @@ transformIndirection(ParseState *pstate, A_Indirection 
*ind)
 static Node *
 transformColumnRef(ParseState *pstate, ColumnRef *cref)
 {
-       Node       *node = NULL;
+       Node       *node = NULL, *node2 = NULL;
        char       *nspname = NULL;
        char       *relname = NULL;
        char       *colname = NULL;
-       ParseNamespaceItem *nsitem;
-       int                     levels_up;
+       ParseNamespaceItem *nsitem, *nsitem2;
+       int                     levels_up, levels_up2;
+       ColumnRef  *indcref = NULL;
+       List       *indirection = NULL;
        enum
        {
                CRERR_NO_COLUMN,
                CRERR_NO_RTE,
-               CRERR_WRONG_DB,
-               CRERR_TOO_MANY
+               CRERR_AMBIGUOUS,
        }                       crerr = CRERR_NO_COLUMN;
        const char *err;
 
@@ -617,8 +624,12 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
         *                      if no luck, try to resolve as unqualified table 
name (A.*).
         * A.B          A is an unqualified table name; B is either a
         *                      column or function name (trying column name 
first).
-        * A.B.C        schema A, table B, col or func name C.
-        * A.B.C.D      catalog A, schema B, table C, col or func D.
+        * A.B.C        schema A, table B, col or func name C; or
+        *          correlation A, column B, field C.
+        * A.B.C.D      catalog A, schema B, table C, col or func D; or
+        *          correlation A, column B, fields C, D.
+        * A.B.C.D.E...
+        *          correlation A, column B, fields C, D, E, etc.
         * A.*          A is an unqualified table name; means whole-row value.
         * A.B.*        whole-row value of table B in schema A.
         * A.B.C.*      whole-row value of table C in schema B in catalog A.
@@ -724,7 +735,45 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
                                nsitem = refnameNamespaceItem(pstate, nspname, 
relname,
                                                                                
          cref->location,
                                                                                
          &levels_up);
-                               if (nsitem == NULL)
+
+                               /*
+                                * Also look it up as a subfieldable column 
reference, but
+                                * then it can't end with a star
+                                */
+                               if (!IsA(field3, A_Star))
+                                       nsitem2 = refnameNamespaceItem(pstate, 
NULL, strVal(field1),
+                                                                               
                   cref->location,
+                                                                               
                   &levels_up2);
+                               else
+                                       nsitem2 = NULL;
+
+                               /* must be an explicit alias */
+                               if (nsitem2 && nsitem2->p_rte->alias == NULL)
+                                       nsitem2 = NULL;
+
+                               if (nsitem2)
+                                       node2 = scanNSItemForColumn(pstate, 
nsitem2, levels_up2, strVal(field2), cref->location);
+
+                               /*
+                                * If we found a potential subfield reference, 
check that the
+                                * type is subfieldable, else forget it.
+                                */
+                               if (node2)
+                               {
+                                       if (type_is_subfieldable(castNode(Var, 
node2)->vartype))
+                                       {
+                                               indcref = copyObject(cref);
+                                               indcref->fields = 
list_truncate(indcref->fields, 2);
+                                               indirection = 
list_copy_tail(cref->fields, 2);
+                                       }
+                                       else
+                                       {
+                                               nsitem2 = NULL;
+                                               node2 = NULL;
+                                       }
+                               }
+
+                               if (nsitem == NULL && nsitem2 == NULL)
                                {
                                        crerr = CRERR_NO_RTE;
                                        break;
@@ -740,9 +789,12 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
 
                                colname = strVal(field3);
 
+                               if (nsitem)
+                               {
                                /* Try to identify as a column of the nsitem */
                                node = scanNSItemForColumn(pstate, nsitem, 
levels_up, colname,
                                                                                
   cref->location);
+
                                if (node == NULL)
                                {
                                        /* Try it as a function call on the 
whole row */
@@ -756,6 +808,13 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
                                                                                
         false,
                                                                                
         cref->location);
                                }
+                               }
+
+                               if (node != NULL && node2 != NULL)
+                               {
+                                       crerr = CRERR_AMBIGUOUS;
+                                       break;
+                               }
                                break;
                        }
                case 4:
@@ -770,20 +829,52 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
                                nspname = strVal(field2);
                                relname = strVal(field3);
 
+                               /* Locate the referenced nsitem (only eligible 
if database name matches) */
+                               if (strcmp(catname, 
get_database_name(MyDatabaseId)) == 0)
+                                       nsitem = refnameNamespaceItem(pstate, 
nspname, relname,
+                                                                               
                  cref->location,
+                                                                               
                  &levels_up);
+                               else
+                                       nsitem = NULL;
+
+                               /*
+                                * Also look it up as a subfieldable column 
reference, but
+                                * then it can't end with a star
+                                */
+                               if (!IsA(field4, A_Star))
+                                       nsitem2 = refnameNamespaceItem(pstate, 
NULL, strVal(field1),
+                                                                               
                   cref->location,
+                                                                               
                   &levels_up2);
+                               else
+                                       nsitem2 = NULL;
+
+                               /* must be an explicit alias */
+                               if (nsitem2 && nsitem2->p_rte->alias == NULL)
+                                       nsitem2 = NULL;
+
+                               if (nsitem2)
+                                       node2 = scanNSItemForColumn(pstate, 
nsitem2, levels_up2, strVal(field2), cref->location);
+
                                /*
-                                * We check the catalog name and then ignore it.
+                                * If we found a potential subfield reference, 
check that the
+                                * type is subfieldable, else forget it.
                                 */
-                               if (strcmp(catname, 
get_database_name(MyDatabaseId)) != 0)
+                               if (node2)
                                {
-                                       crerr = CRERR_WRONG_DB;
-                                       break;
+                                       if (type_is_subfieldable(castNode(Var, 
node2)->vartype))
+                                       {
+                                               indcref = copyObject(cref);
+                                               indcref->fields = 
list_truncate(indcref->fields, 2);
+                                               indirection = 
list_copy_tail(cref->fields, 2);
+                                       }
+                                       else
+                                       {
+                                               nsitem2 = NULL;
+                                               node2 = NULL;
+                                       }
                                }
 
-                               /* Locate the referenced nsitem */
-                               nsitem = refnameNamespaceItem(pstate, nspname, 
relname,
-                                                                               
          cref->location,
-                                                                               
          &levels_up);
-                               if (nsitem == NULL)
+                               if (nsitem == NULL && nsitem2 == NULL)
                                {
                                        crerr = CRERR_NO_RTE;
                                        break;
@@ -799,6 +890,8 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
 
                                colname = strVal(field4);
 
+                               if (nsitem)
+                               {
                                /* Try to identify as a column of the nsitem */
                                node = scanNSItemForColumn(pstate, nsitem, 
levels_up, colname,
                                                                                
   cref->location);
@@ -815,11 +908,85 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
                                                                                
         false,
                                                                                
         cref->location);
                                }
+                               }
+
+                               if (node != NULL && node2 != NULL)
+                               {
+                                       crerr = CRERR_AMBIGUOUS;
+                                       break;
+                               }
                                break;
                        }
                default:
-                       crerr = CRERR_TOO_MANY; /* too many dotted names */
-                       break;
+                       {
+                               Node       *field1 = (Node *) 
linitial(cref->fields);
+                               Node       *field2 = (Node *) 
lsecond(cref->fields);
+                               Node       *fieldl = (Node *) 
llast(cref->fields);
+
+                               /* XXX for error reporting below */
+                               relname = strVal(field1);
+
+                               /*
+                                * Look it up as a subfieldable column 
reference, but then it
+                                * can't end with a star
+                                */
+                               if (!IsA(fieldl, A_Star))
+                                       nsitem2 = refnameNamespaceItem(pstate, 
NULL, strVal(field1),
+                                                                               
                   cref->location,
+                                                                               
                   &levels_up2);
+                               else
+                                       nsitem2 = NULL;
+
+                               /* must be an explicit alias */
+                               if (nsitem2 && nsitem2->p_rte->alias == NULL)
+                                       nsitem2 = NULL;
+
+                               if (nsitem2)
+                                       node2 = scanNSItemForColumn(pstate, 
nsitem2, levels_up2, strVal(field2), cref->location);
+
+                               /*
+                                * If we found a potential subfield reference, 
check that the
+                                * type is subfieldable, else forget it.
+                                */
+                               if (node2)
+                               {
+                                       if (type_is_subfieldable(castNode(Var, 
node2)->vartype))
+                                       {
+                                               indcref = copyObject(cref);
+                                               indcref->fields = 
list_truncate(indcref->fields, 2);
+                                               indirection = 
list_copy_tail(cref->fields, 2);
+                                       }
+                                       else
+                                       {
+                                               nsitem2 = NULL;
+                                               node2 = NULL;
+                                       }
+                               }
+
+                               if (nsitem2 == NULL)
+                               {
+                                       crerr = CRERR_NO_RTE;
+                                       break;
+                               }
+                               break;
+                       }
+       }
+
+       /*
+        * If we decided it's a subfield reference, convert it to an indirection
+        * (as if you had written "(A.B).C.D" instead of "A.B.C.D").  (Note that
+        * the subfield references detected above always come from an identifier
+        * chain of length >= 3, but the indirections we are building here have 
a
+        * column reference of length 2, and so there won't be any endless
+        * recursion.)
+        */
+       if (node2)
+       {
+               A_Indirection *a = makeNode(A_Indirection);
+
+               a->arg = (Node *) indcref;
+               a->indirection = indirection;
+               node = transformIndirection(pstate, a);
        }
 
        /*
@@ -860,17 +1027,10 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
                                errorMissingRTE(pstate, makeRangeVar(nspname, 
relname,
                                                                                
                         cref->location));
                                break;
-                       case CRERR_WRONG_DB:
-                               ereport(ERROR,
-                                               
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                                                errmsg("cross-database 
references are not implemented: %s",
-                                                               
NameListToString(cref->fields)),
-                                                parser_errposition(pstate, 
cref->location)));
-                               break;
-                       case CRERR_TOO_MANY:
+                       case CRERR_AMBIGUOUS:
                                ereport(ERROR,
                                                (errcode(ERRCODE_SYNTAX_ERROR),
-                                                errmsg("improper qualified 
name (too many dotted names): %s",
+                                                errmsg("ambiguous identifier 
chain: %s",
                                                                
NameListToString(cref->fields)),
                                                 parser_errposition(pstate, 
cref->location)));
                                break;
diff --git a/src/test/regress/expected/rowtypes.out 
b/src/test/regress/expected/rowtypes.out
index 9168979a620..885338ac0d1 100644
--- a/src/test/regress/expected/rowtypes.out
+++ b/src/test/regress/expected/rowtypes.out
@@ -100,6 +100,7 @@ SELECT * FROM pg_input_error_info('(1,1e400)', 'complex');
  "1e400" is out of range for type double precision |        |      | 22003
 (1 row)
 
+-- test identifier chain syntax for column and field references
 create temp table quadtable(f1 int, q quad);
 insert into quadtable values (1, ((3.3,4.4),(5.5,6.6)));
 insert into quadtable values (2, ((null,4.4),(5.5,6.6)));
@@ -121,6 +122,24 @@ select f1, (q).c1, (qq.q).c1.i from quadtable qq;
   2 | (,4.4)    | 4.4
 (2 rows)
 
+select f1, qq.q.c1 from quadtable qq;
+ f1 |    c1     
+----+-----------
+  1 | (3.3,4.4)
+  2 | (,4.4)
+(2 rows)
+
+select f1, qq.q.c1.i from quadtable qq;
+ f1 |  i  
+----+-----
+  1 | 4.4
+  2 | 4.4
+(2 rows)
+
+select f1, quadtable.q.c1.i from quadtable;  -- fails, works only with 
explicit alias
+ERROR:  missing FROM-clause entry for table "c1"
+LINE 1: select f1, quadtable.q.c1.i from quadtable;
+                   ^
 create temp table people (fn fullname, bd date);
 insert into people values ('(Joe,Blow)', '1984-01-10');
 select * from people;
diff --git a/src/test/regress/sql/rowtypes.sql 
b/src/test/regress/sql/rowtypes.sql
index 174b062144a..145c8a640fb 100644
--- a/src/test/regress/sql/rowtypes.sql
+++ b/src/test/regress/sql/rowtypes.sql
@@ -38,6 +38,7 @@
 SELECT * FROM pg_input_error_info('(1,zed)', 'complex');
 SELECT * FROM pg_input_error_info('(1,1e400)', 'complex');
 
+-- test identifier chain syntax for column and field references
 create temp table quadtable(f1 int, q quad);
 
 insert into quadtable values (1, ((3.3,4.4),(5.5,6.6)));
@@ -49,6 +50,11 @@
 
 select f1, (q).c1, (qq.q).c1.i from quadtable qq;
 
+select f1, qq.q.c1 from quadtable qq;
+select f1, qq.q.c1.i from quadtable qq;
+
+select f1, quadtable.q.c1.i from quadtable;  -- fails, works only with 
explicit alias
+
 create temp table people (fn fullname, bd date);
 
 insert into people values ('(Joe,Blow)', '1984-01-10');

base-commit: bd10ec529796a13670645e6acd640c6f290df020
-- 
2.47.1

Allow subfield references without parentheses

Reply via email to