On 2/10/16 12:26 PM, Jim Nasby wrote:
I editorialized the docs and some of the comments. In particular, I documented 
behavior of not truncating, and recommended casting to name[] if user cares 
about that. Added a unit test to verify that works. BTW, I saw mention in the 
thread about not truncated spaces, but the function*does*  truncate them, 
unless they're inside quotes, where they're legitimate.

New patch for that.
--
Jim Nasby, Data Architect, Blue Treble Consulting, Austin TX
Experts in Analytics, Data Architecture and PostgreSQL
Data in Trouble? Get it in Treble! http://BlueTreble.com
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 139aa2b..b4a2898 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -1778,6 +1778,27 @@
       <row>
        <entry>
         <indexterm>
+         <primary>parse_ident</primary>
+        </indexterm>
+        <literal><function>parse_ident(<parameter>str</parameter> 
<type>text</type>,
+           [ <parameter>strictmode</parameter> <type>boolean</type> DEFAULT 
true ] )</function></literal>
+       </entry>
+       <entry><type>text[]</type></entry>
+       <entry>Split <parameter>qualified identifier</parameter> into array 
<parameter>parts</parameter>.
+       When <parameter>strictmode</parameter> is false, extra characters after 
the identifier are ignored.
+       This is useful for parsing identifiers for objects like functions and 
arrays that may have trailing
+       characters. By default, extra characters after the last identifier are 
considered an error.
+       second parameter is false, then chars after last identifier are 
ignored. Note that this function
+       does not truncate quoted identifiers. If you care about that you should 
cast the result of this
+          function to name[].
+       </entry>
+       <entry><literal>parse_ident('"SomeSchema".someTable')</literal></entry>
+       <entry><literal>"SomeSchema,sometable"</literal></entry>
+      </row>
+
+      <row>
+       <entry>
+        <indexterm>
          <primary>pg_client_encoding</primary>
         </indexterm>
         <literal><function>pg_client_encoding()</function></literal>
diff --git a/src/backend/catalog/system_views.sql 
b/src/backend/catalog/system_views.sql
index 923fe58..61d5b80 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -965,3 +965,10 @@ RETURNS jsonb
 LANGUAGE INTERNAL
 STRICT IMMUTABLE
 AS 'jsonb_set';
+
+CREATE OR REPLACE FUNCTION
+  parse_ident(str text, strictmode boolean DEFAULT true)
+RETURNS text[]
+LANGUAGE INTERNAL
+STRICT IMMUTABLE
+AS 'parse_ident';
diff --git a/src/backend/parser/scansup.c b/src/backend/parser/scansup.c
index 2b4ab20..7aa5b76 100644
--- a/src/backend/parser/scansup.c
+++ b/src/backend/parser/scansup.c
@@ -130,6 +130,15 @@ scanstr(const char *s)
 char *
 downcase_truncate_identifier(const char *ident, int len, bool warn)
 {
+       return downcase_identifier(ident, len, warn, true);
+}
+
+/*
+ * a workhorse for downcase_truncate_identifier
+ */
+char *
+downcase_identifier(const char *ident, int len, bool warn, bool truncate)
+{
        char       *result;
        int                     i;
        bool            enc_is_single_byte;
@@ -158,12 +167,13 @@ downcase_truncate_identifier(const char *ident, int len, 
bool warn)
        }
        result[i] = '\0';
 
-       if (i >= NAMEDATALEN)
+       if (i >= NAMEDATALEN && truncate)
                truncate_identifier(result, i, warn);
 
        return result;
 }
 
+
 /*
  * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
  *
diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c
index 6a306f3..3072c32 100644
--- a/src/backend/utils/adt/misc.c
+++ b/src/backend/utils/adt/misc.c
@@ -21,12 +21,15 @@
 #include <unistd.h>
 
 #include "access/sysattr.h"
+#include "access/htup_details.h"
 #include "catalog/catalog.h"
+#include "catalog/namespace.h"
 #include "catalog/pg_tablespace.h"
 #include "catalog/pg_type.h"
 #include "commands/dbcommands.h"
 #include "funcapi.h"
 #include "miscadmin.h"
+#include "parser/scansup.h"
 #include "parser/keywords.h"
 #include "postmaster/syslogger.h"
 #include "rewrite/rewriteHandler.h"
@@ -38,6 +41,7 @@
 #include "utils/ruleutils.h"
 #include "tcop/tcopprot.h"
 #include "utils/acl.h"
+#include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/timestamp.h"
 
@@ -598,3 +602,173 @@ pg_column_is_updatable(PG_FUNCTION_ARGS)
 
        PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS);
 }
+
+
+/*
+ * This simple parser utility are compatible with lexer implementation,
+ * used only in parse_ident function
+ */
+static bool
+is_ident_start(unsigned char c)
+{
+       if (c == '_')
+               return true;
+       if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+               return true;
+
+       if (c >= 0200 && c <= 0377)
+               return true;
+
+       return false;
+}
+
+static bool
+is_ident_cont(unsigned char c)
+{
+       if (c >= '0' && c <= '9')
+               return true;
+
+       return is_ident_start(c);
+}
+
+/*
+ * parse_ident - parse SQL composed identifier to separate identifiers.
+ * When strict mode is active (second parameter), then any chars after
+ * last identifiers are disallowed.
+ */
+Datum
+parse_ident(PG_FUNCTION_ARGS)
+{
+       text            *qualname;
+       char            *qualname_str;
+       bool            strict_mode;
+       ArrayBuildState *astate = NULL;
+       char    *nextp;
+       bool            after_dot = false;
+
+       qualname = PG_GETARG_TEXT_PP(0);
+       qualname_str = text_to_cstring(qualname);
+       strict_mode = PG_GETARG_BOOL(1);
+
+       nextp = qualname_str;
+
+       /* skip leading whitespace */
+       while (isspace((unsigned char) *nextp))
+               nextp++;
+
+       for (;;)
+       {
+               char            *curname;
+               char            *endp;
+               bool            missing_ident;
+
+               missing_ident = true;
+
+               if (*nextp == '\"')
+               {
+                       curname = nextp + 1;
+                       for (;;)
+                       {
+                               endp = strchr(nextp + 1, '\"');
+                               if (endp == NULL)
+                                       ereport(ERROR,
+                                               
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                errmsg("unclosed double 
quotes"),
+                                                errdetail("string \"%s\" is 
not valid identifier",
+                                                           
text_to_cstring(qualname))));
+                               if (endp[1] != '\"')
+                                       break;
+                               memmove(endp, endp + 1, strlen(endp));
+                               nextp = endp;
+                       }
+                       nextp = endp + 1;
+                       *endp = '\0';
+
+                       if (endp - curname == 0)
+                               ereport(ERROR,
+                                       
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("identifier should not be 
empty: \"%s\"",
+                                                                   
text_to_cstring(qualname))));
+
+                       astate = accumArrayResult(astate,
+                               CStringGetTextDatum(curname), false,
+                                                   TEXTOID, 
CurrentMemoryContext);
+                       missing_ident = false;
+               }
+               else
+               {
+                       if (is_ident_start((unsigned char) *nextp))
+                       {
+                               char *downname;
+                               int     len;
+                               text    *part;
+
+                               curname = nextp++;
+                               while (is_ident_cont((unsigned char) *nextp))
+                                       nextp++;
+
+                               len = nextp - curname;
+
+                               /*
+                                * We don't would to truncate identifier 
implicitli in this
+                                * moment, so we use downcase_identifier instead
+                                * dowcase_truncate_identifier. The user can do 
truncating
+                                * later if it is necessary for his purpouse.
+                                */
+                               downname = downcase_identifier(curname, len, 
false, false);
+                               part = cstring_to_text_with_len(downname, len);
+                               astate = accumArrayResult(astate,
+                                       PointerGetDatum(part), false,
+                                                           TEXTOID, 
CurrentMemoryContext);
+                               missing_ident = false;
+                       }
+               }
+
+               if (missing_ident)
+               {
+                       /* choose text of adequate error message */
+                       if (*nextp == '.')
+                               ereport(ERROR,
+                                       
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("missing identifier before 
\".\" symbol: \"%s\"",
+                                                                   
text_to_cstring(qualname))));
+                       else if (after_dot)
+                               ereport(ERROR,
+                                       
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("missing identifier after \".\" 
symbol: \"%s\"",
+                                                                   
text_to_cstring(qualname))));
+                       else
+                               ereport(ERROR,
+                                       
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("missing identifier: \"%s\"",
+                                                                   
text_to_cstring(qualname))));
+               }
+
+               while (isspace((unsigned char) *nextp))
+                       nextp++;
+
+               if (*nextp == '.')
+               {
+                       after_dot = true;
+                       nextp++;
+                       while (isspace((unsigned char) *nextp))
+                               nextp++;
+                       continue;
+               }
+               else if (*nextp == '\0')
+               {
+                       break;
+               }
+               else
+               {
+                       if (strict_mode)
+                               ereport(ERROR,
+                                       
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("identifier contains disallowed 
characters: \"%s\"",
+                                                                   
text_to_cstring(qualname))));
+                       break;
+               }
+       }
+
+       PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
+}
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 5ded13e..d7c09a7 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -3451,6 +3451,9 @@ DESCR("I/O");
 DATA(insert OID = 4086 (  to_regnamespace      PGNSP PGUID 12 1 0 0 0 f f f f 
t f s s 1 0 4089 "25" _null_ _null_ _null_ _null_ _null_ to_regnamespace _null_ 
_null_ _null_ ));
 DESCR("convert namespace name to regnamespace");
 
+DATA(insert OID = 3318 (  parse_ident          PGNSP PGUID 12 1 0 0 0 f f f f 
t f i s 2 0 1009 "25 16" _null_ _null_ "{str,strictmode}" _null_ _null_ 
parse_ident _null_ _null_ _null_ ));
+DESCR("parse qualified identifier to array of identifiers");
+
 DATA(insert OID = 2246 ( fmgr_internal_validator PGNSP PGUID 12 1 0 0 0 f f f 
f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ 
fmgr_internal_validator _null_ _null_ _null_ ));
 DESCR("(internal)");
 DATA(insert OID = 2247 ( fmgr_c_validator      PGNSP PGUID 12 1 0 0 0 f f f f 
t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_c_validator 
_null_ _null_ _null_ ));
diff --git a/src/include/parser/scansup.h b/src/include/parser/scansup.h
index 4f4164b..4f95c81 100644
--- a/src/include/parser/scansup.h
+++ b/src/include/parser/scansup.h
@@ -20,6 +20,9 @@ extern char *scanstr(const char *s);
 extern char *downcase_truncate_identifier(const char *ident, int len,
                                                         bool warn);
 
+extern char *downcase_identifier(const char *ident, int len,
+                                                        bool warn, bool 
truncate);
+
 extern void truncate_identifier(char *ident, int len, bool warn);
 
 extern bool scanner_isspace(char ch);
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 5e8e832..b1b6ef6 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -504,6 +504,7 @@ extern Datum pg_typeof(PG_FUNCTION_ARGS);
 extern Datum pg_collation_for(PG_FUNCTION_ARGS);
 extern Datum pg_relation_is_updatable(PG_FUNCTION_ARGS);
 extern Datum pg_column_is_updatable(PG_FUNCTION_ARGS);
+extern Datum parse_ident(PG_FUNCTION_ARGS);
 
 /* oid.c */
 extern Datum oidin(PG_FUNCTION_ARGS);
diff --git a/src/test/regress/expected/name.out 
b/src/test/regress/expected/name.out
index b359d52..66569f4 100644
--- a/src/test/regress/expected/name.out
+++ b/src/test/regress/expected/name.out
@@ -124,3 +124,55 @@ SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ 
'.*asdf.*';
 (2 rows)
 
 DROP TABLE NAME_TBL;
+DO $$
+DECLARE r text[];
+BEGIN
+  r := parse_ident('Schemax.Tabley');
+  RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
+  r := parse_ident('"SchemaX"."TableY"');
+  RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
+END;
+$$;
+NOTICE:  schemax.tabley
+NOTICE:  "SchemaX"."TableY"
+SELECT parse_ident('foo.boo');
+ parse_ident 
+-------------
+ {foo,boo}
+(1 row)
+
+SELECT parse_ident('foo.boo[]'); -- should fail
+ERROR:  identifier contains disallowed characters: "foo.boo[]"
+SELECT parse_ident('foo.boo[]', strictmode => false); -- ok
+ parse_ident 
+-------------
+ {foo,boo}
+(1 row)
+
+-- should fail
+SELECT parse_ident(' ');
+ERROR:  missing identifier: " "
+SELECT parse_ident(' .aaa');
+ERROR:  missing identifier before "." symbol: " .aaa"
+SELECT parse_ident(' aaa . ');
+ERROR:  missing identifier after "." symbol: " aaa . "
+SELECT parse_ident('aaa.a%b');
+ERROR:  identifier contains disallowed characters: "aaa.a%b"
+SELECT length(a[1]), length(a[2]) from 
parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy')
 as a ;
+ length | length 
+--------+--------
+    414 |    289
+(1 row)
+
+SELECT parse_ident(' first . "  second  " ."   third   ". "  ' || 
repeat('x',66) || '"');
+                                                parse_ident                    
                            
+-----------------------------------------------------------------------------------------------------------
+ {first,"  second  ","   third   ","  
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
+(1 row)
+
+SELECT parse_ident(' first . "  second  " ."   third   ". "  ' || 
repeat('x',66) || '"')::name[];
+                                             parse_ident                       
                       
+------------------------------------------------------------------------------------------------------
+ {first,"  second  ","   third   ","  
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
+(1 row)
+
diff --git a/src/test/regress/sql/name.sql b/src/test/regress/sql/name.sql
index 1c7a671..2e70e4e 100644
--- a/src/test/regress/sql/name.sql
+++ b/src/test/regress/sql/name.sql
@@ -52,3 +52,28 @@ SELECT '' AS three, c.f1 FROM NAME_TBL c WHERE c.f1 ~ 
'[0-9]';
 SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*asdf.*';
 
 DROP TABLE NAME_TBL;
+
+DO $$
+DECLARE r text[];
+BEGIN
+  r := parse_ident('Schemax.Tabley');
+  RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
+  r := parse_ident('"SchemaX"."TableY"');
+  RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
+END;
+$$;
+
+SELECT parse_ident('foo.boo');
+SELECT parse_ident('foo.boo[]'); -- should fail
+SELECT parse_ident('foo.boo[]', strictmode => false); -- ok
+
+-- should fail
+SELECT parse_ident(' ');
+SELECT parse_ident(' .aaa');
+SELECT parse_ident(' aaa . ');
+SELECT parse_ident('aaa.a%b');
+
+SELECT length(a[1]), length(a[2]) from 
parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy')
 as a ;
+
+SELECT parse_ident(' first . "  second  " ."   third   ". "  ' || 
repeat('x',66) || '"');
+SELECT parse_ident(' first . "  second  " ."   third   ". "  ' || 
repeat('x',66) || '"')::name[];
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to