On Sat, Nov 24, 2018 at 5:09 AM Tomas Vondra <tomas.von...@2ndquadrant.com>
wrote:

>
> 1) While comparing this to the FILTER clause we already have for
> aggregates, I've noticed the aggregate version is
>
>     FILTER '(' WHERE a_expr ')'
>
> while here we have
>
>     FILTER '(' a_expr ')'
>
> For a while I was thinking that maybe we should use the same syntax
> here, but I don't think so. The WHERE bit seems rather unnecessary and
> we probably implemented it only because it's required by SQL standard,
> which does not apply to COPY. So I think this is fine.


ok

>
> 2) The various parser checks emit errors like this:
>
>     case EXPR_KIND_COPY_FILTER:
>         err = _("cannot use subquery in copy from FILTER condition");
>         break;
>
> I think the "copy from" should be capitalized, to make it clear that it
> refers to a COPY FROM command and not a copy of something.
>
>
> 3) I think there should be regression tests for these prohibited things,
> i.e. for a set-returning function, for a non-existent column, etc.
>
>
fixed

>
> 4) What might be somewhat confusing for users is that the filter uses a
> single snapshot to evaluate the conditions for all rows. That is, one
> might do this
>
>     create or replace function f() returns int as $$
>         select count(*)::int from t;
>     $$ language sql;
>
> and hope that
>
>     copy t from '/...' filter (f() <= 100);
>
> only ever imports the first 100 rows - but that's not true, of course,
> because f() uses the snapshot acquired at the very beginning. For
> example INSERT SELECT does behave differently:
>
>     test=# copy t from '/home/user/t.data' filter (f() < 100);
>     COPY 81
>     test=# insert into t select * from t where f() < 100;
>     INSERT 0 19
>
> Obviously, this is not an issue when the filter clause references only
> the input row (which I assume will be the primary use case).
>
> Not sure if this is expected / appropriate behavior, and if the patch
> needs to do something else here.
>
> Comparing with overhead of setting snapshot before evaluating every row
and considering this

kind of usage is not frequent it seems to me the behavior is acceptable

regards
Surafel
diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 411941ed31..15b6ddebed 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -25,6 +25,7 @@ PostgreSQL documentation
 COPY <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ]
     FROM { '<replaceable class="parameter">filename</replaceable>' | PROGRAM '<replaceable class="parameter">command</replaceable>' | STDIN }
     [ [ WITH ] ( <replaceable class="parameter">option</replaceable> [, ...] ) ]
+    [ FILTER ( <replaceable class="parameter">condition</replaceable> ) ]
 
 COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ] | ( <replaceable class="parameter">query</replaceable> ) }
     TO { '<replaceable class="parameter">filename</replaceable>' | PROGRAM '<replaceable class="parameter">command</replaceable>' | STDOUT }
@@ -353,6 +354,30 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><literal>FILTER</literal></term>
+    <listitem>
+   <para>
+    The optional <literal>FILTER</literal> clause has the general form
+<synopsis>
+FILTER <replaceable class="parameter">condition</replaceable>
+</synopsis>
+    where <replaceable class="parameter">condition</replaceable> is
+    any expression that evaluates to a result of type
+    <type>boolean</type>.  Any row that does not satisfy this
+    condition will not be inserted to the table.  A row satisfies the
+    condition if it returns true when the actual row values are
+    substituted for any variable references.
+   </para>
+
+   <para>
+    Currently, <literal>FILTER</literal> expressions cannot contain
+    subqueries.
+   </para>
+
+    </listitem>
+   </varlistentry>
+
   </variablelist>
  </refsect1>
 
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 4aa8890fe8..bb260c41fd 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -40,7 +40,11 @@
 #include "miscadmin.h"
 #include "optimizer/clauses.h"
 #include "optimizer/planner.h"
+#include "optimizer/prep.h"
 #include "nodes/makefuncs.h"
+#include "parser/parse_coerce.h"
+#include "parser/parse_collate.h"
+#include "parser/parse_expr.h"
 #include "parser/parse_relation.h"
 #include "port/pg_bswap.h"
 #include "rewrite/rewriteHandler.h"
@@ -150,6 +154,7 @@ typedef struct CopyStateData
 	bool		convert_selectively;	/* do selective binary conversion? */
 	List	   *convert_select; /* list of column names (can be NIL) */
 	bool	   *convert_select_flags;	/* per-column CSV/TEXT CS flags */
+	Node	   *filterClause;	/* FILTER condition (or NULL) */
 
 	/* these are just for error messages, see CopyFromErrorCallback */
 	const char *cur_relname;	/* table name for error messages */
@@ -180,6 +185,7 @@ typedef struct CopyStateData
 	ExprState **defexprs;		/* array of default att expressions */
 	bool		volatile_defexprs;	/* is any of defexprs volatile? */
 	List	   *range_table;
+	ExprState  *qualexpr;
 
 	TransitionCaptureState *transition_capture;
 
@@ -801,6 +807,7 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
 	Relation	rel;
 	Oid			relid;
 	RawStmt    *query = NULL;
+	Node    *filterClause = NULL;
 
 	/*
 	 * Disallow COPY to/from file or program except to users with the
@@ -854,6 +861,25 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
 											NULL, false, false);
 		rte->requiredPerms = (is_from ? ACL_INSERT : ACL_SELECT);
 
+		if (stmt->filterClause)
+		{
+			/* add rte to column namespace  */
+			addRTEtoQuery(pstate, rte, false, true, true);
+
+			/* Transform the raw expression tree */
+			filterClause = transformExpr(pstate, stmt->filterClause, EXPR_KIND_COPY_FILTER);
+
+			/*  Make sure it yields a boolean result. */
+			filterClause = coerce_to_boolean(pstate, filterClause, "FILTER");
+			/* we have to fix its collations too */
+			assign_expr_collations(pstate, filterClause);
+
+			filterClause = eval_const_expressions(NULL, filterClause);
+
+			filterClause = (Node *) canonicalize_qual((Expr *) filterClause, false);
+			filterClause = (Node *) make_ands_implicit((Expr *) filterClause);
+		}
+
 		tupDesc = RelationGetDescr(rel);
 		attnums = CopyGetAttnums(tupDesc, rel, stmt->attlist);
 		foreach(cur, attnums)
@@ -1002,6 +1028,7 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
 
 		cstate = BeginCopyFrom(pstate, rel, stmt->filename, stmt->is_program,
 							   NULL, stmt->attlist, stmt->options);
+		cstate->filterClause = filterClause;
 		*processed = CopyFrom(cstate);	/* copy from file to database */
 		EndCopyFrom(cstate);
 	}
@@ -2531,6 +2558,10 @@ CopyFrom(CopyState cstate)
 	if (cstate->rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 		proute = ExecSetupPartitionTupleRouting(NULL, cstate->rel);
 
+	if (cstate->filterClause)
+		cstate->qualexpr = ExecInitQual(castNode(List, cstate->filterClause),
+												&mtstate->ps);
+
 	/*
 	 * It's more efficient to prepare a bunch of tuples for insertion, and
 	 * insert them in one heap_multi_insert() call, than call heap_insert()
@@ -2679,6 +2710,13 @@ CopyFrom(CopyState cstate)
 		slot = myslot;
 		ExecStoreHeapTuple(tuple, slot, false);
 
+		if (cstate->filterClause)
+		{
+			econtext->ecxt_scantuple = myslot;
+			if (!ExecQual(cstate->qualexpr, econtext))
+				continue;
+		}
+
 		/* Determine the partition to heap_insert the tuple into */
 		if (proute)
 		{
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index db49968409..31d753f139 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -3309,6 +3309,7 @@ _copyCopyStmt(const CopyStmt *from)
 	COPY_SCALAR_FIELD(is_program);
 	COPY_STRING_FIELD(filename);
 	COPY_NODE_FIELD(options);
+	COPY_NODE_FIELD(filterClause);
 
 	return newnode;
 }
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 3a084b4d1f..f17d9b0006 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -1221,6 +1221,7 @@ _equalCopyStmt(const CopyStmt *a, const CopyStmt *b)
 	COMPARE_SCALAR_FIELD(is_program);
 	COMPARE_STRING_FIELD(filename);
 	COMPARE_NODE_FIELD(options);
+	COMPARE_NODE_FIELD(filterClause);
 
 	return true;
 }
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 2c2208ffb7..78bdfdf722 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -509,6 +509,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <defelt>	copy_generic_opt_elem
 %type <list>	copy_generic_opt_list copy_generic_opt_arg_list
 %type <list>	copy_options
+%type <node>	copy_filter_clause
 
 %type <typnam>	Typename SimpleTypename ConstTypename
 				GenericType Numeric opt_float
@@ -2962,7 +2963,8 @@ ClosePortalStmt:
  *****************************************************************************/
 
 CopyStmt:	COPY opt_binary qualified_name opt_column_list
-			copy_from opt_program copy_file_name copy_delimiter opt_with copy_options
+			copy_from opt_program copy_file_name copy_delimiter opt_with
+			copy_options copy_filter_clause
 				{
 					CopyStmt *n = makeNode(CopyStmt);
 					n->relation = $3;
@@ -2971,6 +2973,7 @@ CopyStmt:	COPY opt_binary qualified_name opt_column_list
 					n->is_from = $5;
 					n->is_program = $6;
 					n->filename = $7;
+					n->filterClause = $11;
 
 					if (n->is_program && n->filename == NULL)
 						ereport(ERROR,
@@ -2978,6 +2981,12 @@ CopyStmt:	COPY opt_binary qualified_name opt_column_list
 								 errmsg("STDIN/STDOUT not allowed with PROGRAM"),
 								 parser_errposition(@8)));
 
+					if (!n->is_from && n->filterClause != NULL)
+						ereport(ERROR,
+								(errcode(ERRCODE_SYNTAX_ERROR),
+								 errmsg("FILTER clause not allowed with COPY TO"),
+								 parser_errposition(@11)));
+
 					n->options = NIL;
 					/* Concatenate user-supplied flags */
 					if ($2)
@@ -3161,6 +3170,11 @@ copy_generic_opt_arg_list_item:
 			opt_boolean_or_string	{ $$ = (Node *) makeString($1); }
 		;
 
+copy_filter_clause:
+			FILTER '(' a_expr	')'						{ $$ = $3; }
+			| /*EMPTY*/								{ $$ = NULL; }
+		;
+
 
 /*****************************************************************************
  *
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index 61727e1d71..e205adfdaa 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -523,6 +523,14 @@ check_agglevels_and_constraints(ParseState *pstate, Node *expr)
 
 			break;
 
+		case EXPR_KIND_COPY_FILTER:
+			if (isAgg)
+				err = _("aggregate functions are not allowed in COPY FROM FILTER conditions");
+			else
+				err = _("grouping operations are not allowed in COPY FROM FILTER conditions");
+
+			break;
+
 			/*
 			 * There is intentionally no default: case here, so that the
 			 * compiler will warn if we add a new ParseExprKind without
@@ -902,6 +910,9 @@ transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc,
 		case EXPR_KIND_CALL_ARGUMENT:
 			err = _("window functions are not allowed in CALL arguments");
 			break;
+		case EXPR_KIND_COPY_FILTER:
+			err = _("window functions are not allowed in COPY FROM FILTER conditions");
+			break;
 
 			/*
 			 * There is intentionally no default: case here, so that the
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 385e54a9b6..6e2f99730a 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -1849,6 +1849,9 @@ transformSubLink(ParseState *pstate, SubLink *sublink)
 		case EXPR_KIND_CALL_ARGUMENT:
 			err = _("cannot use subquery in CALL argument");
 			break;
+		case EXPR_KIND_COPY_FILTER:
+			err = _("cannot use subquery in COPY FROM FILTER condition");
+			break;
 
 			/*
 			 * There is intentionally no default: case here, so that the
@@ -3475,6 +3478,8 @@ ParseExprKindName(ParseExprKind exprKind)
 			return "PARTITION BY";
 		case EXPR_KIND_CALL_ARGUMENT:
 			return "CALL";
+		case EXPR_KIND_COPY_FILTER:
+			return "FILTER";
 
 			/*
 			 * There is intentionally no default: case here, so that the
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c
index 44257154b8..3d4784d8fa 100644
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -2370,6 +2370,9 @@ check_srf_call_placement(ParseState *pstate, Node *last_srf, int location)
 		case EXPR_KIND_CALL_ARGUMENT:
 			err = _("set-returning functions are not allowed in CALL arguments");
 			break;
+		case EXPR_KIND_COPY_FILTER:
+			err = _("set-returning functions are not allowed in COPY FROM FILTER conditions");
+			break;
 
 			/*
 			 * There is intentionally no default: case here, so that the
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index e5bdc1cec5..e2a6e20498 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -1969,6 +1969,7 @@ typedef struct CopyStmt
 	bool		is_program;		/* is 'filename' a program to popen? */
 	char	   *filename;		/* filename, or NULL for STDIN/STDOUT */
 	List	   *options;		/* List of DefElem nodes */
+	Node	   *filterClause;		/* FILTER condition (or NULL) */
 } CopyStmt;
 
 /* ----------------------
diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h
index 0230543810..7761e8fc6c 100644
--- a/src/include/parser/parse_node.h
+++ b/src/include/parser/parse_node.h
@@ -69,7 +69,8 @@ typedef enum ParseExprKind
 	EXPR_KIND_TRIGGER_WHEN,		/* WHEN condition in CREATE TRIGGER */
 	EXPR_KIND_POLICY,			/* USING or WITH CHECK expr in policy */
 	EXPR_KIND_PARTITION_EXPRESSION, /* PARTITION BY expression */
-	EXPR_KIND_CALL_ARGUMENT		/* procedure argument in CALL */
+	EXPR_KIND_CALL_ARGUMENT,		/* procedure argument in CALL */
+	EXPR_KIND_COPY_FILTER		/* FILTER condition in COPY FROM */
 } ParseExprKind;
 
 
diff --git a/src/interfaces/ecpg/preproc/ecpg.addons b/src/interfaces/ecpg/preproc/ecpg.addons
index 0167ee4620..bd6736d46b 100644
--- a/src/interfaces/ecpg/preproc/ecpg.addons
+++ b/src/interfaces/ecpg/preproc/ecpg.addons
@@ -192,7 +192,7 @@ ECPG: where_or_current_clauseWHERECURRENT_POFcursor_name block
 		char *cursor_marker = $4[0] == ':' ? mm_strdup("$0") : $4;
 		$$ = cat_str(2,mm_strdup("where current of"), cursor_marker);
 	}
-ECPG: CopyStmtCOPYopt_binaryqualified_nameopt_column_listcopy_fromopt_programcopy_file_namecopy_delimiteropt_withcopy_options addon
+ECPG: CopyStmtCOPYopt_binaryqualified_nameopt_column_listcopy_fromopt_programcopy_file_namecopy_delimiteropt_withcopy_optionscopy_filter_clause addon
 			if (strcmp($6, "from") == 0 &&
 			   (strcmp($7, "stdin") == 0 || strcmp($7, "stdout") == 0))
 				mmerror(PARSE_ERROR, ET_WARNING, "COPY FROM STDIN is not implemented");
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 19bb538411..ab37a4dc13 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -49,6 +49,28 @@ CONTEXT:  COPY x, line 1: "2002	232	40	50	60	70	80"
 COPY x (b, c, d, e) from stdin delimiter ',' null 'x';
 COPY x from stdin WITH DELIMITER AS ';' NULL AS '';
 COPY x from stdin WITH DELIMITER AS ':' NULL AS E'\\X' ENCODING 'sql_ascii';
+COPY x from stdin FILTER (a = 50004);
+COPY x from stdin FILTER (a > 60003);
+COPY x from stdin FILTER (f > 60003);
+ERROR:  column "f" does not exist
+LINE 1: COPY x from stdin FILTER (f > 60003);
+                                  ^
+COPY x from stdin FILTER (a = max(x.b));
+ERROR:  aggregate functions are not allowed in COPY FROM FILTER conditions
+LINE 1: COPY x from stdin FILTER (a = max(x.b));
+                                      ^
+COPY x from stdin FILTER (a IN (SELECT 1 FROM x));
+ERROR:  cannot use subquery in COPY FROM FILTER condition
+LINE 1: COPY x from stdin FILTER (a IN (SELECT 1 FROM x));
+                                    ^
+COPY x from stdin FILTER (a IN (generate_series(1,5)));
+ERROR:  set-returning functions are not allowed in COPY FROM FILTER conditions
+LINE 1: COPY x from stdin FILTER (a IN (generate_series(1,5)));
+                                        ^
+COPY x from stdin FILTER (a = row_number() over(b));
+ERROR:  window functions are not allowed in COPY FROM FILTER conditions
+LINE 1: COPY x from stdin FILTER (a = row_number() over(b));
+                                      ^
 -- check results of copy in
 SELECT * FROM x;
    a   | b  |     c      |   d    |          e           
@@ -73,12 +95,15 @@ SELECT * FROM x;
   4006 |  6 | BackslashN | \N     | before trigger fired
   4007 |  7 | XX         | XX     | before trigger fired
   4008 |  8 | Delimiter  | :      | before trigger fired
+ 50004 | 25 | 35         | 45     | before trigger fired
+ 60004 | 25 | 35         | 45     | before trigger fired
+ 60005 | 26 | 36         | 46     | before trigger fired
      1 |  1 | stuff      | test_1 | after trigger fired
      2 |  2 | stuff      | test_2 | after trigger fired
      3 |  3 | stuff      | test_3 | after trigger fired
      4 |  4 | stuff      | test_4 | after trigger fired
      5 |  5 | stuff      | test_5 | after trigger fired
-(25 rows)
+(28 rows)
 
 -- check copy out
 COPY x TO stdout;
@@ -102,6 +127,9 @@ COPY x TO stdout;
 4006	6	BackslashN	\\N	before trigger fired
 4007	7	XX	XX	before trigger fired
 4008	8	Delimiter	:	before trigger fired
+50004	25	35	45	before trigger fired
+60004	25	35	45	before trigger fired
+60005	26	36	46	before trigger fired
 1	1	stuff	test_1	after trigger fired
 2	2	stuff	test_2	after trigger fired
 3	3	stuff	test_3	after trigger fired
@@ -128,6 +156,9 @@ N	before trigger fired
 BackslashN	before trigger fired
 XX	before trigger fired
 Delimiter	before trigger fired
+35	before trigger fired
+35	before trigger fired
+36	before trigger fired
 stuff	after trigger fired
 stuff	after trigger fired
 stuff	after trigger fired
@@ -154,6 +185,9 @@ I'm null	before trigger fired
 6	before trigger fired
 7	before trigger fired
 8	before trigger fired
+25	before trigger fired
+25	before trigger fired
+26	before trigger fired
 1	after trigger fired
 2	after trigger fired
 3	after trigger fired
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index e36df8858e..1a0f0f97d0 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -95,6 +95,31 @@ COPY x from stdin WITH DELIMITER AS ':' NULL AS E'\\X' ENCODING 'sql_ascii';
 4008:8:Delimiter:\::\:
 \.
 
+COPY x from stdin FILTER (a = 50004);
+50003	24	34	44	54
+50004	25	35	45	55
+50005	26	36	46	56
+\.
+
+COPY x from stdin FILTER (a > 60003);
+60001	22	32	42	52
+60002	23	33	43	53
+60003	24	34	44	54
+60004	25	35	45	55
+60005	26	36	46	56
+\.
+
+COPY x from stdin FILTER (f > 60003);
+
+COPY x from stdin FILTER (a = max(x.b));
+
+COPY x from stdin FILTER (a IN (SELECT 1 FROM x));
+
+COPY x from stdin FILTER (a IN (generate_series(1,5)));
+
+COPY x from stdin FILTER (a = row_number() over(b));
+
+
 -- check results of copy in
 SELECT * FROM x;
 

Reply via email to