On Sat, 2004-11-27 at 12:55 -0500, Tom Lane wrote:
> This seems like the most appropriate answer to me; I was thinking of
> doing that earlier when Fabien and I were fooling with plpgsql error
> reporting, but didn't get around to it.

Attached is a patch that implements a rough draft of this (it also
includes an improved version of the "unreachable code" patch that
includes your suggested fixes). Two questions about the patch:

(1) It doesn't report syntax errors in unreachable code. I suppose this
ought to be done, right?

(2) The syntax error message is wrong (we print a character offset and
query context that is relative to the CREATE FUNCTION statement, not the
individual SQL statement we're executing). I fooled around a bit with
defining a custom ereport() callback to print the right line number and
query context, but I couldn't get it right. Do you have any guidance on
the proper way to do this.

> As long as you only do this in check_function_bodies mode it seems
> safe enough.  One possible problem (if it's done towards the end of
> parsing as you've suggested for dead-code checking) is that a syntax
> error in a SQL statement might confuse the plpgsql parser into
> misparsing statement boundaries, which could lead to a plpgsql parse
> error much further down, such as a "missing END" at the end of the
> function.  The error would be more useful if reported immediately
> after the putative SQL statement is parsed.  Not sure if that's
> hard or not.  (The same remark applies to dead code checking, now
> that I think about it.)

In the case of dead code checking, I don't think it matters. Doing the
syntax check in gram.y might be a better idea, I'll take a look doing
that...

-Neil

# 
# patch "src/pl/plpgsql/src/pl_comp.c"
#  from [94dec38ffccf5fceed00c2b1007cf2b0c238af4b]
#    to [3223e07f0046277dbea155774132975d6e636fa7]
# 
# patch "src/test/regress/expected/plpgsql.out"
#  from [1b0591090144d66281066d54d81ef9a238809b0d]
#    to [320d79127d4f067eea8c09f0e17174721e96a9df]
# 
# patch "src/test/regress/sql/plpgsql.sql"
#  from [4ca1c085eda5ca4fa0397354d887a28d10236594]
#    to [261d4fa85aef9eb40c2922e433af025a5b19f38a]
# 
--- src/pl/plpgsql/src/pl_comp.c
+++ src/pl/plpgsql/src/pl_comp.c
@@ -52,6 +52,7 @@
 #include "nodes/makefuncs.h"
 #include "parser/gramparse.h"
 #include "parser/parse_type.h"
+#include "parser/parser.h"
 #include "tcop/tcopprot.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
@@ -130,6 +131,7 @@
 static void plpgsql_HashTableInsert(PLpgSQL_function *function,
 						PLpgSQL_func_hashkey *func_key);
 static void plpgsql_HashTableDelete(PLpgSQL_function *function);
+static void check_function(PLpgSQL_function *function);
 
 /*
  * This routine is a crock, and so is everyplace that calls it.  The problem
@@ -152,8 +154,10 @@
 /* ----------
  * plpgsql_compile		Make an execution tree for a PL/pgSQL function.
  *
- * If forValidator is true, we're only compiling for validation purposes,
- * and so some checks are skipped.
+ * If forValidator is true, we're only compiling for validation
+ * purposes; this means we skip some checks, as well as making some
+ * additional compile-time checks that we only want to do once (at
+ * function definition time), not very time the function is compiled.
  *
  * Note: it's important for this to fall through quickly if the function
  * has already been compiled.
@@ -293,7 +297,7 @@
 	 * Setup error traceback support for ereport()
 	 */
 	plerrcontext.callback = plpgsql_compile_error_callback;
-	plerrcontext.arg = forValidator ? proc_source : (char *) NULL;
+	plerrcontext.arg = forValidator ? proc_source : NULL;
 	plerrcontext.previous = error_context_stack;
 	error_context_stack = &plerrcontext;
 
@@ -595,17 +599,16 @@
 	plpgsql_add_initdatums(NULL);
 
 	/*
-	 * Now parse the functions text
+	 * Now parse the function's text
 	 */
 	parse_rc = plpgsql_yyparse();
 	if (parse_rc != 0)
 		elog(ERROR, "plpgsql parser returned %d", parse_rc);
 
 	plpgsql_scanner_finish();
-	pfree(proc_source);
 
 	/*
-	 * If that was successful, complete the functions info.
+	 * If that was successful, complete the function's info.
 	 */
 	function->fn_nargs = procStruct->pronargs;
 	for (i = 0; i < function->fn_nargs; i++)
@@ -616,12 +619,22 @@
 		function->datums[i] = plpgsql_Datums[i];
 	function->action = plpgsql_yylval.program;
 
+	/*
+	 * Perform whatever additional compile-time checks we can. Note
+	 * that we only do this when validating the function; this is so
+	 * that (a) we don't bother the user with warnings when the
+	 * function is invoked (b) we don't take the performance hit of
+	 * doing the analysis more than once per function definition.
+	 */
+	if (forValidator)
+		check_function(function);
+
 	/* Debug dump for completed functions */
 	if (plpgsql_DumpExecTree)
 		plpgsql_dumptree(function);
 
 	/*
-	 * add it to the hash table
+	 * Add it to the hash table
 	 */
 	plpgsql_HashTableInsert(function, hashkey);
 
@@ -631,6 +644,7 @@
 	error_context_stack = plerrcontext.previous;
 	plpgsql_error_funcname = NULL;
 	plpgsql_error_lineno = 0;
+	pfree(proc_source);
 
 	return function;
 }
@@ -664,8 +678,198 @@
 				   plpgsql_error_funcname, plpgsql_error_lineno);
 }
 
+/*
+ * The PL/PgSQL parser will often assume that an unrecognized keyword
+ * indicates the beginning of a SQL statement. That avoids the need to
+ * duplicate parts of the SQL grammar in the PL/PgSQL parser, but it
+ * also means we are very liberal in the input we accept. To try and
+ * catch some of the more obviously invalid input, we run strings that
+ * we've guessed to be SQL statements through the backend's parser.
+ *
+ * We only invoke the "raw" parser (not the analyzer); this doesn't do
+ * any database access, it just checks that the query is (mostly)
+ * syntactically correct.
+ *
+ * XXX: do we want to check that certain SQL statements cannot be
+ * used?
+ *
+ * XXX: fix line number in syntax error message
+ *
+ * XXX: fix the context that is printed on error
+ */
+static void
+check_sql_statement(char *stmt_string)
+{
+	MemoryContext tmpCxt;
+	MemoryContext oldCxt;
 
+	/* XXX: reset a passed-in temporary context, don't create/destroy */
+	tmpCxt = AllocSetContextCreate(CurrentMemoryContext,
+								   "PL/PgSQL syntax check",
+								   ALLOCSET_DEFAULT_MINSIZE,
+								   ALLOCSET_DEFAULT_INITSIZE,
+								   ALLOCSET_DEFAULT_MAXSIZE);
+	oldCxt = MemoryContextSwitchTo(tmpCxt);
+
+	raw_parser(stmt_string);
+
+	MemoryContextSwitchTo(oldCxt);
+	MemoryContextDelete(tmpCxt);
+}
+
 /*
+ * Emit a warning that the statement 'unreach' is unreachable, due to
+ * the effect of the preceding statement 'cause'.
+ */
+static void
+report_unreachable_stmt(PLpgSQL_stmt *unreach, PLpgSQL_stmt *cause)
+{
+	/*
+	 * XXX: adjust the line number that is emitted along with the
+	 * warning message. This is a kludge.
+	 */
+	int old_lineno = plpgsql_error_lineno;
+	plpgsql_error_lineno = unreach->lineno;
+
+	/*
+	 * XXX: error code?
+	 */
+	ereport(WARNING,
+			(errmsg("%s is unreachable, due to %s near line %d",
+					plpgsql_stmt_typename(unreach),
+					plpgsql_stmt_typename(cause),
+					cause->lineno)));
+
+	plpgsql_error_lineno = old_lineno;
+}
+
+/*
+ * Given a list of PL/PgSQL statements, perform some compile-time
+ * checks on them.
+ *
+ * Note when checking for unreachable code, we return as soon as we
+ * have emitted "unreachable" warnings for a given sequence of
+ * statements. So that given:
+ *
+ *    EXIT; EXIT; EXIT
+ *
+ * we will see the first "EXIT", issue warnings for the second and
+ * third unreachable EXIT statements, and then return, so that we
+ * don't issue bogus "unreachable" warnings when we see the second
+ * EXIT.
+ *
+ * XXX: currently we walk the PL/PgSQL execution tree by hand. It
+ * would probably be worth refactoring this to use something akin to
+ * the tree walker infrastructure in the backend.
+ */
+static void
+check_stmts(PLpgSQL_stmts *stmts)
+{
+	int i;
+
+	for (i = 0; i < stmts->stmts_used; i++)
+	{
+		PLpgSQL_stmt *stmt = stmts->stmts[i];
+
+		switch (stmt->cmd_type)
+		{
+			case PLPGSQL_STMT_RETURN:
+				if (i + 1 < stmts->stmts_used)
+				{
+					report_unreachable_stmt(stmts->stmts[i+1], stmt);
+					return;
+				}
+			case PLPGSQL_STMT_EXIT:
+				{
+					/*
+					 * If the EXIT statement has a conditional, it is
+					 * not guaranteed to exit the loop, so don't issue
+					 * a warning.
+					 */
+					PLpgSQL_stmt_exit *exit_stmt = (PLpgSQL_stmt_exit *) stmt;
+					if (exit_stmt->cond == NULL && i + 1 < stmts->stmts_used)
+					{
+						report_unreachable_stmt(stmts->stmts[i+1], stmt);
+						return;
+					}
+				}
+				break;
+			case PLPGSQL_STMT_RAISE:
+				{
+					PLpgSQL_stmt_raise *raise_stmt = (PLpgSQL_stmt_raise *) stmt;
+					/*
+					 * Only RAISE EXCEPTION (converted to elog_level =
+					 * ERROR by the parser) will exit the current
+					 * block.
+					 */
+					if (raise_stmt->elog_level == ERROR && i + 1 < stmts->stmts_used)
+					{
+						report_unreachable_stmt(stmts->stmts[i+1], stmt);
+						return;
+					}
+				}
+				break;
+			case PLPGSQL_STMT_BLOCK:
+				{
+					PLpgSQL_stmt_block *block_stmt = (PLpgSQL_stmt_block *) stmt;
+					check_stmts(block_stmt->body);
+
+					if (block_stmt->exceptions)
+					{
+						PLpgSQL_exceptions *exceptions;
+						int j;
+
+						exceptions = block_stmt->exceptions;
+						for (j = 0; j < exceptions->exceptions_used; j++)
+							check_stmts(exceptions->exceptions[j]->action);
+					}
+				}
+				break;
+			case PLPGSQL_STMT_IF:
+				check_stmts(((PLpgSQL_stmt_if *) stmt)->true_body);
+				check_stmts(((PLpgSQL_stmt_if *) stmt)->false_body);
+				break;
+			case PLPGSQL_STMT_LOOP:
+				check_stmts(((PLpgSQL_stmt_loop *) stmt)->body);
+				break;
+			case PLPGSQL_STMT_WHILE:
+				check_stmts(((PLpgSQL_stmt_while *) stmt)->body);
+				break;
+			case PLPGSQL_STMT_FORI:
+				check_stmts(((PLpgSQL_stmt_fori *) stmt)->body);
+				break;
+			case PLPGSQL_STMT_FORS:
+				{
+					PLpgSQL_stmt_fors *fors_stmt = (PLpgSQL_stmt_fors *) stmt;
+					check_sql_statement(fors_stmt->query->query);
+					check_stmts(fors_stmt->body);
+				}
+				break;
+			case PLPGSQL_STMT_DYNFORS:
+				check_stmts(((PLpgSQL_stmt_dynfors *) stmt)->body);
+				break;
+			case PLPGSQL_STMT_EXECSQL:
+				check_sql_statement(((PLpgSQL_stmt_execsql *) stmt)->sqlstmt->query);
+				break;
+			default:
+				/* do nothing */
+				break;
+		}
+	}
+}
+
+/*
+ * Issue warnings about various ill-advised constructs in the function
+ * body. We don't do very many compile-time checks at the moment, but
+ * a few is better than none...
+ */
+static void
+check_function(PLpgSQL_function *function)
+{
+	check_stmts(function->action->body);
+}
+
+/*
  * Fetch the argument names, if any, from the proargnames field of the
  * pg_proc tuple.  Results are palloc'd.
  */
--- src/test/regress/expected/plpgsql.out
+++ src/test/regress/expected/plpgsql.out
@@ -2032,3 +2032,153 @@
 DETAIL:  Key (f1)=(2) is not present in table "master".
 drop function trap_foreign_key(int);
 drop function trap_foreign_key_2();
+--
+-- issue warnings about unreachable code
+--
+create function exit_warn() returns int as $$
+declare x int;
+begin
+    x := 5;
+    loop
+        x := x + 1;
+        exit;
+        x := x + 2;
+    end loop;
+    x := x + 3;
+    return x;
+end;$$ language 'plpgsql';
+WARNING:  assignment is unreachable, due to exit near line 6
+CONTEXT:  compile of PL/pgSQL function "exit_warn" near line 7
+create function exit_no_warn() returns int as $$
+declare x int;
+begin
+    x := 5;
+    loop
+        x := x + 5;
+        exit when x > 20;
+        x := x - 1;
+    end loop;
+    x := x + 3;
+    return x;
+end;$$ language 'plpgsql';
+create function return_warn() returns int as $$
+declare
+    a int;
+    b int;
+begin
+    a := 3;
+    b := 2;
+    if a > b then
+        return 5;
+        begin
+            return 10;
+        end;
+    end if;
+    return 15;
+end;$$ language 'plpgsql';
+WARNING:  block variables initialization is unreachable, due to return near line 8
+CONTEXT:  compile of PL/pgSQL function "return_warn" near line 9
+create function return_warn2() returns int as $$
+begin
+    if 5 > 5 then
+        return 5;
+        return 10;
+    end if;
+    return 10;
+    return 15;
+    return 20;
+end;$$ language 'plpgsql';
+WARNING:  return is unreachable, due to return near line 3
+CONTEXT:  compile of PL/pgSQL function "return_warn2" near line 4
+WARNING:  return is unreachable, due to return near line 6
+CONTEXT:  compile of PL/pgSQL function "return_warn2" near line 7
+create function raise_warn() returns int as $$
+declare x int;
+begin
+    x := 10;
+    begin
+        raise exception 'some random exception';
+        return x;
+    exception
+        when RAISE_EXCEPTION then NULL;
+    end;
+
+    begin
+        raise exception 'foo';
+    exception
+        when RAISE_EXCEPTION then
+            return x;
+            x := x + 1;
+    end;
+
+    begin
+        raise notice 'not an exception';
+        return x;
+    end;
+end;$$ language 'plpgsql';
+WARNING:  return is unreachable, due to raise near line 5
+CONTEXT:  compile of PL/pgSQL function "raise_warn" near line 6
+WARNING:  assignment is unreachable, due to return near line 15
+CONTEXT:  compile of PL/pgSQL function "raise_warn" near line 16
+-- note that we only want to emit warnings at CREATE FUNCTION time, not
+-- when the function is invoked.
+SELECT exit_warn();
+ exit_warn 
+-----------
+         9
+(1 row)
+
+SELECT return_warn();
+ return_warn 
+-------------
+           5
+(1 row)
+
+SELECT return_warn2();
+ return_warn2 
+--------------
+           10
+(1 row)
+
+SELECT raise_warn();
+ raise_warn 
+------------
+         10
+(1 row)
+
+--
+-- reject function definitions that contain malformed SQL queries at
+-- compile-time, where possible
+--
+create function bad_sql1() returns int as $$
+declare a int;
+begin
+    a := 5;
+    Johnny Yuma;
+    a := 10;
+    return a;
+end$$ language 'plpgsql';
+ERROR:  syntax error at or near "Johnny" at character 45
+LINE 1: create function bad_sql1() returns int as $$
+                                                    ^
+create function bad_sql2() returns int as $$
+declare r record;
+begin
+    for r in I fought the law, the law won LOOP
+        raise notice 'in loop';
+    end loop;
+    return 5;
+end;$$ language 'plpgsql';
+ERROR:  syntax error at or near "I" at character 46
+LINE 2: declare r record;
+        ^
+create function bad_sql3() returns int as $$
+declare x int;
+begin
+    return 10;
+    -- report syntax errors even in unreachable code
+    ekgekrgkjge djngrg;
+    return 15;
+end;$$ language 'plpgsql';
+WARNING:  SQL statement is unreachable, due to return near line 3
+CONTEXT:  compile of PL/pgSQL function "bad_sql3" near line 5
--- src/test/regress/sql/plpgsql.sql
+++ src/test/regress/sql/plpgsql.sql
@@ -1764,3 +1764,123 @@
 
 drop function trap_foreign_key(int);
 drop function trap_foreign_key_2();
+
+--
+-- issue warnings about unreachable code
+--
+create function exit_warn() returns int as $$
+declare x int;
+begin
+    x := 5;
+    loop
+        x := x + 1;
+        exit;
+        x := x + 2;
+    end loop;
+    x := x + 3;
+    return x;
+end;$$ language 'plpgsql';
+
+create function exit_no_warn() returns int as $$
+declare x int;
+begin
+    x := 5;
+    loop
+        x := x + 5;
+        exit when x > 20;
+        x := x - 1;
+    end loop;
+    x := x + 3;
+    return x;
+end;$$ language 'plpgsql';
+
+create function return_warn() returns int as $$
+declare
+    a int;
+    b int;
+begin
+    a := 3;
+    b := 2;
+    if a > b then
+        return 5;
+        begin
+            return 10;
+        end;
+    end if;
+    return 15;
+end;$$ language 'plpgsql';
+
+create function return_warn2() returns int as $$
+begin
+    if 5 > 5 then
+        return 5;
+        return 10;
+    end if;
+    return 10;
+    return 15;
+    return 20;
+end;$$ language 'plpgsql';
+
+create function raise_warn() returns int as $$
+declare x int;
+begin
+    x := 10;
+    begin
+        raise exception 'some random exception';
+        return x;
+    exception
+        when RAISE_EXCEPTION then NULL;
+    end;
+
+    begin
+        raise exception 'foo';
+    exception
+        when RAISE_EXCEPTION then
+            return x;
+            x := x + 1;
+    end;
+
+    begin
+        raise notice 'not an exception';
+        return x;
+    end;
+end;$$ language 'plpgsql';
+
+-- note that we only want to emit warnings at CREATE FUNCTION time, not
+-- when the function is invoked.
+SELECT exit_warn();
+SELECT return_warn();
+SELECT return_warn2();
+SELECT raise_warn();
+
+--
+-- reject function definitions that contain malformed SQL queries at
+-- compile-time, where possible
+--
+
+create function bad_sql1() returns int as $$
+declare a int;
+begin
+    a := 5;
+    Johnny Yuma;
+    a := 10;
+    return a;
+end$$ language 'plpgsql';
+
+create function bad_sql2() returns int as $$
+declare r record;
+begin
+    for r in I fought the law, the law won LOOP
+        raise notice 'in loop';
+    end loop;
+    return 5;
+end;$$ language 'plpgsql';
+
+create function bad_sql3() returns int as $$
+declare x int;
+begin
+    return 10;
+    -- report syntax errors even in unreachable code
+    ekgekrgkjge djngrg;
+    return 15;
+end;$$ language 'plpgsql';
---------------------------(end of broadcast)---------------------------
TIP 5: Have you checked our extensive FAQ?

               http://www.postgresql.org/docs/faqs/FAQ.html

Reply via email to