On Sat, 2004-11-27 at 12:55 -0500, Tom Lane wrote: > This seems like the most appropriate answer to me; I was thinking of > doing that earlier when Fabien and I were fooling with plpgsql error > reporting, but didn't get around to it.
Attached is a patch that implements a rough draft of this (it also includes an improved version of the "unreachable code" patch that includes your suggested fixes). Two questions about the patch: (1) It doesn't report syntax errors in unreachable code. I suppose this ought to be done, right? (2) The syntax error message is wrong (we print a character offset and query context that is relative to the CREATE FUNCTION statement, not the individual SQL statement we're executing). I fooled around a bit with defining a custom ereport() callback to print the right line number and query context, but I couldn't get it right. Do you have any guidance on the proper way to do this. > As long as you only do this in check_function_bodies mode it seems > safe enough. One possible problem (if it's done towards the end of > parsing as you've suggested for dead-code checking) is that a syntax > error in a SQL statement might confuse the plpgsql parser into > misparsing statement boundaries, which could lead to a plpgsql parse > error much further down, such as a "missing END" at the end of the > function. The error would be more useful if reported immediately > after the putative SQL statement is parsed. Not sure if that's > hard or not. (The same remark applies to dead code checking, now > that I think about it.) In the case of dead code checking, I don't think it matters. Doing the syntax check in gram.y might be a better idea, I'll take a look doing that... -Neil
# # patch "src/pl/plpgsql/src/pl_comp.c" # from [94dec38ffccf5fceed00c2b1007cf2b0c238af4b] # to [3223e07f0046277dbea155774132975d6e636fa7] # # patch "src/test/regress/expected/plpgsql.out" # from [1b0591090144d66281066d54d81ef9a238809b0d] # to [320d79127d4f067eea8c09f0e17174721e96a9df] # # patch "src/test/regress/sql/plpgsql.sql" # from [4ca1c085eda5ca4fa0397354d887a28d10236594] # to [261d4fa85aef9eb40c2922e433af025a5b19f38a] # --- src/pl/plpgsql/src/pl_comp.c +++ src/pl/plpgsql/src/pl_comp.c @@ -52,6 +52,7 @@ #include "nodes/makefuncs.h" #include "parser/gramparse.h" #include "parser/parse_type.h" +#include "parser/parser.h" #include "tcop/tcopprot.h" #include "utils/array.h" #include "utils/builtins.h" @@ -130,6 +131,7 @@ static void plpgsql_HashTableInsert(PLpgSQL_function *function, PLpgSQL_func_hashkey *func_key); static void plpgsql_HashTableDelete(PLpgSQL_function *function); +static void check_function(PLpgSQL_function *function); /* * This routine is a crock, and so is everyplace that calls it. The problem @@ -152,8 +154,10 @@ /* ---------- * plpgsql_compile Make an execution tree for a PL/pgSQL function. * - * If forValidator is true, we're only compiling for validation purposes, - * and so some checks are skipped. + * If forValidator is true, we're only compiling for validation + * purposes; this means we skip some checks, as well as making some + * additional compile-time checks that we only want to do once (at + * function definition time), not very time the function is compiled. * * Note: it's important for this to fall through quickly if the function * has already been compiled. @@ -293,7 +297,7 @@ * Setup error traceback support for ereport() */ plerrcontext.callback = plpgsql_compile_error_callback; - plerrcontext.arg = forValidator ? proc_source : (char *) NULL; + plerrcontext.arg = forValidator ? proc_source : NULL; plerrcontext.previous = error_context_stack; error_context_stack = &plerrcontext; @@ -595,17 +599,16 @@ plpgsql_add_initdatums(NULL); /* - * Now parse the functions text + * Now parse the function's text */ parse_rc = plpgsql_yyparse(); if (parse_rc != 0) elog(ERROR, "plpgsql parser returned %d", parse_rc); plpgsql_scanner_finish(); - pfree(proc_source); /* - * If that was successful, complete the functions info. + * If that was successful, complete the function's info. */ function->fn_nargs = procStruct->pronargs; for (i = 0; i < function->fn_nargs; i++) @@ -616,12 +619,22 @@ function->datums[i] = plpgsql_Datums[i]; function->action = plpgsql_yylval.program; + /* + * Perform whatever additional compile-time checks we can. Note + * that we only do this when validating the function; this is so + * that (a) we don't bother the user with warnings when the + * function is invoked (b) we don't take the performance hit of + * doing the analysis more than once per function definition. + */ + if (forValidator) + check_function(function); + /* Debug dump for completed functions */ if (plpgsql_DumpExecTree) plpgsql_dumptree(function); /* - * add it to the hash table + * Add it to the hash table */ plpgsql_HashTableInsert(function, hashkey); @@ -631,6 +644,7 @@ error_context_stack = plerrcontext.previous; plpgsql_error_funcname = NULL; plpgsql_error_lineno = 0; + pfree(proc_source); return function; } @@ -664,8 +678,198 @@ plpgsql_error_funcname, plpgsql_error_lineno); } +/* + * The PL/PgSQL parser will often assume that an unrecognized keyword + * indicates the beginning of a SQL statement. That avoids the need to + * duplicate parts of the SQL grammar in the PL/PgSQL parser, but it + * also means we are very liberal in the input we accept. To try and + * catch some of the more obviously invalid input, we run strings that + * we've guessed to be SQL statements through the backend's parser. + * + * We only invoke the "raw" parser (not the analyzer); this doesn't do + * any database access, it just checks that the query is (mostly) + * syntactically correct. + * + * XXX: do we want to check that certain SQL statements cannot be + * used? + * + * XXX: fix line number in syntax error message + * + * XXX: fix the context that is printed on error + */ +static void +check_sql_statement(char *stmt_string) +{ + MemoryContext tmpCxt; + MemoryContext oldCxt; + /* XXX: reset a passed-in temporary context, don't create/destroy */ + tmpCxt = AllocSetContextCreate(CurrentMemoryContext, + "PL/PgSQL syntax check", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + oldCxt = MemoryContextSwitchTo(tmpCxt); + + raw_parser(stmt_string); + + MemoryContextSwitchTo(oldCxt); + MemoryContextDelete(tmpCxt); +} + /* + * Emit a warning that the statement 'unreach' is unreachable, due to + * the effect of the preceding statement 'cause'. + */ +static void +report_unreachable_stmt(PLpgSQL_stmt *unreach, PLpgSQL_stmt *cause) +{ + /* + * XXX: adjust the line number that is emitted along with the + * warning message. This is a kludge. + */ + int old_lineno = plpgsql_error_lineno; + plpgsql_error_lineno = unreach->lineno; + + /* + * XXX: error code? + */ + ereport(WARNING, + (errmsg("%s is unreachable, due to %s near line %d", + plpgsql_stmt_typename(unreach), + plpgsql_stmt_typename(cause), + cause->lineno))); + + plpgsql_error_lineno = old_lineno; +} + +/* + * Given a list of PL/PgSQL statements, perform some compile-time + * checks on them. + * + * Note when checking for unreachable code, we return as soon as we + * have emitted "unreachable" warnings for a given sequence of + * statements. So that given: + * + * EXIT; EXIT; EXIT + * + * we will see the first "EXIT", issue warnings for the second and + * third unreachable EXIT statements, and then return, so that we + * don't issue bogus "unreachable" warnings when we see the second + * EXIT. + * + * XXX: currently we walk the PL/PgSQL execution tree by hand. It + * would probably be worth refactoring this to use something akin to + * the tree walker infrastructure in the backend. + */ +static void +check_stmts(PLpgSQL_stmts *stmts) +{ + int i; + + for (i = 0; i < stmts->stmts_used; i++) + { + PLpgSQL_stmt *stmt = stmts->stmts[i]; + + switch (stmt->cmd_type) + { + case PLPGSQL_STMT_RETURN: + if (i + 1 < stmts->stmts_used) + { + report_unreachable_stmt(stmts->stmts[i+1], stmt); + return; + } + case PLPGSQL_STMT_EXIT: + { + /* + * If the EXIT statement has a conditional, it is + * not guaranteed to exit the loop, so don't issue + * a warning. + */ + PLpgSQL_stmt_exit *exit_stmt = (PLpgSQL_stmt_exit *) stmt; + if (exit_stmt->cond == NULL && i + 1 < stmts->stmts_used) + { + report_unreachable_stmt(stmts->stmts[i+1], stmt); + return; + } + } + break; + case PLPGSQL_STMT_RAISE: + { + PLpgSQL_stmt_raise *raise_stmt = (PLpgSQL_stmt_raise *) stmt; + /* + * Only RAISE EXCEPTION (converted to elog_level = + * ERROR by the parser) will exit the current + * block. + */ + if (raise_stmt->elog_level == ERROR && i + 1 < stmts->stmts_used) + { + report_unreachable_stmt(stmts->stmts[i+1], stmt); + return; + } + } + break; + case PLPGSQL_STMT_BLOCK: + { + PLpgSQL_stmt_block *block_stmt = (PLpgSQL_stmt_block *) stmt; + check_stmts(block_stmt->body); + + if (block_stmt->exceptions) + { + PLpgSQL_exceptions *exceptions; + int j; + + exceptions = block_stmt->exceptions; + for (j = 0; j < exceptions->exceptions_used; j++) + check_stmts(exceptions->exceptions[j]->action); + } + } + break; + case PLPGSQL_STMT_IF: + check_stmts(((PLpgSQL_stmt_if *) stmt)->true_body); + check_stmts(((PLpgSQL_stmt_if *) stmt)->false_body); + break; + case PLPGSQL_STMT_LOOP: + check_stmts(((PLpgSQL_stmt_loop *) stmt)->body); + break; + case PLPGSQL_STMT_WHILE: + check_stmts(((PLpgSQL_stmt_while *) stmt)->body); + break; + case PLPGSQL_STMT_FORI: + check_stmts(((PLpgSQL_stmt_fori *) stmt)->body); + break; + case PLPGSQL_STMT_FORS: + { + PLpgSQL_stmt_fors *fors_stmt = (PLpgSQL_stmt_fors *) stmt; + check_sql_statement(fors_stmt->query->query); + check_stmts(fors_stmt->body); + } + break; + case PLPGSQL_STMT_DYNFORS: + check_stmts(((PLpgSQL_stmt_dynfors *) stmt)->body); + break; + case PLPGSQL_STMT_EXECSQL: + check_sql_statement(((PLpgSQL_stmt_execsql *) stmt)->sqlstmt->query); + break; + default: + /* do nothing */ + break; + } + } +} + +/* + * Issue warnings about various ill-advised constructs in the function + * body. We don't do very many compile-time checks at the moment, but + * a few is better than none... + */ +static void +check_function(PLpgSQL_function *function) +{ + check_stmts(function->action->body); +} + +/* * Fetch the argument names, if any, from the proargnames field of the * pg_proc tuple. Results are palloc'd. */ --- src/test/regress/expected/plpgsql.out +++ src/test/regress/expected/plpgsql.out @@ -2032,3 +2032,153 @@ DETAIL: Key (f1)=(2) is not present in table "master". drop function trap_foreign_key(int); drop function trap_foreign_key_2(); +-- +-- issue warnings about unreachable code +-- +create function exit_warn() returns int as $$ +declare x int; +begin + x := 5; + loop + x := x + 1; + exit; + x := x + 2; + end loop; + x := x + 3; + return x; +end;$$ language 'plpgsql'; +WARNING: assignment is unreachable, due to exit near line 6 +CONTEXT: compile of PL/pgSQL function "exit_warn" near line 7 +create function exit_no_warn() returns int as $$ +declare x int; +begin + x := 5; + loop + x := x + 5; + exit when x > 20; + x := x - 1; + end loop; + x := x + 3; + return x; +end;$$ language 'plpgsql'; +create function return_warn() returns int as $$ +declare + a int; + b int; +begin + a := 3; + b := 2; + if a > b then + return 5; + begin + return 10; + end; + end if; + return 15; +end;$$ language 'plpgsql'; +WARNING: block variables initialization is unreachable, due to return near line 8 +CONTEXT: compile of PL/pgSQL function "return_warn" near line 9 +create function return_warn2() returns int as $$ +begin + if 5 > 5 then + return 5; + return 10; + end if; + return 10; + return 15; + return 20; +end;$$ language 'plpgsql'; +WARNING: return is unreachable, due to return near line 3 +CONTEXT: compile of PL/pgSQL function "return_warn2" near line 4 +WARNING: return is unreachable, due to return near line 6 +CONTEXT: compile of PL/pgSQL function "return_warn2" near line 7 +create function raise_warn() returns int as $$ +declare x int; +begin + x := 10; + begin + raise exception 'some random exception'; + return x; + exception + when RAISE_EXCEPTION then NULL; + end; + + begin + raise exception 'foo'; + exception + when RAISE_EXCEPTION then + return x; + x := x + 1; + end; + + begin + raise notice 'not an exception'; + return x; + end; +end;$$ language 'plpgsql'; +WARNING: return is unreachable, due to raise near line 5 +CONTEXT: compile of PL/pgSQL function "raise_warn" near line 6 +WARNING: assignment is unreachable, due to return near line 15 +CONTEXT: compile of PL/pgSQL function "raise_warn" near line 16 +-- note that we only want to emit warnings at CREATE FUNCTION time, not +-- when the function is invoked. +SELECT exit_warn(); + exit_warn +----------- + 9 +(1 row) + +SELECT return_warn(); + return_warn +------------- + 5 +(1 row) + +SELECT return_warn2(); + return_warn2 +-------------- + 10 +(1 row) + +SELECT raise_warn(); + raise_warn +------------ + 10 +(1 row) + +-- +-- reject function definitions that contain malformed SQL queries at +-- compile-time, where possible +-- +create function bad_sql1() returns int as $$ +declare a int; +begin + a := 5; + Johnny Yuma; + a := 10; + return a; +end$$ language 'plpgsql'; +ERROR: syntax error at or near "Johnny" at character 45 +LINE 1: create function bad_sql1() returns int as $$ + ^ +create function bad_sql2() returns int as $$ +declare r record; +begin + for r in I fought the law, the law won LOOP + raise notice 'in loop'; + end loop; + return 5; +end;$$ language 'plpgsql'; +ERROR: syntax error at or near "I" at character 46 +LINE 2: declare r record; + ^ +create function bad_sql3() returns int as $$ +declare x int; +begin + return 10; + -- report syntax errors even in unreachable code + ekgekrgkjge djngrg; + return 15; +end;$$ language 'plpgsql'; +WARNING: SQL statement is unreachable, due to return near line 3 +CONTEXT: compile of PL/pgSQL function "bad_sql3" near line 5 --- src/test/regress/sql/plpgsql.sql +++ src/test/regress/sql/plpgsql.sql @@ -1764,3 +1764,123 @@ drop function trap_foreign_key(int); drop function trap_foreign_key_2(); + +-- +-- issue warnings about unreachable code +-- +create function exit_warn() returns int as $$ +declare x int; +begin + x := 5; + loop + x := x + 1; + exit; + x := x + 2; + end loop; + x := x + 3; + return x; +end;$$ language 'plpgsql'; + +create function exit_no_warn() returns int as $$ +declare x int; +begin + x := 5; + loop + x := x + 5; + exit when x > 20; + x := x - 1; + end loop; + x := x + 3; + return x; +end;$$ language 'plpgsql'; + +create function return_warn() returns int as $$ +declare + a int; + b int; +begin + a := 3; + b := 2; + if a > b then + return 5; + begin + return 10; + end; + end if; + return 15; +end;$$ language 'plpgsql'; + +create function return_warn2() returns int as $$ +begin + if 5 > 5 then + return 5; + return 10; + end if; + return 10; + return 15; + return 20; +end;$$ language 'plpgsql'; + +create function raise_warn() returns int as $$ +declare x int; +begin + x := 10; + begin + raise exception 'some random exception'; + return x; + exception + when RAISE_EXCEPTION then NULL; + end; + + begin + raise exception 'foo'; + exception + when RAISE_EXCEPTION then + return x; + x := x + 1; + end; + + begin + raise notice 'not an exception'; + return x; + end; +end;$$ language 'plpgsql'; + +-- note that we only want to emit warnings at CREATE FUNCTION time, not +-- when the function is invoked. +SELECT exit_warn(); +SELECT return_warn(); +SELECT return_warn2(); +SELECT raise_warn(); + +-- +-- reject function definitions that contain malformed SQL queries at +-- compile-time, where possible +-- + +create function bad_sql1() returns int as $$ +declare a int; +begin + a := 5; + Johnny Yuma; + a := 10; + return a; +end$$ language 'plpgsql'; + +create function bad_sql2() returns int as $$ +declare r record; +begin + for r in I fought the law, the law won LOOP + raise notice 'in loop'; + end loop; + return 5; +end;$$ language 'plpgsql'; + +create function bad_sql3() returns int as $$ +declare x int; +begin + return 10; + -- report syntax errors even in unreachable code + ekgekrgkjge djngrg; + return 15; +end;$$ language 'plpgsql';
---------------------------(end of broadcast)--------------------------- TIP 5: Have you checked our extensive FAQ? http://www.postgresql.org/docs/faqs/FAQ.html