Re: [HACKERS] Doing better at HINTing an appropriate column within errorMissingColumn()

Peter Geoghegan Sat, 15 Nov 2014 16:36:51 -0800

On Wed, Nov 12, 2014 at 12:59 PM, Robert Haas <robertmh...@gmail.com> wrote:
> On that topic, I think there's unanimous consensus against the design
> where equally-distant matches are treated differently based on whether
> they are in the same RTE or different RTEs.  I think you need to
> change that if you want to get anywhere with this.  On a related note,
> the use of the additional parameter AttrNumber closest[2] to
> searchRangeTableForCol() and of the additional parameters AttrNumber
> *matchedatt and int *distance to scanRTEForColumn() is less than
> self-documenting.  I suggest creating a structure called something
> like FuzzyAttrMatchState and passing a pointer to it down to both
> functions.


Attached patch incorporates this feedback.

The only user-visible difference between this revision and the
previous revision is that it's quite possible for two suggestion to
originate from the same RTE (there is exactly one change in the
regression test's expected output as compared to the last revision for
this reason. The regression tests are otherwise unchanged). It's still
not possible to see more than 2 suggestions under any circumstances,
no matter where they might have originated from, which I think is
appropriate -- we continue to not present any HINT in the event of 3
or more equidistant matches.

I think that the restructuring required to pass around a state
variable has resulted in somewhat clearer code.

-- 
Peter Geoghegan

From 0aef5253f10ebb1ee5bbcc73782eff1352c7ab84 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <p...@heroku.com>
Date: Wed, 12 Nov 2014 15:31:37 -0800
Subject: [PATCH] Levenshtein distance column HINT

Add a new HINT -- a guess as to what column the user might have intended
to reference, to be shown in various contexts where an
ERRCODE_UNDEFINED_COLUMN error is raised.  The user will see this HINT
when he or she fat-fingers a column reference in an ad-hoc SQL query, or
incorrectly pluralizes or fails to pluralize a column reference, or
incorrectly omits or includes an underscore or other punctuation
character.

The HINT suggests a column in the range table with the lowest
Levenshtein distance, or the tied-for-best pair of matching columns in
the event of there being exactly two equally likely candidates (these
may come from multiple RTEs, or the same RTE).  Limiting to two the
number of cases where multiple equally likely suggestions are all
offered at once (i.e.  giving no hint when the number of equally likely
candidates exceeds two) is a measure against suggestions that are of low
quality in an absolute sense.

A further, final measure is taken against suggestions that are of low
absolute quality:  If the distance exceeds a normalized distance
threshold, no suggestion is given.
---
 src/backend/parser/parse_expr.c           |   9 +-
 src/backend/parser/parse_func.c           |   2 +-
 src/backend/parser/parse_relation.c       | 345 +++++++++++++++++++++++++++---
 src/backend/utils/adt/levenshtein.c       |   9 +
 src/include/parser/parse_relation.h       |  20 +-
 src/test/regress/expected/alter_table.out |   8 +
 src/test/regress/expected/join.out        |  39 ++++
 src/test/regress/expected/plpgsql.out     |   1 +
 src/test/regress/expected/rowtypes.out    |   1 +
 src/test/regress/expected/rules.out       |   1 +
 src/test/regress/expected/without_oid.out |   2 +
 src/test/regress/sql/join.sql             |  24 +++
 12 files changed, 421 insertions(+), 40 deletions(-)

diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 4a8aaf6..a77a3a0 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -621,7 +621,8 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
 				colname = strVal(field2);
 
 				/* Try to identify as a column of the RTE */
-				node = scanRTEForColumn(pstate, rte, colname, cref->location);
+				node = scanRTEForColumn(pstate, rte, colname, cref->location,
+										NULL);
 				if (node == NULL)
 				{
 					/* Try it as a function call on the whole row */
@@ -666,7 +667,8 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
 				colname = strVal(field3);
 
 				/* Try to identify as a column of the RTE */
-				node = scanRTEForColumn(pstate, rte, colname, cref->location);
+				node = scanRTEForColumn(pstate, rte, colname, cref->location,
+										NULL);
 				if (node == NULL)
 				{
 					/* Try it as a function call on the whole row */
@@ -724,7 +726,8 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
 				colname = strVal(field4);
 
 				/* Try to identify as a column of the RTE */
-				node = scanRTEForColumn(pstate, rte, colname, cref->location);
+				node = scanRTEForColumn(pstate, rte, colname, cref->location,
+										NULL);
 				if (node == NULL)
 				{
 					/* Try it as a function call on the whole row */
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c
index 9ebd3fd..472e15e 100644
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -1779,7 +1779,7 @@ ParseComplexProjection(ParseState *pstate, char *funcname, Node *first_arg,
 									 ((Var *) first_arg)->varno,
 									 ((Var *) first_arg)->varlevelsup);
 		/* Return a Var if funcname matches a column, else NULL */
-		return scanRTEForColumn(pstate, rte, funcname, location);
+		return scanRTEForColumn(pstate, rte, funcname, location, NULL);
 	}
 
 	/*
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index 478584d..40c69d7 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -15,6 +15,7 @@
 #include "postgres.h"
 
 #include <ctype.h>
+#include <limits.h>
 
 #include "access/htup_details.h"
 #include "access/sysattr.h"
@@ -520,6 +521,22 @@ GetCTEForRTE(ParseState *pstate, RangeTblEntry *rte, int rtelevelsup)
 }
 
 /*
+ * distanceName
+ *	  Return Levenshtein distance between an actual column name and possible
+ *	  partial match.
+ */
+static int
+distanceName(const char *actual, const char *match, int max)
+{
+	int len = strlen(actual),
+		match_len = strlen(match);
+
+	/* Charge half as much per deletion as per insertion or per substitution */
+	return varstr_levenshtein_less_equal(actual, len, match, match_len,
+										 2, 1, 2, max);
+}
+
+/*
  * scanRTEForColumn
  *	  Search the column names of a single RTE for the given name.
  *	  If found, return an appropriate Var node, else return NULL.
@@ -527,10 +544,22 @@ GetCTEForRTE(ParseState *pstate, RangeTblEntry *rte, int rtelevelsup)
  *
  * Side effect: if we find a match, mark the RTE as requiring read access
  * for the column.
+ *
+ * For those callers that will settle for a fuzzy match (for the purposes of
+ * building diagnostic messages), we match the column attribute whose name has
+ * the lowest Levenshtein distance from colname.  Such callers should not rely
+ * on the return value (even when there is an exact match), nor should they
+ * expect the usual side effect (unless there is an exact match).  This hardly
+ * matters in practice, since an error is imminent.
+ *
+ * If there are two or more attributes in the range table entry tied for
+ * closest, or if there are no matches, accurately report the shortest distance
+ * found overall while not setting a closest attribute.  Note that we never
+ * consider system column names when performing fuzzy matching.
  */
 Node *
 scanRTEForColumn(ParseState *pstate, RangeTblEntry *rte, char *colname,
-				 int location)
+				 int location, FuzzyAttrMatchState *rtestate)
 {
 	Node	   *result = NULL;
 	int			attnum = 0;
@@ -548,12 +577,16 @@ scanRTEForColumn(ParseState *pstate, RangeTblEntry *rte, char *colname,
 	 * Should this somehow go wrong and we try to access a dropped column,
 	 * we'll still catch it by virtue of the checks in
 	 * get_rte_attribute_type(), which is called by make_var().  That routine
-	 * has to do a cache lookup anyway, so the check there is cheap.
+	 * has to do a cache lookup anyway, so the check there is cheap.  Callers
+	 * interested in finding match with shortest distance need to defend
+	 * against this directly, though.
 	 */
 	foreach(c, rte->eref->colnames)
 	{
+		const char *attcolname = strVal(lfirst(c));
+
 		attnum++;
-		if (strcmp(strVal(lfirst(c)), colname) == 0)
+		if (strcmp(attcolname, colname) == 0)
 		{
 			if (result)
 				ereport(ERROR,
@@ -566,6 +599,49 @@ scanRTEForColumn(ParseState *pstate, RangeTblEntry *rte, char *colname,
 			markVarForSelectPriv(pstate, var, rte);
 			result = (Node *) var;
 		}
+
+		if (rtestate)
+		{
+			int				columndistance;
+
+			if (!result)
+				columndistance = distanceName(attcolname, colname,
+											  rtestate->distance);
+			else
+				columndistance = 0;
+
+			if (columndistance < rtestate->distance)
+			{
+				/* Store new lowest observed distance for RTE */
+				rtestate->distance = columndistance;
+				rtestate->first = attnum;
+				rtestate->second = InvalidAttrNumber;
+			}
+			else if (columndistance == rtestate->distance)
+			{
+				/*
+				 * This match distance may equal a prior match within this same
+				 * range table.  When that happens, the prior match may also be
+				 * given, but only if there is no more than two equally distant
+				 * matches from the RTE (in turn, our caller will only accept
+				 * two equally distant matches overall).
+				 */
+				Assert(AttributeNumberIsValid(rtestate->first));
+
+				if (AttributeNumberIsValid(rtestate->second))
+				{
+					/* Too many RTE-level matches */
+					rtestate->first = rtestate->second = InvalidAttrNumber;
+					/* Clearly, distance is too low a bar (for *any* RTE) */
+					rtestate->distance = columndistance - 1;
+				}
+				else
+				{
+					/* Record as provisional second match for RTE */
+					rtestate->second = attnum;
+				}
+			}
+		}
 	}
 
 	/*
@@ -642,7 +718,8 @@ colNameToVar(ParseState *pstate, char *colname, bool localonly,
 				continue;
 
 			/* use orig_pstate here to get the right sublevels_up */
-			newresult = scanRTEForColumn(orig_pstate, rte, colname, location);
+			newresult = scanRTEForColumn(orig_pstate, rte, colname, location,
+										 NULL);
 
 			if (newresult)
 			{
@@ -668,8 +745,15 @@ colNameToVar(ParseState *pstate, char *colname, bool localonly,
 
 /*
  * searchRangeTableForCol
- *	  See if any RangeTblEntry could possibly provide the given column name.
- *	  If so, return a pointer to the RangeTblEntry; else return NULL.
+ *	  See if any RangeTblEntry could possibly provide the given column name (or
+ *	  find the best match available).  Returns state with relevant details.
+ *
+ * Column name may be matched fuzzily;  we provide the closet column(s) if
+ * there was not an exact match.  Caller can depend on returned state to find
+ * right attribute.  If first attribute is InvalidAttrNumber, but corresponding
+ * RTE is set, that indicates an exact match (i.e. column name is present, but
+ * presumably not visible).  However, if the wrong alias was specified by user,
+ * the first match attribute *is* set.
  *
  * This is different from colNameToVar in that it considers every entry in
  * the ParseState's rangetable(s), not only those that are currently visible
@@ -678,26 +762,180 @@ colNameToVar(ParseState *pstate, char *colname, bool localonly,
  * matches, but only one will be returned).  This must be used ONLY as a
  * heuristic in giving suitable error messages.  See errorMissingColumn.
  */
-static RangeTblEntry *
-searchRangeTableForCol(ParseState *pstate, char *colname, int location)
+static FuzzyAttrMatchState *
+searchRangeTableForCol(ParseState *pstate, const char *alias, char *colname,
+					   int location)
 {
 	ParseState *orig_pstate = pstate;
+	FuzzyAttrMatchState *state = palloc(sizeof(FuzzyAttrMatchState));
+	ListCell   *l;
+	int			i;
+
+	state->distance = INT_MAX;
+	state->rsecond = state->rfirst = NULL;
+	state->second = state->first = InvalidAttrNumber;
 
 	while (pstate != NULL)
 	{
-		ListCell   *l;
-
 		foreach(l, pstate->p_rtable)
 		{
-			RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
+			RangeTblEntry  *rte = (RangeTblEntry *) lfirst(l);
+			FuzzyAttrMatchState	rtestate;
+			bool			wrongalias;
 
-			if (scanRTEForColumn(orig_pstate, rte, colname, location))
-				return rte;
+			/*
+			 * Typically, it is not useful to look for matches within join
+			 * RTEs;  they effectively duplicate other RTEs for our purposes,
+			 * and if a match is chosen from a join RTE, an unhelpful alias is
+			 * displayed in the final diagnostic message.
+			 */
+			if (rte->rtekind == RTE_JOIN)
+				continue;
+
+			/*
+			 * Get single best match (or pair of joint best matches, or no
+			 * match) from each RTE -- the best two columns ultimately
+			 * suggested may or may not both be from the same RTE.
+			 *
+			 * Initialize RTE's distance to INT_MAX (and not RT state's current
+			 * lowest distance) to ensure that per-RTE penalties do not distort
+			 * per-RT costing.
+			 */
+			rtestate.distance = INT_MAX;
+			rtestate.rsecond = rtestate.rfirst = NULL;
+			rtestate.second = rtestate.first = InvalidAttrNumber;
+			scanRTEForColumn(orig_pstate, rte, colname, location, &rtestate);
+
+			/* Avoid totally non-matching RTEs (e.g. no RTE attributes) */
+			if (!AttributeNumberIsValid(rtestate.first))
+				continue;
+
+			/* Was alias provided by user that does not match entry's alias? */
+			wrongalias = (alias && strcmp(alias, rte->eref->aliasname) != 0);
+
+			if (rtestate.distance == 0)
+			{
+				/*
+				 * Exact match (for "wrong alias" or "wrong level" cases).
+				 *
+				 * Only consider first element for RTE, because there can only
+				 * be one exact match -- it doesn't seem worth considering the
+				 * case where there are multiple exact matches, so we're done.
+				 */
+				state->rfirst = rte;
+				state->first = wrongalias? rtestate.first : InvalidAttrNumber;
+				state->rsecond = NULL;
+				state->second = InvalidAttrNumber;
+
+				return state;
+			}
+
+			/*
+			 * Charge extra (for inexact matches only) when an alias was
+			 * specified that differs from what might have been used to
+			 * correctly qualify this RTE's closest column
+			 */
+			if (wrongalias)
+				rtestate.distance += 3;
+
+			if (rtestate.distance < state->distance)
+			{
+				/*
+				 * New, uncontested best match RTE, with 1 or 2 best match
+				 * columns
+				 */
+				state->distance = rtestate.distance;
+
+				state->rfirst = rte;
+				state->first = rtestate.first;
+				state->rsecond =
+					AttributeNumberIsValid(rtestate.second)? rte: NULL;
+				state->second = rtestate.second;
+			}
+			else if (rtestate.distance == state->distance)
+			{
+				/*
+				 * Can't have 3 or more matches at same distance.
+				 *
+				 * It's useful to provide two matches for the common case where
+				 * two range tables have single equidistant candidates, as when
+				 * an unqualified (and therefore would-be ambiguous) column
+				 * name is specified which is also misspelled by the user --
+				 * there is probably a foreign key relationship between
+				 * tables/RTEs.  It's also possible to usefully give two column
+				 * suggestions originating from the same RTE, which may be
+				 * useful when an alias strongly suggests that RTE, while there
+				 * are 2 somewhat close matches.
+				 *
+				 * However, when there are more than 2 equally distant matches,
+				 * that's probably because the matches are not useful at all,
+				 * so don't suggest anything.
+				 */
+				if (AttributeNumberIsValid(state->second) ||
+					AttributeNumberIsValid(rtestate.second))
+				{
+					/* 3 or more equidistant matches -- RTE is uninteresting */
+					state->rsecond = state->rfirst = NULL;
+					state->second = state->first = InvalidAttrNumber;
+					/* Clearly this distance is too low a bar generally */
+					state->distance--;
+				}
+				else
+				{
+					/* Record as provisional second match for RT */
+					Assert(state->rfirst != NULL &&
+						   AttributeNumberIsValid(state->first));
+					Assert(state->rsecond == NULL &&
+						   !AttributeNumberIsValid(state->second) );
+					state->rsecond = rte;
+					state->second = rtestate.first;
+				}
+			}
 		}
 
 		pstate = pstate->parentParseState;
 	}
-	return NULL;
+
+	/*
+	 * Handle dropped columns, which can appear here as empty colnames per
+	 * remarks within scanRTEForColumn().  If either the first or second
+	 * suggested attributes are dropped, do not provide any suggestion.
+	 */
+	for (i = 0; i < 2; i++)
+	{
+		AttrNumber		closest;
+		RangeTblEntry  *rte;
+		char		   *closestcol;
+
+		rte = (i == 0 ? state->rfirst: state->rsecond);
+		closest = (i == 0 ? state->first: state->second);
+
+		if (!AttributeNumberIsValid(closest))
+			break;
+
+		closestcol = strVal(list_nth(rte->eref->colnames, closest - 1));
+
+		if (strcmp(closestcol, "") == 0)
+		{
+			state->rsecond = state->rfirst = NULL;
+			state->second = state->first = InvalidAttrNumber;
+			break;
+		}
+	}
+
+	/*
+	 * Distance must be less than a normalized threshold in order to avoid
+	 * completely ludicrous suggestions.  Note that a distance of 6 will be
+	 * seen when 6 deletions are required against actual attribute name, or 3
+	 * insertions/substitutions.
+	 */
+	if (state->distance > 6 && state->distance > strlen(colname) / 2)
+	{
+		state->rsecond = state->rfirst = NULL;
+		state->second = state->first = InvalidAttrNumber;
+	}
+
+	return state;
 }
 
 /*
@@ -2862,34 +3100,71 @@ void
 errorMissingColumn(ParseState *pstate,
 				   char *relname, char *colname, int location)
 {
-	RangeTblEntry *rte;
+	FuzzyAttrMatchState	   *state;
+	char				   *closestfirst = NULL;
 
 	/*
-	 * If relname was given, just play dumb and report it.  (In practice, a
-	 * bad qualification name should end up at errorMissingRTE, not here, so
-	 * no need to work hard on this case.)
+	 * Search the entire rtable looking for possible matches.  If we find one,
+	 * emit a hint about it.
+	 *
+	 * TODO: improve this code (and also errorMissingRTE) to mention using
+	 * LATERAL if appropriate.
 	 */
-	if (relname)
-		ereport(ERROR,
-				(errcode(ERRCODE_UNDEFINED_COLUMN),
-				 errmsg("column %s.%s does not exist", relname, colname),
-				 parser_errposition(pstate, location)));
+	state = searchRangeTableForCol(pstate, relname, colname, location);
 
 	/*
-	 * Otherwise, search the entire rtable looking for possible matches.  If
-	 * we find one, emit a hint about it.
+	 * In practice a bad qualification name should end up at errorMissingRTE,
+	 * not here, so no need to work hard on this case.
 	 *
-	 * TODO: improve this code (and also errorMissingRTE) to mention using
-	 * LATERAL if appropriate.
+	 * Extract closest col string for best match, if any.
+	 *
+	 * Infer an exact match referenced despite not being visible from the fact
+	 * that an attribute number was not present in state passed back -- this is
+	 * what is reported when !closestfirst.  There might also be an exact match
+	 * that was qualified with an incorrect alias, in which case closestfirst
+	 * will be set (so hint is the same as generic fuzzy case).
 	 */
-	rte = searchRangeTableForCol(pstate, colname, location);
-
-	ereport(ERROR,
-			(errcode(ERRCODE_UNDEFINED_COLUMN),
-			 errmsg("column \"%s\" does not exist", colname),
-			 rte ? errhint("There is a column named \"%s\" in table \"%s\", but it cannot be referenced from this part of the query.",
-						   colname, rte->eref->aliasname) : 0,
-			 parser_errposition(pstate, location)));
+	if (state->rfirst && AttributeNumberIsValid(state->first))
+		closestfirst = strVal(list_nth(state->rfirst->eref->colnames,
+									   state->first - 1));
+
+	if (!state->rsecond)
+	{
+		/*
+		 * Handle case where there is zero or one column suggestions to hint,
+		 * including exact matches referenced but not visible.
+		 */
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 relname?
+				 errmsg("column %s.%s does not exist", relname, colname):
+				 errmsg("column \"%s\" does not exist", colname),
+				 state->rfirst? closestfirst?
+				 errhint("Perhaps you meant to reference the column \"%s\".\"%s\".",
+						 state->rfirst->eref->aliasname, closestfirst):
+				 errhint("There is a column named \"%s\" in table \"%s\", but it cannot be referenced from this part of the query.",
+						 colname, state->rfirst->eref->aliasname): 0,
+				 parser_errposition(pstate, location)));
+	}
+	else
+	{
+		/* Extract closest col string for second, joint-best match, if any */
+		char				   *closestsecond;
+
+		closestsecond = strVal(list_nth(state->rsecond->eref->colnames,
+										state->second - 1));
+
+		/* Handle case where there are two equally useful column hints */
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 relname?
+				 errmsg("column %s.%s does not exist", relname, colname):
+				 errmsg("column \"%s\" does not exist", colname),
+				 errhint("Perhaps you meant to reference the column \"%s\".\"%s\" or the column \"%s\".\"%s\".",
+						 state->rfirst->eref->aliasname, closestfirst,
+						 state->rsecond->eref->aliasname, closestsecond),
+				 parser_errposition(pstate, location)));
+	}
 }
 
 
diff --git a/src/backend/utils/adt/levenshtein.c b/src/backend/utils/adt/levenshtein.c
index a8670e9..8d565c6 100644
--- a/src/backend/utils/adt/levenshtein.c
+++ b/src/backend/utils/adt/levenshtein.c
@@ -95,6 +95,15 @@ varstr_levenshtein(const char *source, int slen, const char *target, int tlen,
 #define STOP_COLUMN m
 #endif
 
+	/*
+	 * A common use for Levenshtein distance is to match attributes when building
+	 * diagnostic, user-visible messages.  Restrict the size of
+	 * MAX_LEVENSHTEIN_STRLEN at compile time so that this is guaranteed to
+	 * work.
+	 */
+	StaticAssertStmt(NAMEDATALEN <= MAX_LEVENSHTEIN_STRLEN,
+					 "Levenshtein hinting mechanism restricts NAMEDATALEN");
+
 	m = pg_mbstrlen_with_len(source, slen);
 	n = pg_mbstrlen_with_len(target, tlen);
 
diff --git a/src/include/parser/parse_relation.h b/src/include/parser/parse_relation.h
index d8b9493..7ab966e 100644
--- a/src/include/parser/parse_relation.h
+++ b/src/include/parser/parse_relation.h
@@ -16,6 +16,24 @@
 
 #include "parser/parse_node.h"
 
+
+/*
+ * Support for fuzzily matching column.
+ *
+ * This is for building diagnostic messages, where non-exact matching
+ * attributes are suggested to the user.  The struct's fields may be facets of
+ * a particular RTE, or of an entire range table, depending on context.
+ */
+typedef struct
+{
+	int				distance;	/* Weighted distance (lowest so far) */
+	RangeTblEntry  *rfirst;		/* RTE of first */
+	AttrNumber		first;		/* Closest attribute so far */
+	RangeTblEntry  *rsecond;	/* RTE of second */
+	AttrNumber		second;		/* Second closest attribute so far */
+} FuzzyAttrMatchState;
+
+
 extern RangeTblEntry *refnameRangeTblEntry(ParseState *pstate,
 					 const char *schemaname,
 					 const char *refname,
@@ -35,7 +53,7 @@ extern RangeTblEntry *GetRTEByRangeTablePosn(ParseState *pstate,
 extern CommonTableExpr *GetCTEForRTE(ParseState *pstate, RangeTblEntry *rte,
 			 int rtelevelsup);
 extern Node *scanRTEForColumn(ParseState *pstate, RangeTblEntry *rte,
-				 char *colname, int location);
+				 char *colname, int location, FuzzyAttrMatchState *rtestate);
 extern Node *colNameToVar(ParseState *pstate, char *colname, bool localonly,
 			 int location);
 extern void markVarForSelectPriv(ParseState *pstate, Var *var,
diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out
index d233710..b24fa43 100644
--- a/src/test/regress/expected/alter_table.out
+++ b/src/test/regress/expected/alter_table.out
@@ -536,6 +536,7 @@ create table atacc1 ( test int );
 -- add a check constraint (fails)
 alter table atacc1 add constraint atacc_test1 check (test1>3);
 ERROR:  column "test1" does not exist
+HINT:  Perhaps you meant to reference the column "atacc1"."test".
 drop table atacc1;
 -- something a little more complicated
 create table atacc1 ( test int, test2 int, test3 int);
@@ -1342,6 +1343,7 @@ select f1 from c1;
 ERROR:  column "f1" does not exist
 LINE 1: select f1 from c1;
                ^
+HINT:  Perhaps you meant to reference the column "c1"."f2".
 drop table p1 cascade;
 NOTICE:  drop cascades to table c1
 create table p1 (f1 int, f2 int);
@@ -1355,6 +1357,7 @@ select f1 from c1;
 ERROR:  column "f1" does not exist
 LINE 1: select f1 from c1;
                ^
+HINT:  Perhaps you meant to reference the column "c1"."f2".
 drop table p1 cascade;
 NOTICE:  drop cascades to table c1
 create table p1 (f1 int, f2 int);
@@ -1479,6 +1482,7 @@ select oid > 0, * from altstartwith; -- fails
 ERROR:  column "oid" does not exist
 LINE 1: select oid > 0, * from altstartwith;
                ^
+HINT:  Perhaps you meant to reference the column "altstartwith"."col".
 select * from altstartwith;
  col 
 -----
@@ -1515,10 +1519,12 @@ select oid > 0, * from altwithoid; -- fails
 ERROR:  column "oid" does not exist
 LINE 1: select oid > 0, * from altwithoid;
                ^
+HINT:  Perhaps you meant to reference the column "altwithoid"."col".
 select oid > 0, * from altinhoid; -- fails
 ERROR:  column "oid" does not exist
 LINE 1: select oid > 0, * from altinhoid;
                ^
+HINT:  Perhaps you meant to reference the column "altinhoid"."col".
 select * from altwithoid;
  col 
 -----
@@ -1554,6 +1560,7 @@ select oid > 0, * from altwithoid; -- fails
 ERROR:  column "oid" does not exist
 LINE 1: select oid > 0, * from altwithoid;
                ^
+HINT:  Perhaps you meant to reference the column "altwithoid"."col".
 select oid > 0, * from altinhoid;
  ?column? | col 
 ----------+-----
@@ -1580,6 +1587,7 @@ select oid > 0, * from altwithoid; -- fails
 ERROR:  column "oid" does not exist
 LINE 1: select oid > 0, * from altwithoid;
                ^
+HINT:  Perhaps you meant to reference the column "altwithoid"."col".
 select oid > 0, * from altinhoid;
  ?column? | col 
 ----------+-----
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 2501184..3ef5580 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -2222,6 +2222,12 @@ select * from t1 left join t2 on (t1.a = t2.a);
  200 | 1000 | 200 | 2001
 (5 rows)
 
+-- Test matching of column name with wrong alias
+select t1.x from t1 join t3 on (t1.a = t3.x);
+ERROR:  column t1.x does not exist
+LINE 1: select t1.x from t1 join t3 on (t1.a = t3.x);
+               ^
+HINT:  Perhaps you meant to reference the column "t3"."x".
 --
 -- regression test for 8.1 merge right join bug
 --
@@ -3415,6 +3421,39 @@ select * from
 (0 rows)
 
 --
+-- Test hints given on incorrect column references are useful
+--
+select t1.uunique1 from
+  tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, prefer "t1" suggestipn
+ERROR:  column t1.uunique1 does not exist
+LINE 1: select t1.uunique1 from
+               ^
+HINT:  Perhaps you meant to reference the column "t1"."unique1".
+select t2.uunique1 from
+  tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, prefer "t2" suggestion
+ERROR:  column t2.uunique1 does not exist
+LINE 1: select t2.uunique1 from
+               ^
+HINT:  Perhaps you meant to reference the column "t2"."unique1".
+select uunique1 from
+  tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, suggest both at once
+ERROR:  column "uunique1" does not exist
+LINE 1: select uunique1 from
+               ^
+HINT:  Perhaps you meant to reference the column "t1"."unique1" or the column "t2"."unique1".
+--
+-- Take care to reference the correct RTE
+--
+select atts.relid::regclass, s.* from pg_stats s join
+    pg_attribute a on s.attname = a.attname and s.tablename =
+    a.attrelid::regclass::text join (select unnest(indkey) attnum,
+    indexrelid from pg_index i) atts on atts.attnum = a.attnum where
+    schemaname != 'pg_catalog';
+ERROR:  column atts.relid does not exist
+LINE 1: select atts.relid::regclass, s.* from pg_stats s join
+               ^
+HINT:  Perhaps you meant to reference the column "atts"."indexrelid".
+--
 -- Test LATERAL
 --
 select unique2, x.*
diff --git a/src/test/regress/expected/plpgsql.out b/src/test/regress/expected/plpgsql.out
index 983f1b8..fb4abe6 100644
--- a/src/test/regress/expected/plpgsql.out
+++ b/src/test/regress/expected/plpgsql.out
@@ -4782,6 +4782,7 @@ END$$;
 ERROR:  column "foo" does not exist
 LINE 1: SELECT rtrim(roomno) AS roomno, foo FROM Room ORDER BY roomn...
                                         ^
+HINT:  Perhaps you meant to reference the column "room"."roomno".
 QUERY:  SELECT rtrim(roomno) AS roomno, foo FROM Room ORDER BY roomno
 CONTEXT:  PL/pgSQL function inline_code_block line 4 at FOR over SELECT rows
 -- Check handling of errors thrown from/into anonymous code blocks.
diff --git a/src/test/regress/expected/rowtypes.out b/src/test/regress/expected/rowtypes.out
index 54525de..efd8fa9 100644
--- a/src/test/regress/expected/rowtypes.out
+++ b/src/test/regress/expected/rowtypes.out
@@ -452,6 +452,7 @@ select fullname.text from fullname;  -- error
 ERROR:  column fullname.text does not exist
 LINE 1: select fullname.text from fullname;
                ^
+HINT:  Perhaps you meant to reference the column "fullname"."last".
 -- same, but RECORD instead of named composite type:
 select cast (row('Jim', 'Beam') as text);
     row     
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index c79b45c..01c80af 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2396,6 +2396,7 @@ select xmin, * from fooview;  -- fail, views don't have such a column
 ERROR:  column "xmin" does not exist
 LINE 1: select xmin, * from fooview;
                ^
+HINT:  Perhaps you meant to reference the column "fooview"."x".
 select reltoastrelid, relkind, relfrozenxid
   from pg_class where oid = 'fooview'::regclass;
  reltoastrelid | relkind | relfrozenxid 
diff --git a/src/test/regress/expected/without_oid.out b/src/test/regress/expected/without_oid.out
index cb2c0c0..e805a6a 100644
--- a/src/test/regress/expected/without_oid.out
+++ b/src/test/regress/expected/without_oid.out
@@ -46,6 +46,7 @@ SELECT count(oid) FROM wo;
 ERROR:  column "oid" does not exist
 LINE 1: SELECT count(oid) FROM wo;
                      ^
+HINT:  Perhaps you meant to reference the column "wo"."i".
 VACUUM ANALYZE wi;
 VACUUM ANALYZE wo;
 SELECT min(relpages) < max(relpages), min(reltuples) - max(reltuples)
@@ -81,6 +82,7 @@ SELECT count(oid) FROM create_table_test3;
 ERROR:  column "oid" does not exist
 LINE 1: SELECT count(oid) FROM create_table_test3;
                      ^
+HINT:  Perhaps you meant to reference the column "create_table_test3"."c1" or the column "create_table_test3"."c2".
 PREPARE table_source(int) AS
     SELECT a + b AS c1, a - b AS c2, $1 AS c3 FROM create_table_test;
 CREATE TABLE execute_with WITH OIDS AS EXECUTE table_source(1);
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 718e1d9..ca7f966 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -397,6 +397,10 @@ insert into t2a values (200, 2001);
 
 select * from t1 left join t2 on (t1.a = t2.a);
 
+-- Test matching of column name with wrong alias
+
+select t1.x from t1 join t3 on (t1.a = t3.x);
+
 --
 -- regression test for 8.1 merge right join bug
 --
@@ -1051,6 +1055,26 @@ select * from
   int8_tbl x join (int4_tbl x cross join int4_tbl y(ff)) j on q1 = f1; -- ok
 
 --
+-- Test hints given on incorrect column references are useful
+--
+
+select t1.uunique1 from
+  tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, prefer "t1" suggestipn
+select t2.uunique1 from
+  tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, prefer "t2" suggestion
+select uunique1 from
+  tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, suggest both at once
+
+--
+-- Take care to reference the correct RTE
+--
+
+select atts.relid::regclass, s.* from pg_stats s join
+    pg_attribute a on s.attname = a.attname and s.tablename =
+    a.attrelid::regclass::text join (select unnest(indkey) attnum,
+    indexrelid from pg_index i) atts on atts.attnum = a.attnum where
+    schemaname != 'pg_catalog';
+--
 -- Test LATERAL
 --
 
-- 
1.9.1

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] Doing better at HINTing an appropriate column within errorMissingColumn()

Reply via email to