Screenshot:
 https://dmalcolm.fedorapeople.org/gcc/2015-09-10/spellcheck.html

There are a couple of FIXMEs here:
* where to call levenshtein_distance_unit_tests
* should we attempt error-recovery in c-typeck.c:build_component_ref

gcc/ChangeLog:
        * Makefile.in (OBJS): Add spellcheck.o.
        * spellcheck.c: New file.
        * spellcheck.h: New file.

gcc/c-family/ChangeLog:
        * c-common.h (lookup_name_fuzzy): New decl.

gcc/c/ChangeLog:
        * c-decl.c: Include spellcheck.h.
        (lookup_name_fuzzy): New.
        * c-parser.c: Include spellcheck.h.
        (c_parser_declaration_or_fndef): If "unknown type name",
        attempt to suggest a close match using lookup_name_fuzzy.
        (c_parser_postfix_expression): Pass source range in calls to
        build_component_ref.
        (c_parser_postfix_expression_after_primary): Likewise.
        * c-tree.h (build_component_ref): Add source_range * param.
        * c-typeck.c: Include gcc-rich-location.h and spellcheck.h.
        (lookup_field_fuzzy_find_candidates): New function.
        (lookup_field_fuzzy): New function.
        (build_component_ref): Add "ident_range" param, use it
        when printing field-not-found error.  Use lookup_field_fuzzy
        to suggest close matches.

gcc/objc/ChangeLog:
        * objc-act.c (objc_build_component_ref): Pass NULL to new param
        of build_component_ref.

gcc/testsuite/ChangeLog:
        * gcc.dg/spellcheck.c: New file.
---
 gcc/Makefile.in                   |   1 +
 gcc/c-family/c-common.h           |   1 +
 gcc/c/c-decl.c                    |  37 +++++++++++
 gcc/c/c-parser.c                  |  30 ++++++---
 gcc/c/c-tree.h                    |   2 +-
 gcc/c/c-typeck.c                  |  92 +++++++++++++++++++++++++++-
 gcc/objc/objc-act.c               |   3 +-
 gcc/spellcheck.c                  | 126 ++++++++++++++++++++++++++++++++++++++
 gcc/spellcheck.h                  |  32 ++++++++++
 gcc/testsuite/gcc.dg/spellcheck.c |  36 +++++++++++
 10 files changed, 347 insertions(+), 13 deletions(-)
 create mode 100644 gcc/spellcheck.c
 create mode 100644 gcc/spellcheck.h
 create mode 100644 gcc/testsuite/gcc.dg/spellcheck.c

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index c472696..7fbd80a 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1392,6 +1392,7 @@ OBJS = \
        shrink-wrap.o \
        simplify-rtx.o \
        sparseset.o \
+       spellcheck.o \
        sreal.o \
        stack-ptr-mod.o \
        statistics.o \
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index b9a5d72..1903574 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -972,6 +972,7 @@ extern tree finish_label_address_expr (tree, location_t);
    different implementations.  Used in c-common.c.  */
 extern tree lookup_label (tree);
 extern tree lookup_name (tree);
+extern tree lookup_name_fuzzy (tree);
 extern bool lvalue_p (const_tree);
 
 extern bool vector_targets_convertible_p (const_tree t1, const_tree t2);
diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c
index b7f0241..778c935 100644
--- a/gcc/c/c-decl.c
+++ b/gcc/c/c-decl.c
@@ -64,6 +64,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "c-family/c-ada-spec.h"
 #include "cilk.h"
 #include "builtins.h"
+#include "spellcheck.h"
 
 /* In grokdeclarator, distinguish syntactic contexts of declarators.  */
 enum decl_context
@@ -3900,6 +3901,42 @@ lookup_name_in_scope (tree name, struct c_scope *scope)
       return b->decl;
   return 0;
 }
+
+/* Look for the closest match for NAME within the currently valid
+   scopes.
+
+   This finds the identifier with the lowest Levenshtein distance to
+   NAME.  If there are multiple candidates with equal minimal distance,
+   the first one found is returned.  Scopes are searched from innermost
+   outwards, and within a scope in reverse order of declaration, thus
+   benefiting candidates "near" to the current scope.  */
+
+tree
+lookup_name_fuzzy (tree name)
+{
+  gcc_assert (TREE_CODE (name) == IDENTIFIER_NODE);
+
+  c_binding *best_binding = NULL;
+  int best_distance = INT_MAX;
+
+  for (c_scope *scope = current_scope; scope; scope = scope->outer)
+    for (c_binding *binding = scope->bindings; binding; binding = 
binding->prev)
+      {
+       if (!binding->id)
+         continue;
+       int dist = levenshtein_distance (name, binding->id);
+       if (dist < best_distance)
+         {
+           best_distance = dist;
+           best_binding = binding;
+         }
+      }
+  if (best_binding)
+    return best_binding->id;
+  else
+    return NULL;
+}
+
 
 /* Create the predefined scalar types of C,
    and some nodes representing standard constants (0, 1, (void *) 0).
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 0c62496..d134d85 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -66,6 +66,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "gomp-constants.h"
 #include "c-family/c-indentation.h"
+#include "spellcheck.h"
 
 
 /* Initialization routine for this file.  */
@@ -1543,8 +1544,14 @@ c_parser_declaration_or_fndef (c_parser *parser, bool 
fndef_ok,
           || c_parser_peek_2nd_token (parser)->type == CPP_MULT)
       && (!nested || !lookup_name (c_parser_peek_token (parser)->value)))
     {
-      error_at (here, "unknown type name %qE",
-                c_parser_peek_token (parser)->value);
+      tree hint = lookup_name_fuzzy (c_parser_peek_token (parser)->value);
+      if (hint)
+       error_at (here, "unknown type name %qE; did you mean %qE?",
+                 c_parser_peek_token (parser)->value,
+                 hint);
+      else
+       error_at (here, "unknown type name %qE",
+                 c_parser_peek_token (parser)->value);
 
       /* Parse declspecs normally to get a correct pointer type, but avoid
          a further "fails to be a type name" error.  Refuse nested functions
@@ -7425,7 +7432,8 @@ c_parser_postfix_expression (c_parser *parser)
            if (c_parser_next_token_is (parser, CPP_NAME))
              {
                offsetof_ref = build_component_ref
-                 (loc, offsetof_ref, c_parser_peek_token (parser)->value);
+                 (loc, offsetof_ref, c_parser_peek_token (parser)->value,
+                  &c_parser_peek_token (parser)->range);
                c_parser_consume_token (parser);
                while (c_parser_next_token_is (parser, CPP_DOT)
                       || c_parser_next_token_is (parser,
@@ -7453,7 +7461,8 @@ c_parser_postfix_expression (c_parser *parser)
                          }
                        offsetof_ref = build_component_ref
                          (loc, offsetof_ref,
-                          c_parser_peek_token (parser)->value);
+                          c_parser_peek_token (parser)->value,
+                          &c_parser_peek_token (parser)->range);
                        c_parser_consume_token (parser);
                      }
                    else
@@ -7913,6 +7922,7 @@ c_parser_postfix_expression_after_primary (c_parser 
*parser,
   vec<location_t> arg_loc = vNULL;
   location_t start;
   location_t finish;
+  source_range ident_range;
 
   while (true)
     {
@@ -8031,10 +8041,12 @@ c_parser_postfix_expression_after_primary (c_parser 
*parser,
               expr.original_type = NULL;
              return expr;
            }
+         ident_range = c_parser_peek_token (parser)->range;
          start = EXPR_LOCATION_RANGE (expr.value).m_start;
-         finish = c_parser_peek_token (parser)->range.m_finish;
+         finish = ident_range.m_finish;
          c_parser_consume_token (parser);
-         expr.value = build_component_ref (op_loc, expr.value, ident);
+         expr.value = build_component_ref (op_loc, expr.value, ident,
+                                           &ident_range);
          set_source_range (&expr.value, start, finish);
          expr.original_code = ERROR_MARK;
          if (TREE_CODE (expr.value) != COMPONENT_REF)
@@ -8063,14 +8075,16 @@ c_parser_postfix_expression_after_primary (c_parser 
*parser,
              expr.original_type = NULL;
              return expr;
            }
+         ident_range = c_parser_peek_token (parser)->range;
          start = EXPR_LOCATION_RANGE (expr.value).m_start;
-         finish = c_parser_peek_token (parser)->range.m_finish;
+         finish = ident_range.m_finish;
          c_parser_consume_token (parser);
          expr.value = build_component_ref (op_loc,
                                            build_indirect_ref (op_loc,
                                                                expr.value,
                                                                RO_ARROW),
-                                           ident);
+                                           ident,
+                                           &ident_range);
          set_source_range (&expr.value, start, finish);
          expr.original_code = ERROR_MARK;
          if (TREE_CODE (expr.value) != COMPONENT_REF)
diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h
index 0810a74..4c3c637 100644
--- a/gcc/c/c-tree.h
+++ b/gcc/c/c-tree.h
@@ -593,7 +593,7 @@ extern struct c_expr convert_lvalue_to_rvalue (location_t, 
struct c_expr,
                                               bool, bool);
 extern void mark_exp_read (tree);
 extern tree composite_type (tree, tree);
-extern tree build_component_ref (location_t, tree, tree);
+extern tree build_component_ref (location_t, tree, tree, source_range *);
 extern tree build_array_ref (location_t, tree, tree);
 extern tree build_external_ref (source_range, tree, int, tree *);
 extern void pop_maybe_used (bool);
diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index 506abb3..507400b 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -54,6 +54,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "c-family/c-ubsan.h"
 #include "cilk.h"
 #include "gomp-constants.h"
+#include "gcc-rich-location.h"
+#include "spellcheck.h"
 
 /* Possible cases of implicit bad conversions.  Used to select
    diagnostic messages in convert_for_assignment.  */
@@ -2259,12 +2261,72 @@ lookup_field (tree type, tree component)
   return tree_cons (NULL_TREE, field, NULL_TREE);
 }
 
+/* Recursively append candidate IDENTIFIER_NODEs to CANDIDATES.  */
+
+static void
+lookup_field_fuzzy_find_candidates (tree type, tree component,
+                                   vec<tree> *candidates)
+{
+  tree field;
+  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+    {
+      if (DECL_NAME (field) == NULL_TREE
+         && (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE
+             || TREE_CODE (TREE_TYPE (field)) == UNION_TYPE))
+       {
+         lookup_field_fuzzy_find_candidates (TREE_TYPE (field),
+                                             component,
+                                             candidates);
+       }
+
+      if (DECL_NAME (field))
+       candidates->safe_push (field);
+    }
+}
+
+/* Like "lookup_field", but find the closest match, rather than
+   necessarily an exact match.  */
+
+static tree
+lookup_field_fuzzy (tree type, tree component)
+{
+  gcc_assert (TREE_CODE (component) == IDENTIFIER_NODE);
+
+  /* FIXME: move this to a unittest suite. */
+  levenshtein_distance_unit_tests ();
+
+  /* First, gather a list of candidates.  */
+  auto_vec <tree> candidates;
+
+  lookup_field_fuzzy_find_candidates (type, component,
+                                     &candidates);
+
+  /* Now determine which is closest.  */
+  int i;
+  tree field;
+  tree best_field = NULL;
+  int best_distance = INT_MAX;
+  FOR_EACH_VEC_ELT (candidates, i, field)
+    {
+      int dist = levenshtein_distance (component, DECL_NAME (field));
+      if (dist < best_distance)
+       {
+         best_distance = dist;
+         best_field = field;
+       }
+    }
+
+  return best_field;
+}
+
 /* Make an expression to refer to the COMPONENT field of structure or
    union value DATUM.  COMPONENT is an IDENTIFIER_NODE.  LOC is the
-   location of the COMPONENT_REF.  */
+   location of the COMPONENT_REF.  IDENT_RANGE points to the source range of
+   the identifier, or is NULL.  */
 
 tree
-build_component_ref (location_t loc, tree datum, tree component)
+build_component_ref (location_t loc, tree datum, tree component,
+                    source_range *ident_range)
 {
   tree type = TREE_TYPE (datum);
   enum tree_code code = TREE_CODE (type);
@@ -2294,7 +2356,31 @@ build_component_ref (location_t loc, tree datum, tree 
component)
 
       if (!field)
        {
-         error_at (loc, "%qT has no member named %qE", type, component);
+         if (!ident_range)
+           {
+             error_at (loc, "%qT has no member named %qE",
+                       type, component);
+             return error_mark_node;
+           }
+         gcc_rich_location richloc (*ident_range);
+         if (TREE_CODE (datum) == INDIRECT_REF)
+           richloc.add_expr (TREE_OPERAND (datum, 0));
+         else
+           richloc.add_expr (datum);
+         field = lookup_field_fuzzy (type, component);
+         if (field)
+           {
+             error_at_rich_loc
+               (&richloc,
+                "%qT has no member named %qE; did you mean %qE?",
+                type, component, field);
+             /* FIXME: error recovery: should we try to keep going,
+                with "field"? (having issued an error, and hence no
+                output).  */
+           }
+         else
+           error_at_rich_loc (&richloc, "%qT has no member named %qE",
+                              type, component);
          return error_mark_node;
        }
 
diff --git a/gcc/objc/objc-act.c b/gcc/objc/objc-act.c
index a1e32fc..04f2824 100644
--- a/gcc/objc/objc-act.c
+++ b/gcc/objc/objc-act.c
@@ -2665,7 +2665,8 @@ objc_build_component_ref (tree datum, tree component)
   return finish_class_member_access_expr (datum, component, false,
                                           tf_warning_or_error);
 #else
-  return build_component_ref (input_location, datum, component);
+  return build_component_ref (input_location, datum, component,
+                             NULL);
 #endif
 }
 
diff --git a/gcc/spellcheck.c b/gcc/spellcheck.c
new file mode 100644
index 0000000..2892381
--- /dev/null
+++ b/gcc/spellcheck.c
@@ -0,0 +1,126 @@
+/* Find near-matches for strings and identifiers.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "spellcheck.h"
+
+/* The Levenshtein distance is an "edit-distance": the minimal
+   number of one-character insertions, removals or substitutions
+   that are needed to change one string into another.
+
+   This implementation uses the Wagner-Fischer algorithm.
+
+   It is currently a naive implementation, various optimizations are
+   possible (e.g. removal of the M*N allocation in favor of a pair
+   of allocations of M and N).  */
+
+static int
+levenshtein_distance (const char *s, int m,
+                     const char *t, int n)
+{
+  const bool debug = false;
+
+  int *d = new int [(m + 1) * (n + 1)];
+  if (debug)
+    memset (d, 0xab, (m + 1) * (n + 1) * sizeof (int));
+
+#define D(I, J) (d[((I) * (n + 1)) + (J)])
+
+  for (int i = 0; i <= m; i++)
+    D(i, 0) = i;
+  for (int j = 0; j <= n; j++)
+    D(0, j) = j;
+
+  for (int j = 1; j <= n; j++)
+    for (int i = 1; i <= m; i++)
+      if (s[i - 1] == t[j - 1])
+       D(i, j) = D(i - 1, j - 1);
+      else
+       {
+         int deletion     = D(i - 1, j    ) + 1;
+         int insertion    = D(i,     j - 1) + 1;
+         int substitution = D(i - 1, j - 1) + 1;
+         int cheapest = MIN (deletion, insertion);
+         cheapest = MIN (cheapest, substitution);
+         D(i, j) = cheapest;
+       }
+
+  if (debug)
+    {
+      printf ("s=\"%s\" t=\"%s\"\n", s, t);
+      for (int j = 0; j <= n; j++)
+       {
+         for (int i = 0; i <= m; i++)
+           printf ("%i ", D(i, j));
+         printf ("\n");
+       }
+    }
+
+  int result = D(m, n);
+#undef D
+
+  delete[] d;
+
+  return result;
+}
+
+/* Calculate Levenshtein distance between two nil-terminated strings.
+   This exists purely for the unit tests.  */
+
+int
+levenshtein_distance (const char *s, const char *t)
+{
+  return levenshtein_distance (s, strlen (s), t, strlen (t));
+}
+
+/* Unit tests for levenshtein_distance.  */
+
+void
+levenshtein_distance_unit_tests (void)
+{
+  gcc_assert (levenshtein_distance ("", "nonempty") == strlen ("nonempty"));
+  gcc_assert (levenshtein_distance ("nonempty", "") == strlen ("nonempty"));
+  gcc_assert (levenshtein_distance ("saturday", "sunday") == 3);
+  gcc_assert (levenshtein_distance ("sunday", "saturday") == 3);
+  gcc_assert (levenshtein_distance ("foo", "m_foo") == 2);
+  gcc_assert (levenshtein_distance ("m_foo", "foo") == 2);
+  gcc_assert (levenshtein_distance ("hello_world", "HelloWorld") == 3);
+  gcc_assert (levenshtein_distance
+             ("the quick brown fox jumps over the lazy dog", "dog") == 40);
+  gcc_assert (levenshtein_distance
+             ("the quick brown fox jumps over the lazy dog", "fox") == 40);
+}
+
+/* Calculate Levenshtein distance between two identifiers.  */
+
+int
+levenshtein_distance (tree ident_s, tree ident_t)
+{
+  gcc_assert (TREE_CODE (ident_s) == IDENTIFIER_NODE);
+  gcc_assert (TREE_CODE (ident_t) == IDENTIFIER_NODE);
+
+  return levenshtein_distance (IDENTIFIER_POINTER (ident_s),
+                              IDENTIFIER_LENGTH (ident_s),
+                              IDENTIFIER_POINTER (ident_t),
+                              IDENTIFIER_LENGTH (ident_t));
+}
diff --git a/gcc/spellcheck.h b/gcc/spellcheck.h
new file mode 100644
index 0000000..7900aa2
--- /dev/null
+++ b/gcc/spellcheck.h
@@ -0,0 +1,32 @@
+/* Find near-matches for strings and identifiers.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_SPELLCHECK_H
+#define GCC_SPELLCHECK_H
+
+extern void
+levenshtein_distance_unit_tests (void);
+
+extern int
+levenshtein_distance (const char *s, const char *t);
+
+extern int
+levenshtein_distance (tree ident_s, tree ident_t);
+
+#endif  /* GCC_SPELLCHECK_H  */
diff --git a/gcc/testsuite/gcc.dg/spellcheck.c 
b/gcc/testsuite/gcc.dg/spellcheck.c
new file mode 100644
index 0000000..e34ade8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/spellcheck.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-fdiagnostics-show-caret" } */
+
+struct foo
+{
+  int foo;
+  int bar;
+  int baz;
+};
+
+int test (struct foo *ptr)
+{
+  return ptr->m_bar; /* { dg-error "'struct foo' has no member named 'm_bar'; 
did you mean 'bar'?" } */
+
+/* { dg-begin-multiline-output "" }
+   return ptr->m_bar;
+          ~~~  ^~~~~
+   { dg-end-multiline-output "" } */
+}
+
+int test2 (void)
+{
+  struct foo instance = {};
+  return instance.m_bar; /* { dg-error "'struct foo' has no member named 
'm_bar'; did you mean 'bar'?" } */
+
+/* { dg-begin-multiline-output "" }
+   return instance.m_bar;
+          ~~~~~~~~ ^~~~~
+   { dg-end-multiline-output "" } */
+}
+
+int64 foo; /* { dg-error "unknown type name 'int64'; did you mean 'int'?" } */
+/* { dg-begin-multiline-output "" }
+ int64 foo;
+ ^~~~~
+   { dg-end-multiline-output "" } */
-- 
1.8.5.3

Reply via email to