Probably coming too late, sorry. On Thu, Nov 12, 2015 at 09:08:36PM -0500, David Malcolm wrote: > index 4335a87..eb4e1fc 100644 > --- a/gcc/c/c-typeck.c > +++ b/gcc/c/c-typeck.c > @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. If not see > #include "c-family/c-ubsan.h" > #include "cilk.h" > #include "gomp-constants.h" > +#include "spellcheck.h" > > /* Possible cases of implicit bad conversions. Used to select > diagnostic messages in convert_for_assignment. */ > @@ -2242,6 +2243,72 @@ lookup_field (tree type, tree component) > return tree_cons (NULL_TREE, field, NULL_TREE); > } > > +/* Recursively append candidate IDENTIFIER_NODEs to CANDIDATES. */ > + > +static void > +lookup_field_fuzzy_find_candidates (tree type, tree component, > + vec<tree> *candidates) > +{ > + tree field; > + for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
I'd prefer declaring field in the for loop, so for (tree field = TYPE_FIELDS... > + && (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE > + || TREE_CODE (TREE_TYPE (field)) == UNION_TYPE)) This is RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)). > + { > + lookup_field_fuzzy_find_candidates (TREE_TYPE (field), > + component, > + candidates); > + } Lose the brackets around a single statement. > + if (DECL_NAME (field)) > + candidates->safe_push (DECL_NAME (field)); > + } > +} > + > +/* Like "lookup_field", but find the closest matching IDENTIFIER_NODE, > + rather than returning a TREE_LIST for an exact match. */ > + > +static tree > +lookup_field_fuzzy (tree type, tree component) > +{ > + gcc_assert (TREE_CODE (component) == IDENTIFIER_NODE); > + > + /* First, gather a list of candidates. */ > + auto_vec <tree> candidates; > + > + lookup_field_fuzzy_find_candidates (type, component, > + &candidates); > + > + /* Now determine which is closest. */ > + int i; > + tree identifier; > + tree best_identifier = NULL; NULL_TREE > + edit_distance_t best_distance = MAX_EDIT_DISTANCE; > + FOR_EACH_VEC_ELT (candidates, i, identifier) > + { > + gcc_assert (TREE_CODE (identifier) == IDENTIFIER_NODE); > + edit_distance_t dist = levenshtein_distance (component, identifier); > + if (dist < best_distance) > + { > + best_distance = dist; > + best_identifier = identifier; > + } > + } > + > + /* If more than half of the letters were misspelled, the suggestion is > + likely to be meaningless. */ > + if (best_identifier) > + { > + unsigned int cutoff = MAX (IDENTIFIER_LENGTH (component), > + IDENTIFIER_LENGTH (best_identifier)) / 2; > + if (best_distance > cutoff) > + return NULL; NULL_TREE > +/* The Levenshtein distance is an "edit-distance": the minimal > + number of one-character insertions, removals or substitutions > + that are needed to change one string into another. > + > + This implementation uses the Wagner-Fischer algorithm. */ > + > +static edit_distance_t > +levenshtein_distance (const char *s, int len_s, > + const char *t, int len_t) > +{ > + const bool debug = false; > + > + if (debug) > + { > + printf ("s: \"%s\" (len_s=%i)\n", s, len_s); > + printf ("t: \"%s\" (len_t=%i)\n", t, len_t); > + } Did you leave this debug stuff here intentionally? > + /* Build the rest of the row by considering neighbours to > + the north, west and northwest. */ > + for (int j = 0; j < len_s; j++) > + { > + edit_distance_t cost = (s[j] == t[i] ? 0 : 1); > + edit_distance_t deletion = v1[j] + 1; > + edit_distance_t insertion = v0[j + 1] + 1; The formatting doesn't look right here. Marek