Probably coming too late, sorry.

On Thu, Nov 12, 2015 at 09:08:36PM -0500, David Malcolm wrote:
> index 4335a87..eb4e1fc 100644
> --- a/gcc/c/c-typeck.c
> +++ b/gcc/c/c-typeck.c
> @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "c-family/c-ubsan.h"
>  #include "cilk.h"
>  #include "gomp-constants.h"
> +#include "spellcheck.h"
>  
>  /* Possible cases of implicit bad conversions.  Used to select
>     diagnostic messages in convert_for_assignment.  */
> @@ -2242,6 +2243,72 @@ lookup_field (tree type, tree component)
>    return tree_cons (NULL_TREE, field, NULL_TREE);
>  }
>  
> +/* Recursively append candidate IDENTIFIER_NODEs to CANDIDATES.  */
> +
> +static void
> +lookup_field_fuzzy_find_candidates (tree type, tree component,
> +                                 vec<tree> *candidates)
> +{
> +  tree field;
> +  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))

I'd prefer declaring field in the for loop, so
  for (tree field = TYPE_FIELDS...

> +       && (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE
> +           || TREE_CODE (TREE_TYPE (field)) == UNION_TYPE))

This is RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)).

> +     {
> +       lookup_field_fuzzy_find_candidates (TREE_TYPE (field),
> +                                           component,
> +                                           candidates);
> +     }

Lose the brackets around a single statement.

> +      if (DECL_NAME (field))
> +     candidates->safe_push (DECL_NAME (field));
> +    }
> +}
> +
> +/* Like "lookup_field", but find the closest matching IDENTIFIER_NODE,
> +   rather than returning a TREE_LIST for an exact match.  */
> +
> +static tree
> +lookup_field_fuzzy (tree type, tree component)
> +{
> +  gcc_assert (TREE_CODE (component) == IDENTIFIER_NODE);
> +
> +  /* First, gather a list of candidates.  */
> +  auto_vec <tree> candidates;
> +
> +  lookup_field_fuzzy_find_candidates (type, component,
> +                                   &candidates);
> +
> +  /* Now determine which is closest.  */
> +  int i;
> +  tree identifier;
> +  tree best_identifier = NULL;

NULL_TREE

> +  edit_distance_t best_distance = MAX_EDIT_DISTANCE;
> +  FOR_EACH_VEC_ELT (candidates, i, identifier)
> +    {
> +      gcc_assert (TREE_CODE (identifier) == IDENTIFIER_NODE);
> +      edit_distance_t dist = levenshtein_distance (component, identifier);
> +      if (dist < best_distance)
> +     {
> +       best_distance = dist;
> +       best_identifier = identifier;
> +     }
> +    }
> +
> +  /* If more than half of the letters were misspelled, the suggestion is
> +     likely to be meaningless.  */
> +  if (best_identifier)
> +    {
> +      unsigned int cutoff = MAX (IDENTIFIER_LENGTH (component),
> +                              IDENTIFIER_LENGTH (best_identifier)) / 2;
> +      if (best_distance > cutoff)
> +     return NULL;

NULL_TREE

> +/* The Levenshtein distance is an "edit-distance": the minimal
> +   number of one-character insertions, removals or substitutions
> +   that are needed to change one string into another.
> +
> +   This implementation uses the Wagner-Fischer algorithm.  */
> +
> +static edit_distance_t
> +levenshtein_distance (const char *s, int len_s,
> +                   const char *t, int len_t)
> +{
> +  const bool debug = false;
> +
> +  if (debug)
> +    {
> +      printf ("s: \"%s\" (len_s=%i)\n", s, len_s);
> +      printf ("t: \"%s\" (len_t=%i)\n", t, len_t);
> +    }

Did you leave this debug stuff here intentionally?

> +      /* Build the rest of the row by considering neighbours to
> +      the north, west and northwest.  */
> +      for (int j = 0; j < len_s; j++)
> +     {
> +       edit_distance_t cost = (s[j] == t[i] ? 0 : 1);
> +       edit_distance_t deletion     = v1[j] + 1;
> +       edit_distance_t insertion    = v0[j + 1] + 1;

The formatting doesn't look right here.

        Marek

Reply via email to