Alfie Richards <alfie.richa...@arm.com> writes:
> The string_slice inherits from array_slice and is used to refer to a
> substring of an array that is memory managed elsewhere without modifying
> the underlying array.
>
> For example, this is useful in cases such as when needing to refer to a
> substring of an attribute in the syntax tree.
>
> This commit also adds some minimal helper functions for string_slice,
> such as a strtok alternative, equality operators, strcmp, and a function
> to strip whitespace from the beginning and end of a string_slice.
>
> gcc/ChangeLog:
>
>       * vec.cc (string_slice::tokenize): New method.
>       (strcmp): Add implementation for string_slice.
>       (string_slice::strip): New method.
>       (test_string_slice_initializers): New test.
>       (test_string_slice_strtok): Ditto.
>       (test_string_slice_strcmp): Ditto.
>       (test_string_slice_equality): Ditto.
>       (test_string_slice_invalid): Ditto.
>       (test_string_slice_strip): Ditto.
>       (vec_cc_tests): Add new tests.
>       * vec.h (class string_slice): New class.
>       (strcmp): Add implementation for string_slice.

Thanks, mostly LGTM.  Some very minor things below, and a question:

> diff --git a/gcc/vec.cc b/gcc/vec.cc
> index 55f5f3dd447..189cb492c7e 100644
> --- a/gcc/vec.cc
> +++ b/gcc/vec.cc
> @@ -176,6 +176,61 @@ dump_vec_loc_statistics (void)
>    vec_mem_desc.dump (VEC_ORIGIN);
>  }
>  
> +string_slice
> +string_slice::tokenize (string_slice *str, string_slice delims)
> +{
> +  const char *ptr = str->begin ();
> +
> +  gcc_assert (str->is_valid () && delims.is_valid ());
> +
> +  for (; ptr < str->end (); ptr++)
> +    for (char c : delims)
> +      if (*ptr == c)
> +     {
> +       /* Update the input string to be the remaining string.  */
> +       const char* str_begin = str->begin ();

Formatting nit: const char *str_begin

> +       *str = string_slice (ptr  + 1, str->end ());
> +       return string_slice (str_begin, ptr);
> +     }
> +
> +  /* If no delimiters between the start and end, return the whole string.  */
> +  string_slice res = *str;
> +  *str = string_slice::invalid ();
> +  return res;
> +}
> +
> +int
> +strcmp (string_slice str1, string_slice str2)
> +{
> +  for (unsigned int i = 0; i < str1.size () && i < str2.size (); i++)
> +    {
> +      if (str1[i] < str2[i])
> +     return -1;
> +      if (str1[i] > str2[i])
> +     return 1;
> +    }
> +
> +  if (str1.size () < str2.size ())
> +    return -1;
> +  if (str1.size () > str2.size ())
> +    return 1;
> +  return 0;
> +}
> +
> +string_slice
> +string_slice::strip ()
> +{
> +  const char *start = this->begin ();
> +  const char *end = this->end ();
> +
> +  while (start < end && ISSPACE (*start))
> +    start++;
> +  while (end > start && ISSPACE (*(end-1)))
> +    end--;
> +
> +  return string_slice (start, end-start);

Just string_slice (start, end) should be enough.

> +}
> +
>  #if CHECKING_P
>  /* Report qsort comparator CMP consistency check failure with P1, P2, P3 as
>     witness elements.  */
> [...]
> diff --git a/gcc/vec.h b/gcc/vec.h
> index 915df06f03e..d709d339d40 100644
> --- a/gcc/vec.h
> +++ b/gcc/vec.h
> @@ -2484,4 +2484,69 @@ make_array_slice (T *base, unsigned int size)
>  # pragma GCC poison m_vec m_vecpfx m_vecdata
>  #endif
>  
> +/* string_slice inherits from array_slice, specifically to refer to a substring
> +   of a character array.
> +   It includes some string-like helpers.  */
> +class string_slice : public array_slice<const char>
> +{
> +public:
> +  explicit string_slice () : array_slice<const char> () {}
> +  explicit string_slice (const char *str) : array_slice (str, strlen (str)) 
> {}
> +  explicit string_slice (const char *str, size_t len) :
> +    array_slice (str, len) {}
> +  explicit string_slice (const char *start, const char *end) :
> +    array_slice (start, end-start) {}

Formatting nit: end - start.

What was the reason for making the constructors explicit?  It would be nice
if string literals at least could be used implicitly.

Thanks,
Richard

> +
> +  friend bool operator== (const string_slice &lhs, const string_slice &rhs)
> +  {
> +    if (!lhs.is_valid () || !rhs.is_valid ())
> +      return false;
> +    if (lhs.size () != rhs.size ())
> +      return false;
> +    return memcmp (lhs.begin (), rhs.begin (), lhs.size ()) == 0;
> +  }
> +
> +  friend bool operator== (const char *lhs, const string_slice &rhs)
> +  {
> +    return string_slice (lhs) == rhs;
> +  }
> +
> +  friend bool operator== (const string_slice &lhs, const char *rhs)
> +  {
> +    return lhs == string_slice (rhs);
> +  }
> +
> +  friend bool operator!= (const string_slice &lhs, const string_slice &rhs)
> +  {
> +    return !(lhs == rhs);
> +  }
> +
> +  friend bool operator!= (const char *lhs, const string_slice &rhs)
> +  {
> +    return !(string_slice (lhs) == rhs);
> +  }
> +
> +  friend bool operator!= (const string_slice &lhs, const char *rhs)
> +  {
> +    return !(lhs == string_slice (rhs));
> +  }
> +
> +  /* Returns an invalid string_slice.  */
> +  static string_slice invalid ()
> +  {
> +    return string_slice (nullptr, ~0U);
> +  }
> +
> +  /* tokenize is used to split a string by some delimiter into
> +     string_slices.  Similar to the POSIX strtok_r, but without modifying the
> +     input string, and returning all tokens, which may be empty in the case
> +     of an empty input string or consecutive delimiters.  */
> +  static string_slice tokenize (string_slice *str, string_slice delims);
> +
> +  /* Removes white space from the front and back of the string_slice.  */
> +  string_slice strip ();
> +};
> +
> +int strcmp (string_slice str1, string_slice str2);
> +
>  #endif // GCC_VEC_H

Reply via email to