Alfie Richards <alfie.richa...@arm.com> writes: > The string_slice inherits from array_slice and is used to refer to a > substring of an array that is memory managed elsewhere without modifying > the underlying array. > > For example, this is useful in cases such as when needing to refer to a > substring of an attribute in the syntax tree. > > This commit also adds some minimal helper functions for string_slice, > such as strtok, strcmp, and a function to strip whitespace from the > beginning and end of a slice. > > gcc/ChangeLog: > > * vec.cc (string_slice::strtok): New method. > (strcmp): Add implementation for string_slice. > (string_slice::strip): New method. > (test_string_slice_initializers): New test. > (test_string_slice_strtok): Ditto. > (test_string_slice_strcmp): Ditto. > (test_string_slice_equality): Ditto. > (test_string_slice_invalid): Ditto. > (test_string_slice_strip): Ditto. > (vec_cc_tests): Add new tests. > * vec.h (class string_slice): New class. > (strcmp): Add implementation for string_slice.
Thanks for doing this. > --- > gcc/vec.cc | 157 +++++++++++++++++++++++++++++++++++++++++++++++++++++ > gcc/vec.h | 38 +++++++++++++ > 2 files changed, 195 insertions(+) > > diff --git a/gcc/vec.cc b/gcc/vec.cc > index 55f5f3dd447..569dbf2a53c 100644 > --- a/gcc/vec.cc > +++ b/gcc/vec.cc > @@ -176,6 +176,67 @@ dump_vec_loc_statistics (void) > vec_mem_desc.dump (VEC_ORIGIN); > } > > +string_slice > +string_slice::strtok (string_slice *str, string_slice delims) > +{ > + const char *ptr = str->begin (); > + > + /* If the input string is empty or invalid, return an invalid slice > + as there are no more tokens to return. */ > + if (str->empty () || !str->is_valid ()) I think we should instead assert that str and delims are valid and just handle the case where str is empty. > + { > + *str = string_slice::invalid (); > + return string_slice::invalid (); > + } > + > + for (; ptr < str->end (); ptr++) > + for (const char *c = delims.begin (); c < delims.end(); c++) How about just: for (char c : delims) > + if (*ptr == *c) > + { > + const char *start = str->begin (); > + /* Update the input string to be the remaining string. */ > + *str = string_slice ((ptr + 1), str->end () - ptr - 1); > + return string_slice (start, (size_t) (ptr - start)); I think we should allow constructing string_slice with a [begin, end) iterator pair, which would make this simpler. It wouldn't be ambiguous with the current constructors. > + } > + > + /* If no deliminators between the start and end, return the whole string. > */ > + string_slice res = *str; > + *str = string_slice::invalid (); > + return res; > +} This doesn't quite follow normal strtok semantics, in that: strtok (",,,,,", ",") would return null on the first call, whereas this version would return empty strings for each ",". I imagine GCC would always want to see each delimiter, so returning empty strings seems like the right thing to do. 
But: (1) it would then be more consistent to return an empty string when given an empty string, so that strtok ("", ",") returns one empty string and strtok (",", ",") returns two empty strings; (2) it might be better to call the function something else, to avoid confusion. > [...] > diff --git a/gcc/vec.h b/gcc/vec.h > index 915df06f03e..409cdab5bc3 100644 > --- a/gcc/vec.h > +++ b/gcc/vec.h > @@ -2484,4 +2484,42 @@ make_array_slice (T *base, unsigned int size) > # pragma GCC poison m_vec m_vecpfx m_vecdata > #endif > > +/* string_slice inherits from array_slice, specifically to refer to a > substring > + of a character array. > + It includes some string like helpers. */ > +class string_slice; > + > +int > +strcmp (string_slice str1, string_slice str2); > + > +class string_slice : public array_slice<const char> > +{ > +public: > + explicit string_slice () : array_slice<const char> () {} > + explicit string_slice (const char *str) : array_slice (str, strlen (str)) > {} > + explicit string_slice (const char *str, size_t len) : array_slice (str, > len) > + {} > + > + bool operator== (string_slice other) const > + { > + if (!is_valid () || !other.is_valid ()) > + return false; > + if (size() != other.size()) > + return false; > + return memcmp (begin (), other.begin (), size()) == 0; > + } Formatting nit: the braces should be indented to the same column as "bool". Similarly below. Richard > + > + static string_slice invalid () > + { > + return string_slice (nullptr, ~0U); > + } > + > + /* strtok_slice is used to split a string by some deliminator into > + strtok_slice's. Similarly to the posix strtok_r.but without modifying > the > + input string. */ > + static string_slice strtok (string_slice *str, string_slice delims); > + /* Removes white space from the front and back of the string_slice. */ > + string_slice strip (); > +}; > + > #endif // GCC_VEC_H