Alfie Richards <alfie.richa...@arm.com> writes: > The string_slice inherits from array_slice and is used to refer to a > substring of an array that is memory managed elsewhere without modifying > the underlying array. > > For example, this is useful in cases such as when needing to refer to a > substring of an attribute in the syntax tree. > > This commit also adds some minimal helper functions for string_slice, > such as a strtok alternative, equality operators, strcmp, and a function > to strip whitespace from the beginning and end of a string_slice. > > gcc/ChangeLog: > > * vec.cc (string_slice::strtok): New method. > (strcmp): Add implementation for string_slice. > (string_slice::strip): New method. > (test_string_slice_initializers): New test. > (test_string_slice_strtok): Ditto. > (test_string_slice_strcmp): Ditto. > (test_string_slice_equality): Ditto. > (test_string_slice_invalid): Ditto. > (test_string_slice_strip): Ditto. > (vec_cc_tests): Add new tests. > * vec.h (class string_slice): New class. > (strcmp): Add implementation for string_slice.
Thanks, mostly LGTM. Some very minor things below, and a question: > diff --git a/gcc/vec.cc b/gcc/vec.cc > index 55f5f3dd447..189cb492c7e 100644 > --- a/gcc/vec.cc > +++ b/gcc/vec.cc > @@ -176,6 +176,61 @@ dump_vec_loc_statistics (void) > vec_mem_desc.dump (VEC_ORIGIN); > } > > +string_slice > +string_slice::tokenize (string_slice *str, string_slice delims) > +{ > + const char *ptr = str->begin (); > + > + gcc_assert (str->is_valid () && delims.is_valid ()); > + > + for (; ptr < str->end (); ptr++) > + for (char c : delims) > + if (*ptr == c) > + { > + /* Update the input string to be the remaining string. */ > + const char* str_begin = str->begin (); Formatting nit: const char *str_begin > + *str = string_slice (ptr + 1, str->end ()); > + return string_slice (str_begin, ptr); > + } > + > + /* If no deliminators between the start and end, return the whole string. > */ > + string_slice res = *str; > + *str = string_slice::invalid (); > + return res; > +} > + > +int > +strcmp (string_slice str1, string_slice str2) > +{ > + for (unsigned int i = 0; i < str1.size () && i < str2.size (); i++) > + { > + if (str1[i] < str2[i]) > + return -1; > + if (str1[i] > str2[i]) > + return 1; > + } > + > + if (str1.size () < str2.size ()) > + return -1; > + if (str1.size () > str2.size ()) > + return 1; > + return 0; > +} > + > +string_slice > +string_slice::strip () > +{ > + const char *start = this->begin (); > + const char *end = this->end (); > + > + while (start < end && ISSPACE (*start)) > + start++; > + while (end > start && ISSPACE (*(end-1))) > + end--; > + > + return string_slice (start, end-start); Just string_slice (start, end) should be enough. > +} > + > #if CHECKING_P > /* Report qsort comparator CMP consistency check failure with P1, P2, P3 as > witness elements. */ > [...] 
> diff --git a/gcc/vec.h b/gcc/vec.h > index 915df06f03e..d709d339d40 100644 > --- a/gcc/vec.h > +++ b/gcc/vec.h > @@ -2484,4 +2484,69 @@ make_array_slice (T *base, unsigned int size) > # pragma GCC poison m_vec m_vecpfx m_vecdata > #endif > > +/* string_slice inherits from array_slice, specifically to refer to a > substring > + of a character array. > + It includes some string like helpers. */ > +class string_slice : public array_slice<const char> > +{ > +public: > + explicit string_slice () : array_slice<const char> () {} > + explicit string_slice (const char *str) : array_slice (str, strlen (str)) > {} > + explicit string_slice (const char *str, size_t len) : > + array_slice (str, len) {} > + explicit string_slice (const char *start, const char *end) : > + array_slice (start, end-start) {} Formatting nit: end - start. What was the reason for making the constructors explicit? It would be nice if string literals at least could be used implicitly. Thanks, Richard > + > + friend bool operator== (const string_slice &lhs, const string_slice &rhs) > + { > + if (!lhs.is_valid () || !rhs.is_valid ()) > + return false; > + if (lhs.size () != rhs.size ()) > + return false; > + return memcmp (lhs.begin (), rhs.begin (), lhs.size ()) == 0; > + } > + > + friend bool operator== (const char *lhs, const string_slice &rhs) > + { > + return string_slice (lhs) == rhs; > + } > + > + friend bool operator== (const string_slice &lhs, const char *rhs) > + { > + return lhs == string_slice (rhs); > + } > + > + friend bool operator!= (const string_slice &lhs, const string_slice &rhs) > + { > + return !(lhs == rhs); > + } > + > + friend bool operator!= (const char *lhs, const string_slice &rhs) > + { > + return !(string_slice (lhs) == rhs); > + } > + > + friend bool operator!= (const string_slice &lhs, const char *rhs) > + { > + return !(lhs == string_slice (rhs)); > + } > + > + /* Returns an invalid string_slice. 
*/ > + static string_slice invalid () > + { > + return string_slice (nullptr, ~0U); > + } > + > + /* tokenize is used to split a string by some delimiter into > + strtok_slice's. Similarly to the POSIX strtok_r, but without modifying > the > + input string, and returning all tokens which may be empty in the case > + of an empty input string or consecutive delimiters. */ > + static string_slice tokenize (string_slice *str, string_slice delims); > + > + /* Removes white space from the front and back of the string_slice. */ > + string_slice strip (); > +}; > + > +int strcmp (string_slice str1, string_slice str2); > + > #endif // GCC_VEC_H