The string_slice class inherits from array_slice and is used to refer to a substring of a character array whose memory is managed elsewhere, without modifying the underlying array.
For example, this is useful when referring to a substring of an attribute in the syntax tree. This commit also adds some minimal helper functions for string_slice, such as a strtok alternative (tokenize), equality and inequality operators, strcmp, and a function to strip whitespace from the beginning and end of a string_slice. gcc/ChangeLog: * vec.cc (string_slice::tokenize): New method. (strcmp): Add implementation for string_slice. (string_slice::strip): New method. (test_string_slice_initializers): New test. (test_string_slice_tokenize): Ditto. (test_string_slice_strcmp): Ditto. (test_string_slice_equality): Ditto. (test_string_slice_inequality): Ditto. (test_string_slice_invalid): Ditto. (test_string_slice_strip): Ditto. (vec_cc_tests): Add new tests. * vec.h (class string_slice): New class. (strcmp): Add implementation for string_slice. --- gcc/vec.cc | 210 +++++++++++++++++++++++++++++++++++++++++++++++++++++ gcc/vec.h | 65 +++++++++++++++++ 2 files changed, 275 insertions(+)
diff --git a/gcc/vec.cc b/gcc/vec.cc index 55f5f3dd447..189cb492c7e 100644 --- a/gcc/vec.cc +++ b/gcc/vec.cc @@ -176,6 +176,61 @@ dump_vec_loc_statistics (void) vec_mem_desc.dump (VEC_ORIGIN); } +string_slice +string_slice::tokenize (string_slice *str, string_slice delims) +{ + const char *ptr = str->begin (); + + gcc_assert (str->is_valid () && delims.is_valid ()); + + for (; ptr < str->end (); ptr++) + for (char c : delims) + if (*ptr == c) + { + /* Update the input string to be the remaining string. */ + const char* str_begin = str->begin (); + *str = string_slice (ptr + 1, str->end ()); + return string_slice (str_begin, ptr); + } + + /* If no delimiters between the start and end, return the whole string. */ + string_slice res = *str; + *str = string_slice::invalid (); + return res; +} + +int +strcmp (string_slice str1, string_slice str2) +{ + for (unsigned int i = 0; i < str1.size () && i < str2.size (); i++) + { + if (str1[i] < str2[i]) + return -1; + if (str1[i] > str2[i]) + return 1; + } + + if (str1.size () < str2.size ()) + return -1; + if (str1.size () > str2.size ()) + return 1; + return 0; +} + +string_slice +string_slice::strip () +{ + const char *start = this->begin (); + const char *end = this->end (); + + while (start < end && ISSPACE (*start)) + start++; + while (end > start && ISSPACE (*(end-1))) + end--; + + return string_slice (start, end-start); +} + #if CHECKING_P /* Report qsort comparator CMP consistency check failure with P1, P2, P3 as witness elements. 
*/ @@ -584,6 +639,154 @@ test_auto_alias () ASSERT_EQ (val, 0); } +static void +test_string_slice_initializers () +{ + string_slice str1 = string_slice (); + ASSERT_TRUE (str1.is_valid ()); + ASSERT_EQ (str1.size (), 0); + + string_slice str2 = string_slice ("Test string"); + ASSERT_TRUE (str2.is_valid ()); + ASSERT_EQ (str2.size (), 11); + + string_slice str3 = string_slice ("Test string", 4); + ASSERT_TRUE (str3.is_valid ()); + ASSERT_EQ (str3.size (), 4); +} + +static void +test_string_slice_tokenize () +{ + string_slice test_string_slice = string_slice (""); + string_slice test_delims = string_slice (","); + + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + string_slice ("")); + ASSERT_FALSE (test_string_slice.is_valid ()); + + test_string_slice = string_slice (","); + test_delims = string_slice (","); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + string_slice ("")); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + string_slice ("")); + ASSERT_FALSE (test_string_slice.is_valid ()); + + test_string_slice = string_slice (",test.,.test, , test "); + test_delims = string_slice (",."); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + string_slice ("")); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + string_slice ("test")); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + string_slice ("")); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + string_slice ("")); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + string_slice ("test")); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + string_slice (" ")); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + string_slice (" test ")); + ASSERT_FALSE (test_string_slice.is_valid ()); + + const char *test_string + = "This is the test string, it \0 is for testing, 123 ,,"; + test_string_slice 
= string_slice (test_string, 52); + test_delims = string_slice (",\0", 2); + + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + string_slice ("This is the test string")); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + string_slice (" it ")); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + string_slice (" is for testing")); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + string_slice (" 123 ")); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + string_slice ("")); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + string_slice ("")); + ASSERT_FALSE (test_string_slice.is_valid ()); +} + +static void +test_string_slice_strcmp () +{ + ASSERT_EQ (strcmp (string_slice (), string_slice ()), 0); + ASSERT_EQ (strcmp (string_slice ("test"), string_slice ()), 1); + ASSERT_EQ (strcmp (string_slice (), string_slice ("test")), -1); + ASSERT_EQ (strcmp (string_slice ("test"), string_slice ("test")), 0); + ASSERT_EQ (strcmp (string_slice ("a"), string_slice ("b")), -1); + ASSERT_EQ (strcmp (string_slice ("b"), string_slice ("a")), 1); + ASSERT_EQ (strcmp (string_slice ("ab", 1), string_slice ("a")), 0); + ASSERT_EQ (strcmp (string_slice ("ab", 2), string_slice ("a")), 1); +} + +static void +test_string_slice_equality () +{ + ASSERT_TRUE (string_slice () == string_slice ()); + ASSERT_FALSE (string_slice ("test") == string_slice ()); + ASSERT_FALSE ("test" == string_slice ()); + ASSERT_FALSE (string_slice () == string_slice ("test")); + ASSERT_FALSE (string_slice () == "test"); + ASSERT_TRUE (string_slice ("test") == string_slice ("test")); + ASSERT_TRUE ("test" == string_slice ("test")); + ASSERT_TRUE (string_slice ("test") == "test"); + ASSERT_FALSE (string_slice ("a") == string_slice ("b")); + ASSERT_FALSE ("a" == string_slice ("b")); + ASSERT_FALSE (string_slice ("a") == "b"); + ASSERT_FALSE (string_slice ("b") == string_slice ("a")); + 
ASSERT_TRUE (string_slice ("ab", 1) == string_slice ("a")); + ASSERT_TRUE (string_slice ("ab", 1) == "a"); + ASSERT_FALSE (string_slice ("ab", 2) == string_slice ("a")); + ASSERT_FALSE (string_slice ("ab", 2) == "a"); +} + +static void +test_string_slice_inequality () +{ + ASSERT_FALSE (string_slice () != string_slice ()); + ASSERT_TRUE (string_slice ("test") != string_slice ()); + ASSERT_TRUE ("test" != string_slice ()); + ASSERT_TRUE (string_slice () != string_slice ("test")); + ASSERT_TRUE (string_slice () != "test"); + ASSERT_FALSE (string_slice ("test") != string_slice ("test")); + ASSERT_FALSE ("test" != string_slice ("test")); + ASSERT_FALSE (string_slice ("test") != "test"); + ASSERT_TRUE (string_slice ("a") != string_slice ("b")); + ASSERT_TRUE ("a" != string_slice ("b")); + ASSERT_TRUE (string_slice ("a") != "b"); + ASSERT_TRUE (string_slice ("b") != string_slice ("a")); + ASSERT_FALSE (string_slice ("ab", 1) != string_slice ("a")); + ASSERT_FALSE (string_slice ("ab", 1) != "a"); + ASSERT_TRUE (string_slice ("ab", 2) != string_slice ("a")); + ASSERT_TRUE (string_slice ("ab", 2) != "a"); +} + +static void +test_string_slice_invalid () +{ + ASSERT_FALSE (string_slice::invalid ().is_valid ()); + ASSERT_FALSE (string_slice (NULL, 1).is_valid ()); + ASSERT_TRUE (string_slice (NULL, (size_t) 0).is_valid ()); + ASSERT_TRUE (string_slice ("Test", (size_t) 0).is_valid ()); + ASSERT_TRUE (string_slice ().is_valid ()); +} + +static void +test_string_slice_strip () +{ + ASSERT_EQ (string_slice (" test ").strip (), string_slice ("test")); + ASSERT_EQ (string_slice ("\t test string\t \n ").strip (), + string_slice ("test string")); + ASSERT_EQ (string_slice ("test").strip (), string_slice ("test")); + ASSERT_EQ (string_slice ().strip (), string_slice ()); + ASSERT_EQ (string_slice ("\t \n \t ").strip (), string_slice ()); +} + /* Run all of the selftests within this file. 
*/ void @@ -604,6 +807,13 @@ vec_cc_tests () test_reverse (); test_auto_delete_vec (); test_auto_alias (); + test_string_slice_initializers (); + test_string_slice_tokenize (); + test_string_slice_strcmp (); + test_string_slice_equality (); + test_string_slice_inequality (); + test_string_slice_invalid (); + test_string_slice_strip (); } } // namespace selftest diff --git a/gcc/vec.h b/gcc/vec.h index 915df06f03e..d709d339d40 100644 --- a/gcc/vec.h +++ b/gcc/vec.h @@ -2484,4 +2484,69 @@ make_array_slice (T *base, unsigned int size) # pragma GCC poison m_vec m_vecpfx m_vecdata #endif +/* string_slice inherits from array_slice, specifically to refer to a substring + of a character array. + It includes some string like helpers. */ +class string_slice : public array_slice<const char> +{ +public: + explicit string_slice () : array_slice<const char> () {} + explicit string_slice (const char *str) : array_slice (str, strlen (str)) {} + explicit string_slice (const char *str, size_t len) : + array_slice (str, len) {} + explicit string_slice (const char *start, const char *end) : + array_slice (start, end-start) {} + + friend bool operator== (const string_slice &lhs, const string_slice &rhs) + { + if (!lhs.is_valid () || !rhs.is_valid ()) + return false; + if (lhs.size () != rhs.size ()) + return false; + return memcmp (lhs.begin (), rhs.begin (), lhs.size ()) == 0; + } + + friend bool operator== (const char *lhs, const string_slice &rhs) + { + return string_slice (lhs) == rhs; + } + + friend bool operator== (const string_slice &lhs, const char *rhs) + { + return lhs == string_slice (rhs); + } + + friend bool operator!= (const string_slice &lhs, const string_slice &rhs) + { + return !(lhs == rhs); + } + + friend bool operator!= (const char *lhs, const string_slice &rhs) + { + return !(string_slice (lhs) == rhs); + } + + friend bool operator!= (const string_slice &lhs, const char *rhs) + { + return !(lhs == string_slice (rhs)); + } + + /* Returns an invalid string_slice. 
*/ + static string_slice invalid () + { + return string_slice (nullptr, ~0U); + } + + /* tokenize splits *STR at the first character that appears in DELIMS, + returns the token before it and advances *STR past it. Like POSIX strtok_r, + but the underlying characters are not modified and every token is returned, + including empty ones from an empty input or consecutive delimiters. */ + static string_slice tokenize (string_slice *str, string_slice delims); + + /* Returns a string_slice with whitespace removed from the front and back. */ + string_slice strip (); +}; + +int strcmp (string_slice str1, string_slice str2); + #endif // GCC_VEC_H