https://gcc.gnu.org/g:c0374cbe19f2f8d1b2461a8491b25bf417d39beb
commit c0374cbe19f2f8d1b2461a8491b25bf417d39beb Author: Alfie Richards <alfie.richa...@arm.com> Date: Mon Mar 24 10:45:56 2025 +0000 Add string_slice class. The string_slice inherits from array_slice and is used to refer to a substring of an array that is memory managed elsewhere without modifying the underlying array. For example, this is useful in cases such as when needing to refer to a substring of an attribute in the syntax tree. Adds some minimal helper functions for string_slice, such as a strtok alternative, equality operators, strcmp, and a function to strip whitespace from the beginning and end of a string_slice. gcc/c-family/ChangeLog: * c-format.cc (local_string_slice_node): New node type. (asm_fprintf_char_table): New entry. (init_dynamic_diag_info): Add support for string_slice. * c-format.h (T_STRING_SLICE): New node type. gcc/ChangeLog: * pretty-print.cc (format_phase_2): Add support for string_slice. * vec.cc (string_slice::tokenize): New static method. (string_slice::strcmp): New static method. (string_slice::strip): New method. (test_string_slice_initializers): New test. (test_string_slice_tokenize): Ditto. (test_string_slice_strcmp): Ditto. (test_string_slice_equality): Ditto. (test_string_slice_inequality): Ditto. (test_string_slice_invalid): Ditto. (test_string_slice_strip): Ditto. (vec_cc_tests): Add new tests. * vec.h (class string_slice): New class. gcc/testsuite/ChangeLog * g++.dg/warn/Wformat-gcc_diag-1.C: Add string_slice "%B" format tests. 
Diff: --- gcc/c-family/c-format.cc | 7 + gcc/c-family/c-format.h | 1 + gcc/pretty-print.cc | 10 ++ gcc/testsuite/g++.dg/warn/Wformat-gcc_diag-1.C | 18 +- gcc/vec.cc | 228 +++++++++++++++++++++++++ gcc/vec.h | 46 +++++ 6 files changed, 306 insertions(+), 4 deletions(-) diff --git a/gcc/c-family/c-format.cc b/gcc/c-family/c-format.cc index a44249a02227..80430e9a8f7c 100644 --- a/gcc/c-family/c-format.cc +++ b/gcc/c-family/c-format.cc @@ -70,6 +70,7 @@ static GTY(()) tree local_event_ptr_node; static GTY(()) tree local_pp_element_ptr_node; static GTY(()) tree local_gimple_ptr_node; static GTY(()) tree local_cgraph_node_ptr_node; +static GTY(()) tree local_string_slice_node; static GTY(()) tree locus; static bool decode_format_attr (const_tree, tree, tree, function_format_info *, @@ -770,6 +771,7 @@ static const format_char_info asm_fprintf_char_table[] = { "p", 1, STD_C89, { T89_V, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "q", "c", NULL }, \ { "r", 1, STD_C89, { T89_C, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "", "//cR", NULL }, \ { "@", 1, STD_C89, { T_EVENT_PTR, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "", "\"", NULL }, \ + { "B", 1, STD_C89, { T_STRING_SLICE, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "q", "", NULL }, \ { "e", 1, STD_C89, { T_PP_ELEMENT_PTR, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "", "\"", NULL }, \ { "<", 0, STD_C89, NOARGUMENTS, "", "<", NULL }, \ { ">", 0, STD_C89, NOARGUMENTS, "", ">", NULL }, \ @@ -5211,6 +5213,11 @@ init_dynamic_diag_info (void) || local_cgraph_node_ptr_node == void_type_node) local_cgraph_node_ptr_node = get_named_type ("cgraph_node"); + /* Similar to the above but for string_slice*. 
*/ + if (!local_string_slice_node + || local_string_slice_node == void_type_node) + local_string_slice_node = get_named_type ("string_slice"); + /* Similar to the above but for diagnostic_event_id_t*. */ if (!local_event_ptr_node || local_event_ptr_node == void_type_node) diff --git a/gcc/c-family/c-format.h b/gcc/c-family/c-format.h index 323338cb8e7f..d44d3862d830 100644 --- a/gcc/c-family/c-format.h +++ b/gcc/c-family/c-format.h @@ -317,6 +317,7 @@ struct format_kind_info #define T89_G { STD_C89, NULL, &local_gimple_ptr_node } #define T_CGRAPH_NODE { STD_C89, NULL, &local_cgraph_node_ptr_node } #define T_EVENT_PTR { STD_C89, NULL, &local_event_ptr_node } +#define T_STRING_SLICE { STD_C89, NULL, &local_string_slice_node } #define T_PP_ELEMENT_PTR { STD_C89, NULL, &local_pp_element_ptr_node } #define T89_T { STD_C89, NULL, &local_tree_type_node } #define T89_V { STD_C89, NULL, T_V } diff --git a/gcc/pretty-print.cc b/gcc/pretty-print.cc index 76bbf2b8cd9a..7e1b52ad7797 100644 --- a/gcc/pretty-print.cc +++ b/gcc/pretty-print.cc @@ -2044,6 +2044,16 @@ format_phase_2 (pretty_printer *pp, pp_string (pp, va_arg (*text.m_args_ptr, const char *)); break; + case 'B': + { + string_slice s = *va_arg (*text.m_args_ptr, string_slice *); + if (quote) + pp_quoted_string (pp, s.begin (), s.size ()); + else + pp_string_n (pp, s.begin (), s.size ()); + break; + } + case 'p': pp_pointer (pp, va_arg (*text.m_args_ptr, void *)); break; diff --git a/gcc/testsuite/g++.dg/warn/Wformat-gcc_diag-1.C b/gcc/testsuite/g++.dg/warn/Wformat-gcc_diag-1.C index dd41b08d621e..db8515030a08 100644 --- a/gcc/testsuite/g++.dg/warn/Wformat-gcc_diag-1.C +++ b/gcc/testsuite/g++.dg/warn/Wformat-gcc_diag-1.C @@ -29,6 +29,8 @@ typedef struct diagnostic_event_id_t diagnostic_event_id_t; namespace pp_markup { class element; } typedef pp_markup::element pp_element; +typedef class string_slice string_slice; + #define FORMAT(kind) __attribute__ ((format (__gcc_## kind ##__, 1, 2))) void diag (const char*, ...) 
FORMAT (diag); @@ -63,7 +65,7 @@ void test_diag (tree t, gimple *gc, diagnostic_event_id_t *event_id_ptr, diag ("%e", 42); /* { dg-warning "format" } */ } -void test_cdiag (tree t, gimple *gc) +void test_cdiag (tree t, gimple *gc, string_slice *s) { cdiag ("%<"); /* { dg-warning "unterminated quoting directive" } */ cdiag ("%>"); /* { dg-warning "unmatched quoting directive " } */ @@ -74,6 +76,7 @@ void test_cdiag (tree t, gimple *gc) cdiag ("%F", t); /* { dg-warning ".F. conversion used unquoted" } */ cdiag ("%G", gc); /* { dg-warning "format" } */ cdiag ("%K", t); /* { dg-warning "format" } */ + cdiag ("%B", s); cdiag ("%R"); /* { dg-warning "unmatched color reset directive" } */ cdiag ("%r", ""); /* { dg-warning "unterminated color directive" } */ @@ -90,6 +93,7 @@ void test_cdiag (tree t, gimple *gc) cdiag ("%<%F%>", t); cdiag ("%<%G%>", gc); /* { dg-warning "format" } */ cdiag ("%<%K%>", t); /* { dg-warning "format" } */ + cdiag ("%<%B%>", s); cdiag ("%<%R%>"); /* { dg-warning "unmatched color reset directive" } */ cdiag ("%<%r%>", ""); /* { dg-warning "unterminated color directive" } */ @@ -101,9 +105,10 @@ void test_cdiag (tree t, gimple *gc) cdiag ("%<%qD%>", t); /* { dg-warning ".q. flag used within a quoted sequence" } */ cdiag ("%<%qE%>", t); /* { dg-warning ".q. flag used within a quoted sequence" } */ cdiag ("%<%qT%>", t); /* { dg-warning ".q. flag used within a quoted sequence" } */ + cdiag ("%<%qB%>", s); /* { dg-warning ".q. 
flag used within a quoted sequence" } */ } -void test_tdiag (tree t, gimple *gc) +void test_tdiag (tree t, gimple *gc, string_slice *s) { tdiag ("%<"); /* { dg-warning "unterminated quoting directive" } */ tdiag ("%>"); /* { dg-warning "unmatched quoting directive " } */ @@ -113,6 +118,7 @@ void test_tdiag (tree t, gimple *gc) tdiag ("%E", t); tdiag ("%G", gc); /* { dg-warning "format" } */ tdiag ("%K", t); /* { dg-warning "format" } */ + tdiag ("%B", s); tdiag ("%R"); /* { dg-warning "unmatched color reset directive" } */ tdiag ("%r", ""); /* { dg-warning "unterminated color directive" } */ @@ -138,9 +144,10 @@ void test_tdiag (tree t, gimple *gc) tdiag ("%<%qD%>", t); /* { dg-warning ".q. flag used within a quoted sequence" } */ tdiag ("%<%qE%>", t); /* { dg-warning ".q. flag used within a quoted sequence" } */ tdiag ("%<%qT%>", t); /* { dg-warning ".q. flag used within a quoted sequence" } */ + tdiag ("%<%qB%>", s); /* { dg-warning ".q. flag used within a quoted sequence" } */ } -void test_cxxdiag (tree t, gimple *gc) +void test_cxxdiag (tree t, gimple *gc, string_slice *s) { cxxdiag ("%A", t); /* { dg-warning ".A. conversion used unquoted" } */ cxxdiag ("%D", t); /* { dg-warning ".D. conversion used unquoted" } */ @@ -148,6 +155,7 @@ void test_cxxdiag (tree t, gimple *gc) cxxdiag ("%F", t); /* { dg-warning ".F. 
conversion used unquoted" } */ cxxdiag ("%G", gc); /* { dg-warning "format" } */ cxxdiag ("%K", t); /* { dg-warning "format" } */ + cxxdiag ("%B", s); cxxdiag ("%R"); /* { dg-warning "unmatched color reset directive" } */ cxxdiag ("%r", ""); /* { dg-warning "unterminated color directive" } */ @@ -172,9 +180,10 @@ void test_cxxdiag (tree t, gimple *gc, string_slice *s) { cxxdiag ("%<%T%>", t); cxxdiag ("%<%V%>", t); cxxdiag ("%<%X%>", t); + cxxdiag ("%<%B%>", s); } -void test_dump (tree t, gimple *stmt, cgraph_node *node) +void test_dump (tree t, gimple *stmt, cgraph_node *node, string_slice *s) { dump ("%<"); /* { dg-warning "unterminated quoting directive" } */ dump ("%>"); /* { dg-warning "unmatched quoting directive " } */ @@ -197,4 +206,5 @@ void test_dump (tree t, gimple *stmt, cgraph_node *node) dump ("%C", node); dump ("%f", 1.0); dump ("%4.2f", 1.0); /* { dg-warning "format" } */ + dump ("%B", s); } diff --git a/gcc/vec.cc b/gcc/vec.cc index 55f5f3dd447c..38314d7360c4 100644 --- a/gcc/vec.cc +++ b/gcc/vec.cc @@ -176,6 +176,74 @@ dump_vec_loc_statistics (void) vec_mem_desc.dump (VEC_ORIGIN); } +/* Gets the next token from STR delimited by DELIMS (delimiter not included + in returned string). + + Updates STR to be the remaining string after the given token. + + STR and DELIMS must both be valid string_slices. + + If there aren't any of the chars in DELIMS in STR (i.e. no more tokens in STR) + then returns the string, and updates STR to be invalid. */ +string_slice +string_slice::tokenize (string_slice *str, string_slice delims) +{ + const char *ptr = str->begin (); + + gcc_assert (str->is_valid () && delims.is_valid ()); + + for (; ptr < str->end (); ptr++) + for (char c : delims) + if (*ptr == c) + { + /* Update the input string to be the remaining string. */ + const char *str_begin = str->begin (); + *str = string_slice (ptr + 1, str->end ()); + return string_slice (str_begin, ptr); + } + + /* If no delimiters between the start and end, return the whole string. 
*/ + string_slice res = *str; + *str = string_slice::invalid (); + return res; +} + +/* Compares the string_slices STR1 and STR2 giving a lexicographical ordering. + Returns -1 if STR1 comes before STR2, 1 if STR1 comes after, and 0 if the + string_slices have the same contents. */ + +int +string_slice::strcmp (string_slice str1, string_slice str2) +{ + for (unsigned int i = 0; i < str1.size () && i < str2.size (); i++) + { + if (str1[i] < str2[i]) + return -1; + if (str1[i] > str2[i]) + return 1; + } + + if (str1.size () < str2.size ()) + return -1; + if (str1.size () > str2.size ()) + return 1; + return 0; +} + +string_slice +string_slice::strip () +{ + const char *start = this->begin (); + const char *end = this->end (); + + while (start < end && ISSPACE (*start)) + start++; + while (end > start && ISSPACE (*(end-1))) + end--; + + return string_slice (start, end); +} + #if CHECKING_P /* Report qsort comparator CMP consistency check failure with P1, P2, P3 as witness elements. */ @@ -584,6 +652,159 @@ test_auto_alias () ASSERT_EQ (val, 0); } +static void +test_string_slice_initializers () +{ + string_slice str1 = string_slice (); + ASSERT_TRUE (str1.is_valid ()); + ASSERT_EQ (str1.size (), 0); + + string_slice str2 = string_slice ("Test string"); + ASSERT_TRUE (str2.is_valid ()); + ASSERT_EQ (str2.size (), 11); + + string_slice str3 = "Test string the second"; + ASSERT_TRUE (str3.is_valid ()); + ASSERT_EQ (str3.size (), 22); + + string_slice str4 = string_slice ("Test string", 4); + ASSERT_TRUE (str4.is_valid ()); + ASSERT_EQ (str4.size (), 4); +} + +static void +test_string_slice_tokenize () +{ + string_slice test_string_slice = ""; + string_slice test_delims = ","; + + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), ""); + ASSERT_FALSE (test_string_slice.is_valid ()); + + test_string_slice = ","; + test_delims = ","; + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + string_slice ("")); + ASSERT_EQ (string_slice::tokenize 
(&test_string_slice, test_delims), + string_slice ("")); + ASSERT_FALSE (test_string_slice.is_valid ()); + + test_string_slice = ",test.,.test, , test "; + test_delims = ",."; + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), ""); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), "test"); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), ""); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), ""); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), "test"); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), " "); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + " test "); + ASSERT_FALSE (test_string_slice.is_valid ()); + + const char *test_string + = "This is the test string, it \0 is for testing, 123 ,,"; + test_string_slice = string_slice (test_string, 52); + test_delims = string_slice (",\0", 2); + + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + "This is the test string"); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + " it "); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + " is for testing"); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + " 123 "); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + ""); + ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), + ""); + ASSERT_FALSE (test_string_slice.is_valid ()); +} + +static void +test_string_slice_strcmp () +{ + ASSERT_EQ (string_slice::strcmp (string_slice (), + string_slice ()), 0); + ASSERT_EQ (string_slice::strcmp (string_slice ("test"), + string_slice ()), 1); + ASSERT_EQ (string_slice::strcmp (string_slice (), + string_slice ("test")), -1); + ASSERT_EQ (string_slice::strcmp (string_slice ("test"), + string_slice ("test")), 0); + ASSERT_EQ (string_slice::strcmp (string_slice ("a"), + string_slice ("b")), -1); + ASSERT_EQ 
(string_slice::strcmp (string_slice ("b"), + string_slice ("a")), 1); + ASSERT_EQ (string_slice::strcmp (string_slice ("ab", 1), + string_slice ("a")), 0); + ASSERT_EQ (string_slice::strcmp (string_slice ("ab", 2), + string_slice ("a")), 1); +} + +static void +test_string_slice_equality () +{ + ASSERT_TRUE (string_slice () == string_slice ()); + ASSERT_FALSE (string_slice ("test") == string_slice ()); + ASSERT_FALSE ("test" == string_slice ()); + ASSERT_FALSE (string_slice () == string_slice ("test")); + ASSERT_FALSE (string_slice () == "test"); + ASSERT_TRUE (string_slice ("test") == string_slice ("test")); + ASSERT_TRUE ("test" == string_slice ("test")); + ASSERT_TRUE (string_slice ("test") == "test"); + ASSERT_FALSE (string_slice ("a") == string_slice ("b")); + ASSERT_FALSE ("a" == string_slice ("b")); + ASSERT_FALSE (string_slice ("a") == "b"); + ASSERT_FALSE (string_slice ("b") == string_slice ("a")); + ASSERT_TRUE (string_slice ("ab", 1) == string_slice ("a")); + ASSERT_TRUE (string_slice ("ab", 1) == "a"); + ASSERT_FALSE (string_slice ("ab", 2) == string_slice ("a")); + ASSERT_FALSE (string_slice ("ab", 2) == "a"); +} + +static void +test_string_slice_inequality () +{ + ASSERT_FALSE (string_slice () != string_slice ()); + ASSERT_TRUE (string_slice ("test") != string_slice ()); + ASSERT_TRUE ("test" != string_slice ()); + ASSERT_TRUE (string_slice () != string_slice ("test")); + ASSERT_TRUE (string_slice () != "test"); + ASSERT_FALSE (string_slice ("test") != string_slice ("test")); + ASSERT_FALSE ("test" != string_slice ("test")); + ASSERT_FALSE (string_slice ("test") != "test"); + ASSERT_TRUE (string_slice ("a") != string_slice ("b")); + ASSERT_TRUE ("a" != string_slice ("b")); + ASSERT_TRUE (string_slice ("a") != "b"); + ASSERT_TRUE (string_slice ("b") != string_slice ("a")); + ASSERT_FALSE (string_slice ("ab", 1) != string_slice ("a")); + ASSERT_FALSE (string_slice ("ab", 1) != "a"); + ASSERT_TRUE (string_slice ("ab", 2) != string_slice ("a")); + 
ASSERT_TRUE (string_slice ("ab", 2) != "a"); +} + +static void +test_string_slice_invalid () +{ + ASSERT_FALSE (string_slice::invalid ().is_valid ()); + ASSERT_FALSE (string_slice (NULL, 1).is_valid ()); + ASSERT_TRUE (string_slice (NULL, (size_t) 0).is_valid ()); + ASSERT_TRUE (string_slice ("Test", (size_t) 0).is_valid ()); + ASSERT_TRUE (string_slice ().is_valid ()); +} + +static void +test_string_slice_strip () +{ + ASSERT_EQ (string_slice (" test ").strip (), string_slice ("test")); + ASSERT_EQ (string_slice ("\t test string\t \n ").strip (), + string_slice ("test string")); + ASSERT_EQ (string_slice ("test").strip (), string_slice ("test")); + ASSERT_EQ (string_slice ().strip (), string_slice ()); + ASSERT_EQ (string_slice ("\t \n \t ").strip (), string_slice ()); +} + /* Run all of the selftests within this file. */ void @@ -604,6 +825,13 @@ vec_cc_tests () test_reverse (); test_auto_delete_vec (); test_auto_alias (); + test_string_slice_initializers (); + test_string_slice_tokenize (); + test_string_slice_strcmp (); + test_string_slice_equality (); + test_string_slice_inequality (); + test_string_slice_invalid (); + test_string_slice_strip (); } } // namespace selftest diff --git a/gcc/vec.h b/gcc/vec.h index 7e112d1a80a6..9604edb1c3cf 100644 --- a/gcc/vec.h +++ b/gcc/vec.h @@ -2495,4 +2495,50 @@ make_array_slice (T *base, unsigned int size) # pragma GCC poison m_vec m_vecpfx m_vecdata #endif +/* string_slice inherits from array_slice, specifically to refer to a substring + of a character array. + It includes some string like helpers. 
*/ +class string_slice : public array_slice<const char> +{ +public: + string_slice () : array_slice<const char> () {} + string_slice (const char *str) : array_slice (str, strlen (str)) {} + explicit string_slice (const char *str, size_t len) + : array_slice (str, len) {} + explicit string_slice (const char *start, const char *end) + : array_slice (start, end - start) {} + + friend bool operator== (const string_slice &lhs, const string_slice &rhs) + { + if (!lhs.is_valid () || !rhs.is_valid ()) + return false; + if (lhs.size () != rhs.size ()) + return false; + return memcmp (lhs.begin (), rhs.begin (), lhs.size ()) == 0; + } + + friend bool operator!= (const string_slice &lhs, const string_slice &rhs) + { + return !(lhs == rhs); + } + + /* Returns an invalid string_slice. */ + static string_slice invalid () + { + return string_slice (nullptr, ~0U); + } + + /* tokenize is used to split a string by some deliminator into + string_slice's. Similarly to the posix strtok_r.but without modifying the + input string, and returning all tokens which may be empty in the case + of an empty input string of consecutive deliminators. */ + static string_slice tokenize (string_slice *str, string_slice delims); + + /* Removes white space from the front and back of the string_slice. */ + string_slice strip (); + + /* Compares two string_slices in lexographical ordering. */ + static int strcmp (string_slice str1, string_slice str2); +}; + #endif // GCC_VEC_H