The string_slice inherits from array_slice and is used to refer to a
substring of an array that is memory managed elsewhere without modifying
the underlying array.

For example, this is useful in cases such as when needing to refer to a
substring of an attribute in the syntax tree.

Adds some minimal helper functions for string_slice,
such as a strtok alternative, equality operators, strcmp, and a function
to strip whitespace from the beginning and end of a string_slice.

gcc/c-family/ChangeLog:

        * c-format.cc (local_string_slice_node): New node type.
        (asm_fprintf_char_table): New entry.
        (init_dynamic_diag_info): Add support for string_slice.
        * c-format.h (T_STRING_SLICE): New node type.

gcc/ChangeLog:

        * pretty-print.cc (format_phase_2): Add support for string_slice.
        * vec.cc (string_slice::tokenize): New method.
        (strcmp): New implementation for string_slice.
        (string_slice::strip): New method.
        (test_string_slice_initializers): New test.
        (test_string_slice_tokenize): Ditto.
        (test_string_slice_strcmp): Ditto.
        (test_string_slice_equality): Ditto.
        (test_string_slice_inequality): Ditto.
        (test_string_slice_invalid): Ditto.
        (test_string_slice_strip): Ditto.
        (vec_cc_tests): Add new tests.
        * vec.h (class string_slice): New class.
        (strcmp): New implementation for string_slice.
---
 gcc/c-family/c-format.cc |   7 ++
 gcc/c-family/c-format.h  |   1 +
 gcc/pretty-print.cc      |  10 ++
 gcc/vec.cc               | 207 +++++++++++++++++++++++++++++++++++++++
 gcc/vec.h                |  45 +++++++++
 5 files changed, 270 insertions(+)

diff --git a/gcc/c-family/c-format.cc b/gcc/c-family/c-format.cc
index 211d20dd25b..dd650d9d520 100644
--- a/gcc/c-family/c-format.cc
+++ b/gcc/c-family/c-format.cc
@@ -70,6 +70,7 @@ static GTY(()) tree local_event_ptr_node;
 static GTY(()) tree local_pp_element_ptr_node;
 static GTY(()) tree local_gimple_ptr_node;
 static GTY(()) tree local_cgraph_node_ptr_node;
+static GTY(()) tree local_string_slice_node;
 static GTY(()) tree locus;
 
 static bool decode_format_attr (const_tree, tree, tree, function_format_info *,
@@ -770,6 +771,7 @@ static const format_char_info asm_fprintf_char_table[] =
   { "p",   1, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  
BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",  "c",  NULL }, \
   { "r",   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  
BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",    "//cR",   NULL 
}, \
   { "@",   1, STD_C89, { T_EVENT_PTR,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  
BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL }, \
+  { "B",   1, STD_C89, { T_STRING_SLICE,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  
BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q", "",   NULL }, \
   { "e",   1, STD_C89, { T_PP_ELEMENT_PTR,   BADLEN,  BADLEN,  BADLEN,  
BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"", NULL }, \
   { "<",   0, STD_C89, NOARGUMENTS, "",      "<",   NULL }, \
   { ">",   0, STD_C89, NOARGUMENTS, "",      ">",   NULL }, \
@@ -5211,6 +5213,11 @@ init_dynamic_diag_info (void)
       || local_cgraph_node_ptr_node == void_type_node)
     local_cgraph_node_ptr_node = get_named_type ("cgraph_node");
 
+  /* Similar to the above but for string_slice*.  */
+  if (!local_string_slice_node
+      || local_string_slice_node == void_type_node)
+    local_string_slice_node = get_named_type ("string_slice");
+
   /* Similar to the above but for diagnostic_event_id_t*.  */
   if (!local_event_ptr_node
       || local_event_ptr_node == void_type_node)
diff --git a/gcc/c-family/c-format.h b/gcc/c-family/c-format.h
index 323338cb8e7..d44d3862d83 100644
--- a/gcc/c-family/c-format.h
+++ b/gcc/c-family/c-format.h
@@ -317,6 +317,7 @@ struct format_kind_info
 #define T89_G   { STD_C89, NULL, &local_gimple_ptr_node }
 #define T_CGRAPH_NODE   { STD_C89, NULL, &local_cgraph_node_ptr_node }
 #define T_EVENT_PTR    { STD_C89, NULL, &local_event_ptr_node }
+#define T_STRING_SLICE    { STD_C89, NULL, &local_string_slice_node }
 #define T_PP_ELEMENT_PTR    { STD_C89, NULL, &local_pp_element_ptr_node }
 #define T89_T   { STD_C89, NULL, &local_tree_type_node }
 #define T89_V  { STD_C89, NULL, T_V }
diff --git a/gcc/pretty-print.cc b/gcc/pretty-print.cc
index abd6c0b528f..aacd43420dd 100644
--- a/gcc/pretty-print.cc
+++ b/gcc/pretty-print.cc
@@ -2035,6 +2035,16 @@ format_phase_2 (pretty_printer *pp,
            pp_string (pp, va_arg (*text.m_args_ptr, const char *));
          break;
 
+       case 'B':
+         {
+           string_slice s = *va_arg (*text.m_args_ptr, string_slice *);
+           if (quote)
+             pp_quoted_string (pp, s.begin (), s.size ());
+           else
+             pp_string_n (pp, s.begin (), s.size ());
+           break;
+         }
+
        case 'p':
          pp_pointer (pp, va_arg (*text.m_args_ptr, void *));
          break;
diff --git a/gcc/vec.cc b/gcc/vec.cc
index 55f5f3dd447..cc9dfdbd48a 100644
--- a/gcc/vec.cc
+++ b/gcc/vec.cc
@@ -176,6 +176,61 @@ dump_vec_loc_statistics (void)
   vec_mem_desc.dump (VEC_ORIGIN);
 }
 
+string_slice
+string_slice::tokenize (string_slice *str, string_slice delims)
+{
+  const char *ptr = str->begin ();
+
+  gcc_assert (str->is_valid () && delims.is_valid ());
+
+  for (; ptr < str->end (); ptr++)
+    for (char c : delims)
+      if (*ptr == c)
+       {
+         /* Split here: *STR becomes the text after the delimiter.  */
+         const char *str_begin = str->begin ();
+         *str = string_slice (ptr  + 1, str->end ());
+         return string_slice (str_begin, ptr);
+       }
+
+  /* No delimiter found: return the whole string and invalidate *STR.  */
+  string_slice res = *str;
+  *str = string_slice::invalid ();
+  return res;
+}
+
+/* Compare STR1 and STR2 lexicographically, returning -1, 0 or 1.  Bytes are
+   compared as unsigned char, as the C library strcmp does, so the ordering
+   does not depend on whether plain char is signed on the host.  */
+int
+strcmp (string_slice str1, string_slice str2)
+{
+  for (unsigned int i = 0; i < str1.size () && i < str2.size (); i++)
+    {
+      unsigned char c1 = str1[i], c2 = str2[i];
+      if (c1 != c2)
+       return c1 < c2 ? -1 : 1;
+    }
+
+  if (str1.size () != str2.size ())
+    return str1.size () < str2.size () ? -1 : 1;
+  return 0;
+}
+
+/* Return the sub-slice left after skipping ISSPACE characters at both ends;
+   neither this slice nor the underlying characters are modified.  */
+string_slice
+string_slice::strip ()
+{
+  const char *first = begin ();
+  const char *last = end ();
+  while (first < last && ISSPACE (first[0]))
+    ++first;
+  while (first < last && ISSPACE (last[-1]))
+    --last;
+  return string_slice (first, last);
+}
+
 #if CHECKING_P
 /* Report qsort comparator CMP consistency check failure with P1, P2, P3 as
    witness elements.  */
@@ -584,6 +639,151 @@ test_auto_alias ()
   ASSERT_EQ (val, 0);
 }
 
+static void
+test_string_slice_initializers ()
+{
+  string_slice str1 = string_slice ();
+  ASSERT_TRUE (str1.is_valid ());
+  ASSERT_EQ (str1.size (), 0);
+
+  string_slice str2 = string_slice ("Test string");
+  ASSERT_TRUE (str2.is_valid ());
+  ASSERT_EQ (str2.size (), 11);
+
+  string_slice str3 = "Test string the second";
+  ASSERT_TRUE (str3.is_valid ());
+  ASSERT_EQ (str3.size (), 22);
+
+  string_slice str4 = string_slice ("Test string", 4);
+  ASSERT_TRUE (str4.is_valid ());
+  ASSERT_EQ (str4.size (), 4);
+}
+
+static void
+test_string_slice_tokenize ()
+{
+  string_slice test_string_slice = "";
+  string_slice test_delims = ",";
+
+  ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), "");
+  ASSERT_FALSE (test_string_slice.is_valid ());
+
+  test_string_slice = ",";
+  test_delims = ",";
+  ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims),
+            string_slice (""));
+  ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims),
+            string_slice (""));
+  ASSERT_FALSE (test_string_slice.is_valid ());
+
+  test_string_slice = ",test.,.test, ,  test  ";
+  test_delims = ",.";
+  ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), "");
+  ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), "test");
+  ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), "");
+  ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), "");
+  ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), "test");
+  ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims), " ");
+  ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims),
+            "  test  ");
+  ASSERT_FALSE (test_string_slice.is_valid ());
+
+  const char *test_string
+    = "This is the test string, it \0 is for testing, 123 ,,";
+  test_string_slice = string_slice (test_string, 52);
+  test_delims = string_slice (",\0", 2);
+
+  ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims),
+            "This is the test string");
+  ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims),
+            " it ");
+  ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims),
+            " is for testing");
+  ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims),
+            " 123 ");
+  ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims),
+            "");
+  ASSERT_EQ (string_slice::tokenize (&test_string_slice, test_delims),
+            "");
+  ASSERT_FALSE (test_string_slice.is_valid ());
+}
+
+static void
+test_string_slice_strcmp ()
+{
+  ASSERT_EQ (strcmp (string_slice (), string_slice ()), 0);
+  ASSERT_EQ (strcmp (string_slice ("test"), string_slice ()), 1);
+  ASSERT_EQ (strcmp (string_slice (), string_slice ("test")), -1);
+  ASSERT_EQ (strcmp (string_slice ("test"), string_slice ("test")), 0);
+  ASSERT_EQ (strcmp (string_slice ("a"), string_slice ("b")), -1);
+  ASSERT_EQ (strcmp (string_slice ("b"), string_slice ("a")), 1);
+  ASSERT_EQ (strcmp (string_slice ("ab", 1), string_slice ("a")), 0);
+  ASSERT_EQ (strcmp (string_slice ("ab", 2), string_slice ("a")), 1);
+}
+
+static void
+test_string_slice_equality ()
+{
+  ASSERT_TRUE (string_slice () == string_slice ());
+  ASSERT_FALSE (string_slice ("test") == string_slice ());
+  ASSERT_FALSE ("test" == string_slice ());
+  ASSERT_FALSE (string_slice () == string_slice ("test"));
+  ASSERT_FALSE (string_slice () == "test");
+  ASSERT_TRUE (string_slice ("test") == string_slice ("test"));
+  ASSERT_TRUE ("test" == string_slice ("test"));
+  ASSERT_TRUE (string_slice ("test") == "test");
+  ASSERT_FALSE (string_slice ("a") == string_slice ("b"));
+  ASSERT_FALSE ("a" == string_slice ("b"));
+  ASSERT_FALSE (string_slice ("a") == "b");
+  ASSERT_FALSE (string_slice ("b") == string_slice ("a"));
+  ASSERT_TRUE (string_slice ("ab", 1) == string_slice ("a"));
+  ASSERT_TRUE (string_slice ("ab", 1) == "a");
+  ASSERT_FALSE (string_slice ("ab", 2) == string_slice ("a"));
+  ASSERT_FALSE (string_slice ("ab", 2) == "a");
+}
+
+static void
+test_string_slice_inequality ()
+{
+  ASSERT_FALSE (string_slice () != string_slice ());
+  ASSERT_TRUE (string_slice ("test") != string_slice ());
+  ASSERT_TRUE ("test" != string_slice ());
+  ASSERT_TRUE (string_slice () != string_slice ("test"));
+  ASSERT_TRUE (string_slice () != "test");
+  ASSERT_FALSE (string_slice ("test") != string_slice ("test"));
+  ASSERT_FALSE ("test" != string_slice ("test"));
+  ASSERT_FALSE (string_slice ("test") != "test");
+  ASSERT_TRUE (string_slice ("a") != string_slice ("b"));
+  ASSERT_TRUE ("a" != string_slice ("b"));
+  ASSERT_TRUE (string_slice ("a") != "b");
+  ASSERT_TRUE (string_slice ("b") != string_slice ("a"));
+  ASSERT_FALSE (string_slice ("ab", 1) != string_slice ("a"));
+  ASSERT_FALSE (string_slice ("ab", 1) != "a");
+  ASSERT_TRUE (string_slice ("ab", 2) != string_slice ("a"));
+  ASSERT_TRUE (string_slice ("ab", 2) != "a");
+}
+
+static void
+test_string_slice_invalid ()
+{
+  ASSERT_FALSE (string_slice::invalid ().is_valid ());
+  ASSERT_FALSE (string_slice (NULL, 1).is_valid ());
+  ASSERT_TRUE (string_slice (NULL, (size_t) 0).is_valid ());
+  ASSERT_TRUE (string_slice ("Test", (size_t) 0).is_valid ());
+  ASSERT_TRUE (string_slice ().is_valid ());
+}
+
+static void
+test_string_slice_strip ()
+{
+  ASSERT_EQ (string_slice ("   test   ").strip (), string_slice ("test"));
+  ASSERT_EQ (string_slice ("\t   test string\t   \n ").strip (),
+            string_slice ("test string"));
+  ASSERT_EQ (string_slice ("test").strip (), string_slice ("test"));
+  ASSERT_EQ (string_slice ().strip (), string_slice ());
+  ASSERT_EQ (string_slice ("\t  \n \t   ").strip (), string_slice ());
+}
+
 /* Run all of the selftests within this file.  */
 
 void
@@ -604,6 +804,13 @@ vec_cc_tests ()
   test_reverse ();
   test_auto_delete_vec ();
   test_auto_alias ();
+  test_string_slice_initializers ();
+  test_string_slice_tokenize ();
+  test_string_slice_strcmp ();
+  test_string_slice_equality ();
+  test_string_slice_inequality ();
+  test_string_slice_invalid ();
+  test_string_slice_strip ();
 }
 
 } // namespace selftest
diff --git a/gcc/vec.h b/gcc/vec.h
index 915df06f03e..9750459327e 100644
--- a/gcc/vec.h
+++ b/gcc/vec.h
@@ -2484,4 +2484,49 @@ make_array_slice (T *base, unsigned int size)
 # pragma GCC poison m_vec m_vecpfx m_vecdata
 #endif
 
+/* string_slice is an array_slice<const char> that refers to a substring of
+   a character array without owning or modifying it.  It adds string-like
+   helpers: ==, !=, tokenize and strip.  */
+class string_slice : public array_slice<const char>
+{
+public:
+  string_slice () : array_slice<const char> () {}
+  string_slice (const char *str) : array_slice (str, strlen (str)) {}
+  explicit string_slice (const char *str, size_t len) :
+    array_slice (str, len) {}
+  explicit string_slice (const char *start, const char *end) :
+    array_slice (start, end - start) {}
+
+  friend bool operator== (const string_slice &lhs, const string_slice &rhs)
+  {
+    if (!lhs.is_valid () || !rhs.is_valid ())
+      return false;
+    if (lhs.size () != rhs.size ())
+      return false;
+    return memcmp (lhs.begin (), rhs.begin (), lhs.size ()) == 0;
+  }
+
+  friend bool operator!= (const string_slice &lhs, const string_slice &rhs)
+  {
+    return !(lhs == rhs);
+  }
+
+  /* Returns an invalid string_slice: a null base with a non-zero size.  */
+  static string_slice invalid ()
+  {
+    return string_slice (nullptr, ~0U);
+  }
+
+  /* tokenize splits STR at the first character found in DELIMS and returns
+     the part before it, like POSIX strtok_r but without modifying the input
+     characters; tokens may be empty (empty input or consecutive delimiters).
+     *STR becomes the rest, or invalid once the last token is returned.  */
+  static string_slice tokenize (string_slice *str, string_slice delims);
+
+  /* Returns the sub-slice left after dropping white space at both ends.  */
+  string_slice strip ();
+};
+
+int strcmp (string_slice str1, string_slice str2);
+
 #endif // GCC_VEC_H
-- 
2.34.1

Reply via email to