Hi all,

This patch introduces a complete implementation of the SPLIT intrinsic, including documentation and test cases. While Tobias previously mentioned that a similar effect could be achieved with existing functionality, I believe having direct support for this operation won't cause any issues. Users will still have the flexibility to choose their preferred method.

Please take a look when you have a moment!

Thanks,
Yuao
From 3fb261141dbc61296adb9c9361bafcd9922ad8bd Mon Sep 17 00:00:00 2001
From: Yuao Ma <c...@outlook.com>
Date: Sat, 26 Jul 2025 20:59:26 +0800
Subject: [PATCH] fortran: implment split for fortran 2023

This patch includes the implementation, documentation, and test case for SPLIT.

gcc/fortran/ChangeLog:

        * check.cc (gfc_check_split): Argument check for SPLIT.
        * gfortran.h (enum gfc_isym_id): Define GFC_ISYM_SPLIT.
        * intrinsic.cc (add_subroutines): Register SPLIT intrinsic.
        * intrinsic.h (gfc_check_split): New decl.
        (gfc_resolve_split): Ditto.
        * intrinsic.texi: SPLIT documentation.
        * iresolve.cc (gfc_resolve_split): Add resolved_sym for SPLIT.
        * trans-decl.cc (gfc_build_intrinsic_function_decls): Add decl for
        SPLIT in libgfortran.
        * trans-intrinsic.cc (conv_intrinsic_split): SPLIT codegen.
        (gfc_conv_intrinsic_subroutine): Handle SPLIT case.
        * trans.h (GTY): Declare gfor_fndecl_string_split{, _char4}.

libgfortran/ChangeLog:

        * gfortran.map: Add split symbol.
        * intrinsics/string_intrinsics_inc.c (string_split):
        Runtime support for SPLIT.

gcc/testsuite/ChangeLog:

        * gfortran.dg/split_1.f90: New test.
        * gfortran.dg/split_2.f90: New test.
---
 gcc/fortran/check.cc                          | 21 ++++++
 gcc/fortran/gfortran.h                        |  2 +
 gcc/fortran/intrinsic.cc                      |  8 +++
 gcc/fortran/intrinsic.h                       |  2 +
 gcc/fortran/intrinsic.texi                    | 64 +++++++++++++++++
 gcc/fortran/iresolve.cc                       | 13 ++++
 gcc/fortran/trans-decl.cc                     | 14 ++++
 gcc/fortran/trans-intrinsic.cc                | 71 +++++++++++++++++++
 gcc/fortran/trans.h                           |  2 +
 gcc/testsuite/gfortran.dg/split_1.f90         | 28 ++++++++
 gcc/testsuite/gfortran.dg/split_2.f90         | 22 ++++++
 libgfortran/gfortran.map                      |  2 +
 .../intrinsics/string_intrinsics_inc.c        | 52 ++++++++++++++
 13 files changed, 301 insertions(+)
 create mode 100644 gcc/testsuite/gfortran.dg/split_1.f90
 create mode 100644 gcc/testsuite/gfortran.dg/split_2.f90

diff --git a/gcc/fortran/check.cc b/gcc/fortran/check.cc
index 838d523f7c4..862652683a7 100644
--- a/gcc/fortran/check.cc
+++ b/gcc/fortran/check.cc
@@ -5559,6 +5559,27 @@ gfc_check_scan (gfc_expr *x, gfc_expr *y, gfc_expr *z, 
gfc_expr *kind)
   return true;
 }
 
+bool
+gfc_check_split (gfc_expr *string, gfc_expr *set, gfc_expr *pos, gfc_expr 
*back)
+{
+  if (!type_check (string, 0, BT_CHARACTER))
+    return false;
+
+  if (!type_check (set, 1, BT_CHARACTER))
+    return false;
+
+  if (!type_check (pos, 2, BT_INTEGER) || !scalar_check (pos, 2))
+    return false;
+
+  if (back != NULL
+      && (!type_check (back, 3, BT_LOGICAL) || !scalar_check (back, 3)))
+    return false;
+
+  if (!same_type_check (string, 0, set, 1))
+    return false;
+
+  return true;
+}
 
 bool
 gfc_check_secnds (gfc_expr *r)
diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index 85feb18be8c..d9dcd1b504f 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -729,6 +729,8 @@ enum gfc_isym_id
   GFC_ISYM_COSPI,
   GFC_ISYM_SINPI,
   GFC_ISYM_TANPI,
+
+  GFC_ISYM_SPLIT,
 };
 
 enum init_local_logical
diff --git a/gcc/fortran/intrinsic.cc b/gcc/fortran/intrinsic.cc
index 9e07627503d..259b59edc3b 100644
--- a/gcc/fortran/intrinsic.cc
+++ b/gcc/fortran/intrinsic.cc
@@ -3933,6 +3933,14 @@ add_subroutines (void)
              pt, BT_INTEGER, di, OPTIONAL, INTENT_IN,
              gt, BT_INTEGER, di, OPTIONAL, INTENT_OUT);
 
+  add_sym_4s ("split", GFC_ISYM_SPLIT, CLASS_IMPURE,
+             BT_UNKNOWN, 0, GFC_STD_F2023,
+             gfc_check_split, NULL, gfc_resolve_split,
+             "string", BT_CHARACTER, dc, REQUIRED, INTENT_IN,
+             "set", BT_CHARACTER, dc, REQUIRED, INTENT_IN,
+             "pos", BT_INTEGER, di, REQUIRED, INTENT_INOUT,
+             "back", BT_LOGICAL, dl, OPTIONAL, INTENT_IN);
+
   /* The following subroutines are part of ISO_C_BINDING.  */
 
   add_sym_3s ("c_f_pointer", GFC_ISYM_C_F_POINTER, CLASS_IMPURE, BT_UNKNOWN, 0,
diff --git a/gcc/fortran/intrinsic.h b/gcc/fortran/intrinsic.h
index fd54588054f..8a0ab935e1f 100644
--- a/gcc/fortran/intrinsic.h
+++ b/gcc/fortran/intrinsic.h
@@ -215,6 +215,7 @@ bool gfc_check_mvbits (gfc_expr *, gfc_expr *, gfc_expr *, 
gfc_expr *,
 bool gfc_check_random_init (gfc_expr *, gfc_expr *);
 bool gfc_check_random_number (gfc_expr *);
 bool gfc_check_random_seed (gfc_expr *, gfc_expr *, gfc_expr *);
+bool gfc_check_split (gfc_expr *, gfc_expr *, gfc_expr *, gfc_expr *);
 bool gfc_check_dtime_etime_sub (gfc_expr *, gfc_expr *);
 bool gfc_check_fgetputc_sub (gfc_expr *, gfc_expr *, gfc_expr *);
 bool gfc_check_fgetput_sub (gfc_expr *, gfc_expr *);
@@ -693,6 +694,7 @@ void gfc_resolve_link_sub (gfc_code *);
 void gfc_resolve_symlnk_sub (gfc_code *);
 void gfc_resolve_signal_sub (gfc_code *);
 void gfc_resolve_sleep_sub (gfc_code *);
+void gfc_resolve_split (gfc_code *);
 void gfc_resolve_stat_sub (gfc_code *);
 void gfc_resolve_system_clock (gfc_code *);
 void gfc_resolve_system_sub (gfc_code *);
diff --git a/gcc/fortran/intrinsic.texi b/gcc/fortran/intrinsic.texi
index 3103da3da09..a24b234316c 100644
--- a/gcc/fortran/intrinsic.texi
+++ b/gcc/fortran/intrinsic.texi
@@ -313,6 +313,7 @@ Some basic guidelines for editing this document:
 * @code{SIZEOF}:        SIZEOF,    Determine the size in bytes of an expression
 * @code{SLEEP}:         SLEEP,     Sleep for the specified number of seconds
 * @code{SPACING}:       SPACING,   Smallest distance between two numbers of a 
given type
+* @code{SPLIT}:         SPLIT,     Parse a string into tokens, one at a time.
 * @code{SPREAD}:        SPREAD,    Add a dimension to an array 
 * @code{SQRT}:          SQRT,      Square-root function
 * @code{SRAND}:         SRAND,     Reinitialize the random number generator
@@ -14203,6 +14204,69 @@ Fortran 90 and later
 
 
 
+@node SPLIT
+@section @code{SPLIT} --- Parse a string into tokens, one at a time
+@fnindex SPLIT
+@cindex string, split
+
+@table @asis
+@item @emph{Synopsis}:
+@code{RESULT = SPLIT(STRING, SET, POS [, BACK])}
+
+@item @emph{Description}:
+Updates the integer @var{POS} to the position of the next (or previous)
+separator in @var{STRING}.
+
+If @var{BACK} is absent or is present with the value false, @var{POS} is
+assigned the position of the leftmost token delimiter in @var{STRING} whose
+position is greater than @var{POS}, or if there is no such character, it is
+assigned a value one greater than the length of @var{STRING}. This identifies
+a token with starting position one greater than the value of @var{POS} on
+invocation, and ending position one less than the value of @var{POS} on return.
+
+If @var{BACK} is present with the value true, @var{POS} is assigned the
+position of the rightmost token delimiter in @var{STRING} whose position is
+less than @var{POS}, or if there is no such character, it is assigned the value
+zero. This identifies a token with ending position one less than the value of
+@var{POS} on invocation, and starting position one greater than the value of
+@var{POS} on return.
+
+@item @emph{Class}:
+Subroutine
+
+@item @emph{Arguments}:
+@multitable @columnfractions .15 .70
+@item @var{STRING} @tab Shall be of type @code{CHARACTER}.
+@item @var{SET}    @tab Shall be of type @code{CHARACTER}.
+@item @var{POS}    @tab Shall be of type @code{INTEGER}.
+@item @var{BACK}   @tab (Optional) Shall be of type @code{LOGICAL}.
+@end multitable
+
+@item @emph{Example}:
+@smallexample
+character(len=:), allocatable :: input
+character(len=2) :: set = ', '
+integer :: p
+input = "one,last example"
+p = 0
+do
+  if (p > len(input)) exit
+  istart = p + 1
+  call split(input, set, p)
+  iend = p - 1
+  print '(t7, a)', input(istart:iend)
+end do
+@end smallexample
+
+@item @emph{Standard}:
+Fortran 2023
+
+@item @emph{See also}:
+@ref{SCAN}
+@end table
+
+
+
 @node SPREAD
 @section @code{SPREAD} --- Add a dimension to an array
 @fnindex SPREAD
diff --git a/gcc/fortran/iresolve.cc b/gcc/fortran/iresolve.cc
index 10013096c70..da354ab5056 100644
--- a/gcc/fortran/iresolve.cc
+++ b/gcc/fortran/iresolve.cc
@@ -3863,6 +3863,19 @@ gfc_resolve_sleep_sub (gfc_code *c)
   c->resolved_sym = gfc_get_intrinsic_sub_symbol (name);
 }
 
+void
+gfc_resolve_split (gfc_code *c)
+{
+  const char *name;
+  gfc_expr *string;
+
+  string = c->ext.actual->expr;
+  if (string->ts.type == BT_CHARACTER && string->ts.kind == 4)
+    name = "__split_char4";
+  else
+    name = "__split";
+  c->resolved_sym = gfc_get_intrinsic_sub_symbol (name);
+}
 
 /* G77 compatibility function srand().  */
 
diff --git a/gcc/fortran/trans-decl.cc b/gcc/fortran/trans-decl.cc
index d5acdca719f..741acc052ee 100644
--- a/gcc/fortran/trans-decl.cc
+++ b/gcc/fortran/trans-decl.cc
@@ -197,6 +197,7 @@ tree gfor_fndecl_string_scan;
 tree gfor_fndecl_string_verify;
 tree gfor_fndecl_string_trim;
 tree gfor_fndecl_string_minmax;
+tree gfor_fndecl_string_split;
 tree gfor_fndecl_adjustl;
 tree gfor_fndecl_adjustr;
 tree gfor_fndecl_select_string;
@@ -208,6 +209,7 @@ tree gfor_fndecl_string_scan_char4;
 tree gfor_fndecl_string_verify_char4;
 tree gfor_fndecl_string_trim_char4;
 tree gfor_fndecl_string_minmax_char4;
+tree gfor_fndecl_string_split_char4;
 tree gfor_fndecl_adjustl_char4;
 tree gfor_fndecl_adjustr_char4;
 tree gfor_fndecl_select_string_char4;
@@ -3569,6 +3571,12 @@ gfc_build_intrinsic_function_decls (void)
        build_pointer_type (pchar1_type_node), integer_type_node,
        integer_type_node);
 
+  gfor_fndecl_string_split = gfc_build_library_function_decl_with_spec (
+    get_identifier (PREFIX ("string_split")), ". . R . R . . ",
+    gfc_charlen_type_node, 6, gfc_charlen_type_node, pchar1_type_node,
+    gfc_charlen_type_node, pchar1_type_node, gfc_charlen_type_node,
+    gfc_logical4_type_node);
+
   gfor_fndecl_adjustl = gfc_build_library_function_decl_with_spec (
        get_identifier (PREFIX("adjustl")), ". W . R ",
        void_type_node, 3, pchar1_type_node, gfc_charlen_type_node,
@@ -3641,6 +3649,12 @@ gfc_build_intrinsic_function_decls (void)
        build_pointer_type (pchar4_type_node), integer_type_node,
        integer_type_node);
 
+  gfor_fndecl_string_split_char4 = gfc_build_library_function_decl_with_spec (
+    get_identifier (PREFIX ("string_split_char4")), ". . R . R . . ",
+    gfc_charlen_type_node, 6, gfc_charlen_type_node, pchar4_type_node,
+    gfc_charlen_type_node, pchar4_type_node, gfc_charlen_type_node,
+    gfc_logical4_type_node);
+
   gfor_fndecl_adjustl_char4 = gfc_build_library_function_decl_with_spec (
        get_identifier (PREFIX("adjustl_char4")), ". W . R ",
        void_type_node, 3, pchar4_type_node, gfc_charlen_type_node,
diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index be984271d6a..c2a91c93d28 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -3466,6 +3466,73 @@ else
   return gfc_finish_block (&block);
 }
 
+static tree
+conv_intrinsic_split (gfc_code *code)
+{
+  stmtblock_t block;
+  gfc_se se;
+  tree string, string_len;
+  tree set, set_len;
+  tree pos, pos_for_call;
+  tree back;
+  tree fndecl, call;
+  gfc_expr *string_expr, *set_expr, *pos_expr, *back_expr;
+
+  string_expr = code->ext.actual->expr;
+  set_expr = code->ext.actual->next->expr;
+  pos_expr = code->ext.actual->next->next->expr;
+  back_expr = code->ext.actual->next->next->next->expr;
+
+  gfc_start_block (&block);
+
+  gfc_init_se (&se, NULL);
+  gfc_conv_expr (&se, string_expr);
+  gfc_conv_string_parameter (&se);
+  string_len = se.string_length;
+  gfc_add_block_to_block (&block, &se.pre);
+  gfc_add_block_to_block (&block, &se.post);
+  string = se.expr;
+
+  gfc_init_se (&se, NULL);
+  gfc_conv_expr (&se, set_expr);
+  gfc_conv_string_parameter (&se);
+  set_len = se.string_length;
+  gfc_add_block_to_block (&block, &se.pre);
+  gfc_add_block_to_block (&block, &se.post);
+  set = se.expr;
+
+  gfc_init_se (&se, NULL);
+  gfc_conv_expr (&se, pos_expr);
+  gfc_add_block_to_block (&block, &se.pre);
+  gfc_add_block_to_block (&block, &se.post);
+  pos = se.expr;
+  pos_for_call = fold_convert (gfc_charlen_type_node, pos);
+
+  if (back_expr)
+    {
+      gfc_init_se (&se, NULL);
+      gfc_conv_expr (&se, back_expr);
+      gfc_add_block_to_block (&block, &se.pre);
+      gfc_add_block_to_block (&block, &se.post);
+      back = se.expr;
+    }
+  else
+    back = build_int_cst (gfc_get_logical_type (4), 0);
+
+  if (string_expr->ts.kind == 1)
+    fndecl = gfor_fndecl_string_split;
+  else
+    fndecl = gfor_fndecl_string_split_char4;
+
+  call = build_call_expr_loc (input_location, fndecl, 6, string_len, string,
+                             set_len, set, pos_for_call, back);
+  gfc_add_expr_to_block (&block, call);
+
+  gfc_add_modify (&block, pos,
+                 fold_convert (gfc_typenode_for_spec (&pos_expr->ts), call));
+
+  return gfc_finish_block (&block);
+}
 
 /* Return a character string containing the tty name.  */
 
@@ -13261,6 +13328,10 @@ gfc_conv_intrinsic_subroutine (gfc_code *code)
       res = conv_intrinsic_system_clock (code);
       break;
 
+    case GFC_ISYM_SPLIT:
+      res = conv_intrinsic_split (code);
+      break;
+
     default:
       res = NULL_TREE;
       break;
diff --git a/gcc/fortran/trans.h b/gcc/fortran/trans.h
index 461b0cdac71..40680e97cbc 100644
--- a/gcc/fortran/trans.h
+++ b/gcc/fortran/trans.h
@@ -961,6 +961,7 @@ extern GTY(()) tree gfor_fndecl_string_scan;
 extern GTY(()) tree gfor_fndecl_string_verify;
 extern GTY(()) tree gfor_fndecl_string_trim;
 extern GTY(()) tree gfor_fndecl_string_minmax;
+extern GTY(()) tree gfor_fndecl_string_split;
 extern GTY(()) tree gfor_fndecl_adjustl;
 extern GTY(()) tree gfor_fndecl_adjustr;
 extern GTY(()) tree gfor_fndecl_select_string;
@@ -972,6 +973,7 @@ extern GTY(()) tree gfor_fndecl_string_scan_char4;
 extern GTY(()) tree gfor_fndecl_string_verify_char4;
 extern GTY(()) tree gfor_fndecl_string_trim_char4;
 extern GTY(()) tree gfor_fndecl_string_minmax_char4;
+extern GTY(()) tree gfor_fndecl_string_split_char4;
 extern GTY(()) tree gfor_fndecl_adjustl_char4;
 extern GTY(()) tree gfor_fndecl_adjustr_char4;
 extern GTY(()) tree gfor_fndecl_select_string_char4;
diff --git a/gcc/testsuite/gfortran.dg/split_1.f90 
b/gcc/testsuite/gfortran.dg/split_1.f90
new file mode 100644
index 00000000000..21659b042e7
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/split_1.f90
@@ -0,0 +1,28 @@
+! { dg-do run }
+program b
+  character(len=:), allocatable :: input
+  character(len=2) :: set = ', '
+  integer :: p
+  input = " one,last example,"
+  p = 0
+
+  call split(input, set, p)
+  if (p /= 1) STOP 1
+  call split(input, set, p)
+  if (p /= 5) STOP 2
+  call split(input, set, p)
+  if (p /= 10) STOP 3
+  call split(input, set, p)
+  if (p /= 18) STOP 4
+  call split(input, set, p)
+  if (p /= 19) STOP 5
+
+  call split(input, set, p, .true.)
+  if (p /= 18) STOP 6
+  call split(input, set, p, .true.)
+  if (p /= 10) STOP 7
+  call split(input, set, p, .true.)
+  if (p /= 5) STOP 8
+  call split(input, set, p, .true.)
+  if (p /= 1) STOP 9
+end program b
diff --git a/gcc/testsuite/gfortran.dg/split_2.f90 
b/gcc/testsuite/gfortran.dg/split_2.f90
new file mode 100644
index 00000000000..9afb30b01b1
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/split_2.f90
@@ -0,0 +1,22 @@
+! { dg-do run }
+program b
+  integer, parameter :: ucs4 = selected_char_kind('ISO_10646')
+  character(kind=ucs4, len=:), allocatable :: input, set
+  integer :: p = 0
+
+  input = char(int(z'4f60'), ucs4) // char(int(z'597d'), ucs4) // 
char(int(z'4f60'), ucs4) // char(int(z'4e16'), ucs4)
+  set = char(int(z'597d'), ucs4) // char(int(z'4e16'), ucs4)
+
+  call split(input, set, p)
+  if (p /= 2) stop 1
+  call split(input, set, p)
+  if (p /= 4) stop 2
+  call split(input, set, p)
+  if (p /= 5) stop 3
+  call split(input, set, p, .true.)
+  if (p /= 4) stop 4
+  call split(input, set, p, .true.)
+  if (p /= 2) stop 5
+  call split(input, set, p, .true.)
+  if (p /= 0) stop 6
+end program b
diff --git a/libgfortran/gfortran.map b/libgfortran/gfortran.map
index 742dddfe559..d8d8c7b0aae 100644
--- a/libgfortran/gfortran.map
+++ b/libgfortran/gfortran.map
@@ -2031,4 +2031,6 @@ GFORTRAN_15.2 {
     _gfortran_maxloc1_16_m16;
     _gfortran_mmaxloc1_16_m16;
     _gfortran_smaxloc1_16_m16;
+    _gfortran_string_split;
+    _gfortran_string_split_char4;
 } GFORTRAN_15;
diff --git a/libgfortran/intrinsics/string_intrinsics_inc.c 
b/libgfortran/intrinsics/string_intrinsics_inc.c
index d86bb6c8833..64b3d878a74 100644
--- a/libgfortran/intrinsics/string_intrinsics_inc.c
+++ b/libgfortran/intrinsics/string_intrinsics_inc.c
@@ -33,6 +33,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If 
not, see
 #define string_verify SUFFIX(string_verify)
 #define string_trim SUFFIX(string_trim)
 #define string_minmax SUFFIX(string_minmax)
+#define string_split SUFFIX(string_split)
 #define zero_length_string SUFFIX(zero_length_string)
 #define compare_string SUFFIX(compare_string)
 
@@ -72,6 +73,10 @@ export_proto(string_trim);
 extern void string_minmax (gfc_charlen_type *, CHARTYPE **, int, int, ...);
 export_proto(string_minmax);
 
+extern gfc_charlen_type string_split (gfc_charlen_type, const CHARTYPE *,
+                                     gfc_charlen_type, const CHARTYPE *,
+                                     gfc_charlen_type, GFC_LOGICAL_4);
+export_proto (string_split);
 
 /* Use for functions which can return a zero-length string.  */
 static CHARTYPE zero_length_string = 0;
@@ -459,3 +464,50 @@ string_minmax (gfc_charlen_type *rlen, CHARTYPE **dest, 
int op, int nargs, ...)
       *dest = tmp;
     }
 }
+
+gfc_charlen_type
+string_split (gfc_charlen_type stringlen, const CHARTYPE *string,
+             gfc_charlen_type setlen, const CHARTYPE *set,
+             gfc_charlen_type pos, GFC_LOGICAL_4 back)
+{
+  gfc_charlen_type i, j;
+
+  if (!back)
+    {
+      if (pos > stringlen)
+       runtime_error ("If BACK is present with the value false, the value of "
+                      "POS shall be in the range [0, LEN (STRING)]");
+
+      for (i = pos + 1; i <= stringlen; i++)
+       {
+         for (j = 0; j < setlen; j++)
+           {
+             if (string[i - 1] == set[j])
+               {
+                 return i;
+               }
+           }
+       }
+
+      return stringlen + 1;
+    }
+  else
+    {
+      if (pos < 1 || pos > (stringlen + 1))
+       runtime_error ("If BACK is present with the value true, the value of "
+                      "POS shall be in the range [1, LEN (STRING) + 1]");
+
+      for (i = pos - 1; i != 0; i--)
+       {
+         for (j = 0; j < setlen; j++)
+           {
+             if (string[i - 1] == set[j])
+               {
+                 return i;
+               }
+           }
+       }
+
+      return 0;
+    }
+}
-- 
2.43.0

Reply via email to