Hi all,This patch introduces a complete implementation of the SPLIT intrinsic, including documentation and test cases. While Tobias previously mentioned that a similar effect could be achieved with existing functionality, I believe having direct support for this operation won't cause any issues. Users will still have the flexibility to choose their preferred method.
Please take a look when you have a moment! Thanks, Yuao
From 3fb261141dbc61296adb9c9361bafcd9922ad8bd Mon Sep 17 00:00:00 2001 From: Yuao Ma <c...@outlook.com> Date: Sat, 26 Jul 2025 20:59:26 +0800 Subject: [PATCH] fortran: implment split for fortran 2023 This patch includes the implementation, documentation, and test case for SPLIT. gcc/fortran/ChangeLog: * check.cc (gfc_check_split): Argument check for SPLIT. * gfortran.h (enum gfc_isym_id): Define GFC_ISYM_SPLIT. * intrinsic.cc (add_subroutines): Register SPLIT intrinsic. * intrinsic.h (gfc_check_split): New decl. (gfc_resolve_split): Ditto. * intrinsic.texi: SPLIT documentation. * iresolve.cc (gfc_resolve_split): Add resolved_sym for SPLIT. * trans-decl.cc (gfc_build_intrinsic_function_decls): Add decl for SPLIT in libgfortran. * trans-intrinsic.cc (conv_intrinsic_split): SPLIT codegen. (gfc_conv_intrinsic_subroutine): Handle SPLIT case. * trans.h (GTY): Declare gfor_fndecl_string_split{, _char4}. libgfortran/ChangeLog: * gfortran.map: Add split symbol. * intrinsics/string_intrinsics_inc.c (string_split): Runtime support for SPLIT. gcc/testsuite/ChangeLog: * gfortran.dg/split_1.f90: New test. * gfortran.dg/split_2.f90: New test. --- gcc/fortran/check.cc | 21 ++++++ gcc/fortran/gfortran.h | 2 + gcc/fortran/intrinsic.cc | 8 +++ gcc/fortran/intrinsic.h | 2 + gcc/fortran/intrinsic.texi | 64 +++++++++++++++++ gcc/fortran/iresolve.cc | 13 ++++ gcc/fortran/trans-decl.cc | 14 ++++ gcc/fortran/trans-intrinsic.cc | 71 +++++++++++++++++++ gcc/fortran/trans.h | 2 + gcc/testsuite/gfortran.dg/split_1.f90 | 28 ++++++++ gcc/testsuite/gfortran.dg/split_2.f90 | 22 ++++++ libgfortran/gfortran.map | 2 + .../intrinsics/string_intrinsics_inc.c | 52 ++++++++++++++ 13 files changed, 301 insertions(+) create mode 100644 gcc/testsuite/gfortran.dg/split_1.f90 create mode 100644 gcc/testsuite/gfortran.dg/split_2.f90 diff --git a/gcc/fortran/check.cc b/gcc/fortran/check.cc index 838d523f7c4..862652683a7 100644 --- a/gcc/fortran/check.cc +++ b/gcc/fortran/check.cc @@ -5559,6 +5559,27 @@ gfc_check_scan (gfc_expr *x, gfc_expr *y, gfc_expr *z, gfc_expr *kind) return true; } +bool +gfc_check_split (gfc_expr *string, gfc_expr *set, gfc_expr *pos, gfc_expr *back) +{ + if (!type_check (string, 0, BT_CHARACTER)) + return false; + + if (!type_check (set, 1, BT_CHARACTER)) + return false; + + if (!type_check (pos, 2, BT_INTEGER) || !scalar_check (pos, 2)) + return false; + + if (back != NULL + && (!type_check (back, 3, BT_LOGICAL) || !scalar_check (back, 3))) + return false; + + if (!same_type_check (string, 0, set, 1)) + return false; + + return true; +} bool gfc_check_secnds (gfc_expr *r) diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 85feb18be8c..d9dcd1b504f 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -729,6 +729,8 @@ enum gfc_isym_id GFC_ISYM_COSPI, GFC_ISYM_SINPI, GFC_ISYM_TANPI, + + GFC_ISYM_SPLIT, }; enum init_local_logical diff --git a/gcc/fortran/intrinsic.cc b/gcc/fortran/intrinsic.cc index 9e07627503d..259b59edc3b 100644 --- a/gcc/fortran/intrinsic.cc +++ b/gcc/fortran/intrinsic.cc @@ -3933,6 +3933,14 @@ add_subroutines (void) pt, BT_INTEGER, di, OPTIONAL, INTENT_IN, gt, BT_INTEGER, di, OPTIONAL, INTENT_OUT); + add_sym_4s ("split", GFC_ISYM_SPLIT, CLASS_IMPURE, + BT_UNKNOWN, 0, GFC_STD_F2023, + gfc_check_split, NULL, gfc_resolve_split, + "string", BT_CHARACTER, dc, REQUIRED, INTENT_IN, + "set", BT_CHARACTER, dc, REQUIRED, INTENT_IN, + "pos", BT_INTEGER, di, REQUIRED, INTENT_INOUT, + "back", BT_LOGICAL, dl, OPTIONAL, INTENT_IN); + /* The following subroutines are part of ISO_C_BINDING. */ add_sym_3s ("c_f_pointer", GFC_ISYM_C_F_POINTER, CLASS_IMPURE, BT_UNKNOWN, 0, diff --git a/gcc/fortran/intrinsic.h b/gcc/fortran/intrinsic.h index fd54588054f..8a0ab935e1f 100644 --- a/gcc/fortran/intrinsic.h +++ b/gcc/fortran/intrinsic.h @@ -215,6 +215,7 @@ bool gfc_check_mvbits (gfc_expr *, gfc_expr *, gfc_expr *, gfc_expr *, bool gfc_check_random_init (gfc_expr *, gfc_expr *); bool gfc_check_random_number (gfc_expr *); bool gfc_check_random_seed (gfc_expr *, gfc_expr *, gfc_expr *); +bool gfc_check_split (gfc_expr *, gfc_expr *, gfc_expr *, gfc_expr *); bool gfc_check_dtime_etime_sub (gfc_expr *, gfc_expr *); bool gfc_check_fgetputc_sub (gfc_expr *, gfc_expr *, gfc_expr *); bool gfc_check_fgetput_sub (gfc_expr *, gfc_expr *); @@ -693,6 +694,7 @@ void gfc_resolve_link_sub (gfc_code *); void gfc_resolve_symlnk_sub (gfc_code *); void gfc_resolve_signal_sub (gfc_code *); void gfc_resolve_sleep_sub (gfc_code *); +void gfc_resolve_split (gfc_code *); void gfc_resolve_stat_sub (gfc_code *); void gfc_resolve_system_clock (gfc_code *); void gfc_resolve_system_sub (gfc_code *); diff --git a/gcc/fortran/intrinsic.texi b/gcc/fortran/intrinsic.texi index 3103da3da09..a24b234316c 100644 --- a/gcc/fortran/intrinsic.texi +++ b/gcc/fortran/intrinsic.texi @@ -313,6 +313,7 @@ Some basic guidelines for editing this document: * @code{SIZEOF}: SIZEOF, Determine the size in bytes of an expression * @code{SLEEP}: SLEEP, Sleep for the specified number of seconds * @code{SPACING}: SPACING, Smallest distance between two numbers of a given type +* @code{SPLIT}: SPLIT, Parse a string into tokens, one at a time. * @code{SPREAD}: SPREAD, Add a dimension to an array * @code{SQRT}: SQRT, Square-root function * @code{SRAND}: SRAND, Reinitialize the random number generator @@ -14203,6 +14204,69 @@ Fortran 90 and later +@node SPLIT +@section @code{SPLIT} --- Parse a string into tokens, one at a time +@fnindex SPLIT +@cindex string, split + +@table @asis +@item @emph{Synopsis}: +@code{RESULT = SPLIT(STRING, SET, POS [, BACK])} + +@item @emph{Description}: +Updates the integer @var{POS} to the position of the next (or previous) +separator in @var{STRING}. + +If @var{BACK} is absent or is present with the value false, @var{POS} is +assigned the position of the leftmost token delimiter in @var{STRING} whose +position is greater than @var{POS}, or if there is no such character, it is +assigned a value one greater than the length of @var{STRING}. This identifies +a token with starting position one greater than the value of @var{POS} on +invocation, and ending position one less than the value of @var{POS} on return. + +If @var{BACK} is present with the value true, @var{POS} is assigned the +position of the rightmost token delimiter in @var{STRING} whose position is +less than @var{POS}, or if there is no such character, it is assigned the value +zero. This identifies a token with ending position one less than the value of +@var{POS} on invocation, and starting position one greater than the value of +@var{POS} on return. + +@item @emph{Class}: +Subroutine + +@item @emph{Arguments}: +@multitable @columnfractions .15 .70 +@item @var{STRING} @tab Shall be of type @code{CHARACTER}. +@item @var{SET} @tab Shall be of type @code{CHARACTER}. +@item @var{POS} @tab Shall be of type @code{INTEGER}. +@item @var{BACK} @tab (Optional) Shall be of type @code{LOGICAL}. +@end multitable + +@item @emph{Example}: +@smallexample +character(len=:), allocatable :: input +character(len=2) :: set = ', ' +integer :: p +input = "one,last example" +p = 0 +do + if (p > len(input)) exit + istart = p + 1 + call split(input, set, p) + iend = p - 1 + print '(t7, a)', input(istart:iend) +end do +@end smallexample + +@item @emph{Standard}: +Fortran 2023 + +@item @emph{See also}: +@ref{SCAN} +@end table + + + @node SPREAD @section @code{SPREAD} --- Add a dimension to an array @fnindex SPREAD diff --git a/gcc/fortran/iresolve.cc b/gcc/fortran/iresolve.cc index 10013096c70..da354ab5056 100644 --- a/gcc/fortran/iresolve.cc +++ b/gcc/fortran/iresolve.cc @@ -3863,6 +3863,19 @@ gfc_resolve_sleep_sub (gfc_code *c) c->resolved_sym = gfc_get_intrinsic_sub_symbol (name); } +void +gfc_resolve_split (gfc_code *c) +{ + const char *name; + gfc_expr *string; + + string = c->ext.actual->expr; + if (string->ts.type == BT_CHARACTER && string->ts.kind == 4) + name = "__split_char4"; + else + name = "__split"; + c->resolved_sym = gfc_get_intrinsic_sub_symbol (name); +} /* G77 compatibility function srand(). */ diff --git a/gcc/fortran/trans-decl.cc b/gcc/fortran/trans-decl.cc index d5acdca719f..741acc052ee 100644 --- a/gcc/fortran/trans-decl.cc +++ b/gcc/fortran/trans-decl.cc @@ -197,6 +197,7 @@ tree gfor_fndecl_string_scan; tree gfor_fndecl_string_verify; tree gfor_fndecl_string_trim; tree gfor_fndecl_string_minmax; +tree gfor_fndecl_string_split; tree gfor_fndecl_adjustl; tree gfor_fndecl_adjustr; tree gfor_fndecl_select_string; @@ -208,6 +209,7 @@ tree gfor_fndecl_string_scan_char4; tree gfor_fndecl_string_verify_char4; tree gfor_fndecl_string_trim_char4; tree gfor_fndecl_string_minmax_char4; +tree gfor_fndecl_string_split_char4; tree gfor_fndecl_adjustl_char4; tree gfor_fndecl_adjustr_char4; tree gfor_fndecl_select_string_char4; @@ -3569,6 +3571,12 @@ gfc_build_intrinsic_function_decls (void) build_pointer_type (pchar1_type_node), integer_type_node, integer_type_node); + gfor_fndecl_string_split = gfc_build_library_function_decl_with_spec ( + get_identifier (PREFIX ("string_split")), ". . R . R . . ", + gfc_charlen_type_node, 6, gfc_charlen_type_node, pchar1_type_node, + gfc_charlen_type_node, pchar1_type_node, gfc_charlen_type_node, + gfc_logical4_type_node); + gfor_fndecl_adjustl = gfc_build_library_function_decl_with_spec ( get_identifier (PREFIX("adjustl")), ". W . R ", void_type_node, 3, pchar1_type_node, gfc_charlen_type_node, @@ -3641,6 +3649,12 @@ gfc_build_intrinsic_function_decls (void) build_pointer_type (pchar4_type_node), integer_type_node, integer_type_node); + gfor_fndecl_string_split_char4 = gfc_build_library_function_decl_with_spec ( + get_identifier (PREFIX ("string_split_char4")), ". . R . R . . ", + gfc_charlen_type_node, 6, gfc_charlen_type_node, pchar4_type_node, + gfc_charlen_type_node, pchar4_type_node, gfc_charlen_type_node, + gfc_logical4_type_node); + gfor_fndecl_adjustl_char4 = gfc_build_library_function_decl_with_spec ( get_identifier (PREFIX("adjustl_char4")), ". W . R ", void_type_node, 3, pchar4_type_node, gfc_charlen_type_node, diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index be984271d6a..c2a91c93d28 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -3466,6 +3466,73 @@ else return gfc_finish_block (&block); } +static tree +conv_intrinsic_split (gfc_code *code) +{ + stmtblock_t block; + gfc_se se; + tree string, string_len; + tree set, set_len; + tree pos, pos_for_call; + tree back; + tree fndecl, call; + gfc_expr *string_expr, *set_expr, *pos_expr, *back_expr; + + string_expr = code->ext.actual->expr; + set_expr = code->ext.actual->next->expr; + pos_expr = code->ext.actual->next->next->expr; + back_expr = code->ext.actual->next->next->next->expr; + + gfc_start_block (&block); + + gfc_init_se (&se, NULL); + gfc_conv_expr (&se, string_expr); + gfc_conv_string_parameter (&se); + string_len = se.string_length; + gfc_add_block_to_block (&block, &se.pre); + gfc_add_block_to_block (&block, &se.post); + string = se.expr; + + gfc_init_se (&se, NULL); + gfc_conv_expr (&se, set_expr); + gfc_conv_string_parameter (&se); + set_len = se.string_length; + gfc_add_block_to_block (&block, &se.pre); + gfc_add_block_to_block (&block, &se.post); + set = se.expr; + + gfc_init_se (&se, NULL); + gfc_conv_expr (&se, pos_expr); + gfc_add_block_to_block (&block, &se.pre); + gfc_add_block_to_block (&block, &se.post); + pos = se.expr; + pos_for_call = fold_convert (gfc_charlen_type_node, pos); + + if (back_expr) + { + gfc_init_se (&se, NULL); + gfc_conv_expr (&se, back_expr); + gfc_add_block_to_block (&block, &se.pre); + gfc_add_block_to_block (&block, &se.post); + back = se.expr; + } + else + back = build_int_cst (gfc_get_logical_type (4), 0); + + if (string_expr->ts.kind == 1) + fndecl = gfor_fndecl_string_split; + else + fndecl = gfor_fndecl_string_split_char4; + + call = build_call_expr_loc (input_location, fndecl, 6, string_len, string, + set_len, set, pos_for_call, back); + gfc_add_expr_to_block (&block, call); + + gfc_add_modify (&block, pos, + fold_convert (gfc_typenode_for_spec (&pos_expr->ts), call)); + + return gfc_finish_block (&block); +} /* Return a character string containing the tty name. */ @@ -13261,6 +13328,10 @@ gfc_conv_intrinsic_subroutine (gfc_code *code) res = conv_intrinsic_system_clock (code); break; + case GFC_ISYM_SPLIT: + res = conv_intrinsic_split (code); + break; + default: res = NULL_TREE; break; diff --git a/gcc/fortran/trans.h b/gcc/fortran/trans.h index 461b0cdac71..40680e97cbc 100644 --- a/gcc/fortran/trans.h +++ b/gcc/fortran/trans.h @@ -961,6 +961,7 @@ extern GTY(()) tree gfor_fndecl_string_scan; extern GTY(()) tree gfor_fndecl_string_verify; extern GTY(()) tree gfor_fndecl_string_trim; extern GTY(()) tree gfor_fndecl_string_minmax; +extern GTY(()) tree gfor_fndecl_string_split; extern GTY(()) tree gfor_fndecl_adjustl; extern GTY(()) tree gfor_fndecl_adjustr; extern GTY(()) tree gfor_fndecl_select_string; @@ -972,6 +973,7 @@ extern GTY(()) tree gfor_fndecl_string_scan_char4; extern GTY(()) tree gfor_fndecl_string_verify_char4; extern GTY(()) tree gfor_fndecl_string_trim_char4; extern GTY(()) tree gfor_fndecl_string_minmax_char4; +extern GTY(()) tree gfor_fndecl_string_split_char4; extern GTY(()) tree gfor_fndecl_adjustl_char4; extern GTY(()) tree gfor_fndecl_adjustr_char4; extern GTY(()) tree gfor_fndecl_select_string_char4; diff --git a/gcc/testsuite/gfortran.dg/split_1.f90 b/gcc/testsuite/gfortran.dg/split_1.f90 new file mode 100644 index 00000000000..21659b042e7 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/split_1.f90 @@ -0,0 +1,28 @@ +! { dg-do run } +program b + character(len=:), allocatable :: input + character(len=2) :: set = ', ' + integer :: p + input = " one,last example," + p = 0 + + call split(input, set, p) + if (p /= 1) STOP 1 + call split(input, set, p) + if (p /= 5) STOP 2 + call split(input, set, p) + if (p /= 10) STOP 3 + call split(input, set, p) + if (p /= 18) STOP 4 + call split(input, set, p) + if (p /= 19) STOP 5 + + call split(input, set, p, .true.) + if (p /= 18) STOP 6 + call split(input, set, p, .true.) + if (p /= 10) STOP 7 + call split(input, set, p, .true.) + if (p /= 5) STOP 8 + call split(input, set, p, .true.) + if (p /= 1) STOP 9 +end program b diff --git a/gcc/testsuite/gfortran.dg/split_2.f90 b/gcc/testsuite/gfortran.dg/split_2.f90 new file mode 100644 index 00000000000..9afb30b01b1 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/split_2.f90 @@ -0,0 +1,22 @@ +! { dg-do run } +program b + integer, parameter :: ucs4 = selected_char_kind('ISO_10646') + character(kind=ucs4, len=:), allocatable :: input, set + integer :: p = 0 + + input = char(int(z'4f60'), ucs4) // char(int(z'597d'), ucs4) // char(int(z'4f60'), ucs4) // char(int(z'4e16'), ucs4) + set = char(int(z'597d'), ucs4) // char(int(z'4e16'), ucs4) + + call split(input, set, p) + if (p /= 2) stop 1 + call split(input, set, p) + if (p /= 4) stop 2 + call split(input, set, p) + if (p /= 5) stop 3 + call split(input, set, p, .true.) + if (p /= 4) stop 4 + call split(input, set, p, .true.) + if (p /= 2) stop 5 + call split(input, set, p, .true.) + if (p /= 0) stop 6 +end program b diff --git a/libgfortran/gfortran.map b/libgfortran/gfortran.map index 742dddfe559..d8d8c7b0aae 100644 --- a/libgfortran/gfortran.map +++ b/libgfortran/gfortran.map @@ -2031,4 +2031,6 @@ GFORTRAN_15.2 { _gfortran_maxloc1_16_m16; _gfortran_mmaxloc1_16_m16; _gfortran_smaxloc1_16_m16; + _gfortran_string_split; + _gfortran_string_split_char4; } GFORTRAN_15; diff --git a/libgfortran/intrinsics/string_intrinsics_inc.c b/libgfortran/intrinsics/string_intrinsics_inc.c index d86bb6c8833..64b3d878a74 100644 --- a/libgfortran/intrinsics/string_intrinsics_inc.c +++ b/libgfortran/intrinsics/string_intrinsics_inc.c @@ -33,6 +33,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define string_verify SUFFIX(string_verify) #define string_trim SUFFIX(string_trim) #define string_minmax SUFFIX(string_minmax) +#define string_split SUFFIX(string_split) #define zero_length_string SUFFIX(zero_length_string) #define compare_string SUFFIX(compare_string) @@ -72,6 +73,10 @@ export_proto(string_trim); extern void string_minmax (gfc_charlen_type *, CHARTYPE **, int, int, ...); export_proto(string_minmax); +extern gfc_charlen_type string_split (gfc_charlen_type, const CHARTYPE *, + gfc_charlen_type, const CHARTYPE *, + gfc_charlen_type, GFC_LOGICAL_4); +export_proto (string_split); /* Use for functions which can return a zero-length string. */ static CHARTYPE zero_length_string = 0; @@ -459,3 +464,50 @@ string_minmax (gfc_charlen_type *rlen, CHARTYPE **dest, int op, int nargs, ...) *dest = tmp; } } + +gfc_charlen_type +string_split (gfc_charlen_type stringlen, const CHARTYPE *string, + gfc_charlen_type setlen, const CHARTYPE *set, + gfc_charlen_type pos, GFC_LOGICAL_4 back) +{ + gfc_charlen_type i, j; + + if (!back) + { + if (pos > stringlen) + runtime_error ("If BACK is present with the value false, the value of " + "POS shall be in the range [0, LEN (STRING)]"); + + for (i = pos + 1; i <= stringlen; i++) + { + for (j = 0; j < setlen; j++) + { + if (string[i - 1] == set[j]) + { + return i; + } + } + } + + return stringlen + 1; + } + else + { + if (pos < 1 || pos > (stringlen + 1)) + runtime_error ("If BACK is present with the value true, the value of " + "POS shall be in the range [1, LEN (STRING) + 1]"); + + for (i = pos - 1; i != 0; i--) + { + for (j = 0; j < setlen; j++) + { + if (string[i - 1] == set[j]) + { + return i; + } + } + } + + return 0; + } +} -- 2.43.0