Hi all,
The attached patch adds code in read_sf_internal to handle early
termination of reads in the presence of comma's. This is to support
legacy codes which are not standard conforming as far as we can tell.
The additions are executed only if -std=legacy is given at compile time.
It does not support kind=4 internal units since in legacy years there
should be no kind=4 internal units.
I have provuded a simplified test case for various combinations of comma
embedded strings.
This has been regression tested on x86_64-pc-linux-gnu.
OK for trunk?
This use to work way back in early versions so should probably go to 7
and 8 branches. Opinions welcome.
Regards,
Jerry
2018-11-04 Jerry DeLisle <jvdeli...@gcc.gnu.org>
* io/transfer.c (read_sf_internal): Add support for early
comma termination of internal unit formatted reads.
diff --git a/libgfortran/io/transfer.c b/libgfortran/io/transfer.c
index 31198a3cc39..0d26101cef0 100644
--- a/libgfortran/io/transfer.c
+++ b/libgfortran/io/transfer.c
@@ -241,16 +241,6 @@ read_sf_internal (st_parameter_dt *dtp, size_t *length)
&& dtp->u.p.current_unit->pad_status == PAD_NO)
hit_eof (dtp);
- /* If we have seen an eor previously, return a length of 0. The
- caller is responsible for correctly padding the input field. */
- if (dtp->u.p.sf_seen_eor)
- {
- *length = 0;
- /* Just return something that isn't a NULL pointer, otherwise the
- caller thinks an error occurred. */
- return (char*) empty_string;
- }
-
/* There are some cases with mixed DTIO where we have read a character
and saved it in the last character buffer, so we need to backup. */
if (unlikely (dtp->u.p.current_unit->child_dtio > 0 &&
@@ -260,22 +250,80 @@ read_sf_internal (st_parameter_dt *dtp, size_t *length)
sseek (dtp->u.p.current_unit->s, -1, SEEK_CUR);
}
- lorig = *length;
- if (is_char4_unit(dtp))
+ /* To support legacy code we have to scan the input string one byte
+ at a time because we don't no where an early comma may be and the
+ requested length could go passed the end of a comma shortened
+ string. We only do this if -std=legacy was given at compile
+ time. We also do not support this on kind=4 strings. */
+ if (unlikely(compile_options.warn_std == 0)) // the slow legacy way.
{
- gfc_char4_t *p = (gfc_char4_t *) mem_alloc_r4 (dtp->u.p.current_unit->s,
- length);
- base = fbuf_alloc (dtp->u.p.current_unit, lorig);
- for (size_t i = 0; i < *length; i++, p++)
- base[i] = *p > 255 ? '?' : (unsigned char) *p;
- }
- else
- base = mem_alloc_r (dtp->u.p.current_unit->s, length);
+ size_t n;
+ size_t tmp = 1;
+ char *q;
+
+ /* If we have seen an eor previously, return a length of 0. The
+ caller is responsible for correctly padding the input field. */
+ if (dtp->u.p.sf_seen_eor)
+ {
+ *length = 0;
+ /* Just return something that isn't a NULL pointer, otherwise the
+ caller thinks an error occurred. */
+ return (char*) empty_string;
+ }
+
+ /* Get the first chracter of the string to establish the base
+ address and check for comma or end-of-record condition. */
+ base = mem_alloc_r (dtp->u.p.current_unit->s, &tmp);
+ if (tmp == 0)
+ {
+ dtp->u.p.sf_seen_eor = 1;
+ *length = 0;
+ return (char*) empty_string;
+ }
+ if (*base == ',')
+ {
+ dtp->u.p.current_unit->bytes_left--;
+ *length = 0;
+ return (char*) empty_string;
+ }
- if (unlikely (lorig > *length))
+ /* Now we scan the rest and exit deal with an end-of-file
+ condition or the comma. */
+ for (n = 1; n < *length; n++)
+ {
+ q = mem_alloc_r (dtp->u.p.current_unit->s, &tmp);
+ if (tmp == 0)
+ {
+ hit_eof (dtp);
+ return NULL;
+ }
+ if (*q == ',')
+ {
+ dtp->u.p.current_unit->bytes_left -= n;
+ *length = n;
+ break;
+ }
+ }
+ }
+ else // the fast way
{
- hit_eof (dtp);
- return NULL;
+ lorig = *length;
+ if (is_char4_unit(dtp))
+ {
+ gfc_char4_t *p = (gfc_char4_t *) mem_alloc_r4 (dtp->u.p.current_unit->s,
+ length);
+ base = fbuf_alloc (dtp->u.p.current_unit, lorig);
+ for (size_t i = 0; i < *length; i++, p++)
+ base[i] = *p > 255 ? '?' : (unsigned char) *p;
+ }
+ else
+ base = mem_alloc_r (dtp->u.p.current_unit->s, length);
+
+ if (unlikely (lorig > *length))
+ {
+ hit_eof (dtp);
+ return NULL;
+ }
}
dtp->u.p.current_unit->bytes_left -= *length;
! { dg-do run }
! { dg-options "-std=legacy" }
! PR78351
program read_csv
implicit none
integer, parameter :: dbl = selected_real_kind(p=14, r=99)
call checkit("101,1.,2.,3.,7,7")
call checkit ("102,1.,,3.,,7")
call checkit (",1.,,3.,, ")
contains
subroutine checkit (text)
character(*) :: text
integer :: I1, I2, I3
real(dbl) :: R1, R2, R3
10 format (I8,3ES16.8,2I8)
I1=-99; I2=-99; I3=-99
R1=-99._DBL; R2=-99._DBL; R3=-99._DBL
read(text,10) I1, R1, R2, R3, I2, I3
if (I1 == -99) stop 1
if (I2 == -99) stop 2
if (I3 == -99) stop 3
if (R1 == -99._DBL) stop 4
if (R2 == -99._DBL) stop 5
if (R3 == -99._DBL) stop 6
end subroutine
end program