On Mon, 10 Jan 2005, Matt Diephouse wrote:

> # New Ticket Created by  Matt Diephouse
> # Please include the string:  [perl #33747]
> # in the subject line of all future correspondence about this issue.
> # <URL: http://rt.perl.org:80/rt3/Ticket/Display.html?id=33747 >
>
> I'm not sure what this should print, but I'm pretty sure the right
> answer isn't "8".
>
>   .sub main @MAIN
>     $S0 = ""
>     $S1 = substr $S0, -1, 1
>     print $S1
>     print "\n"
>     end
>   .end
>
> If you s/-1, 1/-1/, it doesn't print the 8.

 Well, you're trying to take a non-zero length substring from out of a
 zero-length string, so I would contend that Parrot ought to throw an
 exception. The reason that it doesn't is that this code in
 string_substr() in string.c does not act as intended for a zero-length
 string:

    ...

    true_offset = (UINTVAL)offset;

    /* Allow regexes to return $' easily for "aaa" =~ /aaa/ */
    if (offset == (INTVAL)string_length(interpreter, src) || length < 1) {
        return string_make_empty(interpreter, enum_stringrep_one, 0);
    }

    if (offset < 0) {
        true_offset = (UINTVAL)(src->strlen + offset);
    }

    if (true_offset > src->strlen - 1) {        /* 0 based... */
        internal_exception(SUBSTR_OUT_OF_STRING,
                "Cannot take substr outside string");
    }

    ...

 As best I can tell, the intent of the code is this:

  1) If you ask for a substring (of arbitrary length) starting at one
     character position past the end of the string, then return an
     empty string.

  2) If you ask for a zero-length substring, then return an empty string,
     regardless of the supplied offset

  3) If the supplied offset is before the start or after the end of the
     string, then throw an exception.

  4) Otherwise, continue.

 The problem here is with step three, and in particular the comparison
 between true_offset and src->strlen - 1. This does not work as intended
 for a zero-length string, because src->strlen is an _unsigned_ int, and
 so if src->strlen = 0, then src->strlen - 1 is very very large. The
 patch below fixes this bug while otherwise keeping the behaviour the
 same, but I'm posting it to the list rather than simply applying it
 because it is not clear to me that the behaviour described above is
 precisely what we want. Specifically, should this code:

   set S0, ""
   substr S2, S0, 0, 1

 throw an exception or not? At the moment, it does not (even with my
 patch), because the offset == the string length. On the other hand,
 this code:

  set S0, ""
  substr S2, S0, -1, 1

 will throw an exception. Is this behaviour correct?

 Incidentally, the reason that:

  set S0, ""
  substr S2, S0, -1

 works is that the code passes in the length of S0 as the length
 parameter, and since this is zero, string_substr returns an empty
 string, without any reference to the offset parameter.

 Simon

 PS I've also included a patch to t/op/string.t that adds tests which
 feed zero length strings to the various forms of the substr op. The
 tests assume that the behaviour described above is correct.

 ----------------------------------------------------------------------

Index: src/string.c
===================================================================
RCS file: /cvs/public/parrot/src/string.c,v
retrieving revision 1.229
diff -u -r1.229 string.c
--- src/string.c        3 Nov 2004 21:44:39 -0000       1.229
+++ src/string.c        10 Jan 2005 19:05:09 -0000
@@ -1421,7 +1421,7 @@
         true_offset = (UINTVAL)(src->strlen + offset);
     }

-    if (true_offset > src->strlen - 1) {        /* 0 based... */
+    if (src->strlen == 0 || true_offset > src->strlen - 1) {   /* 0 based... */
         internal_exception(SUBSTR_OUT_OF_STRING,
                 "Cannot take substr outside string");
     }

Index: t/op/string.t
===================================================================
RCS file: /cvs/public/parrot/t/op/string.t,v
retrieving revision 1.79
diff -u -r1.79 string.t
--- t/op/string.t       2 Jan 2005 11:34:55 -0000       1.79
+++ t/op/string.t       10 Jan 2005 19:06:18 -0000
@@ -16,7 +16,7 @@

 =cut

-use Parrot::Test tests => 135;
+use Parrot::Test tests => 144;
 use Test::More;

 output_is( <<'CODE', <<OUTPUT, "set_s_s|sc" );
@@ -512,6 +512,127 @@
   end
 CODE

+output_like( <<'CODE', <<'OUTPUT', "substr, +ve offset, zero-length string" );
+  set S0, ""
+  substr S1, S0, 10, 3
+  print S1
+  end
+CODE
+/Cannot take substr outside string/
+OUTPUT
+
+output_is( <<'CODE', <<'OUTPUT', "substr, offset 0, zero-length string" );
+  set S0, ""
+  substr S1, S0, 0, 1
+  print S1
+  print "_\n"
+  end
+CODE
+_
+OUTPUT
+
+output_like( <<'CODE', <<'OUTPUT', "substr, offset -1, zero-length string" );
+  set S0, ""
+  substr S1, S0, -1, 1
+  print S1
+  end
+CODE
+/Cannot take substr outside string/
+OUTPUT
+
+output_like( <<'CODE', <<'OUTPUT', "substr, -ve offset, zero-length string" );
+  set S0, ""
+  substr S1, S0, -10, 5
+  print S1
+  end
+CODE
+/Cannot take substr outside string/
+OUTPUT
+
+output_is( <<'CODE', <<'OUTPUT', "zero-length substr, zero-length string" );
+  set S0, ""
+  substr S1, S0, 10, 0
+  print S1
+  print "_\n"
+  end
+CODE
+_
+OUTPUT
+
+output_is( <<'CODE', <<'OUTPUT', "zero-length substr, zero-length string" );
+  set S0, ""
+  substr S1, S0, -10, 0
+  print S1
+  print "_\n"
+  end
+CODE
+_
+OUTPUT
+
+output_is( <<'CODE', <<'OUTPUT', "3-arg substr, zero-length string" );
+  set S0, ""
+  substr S1, S0, 2
+  print S1
+  print "_\n"
+  end
+CODE
+_
+OUTPUT
+
+output_is( <<'CODE', <<'OUTPUT', "5 arg substr, zero-length string" );
+  set S0, ""
+  set S1, "xyz"
+  substr S2, S0, 0, 3, S1
+  print S0
+  print "\n"
+  print S1
+  print "\n"
+  print S2
+  print "\n"
+
+  set S3, ""
+  set S4, "abcde"
+  substr S5, S3, 0, 0, S4
+  print S3
+  print "\n"
+  print S4
+  print "\n"
+  print S5
+  print "\n"
+  end
+CODE
+xyz
+xyz
+
+abcde
+abcde
+
+OUTPUT
+
+output_is( <<'CODE', <<'OUTPUT', "4 arg substr replace, zero-length string" );
+  set S0, ""
+  set S1, "xyz"
+  substr S0, 0, 3, S1
+  print S0
+  print "\n"
+  print S1
+  print "\n"
+
+  set S2, ""
+  set S3, "abcde"
+  substr S2, 0, 0, S3
+  print S2
+  print "\n"
+  print S3
+  print "\n"
+  end
+CODE
+xyz
+xyz
+abcde
+abcde
+OUTPUT
+
 output_is( <<'CODE', '<><', "concat_s_s|sc, null onto null" );
  print "<>"
  concat S0, S0


Reply via email to