Nicholas Clark writes:
> On Sun, Sep 07, 2003 at 02:50:35PM +0000, Peter Gibbs wrote:
> > # New Ticket Created by Peter Gibbs
> > # Please include the string: [perl #23752]
> > # in the subject line of all future correspondence about this issue.
> > # <URL: http://rt.perl.org/rt2/Ticket/Display.html?id=23752 >
> >
> >
> > The 'index' opcode searches for a substring in another string. It seems
> > reasonable to expect that equal strings would match at offset zero. But the
> > program:
> > ----------------------------------------
> >     set S0, "\xAB"
> >     find_chartype I0, "8859-1"
> >     set_chartype S0, I0
> >     find_encoding I0, "singlebyte"
> >     set_encoding S0, I0
> >
> >     find_encoding I0, "utf8"
> >     find_chartype I1, "unicode"
> >     transcode S1, S0, I0, I1
> >
> >     eq S0, S1, equal
> >     print "not "
> > equal:
> >     print "equal\n"
> >
> >     index I0, S0, S1
> >     print I0
> >     print "\n"
> >
> >     end
> > ----------------------------------------
> > yields the output:
> > ----------------------------------------
> > equal
> > -1
> > ----------------------------------------
> > which doesn't seem quite right.
>
> Looks like a bug to me, but I'm no expert on where to start searching for
> it.

Here's a fix.

Luke

Index: string.c
===================================================================
RCS file: /cvs/public/parrot/string.c,v
retrieving revision 1.145
diff -u -r1.145 string.c
--- string.c 7 Sep 2003 12:45:48 -0000 1.145
+++ string.c 21 Sep 2003 22:23:30 -0000
@@ -327,17 +327,17 @@
 string_str_index_multibyte(struct Parrot_Interp *interpreter,
         const STRING *str, const STRING *find, UINTVAL start)
 {
-    const void* const lastmatch =
-        str->encoding->skip_backward((char*)str->strstart + str->strlen,
-            find->encoding->characters(find, find->strlen));
-    const void* const lastfind = (char*)find->strstart + find->strlen;
+    const void* const lastmatch = str->encoding->skip_backward(
+            (char*)str->strstart + str->buflen, find->strlen);
+    const void* const lastfind = find->encoding->skip_forward(
+            find->strstart, find->strlen);
     const void* sp;
     const void* fp;
     const void* ip;
     INTVAL pos = start;
 
     sp = str->encoding->skip_forward(str->strstart, start);
-    while (sp < lastmatch) {
+    while (sp <= lastmatch) {
         fp = find->strstart;
         ip = sp;
 
Index: t/op/string.t
===================================================================
RCS file: /cvs/public/parrot/t/op/string.t,v
retrieving revision 1.54
diff -u -r1.54 string.t
--- t/op/string.t 20 Aug 2003 09:43:16 -0000 1.54
+++ t/op/string.t 21 Sep 2003 22:23:30 -0000
@@ -1,6 +1,6 @@
 #! perl -w
 
-use Parrot::Test tests => 117;
+use Parrot::Test tests => 119;
 use Test::More;
 
 output_is( <<'CODE', <<OUTPUT, "set_s_s|sc" );
@@ -1294,6 +1294,57 @@
 CODE
 46368
 -1
+OUTPUT
+
+output_is(<<'CODE',<<OUTPUT,"index, multibyte matching");
+    set S0, "\xAB"
+    find_chartype I0, "usascii"
+    set_chartype S0, I0
+    find_encoding I0, "singlebyte"
+    set_encoding S0, I0
+
+    find_encoding I0, "utf8"
+    find_chartype I1, "unicode"
+    transcode S1, S0, I0, I1
+
+    eq S0, S1, equal
+    print "not "
+equal:
+    print "equal\n"
+
+    index I0, S0, S1
+    print I0
+    print "\n"
+    index I0, S1, S0
+    print I0
+    print "\n"
+CODE
+equal
+0
+0
+OUTPUT
+
+output_is(<<'CODE',<<OUTPUT,"index, multibyte matching 2");
+    set S0, "\xAB\xBA"
+    set S1, "foo\xAB\xAB\xBAbar"
+    find_chartype I0, "usascii"
+    set_chartype S0, I0
+    find_encoding I0, "singlebyte"
+    set_encoding S0, I0
+
+    find_chartype I0, "unicode"
+    find_encoding I1, "utf8"
+    transcode S1, S1, I1, I0
+
+    index I0, S0, S1
+    print I0
+    print "\n"
+    index I0, S1, S0
+    print I0
+    print "\n"
+CODE
+-1
+4
 OUTPUT
 
 output_is(<<'CODE',<<OUTPUT,"num to string");