I've been working independently on my own regexp setup. The "patch" is attached, except my diff program ran out of memory when I tried to do a diff of core.ops. Ouch. So the stuff for core.ops is in something that looks like a patch, but really isn't. You'll have to set that up manually. The other two files this creates (t/op/re.t and include/parrot/re.h) are real patches, however.
This patch implements regular expressions, both anchored ("^") and unanchored, with support for '.', literals, and '*?' (and '+?' by extension). It comes with five tests, and the docs are in the core.ops pseudo-patch. Share and Enjoy, --Brent Dax [EMAIL PROTECTED] Configure pumpking for Perl 6 When I take action, I’m not going to fire a $2 million missile at a $10 empty tent and hit a camel in the butt. --Dubya
+++core.ops
/*
** core.ops
*/
+#include <parrot/re.h>
+
=head1 NAME

core.ops
---
+###############################################################################
+
+=head2 Regular Expression
+
+The regular expression opcodes implement certain matching, backtracking and
+other semantics, using the REINFO structure defined in F<parrot/re.h>.
+
+Note that these ops are currently in a state of constant fluctuation and may change
+at any time.
+
+Use of the regular expression ops can be very confusing.  The following example
+may help--it implements C<"afoooobarz"=~/f.o*?bar/>, putting the result in I0:
+
+        match "afoooobarz", ""
+    RE_1:
+        goforward RE_END
+        literal "f"
+        anything
+    RE_2:
+        lazyrepeat
+        literal "o"
+        saveindex
+        literal "bar"
+
+        backtrack RE_2
+        backtrack RE_1
+    RE_END:
+        endre I0
+
+=over 4
+
+=cut
+
+
+########################################
+
+=item B<match>(s, s)
+
+=item B<match>(sc, s)
+
+=item B<match>(s, sc)
+
+=item B<match>(sc, sc)
+
+Sets $1 as the string to match against and $2 as the options
+on that string.  Option handling is not implemented yet, so $2
+is currently ignored.
+
+=cut
+
+AUTO_OP match(s|sc, s|sc) {
+    /*
+     * NOTE(review): a fresh REINFO is allocated on every match and none
+     * of the regexp ops releases it; confirm who owns this memory.
+     */
+    cur_re=mem_sys_allocate(sizeof(*cur_re));
+    cur_re->string=$1;
+    /* re_options_set($2); -- unimplemented */
+    /* start in the "matching so far" state with every index at 0 */
+    cur_re->ok=1;
+    cur_re->reallyscrewed=0;
+    cur_re->index=0;
+    cur_re->startindex=0;
+    cur_re->savedindex=0;
+}
+
+########################################
+
+=item B<goforward>(ic)
+
+Implements the behavior of advancing to the next starting
+position if the current starting position fails.  $1 is the
+label to branch to if a match is impossible at the current
+position.
+
+=cut
+
+AUTO_OP goforward(ic) {
+    /* try the current start position, and bump it for the next attempt */
+    cur_re->index=cur_re->startindex++;
+
+    /* ran off the end of the string: no start position is left to try */
+    if(cur_re->index >= string_length(cur_re->string)) {
+        cur_re->ok=0;
+        RETREL($1);
+    }
+
+    /* a new attempt begins without any failure recorded */
+    cur_re->ok=1;
+}
+
+########################################
+
+=item B<literal>(s)
+
+=item B<literal>(sc)
+
+Matches the string $1 literally; $1 may be more than one
+character.
+ +=cut + +AUTO_OP literal(s|sc) { + STRING * arg=$1; + STRING * cmp=string_make(interpreter, "", 0, 0, 0, 0); + + if(cur_re->reallyscrewed || ! cur_re->ok) {RETREL(*);} + + if(string_length(cur_re->string) < cur_re->index + string_length(arg)) { + cur_re->ok=0; + RETREL(*); + } + + string_substr( + interpreter, + cur_re->string, + cur_re->index, + string_length(arg), + &cmp + ); + + if(string_compare(interpreter, cmp, arg)) { + cur_re->ok=0; + } + else { + cur_re->index += string_length(arg); + } +} + +######################################## + +=item B<anything>() + +Matches any single character; equivalent to '.' in a regexp. + +=cut + +AUTO_OP anything() { + if(cur_re->reallyscrewed || ! cur_re->ok) {RETREL(*);} + + /* XXX this shouldn't match line break, but it does */ + if(string_length(cur_re->string) < ++cur_re->index) { + cur_re->ok=0; + } +} + +######################################## + +=item B<lazyrepeat>() + +Implements the behavior of *? (and, by extension, +?)-- +if we backtrack to this opcode, it will advance us past +whatever was matched between this opcode and a B<saveindex> +call. + +=cut + +AUTO_OP lazyrepeat() { + if(cur_re->reallyscrewed) { + cur_re->reallyscrewed++; + } + if(cur_re->reallyscrewed || ! cur_re->ok) {RETREL(*);} + if(cur_re->savedindex) { + cur_re->index=cur_re->savedindex; + } +} + +######################################## + +=item B<saveindex>() + +Remembers the current position so that B<lazyrepeat> can +advance past it if we backtrack. If something between +B<lazyrepeat> and this op failed, this failure is noted so that +we don't try to backtrack to the B<lazyrepeat> again (which would +throw us into an infinite loop). 
+
+=cut
+
+AUTO_OP saveindex() {
+    if(!cur_re->reallyscrewed) { /* a pending skip leaves the state untouched */
+        if(cur_re->ok) {
+            cur_re->savedindex=cur_re->index; /* lazyrepeat resumes from here */
+        }
+        else {
+            cur_re->reallyscrewed++; /* repeat body failed: next backtrack must not jump */
+        }
+    }
+}
+
+########################################
+
+=item B<backtrack>(ic)
+
+Backtracks to the op at label $1 if the match failed (unless
+B<lazyrepeat> told us it was pointless, in which case we clear
+that flag).
+
+=cut
+
+AUTO_OP backtrack(ic) {
+    if(cur_re->reallyscrewed) {
+        cur_re->reallyscrewed--; /* consume one pending skip instead of jumping */
+    }
+    else {
+        /*
+         * if it hasn't been okay so far, we have to
+         * jump back to the thing that started the
+         * repetition.
+         */
+        if(!cur_re->ok) {
+            cur_re->ok=1; /* clear the failure before retrying from $1 */
+            RETREL($1);
+        }
+    }
+}
+
+########################################
+
+=item B<endre>(i)
+
+Sets $1 to 1 if the match has been successful, 0 otherwise.
+
+=back
+
+=cut
+
+AUTO_OP endre(i) {
+    if(cur_re->reallyscrewed) {
+        $1=0; /* a skip that was never consumed is reported as failure */
+    }
+    else {
+        $1=cur_re->ok;
+    }
+}
--- /dev/null Wed Dec 31 16:00:00 1969
+++ include\parrot\re.h Fri Nov 2 01:43:10 2001
@@ -0,0 +1,28 @@
+#if !defined(PARROT_RE_H_GUARD)
+#define PARROT_RE_H_GUARD 1
+
+#include <parrot/parrot.h>
+#include <parrot/string.h>
+
+typedef struct re_info {
+    /* the string we're matching against */
+    STRING * string;
+    /* if the match has been successful or not */
+    INTVAL ok;
+    /* indicates how many backtracks should be
+     * skipped to avoid getting caught in an
+     * infinite loop
+     */
+    INTVAL reallyscrewed;
+    /* the current index */
+    INTVAL index;
+    /* XXX these next two should be rolled up into a stack */
+    /* the index we should start at next time we hit goforward */
+    INTVAL startindex;
+    /* how far ahead 'lazyrepeat' should skip next time */
+    INTVAL savedindex;
+} REINFO;
+
+REINFO * cur_re; /* state of the one match in progress. NOTE(review): this defines the object in a header; consider extern here plus a single definition in one .c file */
+
+#endif
--- /dev/null Wed Dec 31 16:00:00 1969
+++ t\op\re.t Fri Nov 2 02:22:44 2001
@@ -0,0 +1,60 @@
+#!
perl -w
+
+use Parrot::Test tests => 5;
+
+output_is(<<'CODE', '1', "simple literal"); #1 anchored match, no goforward loop
+match "a", ""
+literal "a"
+endre I0
+
+print I0
+CODE
+
+output_is(<<'CODE', '1', "literal w/o matching front"); #2 needs a second start position
+    match "ba", ""
+RE_1:
+    goforward END
+    literal "a"
+    backtrack RE_1
+END:
+    endre I0
+    print I0
+CODE
+
+output_is(<<'CODE', '0', "fails gracefully"); #3
+    match "bb", ""
+RE_1:
+    goforward END
+    literal "a"
+    backtrack RE_1
+END:
+    endre I0
+    print I0
+CODE
+
+output_is(<<'CODE', '1', ". works"); #4
+    match "xabc", ""
+RE_1:
+    goforward END
+    anything
+    anything
+    literal "c"
+    backtrack RE_1
+END:
+    endre I0
+    print I0
+CODE
+
+output_is(<<'CODE', '1', "*? works"); #5 lazy repeat via lazyrepeat/saveindex
+    match "foobar", ""
+    literal "f"
+RE_0:
+    lazyrepeat
+    literal "o"
+    saveindex
+    literal "bar"
+    backtrack RE_0
+
+    endre I0
+    print I0
+CODE