On 1 March 2016 at 12:43, Greg Reagle <greg.rea...@umbc.edu> wrote:
> That's interesting.  It's funny that sam does a better job since acme is a
> successor to sam.  I wonder if/how they share code.

acme/regx.c certainly was derived from sam/regexp.c. See attached diff.

cls
--- sam/regexp.c        2016-03-01 16:52:35.085509675 +0000
+++ acme/regx.c 2016-03-01 16:52:34.861508409 +0000
@@ -1,47 +1,52 @@
-#include "sam.h"
+#include <u.h>
+#include <libc.h>
+#include <draw.h>
+#include <thread.h>
+#include <cursor.h>
+#include <mouse.h>
+#include <keyboard.h>
+#include <frame.h>
+#include <fcall.h>
+#include <plumb.h>
+#include "dat.h"
+#include "fns.h"
 
 Rangeset       sel;
-String         lastregexp;
+Rune           *lastregexp;
+
 /*
  * Machine Information
  */
 typedef struct Inst Inst;
-
 struct Inst
 {
-       long    type;   /* < 0x10000 ==> literal, otherwise action */
+       uint    type;   /* < 0x10000 ==> literal, otherwise action */
        union {
-               int rsid;
-               int rsubid;
+               int sid;
+               int subid;
                int class;
-               struct Inst *rother;
-               struct Inst *rright;
-       } r;
+               Inst *other;
+               Inst *right;
+       };
        union{
-               struct Inst *lleft;
-               struct Inst *lnext;
-       } l;
+               Inst *left;
+               Inst *next;
+       };
 };
-#define        sid     r.rsid
-#define        subid   r.rsubid
-#define        rclass  r.class
-#define        other   r.rother
-#define        right   r.rright
-#define        left    l.lleft
-#define        next    l.lnext
 
 #define        NPROG   1024
 Inst   program[NPROG];
 Inst   *progp;
 Inst   *startinst;     /* First inst. of program; might not be program[0] */
 Inst   *bstartinst;    /* same for backwards machine */
+Channel        *rechan;        /* chan(Inst*) */
 
 typedef struct Ilist Ilist;
 struct Ilist
 {
        Inst    *inst;          /* Instruction of the thread */
        Rangeset se;
-       Posn    startp;         /* first char of match */
+       uint    startp;         /* first char of match */
 };
 
 #define        NLIST   127
@@ -120,35 +125,40 @@
 void   bldcclass(void);
 
 void
-regerror(Err e)
+rxinit(void)
 {
-       Strzero(&lastregexp);
-       error(e);
+       rechan = chancreate(sizeof(Inst*), 0);
+       lastregexp = runemalloc(1);
 }
 
 void
-regerror_c(Err e, int c)
+regerror(char *e)
 {
-       Strzero(&lastregexp);
-       error_c(e, c);
+       lastregexp[0] = 0;
+       warning(nil, "regexp: %s\n", e);
+       sendp(rechan, nil);
+       threadexits(nil);
 }
 
 Inst *
 newinst(int t)
 {
        if(progp >= &program[NPROG])
-               regerror(Etoolong);
+               regerror("expression too long");
        progp->type = t;
-       progp->left = 0;
-       progp->right = 0;
+       progp->left = nil;
+       progp->right = nil;
        return progp++;
 }
 
-Inst *
-realcompile(Rune *s)
+void
+realcompile(void *arg)
 {
        int token;
+       Rune *s;
 
+       threadsetname("regcomp");
+       s = arg;
        startlex(s);
        atorp = atorstack;
        andp = andstack;
@@ -169,31 +179,44 @@
        operand(END);
        evaluntil(START);
        if(nbra)
-               regerror(Eleftpar);
+               regerror("unmatched `('");
        --andp; /* points to first and only operand */
-       return andp->first;
+       sendp(rechan, andp->first);
+       threadexits(nil);
 }
 
-void
-compile(String *s)
+/* r is null terminated */
+int
+rxcompile(Rune *r)
 {
-       int i;
+       int i, nr;
        Inst *oprogp;
 
-       if(Strcmp(s, &lastregexp)==0)
-               return;
+       nr = runestrlen(r)+1;
+       if(runeeq(lastregexp, runestrlen(lastregexp)+1, r, nr)==TRUE)
+               return TRUE;
+       lastregexp[0] = 0;
        for(i=0; i<nclass; i++)
                free(class[i]);
        nclass = 0;
        progp = program;
        backwards = FALSE;
-       startinst = realcompile(s->s);
+       bstartinst = nil;
+       threadcreate(realcompile, r, STACK);
+       startinst = recvp(rechan);
+       if(startinst == nil)
+               return FALSE;
        optimize(program);
        oprogp = progp;
        backwards = TRUE;
-       bstartinst = realcompile(s->s);
+       threadcreate(realcompile, r, STACK);
+       bstartinst = recvp(rechan);
+       if(bstartinst == nil)
+               return FALSE;
        optimize(oprogp);
-       Strduplstr(&lastregexp, s);
+       lastregexp = runerealloc(lastregexp, nr);
+       runemove(lastregexp, r, nr);
+       return TRUE;
 }
 
 void
@@ -206,7 +229,7 @@
        if(t == CCLASS){
                if(negateclass)
                        i->type = NCCLASS;      /* UGH */
-               i->rclass = nclass-1;           /* UGH */
+               i->class = nclass-1;            /* UGH */
        }
        pushand(i, i);
        lastwasand = TRUE;
@@ -216,12 +239,8 @@
 operator(int t)
 {
        if(t==RBRA && --nbra<0)
-               regerror(Erightpar);
+               regerror("unmatched `)'");
        if(t==LBRA){
-/*
- *             if(++cursubid >= NSUBEXP)
- *                     regerror(Esubexp);
- */
                cursubid++;     /* silently ignored */
                nbra++;
                if(lastwasand)
@@ -236,19 +255,10 @@
 }
 
 void
-cant(char *s)
-{
-       char buf[100];
-
-       sprint(buf, "regexp: can't happen: %s", s);
-       panic(buf);
-}
-
-void
 pushand(Inst *f, Inst *l)
 {
        if(andp >= &andstack[NSTACK])
-               cant("operand stack overflow");
+               error("operand stack overflow");
        andp->first = f;
        andp->last = l;
        andp++;
@@ -258,9 +268,9 @@
 pushator(int t)
 {
        if(atorp >= &atorstack[NSTACK])
-               cant("operator stack overflow");
+               error("operator stack overflow");
        *atorp++=t;
-       if(cursubid >= NSUBEXP)
+       if(cursubid >= NRange)
                *subidp++= -1;
        else
                *subidp++=cursubid;
@@ -269,19 +279,22 @@
 Node *
 popand(int op)
 {
+       char buf[64];
+
        if(andp <= &andstack[0])
-               if(op)
-                       regerror_c(Emissop, op);
-               else
-                       regerror(Ebadregexp);
+               if(op){
+                       sprint(buf, "missing operand for %c", op);
+                       regerror(buf);
+               }else
+                       regerror("malformed regexp");
        return --andp;
 }
 
 int
-popator(void)
+popator()
 {
        if(atorp <= &atorstack[0])
-               cant("operator stack underflow");
+               error("operator stack underflow");
        --subidp;
        return *--atorp;
 }
@@ -305,7 +318,7 @@
                        pushand(inst1, inst2);
                        return;         /* must have been RBRA */
                default:
-                       panic("unknown regexp operator");
+                       error("unknown regexp operator");
                        break;
                case OR:
                        op2 = popand('|');
@@ -321,8 +334,11 @@
                case CAT:
                        op2 = popand(0);
                        op1 = popand(0);
-                       if(backwards && op2->first->type!=END)
-                               t = op1, op1 = op2, op2 = t;
+                       if(backwards && op2->first->type!=END){
+                               t = op1;
+                               op1 = op2;
+                               op2 = t;
+                       }
                        op1->last->next = op2->first;
                        pushand(op1->first, op2->last);
                        break;
@@ -367,31 +383,6 @@
        }
 }
 
-#ifdef DEBUG
-void
-dumpstack(void){
-       Node *stk;
-       int *ip;
-
-       dprint("operators\n");
-       for(ip = atorstack; ip<atorp; ip++)
-               dprint("0%o\n", *ip);
-       dprint("operands\n");
-       for(stk = andstack; stk<andp; stk++)
-               dprint("0%o\t0%o\n", stk->first->type, stk->last->type);
-}
-void
-dump(void){
-       Inst *l;
-
-       l = program;
-       do{
-               dprint("%d:\t0%o\t%d\t%d\n", l-program, l->type,
-                       l->left-program, l->right-program);
-       }while(l++->type);
-}
-#endif
-
 void
 startlex(Rune *s)
 {
@@ -402,8 +393,9 @@
 
 int
 lex(void){
-       int c= *exprp++;
+       int c;
 
+       c = *exprp++;
        switch(c){
        case '\\':
                if(*exprp)
@@ -449,10 +441,11 @@
        return c;
 }
 
-long
-nextrec(void){
+int
+nextrec(void)
+{
        if(exprp[0]==0 || (exprp[0]=='\\' && exprp[1]==0))
-               regerror(Ebadclass);
+               regerror("malformed `[]'");
        if(exprp[0] == '\\'){
                exprp++;
                if(*exprp=='n'){
@@ -467,10 +460,10 @@
 void
 bldcclass(void)
 {
-       long c1, c2, n, na;
+       int c1, c2, n, na;
        Rune *classp;
 
-       classp = emalloc(DCLASS*RUNESIZE);
+       classp = runemalloc(DCLASS);
        n = 0;
        na = DCLASS;
        /* we have already seen the '[' */
@@ -484,11 +477,11 @@
                if(c1 == '-'){
     Error:
                        free(classp);
-                       regerror(Ebadclass);
+                       regerror("malformed `[]'");
                }
                if(n+4 >= na){          /* 3 runes plus NUL */
                        na += DCLASS;
-                       classp = erealloc(classp, na*RUNESIZE);
+                       classp = runerealloc(classp, na);
                }
                if(*exprp == '-'){
                        exprp++;        /* eat '-' */
@@ -504,7 +497,7 @@
        classp[n] = 0;
        if(nclass == Nclass){
                Nclass += DCLASS;
-               class = erealloc(class, Nclass*sizeof(Rune*));
+               class = realloc(class, Nclass*sizeof(Rune*));
        }
        class[nclass++] = classp;
 }
@@ -538,66 +531,93 @@
 
        for(p = l; p->inst; p++){
                if(p->inst==inst){
-                       if((sep)->p[0].p1 < p->se.p[0].p1)
+                       if((sep)->r[0].q0 < p->se.r[0].q0)
                                p->se= *sep;    /* this would be bug */
                        return 0;       /* It's already there */
                }
        }
        p->inst = inst;
        p->se= *sep;
-       (p+1)->inst = 0;
+       (p+1)->inst = nil;
        return 1;
 }
 
 int
-execute(File *f, Posn startp, Posn eof)
+rxnull(void)
+{
+       return startinst==nil || bstartinst==nil;
+}
+
+/* either t!=nil or r!=nil, and we match the string in the appropriate place */
+int
+rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
 {
-       int flag = 0;
+       int flag;
        Inst *inst;
        Ilist *tlp;
-       Posn p = startp;
-       int nnl = 0, ntl;
-       int c;
-       int wrapped = 0;
-       int startchar = startinst->type<OPERATOR? startinst->type : 0;
-
-       list[0][0].inst = list[1][0].inst = 0;
-       sel.p[0].p1 = -1;
+       uint p;
+       int nnl, ntl;
+       int nc, c;
+       int wrapped;
+       int startchar;
+
+       flag = 0;
+       p = startp;
+       startchar = 0;
+       wrapped = 0;
+       nnl = 0;
+       if(startinst->type<OPERATOR)
+               startchar = startinst->type;
+       list[0][0].inst = list[1][0].inst = nil;
+       sel.r[0].q0 = -1;
+       if(t != nil)
+               nc = t->file->nc;
+       else
+               nc = runestrlen(r);
        /* Execute machine once for each character */
        for(;;p++){
        doloop:
-               c = filereadc(f, p);
-               if(p>=eof || c<0){
+               if(p>=eof || p>=nc){
                        switch(wrapped++){
                        case 0:         /* let loop run one more click */
                        case 2:
                                break;
                        case 1:         /* expired; wrap to beginning */
-                               if(sel.p[0].p1>=0 || eof!=INFINITY)
+                               if(sel.r[0].q0>=0 || eof!=Infinity)
                                        goto Return;
-                               list[0][0].inst = list[1][0].inst = 0;
+                               list[0][0].inst = list[1][0].inst = nil;
                                p = 0;
                                goto doloop;
                        default:
                                goto Return;
                        }
-               }else if(((wrapped && p>=startp) || sel.p[0].p1>0) && nnl==0)
-                       break;
+                       c = 0;
+               }else{
+                       if(((wrapped && p>=startp) || sel.r[0].q0>0) && nnl==0)
+                               break;
+                       if(t != nil)
+                               c = textreadc(t, p);
+                       else
+                               c = r[p];
+               }
                /* fast check for first char */
                if(startchar && nnl==0 && c!=startchar)
                        continue;
                tl = list[flag];
                nl = list[flag^=1];
-               nl->inst = 0;
+               nl->inst = nil;
                ntl = nnl;
                nnl = 0;
-               if(sel.p[0].p1<0 && (!wrapped || p<startp || startp==eof)){
+               if(sel.r[0].q0<0 && (!wrapped || p<startp || startp==eof)){
                        /* Add first instruction to this list */
-                       sempty.p[0].p1 = p;
+                       sempty.r[0].q0 = p;
                        if(addinst(tl, startinst, &sempty))
-                       if(++ntl >= NLIST)
+                       if(++ntl >= NLIST){
        Overflow:
-                               error(Eoverflow);
+                               warning(nil, "regexp list overflow\n");
+                               sel.r[0].q0 = -1;
+                               goto Return;
+                       }
                }
                /* Execute machine until this list is empty */
                for(tlp = tl; inst = tlp->inst; tlp++){ /* assignment = */
@@ -613,12 +633,12 @@
                                break;
                        case LBRA:
                                if(inst->subid>=0)
-                                       tlp->se.p[inst->subid].p1 = p;
+                                       tlp->se.r[inst->subid].q0 = p;
                                inst = inst->next;
                                goto Switchstmt;
                        case RBRA:
                                if(inst->subid>=0)
-                                       tlp->se.p[inst->subid].p2 = p;
+                                       tlp->se.r[inst->subid].q1 = p;
                                inst = inst->next;
                                goto Switchstmt;
                        case ANY:
@@ -626,7 +646,7 @@
                                        goto Addinst;
                                break;
                        case BOL:
-                               if(p==0 || filereadc(f, p - 1)=='\n'){
+                               if(p==0 || (t!=nil && textreadc(t, p-1)=='\n') 
|| (r!=nil && r[p-1]=='\n')){
        Step:
                                        inst = inst->next;
                                        goto Switchstmt;
@@ -637,11 +657,11 @@
                                        goto Step;
                                break;
                        case CCLASS:
-                               if(c>=0 && classmatch(inst->rclass, c, 0))
+                               if(c>=0 && classmatch(inst->class, c, 0))
                                        goto Addinst;
                                break;
                        case NCCLASS:
-                               if(c>=0 && classmatch(inst->rclass, c, 1))
+                               if(c>=0 && classmatch(inst->class, c, 1))
                                        goto Addinst;
                                break;
                        case OR:
@@ -653,77 +673,89 @@
                                inst = inst->left;
                                goto Switchstmt;
                        case END:       /* Match! */
-                               tlp->se.p[0].p2 = p;
+                               tlp->se.r[0].q1 = p;
                                newmatch(&tlp->se);
                                break;
                        }
                }
        }
     Return:
-       return sel.p[0].p1>=0;
+       *rp = sel;
+       return sel.r[0].q0 >= 0;
 }
 
 void
 newmatch(Rangeset *sp)
 {
-       int i;
-
-       if(sel.p[0].p1<0 || sp->p[0].p1<sel.p[0].p1 ||
-          (sp->p[0].p1==sel.p[0].p1 && sp->p[0].p2>sel.p[0].p2))
-               for(i = 0; i<NSUBEXP; i++)
-                       sel.p[i] = sp->p[i];
+       if(sel.r[0].q0<0 || sp->r[0].q0<sel.r[0].q0 ||
+          (sp->r[0].q0==sel.r[0].q0 && sp->r[0].q1>sel.r[0].q1))
+               sel = *sp;
 }
 
 int
-bexecute(File *f, Posn startp)
+rxbexecute(Text *t, uint startp, Rangeset *rp)
 {
-       int flag = 0;
+       int flag;
        Inst *inst;
        Ilist *tlp;
-       Posn p = startp;
-       int nnl = 0, ntl;
+       int p;
+       int nnl, ntl;
        int c;
-       int wrapped = 0;
-       int startchar = bstartinst->type<OPERATOR? bstartinst->type : 0;
+       int wrapped;
+       int startchar;
 
-       list[0][0].inst = list[1][0].inst = 0;
-       sel.p[0].p1= -1;
+       flag = 0;
+       nnl = 0;
+       wrapped = 0;
+       p = startp;
+       startchar = 0;
+       if(bstartinst->type<OPERATOR)
+               startchar = bstartinst->type;
+       list[0][0].inst = list[1][0].inst = nil;
+       sel.r[0].q0= -1;
        /* Execute machine once for each character, including terminal NUL */
        for(;;--p){
        doloop:
-               if((c = filereadc(f, p - 1))==-1){
+               if(p <= 0){
                        switch(wrapped++){
                        case 0:         /* let loop run one more click */
                        case 2:
                                break;
                        case 1:         /* expired; wrap to end */
-                               if(sel.p[0].p1>=0)
-                       case 3:
+                               if(sel.r[0].q0>=0)
                                        goto Return;
-                               list[0][0].inst = list[1][0].inst = 0;
-                               p = f->nc;
+                               list[0][0].inst = list[1][0].inst = nil;
+                               p = t->file->nc;
                                goto doloop;
+                       case 3:
                        default:
                                goto Return;
                        }
-               }else if(((wrapped && p<=startp) || sel.p[0].p1>0) && nnl==0)
-                       break;
+                       c = 0;
+               }else{
+                       if(((wrapped && p<=startp) || sel.r[0].q0>0) && nnl==0)
+                               break;
+                       c = textreadc(t, p-1);
+               }
                /* fast check for first char */
                if(startchar && nnl==0 && c!=startchar)
                        continue;
                tl = list[flag];
                nl = list[flag^=1];
-               nl->inst = 0;
+               nl->inst = nil;
                ntl = nnl;
                nnl = 0;
-               if(sel.p[0].p1<0 && (!wrapped || p>startp)){
+               if(sel.r[0].q0<0 && (!wrapped || p>startp)){
                        /* Add first instruction to this list */
                        /* the minus is so the optimizations in addinst work */
-                       sempty.p[0].p1 = -p;
+                       sempty.r[0].q0 = -p;
                        if(addinst(tl, bstartinst, &sempty))
-                       if(++ntl >= NLIST)
+                       if(++ntl >= NLIST){
        Overflow:
-                               error(Eoverflow);
+                               warning(nil, "regexp list overflow\n");
+                               sel.r[0].q0 = -1;
+                               goto Return;
+                       }
                }
                /* Execute machine until this list is empty */
                for(tlp = tl; inst = tlp->inst; tlp++){ /* assignment = */
@@ -739,12 +771,12 @@
                                break;
                        case LBRA:
                                if(inst->subid>=0)
-                                       tlp->se.p[inst->subid].p1 = p;
+                                       tlp->se.r[inst->subid].q0 = p;
                                inst = inst->next;
                                goto Switchstmt;
                        case RBRA:
                                if(inst->subid >= 0)
-                                       tlp->se.p[inst->subid].p2 = p;
+                                       tlp->se.r[inst->subid].q1 = p;
                                inst = inst->next;
                                goto Switchstmt;
                        case ANY:
@@ -759,15 +791,15 @@
                                }
                                break;
                        case EOL:
-                               if(p==f->nc || filereadc(f, p)=='\n')
+                               if(p<t->file->nc && textreadc(t, p)=='\n')
                                        goto Step;
                                break;
                        case CCLASS:
-                               if(c>=0 && classmatch(inst->rclass, c, 0))
+                               if(c>0 && classmatch(inst->class, c, 0))
                                        goto Addinst;
                                break;
                        case NCCLASS:
-                               if(c>=0 && classmatch(inst->rclass, c, 1))
+                               if(c>0 && classmatch(inst->class, c, 1))
                                        goto Addinst;
                                break;
                        case OR:
@@ -779,24 +811,26 @@
                                inst = inst->left;
                                goto Switchstmt;
                        case END:       /* Match! */
-                               tlp->se.p[0].p1 = -tlp->se.p[0].p1; /* minus 
sign */
-                               tlp->se.p[0].p2 = p;
+                               tlp->se.r[0].q0 = -tlp->se.r[0].q0; /* minus 
sign */
+                               tlp->se.r[0].q1 = p;
                                bnewmatch(&tlp->se);
                                break;
                        }
                }
        }
     Return:
-       return sel.p[0].p1>=0;
+       *rp = sel;
+       return sel.r[0].q0 >= 0;
 }
 
 void
 bnewmatch(Rangeset *sp)
 {
         int  i;
-        if(sel.p[0].p1<0 || sp->p[0].p1>sel.p[0].p2 || 
(sp->p[0].p1==sel.p[0].p2 && sp->p[0].p2<sel.p[0].p1))
-                for(i = 0; i<NSUBEXP; i++){       /* note the reversal; p1<=p2 
*/
-                        sel.p[i].p1 = sp->p[i].p2;
-                        sel.p[i].p2 = sp->p[i].p1;
+
+        if(sel.r[0].q0<0 || sp->r[0].q0>sel.r[0].q1 || 
(sp->r[0].q0==sel.r[0].q1 && sp->r[0].q1<sel.r[0].q0))
+                for(i = 0; i<NRange; i++){       /* note the reversal; q0<=q1 
*/
+                        sel.r[i].q0 = sp->r[i].q1;
+                        sel.r[i].q1 = sp->r[i].q0;
                 }
 }

Reply via email to