Author: kevans
Date: Sat Dec  5 03:13:47 2020
New Revision: 368357
URL: https://svnweb.freebsd.org/changeset/base/368357

Log:
  libregex: implement \` and \' (begin-of-subj, end-of-subj)
  
  These are GNU extensions, generally equivalent to ^ and $, except that in
  a multi-line subject \` matches only at the very beginning of the subject
  (never at the beginning of a later line) and \' matches only at the very
  end of the subject (never at the end of an earlier line).

Modified:
  head/lib/libc/regex/engine.c
  head/lib/libc/regex/regcomp.c
  head/lib/libc/regex/regex2.h
  head/lib/libregex/tests/gnuext.in

Modified: head/lib/libc/regex/engine.c
==============================================================================
--- head/lib/libc/regex/engine.c        Sat Dec  5 02:23:11 2020        
(r368356)
+++ head/lib/libc/regex/engine.c        Sat Dec  5 03:13:47 2020        
(r368357)
@@ -109,7 +109,7 @@ static int matcher(struct re_guts *g, const char *stri
 static const char *dissect(struct match *m, const char *start, const char 
*stop, sopno startst, sopno stopst);
 static const char *backref(struct match *m, const char *start, const char 
*stop, sopno startst, sopno stopst, sopno lev, int);
 static const char *walk(struct match *m, const char *start, const char *stop, 
sopno startst, sopno stopst, bool fast);
-static states step(struct re_guts *g, sopno start, sopno stop, states bef, 
wint_t ch, states aft);
+static states step(struct re_guts *g, sopno start, sopno stop, states bef, 
wint_t ch, states aft, int sflags);
 #define MAX_RECURSION  100
 #define        BOL     (OUT-1)
 #define        EOL     (BOL-1)
@@ -119,6 +119,10 @@ static states step(struct re_guts *g, sopno start, sop
 #define        EOW     (BOL-5)
 #define        BADCHAR (BOL-6)
 #define        NONCHAR(c)      ((c) <= OUT)
+/* sflags */
+#define        SBOS    0x0001
+#define        SEOS    0x0002
+
 #ifdef REDEBUG
 static void print(struct match *m, const char *caption, states st, int ch, 
FILE *d);
 #endif
@@ -457,6 +461,8 @@ dissect(struct match *m,
                case OEOL:
                case OBOW:
                case OEOW:
+               case OBOS:
+               case OEOS:
                        break;
                case OANY:
                case OANYOF:
@@ -657,6 +663,18 @@ backref(struct match *m,
                        if (wc == BADCHAR || !CHIN(cs, wc))
                                return(NULL);
                        break;
+               case OBOS:
+                       if (sp == m->beginp && (m->eflags & REG_NOTBOL) == 0)
+                               { /* yes */ }
+                       else
+                               return(NULL);
+                       break;
+               case OEOS:
+                       if (sp == m->endp && (m->eflags & REG_NOTEOL) == 0)
+                               { /* yes */ }
+                       else
+                               return(NULL);
+                       break;
                case OBOL:
                        if ((sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
                            (sp > m->offp && sp < m->endp &&
@@ -819,15 +837,16 @@ walk(struct match *m, const char *start, const char *s
        wint_t c;
        wint_t lastc;           /* previous c */
        wint_t flagch;
-       int i;
+       int i, sflags;
        const char *matchp;     /* last p at which a match ended */
        size_t clen;
 
+       sflags = 0;
        AT("slow", start, stop, startst, stopst);
        CLEAR(st);
        SET1(st, startst);
        SP("sstart", st, *p);
-       st = step(m->g, startst, stopst, st, NOTHING, st);
+       st = step(m->g, startst, stopst, st, NOTHING, st, sflags);
        if (fast)
                ASSIGN(fresh, st);
        matchp = NULL;
@@ -844,6 +863,7 @@ walk(struct match *m, const char *start, const char *s
        for (;;) {
                /* next character */
                lastc = c;
+               sflags = 0;
                if (p == m->endp) {
                        c = OUT;
                        clen = 0;
@@ -866,9 +886,20 @@ walk(struct match *m, const char *start, const char *s
                        flagch = (flagch == BOL) ? BOLEOL : EOL;
                        i += m->g->neol;
                }
+               if (lastc == OUT && (m->eflags & REG_NOTBOL) == 0) {
+                       sflags |= SBOS;
+                       /* Step one more for BOS. */
+                       i++;
+               }
+               if (c == OUT && (m->eflags & REG_NOTEOL) == 0) {
+                       sflags |= SEOS;
+                       /* Step one more for EOS. */
+                       i++;
+               }
                if (i != 0) {
                        for (; i > 0; i--)
-                               st = step(m->g, startst, stopst, st, flagch, 
st);
+                               st = step(m->g, startst, stopst, st, flagch, st,
+                                   sflags);
                        SP("sboleol", st, c);
                }
 
@@ -882,7 +913,7 @@ walk(struct match *m, const char *start, const char *s
                        flagch = EOW;
                }
                if (flagch == BOW || flagch == EOW) {
-                       st = step(m->g, startst, stopst, st, flagch, st);
+                       st = step(m->g, startst, stopst, st, flagch, st, 
sflags);
                        SP("sboweow", st, c);
                }
 
@@ -903,9 +934,10 @@ walk(struct match *m, const char *start, const char *s
                else
                        ASSIGN(st, empty);
                assert(c != OUT);
-               st = step(m->g, startst, stopst, tmp, c, st);
+               st = step(m->g, startst, stopst, tmp, c, st, sflags);
                SP("saft", st, c);
-               assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
+               assert(EQ(step(m->g, startst, stopst, st, NOTHING, st, sflags),
+                   st));
                p += clen;
        }
 
@@ -939,7 +971,8 @@ step(struct re_guts *g,
        sopno stop,             /* state after stop state within strip */
        states bef,             /* states reachable before */
        wint_t ch,              /* character or NONCHAR code */
-       states aft)             /* states already known reachable after */
+       states aft,             /* states already known reachable after */
+       int sflags)             /* state flags */
 {
        cset *cs;
        sop s;
@@ -958,6 +991,14 @@ step(struct re_guts *g,
                        /* only characters can match */
                        assert(!NONCHAR(ch) || ch != OPND(s));
                        if (ch == OPND(s))
+                               FWD(aft, bef, 1);
+                       break;
+               case OBOS:
+                       if ((ch == BOL || ch == BOLEOL) && (sflags & SBOS) != 0)
+                               FWD(aft, bef, 1);
+                       break;
+               case OEOS:
+                       if ((ch == EOL || ch == BOLEOL) && (sflags & SEOS) != 0)
                                FWD(aft, bef, 1);
                        break;
                case OBOL:

Modified: head/lib/libc/regex/regcomp.c
==============================================================================
--- head/lib/libc/regex/regcomp.c       Sat Dec  5 02:23:11 2020        
(r368356)
+++ head/lib/libc/regex/regcomp.c       Sat Dec  5 03:13:47 2020        
(r368357)
@@ -480,6 +480,12 @@ p_ere_exp(struct parse *p, struct branchc *bc)
                if (p->gnuext) {
                        handled = 1;
                        switch (wc) {
+                       case '`':
+                               EMIT(OBOS, 0);
+                               break;
+                       case '\'':
+                               EMIT(OEOS, 0);
+                               break;
                        case 'W':
                        case 'w':
                        case 'S':
@@ -833,6 +839,12 @@ p_simp_re(struct parse *p, struct branchc *bc)
                if (p->gnuext) {
                        handled = true;
                        switch (c) {
+                       case BACKSL|'`':
+                               EMIT(OBOS, 0);
+                               break;
+                       case BACKSL|'\'':
+                               EMIT(OEOS, 0);
+                               break;
                        case BACKSL|'W':
                        case BACKSL|'w':
                        case BACKSL|'S':
@@ -1878,6 +1890,8 @@ findmust(struct parse *p, struct re_guts *g)
                case OEOW:
                case OBOL:
                case OEOL:
+               case OBOS:
+               case OEOS:
                case O_QUEST:
                case O_CH:
                case OEND:

Modified: head/lib/libc/regex/regex2.h
==============================================================================
--- head/lib/libc/regex/regex2.h        Sat Dec  5 02:23:11 2020        
(r368356)
+++ head/lib/libc/regex/regex2.h        Sat Dec  5 03:13:47 2020        
(r368357)
@@ -104,6 +104,8 @@ typedef unsigned long sopno;
 #define        O_CH    (18L<<OPSHIFT)  /* end choice   back to OOR1            
*/
 #define        OBOW    (19L<<OPSHIFT)  /* begin word   -                       
*/
 #define        OEOW    (20L<<OPSHIFT)  /* end word     -                       
*/
+#define        OBOS    (21L<<OPSHIFT)  /* begin subj.  -                       
*/
+#define        OEOS    (22L<<OPSHIFT)  /* end subj.    -                       
*/
 
 /*
  * Structures for [] character-set representation.

Modified: head/lib/libregex/tests/gnuext.in
==============================================================================
--- head/lib/libregex/tests/gnuext.in   Sat Dec  5 02:23:11 2020        
(r368356)
+++ head/lib/libregex/tests/gnuext.in   Sat Dec  5 03:13:47 2020        
(r368357)
@@ -25,8 +25,12 @@ a\|b\|c      b       abc     a
 #\B[abc]\B     &       <abc>   b
 #\B[abc]+      -       <abc>   bc
 #\B[abc]\+     b       <abc>   bc
-#\`abc\'       &       abc     abc
-#\`.+\'        -       abNc    abNc
-#\`.\+\'       b       abNc    abNc
-#(\`a) -       Na
-#(a\') -       aN
+\`abc  &       abc     abc
+abc\'  &       abc     abc
+\`abc\'        &       abc     abc
+\`.+\' -       abNc    abNc
+\`.\+\'        b       abNc    abNc
+(\`a)  -       Na
+(a\`)  -       aN
+(a\')  -       aN
+(\'a)  -       Na
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to