Mark Dilger <mark.dil...@enterprisedb.com> writes: > +select regexp_split_to_array('', '(?:((?:q+))){0}(\1){0,0}?*[^]'); > +server closed the connection unexpectedly
Here's a quick draft patch for this. Basically it moves the responsibility for clearing v->subs[] pointers into the freesubre() recursion, so that it will happen for contained capturing parens not only the top level. There is a potentially interesting definitional question: what exactly ought this regexp do? ((.)){0}\2 Because the capturing paren sets are zero-quantified, they will never be matched to any characters, so the backref can never have any defined referent. I suspect that study of the POSIX spec would lead to the conclusion that this is a legal regexp but it will never match anything. Implementing that would be tedious though, and what's more it seems very unlikely that the user wanted any such behavior. So I think throwing an error is an appropriate response. The existing code will throw such an error for ((.)){0}\1 so I guess Spencer did think about this to some extent -- he just forgot about the possibility of nested parens. This patch should work OK in HEAD and v14, but it will need a bit of fooling-about for older branches I think, given that they fill v->subs[] a little differently. regards, tom lane
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index ae3a7b6a38..ae1ff670f9 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -550,8 +550,6 @@ freev(struct vars *v, { if (v->re != NULL) rfree(v->re); - if (v->subs != v->sub10) - FREE(v->subs); if (v->nfa != NULL) freenfa(v->nfa); if (v->tree != NULL) @@ -562,6 +560,8 @@ freev(struct vars *v, freecvec(v->cv); if (v->cv2 != NULL) freecvec(v->cv2); + if (v->subs != v->sub10) + FREE(v->subs); if (v->lacons != NULL) freelacons(v->lacons, v->nlacons); ERR(err); /* nop if err==0 */ @@ -1091,8 +1091,8 @@ parseqatom(struct vars *v, { if (atom != NULL) freesubre(v, atom); - if (atomtype == '(') - v->subs[subno] = NULL; + /* freesubre() will clean up any v->subs[] pointers into the atom */ + assert(atomtype != '(' || v->subs[subno] == NULL); delsub(v->nfa, lp, rp); EMPTYARC(lp, rp); return top; @@ -2127,9 +2127,24 @@ freesrnode(struct vars *v, /* might be NULL */ if (sr == NULL) return; + /* + * If the node is referenced in v->subs[], clear that to avoid leaving a + * dangling pointer. This should only ever matter when parseqatom() is + * throwing away a {0,0}-quantified atom that contains capturing parens. + * Doing this will cause later backrefs to such parens to fail, which is + * maybe not strictly POSIX but seems more helpful than allowing a backref + * that can never have a defined referent. + */ + if (sr->capno > 0 && v != NULL) + { + assert(v->subs[sr->capno] == sr); + v->subs[sr->capno] = NULL; + } + if (!NULLCNFA(sr->cnfa)) freecnfa(&sr->cnfa); sr->flags = 0; /* in particular, not INUSE */ + sr->capno = 0; sr->child = sr->sibling = NULL; sr->begin = sr->end = NULL; diff --git a/src/test/modules/test_regex/expected/test_regex.out b/src/test/modules/test_regex/expected/test_regex.out index 5a6cdf47c2..850e0c8ef5 100644 --- a/src/test/modules/test_regex/expected/test_regex.out +++ b/src/test/modules/test_regex/expected/test_regex.out @@ -3506,6 +3506,10 @@ select * from test_regex('((.))(\2)', 'xyy', 'oRP'); {yy,NULL,NULL,NULL} (2 rows) +-- # throwing an error for this is arguably not per POSIX +-- expectError 21.39 - {((.)){0}(\2){0}} ESUBREG +select * from test_regex('((.)){0}(\2){0}', '', '-'); +ERROR: invalid regular expression: invalid backreference number -- doing 22 "multicharacter collating elements" -- # again ugh -- MCCEs are not implemented in Postgres, so we skip all these tests diff --git a/src/test/modules/test_regex/sql/test_regex.sql b/src/test/modules/test_regex/sql/test_regex.sql index 3419564203..378f22b67b 100644 --- a/src/test/modules/test_regex/sql/test_regex.sql +++ b/src/test/modules/test_regex/sql/test_regex.sql @@ -1020,6 +1020,9 @@ select * from test_regex('((.))(\2){0}', 'xy', 'RPQ'); select * from test_regex('((.))(\2)', 'xyy', 'RP'); -- expectMatch 21.38 oRP ((.))(\2) xyy yy {} {} {} select * from test_regex('((.))(\2)', 'xyy', 'oRP'); +-- # throwing an error for this is arguably not per POSIX +-- expectError 21.39 - {((.)){0}(\2){0}} ESUBREG +select * from test_regex('((.)){0}(\2){0}', '', '-'); -- doing 22 "multicharacter collating elements" -- # again ugh