> On Aug 5, 2021, at 3:15 PM, Tom Lane <t...@sss.pgh.pa.us> wrote:
>
> I don't immediately see what's different about your failing case
> versus the not-failing ones.
I have now found lots of cases of this failure. I *believe* the backreference number is always greater than 1, and the backreference is always inside a capture group which then has the {0} or {0,0} quantifier applied to it. You can find lots of cases using the attached regex-generating script I whipped up for testing your work. (Note this is just a quick-and-dirty tool for hacking, not anything refined.)
#!/usr/bin/perl

# Random regular-expression query generator.
#
# Writes SQL "select" statements to standard output that apply randomly
# generated patterns (and, for some statements, random flags) to randomly
# generated subject strings via ~, !~, regexp_match, regexp_matches,
# regexp_split_to_array and regexp_replace.  Nothing is executed here.
#
# Usage: <script> [iterations]
#   iterations   number of batches of eight statements to print
#                (default: 1000000)

use strict;
use warnings;

# Characters used for literal text in subject strings and patterns.
my @alphabet = ('a' .. 'z');

# Pool of previously generated strings that rand_string() may hand out again.
my @strings;

# Number of capture groups completed so far in the pattern being generated;
# reset by rand_regex() and used by rand_rgx() to pick backreference numbers.
my $max_capture = 0;

# Returns a non-negative integer.  The value grows by one for as long as a
# three-way dice roll comes up nonzero, so small values (including 0) are the
# most likely.
sub rand_num {
    my $result = 0;
    $result++ while (int(rand(3)));
    return $result;
}

# Returns one random character from @alphabet.
sub rand_char {
    return $alphabet[int(rand(@alphabet))];
}

# Returns a random string of rand_num() characters (possibly empty).
#
# When the pool is non-empty, a three-way roll either returns a pooled string
# as-is, or drops the oldest or the newest pooled string before a fresh one is
# generated.  A fresh string is added to the pool half of the time.
sub rand_string {
    if (scalar(@strings)) {
        my $dice = int(rand(3));
        return $strings[int(rand(@strings))] if ($dice == 0);
        shift(@strings) if ($dice == 1);
        pop(@strings)   if ($dice == 2);
    }

    my $result = join('', map { rand_char() } (1 .. rand_num()));
    push(@strings, $result) if (int(rand(2)));
    return $result;
}

# Returns the concatenation of zero or more rand_string() results; another
# piece is appended for as long as a ten-way roll comes up nonzero.
sub rand_long_string {
    my $result = "";
    $result .= rand_string() while (int(rand(10)));
    return $result;
}

# Returns a random quantifier: one of * + ? (greedy or non-greedy) or a
# {m}, {m,} or {m,n} bound (greedy or non-greedy).  The lower bound comes
# from rand_num() and may be 0; the upper bound is never below the lower.
sub rand_quantifier {
    my $dice = int(rand(12));

    return "*"  if ($dice == 0);
    return "+"  if ($dice == 1);
    return "?"  if ($dice == 2);
    return "*?" if ($dice == 3);
    return "+?" if ($dice == 4);
    return "??" if ($dice == 5);

    my $beg = rand_num();
    return "{$beg}"   if ($dice == 6);
    return "{$beg,}"  if ($dice == 7);
    return "{$beg}?"  if ($dice == 8);
    return "{$beg,}?" if ($dice == 9);

    my $end = rand_num() + $beg;
    return "{$beg,$end}"  if ($dice == 10);
    return "{$beg,$end}?" if ($dice == 11);

    # Not reached: every value of $dice is handled above.
    return "";
}

# Returns a backslash followed by "0", a random lower- or upper-case letter,
# or a random lower- or upper-case string.  Because rand_string() can return
# an empty string, the result may be a lone backslash.
sub rand_escape {
    my $dice = int(rand(5));

    return '\\0'                    if ($dice == 0);
    return '\\' . rand_char()       if ($dice == 1);
    return '\\' . uc(rand_char())   if ($dice == 2);
    return '\\' . rand_string()     if ($dice == 3);
    return '\\' . uc(rand_string()) if ($dice == 4);

    # Not reached: every value of $dice is handled above.
    return "";
}

# Returns a random pattern fragment, recursing to build nested constructs.
#
#   $depth - current recursion depth (optional, defaults to 0)
#
# $max_capture is bumped only after a capture group's contents have been
# generated, so a backreference emitted inside nested groups can carry the
# number of an enclosing group that is still open.
sub rand_rgx {
    my ($depth) = @_;
    $depth = 0 unless defined $depth;

    # Choose option, but limit the choice if we're in danger of deep
    # recursion: from depth 5 on, only the base cases (0..19) can be rolled.
    my $dice = int(rand($depth < 5 ? 100 : 20));

    # Base cases
    return ""            if ($dice == 0);
    return rand_escape() if ($dice == 2);
    return rand_char()   if ($dice < 5);
    if ($dice < 10 && $max_capture) {
        my $capgroup = 1 + int(rand($max_capture));
        return '\\' . $capgroup;
    }

    # Also covers 5..9 when no capture group has been completed yet.
    return "." if ($dice < 20);

    # Bracket expressions
    return '['  . rand_escape() . ']' if ($dice == 20);
    return '[^' . rand_escape() . ']' if ($dice == 21);
    return '['  . rand_string() . ']' if ($dice == 22);
    return '[^' . rand_string() . ']' if ($dice == 23);

    # Recursive cases
    if ($dice < 60) {
        my $result = '(' . rand_rgx($depth + 1) . ')';
        $max_capture++;
        return $result;
    }
    return '(?:'  . rand_rgx($depth + 1) . ')' if ($dice < 70);
    return '(?='  . rand_rgx($depth + 1) . ')' if ($dice == 71);
    return '(?!'  . rand_rgx($depth + 1) . ')' if ($dice == 72);
    return '(?<=' . rand_rgx($depth + 1) . ')' if ($dice == 73);
    return '(?<!' . rand_rgx($depth + 1) . ')' if ($dice == 74);
    return rand_rgx($depth + 1) . rand_quantifier() if ($dice == 75);

    # Everything else (70 and 76..99): concatenate two fragments.
    return rand_rgx($depth + 1) . rand_rgx($depth + 1);
}

# Returns a complete random pattern, starting from a fresh capture count.
sub rand_regex {
    $max_capture = 0;
    return rand_rgx();
}

# Returns a random string of flag letters.  Each letter is kept independently
# when a roll over the number of flags comes up below 2.
sub rand_flags {
    # A dedicated lexical list; the letter alphabet is left untouched.
    my @flags = qw(b c e i m n p q s t w x);
    return join('', grep { int(rand(@flags)) < 2 } @flags);
}

# Number of batches to print: first command-line argument if given,
# otherwise the original fixed count.
my $iterations = @ARGV ? shift(@ARGV) : 1_000_000;
die "usage: $0 [iterations]\n" unless $iterations =~ /\A[0-9]+\z/;

for (1 .. $iterations) {
    print("select '", rand_long_string(), "' ~ '", rand_regex(), "';\n");
    print("select '", rand_long_string(), "' !~ '", rand_regex(), "';\n");
    print("select regexp_match('", rand_long_string(), "', '", rand_regex(), "');\n");
    print("select regexp_matches('", rand_long_string(), "', '", rand_regex(), "');\n");
    print("select regexp_matches('", rand_long_string(), "', '", rand_regex(), "', '", rand_flags(), "');\n");
    print("select regexp_split_to_array('", rand_long_string(), "', '", rand_regex(), "');\n");
    print("select regexp_split_to_array('", rand_long_string(), "', '", rand_regex(), "', '", rand_flags(), "');\n");
    print("select regexp_replace('", rand_long_string(), "', '", rand_regex(), "', '", rand_string(), "', '", rand_flags(), "');\n");
}
— Mark Dilger EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company