> On Aug 5, 2021, at 3:15 PM, Tom Lane <t...@sss.pgh.pa.us> wrote:
> 
> I don't immediately see what's different about your failing case
> versus the not-failing ones.

I have now found lots of cases of this failure.  I *believe* the backreference 
number is always greater than 1, and it always appears inside a capture group 
which then has a {0} or {0,0} quantifier applied to it.

You can find lots of cases using the attached regex generating script I whipped 
up for testing your work.  (Note this is just a quick and dirty tool for 
hacking, not anything refined.)

#!/usr/bin/perl

use strict;
use warnings;

# Pool of characters that rand_char() picks from.
our @alphabet = ('a'..'z');

# Return a non-negative integer.  The count grows by one for every
# consecutive non-zero draw of int(rand(3)) and stops at the first zero,
# so small values are the most likely outcome.
sub rand_num
{
	my $count = 0;
	until (int(rand(3)) == 0)
	{
		$count++;
	}
	return $count;
}

# Return one randomly selected element of @alphabet.
sub rand_char
{
	my $index = int(rand(scalar(@alphabet)));
	return $alphabet[$index];
}

# Pool of previously generated strings that rand_string() may return again.
our @strings;

# Return a short random string built from rand_char() characters, sometimes
# repeating one that was produced earlier.
#
# When the pool is non-empty, a three-way roll decides whether to return a
# random pooled string as-is, or to discard the oldest or the newest pooled
# string before generating a fresh one.  A fresh string is rand_num()
# characters long (so it may be empty) and is added to the pool whenever
# int(rand(2)) comes up non-zero.
sub rand_string
{
	if (@strings)
	{
		my $roll = int(rand(3));
		if ($roll == 0)
		{
			return $strings[int(rand(scalar(@strings)))];
		}
		elsif ($roll == 1)
		{
			shift(@strings);
		}
		else
		{
			pop(@strings);
		}
	}

	# Draw the length first, then one character per position.
	my $length = rand_num();
	my $fresh = '';
	$fresh .= rand_char() for (1..$length);

	push(@strings, $fresh) if (int(rand(2)));

	return $fresh;
}

# Concatenate rand_string() results, adding another piece for as long as
# int(rand(10)) comes up non-zero.  The result may be the empty string.
sub rand_long_string
{
	my @pieces;
	while (int(rand(10)))
	{
		push(@pieces, rand_string());
	}

	return join('', @pieces);
}

# Return a randomly chosen regex quantifier as a string.
#
# A single roll in 0..11 selects among:
#   0..5    *  +  ?  *?  +?  ??
#   6, 8    {m}   and {m}?
#   7, 9    {m,}  and {m,}?
#   10, 11  {m,n} and {m,n}?
# The lower bound m comes from rand_num(); the upper bound n is m plus a
# second rand_num() draw, so n is never smaller than m.
sub rand_quantifier
{
	my @simple = ('*', '+', '?', '*?', '+?', '??');
	my $roll = int(rand(12));

	return $simple[$roll] if ($roll < scalar(@simple));

	# Every remaining form needs a lower bound.
	my $min = rand_num();
	my $lazy = ($roll == 8 || $roll == 9 || $roll == 11) ? '?' : '';

	if ($roll <= 9)
	{
		# Rolls 7 and 9 are open-ended and carry a trailing comma.
		my $bounds = ($roll == 6 || $roll == 8) ? $min : $min . ',';
		return '{' . $bounds . '}' . $lazy;
	}

	# Rolls 10 and 11 also need an upper bound.
	my $max = rand_num() + $min;
	return '{' . $min . ',' . $max . '}' . $lazy;
}

# Return a random backslash escape: "\0", a backslash followed by one
# rand_char() character (as-is or upper-cased), or a backslash followed by a
# rand_string() result (as-is or upper-cased).
sub rand_escape
{
	my $roll = int(rand(5));
	my $tail;

	if ($roll == 0)
	{
		$tail = '0';
	}
	elsif ($roll == 1)
	{
		$tail = rand_char();
	}
	elsif ($roll == 2)
	{
		$tail = uc(rand_char());
	}
	elsif ($roll == 3)
	{
		$tail = rand_string();
	}
	else
	{
		# The roll is in 0..4, so this branch is roll 4.
		$tail = uc(rand_string());
	}

	return '\\' . $tail;
}

# Count of capture groups completed so far in the regex being generated.
# rand_rgx() uses it as the upper limit when choosing a back-reference
# number, and rand_regex() resets it before each new regex.
our $max_capture = 0;

# Recursively build a random regular-expression fragment and return it.
#
# $depth is the current recursion depth and is treated as 0 when omitted.
# While it is below 5 the roll covers 0..99; after that it is restricted to
# 0..19, which only selects the non-recursive alternatives.
sub rand_rgx
{
	my ($depth) = @_;
	$depth = 0 unless defined $depth;

	my $roll = int(rand($depth < 5 ? 100 : 20));
	my $deeper = $depth + 1;

	# Non-recursive alternatives (rolls 0..19).
	if ($roll == 0)
	{
		return "";
	}
	if ($roll == 2)
	{
		return rand_escape();
	}
	if ($roll < 5)
	{
		return rand_char();
	}
	if ($roll < 10 && $max_capture)
	{
		# Back-reference to one of the groups counted so far.
		return '\\' . (1 + int(rand($max_capture)));
	}
	if ($roll < 20)
	{
		# Also reached by rolls 5..9 while no group has been counted yet.
		return ".";
	}

	# Bracket expressions (rolls 20..23): odd rolls are negated, and rolls
	# below 22 hold an escape while the others hold a random string.
	if ($roll <= 23)
	{
		my $open = ($roll % 2) ? '[^' : '[';
		my $content = ($roll < 22) ? rand_escape() : rand_string();
		return $open . $content . ']';
	}

	# Capturing group (rolls 24..59).  The group is counted only after its
	# contents have been generated.
	if ($roll < 60)
	{
		my $inner = rand_rgx($deeper);
		$max_capture++;
		return '(' . $inner . ')';
	}

	# Non-capturing group (rolls 60..69).
	if ($roll < 70)
	{
		return '(?:' . rand_rgx($deeper) . ')';
	}

	# Lookahead and lookbehind constraints (rolls 71..74).
	my %lookaround = (
		71 => '(?=',
		72 => '(?!',
		73 => '(?<=',
		74 => '(?<!',
	);
	if (exists $lookaround{$roll})
	{
		return $lookaround{$roll} . rand_rgx($deeper) . ')';
	}

	# Quantified fragment (roll 75): fragment first, then its quantifier.
	if ($roll == 75)
	{
		my $atom = rand_rgx($deeper);
		return $atom . rand_quantifier();
	}

	# Everything else (roll 70 and rolls 76..99): two fragments in sequence.
	my $left = rand_rgx($deeper);
	my $right = rand_rgx($deeper);
	return $left . $right;
}

# Generate one complete random regex: zero the capture-group counter, then
# build the expression starting at recursion depth 0.
sub rand_regex
{
	$max_capture = 0;
	my $regex = rand_rgx(0);
	return $regex;
}

# Return a random subset of the flag letters below, in their listed order,
# joined into one string.  Each letter is kept when int(rand(12)) is below 2,
# so the result is frequently short and may be empty.
sub rand_flags
{
	my @flags = qw(b c e i m n p q s t w x);
	my $count = scalar(@flags);

	my $picked = '';
	foreach my $flag (@flags)
	{
		$picked .= $flag if (int(rand($count)) < 2);
	}

	return $picked;
}

# Statement templates for the generated SQL.  Each entry is a printf format
# followed by the generators that fill its %s slots, listed in the order in
# which they must be called.
my @templates = (
	[ "select '%s' ~ '%s';\n", \&rand_long_string, \&rand_regex ],
	[ "select '%s' !~ '%s';\n", \&rand_long_string, \&rand_regex ],
	[ "select regexp_match('%s', '%s');\n", \&rand_long_string, \&rand_regex ],
	[ "select regexp_matches('%s', '%s');\n", \&rand_long_string, \&rand_regex ],
	[ "select regexp_matches('%s', '%s', '%s');\n", \&rand_long_string, \&rand_regex, \&rand_flags ],
	[ "select regexp_split_to_array('%s', '%s');\n", \&rand_long_string, \&rand_regex ],
	[ "select regexp_split_to_array('%s', '%s', '%s');\n", \&rand_long_string, \&rand_regex, \&rand_flags ],
	[ "select regexp_replace('%s', '%s', '%s', '%s');\n", \&rand_long_string, \&rand_regex, \&rand_string, \&rand_flags ],
);

# Print one million rounds of the eight statements to standard output.
foreach my $round (1..1000000)
{
	foreach my $template (@templates)
	{
		my ($format, @generators) = @{$template};
		printf($format, map { $_->() } @generators);
	}
}

—
Mark Dilger
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company



Reply via email to