> On Jul 20, 2021, at 11:28 AM, Tomas Vondra <tomas.von...@enterprisedb.com> > wrote: > > Tomas Vondra > EnterpriseDB: http://www.enterprisedb.com > The Enterprise PostgreSQL Company > <0001-Handling-Expr-op-Expr-clauses-in-extended-stats-20210720.patch>
Hi Tomas, I tested this patch against master looking for types of clauses that uniformly get worse with the patch applied. I found some. The tests are too large to attach, but the scripts that generate them are not. To perform the tests: git checkout master perl ./gentest.pl > src/test/regress/sql/gentest.sql cat /dev/null > src/test/regress/expected/gentest.out echo "test: gentest" >> src/test/regress/parallel_schedule ./configure && make && make check cp src/test/regress/results/gentest.out src/test/regress/expected/gentest.out patch -p 1 < 0001-Handling-Expr-op-Expr-clauses-in-extended-stats-20210720.patch make check cat src/test/regress/regression.diffs | perl ./check.pl This shows patterns of conditions that get worse, such as: better:0, worse:80: A < B and A <> A or not A < A better:0, worse:80: A < B and not A <= A or A <= A better:0, worse:80: A < B or A = A better:0, worse:80: A < B or A = A or not A >= A better:0, worse:80: A < B or A >= A better:0, worse:80: A < B or A >= A and not A <> A better:0, worse:80: A < B or not A < A better:0, worse:80: A < B or not A <> A better:0, worse:80: A < B or not A <> A or A <= A better:0, worse:80: A < B or not A >= A or not A < A It seems things get worse when the conditions contain a column compared against itself. I suspect that is being handled incorrectly.
#!/usr/bin/perl use strict; use warnings; my ($query, $where, $before, $after, @before, @after); my $better = 0; my $worse = 0; my %where = (); my %gripe; while (<>) { if (/from check_estimated_rows\('(.*)'\)/) { $query = $1; if ($query =~ m/where (.*)$/) { $where = $1; my %columns = map { $_ => 1 } ($where =~ m/(column_\d+)/g); my @normal = ('A'..'Z'); my @columns = sort keys %columns; for my $i (0..$#columns) { my $old = $columns[$i]; my $new = $normal[$i]; $where =~ s/\b$old\b/$new/g; } } } elsif (m/^-(\s*(\d+)\s*\|\s*(\d+)\s*\|\s*(\d+))\s*$/) { ($before, @before) = ($1, $2, $3, $4); } elsif (m/^\+(\s*(\d+)\s*\|\s*(\d+)\s*\|\s*(\d+))\s*$/) { ($after, @after) = ($1, $2, $3, $4); $where{$where}->{better} ||= 0; $where{$where}->{worse} ||= 0; # Don't count the difference as meaningful unless we're more than 5 better or worse than before if ($after[2] > 5 + $before[2]) { $worse++; $where{$where}->{worse}++; } elsif ($after[2] + 5 < $before[2]) { $better++; $where{$where}->{better}++; } if (!exists $gripe{$where} && $where{$where}->{better} == 0 && $where{$where}->{worse} > 50) { print "TERRIBLE:\n"; print "\tQUERY: $query\n"; print "\tBEFORE: $before\n"; print "\tAFTER: $after\n\n"; $gripe{$query} = 1; } } } foreach my $where (sort keys %where) { if ($where{$where}->{better} < $where{$where}->{worse}) { print("better:", $where{$where}->{better}, ", worse:", $where{$where}->{worse}, ": $where\n"); } } print("\n\nTOTAL:\n"); print("\tbetter: $better\n"); print("\tworse: $worse\n");
#!/usr/bin/perl use strict; use warnings; our ($tblnum, $tblname, $colnum, $colname); # Generate the where clauses to be used on all tables our (%wherepattern1, %wherepattern2, %wherepattern3); my @ops = (">", "<", ">=", "<=", "=", "<>"); my @conj = ("and", "or", "and not", "or not"); for (1..100) { my $op1 = $ops[int(rand(@ops))]; my $op2 = $ops[int(rand(@ops))]; my $op3 = $ops[int(rand(@ops))]; my $conj1 = $conj[int(rand(@conj))]; my $conj2 = $conj[int(rand(@conj))]; $wherepattern1{"\%s $op1 \%s"} = 1; $wherepattern2{"\%s $op1 \%s $conj1 \%s $op2 \%s"} = 1; $wherepattern3{"\%s $op1 \%s $conj1 \%s $op2 \%s $conj2 \%s $op3 \%s"} = 1; } sub next_table { $tblnum++; $tblname = "table_$tblnum"; $colnum = 0; $colname = "column_$colnum"; } sub next_column { $colnum++; $colname = "column_$colnum"; } for my $colcnt (2..10) { next_table(); print("CREATE TABLE $tblname (\n"); for (1..$colcnt-1) { next_column(); print("\t$colname INTEGER,\n"); } next_column(); print("\t$colname INTEGER\n);\n"); print("INSERT INTO $tblname (SELECT ", join(", ", map { "gs-$_" } (1..$colcnt)), " FROM generate_series(1,100) gs);\n"); print("VACUUM FREEZE $tblname;\n"); for my $colmax (2..$colcnt) { print("CREATE STATISTICS ${tblname}_stats_${colmax} ON ", join(", ", map { "column_$_" } (1..$colmax)), " FROM $tblname;\n"); } print("ANALYZE $tblname;\n"); } # Restart the table sequence $tblnum = 0; for my $colcnt (2..10) { next_table(); for (1..100) { my $a = sprintf("column_%d", 1+int(rand($colcnt))); my $b = sprintf("column_%d", 1+int(rand($colcnt))); my $c = sprintf("column_%d", 1+int(rand($colcnt))); foreach my $where1 (keys %wherepattern1) { my $whereclause1 = sprintf($where1, $a, $b); print(" select actual, estimated, abs(actual - estimated) AS misestimate from check_estimated_rows('select * from $tblname where $whereclause1');"); } foreach my $where2 (keys %wherepattern2) { my $whereclause1 = sprintf($where2, $a, $b, $a, $c); print(" select actual, estimated, abs(actual - estimated) AS misestimate from check_estimated_rows('select * from $tblname where $whereclause1');"); } foreach my $where3 (keys %wherepattern3) { my $whereclause1 = sprintf($where3, $a, $b, $a, $c, $c, $a); print(" select actual, estimated, abs(actual - estimated) AS misestimate from check_estimated_rows('select * from $tblname where $whereclause1');"); } } }
— Mark Dilger EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company