David Wheeler complained over in [1] that genbki.pl fails to produce a useful error message if there's anything wrong in a catalog data file. He's right about that, but the band-aid patch he proposed doesn't improve the situation much. The actual problem is that key error messages in genbki.pl expect $bki_values->{line_number} to be set, which it is not because we're invoking Catalog::ParseData with $preserve_formatting = 0, and that runs a fast path that doesn't do line-by-line processing and hence doesn't/can't fill that field.
I'm quite sure that those error cases worked as-intended when first coded. I did not check the git history, but I suppose that somebody added the non-preserve_formatting fast path later without any consideration for the damage it was doing to error reporting ability. I'm of the opinion that this change was penny-wise and pound-foolish. On my machine, the time to re-make the bki files with the fast path enabled is 0.597s, and with it disabled (measured with the attached quick-hack patch) it's 0.612s. Is that savings worth future hackers having to guess what they broke and where in a large .dat file? As you can see from the quick-hack patch, it's kind of a mess to use the $preserve_formatting = 1 case, because there are a lot of loops that have to be taught to ignore comment lines, which we don't really care about except in reformat_dat_file.pl. What I suggest doing, but have not yet coded up, is to nuke the fast path in Catalog::ParseData and reinterpret the $preserve_formatting argument as controlling whether comments and whitespace are entered in the output data structure, but not whether we parse it line-by-line. That should fix this problem with zero change in the callers, and also buy back a little bit of the cost compared to this quick hack. Thoughts? regards, tom lane [1] https://www.postgresql.org/message-id/60EF4E11-BC1C-4034-B37B-695662D28AD2%40justatheory.com
diff --git a/src/backend/catalog/Catalog.pm b/src/backend/catalog/Catalog.pm index 55a8877aed..f4b13b600d 100644 --- a/src/backend/catalog/Catalog.pm +++ b/src/backend/catalog/Catalog.pm @@ -573,6 +573,7 @@ sub FindDefinedSymbolFromData my ($data, $symbol) = @_; foreach my $row (@{$data}) { + next if !ref $row; if ($row->{oid_symbol} eq $symbol) { return $row->{oid}; diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl index 94afdc5491..df8493e39c 100644 --- a/src/backend/catalog/genbki.pl +++ b/src/backend/catalog/genbki.pl @@ -93,11 +93,14 @@ foreach my $header (@ARGV) # Not all catalogs have a data file. if (-e $datfile) { - my $data = Catalog::ParseData($datfile, $schema, 0); + my $data = Catalog::ParseData($datfile, $schema, 1); $catalog_data{$catname} = $data; foreach my $row (@$data) { + # Ignore comment lines. + next if !ref $row; + # Generate entries for pg_description and pg_shdescription. if (defined $row->{descr}) { @@ -224,6 +227,7 @@ my $C_COLLATION_OID = # can't do it, for lack of easy access to the other catalog. foreach my $row (@{ $catalog_data{pg_class} }) { + next if !ref $row; $row->{relnatts} = scalar(@{ $catalogs{ $row->{relname} }->{columns} }); } @@ -234,6 +238,7 @@ foreach my $row (@{ $catalog_data{pg_class} }) my %amoids; foreach my $row (@{ $catalog_data{pg_am} }) { + next if !ref $row; $amoids{ $row->{amname} } = $row->{oid}; } @@ -241,6 +246,7 @@ foreach my $row (@{ $catalog_data{pg_am} }) my %authidoids; foreach my $row (@{ $catalog_data{pg_authid} }) { + next if !ref $row; $authidoids{ $row->{rolname} } = $row->{oid}; } @@ -248,6 +254,7 @@ foreach my $row (@{ $catalog_data{pg_authid} }) my %classoids; foreach my $row (@{ $catalog_data{pg_class} }) { + next if !ref $row; $classoids{ $row->{relname} } = $row->{oid}; } @@ -255,6 +262,7 @@ foreach my $row (@{ $catalog_data{pg_class} }) my %collationoids; foreach my $row (@{ $catalog_data{pg_collation} }) { + next if !ref $row; $collationoids{ $row->{collname} } = $row->{oid}; } @@ -262,6 +270,7 @@ foreach my $row (@{ $catalog_data{pg_collation} }) my %langoids; foreach my $row (@{ $catalog_data{pg_language} }) { + next if !ref $row; $langoids{ $row->{lanname} } = $row->{oid}; } @@ -269,6 +278,7 @@ foreach my $row (@{ $catalog_data{pg_language} }) my %namespaceoids; foreach my $row (@{ $catalog_data{pg_namespace} }) { + next if !ref $row; $namespaceoids{ $row->{nspname} } = $row->{oid}; } @@ -276,6 +286,7 @@ foreach my $row (@{ $catalog_data{pg_namespace} }) my %opcoids; foreach my $row (@{ $catalog_data{pg_opclass} }) { + next if !ref $row; # There is no unique name, so we need to combine access method # and opclass name. my $key = sprintf "%s/%s", $row->{opcmethod}, $row->{opcname}; @@ -286,6 +297,7 @@ foreach my $row (@{ $catalog_data{pg_opclass} }) my %operoids; foreach my $row (@{ $catalog_data{pg_operator} }) { + next if !ref $row; # There is no unique name, so we need to invent one that contains # the relevant type names. my $key = sprintf "%s(%s,%s)", @@ -297,6 +309,7 @@ foreach my $row (@{ $catalog_data{pg_operator} }) my %opfoids; foreach my $row (@{ $catalog_data{pg_opfamily} }) { + next if !ref $row; # There is no unique name, so we need to combine access method # and opfamily name. my $key = sprintf "%s/%s", $row->{opfmethod}, $row->{opfname}; @@ -307,6 +320,7 @@ foreach my $row (@{ $catalog_data{pg_opfamily} }) my %procoids; foreach my $row (@{ $catalog_data{pg_proc} }) { + next if !ref $row; # Generate an entry under just the proname (corresponds to regproc lookup) my $prokey = $row->{proname}; if (defined $procoids{$prokey}) @@ -338,6 +352,7 @@ foreach my $row (@{ $catalog_data{pg_proc} }) my %tablespaceoids; foreach my $row (@{ $catalog_data{pg_tablespace} }) { + next if !ref $row; $tablespaceoids{ $row->{spcname} } = $row->{oid}; } @@ -345,6 +360,7 @@ foreach my $row (@{ $catalog_data{pg_tablespace} }) my %tsconfigoids; foreach my $row (@{ $catalog_data{pg_ts_config} }) { + next if !ref $row; $tsconfigoids{ $row->{cfgname} } = $row->{oid}; } @@ -352,6 +368,7 @@ foreach my $row (@{ $catalog_data{pg_ts_config} }) my %tsdictoids; foreach my $row (@{ $catalog_data{pg_ts_dict} }) { + next if !ref $row; $tsdictoids{ $row->{dictname} } = $row->{oid}; } @@ -359,6 +376,7 @@ foreach my $row (@{ $catalog_data{pg_ts_dict} }) my %tsparseroids; foreach my $row (@{ $catalog_data{pg_ts_parser} }) { + next if !ref $row; $tsparseroids{ $row->{prsname} } = $row->{oid}; } @@ -366,6 +384,7 @@ foreach my $row (@{ $catalog_data{pg_ts_parser} }) my %tstemplateoids; foreach my $row (@{ $catalog_data{pg_ts_template} }) { + next if !ref $row; $tstemplateoids{ $row->{tmplname} } = $row->{oid}; } @@ -374,6 +393,8 @@ my %typeoids; my %types; foreach my $row (@{ $catalog_data{pg_type} }) { + next if !ref $row; + # for OID macro substitutions $typeoids{ $row->{typname} } = $row->{oid}; @@ -582,6 +603,7 @@ EOM # Ordinary catalog with a data file foreach my $row (@{ $catalog_data{$catname} }) { + next if !ref $row; my %bki_values = %$row; # Complain about unrecognized keys; they are presumably misspelled