Andrew Dunstan <and...@dunslane.net> writes:

> On 2024-07-02 Tu 8:55 AM, Dagfinn Ilmari Mannsåker wrote:
>> Relatedly, I also had a look at prohibiting unused regex captures
>> (RegularExpressions::ProhibitUnusedCapture), which found a few real
>> cases, but also lots of false positives in Catalog.pm, because it
>> doesn't understand that %+ uses all named captures, so I won't propose a
>> patch for that until that's fixed upstream
>> (https://github.com/Perl-Critic/Perl-Critic/pull/1065).
>>
>
> We could mark Catalog.pm with a "## no critic (ProhibitUnusedCapture)"
> and then use the test elsewhere.

Yeah, that's what I've done for now. Here's a sequence of patches that
fixes the existing cases of unused captures, and adds the policy and
override.

The seg-validate.pl script seems unused, unmaintained and useless (it
doesn't actually match the syntax accepted by seg, specifcially the (+-)
syntax (which my patch fixes in passing)), so maybe we should just
delete it instead?

> cheers
>
> andrew

 -ilmari

>From 17a350c8d6ec2ae887fe7784dd6b9275028dc681 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dagfinn=20Ilmari=20Manns=C3=A5ker?= <ilm...@ilmari.org>
Date: Mon, 20 May 2024 22:54:52 +0100
Subject: [PATCH 1/3] seg-validate.pl: Use qr// instead of strings to assemble
 regexes

Also eliminate unused captures, and fix the plus/minus syntax to
accept (+-), not the erroneous '+-'.
---
 contrib/seg/seg-validate.pl | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/contrib/seg/seg-validate.pl b/contrib/seg/seg-validate.pl
index 22cbca966d..518fbfb24a 100755
--- a/contrib/seg/seg-validate.pl
+++ b/contrib/seg/seg-validate.pl
@@ -5,15 +5,15 @@
 use strict;
 use warnings FATAL => 'all';
 
-my $integer = '[+-]?[0-9]+';
-my $real = '[+-]?[0-9]+\.[0-9]+';
+my $integer = qr/[+-]?[0-9]+/;
+my $real = qr/[+-]?[0-9]+\.[0-9]+/;
 
-my $RANGE = '(\.\.)(\.)?';
-my $PLUMIN = q(\'\+\-\');
-my $FLOAT = "(($integer)|($real))([eE]($integer))?";
-my $EXTENSION = '<|>|~';
+my $RANGE = qr/\.{2,3}/;
+my $PLUMIN = qr/\Q(+-)/;
+my $FLOAT = qr/(?:$integer|$real)(?:[eE]$integer)?/;
+my $EXTENSION = qr/[<>~]/;
 
-my $boundary = "($EXTENSION)?$FLOAT";
+my $boundary = qr/$EXTENSION?$FLOAT/;
 my $deviation = $FLOAT;
 
 my $rule_1 = $boundary . $PLUMIN . $deviation;
@@ -28,23 +28,23 @@
 {
 
 	#  s/ +//g;
-	if (/^($rule_1)$/)
+	if (/^$rule_1$/)
 	{
 		print;
 	}
-	elsif (/^($rule_2)$/)
+	elsif (/^$rule_2$/)
 	{
 		print;
 	}
-	elsif (/^($rule_3)$/)
+	elsif (/^$rule_3$/)
 	{
 		print;
 	}
-	elsif (/^($rule_4)$/)
+	elsif (/^$rule_4$/)
 	{
 		print;
 	}
-	elsif (/^($rule_5)$/)
+	elsif (/^$rule_5$/)
 	{
 		print;
 	}
-- 
2.39.2

>From 31ab31f409ea3305105822d91fd688ecd35b594d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dagfinn=20Ilmari=20Manns=C3=A5ker?= <ilm...@ilmari.org>
Date: Tue, 2 Jul 2024 16:25:15 +0100
Subject: [PATCH 2/3] Fix unused regular expression captures

Remove unused captures, actually use the capture, or fix the capture
number used, as appropriate.
---
 src/backend/catalog/Catalog.pm                          | 2 +-
 src/backend/utils/activity/generate-wait_event_types.pl | 5 ++---
 src/backend/utils/mb/Unicode/convutils.pm               | 4 ++--
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/backend/catalog/Catalog.pm b/src/backend/catalog/Catalog.pm
index 8e709524cb..59ebe5f434 100644
--- a/src/backend/catalog/Catalog.pm
+++ b/src/backend/catalog/Catalog.pm
@@ -69,7 +69,7 @@ sub ParseHeader
 		if (!$is_client_code)
 		{
 			# Strip C-style comments.
-			s;/\*(.|\n)*\*/;;g;
+			s;/\*.*\*/;;gs;
 			if (m;/\*;)
 			{
 
diff --git a/src/backend/utils/activity/generate-wait_event_types.pl b/src/backend/utils/activity/generate-wait_event_types.pl
index 2f2fa5bb8f..4e8dc63e6b 100644
--- a/src/backend/utils/activity/generate-wait_event_types.pl
+++ b/src/backend/utils/activity/generate-wait_event_types.pl
@@ -55,10 +55,9 @@
 	next if /^\s*$/;
 
 	# Get waitclassname based on the section
-	if (/^Section: ClassName(.*)/)
+	if (/^Section: ClassName - (\w+)/)
 	{
-		$section_name = $_;
-		$section_name =~ s/^.*- //;
+		$section_name = $1;
 		$abi_compatibility = 0;
 		next;
 	}
diff --git a/src/backend/utils/mb/Unicode/convutils.pm b/src/backend/utils/mb/Unicode/convutils.pm
index ee780acbe3..1b453d1eba 100644
--- a/src/backend/utils/mb/Unicode/convutils.pm
+++ b/src/backend/utils/mb/Unicode/convutils.pm
@@ -41,7 +41,7 @@ sub read_source
 
 		next if (/^$/);    # Ignore empty lines
 
-		next if (/^0x([0-9A-F]+)\s+(#.*)$/);
+		next if (/^0x[0-9A-F]+\s+#.*$/);
 
 		# The Unicode source files have three columns
 		# 1: The "foreign" code (in hex)
@@ -55,7 +55,7 @@ sub read_source
 		my $out = {
 			code => hex($1),
 			ucs => hex($2),
-			comment => $4,
+			comment => $3,
 			direction => BOTH,
 			f => $fname,
 			l => $.
-- 
2.39.2

>From 3558190b163378d810b5af405992ebd475c1f8a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dagfinn=20Ilmari=20Manns=C3=A5ker?= <ilm...@ilmari.org>
Date: Tue, 2 Jul 2024 16:26:10 +0100
Subject: [PATCH 3/3] Prohibit unused regular expression captures

Disable the policy in Catalog::ParseHeader, since perlcritic doesn't
(yet) realise that %+ constitutes using all named captures.
---
 src/backend/catalog/Catalog.pm   | 5 +++++
 src/tools/perlcheck/perlcriticrc | 3 +++
 2 files changed, 8 insertions(+)

diff --git a/src/backend/catalog/Catalog.pm b/src/backend/catalog/Catalog.pm
index 59ebe5f434..8f62ad6153 100644
--- a/src/backend/catalog/Catalog.pm
+++ b/src/backend/catalog/Catalog.pm
@@ -91,6 +91,10 @@ sub ParseHeader
 		# Push the data into the appropriate data structure.
 		# Caution: when adding new recognized OID-defining macros,
 		# also update src/include/catalog/renumber_oids.pl.
+		#
+		# perlcritic doesn't know that %+ uses all named captures
+		# (https://github.com/Perl-Critic/Perl-Critic/pull/1065)
+		## no critic (ProhibitUnusedCapture)
 		if (/^DECLARE_TOAST\(\s*
 			 (?<parent_table>\w+),\s*
 			 (?<toast_oid>\d+),\s*
@@ -194,6 +198,7 @@ sub ParseHeader
 			$catalog{schema_macro} = /BKI_SCHEMA_MACRO/ ? 1 : 0;
 			$declaring_attributes = 1;
 		}
+		## use critic
 		elsif ($is_client_code)
 		{
 			if (/^#endif/)
diff --git a/src/tools/perlcheck/perlcriticrc b/src/tools/perlcheck/perlcriticrc
index 6053dfcc2a..ba238b2baa 100644
--- a/src/tools/perlcheck/perlcriticrc
+++ b/src/tools/perlcheck/perlcriticrc
@@ -18,6 +18,9 @@ verbose = %f: %m at line %l, column %c.  %e.  ([%p] Severity: %s)\n
 [Variables::ProhibitUnusedVariables]
 severity = 5
 
+[RegularExpressions::ProhibitUnusedCapture]
+severity = 5
+
 # allow octal constants with leading zeros
 [-ValuesAndExpressions::ProhibitLeadingZeros]
 
-- 
2.39.2

Reply via email to