tags 476861 + patch
thanks

[ Cc-ing a somehow related bug log ]

On Sat, Apr 19, 2008 at 06:16:35PM +0200, Adeodato Simó wrote:
> Frequently, I find my self wanting to do an exact match for a package
> name contained in a multi-word field, e.g. Depends.

Same here.

> To ensure I'm not matching sub-words, I'm doing something like:
> 
>   % grep-dctrl -F Depends -e '(^| )package([, ]|$)'
> 
> Which is a tad inconvenient. I would like something like:
> 
>   % grep-dctrl --package-word -F Depends package

The attached patch does exactly that. The name of the flag I've chosen
is "--whole-pkg", YMMV. Note that in my implementation the flag
implies "-e", because the implementation is actually (extended) regex
based.

In more detail, I just "dress" the given atom(s) with regex boundaries
like yours. It might be seen as hackish, but I find it way better than
adding specific parsing of inter-package relationship
fields. Considering that the use case we are discussing is really
common, I believe it is worth implementing it this way.

I've slightly changed the regular expression boundaries with respect
to yours; mine are:

        #define RE_PKG_BEGIN    "(^| )"
        #define RE_PKG_END      "([, \\(]|$)"

with the rationale that whitespace, according to policy, does not
necessarily appear between a package name and its optional version
requirement; hence '(' can also denote the end of a package name.

A final note about whitespace: the policy is not entirely clear about
which kinds of whitespace are accepted (or else I've missed it). While
the current implementation relies on real spaces only, it might be worth
using the '[:space:]' character class instead.

Please consider applying the attached patch.

Cheers.

PS the attached patches (one for the code, one for the manpage) have
   been generated from the Git repo available at:
   
http://git.upsilon.cc/cgi-bin/gitweb.cgi?p=dctrl-tools.git;a=shortlog;h=refs/heads/features/whole-pkg

-- 
Stefano Zacchiroli -o- PhD in Computer Science \ PostDoc @ Univ. Paris 7
z...@{upsilon.cc,pps.jussieu.fr,debian.org} -<>- http://upsilon.cc/zack/
Dietro un grande uomo c'è ..|  .  |. Et ne m'en veux pas si je te tutoie
sempre uno zaino ...........| ..: |.... Je dis tu à tous ceux que j'aime
>From 13cb48a28f20a34a454b97b0b41c02b12733f988 Mon Sep 17 00:00:00 2001
From: Stefano Zacchiroli <z...@upsilon.cc>
Date: Thu, 29 Jan 2009 11:21:38 +0100
Subject: [PATCH] add '--whole-pkg' to match (eregex) on whole package names

With this flag, the given extended regex will not match substrings of
package names in fields expressing inter-package relationships (e.g.,
Depends, Recommends, ...).

Passing '--whole-pkg' implies '-e'.

(Closes: #476861) actually, proposed fix for ...
---
 grep-dctrl/grep-dctrl.c |   13 ++++++++++++-
 lib/predicate.c         |   17 +++++++++++++++--
 lib/predicate.h         |    2 ++
 3 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/grep-dctrl/grep-dctrl.c b/grep-dctrl/grep-dctrl.c
index 70a1fd1..d5fb5f5 100644
--- a/grep-dctrl/grep-dctrl.c
+++ b/grep-dctrl/grep-dctrl.c
@@ -59,7 +59,8 @@ enum {
         OPT_GE,
         OPT_MMAP,
         OPT_IGN_ERRS,
-        OPT_PATTERN
+        OPT_PATTERN,
+        OPT_WHOLE_PKG
 };
 
 #undef BANNER
@@ -139,6 +140,7 @@ static struct argp_option options[] = {
 	{ "mmap",           OPT_MMAP, 0,            0, N_("Attempt mmapping input files") },
 	{ "ignore-parse-errors", OPT_IGN_ERRS, 0,   0, N_("Ignore parse errors") },
         { "pattern",        OPT_PATTERN, N_("PATTERN"), 0, N_("Specify the pattern to search for") },
+	{ "whole-pkg",	    OPT_WHOLE_PKG, 0,	    0, N_("Do (eregex) matching on whole package names") },
 	{ 0 }
 };
 
@@ -235,6 +237,7 @@ struct atom * clone_atom(struct arguments * args)
 	rv->field_inx = atom->field_inx;
 	rv->mode = atom->mode;
 	rv->ignore_case = atom->ignore_case;
+	rv->whole_pkg = atom->whole_pkg;
 	rv->pat = atom->pat;
 	rv->patlen = atom->patlen;
 	struct atom_code * ac = args->atom_code[oa];
@@ -357,6 +360,7 @@ static struct atom * enter_atom(struct arguments * args)
 	rv->field_inx = -1;
 	rv->mode = M_SUBSTR;
 	rv->ignore_case = 0;
+	rv->whole_pkg = 0;
 	rv->pat = 0;
 	rv->patlen = 0;
 	return rv;
@@ -551,6 +555,12 @@ static error_t parse_opt (int key, char * arg, struct argp_state * state)
 		if (atom->pat == 0) fatal_enomem(0);
 		strcpy((char*)atom->pat, arg);
 		break;
+	case OPT_WHOLE_PKG:
+		debug_message("parse_opt: whole-pkg", 0);
+		atom = ENTER_ATOM;
+		atom->whole_pkg = 1;
+		set_mode(M_EREGEX);
+		break;
 	case ARGP_KEY_ARG:
 		debug_message("parse_opt: argument", 0);
 	redo:
@@ -620,6 +630,7 @@ static void dump_args(struct arguments * args)
 		printf("atoms[%zi].field_name = %s\n", i, args->p.atoms[i].field_name);
 		printf("atoms[%zi].mode = %i\n", i, args->p.atoms[i].mode);
 		printf("atoms[%zi].ignore_case = %i\n", i, args->p.atoms[i].ignore_case);
+		printf("atoms[%zi].whole_pkg = %i\n", i, args->p.atoms[i].whole_pkg);
 		printf("atoms[%zi].pat = %s\n", i, args->p.atoms[i].pat);
 	}
 	printf("proglen = %zi\n", args->p.proglen);
diff --git a/lib/predicate.c b/lib/predicate.c
index 2657457..14f8500 100644
--- a/lib/predicate.c
+++ b/lib/predicate.c
@@ -27,6 +27,9 @@
 #include "strutil.h"
 #include "version.h"
 
+#define RE_PKG_BEGIN	"(^| )"
+#define RE_PKG_END	"([, \\(]|$)"
+
 void init_predicate(struct predicate * p)
 {
 	p->num_atoms = 0;
@@ -46,6 +49,8 @@ void addinsn(struct predicate * p, int insn)
 void predicate_finish_atom(struct predicate * p)
 {
 	struct atom * atom =  get_current_atom(p);
+	char * regex_pat = NULL;
+	int regex_patlen = atom->patlen + 30;
 	debug_message("predicate_finish_atom", 0);
 	if (atom->field_name != 0) {
                 char * repl = strchr(atom->field_name, ':');
@@ -59,12 +64,20 @@ void predicate_finish_atom(struct predicate * p)
 	}
 
 	if (atom->mode == M_REGEX || atom->mode == M_EREGEX) {
+		regex_pat = calloc(1, regex_patlen);
+		if (regex_pat == 0)  fatal_enomem(0);
+		if (atom->whole_pkg)
+			strncat(regex_pat, RE_PKG_BEGIN, strlen(RE_PKG_BEGIN));
+		strncat(regex_pat, atom->pat, atom->patlen);
+		if (atom->whole_pkg)
+			strncat(regex_pat, RE_PKG_END, strlen(RE_PKG_END));
 		debug_message("compiling:", 0);
-		debug_message(atom->pat, 0);
-		int rerr = regcomp(&atom->regex, atom->pat,
+		debug_message(regex_pat, 0);
+		int rerr = regcomp(&atom->regex, regex_pat,
 				   (atom->mode == M_EREGEX ? REG_EXTENDED : 0)
 				   | REG_NOSUB
 				   | (atom->ignore_case ? REG_ICASE : 0));
+		free(regex_pat);
 		if (rerr != 0) {
 			char * s;
 			s = get_regerror(rerr, &atom->regex);
diff --git a/lib/predicate.h b/lib/predicate.h
index 6720ed7..d58b71f 100644
--- a/lib/predicate.h
+++ b/lib/predicate.h
@@ -66,6 +66,8 @@ struct atom {
 	/* A compiled version of pat; valid only when mode is M_REGEX
 	 * or M_EREGEX.  */
 	regex_t regex;
+	/* Flag: (extended) regex should match whole package names */
+	unsigned whole_pkg;
 };
 
 /* A predicate is represented as a set of atomic predicates and a
-- 
1.5.6.5

>From e791dfca88a3ddc322946993a4083b3527b37e52 Mon Sep 17 00:00:00 2001
From: Stefano Zacchiroli <z...@upsilon.cc>
Date: Thu, 29 Jan 2009 11:33:23 +0100
Subject: [PATCH] document --whole-pkg in grep-dctrl manpage

---
 man/grep-dctrl.1.cp |    6 ++++++
 1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/man/grep-dctrl.1.cp b/man/grep-dctrl.1.cp
index 36741e7..4d344f4 100644
--- a/man/grep-dctrl.1.cp
+++ b/man/grep-dctrl.1.cp
@@ -137,6 +137,12 @@ Ignore case when looking for a match in the current simple filter.
 .IP "\-X, \-\-exact\-match"
 Do an exact match (as opposed to a substring match) in the current
 simple filter.
+.IP "\-\-whole\-pkg"
+Do an extended regular expression match on whole package names,
+assuming the syntax of inter-package relationship fields such as
+Depends, Recommends, ... When this flag is given you should not worry
+about sub-package names such as "libpcre3" also matching
+"libpcre3-dev". This flag implies \-e.
 .IP "\-\-eq"
 Do an equality comparison under the Debian version number system.  If
 the pattern or the field to be searched in is not a valid Debian
-- 
1.5.6.5

Reply via email to