From 0b5003dd7c485bceba81cf8ffa901f3646c2417d Mon Sep 17 00:00:00 2001
From: Stephane Chazelas <stephane.chazelas@gmail.com>
Date: Tue, 25 Feb 2014 15:55:04 +0000
Subject: [PATCH] align grep -Pw with grep -w

For the -w option, with -P, we used to look for the pattern surrounded by
word boundaries. That's different from what grep -w does and what the
documentation describes. Now align with grep -w and the documentation by
using PCRE look-behind and look-ahead operators to match the pattern only
if it is neither preceded nor followed by a word constituent.
* src/pcresearch.c (Pcompile): Use (?<!\w)(?:...)(?!\w) rather than
\b(?:...)\b.
* NEWS (Bug fixes): Mention it.
* tests/pcre-w: New file.
* tests/Makefile.am (TESTS): Add it.
This complements the fix for http://debbugs.gnu.org/16865
---
 NEWS              |  7 +++++++
 src/pcresearch.c  |  4 ++--
 tests/Makefile.am |  1 +
 tests/pcre-w      | 31 +++++++++++++++++++++++++++++++
 4 files changed, 41 insertions(+), 2 deletions(-)
 create mode 100755 tests/pcre-w

diff --git a/NEWS b/NEWS
index 49fe984..657f3d1 100644
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,13 @@ GNU grep NEWS                                    -*- outline -*-
   echo aa|grep -Pw '(.)\1' would fail to match, yet
   echo aa|grep -Pw '(.)\2' would match.

+  grep -Pw now works like grep -w in that the matched string has to be
+  preceded and followed by non-word constituents or the beginning or end
+  of the line (previously, word boundaries sufficed).  Before, this
+  echo a@@a| grep -Pw @@ would match, yet this
+  echo a@@a| grep -w @@ would not.  Now, they both fail to match,
+  per the documentation on how grep's -w works.
+

 * Noteworthy changes in release 2.18 (2014-02-20) [stable]

diff --git a/src/pcresearch.c b/src/pcresearch.c
index d4a20ff..319155f 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -77,7 +77,7 @@ Pcompile (char const *pattern, size_t size)
   if (match_lines)
     strcpy (n, "^(?:");
   if (match_words)
-    strcpy (n, "\\b(?:");
+    strcpy (n, "(?<!\\w)(?:");
   n += strlen (n);

   /* The PCRE interface doesn't allow NUL bytes in the pattern, so
@@ -103,7 +103,7 @@ Pcompile (char const *pattern, size_t size)
   n += patlim - p;
   *n = '\0';
   if (match_words)
-    strcpy (n, ")\\b");
+    strcpy (n, ")(?!\\w)");
   if (match_lines)
     strcpy (n, ")$");

diff --git a/tests/Makefile.am b/tests/Makefile.am
index ecbe0e6..742a580 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -83,6 +83,7 @@ TESTS =						\
   pcre-abort					\
   pcre-invalid-utf8-input			\
   pcre-utf8					\
+  pcre-w					\
   pcre-wx-backref				\
   pcre-z					\
   prefix-of-multibyte				\
diff --git a/tests/pcre-w b/tests/pcre-w
new file mode 100755
index 0000000..5040c5a
--- /dev/null
+++ b/tests/pcre-w
@@ -0,0 +1,31 @@
+#! /bin/sh
+# Before grep-2.19, grep -Pw %% wrongly matched %% in a%%a, unlike grep -w.
+#
+# Copyright (C) 2014 Free Software Foundation, Inc.
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+require_pcre_
+
+fail=0
+
+echo %aa% > in || framework_failure_
+grep -Pw aa in > out || fail=1
+compare out in || fail=1
+
+echo a%%a > in || framework_failure_
+grep -Pw %% in > out && fail=1
+compare /dev/null out || fail=1
+
+echo %%%% > in || framework_failure_
+grep -Pw %% in > out || fail=1
+compare out in || fail=1
+
+echo %% > in || framework_failure_
+grep -Pw %% in > out || fail=1
+compare out in || fail=1
+
+Exit $fail
-- 
1.9.0

