Module Name:    src
Committed By:   rillig
Date:           Mon Aug 30 23:10:58 UTC 2021

Modified Files:
        src/tests/usr.bin/grep: t_grep.sh

Log Message:
tests/grep: demonstrate word search bug in usr.bin/grep

By default, MKBSDGREP is "no", so GNU Grep is installed as
/usr/bin/grep instead of the grep from src/usr.bin/grep; therefore
this bug does not occur in practice.


To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.5 src/tests/usr.bin/grep/t_grep.sh

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/tests/usr.bin/grep/t_grep.sh
diff -u src/tests/usr.bin/grep/t_grep.sh:1.4 src/tests/usr.bin/grep/t_grep.sh:1.5
--- src/tests/usr.bin/grep/t_grep.sh:1.4	Mon Aug 30 22:17:32 2021
+++ src/tests/usr.bin/grep/t_grep.sh	Mon Aug 30 23:10:58 2021
@@ -1,4 +1,4 @@
-# $NetBSD: t_grep.sh,v 1.4 2021/08/30 22:17:32 rillig Exp $
+# $NetBSD: t_grep.sh,v 1.5 2021/08/30 23:10:58 rillig Exp $
 #
 # Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
 # All rights reserved.
@@ -104,11 +104,91 @@ word_locale_body()
 	atf_check -o file:"input" \
 	    env LC_ALL=C grep -w "array" "input"
 
-	# XXX: In an UTF-8 locale, '[' seems to be a word character.
+	# XXX: In an UTF-8 locale, GNU Grep treats '[' as a word character.
 	atf_check -s exit:1 -o empty \
 	    env LC_ALL="C.UTF-8" grep -w "array" "input"
 }
 
+atf_test_case word_in_line
+word_in_line_head()
+{
+	atf_set "descr" "Checks word search anywhere in a line, C locale"
+}
+word_in_line_body()
+{
+	# Exercises grep -w in the C locale.  See usr.bin/grep/util.c,
+	# "Check for whole word match", which looks suspiciously wrong.
+	# And indeed, NetBSD grep does not survive this test.  GNU Grep does.
+
+	echo "begin middle end" > "input"
+
+	# A word at the beginning of a line is found; the line is printed.
+	atf_check -o file:"input" \
+	    env LC_ALL=C grep -w "begin" "input"
+
+	# A word in the middle of a line is found; the line is printed.
+	atf_check -o file:"input" \
+	    env LC_ALL=C grep -w "middle" "input"
+
+	# A word at the end of a line is found; the line is printed.
+	atf_check -o file:"input" \
+	    env LC_ALL=C grep -w "end" "input"
+
+	# A subword at the beginning of a line is not found (exit status 1).
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL=C grep -w "be" "input"
+
+	# A subword in the middle of a line is not found (exit status 1).
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL=C grep -w "mid" "input"
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL=C grep -w "dle" "input"
+
+	# A subword at the end of a line is not found (exit status 1).
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL=C grep -w "nd" "input"
+}
+
+atf_test_case word_in_line_utf8
+word_in_line_utf8_head()
+{
+	atf_set "descr" "Checks word search anywhere in a line, UTF-8 locale"
+}
+word_in_line_utf8_body()
+{
+	# Exercises grep -w in the C.UTF-8 locale.  See usr.bin/grep/util.c,
+	# "Check for whole word match", which looks suspiciously wrong.
+	# And indeed, NetBSD grep does not survive this test.  GNU Grep does.
+
+	echo "begin middle end" > "input"
+
+	# A word at the beginning of a line is found; the line is printed.
+	atf_check -o file:"input" \
+	    env LC_ALL="C.UTF-8" grep -w "begin" "input"
+
+	# A word in the middle of a line is found; the line is printed.
+	atf_check -o file:"input" \
+	    env LC_ALL="C.UTF-8" grep -w "middle" "input"
+
+	# A word at the end of a line is found; the line is printed.
+	atf_check -o file:"input" \
+	    env LC_ALL="C.UTF-8" grep -w "end" "input"
+
+	# A subword at the beginning of a line is not found (exit status 1).
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL="C.UTF-8" grep -w "be" "input"
+
+	# A subword in the middle of a line is not found (exit status 1).
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL="C.UTF-8" grep -w "mid" "input"
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL="C.UTF-8" grep -w "dle" "input"
+
+	# A subword at the end of a line is not found (exit status 1).
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL="C.UTF-8" grep -w "nd" "input"
+}
+
 atf_test_case begin_end
 begin_end_head()
 {
@@ -255,6 +335,8 @@ atf_init_test_cases()
 	atf_add_test_case recurse_symlink
 	atf_add_test_case word_regexps
 	atf_add_test_case word_locale
+	atf_add_test_case word_in_line
+	atf_add_test_case word_in_line_utf8
 	atf_add_test_case begin_end
 	atf_add_test_case ignore_case
 	atf_add_test_case invert

Reply via email to