Module Name:	src
Committed By:	rillig
Date:		Mon Aug 30 23:10:58 UTC 2021

Modified Files:
	src/tests/usr.bin/grep: t_grep.sh

Log Message:
tests/grep: demonstrate word search bug in usr.bin/grep

By default, MKBSDGREP is "no", therefore GNU Grep is installed as
/usr/bin/grep instead of src/usr.bin/grep, and this bug does not occur
in practice.


To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.5 src/tests/usr.bin/grep/t_grep.sh

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/tests/usr.bin/grep/t_grep.sh
diff -u src/tests/usr.bin/grep/t_grep.sh:1.4 src/tests/usr.bin/grep/t_grep.sh:1.5
--- src/tests/usr.bin/grep/t_grep.sh:1.4	Mon Aug 30 22:17:32 2021
+++ src/tests/usr.bin/grep/t_grep.sh	Mon Aug 30 23:10:58 2021
@@ -1,4 +1,4 @@
-# $NetBSD: t_grep.sh,v 1.4 2021/08/30 22:17:32 rillig Exp $
+# $NetBSD: t_grep.sh,v 1.5 2021/08/30 23:10:58 rillig Exp $
 #
 # Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
 # All rights reserved.
@@ -104,11 +104,91 @@ word_locale_body()
 	atf_check -o file:"input" \
 	    env LC_ALL=C grep -w "array" "input"
 
-	# XXX: In an UTF-8 locale, '[' seems to be a word character.
+	# XXX: In an UTF-8 locale, GNU Grep treats '[' as a word character.
 	atf_check -s exit:1 -o empty \
 	    env LC_ALL="C.UTF-8" grep -w "array" "input"
 }
 
+atf_test_case word_in_line
+word_in_line_head()
+{
+	atf_set "descr" "Checks word search at the beginning of a line"
+}
+word_in_line_body()
+{
+	# See usr.bin/grep/util.c, "Check for whole word match", which
+	# looks suspiciously wrong. And indeed, NetBSD grep does not
+	# survive this test. GNU Grep does.
+
+	echo "begin middle end" > "input"
+
+	# A word at the beginning of a line is found.
+	atf_check -o file:"input" \
+	    env LC_ALL=C grep -w "begin" "input"
+
+	# A word in the middle of a line is found.
+	atf_check -o file:"input" \
+	    env LC_ALL=C grep -w "middle" "input"
+
+	# A word at the end of a line is found.
+	atf_check -o file:"input" \
+	    env LC_ALL=C grep -w "end" "input"
+
+	# A subword at the beginning of a line is not found.
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL=C grep -w "be" "input"
+
+	# A subword in the middle of a line is not found.
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL=C grep -w "mid" "input"
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL=C grep -w "dle" "input"
+
+	# A subword at the end of a line is not found.
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL=C grep -w "nd" "input"
+}
+
+atf_test_case word_in_line_utf8
+word_in_line_utf8_head()
+{
+	atf_set "descr" "Checks word search at the beginning of a line"
+}
+word_in_line_utf8_body()
+{
+	# See usr.bin/grep/util.c, "Check for whole word match", which
+	# looks suspiciously wrong. And indeed, NetBSD grep does not
+	# survive this test. GNU Grep does.
+
+	echo "begin middle end" > "input"
+
+	# A word at the beginning of a line is found.
+	atf_check -o file:"input" \
+	    env LC_ALL="C.UTF-8" grep -w "begin" "input"
+
+	# A word in the middle of a line is found.
+	atf_check -o file:"input" \
+	    env LC_ALL="C.UTF-8" grep -w "middle" "input"
+
+	# A word at the end of a line is found.
+	atf_check -o file:"input" \
+	    env LC_ALL="C.UTF-8" grep -w "end" "input"
+
+	# A subword at the beginning of a line is not found.
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL="C.UTF-8" grep -w "be" "input"
+
+	# A subword in the middle of a line is not found.
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL="C.UTF-8" grep -w "mid" "input"
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL="C.UTF-8" grep -w "dle" "input"
+
+	# A subword at the end of a line is not found.
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL="C.UTF-8" grep -w "nd" "input"
+}
+
 atf_test_case begin_end
 begin_end_head()
 {
@@ -255,6 +335,8 @@ atf_init_test_cases()
 	atf_add_test_case recurse_symlink
 	atf_add_test_case word_regexps
 	atf_add_test_case word_locale
+	atf_add_test_case word_in_line
+	atf_add_test_case word_in_line_utf8
 	atf_add_test_case begin_end
 	atf_add_test_case ignore_case
 	atf_add_test_case invert