Severity: wishlist There are times when the expression is too simple, or will not be used often enough, to justify the extra time that JIT compilation requires under -P.
Make it simpler for users to pass flags to the PCRE backend, starting with a flag to disable JIT (which is enabled by default).
>From caeca5e806fe1b2e368833f05bb4cfb75763d1b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= <care...@gmail.com> Date: Sat, 16 Oct 2021 01:38:11 -0700 Subject: [PATCH] pcre: add a flag to disable JIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mainly useful for performance testing. Signed-off-by: Carlo Marcelo Arenas Belón <care...@gmail.com> --- doc/grep.texi | 4 +++- src/grep.c | 13 +++++++++++-- src/grep.h | 1 + src/pcresearch.c | 6 ++++-- tests/Makefile.am | 1 + tests/pcre-nojit | 22 ++++++++++++++++++++++ 6 files changed, 42 insertions(+), 5 deletions(-) create mode 100755 tests/pcre-nojit diff --git a/doc/grep.texi b/doc/grep.texi index e5b9fd8..8fb41ac 100644 --- a/doc/grep.texi +++ b/doc/grep.texi @@ -1138,7 +1138,7 @@ Interpret patterns as fixed strings, not regular expressions. (@option{-F} is specified by POSIX.) @item -P -@itemx --perl-regexp +@itemx --perl-regexp[=@var{FLAG}] @opindex -P @opindex --perl-regexp @cindex matching Perl-compatible regular expressions @@ -1146,6 +1146,8 @@ Interpret patterns as Perl-compatible regular expressions (PCREs). PCRE support is here to stay, but consider this option experimental when combined with the @option{-z} (@option{--null-data}) option, and note that @samp{grep@ -P} may warn of unimplemented features. +The optional flag 'no-jit' could be used to disable JIT, and only use the +slower PCRE's interpreter. @xref{Other Options}. 
@end table diff --git a/src/grep.c b/src/grep.c index a55194c..44e21b7 100644 --- a/src/grep.c +++ b/src/grep.c @@ -508,7 +508,7 @@ static struct option const long_options[] = {"extended-regexp", no_argument, NULL, 'E'}, {"fixed-regexp", no_argument, NULL, 'F'}, {"fixed-strings", no_argument, NULL, 'F'}, - {"perl-regexp", no_argument, NULL, 'P'}, + {"perl-regexp", optional_argument, NULL, 'P'}, {"after-context", required_argument, NULL, 'A'}, {"before-context", required_argument, NULL, 'B'}, {"binary-files", required_argument, NULL, BINARY_FILES_OPTION}, @@ -563,6 +563,7 @@ bool match_icase; bool match_words; bool match_lines; char eolbyte; +bool pcre_jit = true; /* For error messages. */ /* The input file name, or (if standard input) null or a --label argument. */ @@ -1987,7 +1988,8 @@ Pattern selection and interpretation:\n"), getprogname ()); -E, --extended-regexp PATTERNS are extended regular expressions\n\ -F, --fixed-strings PATTERNS are strings\n\ -G, --basic-regexp PATTERNS are basic regular expressions\n\ - -P, --perl-regexp PATTERNS are Perl regular expressions\n")); + -P, --perl-regexp[=FLAG] PATTERNS are Perl regular expressions\n\ + FLAG is 'no-jit' (JIT enabled by default)\n")); /* -X is deliberately undocumented. 
*/ printf (_("\ -e, --regexp=PATTERNS use PATTERNS for matching\n\ @@ -2545,6 +2547,13 @@ main (int argc, char **argv) case 'P': matcher = setmatcher ("perl", matcher); + if (optarg) + { + if (STREQ (optarg, "no-jit")) + pcre_jit = false; + else + die (EXIT_TROUBLE, 0, _("unknown PCRE flag")); + } break; case 'G': diff --git a/src/grep.h b/src/grep.h index 04c15dd..263e98c 100644 --- a/src/grep.h +++ b/src/grep.h @@ -29,6 +29,7 @@ extern bool match_icase; /* -i */ extern bool match_words; /* -w */ extern bool match_lines; /* -x */ extern char eolbyte; /* -z */ +extern bool pcre_jit; /* --perl-regexp=no-jit */ extern char const *pattern_file_name (idx_t, idx_t *); diff --git a/src/pcresearch.c b/src/pcresearch.c index 09f92c8..988d753 100644 --- a/src/pcresearch.c +++ b/src/pcresearch.c @@ -180,7 +180,9 @@ Pcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact) if (!pc->cre) die (EXIT_TROUBLE, 0, "%s", ep); - int pcre_study_flags = PCRE_STUDY_EXTRA_NEEDED | PCRE_STUDY_JIT_COMPILE; + int pcre_study_flags = PCRE_STUDY_EXTRA_NEEDED; + if (pcre_jit) + pcre_study_flags |= PCRE_STUDY_JIT_COMPILE; pc->extra = pcre_study (pc->cre, pcre_study_flags, &ep); if (ep) die (EXIT_TROUBLE, 0, "%s", ep); @@ -191,7 +193,7 @@ Pcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact) /* The PCRE documentation says that a 32 KiB stack is the default. */ if (e) - pc->jit_stack_size = 32 << 10; + pc->jit_stack_size = (pcre_jit) ? 32 << 10 : 0; #endif free (re); diff --git a/tests/Makefile.am b/tests/Makefile.am index c84cdc0..cd83e00 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -135,6 +135,7 @@ TESTS = \ null-byte \ options \ pcre \ + pcre-nojit \ pcre-abort \ pcre-context \ pcre-count \ diff --git a/tests/pcre-nojit b/tests/pcre-nojit new file mode 100755 index 0000000..e752f33 --- /dev/null +++ b/tests/pcre-nojit @@ -0,0 +1,22 @@ +#! /bin/sh +# Simple PCRE tests with JIT disabled. 
+# +# Copyright (C) 2001, 2006, 2009-2021 Free Software Foundation, Inc. +# +# Copying and distribution of this file, with or without modification, +# are permitted in any medium without royalty provided the copyright +# notice and this notice are preserved. + +. "${srcdir=.}/init.sh"; path_prepend_ ../src +require_pcre_ + +fail=0 + +echo | grep --perl-regex=no-jit '\s*$' || fail=1 +echo | grep -z --perl-regex=no-jit '\s$' || fail=1 +echo '.ab' | returns_ 1 grep --perl-regex=no-jit -wx ab || fail=1 +echo x | grep --perl-regex=no-jit -z '[^a]' || fail=1 +printf 'x\n\0' | returns_ 1 grep -z --perl-regex=no-jit 'x$' || fail=1 +printf 'a\nb\0' | grep -zx --perl-regex=no-jit a && fail=1 + +Exit $fail -- 2.34.0.rc1.349.g8f33748433