From 3c65ea532f745a2bc619ed1aa32b9c26155329c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= <carenas@gmail.com>
Date: Fri, 6 Jan 2023 20:40:07 -0800
Subject: [PATCH] pcre: only use UTF when available in the library

Before this change, if grep was linked with a PCRE2 library built
without Unicode support, any invocation of grep -P in a UTF-8 locale
failed with:

  grep: this version of PCRE2 does not have Unicode support

* src/pcresearch.c (Pcompile): Use UTF only if the PCRE2 library
was built with Unicode support.
* tests/pcre-utf8-w: Skip the test if PCRE2 lacks Unicode support.
* tests/pcre-utf8: Update the skip message.
---
 src/pcresearch.c  | 4 +++-
 tests/pcre-utf8   | 2 +-
 tests/pcre-utf8-w | 5 ++++-
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/pcresearch.c b/src/pcresearch.c
index 45b67ee..ac70a20 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -140,12 +140,14 @@ Pcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact)
   int ec;
   int flags = PCRE2_DOLLAR_ENDONLY | (match_icase ? PCRE2_CASELESS : 0);
   char *patlim = pattern + size;
+  int unicode = 1;
   struct pcre_comp *pc = ximalloc (sizeof *pc);
   pcre2_general_context *gcontext = pc->gcontext
     = pcre2_general_context_create (private_malloc, private_free, NULL);
   pcre2_compile_context *ccontext = pcre2_compile_context_create (gcontext);
 
-  if (localeinfo.multibyte)
+  pcre2_config (PCRE2_CONFIG_UNICODE, &unicode);
+  if (unicode && localeinfo.multibyte)
     {
       if (! localeinfo.using_utf8)
         die (EXIT_TROUBLE, 0, _("-P supports only unibyte and UTF-8 locales"));
diff --git a/tests/pcre-utf8 b/tests/pcre-utf8
index 1b3b2d3..4d97e69 100755
--- a/tests/pcre-utf8
+++ b/tests/pcre-utf8
@@ -14,7 +14,7 @@ LC_ALL=en_US.UTF-8 require_pcre_
 fail=0
 
 echo '$' | LC_ALL=en_US.UTF-8 grep -qP '\p{S}' \
-  || skip_ 'PCRE support is compiled out, or it does not support properties'
+  || skip_ 'PCRE unicode support is compiled out'
 
 euro='\342\202\254 euro'
 printf "$euro\\n" > in || framework_failure_
diff --git a/tests/pcre-utf8-w b/tests/pcre-utf8-w
index 4cd7db6..81ac9ff 100755
--- a/tests/pcre-utf8-w
+++ b/tests/pcre-utf8-w
@@ -13,9 +13,12 @@ LC_ALL=en_US.UTF-8
 export LC_ALL
 require_pcre_
 
+echo . | grep -qP '(*UTF).' 2>/dev/null \
+  || skip_ 'PCRE unicode support is compiled out'
+
 fail=0
 
-echo 'Perú'> in || framework_failure_
+echo 'Perú' > in || framework_failure_
 
 echo 'ú' > exp || framework_failure_
 grep -Po '.\b' in > out || fail=1
-- 
2.37.1 (Apple Git-137.1)

