Control: tags -1 + patch

Please find attached a patch.

I have not tested the package beyond the build-time tests.  Initially,
I tried to follow upstream's desire for custom memory management, so I
created a pcre2_general_context in _PG_init (changing the
pgpcre_malloc/pgpcre_free prototypes as required).  Unfortunately,
the test program crashes when pfree is invoked in
src:pcre2:src/pcre2_match.c:6846.  It looks like palloc/pfree are not
suitable as PCRE2 custom memory management functions, as they rely on
memory contexts, much as PCRE2 itself does.  Or perhaps I am
misunderstanding something.
From f46d9ca762751a3c856369d781cf69554c31e001 Mon Sep 17 00:00:00 2001
From: Yavor Doganov <ya...@gnu.org>
Date: Sat, 16 Dec 2023 22:29:20 +0200
Subject: [PATCH] Port to PCRE2 (#1000001).

---
 debian/changelog           |   6 +
 debian/control             |   2 +-
 debian/control.in          |   2 +-
 debian/patches/pcre2.patch | 268 +++++++++++++++++++++++++++++++++++++
 debian/patches/series      |   1 +
 5 files changed, 277 insertions(+), 2 deletions(-)
 create mode 100644 debian/patches/pcre2.patch
 create mode 100644 debian/patches/series

diff --git a/debian/changelog b/debian/changelog
index 1d0f545..cba8632 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+pgpcre (0.20190509-7) UNRELEASED; urgency=medium
+
+  * Port to PCRE2 (Closes: #1000001).
+
+ -- Yavor Doganov <ya...@gnu.org>  Sat, 16 Dec 2023 22:27:38 +0200
+
 pgpcre (0.20190509-6) unstable; urgency=medium
 
   * Upload for PostgreSQL 16.
diff --git a/debian/control b/debian/control
index 1073417..bb8c141 100644
--- a/debian/control
+++ b/debian/control
@@ -6,7 +6,7 @@ Uploaders:
  Christoph Berg <m...@debian.org>,
 Build-Depends:
  debhelper-compat (= 13),
- libpcre3-dev,
+ libpcre2-dev,
  pkg-config,
  postgresql-all (>= 217~),
 Standards-Version: 4.6.2
diff --git a/debian/control.in b/debian/control.in
index 5fe2318..0f01b24 100644
--- a/debian/control.in
+++ b/debian/control.in
@@ -6,7 +6,7 @@ Uploaders:
  Christoph Berg <m...@debian.org>,
 Build-Depends:
  debhelper-compat (= 13),
- libpcre3-dev,
+ libpcre2-dev,
  pkg-config,
  postgresql-all (>= 217~),
 Standards-Version: 4.6.2
diff --git a/debian/patches/pcre2.patch b/debian/patches/pcre2.patch
new file mode 100644
index 0000000..f138fb1
--- /dev/null
+++ b/debian/patches/pcre2.patch
@@ -0,0 +1,268 @@
+Description: Port to PCRE2.
+Bug-Debian: https://bugs.debian.org/1000001
+Author: Yavor Doganov <ya...@gnu.org>
+Forwarded: no
+Last-Update: 2023-12-16
+---
+
+--- pgpcre-0.20190509.orig/Makefile
++++ pgpcre-0.20190509/Makefile
+@@ -6,12 +6,12 @@
+ OBJS = pgpcre.o
+ DATA = pgpcre--0.sql pgpcre--1.sql pgpcre--0--1.sql
+ 
+-ifeq (no,$(shell $(PKG_CONFIG) libpcre || echo no))
++ifeq (no,$(shell $(PKG_CONFIG) libpcre2-8 || echo no))
+ $(warning libpcre not registed with pkg-config, build might fail)
+ endif
+ 
+-PG_CPPFLAGS += $(shell $(PKG_CONFIG) --cflags-only-I libpcre)
+-SHLIB_LINK += $(shell $(PKG_CONFIG) --libs libpcre)
++PG_CPPFLAGS += $(shell $(PKG_CONFIG) --cflags-only-I libpcre2-8)
++SHLIB_LINK += $(shell $(PKG_CONFIG) --libs libpcre2-8)
+ 
+ REGRESS = init test unicode
+ REGRESS_OPTS = --inputdir=test
+--- pgpcre-0.20190509.orig/pgpcre.c
++++ pgpcre-0.20190509/pgpcre.c
+@@ -5,7 +5,8 @@
+ #include <utils/array.h>
+ #include <utils/builtins.h>
+ 
+-#include <pcre.h>
++#define PCRE2_CODE_UNIT_WIDTH 8
++#include <pcre2.h>
+ 
+ PG_MODULE_MAGIC;
+ 
+@@ -57,19 +58,19 @@
+ pcre_in(PG_FUNCTION_ARGS)
+ {
+       char       *input_string = PG_GETARG_CSTRING(0);
+-      pcre       *pc;
+-      const char *err;
+-      int                     erroffset;
+-      size_t          in_strlen;
+-      int                     rc, total_len, pcsize;
++      pcre2_code *pc;
++      int         err;
++      PCRE2_SIZE              erroffset;
++      size_t          in_strlen, pcsize;
++      int                     rc, total_len;
+       pgpcre     *result;
+ 
+       in_strlen = strlen(input_string);
+ 
+       if (GetDatabaseEncoding() == PG_UTF8)
+-              pc = pcre_compile(input_string, PCRE_UTF8 | PCRE_UCP, &err, 
&erroffset, NULL);
++              pc = pcre2_compile((PCRE2_SPTR) input_string, in_strlen, 
PCRE2_UTF | PCRE2_UCP, &err, &erroffset, NULL);
+       else if (GetDatabaseEncoding() == PG_SQL_ASCII)
+-              pc = pcre_compile(input_string, 0, &err, &erroffset, NULL);
++              pc = pcre2_compile((PCRE2_SPTR) input_string, in_strlen, 0, 
&err, &erroffset, NULL);
+       else
+       {
+               char *utf8string;
+@@ -78,22 +79,27 @@
+                                                                               
                                in_strlen,
+                                                                               
                                GetDatabaseEncoding(),
+                                                                               
                                PG_UTF8);
+-              pc = pcre_compile(utf8string, PCRE_UTF8 | PCRE_UCP, &err, 
&erroffset, NULL);
++              pc = pcre2_compile((PCRE2_SPTR) utf8string, strlen(utf8string), 
PCRE2_UTF | PCRE2_UCP, &err, &erroffset, NULL);
+               if (utf8string != input_string)
+                       pfree(utf8string);
+       }
+       if (!pc)
+-              elog(ERROR, "PCRE compile error: %s", err);
++      {
++              PCRE2_UCHAR buf[120];
++
++              pcre2_get_error_message(err, buf, sizeof(buf));
++              elog(ERROR, "PCRE compile error: %s", buf);
++        }
+ 
+-      rc = pcre_fullinfo(pc, NULL, PCRE_INFO_SIZE, &pcsize);
++      rc = pcre2_pattern_info(pc, PCRE2_INFO_SIZE, &pcsize);
+       if (rc < 0)
+-              elog(ERROR, "pcre_fullinfo/PCRE_INFO_SIZE: %d", rc);
++              elog(ERROR, "pcre2_pattern_info/PCRE2_INFO_SIZE: %d", rc);
+ 
+       total_len = offsetof(pgpcre, data) + in_strlen + 1 + pcsize;
+       result = (pgpcre *) palloc0(total_len);
+       SET_VARSIZE(result, total_len);
+-      result->pcre_major = PCRE_MAJOR;
+-      result->pcre_minor = PCRE_MINOR;
++      result->pcre_major = PCRE2_MAJOR;
++      result->pcre_minor = PCRE2_MINOR;
+       result->pattern_strlen = in_strlen;
+       strcpy(result->data, input_string);
+       memcpy(result->data + in_strlen + 1, pc, pcsize);
+@@ -114,50 +120,48 @@
+ static bool
+ matches_internal(text *subject, pgpcre *pattern, char ***return_matches, int 
*num_captured)
+ {
+-      pcre       *pc;
++      pcre2_code         *pc;
++      pcre2_match_data   *md;
+       int                     rc;
+-      int                     num_substrings = 0;
+-      int                *ovector;
++      uint32_t                num_substrings = 0;
++      PCRE2_SIZE         *ovector;
+       int                     ovecsize;
+       char       *utf8string;
+       static bool warned = false;
+ 
+-      if (!warned && (pattern->pcre_major != PCRE_MAJOR || 
pattern->pcre_minor != PCRE_MINOR))
++      if (!warned && (pattern->pcre_major != PCRE2_MAJOR || 
pattern->pcre_minor != PCRE2_MINOR))
+       {
+               ereport(WARNING,
+                               (errmsg("PCRE version mismatch"),
+                                errdetail("The compiled pattern was created by 
PCRE version %d.%d, the current library is version %d.%d.  According to the 
PCRE documentation, \"compiling a regular expression with one version of PCRE 
for use with a different version is not guaranteed to work and may cause 
crashes.\"  This warning is shown only once per session.",
+                                                  pattern->pcre_major, 
pattern->pcre_minor,
+-                                                 PCRE_MAJOR, PCRE_MINOR),
++                                                 PCRE2_MAJOR, PCRE2_MINOR),
+                                errhint("You might want to recompile the 
stored patterns by running something like UPDATE ... SET pcre_col = 
pcre_col::text::pcre.")));
+               warned = true;
+       }
+ 
+-      pc = (pcre *) (pattern->data + pattern->pattern_strlen + 1);
++      pc = (pcre2_code *) (pattern->data + pattern->pattern_strlen + 1);
+ 
+       if (num_captured)
+       {
+-              int rc;
+-
+-              if ((rc = pcre_fullinfo(pc, NULL, PCRE_INFO_CAPTURECOUNT, 
&num_substrings)) != 0)
+-                      elog(ERROR, "pcre_fullinfo error: %d", rc);
++              if ((rc = pcre2_pattern_info(pc, PCRE2_INFO_CAPTURECOUNT, 
&num_substrings)) != 0)
++                      elog(ERROR, "pcre2_pattern_info error: %d", rc);
+       }
+ 
+       if (return_matches)
+       {
+               ovecsize = (num_substrings + 1) * 3;
+-              ovector = palloc(ovecsize * sizeof(*ovector));
++              md = pcre2_match_data_create(ovecsize, NULL);
+       }
+       else
+       {
+-              ovecsize = 0;
+-              ovector = NULL;
++              md = pcre2_match_data_create_from_pattern(pc, NULL);
+       }
+ 
+       if (GetDatabaseEncoding() == PG_UTF8 || GetDatabaseEncoding() == 
PG_SQL_ASCII)
+       {
+               utf8string = VARDATA_ANY(subject);
+-              rc = pcre_exec(pc, NULL, VARDATA_ANY(subject), 
VARSIZE_ANY_EXHDR(subject), 0, 0, ovector, ovecsize);
++              rc = pcre2_match(pc, (PCRE2_SPTR) VARDATA_ANY(subject), 
VARSIZE_ANY_EXHDR(subject), 0, 0, md, NULL);
+       }
+       else
+       {
+@@ -165,13 +169,16 @@
+                                                                               
                                VARSIZE_ANY_EXHDR(subject),
+                                                                               
                                GetDatabaseEncoding(),
+                                                                               
                                PG_UTF8);
+-              rc = pcre_exec(pc, NULL, utf8string, strlen(utf8string), 0, 0, 
ovector, ovecsize);
++              rc = pcre2_match(pc, (PCRE2_SPTR) utf8string, 
strlen(utf8string), 0, 0, md, NULL);
+       }
+ 
+-      if (rc == PCRE_ERROR_NOMATCH)
++      if (rc == PCRE2_ERROR_NOMATCH)
++      {
++              pcre2_match_data_free(md);
+               return false;
++      }
+       else if (rc < 0)
+-              elog(ERROR, "PCRE exec error: %d", rc);
++              elog(ERROR, "PCRE match error: %d", rc);
+ 
+       if (return_matches)
+       {
+@@ -183,32 +190,37 @@
+ 
+                       *num_captured = num_substrings;
+                       matches = palloc(num_substrings * sizeof(*matches));
++                      ovector = pcre2_get_ovector_pointer(md);
+ 
+                       for (i = 1; i <= num_substrings; i++)
+                       {
+-                              if (ovector[i * 2] < 0)
++                              if ((int) ovector[i * 2] < 0)
+                                       matches[i - 1] = NULL;
+                               else
+                               {
+-                                      const char *xmatch;
++                                      PCRE2_UCHAR *xmatch;
++                                      PCRE2_SIZE l;
+ 
+-                                      pcre_get_substring(utf8string, ovector, 
rc, i, &xmatch);
++                                      pcre2_substring_get_bynumber(md, i, 
&xmatch, &l);
+                                       matches[i - 1] = (char *) xmatch;
+                               }
+                       }
+               }
+               else
+               {
+-                      const char *xmatch;
++                      PCRE2_UCHAR *xmatch;
++                      PCRE2_SIZE l;
+ 
+                       matches = palloc(1 * sizeof(*matches));
+-                      pcre_get_substring(utf8string, ovector, rc, 0, &xmatch);
++                      pcre2_substring_get_bynumber(md, 0, &xmatch, &l);
+                       matches[0] = (char *) xmatch;
+               }
+ 
+               *return_matches = matches;
+       }
+ 
++      pcre2_match_data_free(md);
++
+       return true;
+ }
+ 
+@@ -307,23 +319,7 @@
+ }
+ 
+ 
+-static void *
+-pgpcre_malloc(size_t size)
+-{
+-      return palloc(size);
+-}
+-
+-
+-static void
+-pgpcre_free(void *ptr)
+-{
+-      pfree(ptr);
+-}
+-
+-
+ void
+ _PG_init(void)
+ {
+-      pcre_malloc = pgpcre_malloc;
+-      pcre_free = pgpcre_free;
+ }
+--- pgpcre-0.20190509.orig/test/expected/test.out
++++ pgpcre-0.20190509/test/expected/test.out
+@@ -5,7 +5,7 @@
+ (1 row)
+ 
+ SELECT pcre '+';
+-ERROR:  PCRE compile error: nothing to repeat
++ERROR:  PCRE compile error: quantifier does not follow a repeatable item
+ LINE 1: SELECT pcre '+';
+                     ^
+ SELECT 'foo' =~ 'fo+';
+@@ -21,7 +21,7 @@
+ (1 row)
+ 
+ SELECT 'error' =~ '+';
+-ERROR:  PCRE compile error: nothing to repeat
++ERROR:  PCRE compile error: quantifier does not follow a repeatable item
+ LINE 1: SELECT 'error' =~ '+';
+                           ^
+ SELECT 'foo' ~ pcre 'fo+';
diff --git a/debian/patches/series b/debian/patches/series
new file mode 100644
index 0000000..8eb7ece
--- /dev/null
+++ b/debian/patches/series
@@ -0,0 +1 @@
+pcre2.patch
-- 
2.43.0

Reply via email to