Control: tags -1 + patch Please find attached a patch.
I have not tested the package beyond running the build-time tests. Initially, I tried to honor upstream's preference for custom memory management, so I created a pcre2_general_context in _PG_init (changing the pgpcre_malloc/pgpcre_free prototypes as required). Unfortunately, the test program crashes when pfree is invoked in src:pcre2:src/pcre2_match.c:6846. It looks like palloc/pfree are not suitable as PCRE2 custom memory management functions, as they rely on memory contexts, much like PCRE2 itself does. Or perhaps I am misunderstanding something.
>From f46d9ca762751a3c856369d781cf69554c31e001 Mon Sep 17 00:00:00 2001 From: Yavor Doganov <ya...@gnu.org> Date: Sat, 16 Dec 2023 22:29:20 +0200 Subject: [PATCH] Port to PCRE2 (#1000001). --- debian/changelog | 6 + debian/control | 2 +- debian/control.in | 2 +- debian/patches/pcre2.patch | 268 +++++++++++++++++++++++++++++++++++++ debian/patches/series | 1 + 5 files changed, 277 insertions(+), 2 deletions(-) create mode 100644 debian/patches/pcre2.patch create mode 100644 debian/patches/series diff --git a/debian/changelog b/debian/changelog index 1d0f545..cba8632 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +pgpcre (0.20190509-7) UNRELEASED; urgency=medium + + * Port to PCRE2 (Closes: #1000001). + + -- Yavor Doganov <ya...@gnu.org> Sat, 16 Dec 2023 22:27:38 +0200 + pgpcre (0.20190509-6) unstable; urgency=medium * Upload for PostgreSQL 16. diff --git a/debian/control b/debian/control index 1073417..bb8c141 100644 --- a/debian/control +++ b/debian/control @@ -6,7 +6,7 @@ Uploaders: Christoph Berg <m...@debian.org>, Build-Depends: debhelper-compat (= 13), - libpcre3-dev, + libpcre2-dev, pkg-config, postgresql-all (>= 217~), Standards-Version: 4.6.2 diff --git a/debian/control.in b/debian/control.in index 5fe2318..0f01b24 100644 --- a/debian/control.in +++ b/debian/control.in @@ -6,7 +6,7 @@ Uploaders: Christoph Berg <m...@debian.org>, Build-Depends: debhelper-compat (= 13), - libpcre3-dev, + libpcre2-dev, pkg-config, postgresql-all (>= 217~), Standards-Version: 4.6.2 diff --git a/debian/patches/pcre2.patch b/debian/patches/pcre2.patch new file mode 100644 index 0000000..f138fb1 --- /dev/null +++ b/debian/patches/pcre2.patch @@ -0,0 +1,268 @@ +Description: Port to PCRE2. 
+Bug-Debian: https://bugs.debian.org/1000001 +Author: Yavor Doganov <ya...@gnu.org> +Forwarded: no +Last-Update: 2023-12-16 +--- + +--- pgpcre-0.20190509.orig/Makefile ++++ pgpcre-0.20190509/Makefile +@@ -6,12 +6,12 @@ + OBJS = pgpcre.o + DATA = pgpcre--0.sql pgpcre--1.sql pgpcre--0--1.sql + +-ifeq (no,$(shell $(PKG_CONFIG) libpcre || echo no)) ++ifeq (no,$(shell $(PKG_CONFIG) libpcre2-8 || echo no)) + $(warning libpcre not registed with pkg-config, build might fail) + endif + +-PG_CPPFLAGS += $(shell $(PKG_CONFIG) --cflags-only-I libpcre) +-SHLIB_LINK += $(shell $(PKG_CONFIG) --libs libpcre) ++PG_CPPFLAGS += $(shell $(PKG_CONFIG) --cflags-only-I libpcre2-8) ++SHLIB_LINK += $(shell $(PKG_CONFIG) --libs libpcre2-8) + + REGRESS = init test unicode + REGRESS_OPTS = --inputdir=test +--- pgpcre-0.20190509.orig/pgpcre.c ++++ pgpcre-0.20190509/pgpcre.c +@@ -5,7 +5,8 @@ + #include <utils/array.h> + #include <utils/builtins.h> + +-#include <pcre.h> ++#define PCRE2_CODE_UNIT_WIDTH 8 ++#include <pcre2.h> + + PG_MODULE_MAGIC; + +@@ -57,19 +58,19 @@ + pcre_in(PG_FUNCTION_ARGS) + { + char *input_string = PG_GETARG_CSTRING(0); +- pcre *pc; +- const char *err; +- int erroffset; +- size_t in_strlen; +- int rc, total_len, pcsize; ++ pcre2_code *pc; ++ int err; ++ PCRE2_SIZE erroffset; ++ size_t in_strlen, pcsize; ++ int rc, total_len; + pgpcre *result; + + in_strlen = strlen(input_string); + + if (GetDatabaseEncoding() == PG_UTF8) +- pc = pcre_compile(input_string, PCRE_UTF8 | PCRE_UCP, &err, &erroffset, NULL); ++ pc = pcre2_compile((PCRE2_SPTR) input_string, in_strlen, PCRE2_UTF | PCRE2_UCP, &err, &erroffset, NULL); + else if (GetDatabaseEncoding() == PG_SQL_ASCII) +- pc = pcre_compile(input_string, 0, &err, &erroffset, NULL); ++ pc = pcre2_compile((PCRE2_SPTR) input_string, in_strlen, 0, &err, &erroffset, NULL); + else + { + char *utf8string; +@@ -78,22 +79,27 @@ + in_strlen, + GetDatabaseEncoding(), + PG_UTF8); +- pc = pcre_compile(utf8string, PCRE_UTF8 | PCRE_UCP, &err, 
&erroffset, NULL); ++ pc = pcre2_compile((PCRE2_SPTR) utf8string, strlen(utf8string), PCRE2_UTF | PCRE2_UCP, &err, &erroffset, NULL); + if (utf8string != input_string) + pfree(utf8string); + } + if (!pc) +- elog(ERROR, "PCRE compile error: %s", err); ++ { ++ PCRE2_UCHAR buf[120]; ++ ++ pcre2_get_error_message(err, buf, sizeof(buf)); ++ elog(ERROR, "PCRE compile error: %s", buf); ++ } + +- rc = pcre_fullinfo(pc, NULL, PCRE_INFO_SIZE, &pcsize); ++ rc = pcre2_pattern_info(pc, PCRE2_INFO_SIZE, &pcsize); + if (rc < 0) +- elog(ERROR, "pcre_fullinfo/PCRE_INFO_SIZE: %d", rc); ++ elog(ERROR, "pcre2_pattern_info/PCRE2_INFO_SIZE: %d", rc); + + total_len = offsetof(pgpcre, data) + in_strlen + 1 + pcsize; + result = (pgpcre *) palloc0(total_len); + SET_VARSIZE(result, total_len); +- result->pcre_major = PCRE_MAJOR; +- result->pcre_minor = PCRE_MINOR; ++ result->pcre_major = PCRE2_MAJOR; ++ result->pcre_minor = PCRE2_MINOR; + result->pattern_strlen = in_strlen; + strcpy(result->data, input_string); + memcpy(result->data + in_strlen + 1, pc, pcsize); +@@ -114,50 +120,48 @@ + static bool + matches_internal(text *subject, pgpcre *pattern, char ***return_matches, int *num_captured) + { +- pcre *pc; ++ pcre2_code *pc; ++ pcre2_match_data *md; + int rc; +- int num_substrings = 0; +- int *ovector; ++ uint32_t num_substrings = 0; ++ PCRE2_SIZE *ovector; + int ovecsize; + char *utf8string; + static bool warned = false; + +- if (!warned && (pattern->pcre_major != PCRE_MAJOR || pattern->pcre_minor != PCRE_MINOR)) ++ if (!warned && (pattern->pcre_major != PCRE2_MAJOR || pattern->pcre_minor != PCRE2_MINOR)) + { + ereport(WARNING, + (errmsg("PCRE version mismatch"), + errdetail("The compiled pattern was created by PCRE version %d.%d, the current library is version %d.%d. 
According to the PCRE documentation, \"compiling a regular expression with one version of PCRE for use with a different version is not guaranteed to work and may cause crashes.\" This warning is shown only once per session.", + pattern->pcre_major, pattern->pcre_minor, +- PCRE_MAJOR, PCRE_MINOR), ++ PCRE2_MAJOR, PCRE2_MINOR), + errhint("You might want to recompile the stored patterns by running something like UPDATE ... SET pcre_col = pcre_col::text::pcre."))); + warned = true; + } + +- pc = (pcre *) (pattern->data + pattern->pattern_strlen + 1); ++ pc = (pcre2_code *) (pattern->data + pattern->pattern_strlen + 1); + + if (num_captured) + { +- int rc; +- +- if ((rc = pcre_fullinfo(pc, NULL, PCRE_INFO_CAPTURECOUNT, &num_substrings)) != 0) +- elog(ERROR, "pcre_fullinfo error: %d", rc); ++ if ((rc = pcre2_pattern_info(pc, PCRE2_INFO_CAPTURECOUNT, &num_substrings)) != 0) ++ elog(ERROR, "pcre2_pattern_info error: %d", rc); + } + + if (return_matches) + { + ovecsize = (num_substrings + 1) * 3; +- ovector = palloc(ovecsize * sizeof(*ovector)); ++ md = pcre2_match_data_create(ovecsize, NULL); + } + else + { +- ovecsize = 0; +- ovector = NULL; ++ md = pcre2_match_data_create_from_pattern(pc, NULL); + } + + if (GetDatabaseEncoding() == PG_UTF8 || GetDatabaseEncoding() == PG_SQL_ASCII) + { + utf8string = VARDATA_ANY(subject); +- rc = pcre_exec(pc, NULL, VARDATA_ANY(subject), VARSIZE_ANY_EXHDR(subject), 0, 0, ovector, ovecsize); ++ rc = pcre2_match(pc, (PCRE2_SPTR) VARDATA_ANY(subject), VARSIZE_ANY_EXHDR(subject), 0, 0, md, NULL); + } + else + { +@@ -165,13 +169,16 @@ + VARSIZE_ANY_EXHDR(subject), + GetDatabaseEncoding(), + PG_UTF8); +- rc = pcre_exec(pc, NULL, utf8string, strlen(utf8string), 0, 0, ovector, ovecsize); ++ rc = pcre2_match(pc, (PCRE2_SPTR) utf8string, strlen(utf8string), 0, 0, md, NULL); + } + +- if (rc == PCRE_ERROR_NOMATCH) ++ if (rc == PCRE2_ERROR_NOMATCH) ++ { ++ pcre2_match_data_free(md); + return false; ++ } + else if (rc < 0) +- elog(ERROR, "PCRE exec 
error: %d", rc); ++ elog(ERROR, "PCRE match error: %d", rc); + + if (return_matches) + { +@@ -183,32 +190,37 @@ + + *num_captured = num_substrings; + matches = palloc(num_substrings * sizeof(*matches)); ++ ovector = pcre2_get_ovector_pointer(md); + + for (i = 1; i <= num_substrings; i++) + { +- if (ovector[i * 2] < 0) ++ if ((int) ovector[i * 2] < 0) + matches[i - 1] = NULL; + else + { +- const char *xmatch; ++ PCRE2_UCHAR *xmatch; ++ PCRE2_SIZE l; + +- pcre_get_substring(utf8string, ovector, rc, i, &xmatch); ++ pcre2_substring_get_bynumber(md, i, &xmatch, &l); + matches[i - 1] = (char *) xmatch; + } + } + } + else + { +- const char *xmatch; ++ PCRE2_UCHAR *xmatch; ++ PCRE2_SIZE l; + + matches = palloc(1 * sizeof(*matches)); +- pcre_get_substring(utf8string, ovector, rc, 0, &xmatch); ++ pcre2_substring_get_bynumber(md, 0, &xmatch, &l); + matches[0] = (char *) xmatch; + } + + *return_matches = matches; + } + ++ pcre2_match_data_free(md); ++ + return true; + } + +@@ -307,23 +319,7 @@ + } + + +-static void * +-pgpcre_malloc(size_t size) +-{ +- return palloc(size); +-} +- +- +-static void +-pgpcre_free(void *ptr) +-{ +- pfree(ptr); +-} +- +- + void + _PG_init(void) + { +- pcre_malloc = pgpcre_malloc; +- pcre_free = pgpcre_free; + } +--- pgpcre-0.20190509.orig/test/expected/test.out ++++ pgpcre-0.20190509/test/expected/test.out +@@ -5,7 +5,7 @@ + (1 row) + + SELECT pcre '+'; +-ERROR: PCRE compile error: nothing to repeat ++ERROR: PCRE compile error: quantifier does not follow a repeatable item + LINE 1: SELECT pcre '+'; + ^ + SELECT 'foo' =~ 'fo+'; +@@ -21,7 +21,7 @@ + (1 row) + + SELECT 'error' =~ '+'; +-ERROR: PCRE compile error: nothing to repeat ++ERROR: PCRE compile error: quantifier does not follow a repeatable item + LINE 1: SELECT 'error' =~ '+'; + ^ + SELECT 'foo' ~ pcre 'fo+'; diff --git a/debian/patches/series b/debian/patches/series new file mode 100644 index 0000000..8eb7ece --- /dev/null +++ b/debian/patches/series @@ -0,0 +1 @@ +pcre2.patch -- 2.43.0