On Fri, Jul 19, 2024 at 5:49 AM Tomas Vondra <tomas.von...@enterprisedb.com> wrote: > > Hi, > > If I understand correctly, the problem is that it's not clear which of > the 'abc' substrings is matched/returned by the function, right? > > I wonder if this is a problem only for understanding the test, or if it > makes the tests a bit weaker. I mean, what if the function returns the > wrong substring? How would we know? >
This is only for understanding the test. Personally, I sometimes find the documentation too dry and hard to follow, so I rely on the regression tests to better understand it. That was my intention for the changes. We have more sophisticated regex tests at https://git.postgresql.org/cgit/postgresql.git/tree/src/test/modules/test_regex > Also, if we tweak this, shouldn't we tweak also the regexp_instr() calls > a bit earlier in the test script? > Sure. Please check the attached patch.
From 0b9e7a9c32d4e3163ef0cc955a0916b9d5c7f395 Mon Sep 17 00:00:00 2001 From: jian he <jian.universal...@gmail.com> Date: Thu, 25 Jul 2024 22:46:07 +0800 Subject: [PATCH v2 1/1] refactor regex related tests Some tests for regexp_instr and regexp_substr use "abcabcabc" as the source string, which is not good for testing: in many tests the matched substring is "abc", but the source string contains three occurrences of "abc", so it is not clear which "abc" a result refers to. Change the source string so that the results are easier to understand. --- src/test/regress/expected/strings.out | 42 +++++++++++++++++++-------- src/test/regress/sql/strings.sql | 21 ++++++++------ 2 files changed, 42 insertions(+), 21 deletions(-) diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 52b69a10..4471f9a2 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -790,31 +790,43 @@ SELECT regexp_instr('abcdefghi', 'd.q'); 0 (1 row) -SELECT regexp_instr('abcabcabc', 'a.c'); +SELECT regexp_instr('abcaXcaYc', 'a.c'); regexp_instr -------------- 1 (1 row) -SELECT regexp_instr('abcabcabc', 'a.c', 2); +SELECT regexp_instr('abcaXcaYc', 'a.c', 2); regexp_instr -------------- 4 (1 row) -SELECT regexp_instr('abcabcabc', 'a.c', 1, 3); +SELECT regexp_instr('abcaXcaYc', 'a.c', 1, 3); regexp_instr -------------- 7 (1 row) -SELECT regexp_instr('abcabcabc', 'a.c', 1, 4); +SELECT regexp_instr('abcaXcaYc', 'a.c', 1, 3, 1); + regexp_instr +-------------- + 10 +(1 row) + +SELECT regexp_instr('abcaXcaYc', 'a.c', 2, 2, 0); + regexp_instr +-------------- + 7 +(1 row) + +SELECT regexp_instr('abcaXcaYc', 'a.c', 1, 4); regexp_instr -------------- 0 (1 row) -SELECT regexp_instr('abcabcabc', 'A.C', 1, 2, 0, 'i'); +SELECT regexp_instr('abcaXcayc', 'A.C', 1, 2, 0, 'i'); regexp_instr -------------- 4 @@ -925,22 +937,22 @@ SELECT regexp_substr('abcdefghi', 'd.q') IS NULL AS t; t (1 row) -SELECT regexp_substr('abcabcabc', 'a.c'); +SELECT 
regexp_substr('abcaXcaYc', 'a.c'); regexp_substr --------------- abc (1 row) -SELECT regexp_substr('abcabcabc', 'a.c', 2); +SELECT regexp_substr('abcaXcaYc', 'a.c', 2); regexp_substr --------------- - abc + aXc (1 row) -SELECT regexp_substr('abcabcabc', 'a.c', 1, 3); +SELECT regexp_substr('abcaXcaYc', 'a.c', 1, 3); regexp_substr --------------- - abc + aYc (1 row) SELECT regexp_substr('abcabcabc', 'a.c', 1, 4) IS NULL AS t; @@ -949,10 +961,16 @@ SELECT regexp_substr('abcabcabc', 'a.c', 1, 4) IS NULL AS t; t (1 row) -SELECT regexp_substr('abcabcabc', 'A.C', 1, 2, 'i'); +SELECT regexp_substr('abcaXcaYc', 'A.C', 1, 2, 'i'); regexp_substr --------------- - abc + aXc +(1 row) + +SELECT regexp_substr('abcaXcaYc', 'A.C', 2, 2, 'i'); + regexp_substr +--------------- + aYc (1 row) SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 0); diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index 39596789..d553a98c 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -244,11 +244,13 @@ SELECT regexp_like('abc', 'a.c', 'g'); -- error -- regexp_instr tests SELECT regexp_instr('abcdefghi', 'd.f'); SELECT regexp_instr('abcdefghi', 'd.q'); -SELECT regexp_instr('abcabcabc', 'a.c'); -SELECT regexp_instr('abcabcabc', 'a.c', 2); -SELECT regexp_instr('abcabcabc', 'a.c', 1, 3); -SELECT regexp_instr('abcabcabc', 'a.c', 1, 4); -SELECT regexp_instr('abcabcabc', 'A.C', 1, 2, 0, 'i'); +SELECT regexp_instr('abcaXcaYc', 'a.c'); +SELECT regexp_instr('abcaXcaYc', 'a.c', 2); +SELECT regexp_instr('abcaXcaYc', 'a.c', 1, 3); +SELECT regexp_instr('abcaXcaYc', 'a.c', 1, 3, 1); +SELECT regexp_instr('abcaXcaYc', 'a.c', 2, 2, 0); +SELECT regexp_instr('abcaXcaYc', 'a.c', 1, 4); +SELECT regexp_instr('abcaXcayc', 'A.C', 1, 2, 0, 'i'); SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 0); SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 1); SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 
1, 0, 'i', 2); @@ -274,11 +276,12 @@ SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, 0, '', -1); -- regexp_substr tests SELECT regexp_substr('abcdefghi', 'd.f'); SELECT regexp_substr('abcdefghi', 'd.q') IS NULL AS t; -SELECT regexp_substr('abcabcabc', 'a.c'); -SELECT regexp_substr('abcabcabc', 'a.c', 2); -SELECT regexp_substr('abcabcabc', 'a.c', 1, 3); +SELECT regexp_substr('abcaXcaYc', 'a.c'); +SELECT regexp_substr('abcaXcaYc', 'a.c', 2); +SELECT regexp_substr('abcaXcaYc', 'a.c', 1, 3); SELECT regexp_substr('abcabcabc', 'a.c', 1, 4) IS NULL AS t; -SELECT regexp_substr('abcabcabc', 'A.C', 1, 2, 'i'); +SELECT regexp_substr('abcaXcaYc', 'A.C', 1, 2, 'i'); +SELECT regexp_substr('abcaXcaYc', 'A.C', 2, 2, 'i'); SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 0); SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 1); SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 2); -- 2.34.1