Under the topic of getting rid of thread-unsafe functions in the backend
[0], here is a patch series to deal with strtok().
Of course, strtok() is famously not thread-safe and can be replaced by
strtok_r(). But it also has the wrong semantics in some cases, because
it considers adjacent delimiters to be one delimiter. So if you parse
SCRAM-SHA-256$<iterations>:<salt>$<storedkey>:<serverkey>
with strtok(), then
SCRAM-SHA-256$$<iterations>::<salt>$$<storedkey>::<serverkey>
parses just the same. In many cases, this is arguably wrong and could
hide mistakes.
So I'm suggesting to use strsep() in those places. strsep() is
nonstandard but widely available.
There are a few places where strtok() has the right semantics, such as
parsing tokens separated by whitespace. For those, I'm using strtok_r().
A reviewer job here would be to check whether I made that distinction
correctly in each case.
On the portability side, I'm including a port/ replacement for strsep()
and some workaround to get strtok_r() for Windows. I have included
these here as separate patches for clarity.
[0]:
https://www.postgresql.org/message-id/856e5ec3-879f-42ee-8258-8bcc6ec9b...@eisentraut.orgFrom 8ab537885f007d8bed58f839ca91108ef20422a6 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Mon, 10 Jun 2024 16:08:08 +0200
Subject: [PATCH v1 1/4] Replace some strtok() with strsep()
strtok() considers adjacent delimiters to be one delimiter, which is
arguably the wrong behavior in some cases. Replace with strsep(),
which has the right behavior: Adjacent delimiters create an empty
token.
---
src/backend/libpq/auth-scram.c | 11 +++++------
src/backend/utils/adt/pg_locale.c | 3 ++-
src/common/logging.c | 4 +++-
src/test/regress/pg_regress.c | 5 ++---
4 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/src/backend/libpq/auth-scram.c b/src/backend/libpq/auth-scram.c
index 41619599148..03ddddc3c27 100644
--- a/src/backend/libpq/auth-scram.c
+++ b/src/backend/libpq/auth-scram.c
@@ -608,16 +608,15 @@ parse_scram_secret(const char *secret, int *iterations,
* SCRAM-SHA-256$<iterations>:<salt>$<storedkey>:<serverkey>
*/
v = pstrdup(secret);
- if ((scheme_str = strtok(v, "$")) == NULL)
+ if ((scheme_str = strsep(&v, "$")) == NULL)
goto invalid_secret;
- if ((iterations_str = strtok(NULL, ":")) == NULL)
+ if ((iterations_str = strsep(&v, ":")) == NULL)
goto invalid_secret;
- if ((salt_str = strtok(NULL, "$")) == NULL)
+ if ((salt_str = strsep(&v, "$")) == NULL)
goto invalid_secret;
- if ((storedkey_str = strtok(NULL, ":")) == NULL)
- goto invalid_secret;
- if ((serverkey_str = strtok(NULL, "")) == NULL)
+ if ((storedkey_str = strsep(&v, ":")) == NULL)
goto invalid_secret;
+ serverkey_str = v;
/* Parse the fields */
if (strcmp(scheme_str, "SCRAM-SHA-256") != 0)
diff --git a/src/backend/utils/adt/pg_locale.c
b/src/backend/utils/adt/pg_locale.c
index 7e5bb2b703a..21924d89877 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -2809,6 +2809,7 @@ icu_set_collation_attributes(UCollator *collator, const
char *loc,
char *icu_locale_id;
char *lower_str;
char *str;
+ char *token;
/*
* The input locale may be a BCP 47 language tag, e.g.
@@ -2834,7 +2835,7 @@ icu_set_collation_attributes(UCollator *collator, const
char *loc,
return;
str++;
- for (char *token = strtok(str, ";"); token; token = strtok(NULL, ";"))
+ while ((token = strsep(&str, ";")))
{
char *e = strchr(token, '=');
diff --git a/src/common/logging.c b/src/common/logging.c
index e9a02e3e46a..aedd1ae2d8c 100644
--- a/src/common/logging.c
+++ b/src/common/logging.c
@@ -119,7 +119,9 @@ pg_logging_init(const char *argv0)
if (colors)
{
- for (char *token = strtok(colors, ":"); token;
token = strtok(NULL, ":"))
+ char *token;
+
+ while ((token = strsep(&colors, ":")))
{
char *e = strchr(token, '=');
diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c
index 06f6775fc65..1abfe6c4a5c 100644
--- a/src/test/regress/pg_regress.c
+++ b/src/test/regress/pg_regress.c
@@ -234,12 +234,11 @@ static void
split_to_stringlist(const char *s, const char *delim, _stringlist **listhead)
{
char *sc = pg_strdup(s);
- char *token = strtok(sc, delim);
+ char *token;
- while (token)
+ while ((token = strsep(&sc, delim)))
{
add_stringlist_item(listhead, token);
- token = strtok(NULL, delim);
}
free(sc);
}
base-commit: ae482a7ec521e09bb0dbfc261da6e6170a5ac29f
--
2.45.2
From 4e4769864ed3f9ccca232ff7fa56d3ce9c9f5f2c Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Mon, 10 Jun 2024 16:08:56 +0200
Subject: [PATCH v1 2/4] Replace remaining strtok() with strtok_r()
for thread-safety in the future
---
src/backend/utils/misc/tzparser.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/src/backend/utils/misc/tzparser.c
b/src/backend/utils/misc/tzparser.c
index 21fd866d6d6..96cc3912e56 100644
--- a/src/backend/utils/misc/tzparser.c
+++ b/src/backend/utils/misc/tzparser.c
@@ -97,6 +97,7 @@ validateTzEntry(tzEntry *tzentry)
static bool
splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry)
{
+ char *brkl;
char *abbrev;
char *offset;
char *offset_endptr;
@@ -106,7 +107,7 @@ splitTzLine(const char *filename, int lineno, char *line,
tzEntry *tzentry)
tzentry->lineno = lineno;
tzentry->filename = filename;
- abbrev = strtok(line, WHITESPACE);
+ abbrev = strtok_r(line, WHITESPACE, &brkl);
if (!abbrev)
{
GUC_check_errmsg("missing time zone abbreviation in time zone
file \"%s\", line %d",
@@ -115,7 +116,7 @@ splitTzLine(const char *filename, int lineno, char *line,
tzEntry *tzentry)
}
tzentry->abbrev = pstrdup(abbrev);
- offset = strtok(NULL, WHITESPACE);
+ offset = strtok_r(NULL, WHITESPACE, &brkl);
if (!offset)
{
GUC_check_errmsg("missing time zone offset in time zone file
\"%s\", line %d",
@@ -135,11 +136,11 @@ splitTzLine(const char *filename, int lineno, char *line,
tzEntry *tzentry)
return false;
}
- is_dst = strtok(NULL, WHITESPACE);
+ is_dst = strtok_r(NULL, WHITESPACE, &brkl);
if (is_dst && pg_strcasecmp(is_dst, "D") == 0)
{
tzentry->is_dst = true;
- remain = strtok(NULL, WHITESPACE);
+ remain = strtok_r(NULL, WHITESPACE, &brkl);
}
else
{
@@ -158,7 +159,7 @@ splitTzLine(const char *filename, int lineno, char *line,
tzEntry *tzentry)
tzentry->zone = pstrdup(offset);
tzentry->offset = 0 * SECS_PER_HOUR;
tzentry->is_dst = false;
- remain = strtok(NULL, WHITESPACE);
+ remain = strtok_r(NULL, WHITESPACE, &brkl);
}
if (!remain) /* no more non-whitespace chars
*/
@@ -394,8 +395,9 @@ ParseTzFile(const char *filename, int depth,
{
/* pstrdup so we can use filename in result data
structure */
char *includeFile = pstrdup(line +
strlen("@INCLUDE"));
+ char *brki;
- includeFile = strtok(includeFile, WHITESPACE);
+ includeFile = strtok_r(includeFile, WHITESPACE, &brki);
if (!includeFile || !*includeFile)
{
GUC_check_errmsg("@INCLUDE without file name in
time zone file \"%s\", line %d",
--
2.45.2
From da59e9395e77b8849b1a9700b610a601548ffdd7 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Thu, 13 Jun 2024 11:14:36 +0200
Subject: [PATCH v1 3/4] Add port/ replacement for strsep()
from OpenBSD, similar to strlcat, strlcpy
---
configure | 23 ++++++++++++
configure.ac | 3 +-
meson.build | 2 +
src/include/pg_config.h.in | 7 ++++
src/include/port.h | 4 ++
src/port/meson.build | 1 +
src/port/strsep.c | 77 ++++++++++++++++++++++++++++++++++++++
7 files changed, 116 insertions(+), 1 deletion(-)
create mode 100644 src/port/strsep.c
diff --git a/configure b/configure
index 7b03db56a67..90664858ba7 100755
--- a/configure
+++ b/configure
@@ -15778,6 +15778,16 @@ fi
cat >>confdefs.h <<_ACEOF
#define HAVE_DECL_STRNLEN $ac_have_decl
_ACEOF
+ac_fn_c_check_decl "$LINENO" "strsep" "ac_cv_have_decl_strsep"
"$ac_includes_default"
+if test "x$ac_cv_have_decl_strsep" = xyes; then :
+ ac_have_decl=1
+else
+ ac_have_decl=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_STRSEP $ac_have_decl
+_ACEOF
# We can't use AC_CHECK_FUNCS to detect these functions, because it
@@ -15925,6 +15935,19 @@ esac
fi
+ac_fn_c_check_func "$LINENO" "strsep" "ac_cv_func_strsep"
+if test "x$ac_cv_func_strsep" = xyes; then :
+ $as_echo "#define HAVE_STRSEP 1" >>confdefs.h
+
+else
+ case " $LIBOBJS " in
+ *" strsep.$ac_objext "* ) ;;
+ *) LIBOBJS="$LIBOBJS strsep.$ac_objext"
+ ;;
+esac
+
+fi
+
ac_fn_c_check_func "$LINENO" "pthread_barrier_wait"
"ac_cv_func_pthread_barrier_wait"
diff --git a/configure.ac b/configure.ac
index 63e7be38472..2fcb256123d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1795,7 +1795,7 @@ AC_CHECK_DECLS(posix_fadvise, [], [], [#include
<fcntl.h>])
]) # fi
AC_CHECK_DECLS(fdatasync, [], [], [#include <unistd.h>])
-AC_CHECK_DECLS([strlcat, strlcpy, strnlen])
+AC_CHECK_DECLS([strlcat, strlcpy, strnlen, strsep])
# We can't use AC_CHECK_FUNCS to detect these functions, because it
# won't handle deployment target restrictions on macOS
@@ -1814,6 +1814,7 @@ AC_REPLACE_FUNCS(m4_normalize([
strlcat
strlcpy
strnlen
+ strsep
]))
AC_REPLACE_FUNCS(pthread_barrier_wait)
diff --git a/meson.build b/meson.build
index 2767abd19e2..7d021138e72 100644
--- a/meson.build
+++ b/meson.build
@@ -2326,6 +2326,7 @@ decl_checks = [
['strlcat', 'string.h'],
['strlcpy', 'string.h'],
['strnlen', 'string.h'],
+ ['strsep', 'string.h'],
]
# Need to check for function declarations for these functions, because
@@ -2594,6 +2595,7 @@ func_checks = [
['strlcat'],
['strlcpy'],
['strnlen'],
+ ['strsep'],
['strsignal'],
['sync_file_range'],
['syncfs'],
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index f8d3e3b6b84..9862739b8e8 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -127,6 +127,10 @@
don't. */
#undef HAVE_DECL_STRNLEN
+/* Define to 1 if you have the declaration of `strsep', and to 0 if you don't.
+ */
+#undef HAVE_DECL_STRSEP
+
/* Define to 1 if you have the <editline/history.h> header file. */
#undef HAVE_EDITLINE_HISTORY_H
@@ -414,6 +418,9 @@
/* Define to 1 if you have the `strnlen' function. */
#undef HAVE_STRNLEN
+/* Define to 1 if you have the `strsep' function. */
+#undef HAVE_STRSEP
+
/* Define to 1 if you have the `strsignal' function. */
#undef HAVE_STRSIGNAL
diff --git a/src/include/port.h b/src/include/port.h
index ae115d2d970..c7400052675 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -432,6 +432,10 @@ extern size_t strlcpy(char *dst, const char *src, size_t
siz);
extern size_t strnlen(const char *str, size_t maxlen);
#endif
+#if !HAVE_DECL_STRSEP
+extern char *strsep(char **stringp, const char *delim);
+#endif
+
/* port/user.c */
#ifndef WIN32
extern bool pg_get_user_name(uid_t user_id, char *buffer, size_t buflen);
diff --git a/src/port/meson.build b/src/port/meson.build
index fd9ee199d1b..ff54b7b53e9 100644
--- a/src/port/meson.build
+++ b/src/port/meson.build
@@ -70,6 +70,7 @@ replace_funcs_neg = [
['strlcat'],
['strlcpy'],
['strnlen'],
+ ['strsep'],
]
if host_system != 'windows'
diff --git a/src/port/strsep.c b/src/port/strsep.c
new file mode 100644
index 00000000000..564125c5101
--- /dev/null
+++ b/src/port/strsep.c
@@ -0,0 +1,77 @@
+/*
+ * src/port/strsep.c
+ *
+ * $OpenBSD: strsep.c,v 1.8 2015/08/31 02:53:57 guenther Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "c.h"
+
+/*
+ * Get next token from string *stringp, where tokens are possibly-empty
+ * strings separated by characters from delim.
+ *
+ * Writes NULs into the string at *stringp to end tokens.
+ * delim need not remain constant from call to call.
+ * On return, *stringp points past the last NUL written (if there might
+ * be further tokens), or is NULL (if there are definitely no more tokens).
+ *
+ * If *stringp is NULL, strsep returns NULL.
+ */
+char *
+strsep(char **stringp, const char *delim)
+{
+ char *s;
+ const char *spanp;
+ int c,
+ sc;
+ char *tok;
+
+ if ((s = *stringp) == NULL)
+ return (NULL);
+ for (tok = s;;)
+ {
+ c = *s++;
+ spanp = delim;
+ do
+ {
+ if ((sc = *spanp++) == c)
+ {
+ if (c == 0)
+ s = NULL;
+ else
+ s[-1] = 0;
+ *stringp = s;
+ return (tok);
+ }
+ } while (sc != 0);
+ }
+ /* NOTREACHED */
+}
--
2.45.2
From 90631bdab4b6ed9a05cea0ce37b0c7e117883ac1 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Thu, 13 Jun 2024 11:18:50 +0200
Subject: [PATCH v1 4/4] Windows replacement for strtok_r()
They spell it "strtok_s" there.
---
src/include/port/win32_port.h | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/src/include/port/win32_port.h b/src/include/port/win32_port.h
index 3d1de166cb0..7ffe5891c69 100644
--- a/src/include/port/win32_port.h
+++ b/src/include/port/win32_port.h
@@ -415,6 +415,11 @@ extern int _pglstat64(const char *name, struct stat *buf);
#undef ETIMEDOUT
#define ETIMEDOUT WSAETIMEDOUT
+/*
+ * Supplement to <string.h>.
+ */
+#define strtok_r strtok_s
+
/*
* Locale stuff.
*
--
2.45.2