Hello, hackers!
Recently I've faced an issue when playing with pg_controldata.
I made some rubbish pg_control files and caught a SIGSEGV.
Then I tried fuzzing REL_17_STABLE pg_controldata with AFL++ and got 7 crash
cases.
Also, it is not necessary to use fuzzing. You can simply generate
any pg_control by 32-bit initdb and give it to the 64-bit pg_controldata.
SIGSEGV was caused by two places in pg_controldata.c where there
is a work with localtime(&time_tmp));. So I added a check for not NULL.
The reason is such:
The 64-bit ControlFileData has gaps for alignment after DBState,
but the 32-bit ControlFileData does not have such alignment,
so the zero bytes from the gap are read in pg_time_t time (which is actually a
long long time):
(gdb output)
64-bit
type = struct ControlFileData {
/* 0 | 8 */ uint64 system_identifier;
/* 8 | 4 */ uint32 pg_control_version;
/* 12 | 4 */ uint32 catalog_version_no;
/* 16 | 4 */ DBState state;
/* XXX 4-byte hole */
/* 24 | 8 */ pg_time_t time;
32 bit
type = struct ControlFileData {
/* 0 | 8 */ uint64 system_identifier;
/* 8 | 4 */ uint32 pg_control_version;
/* 12 | 4 */ uint32 catalog_version_no;
/* 16 | 4 */ DBState state;
/* 20 | 8 */ pg_time_t time;
/* 28 | 8 */ XLogRecPtr checkPoint;
/* 36 | 76 */ CheckPoint checkPointCopy;
/* 112 | 8 */ XLogRecPtr unloggedLSN;
/* 120 | 8 */ XLogRecPtr minRecoveryPoint;
/* 128 | 4 */ TimeLineID minRecoveryPointTLI;
/* 132 | 8 */ XLogRecPtr backupStartPoint;
/* 140 | 8 */ XLogRecPtr backupEndPoint;
/* 148 | 1 */ _Bool backupEndRequired;
/* XXX 3-byte hole */
The other problem is FPE, caused by WalSegSz in pg_controldata.c.
For some reason this variable is signed, it is signed everywhere.
And when pg_control is not valid it can get a negative value, that
then causes an FPE. I changed it to unsigned in my patches.
This raised a question. What is the rationale behind using a signed
type for WAL segment size. Is there a historical or technical reason for this
choice?
I’ve implemented fixes tailored to the specific contexts of the affected
versions:
For REL_16 and later:
These versions utilize a function XLogFileName. So this raises a question as
changing wal_segsz_bytes
argument to unsigned seems logical. But it will be unsigned only here. Maybe it
is better to change it
in the other code?
For REL_12 to REL_15:
In these older versions, there is a macro in
./src/include/access/xlog_internal.h
#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes) \
snprintf(fname, MAXFNAMELEN, "%08X%08X%08X", tli, \
(uint32) ((logSegNo) /
XLogSegmentsPerXLogId(wal_segsz_bytes)), \
(uint32) ((logSegNo) %
XLogSegmentsPerXLogId(wal_segsz_bytes)))
Where casting second operand in % (XLogSegmentsPerXLogId(wal_segsz_bytes)) to
unsigned seems enough. Would be glad to hear your thoughts.
The versions that contain the problem:
REL_12_STABLE,
REL_13_STABLE,
REL_14_STABLE,
REL_15_STABLE,
REL_16_STABLE,
REL_17_STABLE
Kind regards,
Ian Ilyasov.
Junior Software Developer at Postgres Professional
From 104a8f2f1bfe791b625752e25479b35b8f6bed23 Mon Sep 17 00:00:00 2001
From: Ian Ilyasov <ianilya...@outlook.com>
Date: Tue, 29 Oct 2024 17:59:29 +0300
Subject: [PATCH] fix possible SIGSEGV situations on rubbish
pg_control
uint32 for WalSegSz is crucial as xlog_seg_size in pg_control.h
is also uint32 and there could be situation when WalSegSz gets
a negative value and pg_controldata triggers FPE (division by zero):
((0x100000000UL) / (WalSegSz)) can turn into zero in
XLogFileName(xlogfilename, ControlFile->checkPointCopy.ThisTimeLineID,
segno, WalSegSz);
because if WalSegSz is -1 then by arithmetic rules in C we get
0x100000000UL / 0xFFFFFFFFFFFFFFFFUL == 0.
---
src/bin/pg_controldata/pg_controldata.c | 30 +++++++++++++++++++------
src/include/access/xlog_internal.h | 2 +-
2 files changed, 24 insertions(+), 8 deletions(-)
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c
index 93a05d80ca7..bf759c050d5 100644
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -97,6 +97,7 @@ main(int argc, char *argv[])
bool crc_ok;
char *DataDir = NULL;
time_t time_tmp;
+ struct tm *tm_tmp;
char pgctime_str[128];
char ckpttime_str[128];
char mock_auth_nonce_str[MOCK_AUTH_NONCE_LEN * 2 + 1];
@@ -105,7 +106,7 @@ main(int argc, char *argv[])
char xlogfilename[MAXFNAMELEN];
int c;
int i;
- int WalSegSz;
+ uint32 WalSegSz;
pg_logging_init(argv[0]);
set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_controldata"));
@@ -178,8 +179,8 @@ main(int argc, char *argv[])
if (!IsValidWalSegSize(WalSegSz))
{
- pg_log_warning(ngettext("invalid WAL segment size in control file (%d byte)",
- "invalid WAL segment size in control file (%d bytes)",
+ pg_log_warning(ngettext("invalid WAL segment size in control file (%u byte)",
+ "invalid WAL segment size in control file (%u bytes)",
WalSegSz),
WalSegSz);
pg_log_warning_detail("The WAL segment size must be a power of two between 1 MB and 1 GB.");
@@ -196,11 +197,26 @@ main(int argc, char *argv[])
* about %c
*/
time_tmp = (time_t) ControlFile->time;
- strftime(pgctime_str, sizeof(pgctime_str), strftime_fmt,
- localtime(&time_tmp));
+ tm_tmp = localtime(&time_tmp);
+
+ if (tm_tmp != NULL)
+ {
+ strftime(pgctime_str, sizeof(pgctime_str), strftime_fmt,
+ tm_tmp);
+ } else {
+ snprintf(pgctime_str, sizeof(pgctime_str), "(corrupted timestamp)");
+ }
+
time_tmp = (time_t) ControlFile->checkPointCopy.time;
- strftime(ckpttime_str, sizeof(ckpttime_str), strftime_fmt,
- localtime(&time_tmp));
+ tm_tmp = localtime(&time_tmp);
+
+ if (tm_tmp != NULL)
+ {
+ strftime(ckpttime_str, sizeof(ckpttime_str), strftime_fmt,
+ tm_tmp);
+ } else {
+ snprintf(ckpttime_str, sizeof(ckpttime_str), "(corrupted timestamp)");
+ }
/*
* Calculate name of the WAL file containing the latest checkpoint's REDO
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index c6a91fb4560..5ecbbba71dd 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -163,7 +163,7 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader;
* function allocating the result generated.
*/
static inline void
-XLogFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
+XLogFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, unsigned int wal_segsz_bytes)
{
snprintf(fname, MAXFNAMELEN, "%08X%08X%08X", tli,
(uint32) (logSegNo / XLogSegmentsPerXLogId(wal_segsz_bytes)),
--
2.39.5
From 0f1a78411a195c1c51cabb158ca1b58858a12d99 Mon Sep 17 00:00:00 2001
From: Ian Ilyasov <ianilya...@outlook.com>
Date: Tue, 29 Oct 2024 17:59:29 +0300
Subject: [PATCH] fix possible SIGSEGV situations on rubbish
pg_control
uint32 for WalSegSz is crucial as xlog_seg_size in pg_control.h
is also uint32 and there could be situation when WalSegSz gets
a negative value and pg_controldata triggers FPE (division by zero):
((0x100000000UL) / (WalSegSz)) can turn into zero in
XLogFileName(xlogfilename, ControlFile->checkPointCopy.ThisTimeLineID,
segno, WalSegSz);
because if WalSegSz is -1 then by arithmetic rules in C we get
0x100000000UL / 0xFFFFFFFFFFFFFFFFUL == 0.
---
src/bin/pg_controldata/pg_controldata.c | 28 ++++++++++++++++++++-----
1 file changed, 23 insertions(+), 5 deletions(-)
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c
index e73639df744..1b7cbb03167 100644
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -97,6 +97,7 @@ main(int argc, char *argv[])
bool crc_ok;
char *DataDir = NULL;
time_t time_tmp;
+ struct tm *tm_tmp;
char pgctime_str[128];
char ckpttime_str[128];
char mock_auth_nonce_str[MOCK_AUTH_NONCE_LEN * 2 + 1];
@@ -105,7 +106,7 @@ main(int argc, char *argv[])
char xlogfilename[MAXFNAMELEN];
int c;
int i;
- int WalSegSz;
+ uint32 WalSegSz;
pg_logging_init(argv[0]);
set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_controldata"));
@@ -177,10 +178,10 @@ main(int argc, char *argv[])
if (!IsValidWalSegSize(WalSegSz))
{
printf(_("WARNING: invalid WAL segment size\n"));
- printf(ngettext("The WAL segment size stored in the file, %d byte, is not a power of two\n"
+ printf(ngettext("The WAL segment size stored in the file, %u byte, is not a power of two\n"
"between 1 MB and 1 GB. The file is corrupt and the results below are\n"
"untrustworthy.\n\n",
- "The WAL segment size stored in the file, %d bytes, is not a power of two\n"
+ "The WAL segment size stored in the file, %u bytes, is not a power of two\n"
"between 1 MB and 1 GB. The file is corrupt and the results below are\n"
"untrustworthy.\n\n",
WalSegSz),
@@ -197,11 +198,28 @@ main(int argc, char *argv[])
* about %c
*/
time_tmp = (time_t) ControlFile->time;
+ tm_tmp = localtime(&time_tmp);
+
+ if (tm_tmp == NULL)
+ {
+ pg_log_error("timestamp is corrupted");
+ exit(1);
+ }
+
strftime(pgctime_str, sizeof(pgctime_str), strftime_fmt,
- localtime(&time_tmp));
+ tm_tmp);
+
time_tmp = (time_t) ControlFile->checkPointCopy.time;
+ tm_tmp = localtime(&time_tmp);
+
+ if (tm_tmp == NULL)
+ {
+ pg_log_error("timestamp is corrupted");
+ exit(1);
+ }
+
strftime(ckpttime_str, sizeof(ckpttime_str), strftime_fmt,
- localtime(&time_tmp));
+ tm_tmp);
/*
* Calculate name of the WAL file containing the latest checkpoint's REDO
--
2.39.5