On Tue, 10 Oct 2023 at 06:38, Tom Lane <[email protected]> wrote:
> Hm. I'd be inclined to use maxlen == 0 as the indicator of a read-only
> buffer, just because that would not create a problem if we ever want
> to change it to an unsigned type. Other than that, I agree with the
> idea of using a special maxlen value to indicate that the buffer is
> read-only and not owned by the StringInfo. We need to nail down the
> exact semantics though.
I've attached a slightly more worked on patch that makes maxlen == 0
mean read-only. Unsure if a macro is worthwhile there or not.
The patch still fails during 023_twophase_stream.pl for the reasons
mentioned upthread. Getting rid of the Assert in
initStringInfoFromStringWithLen() allows it to pass.
One thought I had about this is that the memory context behaviour
might catch someone out at some point. Right now if you do
initStringInfo() the memory context of the "data" field will be
CurrentMemoryContext, but if someone does
initStringInfoFromStringWithLen() and then changes to some other
memory context before doing an appendStringInfo on that string, then
we'll allocate "data" in whatever that memory context is. Maybe that's
ok if we document it. Fixing it would mean adding a MemoryContext
field to StringInfoData which would be set to CurrentMemoryContext
during initStringInfo() and initStringInfoFromStringWithLen().
I'm not fully happy with the extra code added in enlargeStringInfo().
It's a little repetitive. Fixing it up would mean having to have a
boolean variable to mark if the string was readonly so at the end we'd
know to repalloc or palloc/memcpy. For now, I just marked that code
as unlikely() since there's no place in the code base that uses it.
David
diff --git a/src/backend/replication/logical/applyparallelworker.c
b/src/backend/replication/logical/applyparallelworker.c
index 82f48a488e..fdfbd45e2c 100644
--- a/src/backend/replication/logical/applyparallelworker.c
+++ b/src/backend/replication/logical/applyparallelworker.c
@@ -774,10 +774,7 @@ LogicalParallelApplyLoop(shm_mq_handle *mqh)
if (len == 0)
elog(ERROR, "invalid message length");
- s.cursor = 0;
- s.maxlen = -1;
- s.data = (char *) data;
- s.len = len;
+ initStringInfoFromStringWithLen(&s, data, len);
/*
* The first byte of messages sent from leader apply
worker to
diff --git a/src/backend/replication/logical/proto.c
b/src/backend/replication/logical/proto.c
index d52c8963eb..e51a2b7ce1 100644
--- a/src/backend/replication/logical/proto.c
+++ b/src/backend/replication/logical/proto.c
@@ -879,6 +879,7 @@ logicalrep_read_tuple(StringInfo in, LogicalRepTupleData
*tuple)
/* Read the data */
for (i = 0; i < natts; i++)
{
+ char *buff;
char kind;
int len;
StringInfo value = &tuple->colvalues[i];
@@ -899,19 +900,16 @@ logicalrep_read_tuple(StringInfo in, LogicalRepTupleData
*tuple)
len = pq_getmsgint(in, 4); /* read length
*/
/* and data */
- value->data = palloc(len + 1);
- pq_copymsgbytes(in, value->data, len);
+ buff = palloc(len + 1);
+ pq_copymsgbytes(in, buff, len);
/*
* Not strictly necessary for
LOGICALREP_COLUMN_BINARY, but
* per StringInfo practice.
*/
- value->data[len] = '\0';
+ buff[len] = '\0';
- /* make StringInfo fully valid */
- value->len = len;
- value->cursor = 0;
- value->maxlen = len;
+ initStringInfoFromStringWithLen(value, buff,
len);
break;
default:
elog(ERROR, "unrecognized data representation
type '%c'", kind);
diff --git a/src/backend/replication/logical/worker.c
b/src/backend/replication/logical/worker.c
index 597947410f..b4bf3111cf 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -3582,10 +3582,7 @@ LogicalRepApplyLoop(XLogRecPtr last_received)
/* Ensure we are reading the data into
our memory context. */
MemoryContextSwitchTo(ApplyMessageContext);
- s.data = buf;
- s.len = len;
- s.cursor = 0;
- s.maxlen = -1;
+ initStringInfoFromStringWithLen(&s,
buf, len);
c = pq_getmsgbyte(&s);
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 21b9763183..d48029038b 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -1816,23 +1816,19 @@ exec_bind_message(StringInfo input_message)
if (!isNull)
{
- const char *pvalue =
pq_getmsgbytes(input_message, plength);
+ char *pvalue;
/*
- * Rather than copying data around, we just set
up a phony
- * StringInfo pointing to the correct portion
of the message
- * buffer. We assume we can scribble on the
message buffer so
- * as to maintain the convention that
StringInfos have a
- * trailing null. This is grotty but is a big
win when
- * dealing with very large parameter strings.
+ * Rather than copying data around, we just
initialize a StringInfo
+ * pointing to the correct portion of the
message buffer. We assume we
+ * can scribble on the message buffer so as to
maintain the convention
+ * that StringInfos have a trailing null. This
is grotty but is a big win
+ * when dealing with very large parameter
strings.
*/
- pbuf.data = unconstify(char *, pvalue);
- pbuf.maxlen = plength + 1;
- pbuf.len = plength;
- pbuf.cursor = 0;
-
- csave = pbuf.data[plength];
- pbuf.data[plength] = '\0';
+ pvalue = unconstify(char *,
pq_getmsgbytes(input_message, plength));
+ csave = pvalue[plength];
+ pvalue[plength] = '\0';
+ initStringInfoFromStringWithLen(&pbuf, pvalue,
plength);
}
else
{
diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c
index ad176651d8..e76c723f49 100644
--- a/src/backend/utils/adt/rowtypes.c
+++ b/src/backend/utils/adt/rowtypes.c
@@ -623,21 +623,18 @@ record_recv(PG_FUNCTION_ARGS)
}
else
{
+ char *strbuff;
+
/*
- * Rather than copying data around, we just set up a
phony
- * StringInfo pointing to the correct portion of the
input buffer.
- * We assume we can scribble on the input buffer so as
to maintain
+ * Initalize a new StringInfo using the correct portion
of the input
+ * buffer. We assume we can scribble on the input
buffer so as to maintain
* the convention that StringInfos have a trailing null.
*/
- item_buf.data = &buf->data[buf->cursor];
- item_buf.maxlen = itemlen + 1;
- item_buf.len = itemlen;
- item_buf.cursor = 0;
-
+ strbuff = &buf->data[buf->cursor];
buf->cursor += itemlen;
-
csave = buf->data[buf->cursor];
buf->data[buf->cursor] = '\0';
+ initStringInfoFromStringWithLen(&item_buf, strbuff,
itemlen);
bufptr = &item_buf;
nulls[i] = false;
diff --git a/src/common/stringinfo.c b/src/common/stringinfo.c
index 05b22b5c53..2cf57a804f 100644
--- a/src/common/stringinfo.c
+++ b/src/common/stringinfo.c
@@ -30,6 +30,7 @@
#endif
#include "lib/stringinfo.h"
+#include "port/pg_bitutils.h"
/*
@@ -320,24 +321,50 @@ enlargeStringInfo(StringInfo str, int needed)
if (needed <= str->maxlen)
return; /* got enough space
already */
- /*
- * We don't want to allocate just a little more space with each append;
- * for efficiency, double the buffer size each time it overflows.
- * Actually, we might need to more than double it if 'needed' is big...
- */
- newlen = 2 * str->maxlen;
- while (needed > newlen)
- newlen = 2 * newlen;
-
- /*
- * Clamp to MaxAllocSize in case we went past it. Note we are assuming
- * here that MaxAllocSize <= INT_MAX/2, else the above loop could
- * overflow. We will still have newlen >= needed.
- */
- if (newlen > (int) MaxAllocSize)
- newlen = (int) MaxAllocSize;
-
- str->data = (char *) repalloc(str->data, newlen);
-
- str->maxlen = newlen;
+ if (likely(str->maxlen != 0))
+ {
+ /*
+ * We don't want to allocate just a little more space with each
append;
+ * for efficiency, double the buffer size each time it
overflows.
+ * Actually, we might need to more than double it if 'needed'
is big...
+ */
+ newlen = 2 * str->maxlen;
+ while (needed > newlen)
+ newlen = 2 * newlen;
+
+ /*
+ * Clamp to MaxAllocSize in case we went past it. Note we are
assuming
+ * here that MaxAllocSize <= INT_MAX/2, else the above loop
could
+ * overflow. We will still have newlen >= needed.
+ */
+ if (newlen > (int) MaxAllocSize)
+ newlen = (int) MaxAllocSize;
+
+ str->data = (char *) repalloc(str->data, newlen);
+
+ str->maxlen = newlen;
+ }
+ else
+ {
+ char *newdata;
+
+ /*
+ * The StringInfo may be read-only if initialized with
+ * initStringInfoFromStringWithLen(). This means we need to
copy the
+ * read-only string out into a newly palloc'd buffer.
+ */
+ newlen = pg_nextpower2_32(str->len) * 2;
+ while (needed > newlen)
+ newlen = 2 * newlen;
+
+ if (newlen > (int) MaxAllocSize)
+ newlen = (int) MaxAllocSize;
+
+ newdata = (char *) palloc(newlen);
+ memcpy(newdata, str->data, str->len + 1);
+
+ str->data = newdata;
+ str->maxlen = newlen;
+ return;
+ }
}
diff --git a/src/include/lib/stringinfo.h b/src/include/lib/stringinfo.h
index 36a416f8e0..f3739790a7 100644
--- a/src/include/lib/stringinfo.h
+++ b/src/include/lib/stringinfo.h
@@ -27,7 +27,10 @@
* maxlen is the allocated size in bytes of 'data', i.e. the
maximum
* string size (including the terminating '\0'
char) that we can
* currently store in 'data' without having to
reallocate
- * more space. We must always have maxlen > len.
+ * more space. We must always have maxlen > len
except in the
+ * case when maxlen is set to 0, in which case
'data' is classed
+ * "read-only". Before appending to a read-only
StringInfoData we
+ * must always copy the 'data' out into a new
allocation.
* cursor is initialized to zero by makeStringInfo or
initStringInfo,
* but is not otherwise touched by the
stringinfo.c routines.
* Some routines use it to scan through a
StringInfo.
@@ -79,6 +82,26 @@ extern StringInfo makeStringInfo(void);
*/
extern void initStringInfo(StringInfo str);
+/*------------------------
+ * initStringInfoFromStringWithLen
+ * Initialize a StringInfoData struct from an existing string without copying
+ * the string. The caller is responsible for ensuring the given string
+ * remains valid as long as the StringInfo does. The given string must be
+ * NUL terminated at 'len' bytes. Calls to this are used in performance
+ * critical locations where allocating a new buffer and copying would be too
+ * costly.
+ */
+static inline void
+initStringInfoFromStringWithLen(StringInfo str, char *data, int len)
+{
+ Assert(data[len] == '\0');
+
+ str->data = data;
+ str->maxlen = 0;
+ str->len = len;
+ str->cursor = 0;
+}
+
/*------------------------
* resetStringInfo
* Clears the current content of the StringInfo, if any. The