> The proposed patch extracts the code dealing with bytea from varlena.c > into a separate file, as proposed previously [1]. It merely changes > the location of the existing functions. There are no other changes.
Rebased. -- Best regards, Aleksander Alekseev
From 86776455a7b82f8cad6b984d3ddd67fc8f0e3258 Mon Sep 17 00:00:00 2001 From: Aleksander Alekseev <aleksander@timescale.com> Date: Tue, 25 Mar 2025 13:01:31 +0300 Subject: [PATCH v2] Split varlena.c into varlena.c and bytea.c Move most of the code dealing with bytea into a separate file. Aleksander Alekseev, reviewed by TODO FIXME Discussion: https://postgr.es/m/CAJ7c6TMPVPJ5DL447zDz5ydctB8OmuviURtSwd=PHCRFEPDEAQ@mail.gmail.com --- src/backend/utils/adt/Makefile | 1 + src/backend/utils/adt/bytea.c | 1147 +++++++++++++++++++++++++++ src/backend/utils/adt/meson.build | 1 + src/backend/utils/adt/varlena.c | 1214 ++--------------------------- 4 files changed, 1207 insertions(+), 1156 deletions(-) create mode 100644 src/backend/utils/adt/bytea.c diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 35e8c01aab9..d7b1902a070 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -23,6 +23,7 @@ OBJS = \ arrayutils.o \ ascii.o \ bool.o \ + bytea.o \ cash.o \ char.o \ cryptohashfuncs.o \ diff --git a/src/backend/utils/adt/bytea.c b/src/backend/utils/adt/bytea.c new file mode 100644 index 00000000000..0b3acc7b300 --- /dev/null +++ b/src/backend/utils/adt/bytea.c @@ -0,0 +1,1147 @@ +/*------------------------------------------------------------------------- + * + * bytea.c + * Functions for the bytea type. + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/bytea.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/detoast.h" +#include "catalog/pg_collation.h" +#include "common/int.h" +#include "funcapi.h" +#include "libpq/pqformat.h" +#include "utils/builtins.h" +#include "utils/bytea.h" +#include "utils/varlena.h" + +/* GUC variable */ +int bytea_output = BYTEA_OUTPUT_HEX; + +static StringInfo + makeStringAggState(FunctionCallInfo fcinfo); +static bytea *bytea_catenate(bytea *t1, bytea *t2); +static bytea *bytea_substring(Datum str, int S, int L, bool length_not_specified); +static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl); + + +/* subroutine to initialize state */ +static StringInfo +makeStringAggState(FunctionCallInfo fcinfo) +{ + StringInfo state; + MemoryContext aggcontext; + MemoryContext oldcontext; + + if (!AggCheckCallContext(fcinfo, &aggcontext)) + { + /* cannot be called directly because of internal-type argument */ + elog(ERROR, "bytea_string_agg_transfn called in non-aggregate context"); + } + + /* + * Create state in aggregate context. It'll stay there across subsequent + * calls. + */ + oldcontext = MemoryContextSwitchTo(aggcontext); + state = makeStringInfo(); + MemoryContextSwitchTo(oldcontext); + + return state; +} + +/* + * bytea_catenate + * Guts of byteacat(), broken out so it can be used by other functions + * + * Arguments can be in short-header form, but not compressed or out-of-line + */ +static bytea * +bytea_catenate(bytea *t1, bytea *t2) +{ + bytea *result; + int len1, + len2, + len; + char *ptr; + + len1 = VARSIZE_ANY_EXHDR(t1); + len2 = VARSIZE_ANY_EXHDR(t2); + + /* paranoia ... probably should throw error instead? */ + if (len1 < 0) + len1 = 0; + if (len2 < 0) + len2 = 0; + + len = len1 + len2 + VARHDRSZ; + result = (bytea *) palloc(len); + + /* Set size of result string... */ + SET_VARSIZE(result, len); + + /* Fill data field of result string... */ + ptr = VARDATA(result); + if (len1 > 0) + memcpy(ptr, VARDATA_ANY(t1), len1); + if (len2 > 0) + memcpy(ptr + len1, VARDATA_ANY(t2), len2); + + return result; +} + +#define PG_STR_GET_BYTEA(str_) \ + DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_))) + +static bytea * +bytea_substring(Datum str, + int S, + int L, + bool length_not_specified) +{ + int32 S1; /* adjusted start position */ + int32 L1; /* adjusted substring length */ + int32 E; /* end position */ + + /* + * The logic here should generally match text_substring(). + */ + S1 = Max(S, 1); + + if (length_not_specified) + { + /* + * Not passed a length - DatumGetByteaPSlice() grabs everything to the + * end of the string if we pass it a negative value for length. + */ + L1 = -1; + } + else if (L < 0) + { + /* SQL99 says to throw an error for E < S, i.e., negative length */ + ereport(ERROR, + (errcode(ERRCODE_SUBSTRING_ERROR), + errmsg("negative substring length not allowed"))); + L1 = -1; /* silence stupider compilers */ + } + else if (pg_add_s32_overflow(S, L, &E)) + { + /* + * L could be large enough for S + L to overflow, in which case the + * substring must run to end of string. + */ + L1 = -1; + } + else + { + /* + * A zero or negative value for the end position can happen if the + * start was negative or one. SQL99 says to return a zero-length + * string. + */ + if (E < 1) + return PG_STR_GET_BYTEA(""); + + L1 = E - S1; + } + + /* + * If the start position is past the end of the string, SQL99 says to + * return a zero-length string -- DatumGetByteaPSlice() will do that for + * us. We need only convert S1 to zero-based starting position. + */ + return DatumGetByteaPSlice(str, S1 - 1, L1); +} + +static bytea * +bytea_overlay(bytea *t1, bytea *t2, int sp, int sl) +{ + bytea *result; + bytea *s1; + bytea *s2; + int sp_pl_sl; + + /* + * Check for possible integer-overflow cases. For negative sp, throw a + * "substring length" error because that's what should be expected + * according to the spec's definition of OVERLAY(). + */ + if (sp <= 0) + ereport(ERROR, + (errcode(ERRCODE_SUBSTRING_ERROR), + errmsg("negative substring length not allowed"))); + if (pg_add_s32_overflow(sp, sl, &sp_pl_sl)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + + s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false); + s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true); + result = bytea_catenate(s1, t2); + result = bytea_catenate(result, s2); + + return result; +} + +/***************************************************************************** + * USER I/O ROUTINES * + *****************************************************************************/ + +#define VAL(CH) ((CH) - '0') +#define DIG(VAL) ((VAL) + '0') + +/* + * byteain - converts from printable representation of byte array + * + * Non-printable characters must be passed as '\nnn' (octal) and are + * converted to internal form. '\' must be passed as '\\'. + * ereport(ERROR, ...) if bad form. + * + * BUGS: + * The input is scanned twice. + * The error checking of input is minimal. + */ +Datum +byteain(PG_FUNCTION_ARGS) +{ + char *inputText = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + char *tp; + char *rp; + int bc; + bytea *result; + + /* Recognize hex input */ + if (inputText[0] == '\\' && inputText[1] == 'x') + { + size_t len = strlen(inputText); + + bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */ + result = palloc(bc); + bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result), + escontext); + SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */ + + PG_RETURN_BYTEA_P(result); + } + + /* Else, it's the traditional escaped style */ + for (bc = 0, tp = inputText; *tp != '\0'; bc++) + { + if (tp[0] != '\\') + tp++; + else if ((tp[0] == '\\') && + (tp[1] >= '0' && tp[1] <= '3') && + (tp[2] >= '0' && tp[2] <= '7') && + (tp[3] >= '0' && tp[3] <= '7')) + tp += 4; + else if ((tp[0] == '\\') && + (tp[1] == '\\')) + tp += 2; + else + { + /* + * one backslash, not followed by another or ### valid octal + */ + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "bytea"))); + } + } + + bc += VARHDRSZ; + + result = (bytea *) palloc(bc); + SET_VARSIZE(result, bc); + + tp = inputText; + rp = VARDATA(result); + while (*tp != '\0') + { + if (tp[0] != '\\') + *rp++ = *tp++; + else if ((tp[0] == '\\') && + (tp[1] >= '0' && tp[1] <= '3') && + (tp[2] >= '0' && tp[2] <= '7') && + (tp[3] >= '0' && tp[3] <= '7')) + { + bc = VAL(tp[1]); + bc <<= 3; + bc += VAL(tp[2]); + bc <<= 3; + *rp++ = bc + VAL(tp[3]); + + tp += 4; + } + else if ((tp[0] == '\\') && + (tp[1] == '\\')) + { + *rp++ = '\\'; + tp += 2; + } + else + { + /* + * We should never get here. The first pass should not allow it. + */ + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "bytea"))); + } + } + + PG_RETURN_BYTEA_P(result); +} + +/* + * byteaout - converts to printable representation of byte array + * + * In the traditional escaped format, non-printable characters are + * printed as '\nnn' (octal) and '\' as '\\'. + */ +Datum +byteaout(PG_FUNCTION_ARGS) +{ + bytea *vlena = PG_GETARG_BYTEA_PP(0); + char *result; + char *rp; + + if (bytea_output == BYTEA_OUTPUT_HEX) + { + /* Print hex format */ + rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1); + *rp++ = '\\'; + *rp++ = 'x'; + rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp); + } + else if (bytea_output == BYTEA_OUTPUT_ESCAPE) + { + /* Print traditional escaped format */ + char *vp; + uint64 len; + int i; + + len = 1; /* empty string has 1 char */ + vp = VARDATA_ANY(vlena); + for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) + { + if (*vp == '\\') + len += 2; + else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) + len += 4; + else + len++; + } + + /* + * In principle len can't overflow uint32 if the input fit in 1GB, but + * for safety let's check rather than relying on palloc's internal + * check. + */ + if (len > MaxAllocSize) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg_internal("result of bytea output conversion is too large"))); + rp = result = (char *) palloc(len); + + vp = VARDATA_ANY(vlena); + for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) + { + if (*vp == '\\') + { + *rp++ = '\\'; + *rp++ = '\\'; + } + else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) + { + int val; /* holds unprintable chars */ + + val = *vp; + rp[0] = '\\'; + rp[3] = DIG(val & 07); + val >>= 3; + rp[2] = DIG(val & 07); + val >>= 3; + rp[1] = DIG(val & 03); + rp += 4; + } + else + *rp++ = *vp; + } + } + else + { + elog(ERROR, "unrecognized \"bytea_output\" setting: %d", + bytea_output); + rp = result = NULL; /* keep compiler quiet */ + } + *rp = '\0'; + PG_RETURN_CSTRING(result); +} + +/* + * bytearecv - converts external binary format to bytea + */ +Datum +bytearecv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + bytea *result; + int nbytes; + + nbytes = buf->len - buf->cursor; + result = (bytea *) palloc(nbytes + VARHDRSZ); + SET_VARSIZE(result, nbytes + VARHDRSZ); + pq_copymsgbytes(buf, VARDATA(result), nbytes); + PG_RETURN_BYTEA_P(result); +} + +/* + * byteasend - converts bytea to binary format + * + * This is a special case: just copy the input... + */ +Datum +byteasend(PG_FUNCTION_ARGS) +{ + bytea *vlena = PG_GETARG_BYTEA_P_COPY(0); + + PG_RETURN_BYTEA_P(vlena); +} + +Datum +bytea_string_agg_transfn(PG_FUNCTION_ARGS) +{ + StringInfo state; + + state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); + + /* Append the value unless null, preceding it with the delimiter. */ + if (!PG_ARGISNULL(1)) + { + bytea *value = PG_GETARG_BYTEA_PP(1); + bool isfirst = false; + + /* + * You might think we can just throw away the first delimiter, however + * we must keep it as we may be a parallel worker doing partial + * aggregation building a state to send to the main process. We need + * to keep the delimiter of every aggregation so that the combine + * function can properly join up the strings of two separately + * partially aggregated results. The first delimiter is only stripped + * off in the final function. To know how much to strip off the front + * of the string, we store the length of the first delimiter in the + * StringInfo's cursor field, which we don't otherwise need here. + */ + if (state == NULL) + { + state = makeStringAggState(fcinfo); + isfirst = true; + } + + if (!PG_ARGISNULL(2)) + { + bytea *delim = PG_GETARG_BYTEA_PP(2); + + appendBinaryStringInfo(state, VARDATA_ANY(delim), + VARSIZE_ANY_EXHDR(delim)); + if (isfirst) + state->cursor = VARSIZE_ANY_EXHDR(delim); + } + + appendBinaryStringInfo(state, VARDATA_ANY(value), + VARSIZE_ANY_EXHDR(value)); + } + + /* + * The transition type for string_agg() is declared to be "internal", + * which is a pass-by-value type the same size as a pointer. + */ + if (state) + PG_RETURN_POINTER(state); + PG_RETURN_NULL(); +} + +Datum +bytea_string_agg_finalfn(PG_FUNCTION_ARGS) +{ + StringInfo state; + + /* cannot be called directly because of internal-type argument */ + Assert(AggCheckCallContext(fcinfo, NULL)); + + state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); + + if (state != NULL) + { + /* As per comment in transfn, strip data before the cursor position */ + bytea *result; + int strippedlen = state->len - state->cursor; + + result = (bytea *) palloc(strippedlen + VARHDRSZ); + SET_VARSIZE(result, strippedlen + VARHDRSZ); + memcpy(VARDATA(result), &state->data[state->cursor], strippedlen); + PG_RETURN_BYTEA_P(result); + } + else + PG_RETURN_NULL(); +} + +/*------------------------------------------------------------- + * byteaoctetlen + * + * get the number of bytes contained in an instance of type 'bytea' + *------------------------------------------------------------- + */ +Datum +byteaoctetlen(PG_FUNCTION_ARGS) +{ + Datum str = PG_GETARG_DATUM(0); + + /* We need not detoast the input at all */ + PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ); +} + +/* + * byteacat - + * takes two bytea* and returns a bytea* that is the concatenation of + * the two. + * + * Cloned from textcat and modified as required. + */ +Datum +byteacat(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + bytea *t2 = PG_GETARG_BYTEA_PP(1); + + PG_RETURN_BYTEA_P(bytea_catenate(t1, t2)); +} + +/* + * byteaoverlay + * Replace specified substring of first string with second + * + * The SQL standard defines OVERLAY() in terms of substring and concatenation. + * This code is a direct implementation of what the standard says. + */ +Datum +byteaoverlay(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + bytea *t2 = PG_GETARG_BYTEA_PP(1); + int sp = PG_GETARG_INT32(2); /* substring start position */ + int sl = PG_GETARG_INT32(3); /* substring length */ + + PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl)); +} + +Datum +byteaoverlay_no_len(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + bytea *t2 = PG_GETARG_BYTEA_PP(1); + int sp = PG_GETARG_INT32(2); /* substring start position */ + int sl; + + sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */ + PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl)); +} + +/* + * bytea_substr() + * Return a substring starting at the specified position. + * Cloned from text_substr and modified as required. + * + * Input: + * - string + * - starting position (is one-based) + * - string length (optional) + * + * If the starting position is zero or less, then return from the start of the string + * adjusting the length to be consistent with the "negative start" per SQL. + * If the length is less than zero, an ERROR is thrown. If no third argument + * (length) is provided, the length to the end of the string is assumed. + */ +Datum +bytea_substr(PG_FUNCTION_ARGS) +{ + PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0), + PG_GETARG_INT32(1), + PG_GETARG_INT32(2), + false)); +} + +/* + * bytea_substr_no_len - + * Wrapper to avoid opr_sanity failure due to + * one function accepting a different number of args. + */ +Datum +bytea_substr_no_len(PG_FUNCTION_ARGS) +{ + PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0), + PG_GETARG_INT32(1), + -1, + true)); +} + +/* + * bit_count + */ +Datum +bytea_bit_count(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + + PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1))); +} + +/* + * byteapos - + * Return the position of the specified substring. + * Implements the SQL POSITION() function. + * Cloned from textpos and modified as required. + */ +Datum +byteapos(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + bytea *t2 = PG_GETARG_BYTEA_PP(1); + int pos; + int px, + p; + int len1, + len2; + char *p1, + *p2; + + len1 = VARSIZE_ANY_EXHDR(t1); + len2 = VARSIZE_ANY_EXHDR(t2); + + if (len2 <= 0) + PG_RETURN_INT32(1); /* result for empty pattern */ + + p1 = VARDATA_ANY(t1); + p2 = VARDATA_ANY(t2); + + pos = 0; + px = (len1 - len2); + for (p = 0; p <= px; p++) + { + if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0)) + { + pos = p + 1; + break; + }; + p1++; + }; + + PG_RETURN_INT32(pos); +} + +/*------------------------------------------------------------- + * byteaGetByte + * + * this routine treats "bytea" as an array of bytes. + * It returns the Nth byte (a number between 0 and 255). + *------------------------------------------------------------- + */ +Datum +byteaGetByte(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int32 n = PG_GETARG_INT32(1); + int len; + int byte; + + len = VARSIZE_ANY_EXHDR(v); + + if (n < 0 || n >= len) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("index %d out of valid range, 0..%d", + n, len - 1))); + + byte = ((unsigned char *) VARDATA_ANY(v))[n]; + + PG_RETURN_INT32(byte); +} + +/*------------------------------------------------------------- + * byteaGetBit + * + * This routine treats a "bytea" type like an array of bits. + * It returns the value of the Nth bit (0 or 1). + * + *------------------------------------------------------------- + */ +Datum +byteaGetBit(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int64 n = PG_GETARG_INT64(1); + int byteNo, + bitNo; + int len; + int byte; + + len = VARSIZE_ANY_EXHDR(v); + + if (n < 0 || n >= (int64) len * 8) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("index %" PRId64 " out of valid range, 0..%" PRId64, + n, (int64) len * 8 - 1))); + + /* n/8 is now known < len, so safe to cast to int */ + byteNo = (int) (n / 8); + bitNo = (int) (n % 8); + + byte = ((unsigned char *) VARDATA_ANY(v))[byteNo]; + + if (byte & (1 << bitNo)) + PG_RETURN_INT32(1); + else + PG_RETURN_INT32(0); +} + +/*------------------------------------------------------------- + * byteaSetByte + * + * Given an instance of type 'bytea' creates a new one with + * the Nth byte set to the given value. + * + *------------------------------------------------------------- + */ +Datum +byteaSetByte(PG_FUNCTION_ARGS) +{ + bytea *res = PG_GETARG_BYTEA_P_COPY(0); + int32 n = PG_GETARG_INT32(1); + int32 newByte = PG_GETARG_INT32(2); + int len; + + len = VARSIZE(res) - VARHDRSZ; + + if (n < 0 || n >= len) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("index %d out of valid range, 0..%d", + n, len - 1))); + + /* + * Now set the byte. + */ + ((unsigned char *) VARDATA(res))[n] = newByte; + + PG_RETURN_BYTEA_P(res); +} + +/*------------------------------------------------------------- + * byteaSetBit + * + * Given an instance of type 'bytea' creates a new one with + * the Nth bit set to the given value. + * + *------------------------------------------------------------- + */ +Datum +byteaSetBit(PG_FUNCTION_ARGS) +{ + bytea *res = PG_GETARG_BYTEA_P_COPY(0); + int64 n = PG_GETARG_INT64(1); + int32 newBit = PG_GETARG_INT32(2); + int len; + int oldByte, + newByte; + int byteNo, + bitNo; + + len = VARSIZE(res) - VARHDRSZ; + + if (n < 0 || n >= (int64) len * 8) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("index %" PRId64 " out of valid range, 0..%" PRId64, + n, (int64) len * 8 - 1))); + + /* n/8 is now known < len, so safe to cast to int */ + byteNo = (int) (n / 8); + bitNo = (int) (n % 8); + + /* + * sanity check! + */ + if (newBit != 0 && newBit != 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("new bit must be 0 or 1"))); + + /* + * Update the byte. + */ + oldByte = ((unsigned char *) VARDATA(res))[byteNo]; + + if (newBit == 0) + newByte = oldByte & (~(1 << bitNo)); + else + newByte = oldByte | (1 << bitNo); + + ((unsigned char *) VARDATA(res))[byteNo] = newByte; + + PG_RETURN_BYTEA_P(res); +} + +/* + * Return reversed bytea + */ +Datum +bytea_reverse(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + const char *p = VARDATA_ANY(v); + int len = VARSIZE_ANY_EXHDR(v); + const char *endp = p + len; + bytea *result = palloc(len + VARHDRSZ); + char *dst = (char *) VARDATA(result) + len; + + SET_VARSIZE(result, len + VARHDRSZ); + + while (p < endp) + *(--dst) = *p++; + + PG_RETURN_BYTEA_P(result); +} + +/***************************************************************************** + * Comparison Functions used for bytea + * + * Note: btree indexes need these routines not to leak memory; therefore, + * be careful to free working copies of toasted datums. Most places don't + * need to be so careful. + *****************************************************************************/ + +Datum +byteaeq(PG_FUNCTION_ARGS) +{ + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + bool result; + Size len1, + len2; + + /* + * We can use a fast path for unequal lengths, which might save us from + * having to detoast one or both values. + */ + len1 = toast_raw_datum_size(arg1); + len2 = toast_raw_datum_size(arg2); + if (len1 != len2) + result = false; + else + { + bytea *barg1 = DatumGetByteaPP(arg1); + bytea *barg2 = DatumGetByteaPP(arg2); + + result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2), + len1 - VARHDRSZ) == 0); + + PG_FREE_IF_COPY(barg1, 0); + PG_FREE_IF_COPY(barg2, 1); + } + + PG_RETURN_BOOL(result); +} + +Datum +byteane(PG_FUNCTION_ARGS) +{ + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + bool result; + Size len1, + len2; + + /* + * We can use a fast path for unequal lengths, which might save us from + * having to detoast one or both values. + */ + len1 = toast_raw_datum_size(arg1); + len2 = toast_raw_datum_size(arg2); + if (len1 != len2) + result = true; + else + { + bytea *barg1 = DatumGetByteaPP(arg1); + bytea *barg2 = DatumGetByteaPP(arg2); + + result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2), + len1 - VARHDRSZ) != 0); + + PG_FREE_IF_COPY(barg1, 0); + PG_FREE_IF_COPY(barg2, 1); + } + + PG_RETURN_BOOL(result); +} + +Datum +bytealt(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2))); +} + +Datum +byteale(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2))); +} + +Datum +byteagt(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2))); +} + +Datum +byteage(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2))); +} + +Datum +byteacmp(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + if ((cmp == 0) && (len1 != len2)) + cmp = (len1 < len2) ? -1 : 1; + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_INT32(cmp); +} + +Datum +bytea_larger(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + bytea *result; + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2); + + PG_RETURN_BYTEA_P(result); +} + +Datum +bytea_smaller(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + bytea *result; + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2); + + PG_RETURN_BYTEA_P(result); +} + +Datum +bytea_sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + /* Use generic string SortSupport, forcing "C" collation */ + varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID); + + MemoryContextSwitchTo(oldcontext); + + PG_RETURN_VOID(); +} + +/* Cast bytea -> int2 */ +Datum +bytea_int2(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int len = VARSIZE_ANY_EXHDR(v); + uint16 result; + + /* Check that the byte array is not too long */ + if (len > sizeof(result)) + ereport(ERROR, + errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("smallint out of range")); + + /* Convert it to an integer; most significant bytes come first */ + result = 0; + for (int i = 0; i < len; i++) + { + result <<= BITS_PER_BYTE; + result |= ((unsigned char *) VARDATA_ANY(v))[i]; + } + + PG_RETURN_INT16(result); +} + +/* Cast bytea -> int4 */ +Datum +bytea_int4(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int len = VARSIZE_ANY_EXHDR(v); + uint32 result; + + /* Check that the byte array is not too long */ + if (len > sizeof(result)) + ereport(ERROR, + errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range")); + + /* Convert it to an integer; most significant bytes come first */ + result = 0; + for (int i = 0; i < len; i++) + { + result <<= BITS_PER_BYTE; + result |= ((unsigned char *) VARDATA_ANY(v))[i]; + } + + PG_RETURN_INT32(result); +} + +/* Cast bytea -> int8 */ +Datum +bytea_int8(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int len = VARSIZE_ANY_EXHDR(v); + uint64 result; + + /* Check that the byte array is not too long */ + if (len > sizeof(result)) + ereport(ERROR, + errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range")); + + /* Convert it to an integer; most significant bytes come first */ + result = 0; + for (int i = 0; i < len; i++) + { + result <<= BITS_PER_BYTE; + result |= ((unsigned char *) VARDATA_ANY(v))[i]; + } + + PG_RETURN_INT64(result); +} + +/* Cast int2 -> bytea; can just use int2send() */ +Datum +int2_bytea(PG_FUNCTION_ARGS) +{ + return int2send(fcinfo); +} + +/* Cast int4 -> bytea; can just use int4send() */ +Datum +int4_bytea(PG_FUNCTION_ARGS) +{ + return int4send(fcinfo); +} + +/* Cast int8 -> bytea; can just use int8send() */ +Datum +int8_bytea(PG_FUNCTION_ARGS) +{ + return int8send(fcinfo); +} diff --git a/src/backend/utils/adt/meson.build b/src/backend/utils/adt/meson.build index f23cfad7182..bf94c50a8b3 100644 --- a/src/backend/utils/adt/meson.build +++ b/src/backend/utils/adt/meson.build @@ -12,6 +12,7 @@ backend_sources += files( 'arrayutils.c', 'ascii.c', 'bool.c', + 'bytea.c', 'cash.c', 'char.c', 'cryptohashfuncs.c', diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 3e4d5568bde..ffae8c23abf 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -35,7 +35,6 @@ #include "port/pg_bswap.h" #include "regex/regex.h" #include "utils/builtins.h" -#include "utils/bytea.h" #include "utils/guc.h" #include "utils/lsyscache.h" #include "utils/memutils.h" @@ -43,10 +42,6 @@ #include "utils/sortsupport.h" #include "utils/varlena.h" - -/* GUC variable */ -int bytea_output = BYTEA_OUTPUT_HEX; - typedef struct varlena VarString; /* @@ -148,12 +143,6 @@ static int text_position_get_match_pos(TextPositionState *state); static void text_position_cleanup(TextPositionState *state); static void check_collation_set(Oid collid); static int text_cmp(text *arg1, text *arg2, Oid collid); -static bytea *bytea_catenate(bytea *t1, bytea *t2); -static bytea *bytea_substring(Datum str, - int S, - int L, - bool length_not_specified); -static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl); static void appendStringInfoText(StringInfo str, const text *t); static bool split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate); static void split_text_accum_result(SplitTextOutputData *tstate, @@ -279,307 +268,6 @@ text_to_cstring_buffer(const text *src, char *dst, size_t dst_len) * USER I/O ROUTINES * *****************************************************************************/ - -#define VAL(CH) ((CH) - '0') -#define DIG(VAL) ((VAL) + '0') - -/* - * byteain - converts from printable representation of byte array - * - * Non-printable characters must be passed as '\nnn' (octal) and are - * converted to internal form. '\' must be passed as '\\'. - * ereport(ERROR, ...) if bad form. - * - * BUGS: - * The input is scanned twice. - * The error checking of input is minimal. - */ -Datum -byteain(PG_FUNCTION_ARGS) -{ - char *inputText = PG_GETARG_CSTRING(0); - Node *escontext = fcinfo->context; - char *tp; - char *rp; - int bc; - bytea *result; - - /* Recognize hex input */ - if (inputText[0] == '\\' && inputText[1] == 'x') - { - size_t len = strlen(inputText); - - bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */ - result = palloc(bc); - bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result), - escontext); - SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */ - - PG_RETURN_BYTEA_P(result); - } - - /* Else, it's the traditional escaped style */ - for (bc = 0, tp = inputText; *tp != '\0'; bc++) - { - if (tp[0] != '\\') - tp++; - else if ((tp[0] == '\\') && - (tp[1] >= '0' && tp[1] <= '3') && - (tp[2] >= '0' && tp[2] <= '7') && - (tp[3] >= '0' && tp[3] <= '7')) - tp += 4; - else if ((tp[0] == '\\') && - (tp[1] == '\\')) - tp += 2; - else - { - /* - * one backslash, not followed by another or ### valid octal - */ - ereturn(escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "bytea"))); - } - } - - bc += VARHDRSZ; - - result = (bytea *) palloc(bc); - SET_VARSIZE(result, bc); - - tp = inputText; - rp = VARDATA(result); - while (*tp != '\0') - { - if (tp[0] != '\\') - *rp++ = *tp++; - else if ((tp[0] == '\\') && - (tp[1] >= '0' && tp[1] <= '3') && - (tp[2] >= '0' && tp[2] <= '7') && - (tp[3] >= '0' && tp[3] <= '7')) - { - bc = VAL(tp[1]); - bc <<= 3; - bc += VAL(tp[2]); - bc <<= 3; - *rp++ = bc + VAL(tp[3]); - - tp += 4; - } - else if ((tp[0] == '\\') && - (tp[1] == '\\')) - { - *rp++ = '\\'; - tp += 2; - } - else - { - /* - * We should never get here. The first pass should not allow it. - */ - ereturn(escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "bytea"))); - } - } - - PG_RETURN_BYTEA_P(result); -} - -/* - * byteaout - converts to printable representation of byte array - * - * In the traditional escaped format, non-printable characters are - * printed as '\nnn' (octal) and '\' as '\\'. - */ -Datum -byteaout(PG_FUNCTION_ARGS) -{ - bytea *vlena = PG_GETARG_BYTEA_PP(0); - char *result; - char *rp; - - if (bytea_output == BYTEA_OUTPUT_HEX) - { - /* Print hex format */ - rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1); - *rp++ = '\\'; - *rp++ = 'x'; - rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp); - } - else if (bytea_output == BYTEA_OUTPUT_ESCAPE) - { - /* Print traditional escaped format */ - char *vp; - uint64 len; - int i; - - len = 1; /* empty string has 1 char */ - vp = VARDATA_ANY(vlena); - for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) - { - if (*vp == '\\') - len += 2; - else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) - len += 4; - else - len++; - } - - /* - * In principle len can't overflow uint32 if the input fit in 1GB, but - * for safety let's check rather than relying on palloc's internal - * check. - */ - if (len > MaxAllocSize) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg_internal("result of bytea output conversion is too large"))); - rp = result = (char *) palloc(len); - - vp = VARDATA_ANY(vlena); - for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) - { - if (*vp == '\\') - { - *rp++ = '\\'; - *rp++ = '\\'; - } - else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) - { - int val; /* holds unprintable chars */ - - val = *vp; - rp[0] = '\\'; - rp[3] = DIG(val & 07); - val >>= 3; - rp[2] = DIG(val & 07); - val >>= 3; - rp[1] = DIG(val & 03); - rp += 4; - } - else - *rp++ = *vp; - } - } - else - { - elog(ERROR, "unrecognized \"bytea_output\" setting: %d", - bytea_output); - rp = result = NULL; /* keep compiler quiet */ - } - *rp = '\0'; - PG_RETURN_CSTRING(result); -} - -/* - * bytearecv - converts external binary format to bytea - */ -Datum -bytearecv(PG_FUNCTION_ARGS) -{ - StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); - bytea *result; - int nbytes; - - nbytes = buf->len - buf->cursor; - result = (bytea *) palloc(nbytes + VARHDRSZ); - SET_VARSIZE(result, nbytes + VARHDRSZ); - pq_copymsgbytes(buf, VARDATA(result), nbytes); - PG_RETURN_BYTEA_P(result); -} - -/* - * byteasend - converts bytea to binary format - * - * This is a special case: just copy the input... - */ -Datum -byteasend(PG_FUNCTION_ARGS) -{ - bytea *vlena = PG_GETARG_BYTEA_P_COPY(0); - - PG_RETURN_BYTEA_P(vlena); -} - -Datum -bytea_string_agg_transfn(PG_FUNCTION_ARGS) -{ - StringInfo state; - - state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); - - /* Append the value unless null, preceding it with the delimiter. */ - if (!PG_ARGISNULL(1)) - { - bytea *value = PG_GETARG_BYTEA_PP(1); - bool isfirst = false; - - /* - * You might think we can just throw away the first delimiter, however - * we must keep it as we may be a parallel worker doing partial - * aggregation building a state to send to the main process. We need - * to keep the delimiter of every aggregation so that the combine - * function can properly join up the strings of two separately - * partially aggregated results. The first delimiter is only stripped - * off in the final function. To know how much to strip off the front - * of the string, we store the length of the first delimiter in the - * StringInfo's cursor field, which we don't otherwise need here. - */ - if (state == NULL) - { - state = makeStringAggState(fcinfo); - isfirst = true; - } - - if (!PG_ARGISNULL(2)) - { - bytea *delim = PG_GETARG_BYTEA_PP(2); - - appendBinaryStringInfo(state, VARDATA_ANY(delim), - VARSIZE_ANY_EXHDR(delim)); - if (isfirst) - state->cursor = VARSIZE_ANY_EXHDR(delim); - } - - appendBinaryStringInfo(state, VARDATA_ANY(value), - VARSIZE_ANY_EXHDR(value)); - } - - /* - * The transition type for string_agg() is declared to be "internal", - * which is a pass-by-value type the same size as a pointer. - */ - if (state) - PG_RETURN_POINTER(state); - PG_RETURN_NULL(); -} - -Datum -bytea_string_agg_finalfn(PG_FUNCTION_ARGS) -{ - StringInfo state; - - /* cannot be called directly because of internal-type argument */ - Assert(AggCheckCallContext(fcinfo, NULL)); - - state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); - - if (state != NULL) - { - /* As per comment in transfn, strip data before the cursor position */ - bytea *result; - int strippedlen = state->len - state->cursor; - - result = (bytea *) palloc(strippedlen + VARHDRSZ); - SET_VARSIZE(result, strippedlen + VARHDRSZ); - memcpy(VARDATA(result), &state->data[state->cursor], strippedlen); - PG_RETURN_BYTEA_P(result); - } - else - PG_RETURN_NULL(); -} - /* * textin - converts cstring to internal representation */ @@ -2959,552 +2647,91 @@ bttext_pattern_sortsupport(PG_FUNCTION_ARGS) } -/*------------------------------------------------------------- - * byteaoctetlen - * - * get the number of bytes contained in an instance of type 'bytea' - *------------------------------------------------------------- +/* text_name() + * Converts a text type to a Name type. */ Datum -byteaoctetlen(PG_FUNCTION_ARGS) +text_name(PG_FUNCTION_ARGS) { - Datum str = PG_GETARG_DATUM(0); + text *s = PG_GETARG_TEXT_PP(0); + Name result; + int len; - /* We need not detoast the input at all */ - PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ); + len = VARSIZE_ANY_EXHDR(s); + + /* Truncate oversize input */ + if (len >= NAMEDATALEN) + len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1); + + /* We use palloc0 here to ensure result is zero-padded */ + result = (Name) palloc0(NAMEDATALEN); + memcpy(NameStr(*result), VARDATA_ANY(s), len); + + PG_RETURN_NAME(result); } -/* - * byteacat - - * takes two bytea* and returns a bytea* that is the concatenation of - * the two. - * - * Cloned from textcat and modified as required. +/* name_text() + * Converts a Name type to a text type. */ Datum -byteacat(PG_FUNCTION_ARGS) +name_text(PG_FUNCTION_ARGS) { - bytea *t1 = PG_GETARG_BYTEA_PP(0); - bytea *t2 = PG_GETARG_BYTEA_PP(1); + Name s = PG_GETARG_NAME(0); - PG_RETURN_BYTEA_P(bytea_catenate(t1, t2)); + PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s))); } + /* - * bytea_catenate - * Guts of byteacat(), broken out so it can be used by other functions + * textToQualifiedNameList - convert a text object to list of names * - * Arguments can be in short-header form, but not compressed or out-of-line + * This implements the input parsing needed by nextval() and other + * functions that take a text parameter representing a qualified name. + * We split the name at dots, downcase if not double-quoted, and + * truncate names if they're too long. */ -static bytea * -bytea_catenate(bytea *t1, bytea *t2) +List * +textToQualifiedNameList(text *textval) { - bytea *result; - int len1, - len2, - len; - char *ptr; + char *rawname; + List *result = NIL; + List *namelist; + ListCell *l; - len1 = VARSIZE_ANY_EXHDR(t1); - len2 = VARSIZE_ANY_EXHDR(t2); + /* Convert to C string (handles possible detoasting). */ + /* Note we rely on being able to modify rawname below. */ + rawname = text_to_cstring(textval); - /* paranoia ... probably should throw error instead? */ - if (len1 < 0) - len1 = 0; - if (len2 < 0) - len2 = 0; + if (!SplitIdentifierString(rawname, '.', &namelist)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_NAME), + errmsg("invalid name syntax"))); - len = len1 + len2 + VARHDRSZ; - result = (bytea *) palloc(len); + if (namelist == NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_NAME), + errmsg("invalid name syntax"))); - /* Set size of result string... */ - SET_VARSIZE(result, len); + foreach(l, namelist) + { + char *curname = (char *) lfirst(l); - /* Fill data field of result string... */ - ptr = VARDATA(result); - if (len1 > 0) - memcpy(ptr, VARDATA_ANY(t1), len1); - if (len2 > 0) - memcpy(ptr + len1, VARDATA_ANY(t2), len2); + result = lappend(result, makeString(pstrdup(curname))); + } + + pfree(rawname); + list_free(namelist); return result; } -#define PG_STR_GET_BYTEA(str_) \ - DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_))) - /* - * bytea_substr() - * Return a substring starting at the specified position. - * Cloned from text_substr and modified as required. + * SplitIdentifierString --- parse a string containing identifiers * - * Input: - * - string - * - starting position (is one-based) - * - string length (optional) - * - * If the starting position is zero or less, then return from the start of the string - * adjusting the length to be consistent with the "negative start" per SQL. - * If the length is less than zero, an ERROR is thrown. If no third argument - * (length) is provided, the length to the end of the string is assumed. - */ -Datum -bytea_substr(PG_FUNCTION_ARGS) -{ - PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0), - PG_GETARG_INT32(1), - PG_GETARG_INT32(2), - false)); -} - -/* - * bytea_substr_no_len - - * Wrapper to avoid opr_sanity failure due to - * one function accepting a different number of args. - */ -Datum -bytea_substr_no_len(PG_FUNCTION_ARGS) -{ - PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0), - PG_GETARG_INT32(1), - -1, - true)); -} - -static bytea * -bytea_substring(Datum str, - int S, - int L, - bool length_not_specified) -{ - int32 S1; /* adjusted start position */ - int32 L1; /* adjusted substring length */ - int32 E; /* end position */ - - /* - * The logic here should generally match text_substring(). - */ - S1 = Max(S, 1); - - if (length_not_specified) - { - /* - * Not passed a length - DatumGetByteaPSlice() grabs everything to the - * end of the string if we pass it a negative value for length. - */ - L1 = -1; - } - else if (L < 0) - { - /* SQL99 says to throw an error for E < S, i.e., negative length */ - ereport(ERROR, - (errcode(ERRCODE_SUBSTRING_ERROR), - errmsg("negative substring length not allowed"))); - L1 = -1; /* silence stupider compilers */ - } - else if (pg_add_s32_overflow(S, L, &E)) - { - /* - * L could be large enough for S + L to overflow, in which case the - * substring must run to end of string. - */ - L1 = -1; - } - else - { - /* - * A zero or negative value for the end position can happen if the - * start was negative or one. SQL99 says to return a zero-length - * string. - */ - if (E < 1) - return PG_STR_GET_BYTEA(""); - - L1 = E - S1; - } - - /* - * If the start position is past the end of the string, SQL99 says to - * return a zero-length string -- DatumGetByteaPSlice() will do that for - * us. We need only convert S1 to zero-based starting position. - */ - return DatumGetByteaPSlice(str, S1 - 1, L1); -} - -/* - * byteaoverlay - * Replace specified substring of first string with second - * - * The SQL standard defines OVERLAY() in terms of substring and concatenation. - * This code is a direct implementation of what the standard says. - */ -Datum -byteaoverlay(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - bytea *t2 = PG_GETARG_BYTEA_PP(1); - int sp = PG_GETARG_INT32(2); /* substring start position */ - int sl = PG_GETARG_INT32(3); /* substring length */ - - PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl)); -} - -Datum -byteaoverlay_no_len(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - bytea *t2 = PG_GETARG_BYTEA_PP(1); - int sp = PG_GETARG_INT32(2); /* substring start position */ - int sl; - - sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */ - PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl)); -} - -static bytea * -bytea_overlay(bytea *t1, bytea *t2, int sp, int sl) -{ - bytea *result; - bytea *s1; - bytea *s2; - int sp_pl_sl; - - /* - * Check for possible integer-overflow cases. For negative sp, throw a - * "substring length" error because that's what should be expected - * according to the spec's definition of OVERLAY(). - */ - if (sp <= 0) - ereport(ERROR, - (errcode(ERRCODE_SUBSTRING_ERROR), - errmsg("negative substring length not allowed"))); - if (pg_add_s32_overflow(sp, sl, &sp_pl_sl)) - ereport(ERROR, - (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("integer out of range"))); - - s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false); - s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true); - result = bytea_catenate(s1, t2); - result = bytea_catenate(result, s2); - - return result; -} - -/* - * bit_count - */ -Datum -bytea_bit_count(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - - PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1))); -} - -/* - * byteapos - - * Return the position of the specified substring. - * Implements the SQL POSITION() function. - * Cloned from textpos and modified as required. - */ -Datum -byteapos(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - bytea *t2 = PG_GETARG_BYTEA_PP(1); - int pos; - int px, - p; - int len1, - len2; - char *p1, - *p2; - - len1 = VARSIZE_ANY_EXHDR(t1); - len2 = VARSIZE_ANY_EXHDR(t2); - - if (len2 <= 0) - PG_RETURN_INT32(1); /* result for empty pattern */ - - p1 = VARDATA_ANY(t1); - p2 = VARDATA_ANY(t2); - - pos = 0; - px = (len1 - len2); - for (p = 0; p <= px; p++) - { - if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0)) - { - pos = p + 1; - break; - }; - p1++; - }; - - PG_RETURN_INT32(pos); -} - -/*------------------------------------------------------------- - * byteaGetByte - * - * this routine treats "bytea" as an array of bytes. - * It returns the Nth byte (a number between 0 and 255). - *------------------------------------------------------------- - */ -Datum -byteaGetByte(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int32 n = PG_GETARG_INT32(1); - int len; - int byte; - - len = VARSIZE_ANY_EXHDR(v); - - if (n < 0 || n >= len) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("index %d out of valid range, 0..%d", - n, len - 1))); - - byte = ((unsigned char *) VARDATA_ANY(v))[n]; - - PG_RETURN_INT32(byte); -} - -/*------------------------------------------------------------- - * byteaGetBit - * - * This routine treats a "bytea" type like an array of bits. - * It returns the value of the Nth bit (0 or 1). - * - *------------------------------------------------------------- - */ -Datum -byteaGetBit(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int64 n = PG_GETARG_INT64(1); - int byteNo, - bitNo; - int len; - int byte; - - len = VARSIZE_ANY_EXHDR(v); - - if (n < 0 || n >= (int64) len * 8) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("index %" PRId64 " out of valid range, 0..%" PRId64, - n, (int64) len * 8 - 1))); - - /* n/8 is now known < len, so safe to cast to int */ - byteNo = (int) (n / 8); - bitNo = (int) (n % 8); - - byte = ((unsigned char *) VARDATA_ANY(v))[byteNo]; - - if (byte & (1 << bitNo)) - PG_RETURN_INT32(1); - else - PG_RETURN_INT32(0); -} - -/*------------------------------------------------------------- - * byteaSetByte - * - * Given an instance of type 'bytea' creates a new one with - * the Nth byte set to the given value. - * - *------------------------------------------------------------- - */ -Datum -byteaSetByte(PG_FUNCTION_ARGS) -{ - bytea *res = PG_GETARG_BYTEA_P_COPY(0); - int32 n = PG_GETARG_INT32(1); - int32 newByte = PG_GETARG_INT32(2); - int len; - - len = VARSIZE(res) - VARHDRSZ; - - if (n < 0 || n >= len) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("index %d out of valid range, 0..%d", - n, len - 1))); - - /* - * Now set the byte. - */ - ((unsigned char *) VARDATA(res))[n] = newByte; - - PG_RETURN_BYTEA_P(res); -} - -/*------------------------------------------------------------- - * byteaSetBit - * - * Given an instance of type 'bytea' creates a new one with - * the Nth bit set to the given value. - * - *------------------------------------------------------------- - */ -Datum -byteaSetBit(PG_FUNCTION_ARGS) -{ - bytea *res = PG_GETARG_BYTEA_P_COPY(0); - int64 n = PG_GETARG_INT64(1); - int32 newBit = PG_GETARG_INT32(2); - int len; - int oldByte, - newByte; - int byteNo, - bitNo; - - len = VARSIZE(res) - VARHDRSZ; - - if (n < 0 || n >= (int64) len * 8) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("index %" PRId64 " out of valid range, 0..%" PRId64, - n, (int64) len * 8 - 1))); - - /* n/8 is now known < len, so safe to cast to int */ - byteNo = (int) (n / 8); - bitNo = (int) (n % 8); - - /* - * sanity check! - */ - if (newBit != 0 && newBit != 1) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("new bit must be 0 or 1"))); - - /* - * Update the byte. - */ - oldByte = ((unsigned char *) VARDATA(res))[byteNo]; - - if (newBit == 0) - newByte = oldByte & (~(1 << bitNo)); - else - newByte = oldByte | (1 << bitNo); - - ((unsigned char *) VARDATA(res))[byteNo] = newByte; - - PG_RETURN_BYTEA_P(res); -} - -/* - * Return reversed bytea - */ -Datum -bytea_reverse(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - const char *p = VARDATA_ANY(v); - int len = VARSIZE_ANY_EXHDR(v); - const char *endp = p + len; - bytea *result = palloc(len + VARHDRSZ); - char *dst = (char *) VARDATA(result) + len; - - SET_VARSIZE(result, len + VARHDRSZ); - - while (p < endp) - *(--dst) = *p++; - - PG_RETURN_BYTEA_P(result); -} - - -/* text_name() - * Converts a text type to a Name type. - */ -Datum -text_name(PG_FUNCTION_ARGS) -{ - text *s = PG_GETARG_TEXT_PP(0); - Name result; - int len; - - len = VARSIZE_ANY_EXHDR(s); - - /* Truncate oversize input */ - if (len >= NAMEDATALEN) - len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1); - - /* We use palloc0 here to ensure result is zero-padded */ - result = (Name) palloc0(NAMEDATALEN); - memcpy(NameStr(*result), VARDATA_ANY(s), len); - - PG_RETURN_NAME(result); -} - -/* name_text() - * Converts a Name type to a text type. - */ -Datum -name_text(PG_FUNCTION_ARGS) -{ - Name s = PG_GETARG_NAME(0); - - PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s))); -} - - -/* - * textToQualifiedNameList - convert a text object to list of names - * - * This implements the input parsing needed by nextval() and other - * functions that take a text parameter representing a qualified name. - * We split the name at dots, downcase if not double-quoted, and - * truncate names if they're too long. - */ -List * -textToQualifiedNameList(text *textval) -{ - char *rawname; - List *result = NIL; - List *namelist; - ListCell *l; - - /* Convert to C string (handles possible detoasting). */ - /* Note we rely on being able to modify rawname below. */ - rawname = text_to_cstring(textval); - - if (!SplitIdentifierString(rawname, '.', &namelist)) - ereport(ERROR, - (errcode(ERRCODE_INVALID_NAME), - errmsg("invalid name syntax"))); - - if (namelist == NIL) - ereport(ERROR, - (errcode(ERRCODE_INVALID_NAME), - errmsg("invalid name syntax"))); - - foreach(l, namelist) - { - char *curname = (char *) lfirst(l); - - result = lappend(result, makeString(pstrdup(curname))); - } - - pfree(rawname); - list_free(namelist); - - return result; -} - -/* - * SplitIdentifierString --- parse a string containing identifiers - * - * This is the guts of textToQualifiedNameList, and is exported for use in - * other situations such as parsing GUC variables. In the GUC case, it's - * important to avoid memory leaks, so the API is designed to minimize the - * amount of stuff that needs to be allocated and freed. + * This is the guts of textToQualifiedNameList, and is exported for use in + * other situations such as parsing GUC variables. In the GUC case, it's + * important to avoid memory leaks, so the API is designed to minimize the + * amount of stuff that needs to be allocated and freed. * * Inputs: * rawstring: the input string; must be overwritable! On return, it's @@ -3849,331 +3076,6 @@ SplitGUCList(char *rawstring, char separator, return true; } - -/***************************************************************************** - * Comparison Functions used for bytea - * - * Note: btree indexes need these routines not to leak memory; therefore, - * be careful to free working copies of toasted datums. Most places don't - * need to be so careful. - *****************************************************************************/ - -Datum -byteaeq(PG_FUNCTION_ARGS) -{ - Datum arg1 = PG_GETARG_DATUM(0); - Datum arg2 = PG_GETARG_DATUM(1); - bool result; - Size len1, - len2; - - /* - * We can use a fast path for unequal lengths, which might save us from - * having to detoast one or both values. - */ - len1 = toast_raw_datum_size(arg1); - len2 = toast_raw_datum_size(arg2); - if (len1 != len2) - result = false; - else - { - bytea *barg1 = DatumGetByteaPP(arg1); - bytea *barg2 = DatumGetByteaPP(arg2); - - result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2), - len1 - VARHDRSZ) == 0); - - PG_FREE_IF_COPY(barg1, 0); - PG_FREE_IF_COPY(barg2, 1); - } - - PG_RETURN_BOOL(result); -} - -Datum -byteane(PG_FUNCTION_ARGS) -{ - Datum arg1 = PG_GETARG_DATUM(0); - Datum arg2 = PG_GETARG_DATUM(1); - bool result; - Size len1, - len2; - - /* - * We can use a fast path for unequal lengths, which might save us from - * having to detoast one or both values. - */ - len1 = toast_raw_datum_size(arg1); - len2 = toast_raw_datum_size(arg2); - if (len1 != len2) - result = true; - else - { - bytea *barg1 = DatumGetByteaPP(arg1); - bytea *barg2 = DatumGetByteaPP(arg2); - - result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2), - len1 - VARHDRSZ) != 0); - - PG_FREE_IF_COPY(barg1, 0); - PG_FREE_IF_COPY(barg2, 1); - } - - PG_RETURN_BOOL(result); -} - -Datum -bytealt(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2))); -} - -Datum -byteale(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2))); -} - -Datum -byteagt(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2))); -} - -Datum -byteage(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2))); -} - -Datum -byteacmp(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - if ((cmp == 0) && (len1 != len2)) - cmp = (len1 < len2) ? -1 : 1; - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_INT32(cmp); -} - -Datum -bytea_larger(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - bytea *result; - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2); - - PG_RETURN_BYTEA_P(result); -} - -Datum -bytea_smaller(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - bytea *result; - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2); - - PG_RETURN_BYTEA_P(result); -} - -Datum -bytea_sortsupport(PG_FUNCTION_ARGS) -{ - SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); - MemoryContext oldcontext; - - oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); - - /* Use generic string SortSupport, forcing "C" collation */ - varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID); - - MemoryContextSwitchTo(oldcontext); - - PG_RETURN_VOID(); -} - -/* Cast bytea -> int2 */ -Datum -bytea_int2(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int len = VARSIZE_ANY_EXHDR(v); - uint16 result; - - /* Check that the byte array is not too long */ - if (len > sizeof(result)) - ereport(ERROR, - errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("smallint out of range")); - - /* Convert it to an integer; most significant bytes come first */ - result = 0; - for (int i = 0; i < len; i++) - { - result <<= BITS_PER_BYTE; - result |= ((unsigned char *) VARDATA_ANY(v))[i]; - } - - PG_RETURN_INT16(result); -} - -/* Cast bytea -> int4 */ -Datum -bytea_int4(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int len = VARSIZE_ANY_EXHDR(v); - uint32 result; - - /* Check that the byte array is not too long */ - if (len > sizeof(result)) - ereport(ERROR, - errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("integer out of range")); - - /* Convert it to an integer; most significant bytes come first */ - result = 0; - for (int i = 0; i < len; i++) - { - result <<= BITS_PER_BYTE; - result |= ((unsigned char *) VARDATA_ANY(v))[i]; - } - - PG_RETURN_INT32(result); -} - -/* Cast bytea -> int8 */ -Datum -bytea_int8(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int len = VARSIZE_ANY_EXHDR(v); - uint64 result; - - /* Check that the byte array is not too long */ - if (len > sizeof(result)) - ereport(ERROR, - errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("bigint out of range")); - - /* Convert it to an integer; most significant bytes come first */ - result = 0; - for (int i = 0; i < len; i++) - { - result <<= BITS_PER_BYTE; - result |= ((unsigned char *) VARDATA_ANY(v))[i]; - } - - PG_RETURN_INT64(result); -} - -/* Cast int2 -> bytea; can just use int2send() */ -Datum -int2_bytea(PG_FUNCTION_ARGS) -{ - return int2send(fcinfo); -} - -/* Cast int4 -> bytea; can just use int4send() */ -Datum -int4_bytea(PG_FUNCTION_ARGS) -{ - return int4send(fcinfo); -} - -/* Cast int8 -> bytea; can just use int8send() */ -Datum -int8_bytea(PG_FUNCTION_ARGS) -{ - return int8send(fcinfo); -} - /* * appendStringInfoText * -- 2.49.0