On 12/2/23 17:37, Joe Conway wrote:
On 12/2/23 16:53, Nathan Bossart wrote:
On Sat, Dec 02, 2023 at 10:11:20AM -0500, Tom Lane wrote:
So if you are writing a production that might need to match
FORMAT followed by JSON, you need to match FORMAT_LA too.
Thanks for the pointer. That does seem to be the culprit.
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index d631ac89a9..048494dd07 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -3490,6 +3490,10 @@ copy_generic_opt_elem:
{
$$ = makeDefElem($1, $2, @1);
}
+ | FORMAT_LA copy_generic_opt_arg
+ {
+ $$ = makeDefElem("format", $2, @1);
+ }
;
copy_generic_opt_arg:
Yep -- I concluded the same. Thanks Tom!
The attached implements the above repair, as well as adding support for
array decoration (or not) and/or comma row delimiters when not an array.
This covers the three variations of json import/export formats that I
have found after light searching (SQL Server and DuckDB).
Still lacks and documentation, tests, and COPY FROM support, but here is
what it looks like in a nutshell:
8<-----------------------------------------------
create table foo(id int8, f1 text, f2 timestamptz);
insert into foo
select g.i,
'line: ' || g.i::text,
clock_timestamp()
from generate_series(1,4) as g(i);
copy foo to stdout (format json);
{"id":1,"f1":"line: 1","f2":"2023-12-01T12:58:16.776863-05:00"}
{"id":2,"f1":"line: 2","f2":"2023-12-01T12:58:16.777084-05:00"}
{"id":3,"f1":"line: 3","f2":"2023-12-01T12:58:16.777096-05:00"}
{"id":4,"f1":"line: 4","f2":"2023-12-01T12:58:16.777103-05:00"}
copy foo to stdout (format json, force_array);
[
{"id":1,"f1":"line: 1","f2":"2023-12-01T12:58:16.776863-05:00"}
,{"id":2,"f1":"line: 2","f2":"2023-12-01T12:58:16.777084-05:00"}
,{"id":3,"f1":"line: 3","f2":"2023-12-01T12:58:16.777096-05:00"}
,{"id":4,"f1":"line: 4","f2":"2023-12-01T12:58:16.777103-05:00"}
]
copy foo to stdout (format json, force_row_delimiter);
{"id":1,"f1":"line: 1","f2":"2023-12-01T12:58:16.776863-05:00"}
,{"id":2,"f1":"line: 2","f2":"2023-12-01T12:58:16.777084-05:00"}
,{"id":3,"f1":"line: 3","f2":"2023-12-01T12:58:16.777096-05:00"}
,{"id":4,"f1":"line: 4","f2":"2023-12-01T12:58:16.777103-05:00"}
copy foo to stdout (force_array);
ERROR: COPY FORCE_ARRAY requires JSON mode
copy foo to stdout (force_row_delimiter);
ERROR: COPY FORCE_ROW_DELIMITER requires JSON mode
8<-----------------------------------------------
--
Joe Conway
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index cfad47b..1f9ac31 100644
*** a/src/backend/commands/copy.c
--- b/src/backend/commands/copy.c
*************** ProcessCopyOptions(ParseState *pstate,
*** 443,448 ****
--- 443,450 ----
/* default format */ ;
else if (strcmp(fmt, "csv") == 0)
opts_out->csv_mode = true;
+ else if (strcmp(fmt, "json") == 0)
+ opts_out->json_mode = true;
else if (strcmp(fmt, "binary") == 0)
opts_out->binary = true;
else
*************** ProcessCopyOptions(ParseState *pstate,
*** 540,545 ****
--- 542,559 ----
defel->defname),
parser_errposition(pstate, defel->location)));
}
+ else if (strcmp(defel->defname, "force_row_delimiter") == 0)
+ {
+ if (opts_out->force_row_delimiter)
+ errorConflictingDefElem(defel, pstate);
+ opts_out->force_row_delimiter = true;
+ }
+ else if (strcmp(defel->defname, "force_array") == 0)
+ {
+ if (opts_out->force_array)
+ errorConflictingDefElem(defel, pstate);
+ opts_out->force_array = true;
+ }
else if (strcmp(defel->defname, "convert_selectively") == 0)
{
/*
*************** ProcessCopyOptions(ParseState *pstate,
*** 598,603 ****
--- 612,631 ----
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot specify DEFAULT in BINARY mode")));
+ if (opts_out->json_mode)
+ {
+ if (opts_out->force_array)
+ opts_out->force_row_delimiter = true;
+ }
+ else if (opts_out->force_array)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("COPY FORCE_ARRAY requires JSON mode")));
+ else if (opts_out->force_row_delimiter)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("COPY FORCE_ROW_DELIMITER requires JSON mode")));
+
/* Set defaults for omitted options */
if (!opts_out->delim)
opts_out->delim = opts_out->csv_mode ? "," : "\t";
*************** ProcessCopyOptions(ParseState *pstate,
*** 667,672 ****
--- 695,705 ----
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot specify HEADER in BINARY mode")));
+ if (opts_out->json_mode && opts_out->header_line)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot specify HEADER in JSON mode")));
+
/* Check quote */
if (!opts_out->csv_mode && opts_out->quote != NULL)
ereport(ERROR,
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index c66a047..ca3680e 100644
*** a/src/backend/commands/copyto.c
--- b/src/backend/commands/copyto.c
***************
*** 37,42 ****
--- 37,43 ----
#include "rewrite/rewriteHandler.h"
#include "storage/fd.h"
#include "tcop/tcopprot.h"
+ #include "utils/json.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/partcache.h"
*************** typedef struct
*** 112,117 ****
--- 113,120 ----
/* NOTE: there's a copy of this in copyfromparse.c */
static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
+ /* need delimiter to start next json array element */
+ static bool json_row_delim_needed = false;
/* non-export function prototypes */
static void EndCopy(CopyToState cstate);
*************** DoCopyTo(CopyToState cstate)
*** 845,850 ****
--- 848,867 ----
CopySendEndOfRow(cstate);
}
+
+ /*
+ * If JSON has been requested, and FORCE_ARRAY has been specified
+ * send the opening bracket.
+ */
+ if (cstate->opts.json_mode)
+ {
+ if (cstate->opts.force_array)
+ {
+ CopySendChar(cstate, '[');
+ CopySendEndOfRow(cstate);
+ }
+ json_row_delim_needed = false;
+ }
}
if (cstate->rel)
*************** DoCopyTo(CopyToState cstate)
*** 892,897 ****
--- 909,925 ----
CopySendEndOfRow(cstate);
}
+ /*
+ * If JSON has been requested, and FORCE_ARRAY has been specified
+ * send the closing bracket.
+ */
+ if (cstate->opts.json_mode &&
+ cstate->opts.force_array)
+ {
+ CopySendChar(cstate, ']');
+ CopySendEndOfRow(cstate);
+ }
+
MemoryContextDelete(cstate->rowcontext);
if (fe_copy)
*************** DoCopyTo(CopyToState cstate)
*** 906,916 ****
static void
CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
{
- bool need_delim = false;
- FmgrInfo *out_functions = cstate->out_functions;
MemoryContext oldcontext;
- ListCell *cur;
- char *string;
MemoryContextReset(cstate->rowcontext);
oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
--- 934,940 ----
*************** CopyOneRowTo(CopyToState cstate, TupleTa
*** 921,974 ****
CopySendInt16(cstate, list_length(cstate->attnumlist));
}
! /* Make sure the tuple is fully deconstructed */
! slot_getallattrs(slot);
!
! foreach(cur, cstate->attnumlist)
{
! int attnum = lfirst_int(cur);
! Datum value = slot->tts_values[attnum - 1];
! bool isnull = slot->tts_isnull[attnum - 1];
! if (!cstate->opts.binary)
! {
! if (need_delim)
! CopySendChar(cstate, cstate->opts.delim[0]);
! need_delim = true;
! }
! if (isnull)
! {
! if (!cstate->opts.binary)
! CopySendString(cstate, cstate->opts.null_print_client);
! else
! CopySendInt32(cstate, -1);
! }
! else
{
if (!cstate->opts.binary)
{
! string = OutputFunctionCall(&out_functions[attnum - 1],
! value);
! if (cstate->opts.csv_mode)
! CopyAttributeOutCSV(cstate, string,
! cstate->opts.force_quote_flags[attnum - 1],
! list_length(cstate->attnumlist) == 1);
else
! CopyAttributeOutText(cstate, string);
}
else
{
! bytea *outputbytes;
! outputbytes = SendFunctionCall(&out_functions[attnum - 1],
! value);
! CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
! CopySendData(cstate, VARDATA(outputbytes),
! VARSIZE(outputbytes) - VARHDRSZ);
}
}
}
CopySendEndOfRow(cstate);
--- 945,1028 ----
CopySendInt16(cstate, list_length(cstate->attnumlist));
}
! if (!cstate->opts.json_mode)
{
! bool need_delim = false;
! FmgrInfo *out_functions = cstate->out_functions;
! ListCell *cur;
! char *string;
! /* Make sure the tuple is fully deconstructed */
! slot_getallattrs(slot);
! foreach(cur, cstate->attnumlist)
{
+ int attnum = lfirst_int(cur);
+ Datum value = slot->tts_values[attnum - 1];
+ bool isnull = slot->tts_isnull[attnum - 1];
+
if (!cstate->opts.binary)
{
! if (need_delim)
! CopySendChar(cstate, cstate->opts.delim[0]);
! need_delim = true;
! }
!
! if (isnull)
! {
! if (!cstate->opts.binary)
! CopySendString(cstate, cstate->opts.null_print_client);
else
! CopySendInt32(cstate, -1);
}
else
{
! if (!cstate->opts.binary)
! {
! string = OutputFunctionCall(&out_functions[attnum - 1],
! value);
! if (cstate->opts.csv_mode)
! CopyAttributeOutCSV(cstate, string,
! cstate->opts.force_quote_flags[attnum - 1],
! list_length(cstate->attnumlist) == 1);
! else
! CopyAttributeOutText(cstate, string);
! }
! else
! {
! bytea *outputbytes;
! outputbytes = SendFunctionCall(&out_functions[attnum - 1],
! value);
! CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
! CopySendData(cstate, VARDATA(outputbytes),
! VARSIZE(outputbytes) - VARHDRSZ);
! }
}
}
}
+ else
+ {
+ Datum rowdata = ExecFetchSlotHeapTupleDatum(slot);
+ StringInfo result;
+
+ result = makeStringInfo();
+ composite_to_json(rowdata, result, false);
+
+ if (json_row_delim_needed &&
+ cstate->opts.force_row_delimiter)
+ {
+ CopySendChar(cstate, ',');
+ }
+ else if (cstate->opts.force_row_delimiter)
+ {
+ /* first row needs no delimiter */
+ CopySendChar(cstate, ' ');
+ json_row_delim_needed = true;
+ }
+
+ CopyAttributeOutText(cstate, result->data);
+ }
CopySendEndOfRow(cstate);
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index d631ac8..16aa131 100644
*** a/src/backend/parser/gram.y
--- b/src/backend/parser/gram.y
*************** copy_opt_item:
*** 3408,3413 ****
--- 3408,3417 ----
{
$$ = makeDefElem("format", (Node *) makeString("csv"), @1);
}
+ | JSON
+ {
+ $$ = makeDefElem("format", (Node *) makeString("json"), @1);
+ }
| HEADER_P
{
$$ = makeDefElem("header", (Node *) makeBoolean(true), @1);
*************** copy_opt_item:
*** 3448,3453 ****
--- 3452,3465 ----
{
$$ = makeDefElem("encoding", (Node *) makeString($2), @1);
}
+ | FORCE ROW DELIMITER
+ {
+ $$ = makeDefElem("force_row_delimiter", (Node *) makeBoolean(true), @1);
+ }
+ | FORCE ARRAY
+ {
+ $$ = makeDefElem("force_array", (Node *) makeBoolean(true), @1);
+ }
;
/* The following exist for backward compatibility with very old versions */
*************** copy_generic_opt_elem:
*** 3490,3495 ****
--- 3502,3511 ----
{
$$ = makeDefElem($1, $2, @1);
}
+ | FORMAT_LA copy_generic_opt_arg
+ {
+ $$ = makeDefElem("format", $2, @1);
+ }
;
copy_generic_opt_arg:
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c
index 71ae53f..cb4311e 100644
*** a/src/backend/utils/adt/json.c
--- b/src/backend/utils/adt/json.c
*************** typedef struct JsonAggState
*** 83,90 ****
JsonUniqueBuilderState unique_check;
} JsonAggState;
- static void composite_to_json(Datum composite, StringInfo result,
- bool use_line_feeds);
static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
Datum *vals, bool *nulls, int *valcount,
JsonTypeCategory tcategory, Oid outfuncoid,
--- 83,88 ----
*************** array_to_json_internal(Datum array, Stri
*** 490,497 ****
/*
* Turn a composite / record into JSON.
*/
! static void
composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
{
HeapTupleHeader td;
--- 488,496 ----
/*
* Turn a composite / record into JSON.
+ * Exported so COPY TO can use it.
*/
! void
composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
{
HeapTupleHeader td;
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index f2cca0b..266910d 100644
*** a/src/include/commands/copy.h
--- b/src/include/commands/copy.h
*************** typedef struct CopyFormatOptions
*** 43,48 ****
--- 43,49 ----
bool binary; /* binary format? */
bool freeze; /* freeze rows on loading? */
bool csv_mode; /* Comma Separated Value format? */
+ bool json_mode; /* JSON format? */
CopyHeaderChoice header_line; /* header line? */
char *null_print; /* NULL marker string (server encoding!) */
int null_print_len; /* length of same */
*************** typedef struct CopyFormatOptions
*** 61,66 ****
--- 62,69 ----
List *force_null; /* list of column names */
bool force_null_all; /* FORCE_NULL *? */
bool *force_null_flags; /* per-column CSV FN flags */
+ bool force_row_delimiter; /* use comma as per-row JSON delimiter */
+ bool force_array; /* JSON array; implies force_row_delimiter */
bool convert_selectively; /* do selective binary conversion? */
List *convert_select; /* list of column names (can be NIL) */
} CopyFormatOptions;
diff --git a/src/include/utils/json.h b/src/include/utils/json.h
index f07e82c..badc5a6 100644
*** a/src/include/utils/json.h
--- b/src/include/utils/json.h
***************
*** 17,22 ****
--- 17,24 ----
#include "lib/stringinfo.h"
/* functions in json.c */
+ extern void composite_to_json(Datum composite, StringInfo result,
+ bool use_line_feeds);
extern void escape_json(StringInfo buf, const char *str);
extern char *JsonEncodeDateTime(char *buf, Datum value, Oid typid,
const int *tzp);