On 12/6/23 07:36, Andrew Dunstan wrote:
On 2023-12-05 Tu 16:46, Joe Conway wrote:
On 12/5/23 16:20, Andrew Dunstan wrote:
On 2023-12-05 Tu 16:09, Joe Conway wrote:
On 12/5/23 16:02, Joe Conway wrote:
On 12/5/23 15:55, Andrew Dunstan wrote:
and in any other case (e.g. LINES) I can't see why you
would have them.
Oh I didn't address this -- I saw examples in the interwebs of MSSQL
server I think [1] which had the non-array with commas import and
export style. It was not that tough to support and the code as
written already does it, so why not?
That seems quite absurd, TBH. I know we've catered for some absurdity in
the CSV code (much of it down to me), so maybe we need to be liberal in
what we accept here too. IMNSHO, we should produce either a single JSON
document (the ARRAY case) or a series of JSON documents, one per row
(the LINES case).
So your preference would be to not allow the non-array-with-commas
case but if/when we implement COPY FROM we would accept that format?
As in Postel'a law ("be conservative in what you do, be liberal in
what you accept from others")?
Yes, I think so.
Awesome. The attached does it that way. I also ran pgindent.
I believe this is ready to commit unless there are further comments or
objections.
--
Joe Conway
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com
Add json format mode to COPY TO
Add json format mode support to COPY TO, which includes two output
variations: 1) "json lines" which is each row as a json object delimited
by newlines (the default); and 2) "json array" which is the same as #1,
but with the addition of a leading "[", trailing "]", and comma row
delimiters, to form a valid json array.
Early versions: helpful hints/reviews provided by Nathan Bossart,
Tom Lane, and Maciek Sakrejda. Final versions: reviewed by Andrew Dunstan
and Davin Shearer.
Requested-by: Davin Shearer
Author: Joe Conway
Reviewed-by: Andrew Dunstan, Davin Shearer
Discussion: https://postgr.es/m/flat/24e3ee88-ec1e-421b-89ae-8a47ee0d2df1%40joeconway.com#a5e6b8829f9a74dfc835f6f29f2e44c5
diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 18ecc69..8915fb3 100644
*** a/doc/src/sgml/ref/copy.sgml
--- b/doc/src/sgml/ref/copy.sgml
*************** COPY { <replaceable class="parameter">ta
*** 43,48 ****
--- 43,49 ----
FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
FORCE_NOT_NULL { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
FORCE_NULL { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
+ FORCE_ARRAY [ <replaceable class="parameter">boolean</replaceable> ]
ENCODING '<replaceable class="parameter">encoding_name</replaceable>'
</synopsis>
</refsynopsisdiv>
*************** COPY { <replaceable class="parameter">ta
*** 206,214 ****
--- 207,220 ----
Selects the data format to be read or written:
<literal>text</literal>,
<literal>csv</literal> (Comma Separated Values),
+ <literal>json</literal> (JavaScript Object Notation),
or <literal>binary</literal>.
The default is <literal>text</literal>.
</para>
+ <para>
+ The <literal>json</literal> option is allowed only in
+ <command>COPY TO</command>.
+ </para>
</listitem>
</varlistentry>
*************** COPY { <replaceable class="parameter">ta
*** 372,377 ****
--- 378,396 ----
</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term><literal>FORCE_ARRAY</literal></term>
+ <listitem>
+ <para>
+ Force output of square brackets as array decorations at the beginning
+ and end of output, and commas between the rows. It is allowed only in
+ <command>COPY TO</command>, and only when using
+ <literal>JSON</literal> format. The default is
+ <literal>false</literal>.
+ </para>
+ </listitem>
+ </varlistentry>
<varlistentry>
<term><literal>ENCODING</literal></term>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index cfad47b..23b570f 100644
*** a/src/backend/commands/copy.c
--- b/src/backend/commands/copy.c
*************** ProcessCopyOptions(ParseState *pstate,
*** 419,424 ****
--- 419,425 ----
bool format_specified = false;
bool freeze_specified = false;
bool header_specified = false;
+ bool force_array_specified = false;
ListCell *option;
/* Support external use for option sanity checking */
*************** ProcessCopyOptions(ParseState *pstate,
*** 443,448 ****
--- 444,451 ----
/* default format */ ;
else if (strcmp(fmt, "csv") == 0)
opts_out->csv_mode = true;
+ else if (strcmp(fmt, "json") == 0)
+ opts_out->json_mode = true;
else if (strcmp(fmt, "binary") == 0)
opts_out->binary = true;
else
*************** ProcessCopyOptions(ParseState *pstate,
*** 540,545 ****
--- 543,555 ----
defel->defname),
parser_errposition(pstate, defel->location)));
}
+ else if (strcmp(defel->defname, "force_array") == 0)
+ {
+ if (force_array_specified)
+ errorConflictingDefElem(defel, pstate);
+ force_array_specified = true;
+ opts_out->force_array = defGetBoolean(defel);
+ }
else if (strcmp(defel->defname, "convert_selectively") == 0)
{
/*
*************** ProcessCopyOptions(ParseState *pstate,
*** 598,603 ****
--- 608,625 ----
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot specify DEFAULT in BINARY mode")));
+ if (opts_out->json_mode)
+ {
+ if (is_from)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot use JSON mode in COPY FROM")));
+ }
+ else if (opts_out->force_array)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("COPY FORCE_ARRAY requires JSON mode")));
+
/* Set defaults for omitted options */
if (!opts_out->delim)
opts_out->delim = opts_out->csv_mode ? "," : "\t";
*************** ProcessCopyOptions(ParseState *pstate,
*** 667,672 ****
--- 689,699 ----
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot specify HEADER in BINARY mode")));
+ if (opts_out->json_mode && opts_out->header_line)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot specify HEADER in JSON mode")));
+
/* Check quote */
if (!opts_out->csv_mode && opts_out->quote != NULL)
ereport(ERROR,
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index c66a047..6e351ec 100644
*** a/src/backend/commands/copyto.c
--- b/src/backend/commands/copyto.c
***************
*** 37,42 ****
--- 37,43 ----
#include "rewrite/rewriteHandler.h"
#include "storage/fd.h"
#include "tcop/tcopprot.h"
+ #include "utils/json.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/partcache.h"
*************** typedef struct
*** 112,117 ****
--- 113,120 ----
/* NOTE: there's a copy of this in copyfromparse.c */
static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
+ /* need delimiter to start next json array element */
+ static bool json_row_delim_needed = false;
/* non-export function prototypes */
static void EndCopy(CopyToState cstate);
*************** DoCopyTo(CopyToState cstate)
*** 845,850 ****
--- 848,867 ----
CopySendEndOfRow(cstate);
}
+
+ /*
+ * If JSON has been requested, and FORCE_ARRAY has been specified send
+ * the opening bracket.
+ */
+ if (cstate->opts.json_mode)
+ {
+ if (cstate->opts.force_array)
+ {
+ CopySendChar(cstate, '[');
+ CopySendEndOfRow(cstate);
+ }
+ json_row_delim_needed = false;
+ }
}
if (cstate->rel)
*************** DoCopyTo(CopyToState cstate)
*** 892,897 ****
--- 909,925 ----
CopySendEndOfRow(cstate);
}
+ /*
+ * If JSON has been requested, and FORCE_ARRAY has been specified send the
+ * closing bracket.
+ */
+ if (cstate->opts.json_mode &&
+ cstate->opts.force_array)
+ {
+ CopySendChar(cstate, ']');
+ CopySendEndOfRow(cstate);
+ }
+
MemoryContextDelete(cstate->rowcontext);
if (fe_copy)
*************** DoCopyTo(CopyToState cstate)
*** 906,916 ****
static void
CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
{
- bool need_delim = false;
- FmgrInfo *out_functions = cstate->out_functions;
MemoryContext oldcontext;
- ListCell *cur;
- char *string;
MemoryContextReset(cstate->rowcontext);
oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
--- 934,940 ----
*************** CopyOneRowTo(CopyToState cstate, TupleTa
*** 921,974 ****
CopySendInt16(cstate, list_length(cstate->attnumlist));
}
! /* Make sure the tuple is fully deconstructed */
! slot_getallattrs(slot);
!
! foreach(cur, cstate->attnumlist)
{
! int attnum = lfirst_int(cur);
! Datum value = slot->tts_values[attnum - 1];
! bool isnull = slot->tts_isnull[attnum - 1];
! if (!cstate->opts.binary)
! {
! if (need_delim)
! CopySendChar(cstate, cstate->opts.delim[0]);
! need_delim = true;
! }
! if (isnull)
! {
! if (!cstate->opts.binary)
! CopySendString(cstate, cstate->opts.null_print_client);
! else
! CopySendInt32(cstate, -1);
! }
! else
{
if (!cstate->opts.binary)
{
! string = OutputFunctionCall(&out_functions[attnum - 1],
! value);
! if (cstate->opts.csv_mode)
! CopyAttributeOutCSV(cstate, string,
! cstate->opts.force_quote_flags[attnum - 1],
! list_length(cstate->attnumlist) == 1);
else
! CopyAttributeOutText(cstate, string);
}
else
{
! bytea *outputbytes;
! outputbytes = SendFunctionCall(&out_functions[attnum - 1],
! value);
! CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
! CopySendData(cstate, VARDATA(outputbytes),
! VARSIZE(outputbytes) - VARHDRSZ);
}
}
}
CopySendEndOfRow(cstate);
--- 945,1028 ----
CopySendInt16(cstate, list_length(cstate->attnumlist));
}
! if (!cstate->opts.json_mode)
{
! bool need_delim = false;
! FmgrInfo *out_functions = cstate->out_functions;
! ListCell *cur;
! char *string;
! /* Make sure the tuple is fully deconstructed */
! slot_getallattrs(slot);
! foreach(cur, cstate->attnumlist)
{
+ int attnum = lfirst_int(cur);
+ Datum value = slot->tts_values[attnum - 1];
+ bool isnull = slot->tts_isnull[attnum - 1];
+
if (!cstate->opts.binary)
{
! if (need_delim)
! CopySendChar(cstate, cstate->opts.delim[0]);
! need_delim = true;
! }
!
! if (isnull)
! {
! if (!cstate->opts.binary)
! CopySendString(cstate, cstate->opts.null_print_client);
else
! CopySendInt32(cstate, -1);
}
else
{
! if (!cstate->opts.binary)
! {
! string = OutputFunctionCall(&out_functions[attnum - 1],
! value);
! if (cstate->opts.csv_mode)
! CopyAttributeOutCSV(cstate, string,
! cstate->opts.force_quote_flags[attnum - 1],
! list_length(cstate->attnumlist) == 1);
! else
! CopyAttributeOutText(cstate, string);
! }
! else
! {
! bytea *outputbytes;
! outputbytes = SendFunctionCall(&out_functions[attnum - 1],
! value);
! CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
! CopySendData(cstate, VARDATA(outputbytes),
! VARSIZE(outputbytes) - VARHDRSZ);
! }
}
}
}
+ else
+ {
+ Datum rowdata = ExecFetchSlotHeapTupleDatum(slot);
+ StringInfo result;
+
+ result = makeStringInfo();
+ composite_to_json(rowdata, result, false);
+
+ if (json_row_delim_needed &&
+ cstate->opts.force_array)
+ {
+ CopySendChar(cstate, ',');
+ }
+ else if (cstate->opts.force_array)
+ {
+ /* first row needs no delimiter */
+ CopySendChar(cstate, ' ');
+ json_row_delim_needed = true;
+ }
+
+ CopySendData(cstate, result->data, result->len);
+ }
CopySendEndOfRow(cstate);
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index d631ac8..e6789d7 100644
*** a/src/backend/parser/gram.y
--- b/src/backend/parser/gram.y
*************** copy_opt_item:
*** 3408,3413 ****
--- 3408,3417 ----
{
$$ = makeDefElem("format", (Node *) makeString("csv"), @1);
}
+ | JSON
+ {
+ $$ = makeDefElem("format", (Node *) makeString("json"), @1);
+ }
| HEADER_P
{
$$ = makeDefElem("header", (Node *) makeBoolean(true), @1);
*************** copy_opt_item:
*** 3448,3453 ****
--- 3452,3461 ----
{
$$ = makeDefElem("encoding", (Node *) makeString($2), @1);
}
+ | FORCE ARRAY
+ {
+ $$ = makeDefElem("force_array", (Node *) makeBoolean(true), @1);
+ }
;
/* The following exist for backward compatibility with very old versions */
*************** copy_generic_opt_elem:
*** 3490,3495 ****
--- 3498,3507 ----
{
$$ = makeDefElem($1, $2, @1);
}
+ | FORMAT_LA copy_generic_opt_arg
+ {
+ $$ = makeDefElem("format", $2, @1);
+ }
;
copy_generic_opt_arg:
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c
index 71ae53f..cb4311e 100644
*** a/src/backend/utils/adt/json.c
--- b/src/backend/utils/adt/json.c
*************** typedef struct JsonAggState
*** 83,90 ****
JsonUniqueBuilderState unique_check;
} JsonAggState;
- static void composite_to_json(Datum composite, StringInfo result,
- bool use_line_feeds);
static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
Datum *vals, bool *nulls, int *valcount,
JsonTypeCategory tcategory, Oid outfuncoid,
--- 83,88 ----
*************** array_to_json_internal(Datum array, Stri
*** 490,497 ****
/*
* Turn a composite / record into JSON.
*/
! static void
composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
{
HeapTupleHeader td;
--- 488,496 ----
/*
* Turn a composite / record into JSON.
+ * Exported so COPY TO can use it.
*/
! void
composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
{
HeapTupleHeader td;
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index f2cca0b..97899b6 100644
*** a/src/include/commands/copy.h
--- b/src/include/commands/copy.h
*************** typedef struct CopyFormatOptions
*** 43,48 ****
--- 43,49 ----
bool binary; /* binary format? */
bool freeze; /* freeze rows on loading? */
bool csv_mode; /* Comma Separated Value format? */
+ bool json_mode; /* JSON format? */
CopyHeaderChoice header_line; /* header line? */
char *null_print; /* NULL marker string (server encoding!) */
int null_print_len; /* length of same */
*************** typedef struct CopyFormatOptions
*** 61,66 ****
--- 62,68 ----
List *force_null; /* list of column names */
bool force_null_all; /* FORCE_NULL *? */
bool *force_null_flags; /* per-column CSV FN flags */
+ bool force_array; /* add JSON array decorations */
bool convert_selectively; /* do selective binary conversion? */
List *convert_select; /* list of column names (can be NIL) */
} CopyFormatOptions;
diff --git a/src/include/utils/json.h b/src/include/utils/json.h
index f07e82c..badc5a6 100644
*** a/src/include/utils/json.h
--- b/src/include/utils/json.h
***************
*** 17,22 ****
--- 17,24 ----
#include "lib/stringinfo.h"
/* functions in json.c */
+ extern void composite_to_json(Datum composite, StringInfo result,
+ bool use_line_feeds);
extern void escape_json(StringInfo buf, const char *str);
extern char *JsonEncodeDateTime(char *buf, Datum value, Oid typid,
const int *tzp);
diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out
index b48365e..31913f6 100644
*** a/src/test/regress/expected/copy.out
--- b/src/test/regress/expected/copy.out
*************** copy copytest3 to stdout csv header;
*** 42,47 ****
--- 42,117 ----
c1,"col with , comma","col with "" quote"
1,a,1
2,b,2
+ --- test copying in JSON mode with various styles
+ copy copytest to stdout json;
+ {"style":"DOS","test":"abc\r\ndef","filler":1}
+ {"style":"Unix","test":"abc\ndef","filler":2}
+ {"style":"Mac","test":"abc\rdef","filler":3}
+ {"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4}
+ copy copytest to stdout (format json);
+ {"style":"DOS","test":"abc\r\ndef","filler":1}
+ {"style":"Unix","test":"abc\ndef","filler":2}
+ {"style":"Mac","test":"abc\rdef","filler":3}
+ {"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4}
+ copy copytest to stdout (format json, force_array);
+ [
+ {"style":"DOS","test":"abc\r\ndef","filler":1}
+ ,{"style":"Unix","test":"abc\ndef","filler":2}
+ ,{"style":"Mac","test":"abc\rdef","filler":3}
+ ,{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4}
+ ]
+ copy copytest to stdout (format json, force_array true);
+ [
+ {"style":"DOS","test":"abc\r\ndef","filler":1}
+ ,{"style":"Unix","test":"abc\ndef","filler":2}
+ ,{"style":"Mac","test":"abc\rdef","filler":3}
+ ,{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4}
+ ]
+ copy copytest to stdout (format json, force_array false);
+ {"style":"DOS","test":"abc\r\ndef","filler":1}
+ {"style":"Unix","test":"abc\ndef","filler":2}
+ {"style":"Mac","test":"abc\rdef","filler":3}
+ {"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4}
+ -- Error
+ copy copytest to stdout (format json, header);
+ ERROR: cannot specify HEADER in JSON mode
+ -- embedded escaped characters
+ create temp table copyjsontest (
+ id bigserial,
+ f1 text,
+ f2 timestamptz);
+ insert into copyjsontest
+ select g.i,
+ CASE WHEN g.i % 2 = 0 THEN
+ 'line with '' in it: ' || g.i::text
+ ELSE
+ 'line with " in it: ' || g.i::text
+ END,
+ 'Mon Feb 10 17:32:01 1997 PST'
+ from generate_series(1,5) as g(i);
+ insert into copyjsontest (f1) values
+ (E'aaa\"bbb'::text),
+ (E'aaa\\bbb'::text),
+ (E'aaa\/bbb'::text),
+ (E'aaa\bbbb'::text),
+ (E'aaa\fbbb'::text),
+ (E'aaa\nbbb'::text),
+ (E'aaa\rbbb'::text),
+ (E'aaa\tbbb'::text);
+ copy copyjsontest to stdout json;
+ {"id":1,"f1":"line with \" in it: 1","f2":"1997-02-10T17:32:01-08:00"}
+ {"id":2,"f1":"line with ' in it: 2","f2":"1997-02-10T17:32:01-08:00"}
+ {"id":3,"f1":"line with \" in it: 3","f2":"1997-02-10T17:32:01-08:00"}
+ {"id":4,"f1":"line with ' in it: 4","f2":"1997-02-10T17:32:01-08:00"}
+ {"id":5,"f1":"line with \" in it: 5","f2":"1997-02-10T17:32:01-08:00"}
+ {"id":1,"f1":"aaa\"bbb","f2":null}
+ {"id":2,"f1":"aaa\\bbb","f2":null}
+ {"id":3,"f1":"aaa/bbb","f2":null}
+ {"id":4,"f1":"aaa\bbbb","f2":null}
+ {"id":5,"f1":"aaa\fbbb","f2":null}
+ {"id":6,"f1":"aaa\nbbb","f2":null}
+ {"id":7,"f1":"aaa\rbbb","f2":null}
+ {"id":8,"f1":"aaa\tbbb","f2":null}
create temp table copytest4 (
c1 int,
"colname with tab: " text);
diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql
index 43d2e90..4b76541 100644
*** a/src/test/regress/sql/copy.sql
--- b/src/test/regress/sql/copy.sql
*************** this is just a line full of junk that wo
*** 54,59 ****
--- 54,101 ----
copy copytest3 to stdout csv header;
+ --- test copying in JSON mode with various styles
+ copy copytest to stdout json;
+
+ copy copytest to stdout (format json);
+
+ copy copytest to stdout (format json, force_array);
+
+ copy copytest to stdout (format json, force_array true);
+
+ copy copytest to stdout (format json, force_array false);
+
+ -- Error
+ copy copytest to stdout (format json, header);
+
+ -- embedded escaped characters
+ create temp table copyjsontest (
+ id bigserial,
+ f1 text,
+ f2 timestamptz);
+
+ insert into copyjsontest
+ select g.i,
+ CASE WHEN g.i % 2 = 0 THEN
+ 'line with '' in it: ' || g.i::text
+ ELSE
+ 'line with " in it: ' || g.i::text
+ END,
+ 'Mon Feb 10 17:32:01 1997 PST'
+ from generate_series(1,5) as g(i);
+
+ insert into copyjsontest (f1) values
+ (E'aaa\"bbb'::text),
+ (E'aaa\\bbb'::text),
+ (E'aaa\/bbb'::text),
+ (E'aaa\bbbb'::text),
+ (E'aaa\fbbb'::text),
+ (E'aaa\nbbb'::text),
+ (E'aaa\rbbb'::text),
+ (E'aaa\tbbb'::text);
+
+ copy copyjsontest to stdout json;
+
create temp table copytest4 (
c1 int,
"colname with tab: " text);