On Mon, Aug 19, 2024 at 8:00 AM jian he <jian.universal...@gmail.com> wrote: > > On Mon, Apr 1, 2024 at 8:00 PM jian he <jian.universal...@gmail.com> wrote: > > > rebased. > minor cosmetic error message change. > > I think all the issues in this thread have been addressed.
hi. I did some minor changes based on the v11. mainly changing some error code from ERRCODE_FEATURE_NOT_SUPPORTED to ERRCODE_INVALID_PARAMETER_VALUE.
From 24c0ac71f0a3fe1f4eb3d558aef34620234b2b35 Mon Sep 17 00:00:00 2001 From: jian he <jian.universal...@gmail.com> Date: Thu, 22 Aug 2024 12:15:52 +0800 Subject: [PATCH v12 2/2] Add option force_array for COPY TO force_array option can only be used in COPY TO with JSON format. it make the output json output behave like json array type. discussion: https://postgr.es/m/CALvfUkBxTYy5uWPFVwpk_7ii2zgT07t3d-yR_cy4sfrrLU%3Dkcg%40mail.gmail.com discussion: https://postgr.es/m/6a04628d-0d53-41d9-9e35-5a8dc302c...@joeconway.com --- doc/src/sgml/ref/copy.sgml | 14 ++++++++++++++ src/backend/commands/copy.c | 13 +++++++++++++ src/backend/commands/copyto.c | 28 ++++++++++++++++++++++++++++ src/include/commands/copy.h | 1 + src/test/regress/expected/copy.out | 23 +++++++++++++++++++++++ src/test/regress/sql/copy.sql | 9 +++++++++ 6 files changed, 88 insertions(+) diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 616abf508e..4659e49ec3 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -43,6 +43,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * } FORCE_NOT_NULL { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * } FORCE_NULL { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * } + FORCE_ARRAY [ <replaceable class="parameter">boolean</replaceable> ] ON_ERROR <replaceable class="parameter">error_action</replaceable> ENCODING '<replaceable class="parameter">encoding_name</replaceable>' LOG_VERBOSITY <replaceable class="parameter">verbosity</replaceable> @@ -390,6 +391,19 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable </listitem> </varlistentry> + <varlistentry> + <term><literal>FORCE_ARRAY</literal></term> + <listitem> + <para> + Force output of square brackets as array decorations at the beginning + and end of output, and commas between the rows. It is allowed only in + <command>COPY TO</command>, and only when using + <literal>JSON</literal> format. The default is + <literal>false</literal>. + </para> + </listitem> + </varlistentry> + <varlistentry> <term><literal>ON_ERROR</literal></term> <listitem> diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 2f65c96dd3..54793fecab 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -470,6 +470,7 @@ ProcessCopyOptions(ParseState *pstate, bool header_specified = false; bool on_error_specified = false; bool log_verbosity_specified = false; + bool force_array_specified = false; ListCell *option; /* Support external use for option sanity checking */ @@ -624,6 +625,13 @@ ProcessCopyOptions(ParseState *pstate, defel->defname), parser_errposition(pstate, defel->location))); } + else if (strcmp(defel->defname, "force_array") == 0) + { + if (force_array_specified) + errorConflictingDefElem(defel, pstate); + force_array_specified = true; + opts_out->force_array = defGetBoolean(defel); + } else if (strcmp(defel->defname, "on_error") == 0) { if (on_error_specified) @@ -850,6 +858,11 @@ ProcessCopyOptions(ParseState *pstate, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("COPY json mode cannot be used with %s", "COPY FROM"))); + if (!opts_out->json_mode && opts_out->force_array) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COPY %s can only used with JSON mode", "FORCE_ARRAY"))); + if (opts_out->default_print) { if (!is_from) diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index 14ba9fde50..b56b78a331 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -81,6 +81,7 @@ typedef struct CopyToStateData List *attnumlist; /* integer list of attnums to copy */ char *filename; /* filename, or NULL for STDOUT */ bool is_program; /* is 'filename' a program to popen? */ + bool json_row_delim_needed; /* need delimiter to start next json array element */ copy_data_dest_cb data_dest_cb; /* function for writing data */ CopyFormatOptions opts; @@ -854,6 +855,15 @@ DoCopyTo(CopyToState cstate) CopySendEndOfRow(cstate); } + /* + * If JSON has been requested, and FORCE_ARRAY has been specified send + * the opening bracket. + */ + if (cstate->opts.json_mode && cstate->opts.force_array) + { + CopySendChar(cstate, '['); + CopySendEndOfRow(cstate); + } } if (cstate->rel) @@ -901,6 +911,15 @@ DoCopyTo(CopyToState cstate) CopySendEndOfRow(cstate); } + /* + * If JSON has been requested, and FORCE_ARRAY has been specified send the + * closing bracket. + */ + if (cstate->opts.json_mode && cstate->opts.force_array) + { + CopySendChar(cstate, ']'); + CopySendEndOfRow(cstate); + } MemoryContextDelete(cstate->rowcontext); if (fe_copy) @@ -1004,6 +1023,15 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot) result = makeStringInfo(); composite_to_json(rowdata, result, false); + if (cstate->json_row_delim_needed && cstate->opts.force_array) + CopySendChar(cstate, ','); + else if (cstate->opts.force_array) + { + /* first row needs no delimiter */ + CopySendChar(cstate, ' '); + cstate->json_row_delim_needed = true; + } + CopySendData(cstate, result->data, result->len); } diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index ff6ecc7ae7..f76fe8fafe 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -81,6 +81,7 @@ typedef struct CopyFormatOptions List *force_null; /* list of column names */ bool force_null_all; /* FORCE_NULL *? */ bool *force_null_flags; /* per-column CSV FN flags */ + bool force_array; /* add JSON array decorations */ bool convert_selectively; /* do selective binary conversion? */ CopyOnErrorChoice on_error; /* what to do when error happened */ CopyLogVerbosityChoice log_verbosity; /* verbosity of logged messages */ diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out index 77cb36cd61..ed1cb6b28e 100644 --- a/src/test/regress/expected/copy.out +++ b/src/test/regress/expected/copy.out @@ -59,6 +59,29 @@ ERROR: cannot specify HEADER in JSON mode -- Error copy copytest from stdout (format json); ERROR: COPY json mode cannot be used with COPY FROM +--Error +copy copytest to stdout (format csv, force_array true); +ERROR: COPY FORCE_ARRAY can only used with JSON mode +--ok +copy copytest to stdout (format json, force_array); +[ + {"style":"DOS","test":"abc\r\ndef","filler":1} +,{"style":"Unix","test":"abc\ndef","filler":2} +,{"style":"Mac","test":"abc\rdef","filler":3} +,{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4} +] +copy copytest to stdout (format json, force_array true); +[ + {"style":"DOS","test":"abc\r\ndef","filler":1} +,{"style":"Unix","test":"abc\ndef","filler":2} +,{"style":"Mac","test":"abc\rdef","filler":3} +,{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4} +] +copy copytest to stdout (format json, force_array false); +{"style":"DOS","test":"abc\r\ndef","filler":1} +{"style":"Unix","test":"abc\ndef","filler":2} +{"style":"Mac","test":"abc\rdef","filler":3} +{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4} -- embedded escaped characters create temp table copyjsontest ( id bigserial, diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql index 67f77f6512..28f698ecc6 100644 --- a/src/test/regress/sql/copy.sql +++ b/src/test/regress/sql/copy.sql @@ -64,6 +64,15 @@ copy copytest to stdout (format json, header); -- Error copy copytest from stdout (format json); +--Error +copy copytest to stdout (format csv, force_array true); + +--ok +copy copytest to stdout (format json, force_array); + +copy copytest to stdout (format json, force_array true); + +copy copytest to stdout (format json, force_array false); -- embedded escaped characters create temp table copyjsontest ( id bigserial, -- 2.34.1
From 51f0322ad7ce7159cc46f80380f98d604560a001 Mon Sep 17 00:00:00 2001 From: jian he <jian.universal...@gmail.com> Date: Thu, 22 Aug 2024 11:58:12 +0800 Subject: [PATCH v12 1/2] introduce json format for COPY TO json format is only allowed in COPY TO operation. also cannot be used with header option. discussion: https://postgr.es/m/CALvfUkBxTYy5uWPFVwpk_7ii2zgT07t3d-yR_cy4sfrrLU%3Dkcg%40mail.gmail.com discussion: https://postgr.es/m/6a04628d-0d53-41d9-9e35-5a8dc302c...@joeconway.com --- doc/src/sgml/ref/copy.sgml | 5 +++ src/backend/commands/copy.c | 13 +++++++ src/backend/commands/copyto.c | 51 +++++++++++++++++++++++++--- src/backend/parser/gram.y | 8 +++++ src/backend/utils/adt/json.c | 5 ++- src/include/commands/copy.h | 1 + src/include/utils/json.h | 2 ++ src/test/regress/expected/copy.out | 54 ++++++++++++++++++++++++++++++ src/test/regress/sql/copy.sql | 38 +++++++++++++++++++++ 9 files changed, 169 insertions(+), 8 deletions(-) diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 1518af8a04..616abf508e 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -218,9 +218,14 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable Selects the data format to be read or written: <literal>text</literal>, <literal>csv</literal> (Comma Separated Values), + <literal>json</literal> (JavaScript Object Notation), or <literal>binary</literal>. The default is <literal>text</literal>. </para> + <para> + The <literal>json</literal> option is allowed only in + <command>COPY TO</command>. + </para> </listitem> </varlistentry> diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 3bb579a3a4..2f65c96dd3 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -494,6 +494,8 @@ ProcessCopyOptions(ParseState *pstate, /* default format */ ; else if (strcmp(fmt, "csv") == 0) opts_out->csv_mode = true; + else if (strcmp(fmt, "json") == 0) + opts_out->json_mode = true; else if (strcmp(fmt, "binary") == 0) opts_out->binary = true; else @@ -739,6 +741,11 @@ ProcessCopyOptions(ParseState *pstate, /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ errmsg("cannot specify %s in BINARY mode", "HEADER"))); + if (opts_out->json_mode && opts_out->header_line) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot specify %s in JSON mode", "HEADER"))); + /* Check quote */ if (!opts_out->csv_mode && opts_out->quote != NULL) ereport(ERROR, @@ -837,6 +844,12 @@ ProcessCopyOptions(ParseState *pstate, errmsg("COPY %s cannot be used with %s", "FREEZE", "COPY TO"))); + /* Check json format */ + if (opts_out->json_mode && is_from) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COPY json mode cannot be used with %s", "COPY FROM"))); + if (opts_out->default_print) { if (!is_from) diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index 91de442f43..14ba9fde50 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -24,6 +24,7 @@ #include "executor/execdesc.h" #include "executor/executor.h" #include "executor/tuptable.h" +#include "funcapi.h" #include "libpq/libpq.h" #include "libpq/pqformat.h" #include "mb/pg_wchar.h" @@ -31,6 +32,7 @@ #include "pgstat.h" #include "storage/fd.h" #include "tcop/tcopprot.h" +#include "utils/json.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/rel.h" @@ -139,9 +141,20 @@ SendCopyBegin(CopyToState cstate) pq_beginmessage(&buf, PqMsg_CopyOutResponse); pq_sendbyte(&buf, format); /* overall format */ - pq_sendint16(&buf, natts); - for (i = 0; i < natts; i++) - pq_sendint16(&buf, format); /* per-column formats */ + if (!cstate->opts.json_mode) + { + pq_sendint16(&buf, natts); + for (i = 0; i < natts; i++) + pq_sendint16(&buf, format); /* per-column formats */ + } + else + { + /* + * JSON mode is always one non-binary column + */ + pq_sendint16(&buf, 1); + pq_sendint16(&buf, 0); + } pq_endmessage(&buf); cstate->copy_dest = COPY_FRONTEND; } @@ -917,7 +930,7 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot) /* Make sure the tuple is fully deconstructed */ slot_getallattrs(slot); - if (!cstate->opts.binary) + if (!cstate->opts.binary && !cstate->opts.json_mode) { bool need_delim = false; @@ -945,7 +958,7 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot) } } } - else + else if (!cstate->opts.json_mode) { foreach_int(attnum, cstate->attnumlist) { @@ -965,6 +978,34 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot) } } } + else + { + Datum rowdata; + StringInfo result; + + /* + * if COPY TO source data is from a query, not a table, then we need + * copy CopyToState->TupleDesc->attrs to + * slot->tts_tupleDescriptor->attrs because the slot's TupleDesc->attrs + * may change during query execution, but composite_to_json requires + * correct TupleDesc->attrs for constructing the json keys. + * composite_to_json will iterate each TupleDesc->attrs so no need to + * copy other filed in cstate->queryDesc->tupDesc. + */ + if(!cstate->rel) + { + memcpy(TupleDescAttr(slot->tts_tupleDescriptor, 0), + TupleDescAttr(cstate->queryDesc->tupDesc, 0), + cstate->queryDesc->tupDesc->natts * sizeof(FormData_pg_attribute)); + + BlessTupleDesc(slot->tts_tupleDescriptor); + } + rowdata = ExecFetchSlotHeapTupleDatum(slot); + result = makeStringInfo(); + composite_to_json(rowdata, result, false); + + CopySendData(cstate, result->data, result->len); + } CopySendEndOfRow(cstate); diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index c3f25582c3..76bd6f8949 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -3500,6 +3500,10 @@ copy_opt_item: { $$ = makeDefElem("format", (Node *) makeString("csv"), @1); } + | JSON + { + $$ = makeDefElem("format", (Node *) makeString("json"), @1); + } | HEADER_P { $$ = makeDefElem("header", (Node *) makeBoolean(true), @1); @@ -3582,6 +3586,10 @@ copy_generic_opt_elem: { $$ = makeDefElem($1, $2, @1); } + | FORMAT_LA copy_generic_opt_arg + { + $$ = makeDefElem("format", $2, @1); + } ; copy_generic_opt_arg: diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c index 4eeeeaf0a6..b47ae14f20 100644 --- a/src/backend/utils/adt/json.c +++ b/src/backend/utils/adt/json.c @@ -85,8 +85,6 @@ typedef struct JsonAggState JsonUniqueBuilderState unique_check; } JsonAggState; -static void composite_to_json(Datum composite, StringInfo result, - bool use_line_feeds); static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals, bool *nulls, int *valcount, JsonTypeCategory tcategory, Oid outfuncoid, @@ -516,8 +514,9 @@ array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds) /* * Turn a composite / record into JSON. + * Exported so COPY TO can use it. */ -static void +void composite_to_json(Datum composite, StringInfo result, bool use_line_feeds) { HeapTupleHeader td; diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index 141fd48dc1..ff6ecc7ae7 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -62,6 +62,7 @@ typedef struct CopyFormatOptions bool binary; /* binary format? */ bool freeze; /* freeze rows on loading? */ bool csv_mode; /* Comma Separated Value format? */ + bool json_mode; /* JSON format? */ CopyHeaderChoice header_line; /* header line? */ char *null_print; /* NULL marker string (server encoding!) */ int null_print_len; /* length of same */ diff --git a/src/include/utils/json.h b/src/include/utils/json.h index 79c1062e1b..c904ef6c6e 100644 --- a/src/include/utils/json.h +++ b/src/include/utils/json.h @@ -17,6 +17,8 @@ #include "lib/stringinfo.h" /* functions in json.c */ +extern void composite_to_json(Datum composite, StringInfo result, + bool use_line_feeds); extern void escape_json(StringInfo buf, const char *str); extern void escape_json_with_len(StringInfo buf, const char *str, int len); extern void escape_json_text(StringInfo buf, const text *txt); diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out index 44114089a6..77cb36cd61 100644 --- a/src/test/regress/expected/copy.out +++ b/src/test/regress/expected/copy.out @@ -42,6 +42,60 @@ copy copytest3 to stdout csv header; c1,"col with , comma","col with "" quote" 1,a,1 2,b,2 +--- test copying in JSON mode with various styles +copy copytest to stdout json; +{"style":"DOS","test":"abc\r\ndef","filler":1} +{"style":"Unix","test":"abc\ndef","filler":2} +{"style":"Mac","test":"abc\rdef","filler":3} +{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4} +copy copytest to stdout (format json); +{"style":"DOS","test":"abc\r\ndef","filler":1} +{"style":"Unix","test":"abc\ndef","filler":2} +{"style":"Mac","test":"abc\rdef","filler":3} +{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4} +-- Error +copy copytest to stdout (format json, header); +ERROR: cannot specify HEADER in JSON mode +-- Error +copy copytest from stdout (format json); +ERROR: COPY json mode cannot be used with COPY FROM +-- embedded escaped characters +create temp table copyjsontest ( + id bigserial, + f1 text, + f2 timestamptz); +insert into copyjsontest + select g.i, + CASE WHEN g.i % 2 = 0 THEN + 'line with '' in it: ' || g.i::text + ELSE + 'line with " in it: ' || g.i::text + END, + 'Mon Feb 10 17:32:01 1997 PST' + from generate_series(1,5) as g(i); +insert into copyjsontest (f1) values +(E'aaa\"bbb'::text), +(E'aaa\\bbb'::text), +(E'aaa\/bbb'::text), +(E'aaa\bbbb'::text), +(E'aaa\fbbb'::text), +(E'aaa\nbbb'::text), +(E'aaa\rbbb'::text), +(E'aaa\tbbb'::text); +copy copyjsontest to stdout json; +{"id":1,"f1":"line with \" in it: 1","f2":"1997-02-10T17:32:01-08:00"} +{"id":2,"f1":"line with ' in it: 2","f2":"1997-02-10T17:32:01-08:00"} +{"id":3,"f1":"line with \" in it: 3","f2":"1997-02-10T17:32:01-08:00"} +{"id":4,"f1":"line with ' in it: 4","f2":"1997-02-10T17:32:01-08:00"} +{"id":5,"f1":"line with \" in it: 5","f2":"1997-02-10T17:32:01-08:00"} +{"id":1,"f1":"aaa\"bbb","f2":null} +{"id":2,"f1":"aaa\\bbb","f2":null} +{"id":3,"f1":"aaa/bbb","f2":null} +{"id":4,"f1":"aaa\bbbb","f2":null} +{"id":5,"f1":"aaa\fbbb","f2":null} +{"id":6,"f1":"aaa\nbbb","f2":null} +{"id":7,"f1":"aaa\rbbb","f2":null} +{"id":8,"f1":"aaa\tbbb","f2":null} create temp table copytest4 ( c1 int, "colname with tab: " text); diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql index e2dd24cb35..67f77f6512 100644 --- a/src/test/regress/sql/copy.sql +++ b/src/test/regress/sql/copy.sql @@ -54,6 +54,44 @@ this is just a line full of junk that would error out if parsed copy copytest3 to stdout csv header; +--- test copying in JSON mode with various styles +copy copytest to stdout json; + +copy copytest to stdout (format json); + +-- Error +copy copytest to stdout (format json, header); +-- Error +copy copytest from stdout (format json); + +-- embedded escaped characters +create temp table copyjsontest ( + id bigserial, + f1 text, + f2 timestamptz); + +insert into copyjsontest + select g.i, + CASE WHEN g.i % 2 = 0 THEN + 'line with '' in it: ' || g.i::text + ELSE + 'line with " in it: ' || g.i::text + END, + 'Mon Feb 10 17:32:01 1997 PST' + from generate_series(1,5) as g(i); + +insert into copyjsontest (f1) values +(E'aaa\"bbb'::text), +(E'aaa\\bbb'::text), +(E'aaa\/bbb'::text), +(E'aaa\bbbb'::text), +(E'aaa\fbbb'::text), +(E'aaa\nbbb'::text), +(E'aaa\rbbb'::text), +(E'aaa\tbbb'::text); + +copy copyjsontest to stdout json; + create temp table copytest4 ( c1 int, "colname with tab: " text); base-commit: 9bb842f95ef3384f0822c386a4c569780e613e4e -- 2.34.1