On Sun, Mar 2, 2025 at 1:28 PM Junwang Zhao <zhjw...@gmail.com> wrote: > > > I've refactored the patch to adapt the newly introduced CopyToRoutine struct, > see 2e4127b6d2. > > v15-0001 is the merged one of v14-0001 and v14-0002 > > There are some other ongoing *copy to/from* refactors[1] which we can benefit > to make the code cleaner, especially the checks done in ProcessCopyOptions. > > [1]: > https://www.postgresql.org/message-id/20250301.115009.424844407736647598.kou%40clear-code.com > hi.
git apply --check $PATCHES/v15-0001-Introduce-json-format-for-COPY-TO.patch error: patch failed: src/backend/commands/copyfrom.c:155 error: src/backend/commands/copyfrom.c: patch does not apply error: patch failed: src/backend/commands/copyto.c:176 error: src/backend/commands/copyto.c: patch does not apply seems to need rebase. the attachment is the rebase, minor comments tweaks, and commit message tweaks. another issue is this patch entry in commitfest [1] status is: Not processed, which means no cfbots CI tests, seems not great. not sure how to resolve this issue.... [1] https://commitfest.postgresql.org/patch/4716/
From 24e1858722dbb25c4842d0ec2dee5b1047edcc23 Mon Sep 17 00:00:00 2001 From: jian he <jian.universal...@gmail.com> Date: Tue, 11 Mar 2025 16:19:55 +0800 Subject: [PATCH v16 2/2] Add option force_array for COPY JSON FORMAT force_array option can only be used in COPY TO with JSON format. it make the output json output behave like json array type. refactored by Junwang Zhao to adapt the newly introduced CopyToRoutine struct(2e4127b6d2). Author: Joe Conway <m...@joeconway.com> discussion: https://postgr.es/m/CALvfUkBxTYy5uWPFVwpk_7ii2zgT07t3d-yR_cy4sfrrLU%3Dkcg%40mail.gmail.com discussion: https://postgr.es/m/6a04628d-0d53-41d9-9e35-5a8dc302c...@joeconway.com --- doc/src/sgml/ref/copy.sgml | 14 ++++++++++++ src/backend/commands/copy.c | 13 ++++++++++++ src/backend/commands/copyto.c | 34 +++++++++++++++++++++++++++++- src/bin/psql/tab-complete.in.c | 2 +- src/include/commands/copy.h | 1 + src/test/regress/expected/copy.out | 23 ++++++++++++++++++++ src/test/regress/sql/copy.sql | 8 +++++++ 7 files changed, 93 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 9c519d8a9e2..7b3c913d4ee 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -43,6 +43,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * } FORCE_NOT_NULL { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * } FORCE_NULL { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * } + FORCE_ARRAY [ <replaceable class="parameter">boolean</replaceable> ] ON_ERROR <replaceable class="parameter">error_action</replaceable> REJECT_LIMIT <replaceable class="parameter">maxerror</replaceable> ENCODING '<replaceable class="parameter">encoding_name</replaceable>' @@ -392,6 +393,19 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable </listitem> </varlistentry> + <varlistentry> + <term><literal>FORCE_ARRAY</literal></term> + <listitem> + <para> + Force output of square brackets as array decorations at the beginning + and end of output, and commas between the rows. It is allowed only in + <command>COPY TO</command>, and only when using + <literal>JSON</literal> format. The default is + <literal>false</literal>. + </para> + </listitem> + </varlistentry> + <varlistentry> <term><literal>ON_ERROR</literal></term> <listitem> diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index b6f74c798d0..7b4c64ea97e 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -504,6 +504,7 @@ ProcessCopyOptions(ParseState *pstate, bool on_error_specified = false; bool log_verbosity_specified = false; bool reject_limit_specified = false; + bool force_array_specified = false; ListCell *option; /* Support external use for option sanity checking */ @@ -658,6 +659,13 @@ ProcessCopyOptions(ParseState *pstate, defel->defname), parser_errposition(pstate, defel->location))); } + else if (strcmp(defel->defname, "force_array") == 0) + { + if (force_array_specified) + errorConflictingDefElem(defel, pstate); + force_array_specified = true; + opts_out->force_array = defGetBoolean(defel); + } else if (strcmp(defel->defname, "on_error") == 0) { if (on_error_specified) @@ -906,6 +914,11 @@ ProcessCopyOptions(ParseState *pstate, errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("COPY json mode cannot be used with %s", "COPY FROM")); + if (opts_out->format != COPY_FORMAT_JSON && opts_out->force_array) + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COPY %s can only used with JSON mode", "FORCE_ARRAY")); + if (opts_out->default_print) { if (!is_from) diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index c1d4cbeedea..393c0440ad7 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -84,6 +84,7 @@ typedef struct CopyToStateData List *attnumlist; /* integer list of attnums to copy */ char *filename; /* filename, or NULL for STDOUT */ bool is_program; /* is 'filename' a program to popen? */ + bool json_row_delim_needed; /* need delimiter to start next json array element */ copy_data_dest_cb data_dest_cb; /* function for writing data */ CopyFormatOptions opts; @@ -128,6 +129,7 @@ static void CopyToTextLikeOneRow(CopyToState cstate, TupleTableSlot *slot, bool is_csv); static void CopyToTextLikeEnd(CopyToState cstate); static void CopyToJsonOneRow(CopyToState cstate, TupleTableSlot *slot); +static void CopyToJsonEnd(CopyToState cstate); static void CopyToBinaryStart(CopyToState cstate, TupleDesc tupDesc); static void CopyToBinaryOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo); static void CopyToBinaryOneRow(CopyToState cstate, TupleTableSlot *slot); @@ -172,7 +174,7 @@ static const CopyToRoutine CopyToRoutineJson = { .CopyToStart = CopyToTextLikeStart, .CopyToOutFunc = CopyToTextLikeOutFunc, .CopyToOneRow = CopyToJsonOneRow, - .CopyToEnd = CopyToTextLikeEnd, + .CopyToEnd = CopyToJsonEnd, }; /* binary format */ @@ -238,6 +240,16 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc) CopySendTextLikeEndOfRow(cstate); } + + /* + * If JSON has been requested, and FORCE_ARRAY has been specified send + * the opening bracket. + */ + if (cstate->opts.format == COPY_FORMAT_JSON && cstate->opts.force_array) + { + CopySendChar(cstate, '['); + CopySendTextLikeEndOfRow(cstate); + } } /* @@ -349,11 +361,31 @@ CopyToJsonOneRow(CopyToState cstate, TupleTableSlot *slot) result = makeStringInfo(); composite_to_json(rowdata, result, false); + if (cstate->json_row_delim_needed && cstate->opts.force_array) + CopySendChar(cstate, ','); + else if (cstate->opts.force_array) + { + /* first row needs no delimiter */ + CopySendChar(cstate, ' '); + cstate->json_row_delim_needed = true; + } + CopySendData(cstate, result->data, result->len); CopySendTextLikeEndOfRow(cstate); } +/* Implementation of the end callback for json format */ +static void +CopyToJsonEnd(CopyToState cstate) +{ + if (cstate->opts.force_array) + { + CopySendChar(cstate, ']'); + CopySendTextLikeEndOfRow(cstate); + } +} + /* * Implementation of the start callback for binary format. Send a header * for a binary copy. diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c index 6069b118834..e0c7412ec0a 100644 --- a/src/bin/psql/tab-complete.in.c +++ b/src/bin/psql/tab-complete.in.c @@ -3278,7 +3278,7 @@ match_previous_words(int pattern_id, else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(")) COMPLETE_WITH("FORMAT", "FREEZE", "DELIMITER", "NULL", "HEADER", "QUOTE", "ESCAPE", "FORCE_QUOTE", - "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", "DEFAULT", + "FORCE_NOT_NULL", "FORCE_NULL", "FORCE_ARRAY", "ENCODING", "DEFAULT", "ON_ERROR", "LOG_VERBOSITY"); /* Complete COPY <sth> FROM|TO filename WITH (FORMAT */ diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index 07fcc2bc9ac..fa8a8ab7e31 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -89,6 +89,7 @@ typedef struct CopyFormatOptions List *force_notnull; /* list of column names */ bool force_notnull_all; /* FORCE_NOT_NULL *? */ bool *force_notnull_flags; /* per-column CSV FNN flags */ + bool force_array; /* add JSON array decorations */ List *force_null; /* list of column names */ bool force_null_all; /* FORCE_NULL *? */ bool *force_null_flags; /* per-column CSV FN flags */ diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out index 0d4cfc0b60a..3d0781700b3 100644 --- a/src/test/regress/expected/copy.out +++ b/src/test/regress/expected/copy.out @@ -110,6 +110,29 @@ LINE 1: copy copytest to stdout (format json, on_error ignore); copy copytest from stdin(format json); ERROR: COPY json mode cannot be used with COPY FROM -- all of the above should yield error +--Error +copy copytest to stdout (format csv, force_array true); +ERROR: COPY FORCE_ARRAY can only used with JSON mode +--ok +copy copytest to stdout (format json, force_array); +[ + {"style":"DOS","test":"abc\r\ndef","filler":1} +,{"style":"Unix","test":"abc\ndef","filler":2} +,{"style":"Mac","test":"abc\rdef","filler":3} +,{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4} +] +copy copytest to stdout (format json, force_array true); +[ + {"style":"DOS","test":"abc\r\ndef","filler":1} +,{"style":"Unix","test":"abc\ndef","filler":2} +,{"style":"Mac","test":"abc\rdef","filler":3} +,{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4} +] +copy copytest to stdout (format json, force_array false); +{"style":"DOS","test":"abc\r\ndef","filler":1} +{"style":"Unix","test":"abc\ndef","filler":2} +{"style":"Mac","test":"abc\rdef","filler":3} +{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4} -- embedded escaped characters create temp table copyjsontest ( id bigserial, diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql index 6ee96f5aa51..2781c24bd84 100644 --- a/src/test/regress/sql/copy.sql +++ b/src/test/regress/sql/copy.sql @@ -101,6 +101,14 @@ copy copytest to stdout (format json, on_error ignore); copy copytest from stdin(format json); -- all of the above should yield error +--Error +copy copytest to stdout (format csv, force_array true); + +--ok +copy copytest to stdout (format json, force_array); +copy copytest to stdout (format json, force_array true); +copy copytest to stdout (format json, force_array false); + -- embedded escaped characters create temp table copyjsontest ( id bigserial, -- 2.34.1
From 71cf17c9d1aefecc89cc388b979bbfc9952898c8 Mon Sep 17 00:00:00 2001 From: jian he <jian.universal...@gmail.com> Date: Tue, 11 Mar 2025 15:53:37 +0800 Subject: [PATCH v16 1/2] Introduce json format for COPY TO json format is only allowed in COPY TO operation. also cannot be used with {header, default, null, delimiter} options and many other options. fully tested on src/test/regress/sql/copy.sql. CopyFormat enum part was coied from Joel Jacobson <j...@compiler.org> refactored by Jian He to fix some miscellaneous issue. refactored by Junwang Zhao to adapt the newly introduced CopyToRoutine struct(2e4127b6d2). Author: Joe Conway <m...@joeconway.com> Reviewed-by: "Andrey M. Borodin" <x4...@yandex-team.ru>, Reviewed-by: Dean Rasheed <dean.a.rash...@gmail.com>, Reviewed-by: Daniel Verite <dan...@manitou-mail.org>, Reviewed-by: Andrew Dunstan <and...@dunslane.net>, Reviewed-by: Davin Shearer <da...@apache.org>, Reviewed-by: Masahiko Sawada <sawada.m...@gmail.com>, Reviewed-by: Alvaro Herrera <alvhe...@alvh.no-ip.org> discussion: https://postgr.es/m/CALvfUkBxTYy5uWPFVwpk_7ii2zgT07t3d-yR_cy4sfrrLU%3Dkcg%40mail.gmail.com discussion: https://postgr.es/m/6a04628d-0d53-41d9-9e35-5a8dc302c...@joeconway.com --- doc/src/sgml/ref/copy.sgml | 13 +++-- src/backend/commands/copy.c | 77 ++++++++++++++++++-------- src/backend/commands/copyfrom.c | 6 +- src/backend/commands/copyfromparse.c | 6 +- src/backend/commands/copyto.c | 83 ++++++++++++++++++++++++---- src/backend/parser/gram.y | 8 +++ src/backend/utils/adt/json.c | 5 +- src/bin/psql/tab-complete.in.c | 2 +- src/include/commands/copy.h | 14 ++++- src/include/utils/json.h | 2 + src/test/regress/expected/copy.out | 74 +++++++++++++++++++++++++ src/test/regress/sql/copy.sql | 47 ++++++++++++++++ src/tools/pgindent/typedefs.list | 1 + 13 files changed, 286 insertions(+), 52 deletions(-) diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index df093da97c5..9c519d8a9e2 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -219,10 +219,15 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable Selects the data format to be read or written: <literal>text</literal>, <literal>csv</literal> (Comma Separated Values), + <literal>json</literal> (JavaScript Object Notation), or <literal>binary</literal>. The default is <literal>text</literal>. See <xref linkend="sql-copy-file-formats"/> below for details. </para> + <para> + The <literal>json</literal> option is allowed only in + <command>COPY TO</command>. + </para> </listitem> </varlistentry> @@ -257,7 +262,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable (line) of the file. The default is a tab character in text format, a comma in <literal>CSV</literal> format. This must be a single one-byte character. - This option is not allowed when using <literal>binary</literal> format. + This option is not allowed when using <literal>binary</literal> or <literal>json</literal> format. </para> </listitem> </varlistentry> @@ -271,7 +276,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable string in <literal>CSV</literal> format. You might prefer an empty string even in text format for cases where you don't want to distinguish nulls from empty strings. - This option is not allowed when using <literal>binary</literal> format. + This option is not allowed when using <literal>binary</literal> or <literal>json</literal> format. </para> <note> @@ -294,7 +299,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable is found in the input file, the default value of the corresponding column will be used. This option is allowed only in <command>COPY FROM</command>, and only when - not using <literal>binary</literal> format. + not using <literal>binary</literal> or <literal>json</literal> format. </para> </listitem> </varlistentry> @@ -310,7 +315,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable If this option is set to <literal>MATCH</literal>, the number and names of the columns in the header line must match the actual column names of the table, in order; otherwise an error is raised. - This option is not allowed when using <literal>binary</literal> format. + This option is not allowed when using <literal>binary</literal> or <literal>json</literal> format. The <literal>MATCH</literal> option is only valid for <command>COPY FROM</command> commands. </para> diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index cfca9d9dc29..b6f74c798d0 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -525,11 +525,13 @@ ProcessCopyOptions(ParseState *pstate, errorConflictingDefElem(defel, pstate); format_specified = true; if (strcmp(fmt, "text") == 0) - /* default format */ ; + opts_out->format = COPY_FORMAT_TEXT; else if (strcmp(fmt, "csv") == 0) - opts_out->csv_mode = true; + opts_out->format = COPY_FORMAT_CSV; else if (strcmp(fmt, "binary") == 0) - opts_out->binary = true; + opts_out->format = COPY_FORMAT_BINARY; + else if (strcmp(fmt, "json") == 0) + opts_out->format = COPY_FORMAT_JSON; else ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), @@ -689,31 +691,47 @@ ProcessCopyOptions(ParseState *pstate, * Check for incompatible options (must do these three before inserting * defaults) */ - if (opts_out->binary && opts_out->delim) + if (opts_out->format == COPY_FORMAT_BINARY && opts_out->delim) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ errmsg("cannot specify %s in BINARY mode", "DELIMITER"))); - if (opts_out->binary && opts_out->null_print) + if (opts_out->format == COPY_FORMAT_JSON && opts_out->delim) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ + errmsg("cannot specify %s in JSON mode", "DELIMITER")); + + if (opts_out->format == COPY_FORMAT_BINARY && opts_out->null_print) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("cannot specify %s in BINARY mode", "NULL"))); - if (opts_out->binary && opts_out->default_print) + if (opts_out->format == COPY_FORMAT_JSON && opts_out->null_print) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify %s in JSON mode", "NULL")); + + if (opts_out->format == COPY_FORMAT_BINARY && opts_out->default_print) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("cannot specify %s in BINARY mode", "DEFAULT"))); + if (opts_out->format == COPY_FORMAT_JSON && opts_out->default_print) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify %s in JSON mode", "DEFAULT")); + /* Set defaults for omitted options */ if (!opts_out->delim) - opts_out->delim = opts_out->csv_mode ? "," : "\t"; + opts_out->delim = opts_out->format == COPY_FORMAT_CSV ? "," : "\t"; if (!opts_out->null_print) - opts_out->null_print = opts_out->csv_mode ? "" : "\\N"; + opts_out->null_print = opts_out->format == COPY_FORMAT_CSV ? "" : "\\N"; opts_out->null_print_len = strlen(opts_out->null_print); - if (opts_out->csv_mode) + if (opts_out->format == COPY_FORMAT_CSV) { if (!opts_out->quote) opts_out->quote = "\""; @@ -761,7 +779,7 @@ ProcessCopyOptions(ParseState *pstate, * future-proofing. Likewise we disallow all digits though only octal * digits are actually dangerous. */ - if (!opts_out->csv_mode && + if (opts_out->format != COPY_FORMAT_CSV && strchr("\\.abcdefghijklmnopqrstuvwxyz0123456789", opts_out->delim[0]) != NULL) ereport(ERROR, @@ -769,43 +787,48 @@ ProcessCopyOptions(ParseState *pstate, errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim))); /* Check header */ - if (opts_out->binary && opts_out->header_line) + if (opts_out->format == COPY_FORMAT_BINARY && opts_out->header_line) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ errmsg("cannot specify %s in BINARY mode", "HEADER"))); + if (opts_out->format == COPY_FORMAT_JSON && opts_out->header_line) + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot specify %s in JSON mode", "HEADER")); + /* Check quote */ - if (!opts_out->csv_mode && opts_out->quote != NULL) + if (opts_out->format != COPY_FORMAT_CSV && opts_out->quote != NULL) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ errmsg("COPY %s requires CSV mode", "QUOTE"))); - if (opts_out->csv_mode && strlen(opts_out->quote) != 1) + if (opts_out->format == COPY_FORMAT_CSV && strlen(opts_out->quote) != 1) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("COPY quote must be a single one-byte character"))); - if (opts_out->csv_mode && opts_out->delim[0] == opts_out->quote[0]) + if (opts_out->format == COPY_FORMAT_CSV && opts_out->delim[0] == opts_out->quote[0]) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("COPY delimiter and quote must be different"))); /* Check escape */ - if (!opts_out->csv_mode && opts_out->escape != NULL) + if (opts_out->format != COPY_FORMAT_CSV && opts_out->escape != NULL) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ errmsg("COPY %s requires CSV mode", "ESCAPE"))); - if (opts_out->csv_mode && strlen(opts_out->escape) != 1) + if (opts_out->format == COPY_FORMAT_CSV && strlen(opts_out->escape) != 1) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("COPY escape must be a single one-byte character"))); /* Check force_quote */ - if (!opts_out->csv_mode && (opts_out->force_quote || opts_out->force_quote_all)) + if (opts_out->format != COPY_FORMAT_CSV && (opts_out->force_quote || opts_out->force_quote_all)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ @@ -819,8 +842,8 @@ ProcessCopyOptions(ParseState *pstate, "COPY FROM"))); /* Check force_notnull */ - if (!opts_out->csv_mode && (opts_out->force_notnull != NIL || - opts_out->force_notnull_all)) + if (opts_out->format != COPY_FORMAT_CSV && (opts_out->force_notnull != NIL || + opts_out->force_notnull_all)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ @@ -835,8 +858,8 @@ ProcessCopyOptions(ParseState *pstate, "COPY TO"))); /* Check force_null */ - if (!opts_out->csv_mode && (opts_out->force_null != NIL || - opts_out->force_null_all)) + if (opts_out->format != COPY_FORMAT_CSV && (opts_out->force_null != NIL || + opts_out->force_null_all)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ @@ -860,7 +883,7 @@ ProcessCopyOptions(ParseState *pstate, "NULL"))); /* Don't allow the CSV quote char to appear in the null string. */ - if (opts_out->csv_mode && + if (opts_out->format == COPY_FORMAT_CSV && strchr(opts_out->null_print, opts_out->quote[0]) != NULL) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), @@ -877,6 +900,12 @@ ProcessCopyOptions(ParseState *pstate, errmsg("COPY %s cannot be used with %s", "FREEZE", "COPY TO"))); + /* Check json format */ + if (opts_out->format == COPY_FORMAT_JSON && is_from) + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COPY json mode cannot be used with %s", "COPY FROM")); + if (opts_out->default_print) { if (!is_from) @@ -896,7 +925,7 @@ ProcessCopyOptions(ParseState *pstate, "DEFAULT"))); /* Don't allow the CSV quote char to appear in the default string. */ - if (opts_out->csv_mode && + if (opts_out->format == COPY_FORMAT_CSV && strchr(opts_out->default_print, opts_out->quote[0]) != NULL) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), @@ -913,7 +942,7 @@ ProcessCopyOptions(ParseState *pstate, errmsg("NULL specification and DEFAULT specification cannot be the same"))); } /* Check on_error */ - if (opts_out->binary && opts_out->on_error != COPY_ON_ERROR_STOP) + if (opts_out->format == COPY_FORMAT_BINARY && opts_out->on_error != COPY_ON_ERROR_STOP) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("only ON_ERROR STOP is allowed in BINARY mode"))); diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index bcf66f0adf8..bfe1937539b 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -155,9 +155,9 @@ static const CopyFromRoutine CopyFromRoutineBinary = { static const CopyFromRoutine * CopyFromGetRoutine(const CopyFormatOptions *opts) { - if (opts->csv_mode) + if (opts->format == COPY_FORMAT_CSV) return &CopyFromRoutineCSV; - else if (opts->binary) + else if (opts->format == COPY_FORMAT_BINARY) return &CopyFromRoutineBinary; /* default is text */ @@ -261,7 +261,7 @@ CopyFromErrorCallback(void *arg) cstate->cur_relname); return; } - if (cstate->opts.binary) + if (cstate->opts.format == COPY_FORMAT_BINARY) { /* can't usefully display the data */ if (cstate->cur_attname) diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index e8128f85e6b..02263f1b1f5 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -171,7 +171,7 @@ ReceiveCopyBegin(CopyFromState cstate) { StringInfoData buf; int natts = list_length(cstate->attnumlist); - int16 format = (cstate->opts.binary ? 1 : 0); + int16 format = (cstate->opts.format == COPY_FORMAT_BINARY ? 1 : 0); int i; pq_beginmessage(&buf, PqMsg_CopyInResponse); @@ -747,7 +747,7 @@ bool NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields) { return NextCopyFromRawFieldsInternal(cstate, fields, nfields, - cstate->opts.csv_mode); + cstate->opts.format == COPY_FORMAT_CSV); } /* @@ -774,7 +774,7 @@ NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields bool done; /* only available for text or csv input */ - Assert(!cstate->opts.binary); + Assert(!(cstate->opts.format == COPY_FORMAT_BINARY)); /* on input check that the header line is correct if needed */ if (cstate->cur_lineno == 0 && cstate->opts.header_line) diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index 84a3f3879a8..c1d4cbeedea 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -24,6 +24,7 @@ #include "executor/execdesc.h" #include "executor/executor.h" #include "executor/tuptable.h" +#include "funcapi.h" #include "libpq/libpq.h" #include "libpq/pqformat.h" #include "mb/pg_wchar.h" @@ -31,6 +32,7 @@ #include "pgstat.h" #include "storage/fd.h" #include "tcop/tcopprot.h" +#include "utils/json.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/rel.h" @@ -125,6 +127,7 @@ static void CopyToCSVOneRow(CopyToState cstate, TupleTableSlot *slot); static void CopyToTextLikeOneRow(CopyToState cstate, TupleTableSlot *slot, bool is_csv); static void CopyToTextLikeEnd(CopyToState cstate); +static void CopyToJsonOneRow(CopyToState cstate, TupleTableSlot *slot); static void CopyToBinaryStart(CopyToState cstate, TupleDesc tupDesc); static void CopyToBinaryOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo); static void CopyToBinaryOneRow(CopyToState cstate, TupleTableSlot *slot); @@ -144,7 +147,7 @@ static void CopySendInt16(CopyToState cstate, int16 val); /* * COPY TO routines for built-in formats. * - * CSV and text formats share the same TextLike routines except for the + * CSV and text, json formats share the same TextLike routines except for the * one-row callback. */ @@ -164,6 +167,14 @@ static const CopyToRoutine CopyToRoutineCSV = { .CopyToEnd = CopyToTextLikeEnd, }; +/* json format */ +static const CopyToRoutine CopyToRoutineJson = { + .CopyToStart = CopyToTextLikeStart, + .CopyToOutFunc = CopyToTextLikeOutFunc, + .CopyToOneRow = CopyToJsonOneRow, + .CopyToEnd = CopyToTextLikeEnd, +}; + /* binary format */ static const CopyToRoutine CopyToRoutineBinary = { .CopyToStart = CopyToBinaryStart, @@ -176,16 +187,18 @@ static const CopyToRoutine CopyToRoutineBinary = { static const CopyToRoutine * CopyToGetRoutine(const CopyFormatOptions *opts) { - if (opts->csv_mode) + if (opts->format == COPY_FORMAT_CSV) return &CopyToRoutineCSV; - else if (opts->binary) + else if (opts->format == COPY_FORMAT_BINARY) return &CopyToRoutineBinary; + else if (opts->format == COPY_FORMAT_JSON) + return &CopyToRoutineJson; /* default is text */ return &CopyToRoutineText; } -/* Implementation of the start callback for text and CSV formats */ +/* Implementation of the start callback for text, CSV, and json formats */ static void CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc) { @@ -204,6 +217,8 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc) ListCell *cur; bool hdr_delim = false; + Assert(cstate->opts.format != COPY_FORMAT_JSON); + foreach(cur, cstate->attnumlist) { int attnum = lfirst_int(cur); @@ -215,7 +230,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc) colname = NameStr(TupleDescAttr(tupDesc, attnum - 1)->attname); - if (cstate->opts.csv_mode) + if (cstate->opts.format == COPY_FORMAT_CSV) CopyAttributeOutCSV(cstate, colname, false); else CopyAttributeOutText(cstate, colname); @@ -226,7 +241,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc) } /* - * Implementation of the outfunc callback for text and CSV formats. Assign + * Implementation of the outfunc callback for text, CSV, and json formats. Assign * the output function data to the given *finfo. */ static void @@ -299,13 +314,46 @@ CopyToTextLikeOneRow(CopyToState cstate, CopySendTextLikeEndOfRow(cstate); } -/* Implementation of the end callback for text and CSV formats */ +/* Implementation of the end callback for text, CSV, and json formats */ static void CopyToTextLikeEnd(CopyToState cstate) { /* Nothing to do here */ } +/* Implementation of per-row callback for json format */ +static void +CopyToJsonOneRow(CopyToState cstate, TupleTableSlot *slot) +{ + Datum rowdata; + StringInfo result; + + /* + * if the COPY TO source data come from query rather than plain table, we need + * copy CopyToState->QueryDesc->TupleDesc to slot->tts_tupleDescriptor. + * This is necessary because the slot's TupleDesc may change during query execution, + * and we depend on it when calling composite_to_json. + */ + if (!cstate->rel) + { + memcpy(TupleDescAttr(slot->tts_tupleDescriptor, 0), + TupleDescAttr(cstate->queryDesc->tupDesc, 0), + cstate->queryDesc->tupDesc->natts * sizeof(FormData_pg_attribute)); + + for (int i = 0; i < cstate->queryDesc->tupDesc->natts; i++) + populate_compact_attribute(slot->tts_tupleDescriptor, i); + + BlessTupleDesc(slot->tts_tupleDescriptor); + } + rowdata = ExecFetchSlotHeapTupleDatum(slot); + result = makeStringInfo(); + composite_to_json(rowdata, result, false); + + CopySendData(cstate, result->data, result->len); + + CopySendTextLikeEndOfRow(cstate); +} + /* * Implementation of the start callback for binary format. Send a header * for a binary copy. @@ -392,14 +440,25 @@ SendCopyBegin(CopyToState cstate) { StringInfoData buf; int natts = list_length(cstate->attnumlist); - int16 format = (cstate->opts.binary ? 1 : 0); + int16 format = (cstate->opts.format == COPY_FORMAT_BINARY ? 1 : 0); int i; pq_beginmessage(&buf, PqMsg_CopyOutResponse); pq_sendbyte(&buf, format); /* overall format */ - pq_sendint16(&buf, natts); - for (i = 0; i < natts; i++) - pq_sendint16(&buf, format); /* per-column formats */ + if (cstate->opts.format != COPY_FORMAT_JSON) + { + pq_sendint16(&buf, natts); + for (i = 0; i < natts; i++) + pq_sendint16(&buf, format); /* per-column formats */ + } + else + { + /* + * JSON format is always one non-binary column + */ + pq_sendint16(&buf, 1); + pq_sendint16(&buf, 0); + } pq_endmessage(&buf); cstate->copy_dest = COPY_FRONTEND; } @@ -499,7 +558,7 @@ CopySendEndOfRow(CopyToState cstate) } /* - * Wrapper function of CopySendEndOfRow for text and CSV formats. Sends the + * Wrapper function of CopySendEndOfRow for text, CSV, and json formats. Sends the * line termination and do common appropriate things for the end of row. */ static inline void diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 271ae26cbaf..e26881bb13f 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -3493,6 +3493,10 @@ copy_opt_item: { $$ = makeDefElem("format", (Node *) makeString("csv"), @1); } + | JSON + { + $$ = makeDefElem("format", (Node *) makeString("json"), @1); + } | HEADER_P { $$ = makeDefElem("header", (Node *) makeBoolean(true), @1); @@ -3575,6 +3579,10 @@ copy_generic_opt_elem: { $$ = makeDefElem($1, $2, @1); } + | FORMAT_LA copy_generic_opt_arg + { + $$ = makeDefElem("format", $2, @1); + } ; copy_generic_opt_arg: diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c index 51452755f58..bf69347fa94 100644 --- a/src/backend/utils/adt/json.c +++ b/src/backend/utils/adt/json.c @@ -85,8 +85,6 @@ typedef struct JsonAggState JsonUniqueBuilderState unique_check; } JsonAggState; -static void composite_to_json(Datum composite, StringInfo result, - bool use_line_feeds); static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals, bool *nulls, int *valcount, JsonTypeCategory tcategory, Oid outfuncoid, @@ -516,8 +514,9 @@ array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds) /* * Turn a composite / record into JSON. + * Exported so COPY TO can use it. */ -static void +void composite_to_json(Datum composite, StringInfo result, bool use_line_feeds) { HeapTupleHeader td; diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c index 8432be641ac..6069b118834 100644 --- a/src/bin/psql/tab-complete.in.c +++ b/src/bin/psql/tab-complete.in.c @@ -3283,7 +3283,7 @@ match_previous_words(int pattern_id, /* Complete COPY <sth> FROM|TO filename WITH (FORMAT */ else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "FORMAT")) - COMPLETE_WITH("binary", "csv", "text"); + COMPLETE_WITH("binary", "csv", "text", "json"); /* Complete COPY <sth> FROM filename WITH (ON_ERROR */ else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "ON_ERROR")) diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index 06dfdfef721..07fcc2bc9ac 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -51,6 +51,17 @@ typedef enum CopyLogVerbosityChoice COPY_LOG_VERBOSITY_VERBOSE, /* logs additional messages */ } CopyLogVerbosityChoice; +/* + * Represents the format of the COPY operation. + */ +typedef enum CopyFormat +{ + COPY_FORMAT_TEXT = 0, + COPY_FORMAT_BINARY, + COPY_FORMAT_CSV, + COPY_FORMAT_JSON, +} CopyFormat; + /* * A struct to hold COPY options, in a parsed form. All of these are related * to formatting, except for 'freeze', which doesn't really belong here, but @@ -61,9 +72,8 @@ typedef struct CopyFormatOptions /* parameters from the COPY command */ int file_encoding; /* file or remote side's character encoding, * -1 if not specified */ - bool binary; /* binary format? */ + CopyFormat format; /* format of the COPY operation */ bool freeze; /* freeze rows on loading? */ - bool csv_mode; /* Comma Separated Value format? */ CopyHeaderChoice header_line; /* header line? */ char *null_print; /* NULL marker string (server encoding!) */ int null_print_len; /* length of same */ diff --git a/src/include/utils/json.h b/src/include/utils/json.h index 49bbda7ac06..1fa8e2ce8e2 100644 --- a/src/include/utils/json.h +++ b/src/include/utils/json.h @@ -17,6 +17,8 @@ #include "lib/stringinfo.h" /* functions in json.c */ +extern void composite_to_json(Datum composite, StringInfo result, + bool use_line_feeds); extern void escape_json(StringInfo buf, const char *str); extern void escape_json_with_len(StringInfo buf, const char *str, int len); extern void escape_json_text(StringInfo buf, const text *txt); diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out index 06bae8c61ae..0d4cfc0b60a 100644 --- a/src/test/regress/expected/copy.out +++ b/src/test/regress/expected/copy.out @@ -73,6 +73,80 @@ copy copytest3 to stdout csv header; c1,"col with , comma","col with "" quote" 1,a,1 2,b,2 +--- test copying in JSON mode with various styles +copy copytest to stdout json; +{"style":"DOS","test":"abc\r\ndef","filler":1} +{"style":"Unix","test":"abc\ndef","filler":2} +{"style":"Mac","test":"abc\rdef","filler":3} +{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4} +copy copytest to stdout (format json); +{"style":"DOS","test":"abc\r\ndef","filler":1} +{"style":"Unix","test":"abc\ndef","filler":2} +{"style":"Mac","test":"abc\rdef","filler":3} +{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4} +-- all of the following should yield error +copy copytest to stdout (format json, delimiter '|'); +ERROR: cannot specify DELIMITER in JSON mode +copy copytest to stdout (format json, null '\N'); +ERROR: cannot specify NULL in JSON mode +copy copytest to stdout (format json, default '|'); +ERROR: cannot specify DEFAULT in JSON mode +copy copytest to stdout (format json, header); +ERROR: cannot specify HEADER in JSON mode +copy copytest to stdout (format json, quote '"'); +ERROR: COPY QUOTE requires CSV mode +copy copytest to stdout (format json, escape '"'); +ERROR: COPY ESCAPE requires CSV mode +copy copytest to stdout (format json, force_quote *); +ERROR: COPY FORCE_QUOTE requires CSV mode +copy copytest to stdout (format json, force_not_null *); +ERROR: COPY FORCE_NOT_NULL requires CSV mode +copy copytest to stdout (format json, force_null *); +ERROR: COPY FORCE_NULL requires CSV mode +copy copytest to stdout (format json, on_error ignore); +ERROR: COPY ON_ERROR cannot be used with COPY TO +LINE 1: copy copytest to stdout (format json, on_error ignore); + ^ +copy copytest from stdin(format json); +ERROR: COPY json mode cannot be used with COPY FROM +-- all of the above should yield error +-- embedded escaped characters +create temp table copyjsontest ( + id bigserial, + f1 text, + f2 timestamptz); +insert into copyjsontest + select g.i, + CASE WHEN g.i % 2 = 0 THEN + 'line with '' in it: ' || g.i::text + ELSE + 'line with " in it: ' || g.i::text + END, + 'Mon Feb 10 17:32:01 1997 PST' + from generate_series(1,5) as g(i); +insert into copyjsontest (f1) values +(E'aaa\"bbb'::text), +(E'aaa\\bbb'::text), +(E'aaa\/bbb'::text), +(E'aaa\bbbb'::text), +(E'aaa\fbbb'::text), +(E'aaa\nbbb'::text), +(E'aaa\rbbb'::text), +(E'aaa\tbbb'::text); +copy copyjsontest to stdout json; +{"id":1,"f1":"line with \" in it: 1","f2":"1997-02-10T17:32:01-08:00"} +{"id":2,"f1":"line with ' in it: 2","f2":"1997-02-10T17:32:01-08:00"} +{"id":3,"f1":"line with \" in it: 3","f2":"1997-02-10T17:32:01-08:00"} +{"id":4,"f1":"line with ' in it: 4","f2":"1997-02-10T17:32:01-08:00"} +{"id":5,"f1":"line with \" in it: 5","f2":"1997-02-10T17:32:01-08:00"} +{"id":1,"f1":"aaa\"bbb","f2":null} +{"id":2,"f1":"aaa\\bbb","f2":null} +{"id":3,"f1":"aaa/bbb","f2":null} +{"id":4,"f1":"aaa\bbbb","f2":null} +{"id":5,"f1":"aaa\fbbb","f2":null} +{"id":6,"f1":"aaa\nbbb","f2":null} +{"id":7,"f1":"aaa\rbbb","f2":null} +{"id":8,"f1":"aaa\tbbb","f2":null} create temp table copytest4 ( c1 int, "colname with tab: " text); diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql index 3009bdfdf89..6ee96f5aa51 100644 --- a/src/test/regress/sql/copy.sql +++ b/src/test/regress/sql/copy.sql @@ -82,6 +82,53 @@ this is just a line full of junk that would error out if parsed copy copytest3 to stdout csv header; +--- test copying in JSON mode with various styles +copy copytest to stdout json; + +copy copytest to stdout (format json); + +-- all of the following should yield error +copy copytest to stdout (format json, delimiter '|'); +copy copytest to stdout (format json, null '\N'); +copy copytest to stdout (format json, default '|'); +copy copytest to stdout (format json, header); +copy copytest to stdout (format json, quote '"'); +copy copytest to stdout (format json, escape '"'); +copy copytest to stdout (format json, force_quote *); +copy copytest to stdout (format json, force_not_null *); +copy copytest to stdout (format json, force_null *); +copy copytest to stdout (format json, on_error ignore); +copy copytest from stdin(format json); +-- all of the above should yield error + +-- embedded escaped characters +create temp table copyjsontest ( + id bigserial, + f1 text, + f2 timestamptz); + +insert into copyjsontest + select g.i, + CASE WHEN g.i % 2 = 0 THEN + 'line with '' in it: ' || g.i::text + ELSE + 'line with " in it: ' || g.i::text + END, + 'Mon Feb 10 17:32:01 1997 PST' + from generate_series(1,5) as g(i); + +insert into copyjsontest (f1) values +(E'aaa\"bbb'::text), +(E'aaa\\bbb'::text), +(E'aaa\/bbb'::text), +(E'aaa\bbbb'::text), +(E'aaa\fbbb'::text), +(E'aaa\nbbb'::text), +(E'aaa\rbbb'::text), +(E'aaa\tbbb'::text); + +copy copyjsontest to stdout json; + create temp table copytest4 ( c1 int, "colname with tab: " text); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 9840060997f..a2e4bfee0e2 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -500,6 +500,7 @@ ConversionLocation ConvertRowtypeExpr CookedConstraint CopyDest +CopyFormat CopyFormatOptions CopyFromRoutine CopyFromState -- 2.34.1