Michael Paquier wrote: > If you can produce a new version, please feel free to post it.
Here's a rebased version with a couple regression tests added per the discussions during the previous CF. Now at https://commitfest.postgresql.org/20/1861/ Best regards, -- Daniel Vérité PostgreSQL-powered mailer: http://www.manitou-mail.org Twitter: @DanielVerite
diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 13a8b68..98147ef 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -672,6 +672,10 @@ COPY <replaceable class="parameter">count</replaceable> <refsect2> <title>CSV Format</title> + <indexterm> + <primary>CSV</primary> + <secondary>in COPY</secondary> + </indexterm> <para> This format option is used for importing and exporting the Comma diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml index eb9d93a..7617c5e 100644 --- a/doc/src/sgml/ref/psql-ref.sgml +++ b/doc/src/sgml/ref/psql-ref.sgml @@ -152,6 +152,20 @@ EOF </varlistentry> <varlistentry> + <term><option>--csv</option></term> + <listitem> + <para> + <indexterm> + <primary>CSV</primary> + <secondary>in psql</secondary> + </indexterm> + Switches to <acronym>CSV</acronym> output mode. This is equivalent + to <command>\pset format csv</command>. + </para> + </listitem> + </varlistentry> + + <varlistentry> <term><option>-d <replaceable class="parameter">dbname</replaceable></option></term> <term><option>--dbname=<replaceable class="parameter">dbname</replaceable></option></term> <listitem> @@ -2557,6 +2571,19 @@ lo_import 152801 </varlistentry> <varlistentry> + <term><literal>fieldsep_csv</literal></term> + <listitem> + <para> + Specifies the field separator to be used in the + <acronym>CSV</acronym> format. When the separator appears in a field + value, that field is output inside double quotes according to + <acronym>CSV</acronym> rules. To set a tab as field separator, type + <literal>\pset fieldsep_csv '\t'</literal>. The default is a comma. + </para> + </listitem> + </varlistentry> + + <varlistentry> <term><literal>fieldsep_zero</literal></term> <listitem> <para> @@ -2584,12 +2611,11 @@ lo_import 152801 <term><literal>format</literal></term> <listitem> <para> - Sets the output format to one of <literal>unaligned</literal>, - <literal>aligned</literal>, <literal>wrapped</literal>, - <literal>html</literal>, <literal>asciidoc</literal>, + Sets the output format to one of <literal>aligned</literal>, + <literal>asciidoc</literal>, <literal>csv</literal>, <literal>html</literal>, <literal>latex</literal> (uses <literal>tabular</literal>), - <literal>latex-longtable</literal>, or - <literal>troff-ms</literal>. + <literal>latex-longtable</literal>, <literal>troff-ms</literal>, + <literal>unaligned</literal>, or <literal>wrapped</literal>. Unique abbreviations are allowed. (That would mean one letter is enough.) </para> @@ -2597,14 +2623,27 @@ lo_import 152801 <para><literal>unaligned</literal> format writes all columns of a row on one line, separated by the currently active field separator. This is useful for creating output that might be intended to be read - in by other programs (for example, tab-separated or comma-separated - format). + in by other programs. </para> <para><literal>aligned</literal> format is the standard, human-readable, nicely formatted text output; this is the default. </para> + <para><literal>csv</literal> format writes columns separated by + commas, applying the quoting rules described in RFC 4180. + Alternative separators can be selected with + <command>\pset fieldsep_csv</command>. + The output is compatible with the CSV format of the + <command>COPY</command> command. The header with column names + is output unless the <literal>tuples_only</literal> parameter is + <literal>on</literal>. Title and footers are not printed. + Each row is terminated by the system-dependent end-of-line character, + which is typically a single newline (<literal>\n</literal>) for + Unix-like systems or a carriage return and newline sequence + (<literal>\r\n</literal>) for Microsoft Windows. + </para> + <para><literal>wrapped</literal> format is like <literal>aligned</literal> but wraps wide data values across lines to make the output fit in the target column width. The target width is determined as described under diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c index 5b4d54a..8d0ad71 100644 --- a/src/bin/psql/command.c +++ b/src/bin/psql/command.c @@ -1941,8 +1941,8 @@ exec_command_pset(PsqlScanState scan_state, bool active_branch) int i; static const char *const my_list[] = { - "border", "columns", "expanded", "fieldsep", "fieldsep_zero", - "footer", "format", "linestyle", "null", + "border", "columns", "expanded", "fieldsep", "fieldsep_csv", + "fieldsep_zero", "footer", "format", "linestyle", "null", "numericlocale", "pager", "pager_min_lines", "recordsep", "recordsep_zero", "tableattr", "title", "tuples_only", @@ -3584,6 +3584,9 @@ _align2string(enum printFormat in) case PRINT_TROFF_MS: return "troff-ms"; break; + case PRINT_CSV: + return "csv"; + break; } return "unknown"; } @@ -3639,25 +3642,27 @@ do_pset(const char *param, const char *value, printQueryOpt *popt, bool quiet) { if (!value) ; - else if (pg_strncasecmp("unaligned", value, vallen) == 0) - popt->topt.format = PRINT_UNALIGNED; else if (pg_strncasecmp("aligned", value, vallen) == 0) popt->topt.format = PRINT_ALIGNED; - else if (pg_strncasecmp("wrapped", value, vallen) == 0) - popt->topt.format = PRINT_WRAPPED; - else if (pg_strncasecmp("html", value, vallen) == 0) - popt->topt.format = PRINT_HTML; else if (pg_strncasecmp("asciidoc", value, vallen) == 0) popt->topt.format = PRINT_ASCIIDOC; + else if (pg_strncasecmp("csv", value, vallen) == 0) + popt->topt.format = PRINT_CSV; + else if (pg_strncasecmp("html", value, vallen) == 0) + popt->topt.format = PRINT_HTML; else if (pg_strncasecmp("latex", value, vallen) == 0) popt->topt.format = PRINT_LATEX; else if (pg_strncasecmp("latex-longtable", value, vallen) == 0) popt->topt.format = PRINT_LATEX_LONGTABLE; else if (pg_strncasecmp("troff-ms", value, vallen) == 0) popt->topt.format = PRINT_TROFF_MS; + else if (pg_strncasecmp("unaligned", value, vallen) == 0) + popt->topt.format = PRINT_UNALIGNED; + else if (pg_strncasecmp("wrapped", value, vallen) == 0) + popt->topt.format = PRINT_WRAPPED; else { - psql_error("\\pset: allowed formats are unaligned, aligned, wrapped, html, asciidoc, latex, latex-longtable, troff-ms\n"); + psql_error("\\pset: allowed formats are aligned, asciidoc, csv, html, latex, latex-longtable, troff-ms, unaligned, wrapped\n"); return false; } } @@ -3785,6 +3790,26 @@ do_pset(const char *param, const char *value, printQueryOpt *popt, bool quiet) } } + else if (strcmp(param, "fieldsep_csv") == 0) + { + if (value) + { + /* check for value being non-empty and with an MB length of 1 */ + if (*value == '\0' || value[PQmblen(value, pset.encoding)] != '\0') + { + psql_error("\\pset: the CSV field separator must be a single character\n"); + return false; + } + if (value[0] == '"' || value[0] == '\n' || value[0] == '\r') + { + psql_error("\\pset: the CSV field separator must not be a double quote, newline, or carriage return\n"); + return false; + } + free(popt->topt.fieldSepCsv); + popt->topt.fieldSepCsv = pg_strdup(value); + } + } + else if (strcmp(param, "fieldsep_zero") == 0) { free(popt->topt.fieldSep.separator); @@ -3940,6 +3965,13 @@ printPsetInfo(const char *param, struct printQueryOpt *popt) printf(_("Field separator is zero byte.\n")); } + /* show field separator for CSV format */ + else if (strcmp(param, "fieldsep_csv") == 0) + { + printf(_("Field separator for CSV is \"%s\".\n"), + popt->topt.fieldSepCsv); + } + /* show disable "(x rows)" footer */ else if (strcmp(param, "footer") == 0) { @@ -4134,6 +4166,8 @@ pset_value_string(const char *param, struct printQueryOpt *popt) return pset_quoted_string(popt->topt.fieldSep.separator ? popt->topt.fieldSep.separator : ""); + else if (strcmp(param, "fieldsep_csv") == 0) + return pset_quoted_string(popt->topt.fieldSepCsv); else if (strcmp(param, "fieldsep_zero") == 0) return pstrdup(pset_bool_string(popt->topt.fieldSep.separator_zero)); else if (strcmp(param, "footer") == 0) diff --git a/src/bin/psql/help.c b/src/bin/psql/help.c index 586aebd..ad18759 100644 --- a/src/bin/psql/help.c +++ b/src/bin/psql/help.c @@ -108,6 +108,7 @@ usage(unsigned short int pager) fprintf(output, _("\nOutput format options:\n")); fprintf(output, _(" -A, --no-align unaligned table output mode\n")); + fprintf(output, _(" --csv Comma-Separated-Values output mode\n")); fprintf(output, _(" -F, --field-separator=STRING\n" " field separator for unaligned output (default: \"%s\")\n"), DEFAULT_FIELD_SEP); @@ -272,10 +273,10 @@ slashUsage(unsigned short int pager) fprintf(output, _(" \\H toggle HTML output mode (currently %s)\n"), ON(pset.popt.topt.format == PRINT_HTML)); fprintf(output, _(" \\pset [NAME [VALUE]] set table output option\n" - " (NAME := {border|columns|expanded|fieldsep|fieldsep_zero|\n" - " footer|format|linestyle|null|numericlocale|pager|\n" - " pager_min_lines|recordsep|recordsep_zero|tableattr|title|\n" - " tuples_only|unicode_border_linestyle|\n" + " (NAME := {border|columns|expanded|fieldsep|fieldsep_csv|\n" + " fieldsep_zero|footer|format|linestyle|null|numericlocale|\n" + " pager|pager_min_lines|recordsep|recordsep_zero|tableattr|\n" + " title|tuples_only|unicode_border_linestyle|\n" " unicode_column_linestyle|unicode_header_linestyle})\n")); fprintf(output, _(" \\t [on|off] show only rows (currently %s)\n"), ON(pset.popt.topt.tuples_only)); diff --git a/src/bin/psql/startup.c b/src/bin/psql/startup.c index be57574..8e7e9d0 100644 --- a/src/bin/psql/startup.c +++ b/src/bin/psql/startup.c @@ -148,6 +148,8 @@ main(int argc, char *argv[]) pset.popt.topt.unicode_column_linestyle = UNICODE_LINESTYLE_SINGLE; pset.popt.topt.unicode_header_linestyle = UNICODE_LINESTYLE_SINGLE; + pset.popt.topt.fieldSepCsv = pg_strdup(","); + refresh_utf8format(&(pset.popt.topt)); /* We must get COLUMNS here before readline() sets it */ @@ -436,6 +438,7 @@ parse_psql_options(int argc, char *argv[], struct adhoc_opts *options) {"echo-all", no_argument, NULL, 'a'}, {"no-align", no_argument, NULL, 'A'}, {"command", required_argument, NULL, 'c'}, + {"csv", no_argument, NULL, 2}, /* no single-letter short form */ {"dbname", required_argument, NULL, 'd'}, {"echo-queries", no_argument, NULL, 'e'}, {"echo-errors", no_argument, NULL, 'b'}, @@ -658,6 +661,10 @@ parse_psql_options(int argc, char *argv[], struct adhoc_opts *options) exit(EXIT_SUCCESS); } break; + case 2: + /* --csv (only as a long option) */ + pset.popt.topt.format = PRINT_CSV; + break; default: unknown_option: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index a980f92..5804587 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -3527,9 +3527,9 @@ psql_completion(const char *text, int start, int end) else if (TailMatchesCS("\\password")) COMPLETE_WITH_QUERY(Query_for_list_of_roles); else if (TailMatchesCS("\\pset")) - COMPLETE_WITH_CS("border", "columns", "expanded", - "fieldsep", "fieldsep_zero", "footer", "format", - "linestyle", "null", "numericlocale", + COMPLETE_WITH_CS("border", "columns", "expanded", "fieldsep", + "fieldsep_csv", "fieldsep_zero", "footer", + "format", "linestyle", "null", "numericlocale", "pager", "pager_min_lines", "recordsep", "recordsep_zero", "tableattr", "title", "tuples_only", @@ -3539,9 +3539,8 @@ psql_completion(const char *text, int start, int end) else if (TailMatchesCS("\\pset", MatchAny)) { if (TailMatchesCS("format")) - COMPLETE_WITH_CS("unaligned", "aligned", "wrapped", "html", - "asciidoc", "latex", "latex-longtable", - "troff-ms"); + COMPLETE_WITH_CS("aligned", "asciidoc", "csv", "html", "latex", + "latex-longtable", "troff-ms", "unaligned", "wrapped"); else if (TailMatchesCS("linestyle")) COMPLETE_WITH_CS("ascii", "old-ascii", "unicode"); else if (TailMatchesCS("pager")) diff --git a/src/fe_utils/print.c b/src/fe_utils/print.c index cb9a9a0..fb83c4e 100644 --- a/src/fe_utils/print.c +++ b/src/fe_utils/print.c @@ -2783,6 +2783,113 @@ print_troff_ms_vertical(const printTableContent *cont, FILE *fout) } } +/*************************/ +/* CSV */ +/*************************/ +static void +csv_escaped_print(const char *text, FILE *fout) +{ + const char *p; + + fputc('"', fout); + for (p = text; *p; p++) + { + if (*p == '"') + fputc('"', fout); /* double quotes are doubled */ + fputc(*p, fout); + } + fputc('"', fout); +} + +static void +csv_print_field(const char *text, FILE *fout, const char *sep) +{ + /* + * Enclose and escape field contents when one of these conditions is + * met: + * - the field separator is found in the contents + * - the field contains a CR or LF + * - the field contains a double quote + */ + if ((sep != NULL && *sep != '\0' && strstr(text, sep) != NULL) || + strcspn(text, "\r\n\"") != strlen(text)) + { + csv_escaped_print(text, fout); + } + else + fputs(text, fout); +} + +static void +print_csv_text(const printTableContent *cont, FILE *fout) +{ + const char *const *ptr; + int i; + + if (cancel_pressed) + return; + + /* + * The title and footer are never printed in csv format. + * The header is printed if opt_tuples_only is false. + * + * Despite RFC 4180 saying that end of lines are CRLF, terminate + * lines with '\n', which represent system-dependent end of lines + * in text mode (typically LF on Unix and CRLF on Windows). + */ + + if (cont->opt->start_table && !cont->opt->tuples_only) + { + /* print headers */ + for (ptr = cont->headers; *ptr; ptr++) + { + if (ptr != cont->headers) + fputs(cont->opt->fieldSepCsv, fout); + csv_print_field(*ptr, fout, cont->opt->fieldSepCsv); + } + fputc('\n', fout); + } + + /* print cells */ + for (i = 0, ptr = cont->cells; *ptr; i++, ptr++) + { + csv_print_field(*ptr, fout, cont->opt->fieldSepCsv); + + if ((i + 1) % cont->ncolumns) + fputs(cont->opt->fieldSepCsv, fout); + else + { + fputc('\n', fout); + } + } +} + +static void +print_csv_vertical(const printTableContent *cont, FILE *fout) +{ + unsigned int i; + const char *const *ptr; + + /* print records */ + for (i = 0, ptr = cont->cells; *ptr; i++, ptr++) + { + if (cancel_pressed) + return; + + /* print name of column */ + csv_print_field(cont->headers[i % cont->ncolumns], fout, + cont->opt->fieldSepCsv); + + /* print field separator */ + fputs(cont->opt->fieldSepCsv, fout); + + /* print field value */ + csv_print_field(*ptr, fout, cont->opt->fieldSepCsv); + + fputc('\n', fout); + } +} + /********************************/ /* Public functions */ @@ -3234,6 +3341,12 @@ printTable(const printTableContent *cont, else print_aligned_text(cont, fout, is_pager); break; + case PRINT_CSV: + if (cont->opt->expanded == 1) + print_csv_vertical(cont, fout); + else + print_csv_text(cont, fout); + break; case PRINT_HTML: if (cont->opt->expanded == 1) print_html_vertical(cont, fout); diff --git a/src/include/fe_utils/print.h b/src/include/fe_utils/print.h index 83320d0..030419e 100644 --- a/src/include/fe_utils/print.h +++ b/src/include/fe_utils/print.h @@ -33,7 +33,8 @@ enum printFormat PRINT_ASCIIDOC, PRINT_LATEX, PRINT_LATEX_LONGTABLE, - PRINT_TROFF_MS + PRINT_TROFF_MS, + PRINT_CSV /* add your favourite output format here ... */ }; @@ -112,6 +113,7 @@ typedef struct printTableOpt const printTextFormat *line_style; /* line style (NULL for default) */ struct separator fieldSep; /* field separator for unaligned text mode */ struct separator recordSep; /* record separator for unaligned text mode */ + char *fieldSepCsv; /* field separator for csv format */ bool numericLocale; /* locale-aware numeric units separator and * decimal marker */ char *tableAttr; /* attributes for HTML <table ...> */ diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out index 3818cfe..9df8ce5 100644 --- a/src/test/regress/expected/psql.out +++ b/src/test/regress/expected/psql.out @@ -262,6 +262,7 @@ border 1 columns 0 expanded off fieldsep '|' +fieldsep_csv ',' fieldsep_zero off footer on format aligned @@ -3243,3 +3244,112 @@ last error message: division by zero \echo 'last error code:' :LAST_ERROR_SQLSTATE last error code: 22012 \unset FETCH_COUNT +-- test csv format +prepare q as select 'ab,cd' as col1, 'ab' as "col,2", E'a\tb' as col3, '"' as col4, + '""' as col5, 'a"b' as "col""6", E'a\nb' as col7, NULL as col8, 'ab' as "col + 9", array['ab', E'cd\nef'] as col10, + '{"a":"a,b", "a,b":null, "c":"a,\"b"}'::json as col11 + from generate_series(1,2); +\pset format csv +\pset fieldsep_csv ',' +\pset expanded off +\t off +execute q; +col1,"col,2",col3,col4,col5,"col""6",col7,col8,"col + 9",col10,col11 +"ab,cd",ab,a b,"""","""""","a""b","a +b",,ab,"{ab,""cd +ef""}","{""a"":""a,b"", ""a,b"":null, ""c"":""a,\""b""}" +"ab,cd",ab,a b,"""","""""","a""b","a +b",,ab,"{ab,""cd +ef""}","{""a"":""a,b"", ""a,b"":null, ""c"":""a,\""b""}" +\pset fieldsep_csv '\t' +execute q; +col1 col,2 col3 col4 col5 "col""6" col7 col8 "col + 9" col10 col11 +ab,cd ab "a b" """" """""" "a""b" "a +b" ab "{ab,""cd +ef""}" "{""a"":""a,b"", ""a,b"":null, ""c"":""a,\""b""}" +ab,cd ab "a b" """" """""" "a""b" "a +b" ab "{ab,""cd +ef""}" "{""a"":""a,b"", ""a,b"":null, ""c"":""a,\""b""}" +\t on +execute q; +ab,cd ab "a b" """" """""" "a""b" "a +b" ab "{ab,""cd +ef""}" "{""a"":""a,b"", ""a,b"":null, ""c"":""a,\""b""}" +ab,cd ab "a b" """" """""" "a""b" "a +b" ab "{ab,""cd +ef""}" "{""a"":""a,b"", ""a,b"":null, ""c"":""a,\""b""}" +\t off +\pset expanded on +execute q; +col1 ab,cd +col,2 ab +col3 "a b" +col4 """" +col5 """""" +"col""6" "a""b" +col7 "a +b" +col8 +"col + 9" ab +col10 "{ab,""cd +ef""}" +col11 "{""a"":""a,b"", ""a,b"":null, ""c"":""a,\""b""}" +col1 ab,cd +col,2 ab +col3 "a b" +col4 """" +col5 """""" +"col""6" "a""b" +col7 "a +b" +col8 +"col + 9" ab +col10 "{ab,""cd +ef""}" +col11 "{""a"":""a,b"", ""a,b"":null, ""c"":""a,\""b""}" +\pset fieldsep_csv ',' +execute q; +col1,"ab,cd" +"col,2",ab +col3,a b +col4,"""" +col5,"""""" +"col""6","a""b" +col7,"a +b" +col8, +"col + 9",ab +col10,"{ab,""cd +ef""}" +col11,"{""a"":""a,b"", ""a,b"":null, ""c"":""a,\""b""}" +col1,"ab,cd" +"col,2",ab +col3,a b +col4,"""" +col5,"""""" +"col""6","a""b" +col7,"a +b" +col8, +"col + 9",ab +col10,"{ab,""cd +ef""}" +col11,"{""a"":""a,b"", ""a,b"":null, ""c"":""a,\""b""}" +-- illegal csv separators +\pset fieldsep_csv '' +\pset: the CSV field separator must be a single character +\pset fieldsep_csv ',,' +\pset: the CSV field separator must be a single character +\pset fieldsep_csv '\0' +\pset: the CSV field separator must be a single character +deallocate q; +\pset format aligned +\pset expanded off +\t off diff --git a/src/test/regress/sql/psql.sql b/src/test/regress/sql/psql.sql index b45da9b..86e504b 100644 --- a/src/test/regress/sql/psql.sql +++ b/src/test/regress/sql/psql.sql @@ -688,3 +688,35 @@ select 1/(15-unique2) from tenk1 order by unique2 limit 19; \echo 'last error code:' :LAST_ERROR_SQLSTATE \unset FETCH_COUNT + +-- test csv format +prepare q as select 'ab,cd' as col1, 'ab' as "col,2", E'a\tb' as col3, '"' as col4, + '""' as col5, 'a"b' as "col""6", E'a\nb' as col7, NULL as col8, 'ab' as "col + 9", array['ab', E'cd\nef'] as col10, + '{"a":"a,b", "a,b":null, "c":"a,\"b"}'::json as col11 + from generate_series(1,2); + +\pset format csv +\pset fieldsep_csv ',' +\pset expanded off +\t off +execute q; +\pset fieldsep_csv '\t' +execute q; +\t on +execute q; +\t off +\pset expanded on +execute q; +\pset fieldsep_csv ',' +execute q; + +-- illegal csv separators +\pset fieldsep_csv '' +\pset fieldsep_csv ',,' +\pset fieldsep_csv '\0' + +deallocate q; +\pset format aligned +\pset expanded off +\t off