On Thu, Nov 08, 2018 at 01:58:34PM +0900, Michael Paquier wrote: > Anyway, I am still going through the patch, so no need to send a new > version for now.
Okay, I have done a round of more in-depth review, and the patch looks to be in pretty good shape. Relying on tuples_only to decide if the header should be printed or not looks good to me. + /* check for value being non-empty and with an MB length of 1 */ + if (*value == '\0' || value[PQmblen(value, pset.encoding)] != '\0') It seems to me that this can just be replaced with that: if (strlen(value) != 1) Attached is what I am finishing up with for the moment. Comments are welcome. I am still planning look at that stuff a bit more once again, particularly the printing part, but I am lacking of time now.. -- Michael
diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml index a1ca94057b..6d45f25aee 100644 --- a/doc/src/sgml/ref/psql-ref.sgml +++ b/doc/src/sgml/ref/psql-ref.sgml @@ -151,6 +151,21 @@ EOF </listitem> </varlistentry> + <varlistentry> + <term><option>--csv</option></term> + <listitem> + <para> + <indexterm> + <primary>CSV</primary> + <secondary>in psql</secondary> + </indexterm> + Switches to <acronym>CSV</acronym> (Comma Separated Values) + table output mode. This is equivalent to + <command>\pset format csv</command>. + </para> + </listitem> + </varlistentry> + <varlistentry> <term><option>-d <replaceable class="parameter">dbname</replaceable></option></term> <term><option>--dbname=<replaceable class="parameter">dbname</replaceable></option></term> @@ -2556,6 +2571,19 @@ lo_import 152801 </listitem> </varlistentry> + <varlistentry> + <term><literal>fieldsep_csv</literal></term> + <listitem> + <para> + Specifies the field separator to be used in the + <acronym>CSV</acronym> format. When the separator appears in a field + value, that field is output inside double quotes according to + <acronym>CSV</acronym> rules. To set a tab as field separator, type + <literal>\pset fieldsep_csv '\t'</literal>. The default is a comma. + </para> + </listitem> + </varlistentry> + <varlistentry> <term><literal>fieldsep_zero</literal></term> <listitem> @@ -2585,7 +2613,8 @@ lo_import 152801 <listitem> <para> Sets the output format to one of <literal>aligned</literal>, - <literal>asciidoc</literal>, <literal>html</literal>, + <literal>asciidoc</literal>, <literal>csv</literal>, + <literal>html</literal>, <literal>latex</literal> (uses <literal>tabular</literal>), <literal>latex-longtable</literal>, <literal>troff-ms</literal>, <literal>unaligned</literal>, or <literal>wrapped</literal>. @@ -2604,6 +2633,21 @@ lo_import 152801 nicely formatted text output; this is the default. </para> + <para><literal>csv</literal> format writes columns separated by + commas, applying the quoting rules described in + <ulink url="https://tools.ietf.org/html/rfc4180">RFC 4180</ulink>. + Alternative separators can be selected with + <command>\pset fieldsep_csv</command>. + The output is compatible with the CSV format of the + <command>COPY</command> command. The header with column names + is generated unless the <literal>tuples_only</literal> parameter is + <literal>on</literal>. Title and footers are not printed. + Each row is terminated by the system-dependent end-of-line character, + which is typically a single newline (<literal>\n</literal>) for + Unix-like systems or a carriage return and newline sequence + (<literal>\r\n</literal>) for Microsoft Windows. + </para> + <para><literal>wrapped</literal> format is like <literal>aligned</literal> but wraps wide data values across lines to make the output fit in the target column width. The target width is determined as described under diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c index 0dea54d3ce..7e3f577897 100644 --- a/src/bin/psql/command.c +++ b/src/bin/psql/command.c @@ -1941,8 +1941,8 @@ exec_command_pset(PsqlScanState scan_state, bool active_branch) int i; static const char *const my_list[] = { - "border", "columns", "expanded", "fieldsep", "fieldsep_zero", - "footer", "format", "linestyle", "null", + "border", "columns", "expanded", "fieldsep", "fieldsep_csv", + "fieldsep_zero", "footer", "format", "linestyle", "null", "numericlocale", "pager", "pager_min_lines", "recordsep", "recordsep_zero", "tableattr", "title", "tuples_only", @@ -3566,6 +3566,9 @@ _align2string(enum printFormat in) case PRINT_ASCIIDOC: return "asciidoc"; break; + case PRINT_CSV: + return "csv"; + break; case PRINT_HTML: return "html"; break; @@ -3643,6 +3646,8 @@ do_pset(const char *param, const char *value, printQueryOpt *popt, bool quiet) popt->topt.format = PRINT_ALIGNED; else if (pg_strncasecmp("asciidoc", value, vallen) == 0) popt->topt.format = PRINT_ASCIIDOC; + else if (pg_strncasecmp("csv", value, vallen) == 0) + popt->topt.format = PRINT_CSV; else if (pg_strncasecmp("html", value, vallen) == 0) popt->topt.format = PRINT_HTML; else if (pg_strncasecmp("latex", value, vallen) == 0) @@ -3657,7 +3662,7 @@ do_pset(const char *param, const char *value, printQueryOpt *popt, bool quiet) popt->topt.format = PRINT_WRAPPED; else { - psql_error("\\pset: allowed formats are aligned, asciidoc, html, latex, latex-longtable, troff-ms, unaligned, wrapped\n"); + psql_error("\\pset: allowed formats are aligned, asciidoc, csv, html, latex, latex-longtable, troff-ms, unaligned, wrapped\n"); return false; } } @@ -3785,6 +3790,26 @@ do_pset(const char *param, const char *value, printQueryOpt *popt, bool quiet) } } + else if (strcmp(param, "fieldsep_csv") == 0) + { + if (value) + { + /* CSV separator has to be a one-byte character */ + if (strlen(value) != 1) + { + psql_error("\\pset: the CSV field separator must be a single character\n"); + return false; + } + if (value[0] == '"' || value[0] == '\n' || value[0] == '\r') + { + psql_error("\\pset: the CSV field separator cannot be a double quote, a newline, or a carriage return\n"); + return false; + } + free(popt->topt.fieldSepCsv); + popt->topt.fieldSepCsv = pg_strdup(value); + } + } + else if (strcmp(param, "fieldsep_zero") == 0) { free(popt->topt.fieldSep.separator); @@ -3940,6 +3965,13 @@ printPsetInfo(const char *param, struct printQueryOpt *popt) printf(_("Field separator is zero byte.\n")); } + /* show field separator for CSV format */ + else if (strcmp(param, "fieldsep_csv") == 0) + { + printf(_("Field separator for CSV is \"%s\".\n"), + popt->topt.fieldSepCsv); + } + /* show disable "(x rows)" footer */ else if (strcmp(param, "footer") == 0) { @@ -4134,6 +4166,8 @@ pset_value_string(const char *param, struct printQueryOpt *popt) return pset_quoted_string(popt->topt.fieldSep.separator ? popt->topt.fieldSep.separator : ""); + else if (strcmp(param, "fieldsep_csv") == 0) + return pset_quoted_string(popt->topt.fieldSepCsv); else if (strcmp(param, "fieldsep_zero") == 0) return pstrdup(pset_bool_string(popt->topt.fieldSep.separator_zero)); else if (strcmp(param, "footer") == 0) diff --git a/src/bin/psql/help.c b/src/bin/psql/help.c index 586aebddd3..20b3dfc683 100644 --- a/src/bin/psql/help.c +++ b/src/bin/psql/help.c @@ -108,6 +108,7 @@ usage(unsigned short int pager) fprintf(output, _("\nOutput format options:\n")); fprintf(output, _(" -A, --no-align unaligned table output mode\n")); + fprintf(output, _(" --csv CSV (Comma Separated Values) table output mode\n")); fprintf(output, _(" -F, --field-separator=STRING\n" " field separator for unaligned output (default: \"%s\")\n"), DEFAULT_FIELD_SEP); @@ -272,10 +273,10 @@ slashUsage(unsigned short int pager) fprintf(output, _(" \\H toggle HTML output mode (currently %s)\n"), ON(pset.popt.topt.format == PRINT_HTML)); fprintf(output, _(" \\pset [NAME [VALUE]] set table output option\n" - " (NAME := {border|columns|expanded|fieldsep|fieldsep_zero|\n" - " footer|format|linestyle|null|numericlocale|pager|\n" - " pager_min_lines|recordsep|recordsep_zero|tableattr|title|\n" - " tuples_only|unicode_border_linestyle|\n" + " (NAME := {border|columns|expanded|fieldsep|fieldsep_csv|\n" + " fieldsep_zero|footer|format|linestyle|null|numericlocale|\n" + " pager|pager_min_lines|recordsep|recordsep_zero|tableattr|\n" + " title|tuples_only|unicode_border_linestyle|\n" " unicode_column_linestyle|unicode_header_linestyle})\n")); fprintf(output, _(" \\t [on|off] show only rows (currently %s)\n"), ON(pset.popt.topt.tuples_only)); diff --git a/src/bin/psql/settings.h b/src/bin/psql/settings.h index 69e617e6b5..93d0b957f5 100644 --- a/src/bin/psql/settings.h +++ b/src/bin/psql/settings.h @@ -13,6 +13,7 @@ #include "fe_utils/print.h" #define DEFAULT_FIELD_SEP "|" +#define DEFAULT_FIELD_SEP_CSV "," #define DEFAULT_RECORD_SEP "\n" #if defined(WIN32) || defined(__CYGWIN__) diff --git a/src/bin/psql/startup.c b/src/bin/psql/startup.c index be57574cd3..40e14494cb 100644 --- a/src/bin/psql/startup.c +++ b/src/bin/psql/startup.c @@ -148,6 +148,8 @@ main(int argc, char *argv[]) pset.popt.topt.unicode_column_linestyle = UNICODE_LINESTYLE_SINGLE; pset.popt.topt.unicode_header_linestyle = UNICODE_LINESTYLE_SINGLE; + pset.popt.topt.fieldSepCsv = pg_strdup(DEFAULT_FIELD_SEP_CSV); + refresh_utf8format(&(pset.popt.topt)); /* We must get COLUMNS here before readline() sets it */ @@ -468,6 +470,7 @@ parse_psql_options(int argc, char *argv[], struct adhoc_opts *options) {"expanded", no_argument, NULL, 'x'}, {"no-psqlrc", no_argument, NULL, 'X'}, {"help", optional_argument, NULL, 1}, + {"csv", no_argument, NULL, 2}, {NULL, 0, NULL, 0} }; @@ -658,6 +661,9 @@ parse_psql_options(int argc, char *argv[], struct adhoc_opts *options) exit(EXIT_SUCCESS); } break; + case 2: + pset.popt.topt.format = PRINT_CSV; + break; default: unknown_option: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 7294824948..eb1c54e47a 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -3527,9 +3527,9 @@ psql_completion(const char *text, int start, int end) else if (TailMatchesCS("\\password")) COMPLETE_WITH_QUERY(Query_for_list_of_roles); else if (TailMatchesCS("\\pset")) - COMPLETE_WITH_CS("border", "columns", "expanded", - "fieldsep", "fieldsep_zero", "footer", "format", - "linestyle", "null", "numericlocale", + COMPLETE_WITH_CS("border", "columns", "expanded", "fieldsep", + "fieldsep_csv", "fieldsep_zero", "footer", + "format", "linestyle", "null", "numericlocale", "pager", "pager_min_lines", "recordsep", "recordsep_zero", "tableattr", "title", "tuples_only", @@ -3539,7 +3539,7 @@ psql_completion(const char *text, int start, int end) else if (TailMatchesCS("\\pset", MatchAny)) { if (TailMatchesCS("format")) - COMPLETE_WITH_CS("aligned", "asciidoc", "html", "latex", + COMPLETE_WITH_CS("aligned", "asciidoc", "csv", "html", "latex", "latex-longtable", "troff-ms", "unaligned", "wrapped"); else if (TailMatchesCS("linestyle")) diff --git a/src/fe_utils/print.c b/src/fe_utils/print.c index cb9a9a0613..a56c5f3762 100644 --- a/src/fe_utils/print.c +++ b/src/fe_utils/print.c @@ -2783,6 +2783,113 @@ print_troff_ms_vertical(const printTableContent *cont, FILE *fout) } } +/*************************/ +/* CSV */ +/*************************/ +static void +csv_escaped_print(const char *text, FILE *fout) +{ + const char *p; + + fputc('"', fout); + for (p = text; *p; p++) + { + if (*p == '"') + fputc('"', fout); /* double quotes are doubled */ + fputc(*p, fout); + } + fputc('"', fout); +} + +static void +csv_print_field(const char *text, FILE *fout, const char *sep) +{ + /*---------------- + * Enclose and escape field contents when one of these conditions is met: + * - the field separator is found in the contents. + * - the field contains a CR or LF. + * - the field contains a double quote. + *---------------- + */ + if ((sep != NULL && *sep != '\0' && strstr(text, sep) != NULL) || + strcspn(text, "\r\n\"") != strlen(text)) + { + csv_escaped_print(text, fout); + } + else + fputs(text, fout); +} + +static void +print_csv_text(const printTableContent *cont, FILE *fout) +{ + const char *const *ptr; + int i; + + if (cancel_pressed) + return; + + /* + * The title and footer are never printed in csv format. The header is + * printed if opt_tuples_only is false. + * + * Despite RFC 4180 saying that end of lines are CRLF, terminate lines + * with '\n', which represent system-dependent end of lines in text mode + * (typically LF on Unix and CRLF on Windows). + */ + + if (cont->opt->start_table && !cont->opt->tuples_only) + { + /* print headers */ + for (ptr = cont->headers; *ptr; ptr++) + { + if (ptr != cont->headers) + fputs(cont->opt->fieldSepCsv, fout); + csv_print_field(*ptr, fout, cont->opt->fieldSepCsv); + } + fputc('\n', fout); + } + + /* print cells */ + for (i = 0, ptr = cont->cells; *ptr; i++, ptr++) + { + csv_print_field(*ptr, fout, cont->opt->fieldSepCsv); + + if ((i + 1) % cont->ncolumns) + fputs(cont->opt->fieldSepCsv, fout); + else + { + fputc('\n', fout); + } + } +} + +static void +print_csv_vertical(const printTableContent *cont, FILE *fout) +{ + unsigned int i; + const char *const *ptr; + + /* print records */ + for (i = 0, ptr = cont->cells; *ptr; i++, ptr++) + { + if (cancel_pressed) + return; + + /* print name of column */ + csv_print_field(cont->headers[i % cont->ncolumns], fout, + cont->opt->fieldSepCsv); + + /* print field separator */ + fputs(cont->opt->fieldSepCsv, fout); + + /* print field value */ + csv_print_field(*ptr, fout, cont->opt->fieldSepCsv); + + fputc('\n', fout); + } +} + /********************************/ /* Public functions */ @@ -3234,6 +3341,12 @@ printTable(const printTableContent *cont, else print_aligned_text(cont, fout, is_pager); break; + case PRINT_CSV: + if (cont->opt->expanded == 1) + print_csv_vertical(cont, fout); + else + print_csv_text(cont, fout); + break; case PRINT_HTML: if (cont->opt->expanded == 1) print_html_vertical(cont, fout); diff --git a/src/include/fe_utils/print.h b/src/include/fe_utils/print.h index b761349bc7..665a7827e2 100644 --- a/src/include/fe_utils/print.h +++ b/src/include/fe_utils/print.h @@ -28,6 +28,7 @@ enum printFormat PRINT_NOTHING = 0, /* to make sure someone initializes this */ PRINT_ALIGNED, PRINT_ASCIIDOC, + PRINT_CSV, PRINT_HTML, PRINT_LATEX, PRINT_LATEX_LONGTABLE, @@ -112,6 +113,7 @@ typedef struct printTableOpt const printTextFormat *line_style; /* line style (NULL for default) */ struct separator fieldSep; /* field separator for unaligned text mode */ struct separator recordSep; /* record separator for unaligned text mode */ + char *fieldSepCsv; /* field separator for csv format */ bool numericLocale; /* locale-aware numeric units separator and * decimal marker */ char *tableAttr; /* attributes for HTML <table ...> */ diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out index 3818cfea7e..ceea28f027 100644 --- a/src/test/regress/expected/psql.out +++ b/src/test/regress/expected/psql.out @@ -262,6 +262,7 @@ border 1 columns 0 expanded off fieldsep '|' +fieldsep_csv ',' fieldsep_zero off footer on format aligned @@ -3243,3 +3244,146 @@ last error message: division by zero \echo 'last error code:' :LAST_ERROR_SQLSTATE last error code: 22012 \unset FETCH_COUNT +-- test csv format +-- test multi-line headers, wrapping, quoting rules and newline indicators +prepare q as select 'ab,cd' as col1, + 'ab' as "col,2", + E'a\tb' as col3, + '"' as col4, + '""' as col5, + 'a"b' as "col""6", + E'a\nb' as col7, + NULL as col8, + 'ab' as "col +9", + 'cd' as "col + +10", + array['ab', E'cd\nef'] as col11, + '{"a":"a,b", "a,b":null, "c":"a,\"b"}'::json as col12 + from generate_series(1,2); +\pset format csv +\pset fieldsep_csv ',' +\pset expanded off +\t off +execute q; +col1,"col,2",col3,col4,col5,"col""6",col7,col8,"col +9","col + +10",col11,col12 +"ab,cd",ab,a b,"""","""""","a""b","a +b",,ab,cd,"{ab,""cd +ef""}","{""a"":""a,b"", ""a,b"":null, ""c"":""a,\""b""}" +"ab,cd",ab,a b,"""","""""","a""b","a +b",,ab,cd,"{ab,""cd +ef""}","{""a"":""a,b"", ""a,b"":null, ""c"":""a,\""b""}" +\pset fieldsep_csv '\t' +execute q; +col1 col,2 col3 col4 col5 "col""6" col7 col8 "col +9" "col + +10" col11 col12 +ab,cd ab "a b" """" """""" "a""b" "a +b" ab cd "{ab,""cd +ef""}" "{""a"":""a,b"", ""a,b"":null, ""c"":""a,\""b""}" +ab,cd ab "a b" """" """""" "a""b" "a +b" ab cd "{ab,""cd +ef""}" "{""a"":""a,b"", ""a,b"":null, ""c"":""a,\""b""}" +\t on +execute q; +ab,cd ab "a b" """" """""" "a""b" "a +b" ab cd "{ab,""cd +ef""}" "{""a"":""a,b"", ""a,b"":null, ""c"":""a,\""b""}" +ab,cd ab "a b" """" """""" "a""b" "a +b" ab cd "{ab,""cd +ef""}" "{""a"":""a,b"", ""a,b"":null, ""c"":""a,\""b""}" +\t off +\pset expanded on +execute q; +col1 ab,cd +col,2 ab +col3 "a b" +col4 """" +col5 """""" +"col""6" "a""b" +col7 "a +b" +col8 +"col +9" ab +"col + +10" cd +col11 "{ab,""cd +ef""}" +col12 "{""a"":""a,b"", ""a,b"":null, ""c"":""a,\""b""}" +col1 ab,cd +col,2 ab +col3 "a b" +col4 """" +col5 """""" +"col""6" "a""b" +col7 "a +b" +col8 +"col +9" ab +"col + +10" cd +col11 "{ab,""cd +ef""}" +col12 "{""a"":""a,b"", ""a,b"":null, ""c"":""a,\""b""}" +\pset fieldsep_csv ',' +execute q; +col1,"ab,cd" +"col,2",ab +col3,a b +col4,"""" +col5,"""""" +"col""6","a""b" +col7,"a +b" +col8, +"col +9",ab +"col + +10",cd +col11,"{ab,""cd +ef""}" +col12,"{""a"":""a,b"", ""a,b"":null, ""c"":""a,\""b""}" +col1,"ab,cd" +"col,2",ab +col3,a b +col4,"""" +col5,"""""" +"col""6","a""b" +col7,"a +b" +col8, +"col +9",ab +"col + +10",cd +col11,"{ab,""cd +ef""}" +col12,"{""a"":""a,b"", ""a,b"":null, ""c"":""a,\""b""}" +-- illegal csv separators +\pset fieldsep_csv '' +\pset: the CSV field separator must be a single character +\pset fieldsep_csv ',,' +\pset: the CSV field separator must be a single character +\pset fieldsep_csv '\0' +\pset: the CSV field separator must be a single character +\pset fieldsep_csv '\n' +\pset: the CSV field separator cannot be a double quote, a newline, or a carriage return +\pset fieldsep_csv '\r' +\pset: the CSV field separator cannot be a double quote, a newline, or a carriage return +\pset fieldsep_csv '"' +\pset: the CSV field separator cannot be a double quote, a newline, or a carriage return +deallocate q; +\pset format aligned +\pset expanded off +\t off diff --git a/src/test/regress/sql/psql.sql b/src/test/regress/sql/psql.sql index b45da9bb8d..fd2d0df0c3 100644 --- a/src/test/regress/sql/psql.sql +++ b/src/test/regress/sql/psql.sql @@ -688,3 +688,50 @@ select 1/(15-unique2) from tenk1 order by unique2 limit 19; \echo 'last error code:' :LAST_ERROR_SQLSTATE \unset FETCH_COUNT + +-- test csv format +-- test multi-line headers, wrapping, quoting rules and newline indicators +prepare q as select 'ab,cd' as col1, + 'ab' as "col,2", + E'a\tb' as col3, + '"' as col4, + '""' as col5, + 'a"b' as "col""6", + E'a\nb' as col7, + NULL as col8, + 'ab' as "col +9", + 'cd' as "col + +10", + array['ab', E'cd\nef'] as col11, + '{"a":"a,b", "a,b":null, "c":"a,\"b"}'::json as col12 + from generate_series(1,2); + +\pset format csv +\pset fieldsep_csv ',' +\pset expanded off +\t off +execute q; +\pset fieldsep_csv '\t' +execute q; +\t on +execute q; +\t off +\pset expanded on +execute q; +\pset fieldsep_csv ',' +execute q; + +-- illegal csv separators +\pset fieldsep_csv '' +\pset fieldsep_csv ',,' +\pset fieldsep_csv '\0' +\pset fieldsep_csv '\n' +\pset fieldsep_csv '\r' +\pset fieldsep_csv '"' + +deallocate q; +\pset format aligned +\pset expanded off +\t off
signature.asc
Description: PGP signature