Artur Zakirov wrote:

> I've tested the patch and it seems it works as expected.

Thanks for looking at this!

> It seems it isn't necessary to handle "\." within
> "CopyAttributeOutCSV()" (file "src/backend/commands/copyto.c")
> anymore.

It's still useful to produce CSV data that can be safely
reloaded by previous versions.
Until these versions are EOL'ed, I assume we'd better
continue to quote "\."

> Another thing is that the comparison "copystream ==
> pset.cur_cmd_source" happens twice within "handleCopyIn()". TBH it is
> a bit confusing to me what is the real purpose of that check, but one
> of the comparisons looks unnecessary.

Indeed, good catch. The redundant comparison is removed in the
attached v6.


Best regards,
-- 
Daniel Vérité
https://postgresql.verite.pro/
Twitter: @DanielVerite
From b05ebe4c4d5123592797c59282e3fab6e8eeed04 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20V=C3=A9rit=C3=A9?= <dan...@manitou-mail.org>
Date: Mon, 30 Sep 2024 20:57:50 +0200
Subject: [PATCH v6] Support backslash-dot on a line by itself as valid data in
 COPY FROM...CSV

---
 doc/src/sgml/ref/copy.sgml           |  6 +--
 doc/src/sgml/ref/psql-ref.sgml       |  4 --
 src/backend/commands/copyfromparse.c | 57 +++++++---------------------
 src/bin/psql/copy.c                  | 28 +++++++++-----
 src/test/regress/expected/copy.out   | 18 +++++++++
 src/test/regress/sql/copy.sql        | 12 ++++++
 6 files changed, 63 insertions(+), 62 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 1518af8a04..f7eb59dbac 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -814,11 +814,7 @@ COPY <replaceable class="parameter">count</replaceable>
     format, <literal>\.</literal>, the end-of-data marker, could also appear
     as a data value.  To avoid any misinterpretation, a <literal>\.</literal>
     data value appearing as a lone entry on a line is automatically
-    quoted on output, and on input, if quoted, is not interpreted as the
-    end-of-data marker.  If you are loading a file created by another
-    application that has a single unquoted column and might have a
-    value of <literal>\.</literal>, you might need to quote that value in the
-    input file.
+    quoted on output.
    </para>
 
    <note>
diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml
index 3fd9959ed1..0570211cf2 100644
--- a/doc/src/sgml/ref/psql-ref.sgml
+++ b/doc/src/sgml/ref/psql-ref.sgml
@@ -1179,10 +1179,6 @@ SELECT $1 \parse stmt1
         destination, because all data must pass through the client/server
         connection.  For large amounts of data the <acronym>SQL</acronym>
         command might be preferable.
-        Also, because of this pass-through method, <literal>\copy
-        ... from</literal> in <acronym>CSV</acronym> mode will erroneously
-        treat a <literal>\.</literal> data value alone on a line as an
-        end-of-input marker.
         </para>
         </tip>
 
diff --git a/src/backend/commands/copyfromparse.c 
b/src/backend/commands/copyfromparse.c
index 97a4c387a3..d0df2d807f 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -136,14 +136,6 @@ if (1) \
        } \
 } else ((void) 0)
 
-/* Undo any read-ahead and jump out of the block. */
-#define NO_END_OF_COPY_GOTO \
-if (1) \
-{ \
-       input_buf_ptr = prev_raw_ptr + 1; \
-       goto not_end_of_copy; \
-} else ((void) 0)
-
 /* NOTE: there's a copy of this in copyto.c */
 static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
 
@@ -1182,7 +1174,6 @@ CopyReadLineText(CopyFromState cstate)
        bool            result = false;
 
        /* CSV variables */
-       bool            first_char_in_line = true;
        bool            in_quote = false,
                                last_was_esc = false;
        char            quotec = '\0';
@@ -1377,10 +1368,9 @@ CopyReadLineText(CopyFromState cstate)
                }
 
                /*
-                * In CSV mode, we only recognize \. alone on a line.  This is 
because
-                * \. is a valid CSV data value.
+                * In CSV mode, backslash is a normal character.
                 */
-               if (c == '\\' && (!cstate->opts.csv_mode || first_char_in_line))
+               if (c == '\\' && !cstate->opts.csv_mode)
                {
                        char            c2;
 
@@ -1413,21 +1403,15 @@ CopyReadLineText(CopyFromState cstate)
 
                                        if (c2 == '\n')
                                        {
-                                               if (!cstate->opts.csv_mode)
-                                                       ereport(ERROR,
-                                                                       
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
-                                                                        
errmsg("end-of-copy marker does not match previous newline style")));
-                                               else
-                                                       NO_END_OF_COPY_GOTO;
+                                               ereport(ERROR,
+                                                               
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+                                                                
errmsg("end-of-copy marker does not match previous newline style")));
                                        }
                                        else if (c2 != '\r')
                                        {
-                                               if (!cstate->opts.csv_mode)
-                                                       ereport(ERROR,
-                                                                       
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
-                                                                        
errmsg("end-of-copy marker corrupt")));
-                                               else
-                                                       NO_END_OF_COPY_GOTO;
+                                               ereport(ERROR,
+                                                               
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+                                                                
errmsg("end-of-copy marker corrupt")));
                                        }
                                }
 
@@ -1438,12 +1422,9 @@ CopyReadLineText(CopyFromState cstate)
 
                                if (c2 != '\r' && c2 != '\n')
                                {
-                                       if (!cstate->opts.csv_mode)
-                                               ereport(ERROR,
-                                                               
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
-                                                                
errmsg("end-of-copy marker corrupt")));
-                                       else
-                                               NO_END_OF_COPY_GOTO;
+                                       ereport(ERROR,
+                                                       
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+                                                        errmsg("end-of-copy 
marker corrupt")));
                                }
 
                                if ((cstate->eol_type == EOL_NL && c2 != '\n') 
||
@@ -1467,7 +1448,7 @@ CopyReadLineText(CopyFromState cstate)
                                result = true;  /* report EOF */
                                break;
                        }
-                       else if (!cstate->opts.csv_mode)
+                       else
                        {
                                /*
                                 * If we are here, it means we found a 
backslash followed by
@@ -1475,23 +1456,11 @@ CopyReadLineText(CopyFromState cstate)
                                 * after a backslash is special, so we skip 
over that second
                                 * character too.  If we didn't do that \\. 
would be
                                 * considered an eof-of copy, while in non-CSV 
mode it is a
-                                * literal backslash followed by a period.  In 
CSV mode,
-                                * backslashes are not special, so we want to 
process the
-                                * character after the backslash just like a 
normal character,
-                                * so we don't increment in those cases.
+                                * literal backslash followed by a period.
                                 */
                                input_buf_ptr++;
                        }
                }
-
-               /*
-                * This label is for CSV cases where \. appears at the start of 
a
-                * line, but there is more text after it, meaning it was a data 
value.
-                * We are more strict for \. in CSV mode because \. could be a 
data
-                * value, while in non-CSV mode, \. cannot be a data value.
-                */
-not_end_of_copy:
-               first_char_in_line = false;
        }                                                       /* end of outer 
loop */
 
        /*
diff --git a/src/bin/psql/copy.c b/src/bin/psql/copy.c
index 961ae32694..57b36a082e 100644
--- a/src/bin/psql/copy.c
+++ b/src/bin/psql/copy.c
@@ -620,20 +620,30 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool 
isbinary, PGresult **res)
                                /* current line is done? */
                                if (buf[buflen - 1] == '\n')
                                {
-                                       /* check for EOF marker, but not on a 
partial line */
-                                       if (at_line_begin)
+                                       /*
+                                        * When at the beginning of the line, 
check for EOF
+                                        * marker. If the marker is found and 
the data is inlined,
+                                        * we must stop at this point.  If not, 
the \. line can be
+                                        * sent to the server, and we let it 
decide whether it's
+                                        * an EOF or not depending on the 
format: in basic TEXT,
+                                        * \. is going to be interpreted as an 
EOF, in CSV, it
+                                        * will not.
+                                        */
+                                       if (at_line_begin && copystream == 
pset.cur_cmd_source)
                                        {
-                                               /*
-                                                * This code erroneously 
assumes '\.' on a line alone
-                                                * inside a quoted CSV string 
terminates the \copy.
-                                                * 
https://www.postgresql.org/message-id/e1tdnvq-0001ju...@wrigleys.postgresql.org
-                                                *
-                                                * 
https://www.postgresql.org/message-id/bfcd57e4-8f23-4c3e-a5db-2571d0920...@beta.fastmail.com
-                                                */
                                                if ((linelen == 3 && 
memcmp(fgresult, "\\.\n", 3) == 0) ||
                                                        (linelen == 4 && 
memcmp(fgresult, "\\.\r\n", 4) == 0))
                                                {
                                                        copydone = true;
+
+                                                       /*
+                                                        * Remove the EOF 
marker from the data sent. In
+                                                        * the case of CSV, the 
EOF marker must be
+                                                        * removed, otherwise 
it would be interpreted by
+                                                        * the server as valid 
data.
+                                                        */
+                                                       *fgresult = '\0';
+                                                       buflen -= linelen;
                                                }
                                        }
 
diff --git a/src/test/regress/expected/copy.out 
b/src/test/regress/expected/copy.out
index 44114089a6..174fe05603 100644
--- a/src/test/regress/expected/copy.out
+++ b/src/test/regress/expected/copy.out
@@ -32,6 +32,24 @@ select * from copytest except select * from copytest2;
 -------+------+--------
 (0 rows)
 
+--- test unquoted \. as data inside CSV
+-- do not use copy out to export the data, as it would quote \.
+\o :filename
+\qecho line1
+\qecho '\\.'
+\qecho line2
+\o
+-- get the data back in with copy
+truncate copytest2;
+copy copytest2(test) from :'filename' csv;
+select test from copytest2 order by test collate "C";
+ test  
+-------
+ \.
+ line1
+ line2
+(3 rows)
+
 -- test header line feature
 create temp table copytest3 (
        c1 int,
diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql
index e2dd24cb35..8ed7922ab4 100644
--- a/src/test/regress/sql/copy.sql
+++ b/src/test/regress/sql/copy.sql
@@ -38,6 +38,18 @@ copy copytest2 from :'filename' csv quote '''' escape E'\\';
 
 select * from copytest except select * from copytest2;
 
+--- test unquoted \. as data inside CSV
+-- do not use copy out to export the data, as it would quote \.
+\o :filename
+\qecho line1
+\qecho '\\.'
+\qecho line2
+\o
+-- get the data back in with copy
+truncate copytest2;
+copy copytest2(test) from :'filename' csv;
+select test from copytest2 order by test collate "C";
+
 
 -- test header line feature
 
-- 
2.34.1

Reply via email to