Hi, Currently we can skip header line on COPY FROM but having the ability to skip and stop copying at any line can use to divide long copy operation and enable to copy a subset of the file and skipping footer. Attach is a patch for it
Regards Surafel
diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 411941ed31..86f9a6a905 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -43,6 +43,8 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable FORCE_NOT_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] ) FORCE_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ENCODING '<replaceable class="parameter">encoding_name</replaceable>' + START <replaceable class="parameter">starting_line_number</replaceable> + END <replaceable class="parameter">ending_line_number</replaceable> </synopsis> </refsynopsisdiv> @@ -353,6 +355,24 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable </listitem> </varlistentry> + <varlistentry> + <term><literal>START</literal></term> + <listitem> + <para> + Specifies the line number to begin copying. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><literal>END</literal></term> + <listitem> + <para> + Specifies the line number to end copying. + </para> + </listitem> + </varlistentry> + </variablelist> </refsect1> diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 4311e16007..4d07716a9f 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -121,6 +121,8 @@ typedef struct CopyStateData int file_encoding; /* file or remote side's character encoding */ bool need_transcoding; /* file encoding diff from server? */ bool encoding_embeds_ascii; /* ASCII can be non-first byte? */ + int start_postion; /* copying star line */ + int end_postion; /* copying end line */ /* parameters from the COPY command */ Relation rel; /* relation to copy to or from */ @@ -347,6 +349,7 @@ static void CopySendInt32(CopyState cstate, int32 val); static bool CopyGetInt32(CopyState cstate, int32 *val); static void CopySendInt16(CopyState cstate, int16 val); static bool CopyGetInt16(CopyState cstate, int16 *val); +static void skipLines(CopyState cstate); /* @@ -1223,6 +1226,34 @@ ProcessCopyOptions(ParseState *pstate, defel->defname), parser_errposition(pstate, defel->location))); } + else if (strcmp(defel->defname, "start") == 0) + { + if (cstate->start_postion) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"), + parser_errposition(pstate, defel->location))); + cstate->start_postion = defGetInt64(defel); + if (cstate->start_postion < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid START line number"), + parser_errposition(pstate, defel->location))); + } + else if (strcmp(defel->defname, "end") == 0) + { + if (cstate->end_postion) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"), + parser_errposition(pstate, defel->location))); + cstate->end_postion = defGetInt64(defel); + if (cstate->end_postion < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid END line number"), + parser_errposition(pstate, defel->location))); + } else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), @@ -1373,6 +1404,13 @@ ProcessCopyOptions(ParseState *pstate, ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("CSV quote character must not appear in the NULL specification"))); + + if (cstate->end_postion != 0 && + cstate->start_postion > cstate->end_postion) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("START line number can not be greater then END line number"))); + } /* @@ -2317,6 +2355,7 @@ CopyFrom(CopyState cstate) uint64 lastPartitionSampleLineNo = 0; uint64 nPartitionChanges = 0; double avgTuplesPerPartChange = 0; + uint64 end_line = 1; Assert(cstate->rel); @@ -2619,6 +2658,20 @@ CopyFrom(CopyState cstate) has_instead_insert_row_trig = (resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->trig_insert_instead_row); + if (cstate->start_postion) + { + end_line = cstate->start_postion; + skipLines(cstate); + } + + /* throw the header line away means start copying at second line */ + if (cstate->start_postion == 0 && cstate->header_line) + { + cstate->start_postion = 2; + end_line = cstate->start_postion; + skipLines(cstate); + } + /* * Check BEFORE STATEMENT insertion triggers. It's debatable whether we * should do this for COPY, since it's not really an "INSERT" statement as @@ -2644,6 +2697,9 @@ CopyFrom(CopyState cstate) TupleTableSlot *slot; bool skip_tuple; + if (cstate->end_postion !=0 && cstate->end_postion < end_line++) + break; + CHECK_FOR_INTERRUPTS(); if (nBufferedTuples == 0) @@ -3394,14 +3450,6 @@ NextCopyFromRawFields(CopyState cstate, char ***fields, int *nfields) /* only available for text or csv input */ Assert(!cstate->binary); - /* on input just throw the header line away */ - if (cstate->cur_lineno == 0 && cstate->header_line) - { - cstate->cur_lineno++; - if (CopyReadLine(cstate)) - return false; /* done */ - } - cstate->cur_lineno++; /* Actually read the line into memory here */ @@ -4086,6 +4134,22 @@ not_end_of_copy: return result; } +/* + * go to starting postion. + */ +static void +skipLines(CopyState cstate) +{ + for (int i=1; i < cstate->start_postion; i++) + { + if (CopyReadLineText(cstate)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("START line can not be greater than the number of record in the file"))); + + } +} + /* * Return decimal value for a hexadecimal digit */ diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 2c2208ffb7..a16b91cf11 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -3093,6 +3093,14 @@ copy_opt_item: { $$ = makeDefElem("encoding", (Node *)makeString($2), @1); } + | START Iconst + { + $$ = makeDefElem("start", (Node *)makeInteger($2), @1); + } + | END_P Iconst + { + $$ = makeDefElem("end", (Node *)makeInteger($2), @1); + } ; /* The following exist for backward compatibility with very old versions */ diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index b5cadce3ef..ae2d11226f 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -41,4 +41,5 @@ extern uint64 CopyFrom(CopyState cstate); extern DestReceiver *CreateCopyDestReceiver(void); + #endif /* COPY_H */ diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index 19bb538411..d01e3f5cf0 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -49,6 +49,9 @@ CONTEXT: COPY x, line 1: "2002 232 40 50 60 70 80" COPY x (b, c, d, e) from stdin delimiter ',' null 'x'; COPY x from stdin WITH DELIMITER AS ';' NULL AS ''; COPY x from stdin WITH DELIMITER AS ':' NULL AS E'\\X' ENCODING 'sql_ascii'; +COPY x (a, b, c, d, e) from stdin (START 2); +COPY x (a, b, c, d, e) from stdin (END 3); +COPY x (a, b, c, d, e) from stdin (START 2 , END 3); -- check results of copy in SELECT * FROM x; a | b | c | d | e @@ -73,12 +76,21 @@ SELECT * FROM x; 4006 | 6 | BackslashN | \N | before trigger fired 4007 | 7 | XX | XX | before trigger fired 4008 | 8 | Delimiter | : | before trigger fired + 50002 | 23 | 33 | 43 | before trigger fired + 50003 | 24 | 34 | 44 | before trigger fired + 50004 | 25 | 35 | 45 | before trigger fired + 50005 | 26 | 36 | 46 | before trigger fired + 60001 | 22 | 32 | 42 | before trigger fired + 60002 | 23 | 33 | 43 | before trigger fired + 60003 | 24 | 34 | 44 | before trigger fired + 70002 | 23 | 33 | 43 | before trigger fired + 70003 | 24 | 34 | 44 | before trigger fired 1 | 1 | stuff | test_1 | after trigger fired 2 | 2 | stuff | test_2 | after trigger fired 3 | 3 | stuff | test_3 | after trigger fired 4 | 4 | stuff | test_4 | after trigger fired 5 | 5 | stuff | test_5 | after trigger fired -(25 rows) +(34 rows) -- check copy out COPY x TO stdout; @@ -102,6 +114,15 @@ COPY x TO stdout; 4006 6 BackslashN \\N before trigger fired 4007 7 XX XX before trigger fired 4008 8 Delimiter : before trigger fired +50002 23 33 43 before trigger fired +50003 24 34 44 before trigger fired +50004 25 35 45 before trigger fired +50005 26 36 46 before trigger fired +60001 22 32 42 before trigger fired +60002 23 33 43 before trigger fired +60003 24 34 44 before trigger fired +70002 23 33 43 before trigger fired +70003 24 34 44 before trigger fired 1 1 stuff test_1 after trigger fired 2 2 stuff test_2 after trigger fired 3 3 stuff test_3 after trigger fired @@ -128,6 +149,15 @@ N before trigger fired BackslashN before trigger fired XX before trigger fired Delimiter before trigger fired +33 before trigger fired +34 before trigger fired +35 before trigger fired +36 before trigger fired +32 before trigger fired +33 before trigger fired +34 before trigger fired +33 before trigger fired +34 before trigger fired stuff after trigger fired stuff after trigger fired stuff after trigger fired @@ -154,6 +184,15 @@ I'm null before trigger fired 6 before trigger fired 7 before trigger fired 8 before trigger fired +23 before trigger fired +24 before trigger fired +25 before trigger fired +26 before trigger fired +22 before trigger fired +23 before trigger fired +24 before trigger fired +23 before trigger fired +24 before trigger fired 1 after trigger fired 2 after trigger fired 3 after trigger fired diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index e36df8858e..92e576e0b2 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -95,6 +95,30 @@ COPY x from stdin WITH DELIMITER AS ':' NULL AS E'\\X' ENCODING 'sql_ascii'; 4008:8:Delimiter:\::\: \. +COPY x (a, b, c, d, e) from stdin (START 2); +50001 22 32 42 52 +50002 23 33 43 53 +50003 24 34 44 54 +50004 25 35 45 55 +50005 26 36 46 56 +\. + +COPY x (a, b, c, d, e) from stdin (END 3); +60001 22 32 42 52 +60002 23 33 43 53 +60003 24 34 44 54 +60004 25 35 45 55 +60005 26 36 46 56 +\. + +COPY x (a, b, c, d, e) from stdin (START 2 , END 3); +70001 22 32 42 52 +70002 23 33 43 53 +70003 24 34 44 54 +70004 25 35 45 55 +70005 26 36 46 56 +\. + -- check results of copy in SELECT * FROM x;