Hi Gary,

what do you think about the Decorator approach, suggested by Adrian Crum in
JIRA?

Benedikt

2014-10-29 6:44 GMT+01:00 <ggreg...@apache.org>:

> Author: ggregory
> Date: Wed Oct 29 05:44:40 2014
> New Revision: 1635052
>
> URL: http://svn.apache.org/r1635052
> Log:
> [CSV-131] Save positions of records to enable random access. The floor is
> open for code review and further discussion based on the comments in the
> Jira.
>
> Modified:
>     commons/proper/csv/trunk/src/changes/changes.xml
>
> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java
>
> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVRecord.java
>
> commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java
>
> commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVRecordTest.java
>
> Modified: commons/proper/csv/trunk/src/changes/changes.xml
> URL:
> http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/changes/changes.xml?rev=1635052&r1=1635051&r2=1635052&view=diff
>
> ==============================================================================
> --- commons/proper/csv/trunk/src/changes/changes.xml (original)
> +++ commons/proper/csv/trunk/src/changes/changes.xml Wed Oct 29 05:44:40
> 2014
> @@ -39,6 +39,7 @@
>    </properties>
>    <body>
>      <release version="1.1" date="2014-mm-dd" description="Feature and bug
> fix release">
> +      <action issue="CSV-131" type="add" dev="ggregory" due-to="Holger
> Stratmann">Save positions of records to enable random access</action>
>        <action issue="CSV-130" type="fix" dev="ggregory" due-to="Sergei
> Lebedev">CSVFormat#withHeader doesn't work well with #printComment, add
> withHeaderComments(String...)</action>
>        <action issue="CSV-128" type="fix" dev="ggregory">CSVFormat.EXCEL
> should ignore empty header names</action>
>        <action issue="CSV-129" type="add" dev="ggregory">Add
> CSVFormat#with 0-arg methods matching boolean arg methods</action>
>
> Modified:
> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java
> URL:
> http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java?rev=1635052&r1=1635051&r2=1635052&view=diff
>
> ==============================================================================
> ---
> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java
> (original)
> +++
> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java
> Wed Oct 29 05:44:40 2014
> @@ -219,6 +219,12 @@ public final class CSVParser implements
>      private final List<String> record = new ArrayList<String>();
>
>      private long recordNumber;
> +
> +    /**
> +     * Lexer offset if the parser does not start parsing at the beginning
> of the source. Usually used in combination
> +     * with {@link #setNextRecordNumber(long)}
> +     */
> +    private long characterOffset;
>
>      private final Token reusableToken = new Token();
>
> @@ -296,6 +302,43 @@ public final class CSVParser implements
>      }
>
>      /**
> +     * Sets the record number to be assigned to the next record read.
> +     * <p>
> +     * Use this if the reader is not positioned at the first record when
> you create the parser. For example, the first
> +     * record read might be the 51st record in the source file.
> +     * </p>
> +     * <p>
> +     * If you want the records to also have the correct character
> position referring to the underlying source, call
> +     * {@link #setNextCharacterPosition(long)}.
> +     * </p>
> +     *
> +     * @param nextRecordNumber
> +     *            the next record number
> +     * @since 1.1
> +     */
> +    public void setNextRecordNumber(long nextRecordNumber) {
> +        this.recordNumber = nextRecordNumber - 1;
> +    }
> +
> +    /**
> +     * Sets the current position in the source stream regardless of where
> the parser and lexer start reading.
> +     * <p>
> +     * For example: We open a file and seek to position 5434 in order to
> start reading at record 42. In order to have
> +     * the parser assign the correct characterPosition to records, we
> call this method.
> +     * </p>
> +     * <p>
> +     * If you want the records to also have the correct record numbers,
> call {@link #setNextRecordNumber(long)}
> +     * </p>
> +     *
> +     * @param position
> +     *            the new character position
> +     * @since 1.1
> +     */
> +    public void setNextCharacterPosition(long position) {
> +        this.characterOffset = position - lexer.getCharacterPosition();
> +    }
> +
> +    /**
>       * Returns the current record number in the input stream.
>       *
>       * <p>
> @@ -445,6 +488,7 @@ public final class CSVParser implements
>          CSVRecord result = null;
>          this.record.clear();
>          StringBuilder sb = null;
> +        final long startCharPosition = lexer.getCharacterPosition() +
> this.characterOffset;
>          do {
>              this.reusableToken.reset();
>              this.lexer.nextToken(this.reusableToken);
> @@ -480,7 +524,7 @@ public final class CSVParser implements
>              this.recordNumber++;
>              final String comment = sb == null ? null : sb.toString();
>              result = new CSVRecord(this.record.toArray(new
> String[this.record.size()]), this.headerMap, comment,
> -                    this.recordNumber);
> +                    this.recordNumber, startCharPosition);
>          }
>          return result;
>      }
>
> Modified:
> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVRecord.java
> URL:
> http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVRecord.java?rev=1635052&r1=1635051&r2=1635052&view=diff
>
> ==============================================================================
> ---
> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVRecord.java
> (original)
> +++
> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVRecord.java
> Wed Oct 29 05:44:40 2014
> @@ -36,6 +36,8 @@ public final class CSVRecord implements
>
>      private static final long serialVersionUID = 1L;
>
> +    private final long characterPosition;
> +
>      /** The accumulated comments (if any) */
>      private final String comment;
>
> @@ -44,15 +46,16 @@ public final class CSVRecord implements
>
>      /** The record number. */
>      private final long recordNumber;
> -
> +
>      /** The values of the record */
>      private final String[] values;
>
> -    CSVRecord(final String[] values, final Map<String, Integer> mapping,
> final String comment, final long recordNumber) {
> +    CSVRecord(final String[] values, final Map<String, Integer> mapping,
> final String comment, final long recordNumber, long characterPosition) {
>          this.recordNumber = recordNumber;
>          this.values = values != null ? values : EMPTY_STRING_ARRAY;
>          this.mapping = mapping;
>          this.comment = comment;
> +        this.characterPosition = characterPosition;
>      }
>
>      /**
> @@ -110,6 +113,16 @@ public final class CSVRecord implements
>      }
>
>      /**
> +     * Returns the start position of this record as a character position
> in the source stream. This may or may not
> +     * correspond to the byte position depending on the character set.
> +     *
> +     * @return the position of this record in the source stream.
> +     */
> +    public long getCharacterPosition() {
> +        return characterPosition;
> +    }
> +
> +    /**
>       * Returns the comment for this record, if any.
>       *
>       * @return the comment for this record, or null if no comment for
> this record is available.
>
> Modified:
> commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java
> URL:
> http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java?rev=1635052&r1=1635051&r2=1635052&view=diff
>
> ==============================================================================
> ---
> commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java
> (original)
> +++
> commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java
> Wed Oct 29 05:44:40 2014
> @@ -299,22 +299,23 @@ public class CSVParserTest {
>          }
>      }
>
> -//    @Test
> -//    public void testStartWithEmptyLinesThenHeaders() throws Exception {
> -//        final String[] codes = { "\r\n\r\n\r\nhello,\r\n\r\n\r\n",
> "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n" };
> -//        final String[][] res = { { "hello", "" }, { "" }, // Excel
> format does not ignore empty lines
> -//                { "" } };
> -//        for (final String code : codes) {
> -//            final CSVParser parser = CSVParser.parse(code,
> CSVFormat.EXCEL);
> -//            final List<CSVRecord> records = parser.getRecords();
> -//            assertEquals(res.length, records.size());
> -//            assertTrue(records.size() > 0);
> -//            for (int i = 0; i < res.length; i++) {
> -//                assertArrayEquals(res[i], records.get(i).values());
> -//            }
> -//            parser.close();
> -//        }
> -//    }
> +    // @Test
> +    // public void testStartWithEmptyLinesThenHeaders() throws Exception {
> +    // final String[] codes = { "\r\n\r\n\r\nhello,\r\n\r\n\r\n",
> "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n",
> +    // "hello,\"\"\n\n\n" };
> +    // final String[][] res = { { "hello", "" }, { "" }, // Excel format
> does not ignore empty lines
> +    // { "" } };
> +    // for (final String code : codes) {
> +    // final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL);
> +    // final List<CSVRecord> records = parser.getRecords();
> +    // assertEquals(res.length, records.size());
> +    // assertTrue(records.size() > 0);
> +    // for (int i = 0; i < res.length; i++) {
> +    // assertArrayEquals(res[i], records.get(i).values());
> +    // }
> +    // parser.close();
> +    // }
> +    // }
>
>      @Test
>      public void testEndOfFileBehaviorCSV() throws Exception {
> @@ -475,6 +476,16 @@ public class CSVParserTest {
>      }
>
>      @Test
> +    public void testGetRecordPositionWithCRLF() throws Exception {
> +        this.validateRecordPosition(CRLF);
> +    }
> +
> +    @Test
> +    public void testGetRecordPositionWithLF() throws Exception {
> +        this.validateRecordPosition(String.valueOf(LF));
> +    }
> +
> +    @Test
>      public void testGetOneLine() throws IOException {
>          final CSVParser parser = CSVParser.parse(CSV_INPUT_1,
> CSVFormat.DEFAULT);
>          final CSVRecord record = parser.getRecords().get(0);
> @@ -902,4 +913,65 @@ public class CSVParserTest {
>          parser.close();
>      }
>
> +    private void validateRecordPosition(final String lineSeparator)
> throws IOException {
> +        final String nl = lineSeparator; // used as linebreak in values
> for better distinction
> +
> +        String code = "a,b,c" + lineSeparator + "1,2,3" + lineSeparator +
> +        // to see if recordPosition correctly points to the enclosing
> quote
> +                "'A" + nl + "A','B" + nl + "B',CC" + lineSeparator +
> +                // unicode test... not very relevant while operating on
> strings instead of bytes, but for
> +                // completeness...
> +                "\u00c4,\u00d6,\u00dc" + lineSeparator + "EOF,EOF,EOF";
> +
> +        final CSVFormat format =
> CSVFormat.newFormat(',').withQuote('\'').withRecordSeparator(lineSeparator);
> +        CSVParser parser = CSVParser.parse(code, format);
> +
> +        CSVRecord record;
> +        assertEquals(0, parser.getRecordNumber());
> +
> +        assertNotNull(record = parser.nextRecord());
> +        assertEquals(1, record.getRecordNumber());
> +        assertEquals(code.indexOf('a'), record.getCharacterPosition());
> +
> +        assertNotNull(record = parser.nextRecord());
> +        assertEquals(2, record.getRecordNumber());
> +        assertEquals(code.indexOf('1'), record.getCharacterPosition());
> +
> +        assertNotNull(record = parser.nextRecord());
> +        final long positionRecord3 = record.getCharacterPosition();
> +        assertEquals(3, record.getRecordNumber());
> +        assertEquals(code.indexOf("'A"), record.getCharacterPosition());
> +        assertEquals("A" + lineSeparator + "A", record.get(0));
> +        assertEquals("B" + lineSeparator + "B", record.get(1));
> +        assertEquals("CC", record.get(2));
> +
> +        assertNotNull(record = parser.nextRecord());
> +        assertEquals(4, record.getRecordNumber());
> +        assertEquals(code.indexOf('\u00c4'),
> record.getCharacterPosition());
> +
> +        assertNotNull(record = parser.nextRecord());
> +        assertEquals(5, record.getRecordNumber());
> +        assertEquals(code.indexOf("EOF"), record.getCharacterPosition());
> +
> +        parser.close();
> +
> +        // now try to read starting at record 3
> +        parser = CSVParser.parse(code.substring((int) positionRecord3),
> format);
> +        parser.setNextRecordNumber(3);
> +        parser.setNextCharacterPosition(positionRecord3);
> +
> +        assertNotNull(record = parser.nextRecord());
> +        assertEquals(3, record.getRecordNumber());
> +        assertEquals(code.indexOf("'A"), record.getCharacterPosition());
> +        assertEquals("A" + lineSeparator + "A", record.get(0));
> +        assertEquals("B" + lineSeparator + "B", record.get(1));
> +        assertEquals("CC", record.get(2));
> +
> +        assertNotNull(record = parser.nextRecord());
> +        assertEquals(4, record.getRecordNumber());
> +        assertEquals(code.indexOf('\u00c4'),
> record.getCharacterPosition());
> +        assertEquals("\u00c4", record.get(0));
> +
> +        parser.close();
> +    }
>  }
>
> Modified:
> commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVRecordTest.java
> URL:
> http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVRecordTest.java?rev=1635052&r1=1635051&r2=1635052&view=diff
>
> ==============================================================================
> ---
> commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVRecordTest.java
> (original)
> +++
> commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVRecordTest.java
> Wed Oct 29 05:44:40 2014
> @@ -45,12 +45,12 @@ public class CSVRecordTest {
>      @Before
>      public void setUp() throws Exception {
>          values = new String[] { "A", "B", "C" };
> -        record = new CSVRecord(values, null, null, 0);
> +        record = new CSVRecord(values, null, null, 0, -1);
>          header = new HashMap<String, Integer>();
>          header.put("first", Integer.valueOf(0));
>          header.put("second", Integer.valueOf(1));
>          header.put("third", Integer.valueOf(2));
> -        recordWithHeader = new CSVRecord(values, header, null, 0);
> +        recordWithHeader = new CSVRecord(values, header, null, 0, -1);
>      }
>
>      @Test
>
>
>


-- 
http://people.apache.org/~britter/
http://www.systemoutprintln.de/
http://twitter.com/BenediktRitter
http://github.com/britter

Reply via email to