2013/5/7 <s...@apache.org> > Author: sebb > Date: Tue May 7 15:12:48 2013 > New Revision: 1479936 > > URL: http://svn.apache.org/r1479936 > Log: > CSV-98 Line number counting is confusing > > Modified: > > commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVLexer.java > > commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java > > commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java > > commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java > > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVLexer1.java > > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVLexer1306663.java > > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVLexer1306667.java > > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVLexer3.java >
The above 4 lines look strange to me. We should add the corresponding Lexers of the above revisions. > > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java > > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/ExtendedBufferedReaderTest.java > > Modified: > commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVLexer.java > URL: > http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVLexer.java?rev=1479936&r1=1479935&r2=1479936&view=diff > > ============================================================================== > --- > commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVLexer.java > (original) > +++ > commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVLexer.java > Tue May 7 15:12:48 2013 > @@ -202,7 +202,7 @@ final class CSVLexer extends Lexer { > */ > private Token parseEncapsulatedToken(final Token tkn) throws > IOException { > // save current line number in case needed for IOE > - final long startLineNumber = getLineNumber(); > + final long startLineNumber = getCurrentLineNumber(); > int c; > while (true) { > c = in.read(); > @@ -235,7 +235,7 @@ final class CSVLexer extends Lexer { > return tkn; > } else if (!isWhitespace(c)) { > // error invalid char between token and next > delimiter > - throw new IOException("(line " + > getLineNumber() + > + throw new IOException("(line " + > getCurrentLineNumber() + > ") invalid char between encapsulated > token and delimiter"); > } > } > > Modified: > commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java > URL: > http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java?rev=1479936&r1=1479935&r2=1479936&view=diff > > ============================================================================== > --- > commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java > (original) > +++ > commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java > Tue May 7 15:12:48 2013 > @@ -158,8 +158,8 @@ public class CSVParser implements Iterab > * > * @return current line number > */ > - public long getLineNumber() { > - return lexer.getLineNumber(); > + public long getCurrentLineNumber() { > + return lexer.getCurrentLineNumber(); > } > > /** > @@ -200,7 +200,7 @@ public class CSVParser implements Iterab > } > break; > case INVALID: > - throw new IOException("(line " + getLineNumber() + ") > invalid parse sequence"); > + throw new IOException("(line " + getCurrentLineNumber() + > ") invalid parse sequence"); > case COMMENT: // Ignored currently > if (sb == null) { // first comment for this record > sb = new StringBuilder(); > > Modified: > commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java > URL: > http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java?rev=1479936&r1=1479935&r2=1479936&view=diff > > ============================================================================== > --- > commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java > (original) > +++ > commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java > Tue May 7 15:12:48 2013 > @@ -39,8 +39,8 @@ final class ExtendedBufferedReader exten > /** The last char returned */ > private int lastChar = UNDEFINED; > > - /** The line counter */ > - private long lineCounter; > + /** The count of EOLs (CR/LF/CRLF) seen so far */ > + private long eolCounter = 0; > > /** > * Created extended buffered reader using default buffer-size > @@ -53,7 +53,7 @@ final class ExtendedBufferedReader exten > public int read() throws IOException { > final int current = super.read(); > if (current == CR || (current == LF && lastChar != CR)) { > - lineCounter++; > + eolCounter++; > } > lastChar = current; > return lastChar; > @@ -85,10 +85,10 @@ final class ExtendedBufferedReader exten > final char ch = buf[i]; > if (ch == LF) { > if (CR != (i > 0 ? buf[i - 1] : lastChar)) { > - lineCounter++; > + eolCounter++; > } > } else if (ch == CR) { > - lineCounter++; > + eolCounter++; > } > } > > @@ -105,7 +105,7 @@ final class ExtendedBufferedReader exten > * Calls {@link BufferedReader#readLine()} which drops the line > terminator(s). This method should only be called > * when processing a comment, otherwise information can be lost. > * <p> > - * Increments {@link #lineCounter} > + * Increments {@link #eolCounter} > * <p> > * Sets {@link #lastChar} to {@link #END_OF_STREAM} at EOF, otherwise > to LF > * > @@ -117,7 +117,7 @@ final class ExtendedBufferedReader exten > > if (line != null) { > lastChar = LF; // needed for detecting start of line > - lineCounter++; > + eolCounter++; > } else { > lastChar = END_OF_STREAM; > } > @@ -127,7 +127,7 @@ final class ExtendedBufferedReader exten > > /** > * Returns the next character in the current reader without consuming > it. So the next call to {@link #read()} will > - * still return this value. > + * still return this value. Does not affect line number or last > character. > * > * @return the next character > * > @@ -143,11 +143,15 @@ final class ExtendedBufferedReader exten > } > > /** > - * Returns the number of lines read > + * Returns the current line number > * > - * @return the number of EOLs seen so far > + * @return the current line number > */ > - long getLineNumber() { > - return lineCounter; > + long getCurrentLineNumber() { > + // Check if we are at EOL or EOF or just starting > + if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || > lastChar == END_OF_STREAM) { > + return eolCounter; // counter is accurate > + } > + return eolCounter + 1; // Allow for counter being incremented > only at EOL > } > } > > Modified: > commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java > URL: > http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java?rev=1479936&r1=1479935&r2=1479936&view=diff > > ============================================================================== > --- > commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java > (original) > +++ > commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java > Tue May 7 15:12:48 2013 > @@ -70,12 +70,12 @@ abstract class Lexer { > } > > /** > - * Returns the number of lines read > + * Returns the current line number > * > - * @return the number of EOLs seen so far > + * @return the current line number > */ > - long getLineNumber() { > - return in.getLineNumber(); > + long getCurrentLineNumber() { > + return in.getCurrentLineNumber(); > } > > // TODO escape handling needs more work > > Modified: > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVLexer1.java > URL: > http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVLexer1.java?rev=1479936&r1=1479935&r2=1479936&view=diff > > ============================================================================== > --- > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVLexer1.java > (original) > +++ > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVLexer1.java > Tue May 7 15:12:48 2013 > @@ -198,7 +198,7 @@ class CSVLexer1 extends Lexer { > */ > private Token encapsulatedTokenLexer(final Token tkn, int c) throws > IOException { > // save current line > - final long startLineNumber = getLineNumber(); > + final long startLineNumber = getCurrentLineNumber(); > // ignore the given delimiter > // assert c == delimiter; > while (true) { > @@ -230,7 +230,7 @@ class CSVLexer1 extends Lexer { > return tkn; > } else if (!isWhitespace(c)) { > // error invalid char between token and next > delimiter > - throw new IOException("(line " + > getLineNumber() + ") invalid char between encapsulated token and > delimiter"); > + throw new IOException("(line " + > getCurrentLineNumber() + ") invalid char between encapsulated token and > delimiter"); > } > } > } > > Modified: > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVLexer1306663.java > URL: > http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVLexer1306663.java?rev=1479936&r1=1479935&r2=1479936&view=diff > > ============================================================================== > --- > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVLexer1306663.java > (original) > +++ > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVLexer1306663.java > Tue May 7 15:12:48 2013 > @@ -187,7 +187,7 @@ class CSVLexer1306663 extends Lexer { > */ > private Token encapsulatedTokenLexer(final Token tkn) throws > IOException { > // save current line > - final long startLineNumber = getLineNumber(); > + final long startLineNumber = getCurrentLineNumber(); > // ignore the given delimiter > // assert c == delimiter; > int c; > @@ -218,7 +218,7 @@ class CSVLexer1306663 extends Lexer { > return tkn; > } else if (!isWhitespace(c)) { > // error invalid char between token and next > delimiter > - throw new IOException("(line " + > getLineNumber() + ") invalid char between encapsulated token and > delimiter"); > + throw new IOException("(line " + > getCurrentLineNumber() + ") invalid char between encapsulated token and > delimiter"); > } > } > } > > Modified: > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVLexer1306667.java > URL: > http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVLexer1306667.java?rev=1479936&r1=1479935&r2=1479936&view=diff > > ============================================================================== > --- > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVLexer1306667.java > (original) > +++ > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVLexer1306667.java > Tue May 7 15:12:48 2013 > @@ -187,7 +187,7 @@ class CSVLexer1306667 extends Lexer { > */ > private Token encapsulatedTokenLexer(final Token tkn) throws > IOException { > // save current line > - final long startLineNumber = getLineNumber(); > + final long startLineNumber = getCurrentLineNumber(); > // ignore the given delimiter > // assert c == delimiter; > int c; > @@ -218,7 +218,7 @@ class CSVLexer1306667 extends Lexer { > return tkn; > } else if (!isWhitespace(c)) { > // error invalid char between token and next > delimiter > - throw new IOException("(line " + > getLineNumber() + ") invalid char between encapsulated token and > delimiter"); > + throw new IOException("(line " + > getCurrentLineNumber() + ") invalid char between encapsulated token and > delimiter"); > } > } > } > > Modified: > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVLexer3.java > URL: > http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVLexer3.java?rev=1479936&r1=1479935&r2=1479936&view=diff > > ============================================================================== > --- > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVLexer3.java > (original) > +++ > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVLexer3.java > Tue May 7 15:12:48 2013 > @@ -170,7 +170,7 @@ class CSVLexer3 extends Lexer { > state = State.ESCAPE_QUOTE; > break; > case EOFCHAR: > - throw new IOException("(line " + > getLineNumber() + ") unexpected EOF in quoted string"); > + throw new IOException("(line " + > getCurrentLineNumber() + ") unexpected EOF in quoted string"); > default: > tkn.content.append((char) intch); > break; > @@ -194,7 +194,7 @@ class CSVLexer3 extends Lexer { > case WHITESPACE: // trailing whitespace may be > allowed > if (!ignoreSurroundingSpaces) { > // error invalid char between token and > next delimiter > - throw new IOException("(line " + > getLineNumber() + ") invalid char between encapsulated token and > delimiter"); > + throw new IOException("(line " + > getCurrentLineNumber() + ") invalid char between encapsulated token and > delimiter"); > } > break; > // Everything else is invalid > @@ -202,7 +202,7 @@ class CSVLexer3 extends Lexer { > case OTHER: > case COMMENT_START: > // error invalid char between token and next > delimiter > - throw new IOException("(line " + > getLineNumber() + ") invalid char between encapsulated token and > delimiter"); > + throw new IOException("(line " + > getCurrentLineNumber() + ") invalid char between encapsulated token and > delimiter"); > } > break; > case ESCAPE_PLAIN: > @@ -221,7 +221,7 @@ class CSVLexer3 extends Lexer { > tkn.content.append((char) intch); > break; > case EOFCHAR: > - throw new IOException("(line " + > getLineNumber() + ") unexpected EOF in escape sequence"); > + throw new IOException("(line " + > getCurrentLineNumber() + ") unexpected EOF in escape sequence"); > } > break; > case ESCAPE_QUOTE: > @@ -239,7 +239,7 @@ class CSVLexer3 extends Lexer { > tkn.content.append((char) intch); > break; > case EOFCHAR: > - throw new IOException("(line " + > getLineNumber() + ") unexpected EOF in escape sequence"); > + throw new IOException("(line " + > getCurrentLineNumber() + ") unexpected EOF in escape sequence"); > } > break; > default: > > Modified: > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java > URL: > http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java?rev=1479936&r1=1479935&r2=1479936&view=diff > > ============================================================================== > --- > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java > (original) > +++ > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java > Tue May 7 15:12:48 2013 > @@ -629,21 +629,21 @@ public class CSVParserTest { > CSVFormat.newBuilder().withRecordSeparator(CRLF).build()); > CSVRecord record; > assertEquals(0, parser.getRecordNumber()); > - assertEquals(0, parser.getLineNumber()); > + assertEquals(0, parser.getCurrentLineNumber()); > assertNotNull(record = parser.nextRecord()); > - assertEquals(3, parser.getLineNumber()); > + assertEquals(3, parser.getCurrentLineNumber()); > assertEquals(1, record.getRecordNumber()); > assertEquals(1, parser.getRecordNumber()); > assertNotNull(record = parser.nextRecord()); > - assertEquals(6, parser.getLineNumber()); > + assertEquals(6, parser.getCurrentLineNumber()); > assertEquals(2, record.getRecordNumber()); > assertEquals(2, parser.getRecordNumber()); > assertNotNull(record = parser.nextRecord()); > - assertEquals(8, parser.getLineNumber()); > + assertEquals(8, parser.getCurrentLineNumber()); > assertEquals(3, record.getRecordNumber()); > assertEquals(3, parser.getRecordNumber()); > assertNull(record = parser.nextRecord()); > - assertEquals(8, parser.getLineNumber()); > + assertEquals(8, parser.getCurrentLineNumber()); > assertEquals(3, parser.getRecordNumber()); > } > > @@ -676,17 +676,17 @@ public class CSVParserTest { > > private void validateLineNumbers(final String lineSeparator) throws > IOException { > final CSVParser parser = new CSVParser("a" + lineSeparator + "b" > + lineSeparator + "c", > CSVFormat.newBuilder().withRecordSeparator(lineSeparator).build()); > - assertEquals(0, parser.getLineNumber()); > + assertEquals(0, parser.getCurrentLineNumber()); > assertNotNull(parser.nextRecord()); > - assertEquals(1, parser.getLineNumber()); > + assertEquals(1, parser.getCurrentLineNumber()); > assertNotNull(parser.nextRecord()); > - assertEquals(2, parser.getLineNumber()); > + assertEquals(2, parser.getCurrentLineNumber()); > assertNotNull(parser.nextRecord()); > // Still 2 because the last line is does not have EOL chars > - assertEquals(2, parser.getLineNumber()); > + assertEquals(2, parser.getCurrentLineNumber()); > assertNull(parser.nextRecord()); > // Still 2 because the last line is does not have EOL chars > - assertEquals(2, parser.getLineNumber()); > + assertEquals(2, parser.getCurrentLineNumber()); > } > > } > > Modified: > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/ExtendedBufferedReaderTest.java > URL: > http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/ExtendedBufferedReaderTest.java?rev=1479936&r1=1479935&r2=1479936&view=diff > > ============================================================================== > --- > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/ExtendedBufferedReaderTest.java > (original) > +++ > commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/ExtendedBufferedReaderTest.java > Tue May 7 15:12:48 2013 > @@ -47,44 +47,50 @@ public class ExtendedBufferedReaderTest > @Test > public void testReadLookahead1() throws Exception { > final ExtendedBufferedReader br = getBufferedReader("1\n2\r3\n"); > + assertEquals(0, br.getCurrentLineNumber()); > assertEquals('1', br.lookAhead()); > assertEquals(UNDEFINED, br.getLastChar()); > - assertEquals('1', br.read()); > + assertEquals(0, br.getCurrentLineNumber()); > + assertEquals('1', br.read()); // Start line 1 > assertEquals('1', br.getLastChar()); > > - assertEquals(0, br.getLineNumber()); > + assertEquals(1, br.getCurrentLineNumber()); > assertEquals('\n', br.lookAhead()); > - assertEquals(0, br.getLineNumber()); > + assertEquals(1, br.getCurrentLineNumber()); > assertEquals('1', br.getLastChar()); > assertEquals('\n', br.read()); > - assertEquals(1, br.getLineNumber()); > + assertEquals(1, br.getCurrentLineNumber()); > assertEquals('\n', br.getLastChar()); > - assertEquals(1, br.getLineNumber()); > + assertEquals(1, br.getCurrentLineNumber()); > > assertEquals('2', br.lookAhead()); > - assertEquals(1, br.getLineNumber()); > + assertEquals(1, br.getCurrentLineNumber()); > assertEquals('\n', br.getLastChar()); > - assertEquals(1, br.getLineNumber()); > - assertEquals('2', br.read()); > + assertEquals(1, br.getCurrentLineNumber()); > + assertEquals('2', br.read()); // Start line 2 > + assertEquals(2, br.getCurrentLineNumber()); > assertEquals('2', br.getLastChar()); > > assertEquals('\r', br.lookAhead()); > + assertEquals(2, br.getCurrentLineNumber()); > assertEquals('2', br.getLastChar()); > assertEquals('\r', br.read()); > assertEquals('\r', br.getLastChar()); > + assertEquals(2, br.getCurrentLineNumber()); > > assertEquals('3', br.lookAhead()); > assertEquals('\r', br.getLastChar()); > - assertEquals('3', br.read()); > + assertEquals('3', br.read()); // Start line 3 > assertEquals('3', br.getLastChar()); > + assertEquals(3, br.getCurrentLineNumber()); > > assertEquals('\n', br.lookAhead()); > - assertEquals(2, br.getLineNumber()); > + assertEquals(3, br.getCurrentLineNumber()); > assertEquals('3', br.getLastChar()); > assertEquals('\n', br.read()); > - assertEquals(3, br.getLineNumber()); > + assertEquals(3, br.getCurrentLineNumber()); > assertEquals('\n', br.getLastChar()); > - assertEquals(3, br.getLineNumber()); > + assertEquals(3, br.getCurrentLineNumber()); > > assertEquals(END_OF_STREAM, br.lookAhead()); > assertEquals('\n', br.getLastChar()); > @@ -92,6 +98,7 @@ public class ExtendedBufferedReaderTest > assertEquals(END_OF_STREAM, br.getLastChar()); > assertEquals(END_OF_STREAM, br.read()); > assertEquals(END_OF_STREAM, br.lookAhead()); > + assertEquals(3, br.getCurrentLineNumber()); > > } > > @@ -125,28 +132,28 @@ public class ExtendedBufferedReaderTest > assertNull(br.readLine()); > > br = getBufferedReader("foo\n\nhello"); > - assertEquals(0, br.getLineNumber()); > + assertEquals(0, br.getCurrentLineNumber()); > assertEquals("foo",br.readLine()); > - assertEquals(1, br.getLineNumber()); > + assertEquals(1, br.getCurrentLineNumber()); > assertEquals("",br.readLine()); > - assertEquals(2, br.getLineNumber()); > + assertEquals(2, br.getCurrentLineNumber()); > assertEquals("hello",br.readLine()); > - assertEquals(3, br.getLineNumber()); > + assertEquals(3, br.getCurrentLineNumber()); > assertNull(br.readLine()); > - assertEquals(3, br.getLineNumber()); > + assertEquals(3, br.getCurrentLineNumber()); > > br = getBufferedReader("foo\n\nhello"); > assertEquals('f', br.read()); > assertEquals('o', br.lookAhead()); > assertEquals("oo",br.readLine()); > - assertEquals(1, br.getLineNumber()); > + assertEquals(1, br.getCurrentLineNumber()); > assertEquals('\n', br.lookAhead()); > assertEquals("",br.readLine()); > - assertEquals(2, br.getLineNumber()); > + assertEquals(2, br.getCurrentLineNumber()); > assertEquals('h', br.lookAhead()); > assertEquals("hello",br.readLine()); > assertNull(br.readLine()); > - assertEquals(3, br.getLineNumber()); > + assertEquals(3, br.getCurrentLineNumber()); > > > br = getBufferedReader("foo\rbaar\r\nfoo"); > @@ -171,20 +178,20 @@ public class ExtendedBufferedReaderTest > ExtendedBufferedReader br; > > br = getBufferedReader(test); > - assertEquals(0, br.getLineNumber()); > + assertEquals(0, br.getCurrentLineNumber()); > while(br.readLine()!=null) {} > - assertEquals(EOLeolct, br.getLineNumber()); > + assertEquals(EOLeolct, br.getCurrentLineNumber()); > > br = getBufferedReader(test); > - assertEquals(0, br.getLineNumber()); > + assertEquals(0, br.getCurrentLineNumber()); > while(br.read()!=-1) {} > - assertEquals(EOLeolct, br.getLineNumber()); > + assertEquals(EOLeolct, br.getCurrentLineNumber()); > > br = getBufferedReader(test); > - assertEquals(0, br.getLineNumber()); > + assertEquals(0, br.getCurrentLineNumber()); > final char[] buff = new char[10]; > while(br.read(buff ,0, 3)!=-1) {} > - assertEquals(EOLeolct, br.getLineNumber()); > + assertEquals(EOLeolct, br.getCurrentLineNumber()); > } > > private ExtendedBufferedReader getBufferedReader(final String s) { > > > -- http://people.apache.org/~britter/ http://www.systemoutprintln.de/ http://twitter.com/BenediktRitter http://github.com/britter