Thank you for the feedback Sebb. I'll do another pass later today. Gary
On Oct 13, 2012, at 6:28, sebb <seb...@gmail.com> wrote: > On 13 October 2012 07:27, <ggreg...@apache.org> wrote: >> Author: ggregory >> Date: Sat Oct 13 06:27:52 2012 >> New Revision: 1397783 >> >> URL: http://svn.apache.org/viewvc?rev=1397783&view=rev >> Log: >> Remove DISABLED character hack. > > -1 (for now) > > Are you sure this change does not affect performance? > The Lexer code now has to do some boxing and unboxing. > Unboxing is not expensive, but boxing is potentially so. > > Also, the code now allows for a null delimiter - is that really sensible? > > Also CSVFormat.getDelimiter() is inconsistent - it returns char > whereas all the others return Character. > > I think the Lexer should stick to using char, particularly for the > delimiter which cannot be null. > >> Modified: >> >> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java >> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java >> >> commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java >> >> Modified: >> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java >> URL: >> http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java?rev=1397783&r1=1397782&r2=1397783&view=diff >> ============================================================================== >> --- >> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java >> (original) >> +++ >> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java >> Sat Oct 13 06:27:52 2012 >> @@ -18,6 +18,7 @@ >> package org.apache.commons.csv; >> >> import static org.apache.commons.csv.Constants.COMMA; >> +import static org.apache.commons.csv.Constants.CR; >> import static org.apache.commons.csv.Constants.CRLF; >> import static org.apache.commons.csv.Constants.DOUBLE_QUOTE; >> import static org.apache.commons.csv.Constants.ESCAPE; >> @@ -38,30 +39,19 @@ public class CSVFormat implements Serial >> >> private static final long serialVersionUID = 1L; >> >> - private final char delimiter; >> - private final char encapsulator; >> - private final char commentStart; >> - private final char escape; >> + private final Character delimiter; >> + private final Character encapsulator; >> + private final Character commentStart; >> + private final Character escape; >> private final boolean ignoreSurroundingSpaces; // Should >> leading/trailing spaces be ignored around values? >> private final boolean ignoreEmptyLines; >> private final String lineSeparator; // for outputs >> private final String[] header; >> >> - private final boolean isEscaping; >> - private final boolean isCommentingEnabled; >> - private final boolean isEncapsulating; >> - >> - /** >> - * Constant char to be used for disabling comments, escapes and >> encapsulation. The value -2 is used because it >> - * won't be confused with an EOF signal (-1), and because the Unicode >> value {@code FFFE} would be encoded as two chars >> - * (using surrogates) and thus there should never be a collision with a >> real text char. >> - */ >> - static final char DISABLED = '\ufffe'; >> - >> /** >> * Starting format with no settings defined; used for creating other >> formats from scratch. >> */ >> - static final CSVFormat PRISTINE = new CSVFormat(DISABLED, DISABLED, >> DISABLED, DISABLED, false, false, null, null); >> + static final CSVFormat PRISTINE = new CSVFormat(null, null, null, null, >> false, false, null, null); >> >> /** >> * Standard comma separated format, as for {@link #RFC4180} but allowing >> blank lines. >> @@ -73,8 +63,8 @@ public class CSVFormat implements Serial >> * </ul> >> */ >> public static final CSVFormat DEFAULT = >> - PRISTINE. >> - withDelimiter(COMMA) >> + PRISTINE >> + .withDelimiter(COMMA) >> .withEncapsulator(DOUBLE_QUOTE) >> .withIgnoreEmptyLines(true) >> .withLineSeparator(CRLF); >> @@ -89,8 +79,8 @@ public class CSVFormat implements Serial >> * </ul> >> */ >> public static final CSVFormat RFC4180 = >> - PRISTINE. >> - withDelimiter(COMMA) >> + PRISTINE >> + .withDelimiter(COMMA) >> .withEncapsulator(DOUBLE_QUOTE) >> .withLineSeparator(CRLF); >> >> @@ -127,7 +117,7 @@ public class CSVFormat implements Serial >> * @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html"> >> * http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a> >> */ >> - public static final CSVFormat MYSQL = >> + public static final CSVFormat MYSQL = >> PRISTINE >> .withDelimiter(TAB) >> .withEscape(ESCAPE) >> @@ -153,7 +143,7 @@ public class CSVFormat implements Serial >> * @param header >> * the header >> */ >> - CSVFormat(final char delimiter, final char encapsulator, final char >> commentStart, final char escape, final boolean surroundingSpacesIgnored, >> + CSVFormat(final Character delimiter, final Character encapsulator, >> final Character commentStart, final Character escape, final boolean >> surroundingSpacesIgnored, >> final boolean emptyLinesIgnored, final String lineSeparator, >> final String[] header) { >> this.delimiter = delimiter; >> this.encapsulator = encapsulator; >> @@ -163,9 +153,6 @@ public class CSVFormat implements Serial >> this.ignoreEmptyLines = emptyLinesIgnored; >> this.lineSeparator = lineSeparator; >> this.header = header; >> - this.isEncapsulating = encapsulator != DISABLED; >> - this.isCommentingEnabled = commentStart != DISABLED; >> - this.isEscaping = escape != DISABLED; >> } >> >> /** >> @@ -176,8 +163,8 @@ public class CSVFormat implements Serial >> * >> * @return true if <code>c</code> is a line break character >> */ >> - private static boolean isLineBreak(final char c) { >> - return c == '\n' || c == '\r'; >> + private static boolean isLineBreak(final Character c) { >> + return c != null && (c == LF || c == CR); >> } >> >> /** >> @@ -199,12 +186,12 @@ public class CSVFormat implements Serial >> commentStart + "\")"); >> } >> >> - if (encapsulator != DISABLED && encapsulator == commentStart) { >> + if (encapsulator != null && encapsulator == commentStart) { >> throw new IllegalArgumentException( >> "The comment start character and the encapsulator cannot >> be the same (\"" + commentStart + "\")"); >> } >> >> - if (escape != DISABLED && escape == commentStart) { >> + if (escape != null && escape == commentStart) { >> throw new IllegalArgumentException("The comment start and the >> escape character cannot be the same (\"" + >> commentStart + "\")"); >> } >> @@ -229,6 +216,19 @@ public class CSVFormat implements Serial >> * thrown if the specified character is a line break >> */ >> public CSVFormat withDelimiter(final char delimiter) { >> + return withDelimiter(Character.valueOf(delimiter)); >> + } >> + >> + /** >> + * Returns a copy of this format using the specified delimiter >> character. >> + * >> + * @param delimiter >> + * the delimiter character >> + * @return A copy of this format using the specified delimiter character >> + * @throws IllegalArgumentException >> + * thrown if the specified character is a line break >> + */ >> + public CSVFormat withDelimiter(final Character delimiter) { >> if (isLineBreak(delimiter)) { >> throw new IllegalArgumentException("The delimiter cannot be a >> line break"); >> } >> @@ -241,7 +241,7 @@ public class CSVFormat implements Serial >> * >> * @return the encapsulator character >> */ >> - public char getEncapsulator() { >> + public Character getEncapsulator() { >> return encapsulator; >> } >> >> @@ -255,6 +255,19 @@ public class CSVFormat implements Serial >> * thrown if the specified character is a line break >> */ >> public CSVFormat withEncapsulator(final char encapsulator) { >> + return withEncapsulator(Character.valueOf(encapsulator)); >> + } >> + >> + /** >> + * Returns a copy of this format using the specified encapsulator >> character. >> + * >> + * @param encapsulator >> + * the encapsulator character >> + * @return A copy of this format using the specified encapsulator >> character >> + * @throws IllegalArgumentException >> + * thrown if the specified character is a line break >> + */ >> + public CSVFormat withEncapsulator(final Character encapsulator) { >> if (isLineBreak(encapsulator)) { >> throw new IllegalArgumentException("The encapsulator cannot be a >> line break"); >> } >> @@ -268,7 +281,7 @@ public class CSVFormat implements Serial >> * @return {@code true} if an encapsulator is defined >> */ >> public boolean isEncapsulating() { >> - return isEncapsulating; >> + return encapsulator != null; >> } >> >> /** >> @@ -276,7 +289,7 @@ public class CSVFormat implements Serial >> * >> * @return the comment start marker. >> */ >> - public char getCommentStart() { >> + public Character getCommentStart() { >> return commentStart; >> } >> >> @@ -292,6 +305,21 @@ public class CSVFormat implements Serial >> * thrown if the specified character is a line break >> */ >> public CSVFormat withCommentStart(final char commentStart) { >> + return withCommentStart(Character.valueOf(commentStart)); >> + } >> + >> + /** >> + * Returns a copy of this format using the specified character as the >> comment start marker. >> + * >> + * Note that the comment introducer character is only recognised at the >> start of a line. >> + * >> + * @param commentStart >> + * the comment start marker >> + * @return A copy of this format using the specified character as the >> comment start marker >> + * @throws IllegalArgumentException >> + * thrown if the specified character is a line break >> + */ >> + public CSVFormat withCommentStart(final Character commentStart) { >> if (isLineBreak(commentStart)) { >> throw new IllegalArgumentException("The comment start character >> cannot be a line break"); >> } >> @@ -307,7 +335,7 @@ public class CSVFormat implements Serial >> * @return <tt>true</tt> is comments are supported, <tt>false</tt> >> otherwise >> */ >> public boolean isCommentingEnabled() { >> - return isCommentingEnabled; >> + return commentStart != null; >> } >> >> /** >> @@ -315,7 +343,7 @@ public class CSVFormat implements Serial >> * >> * @return the escape character >> */ >> - public char getEscape() { >> + public Character getEscape() { >> return escape; >> } >> >> @@ -329,6 +357,19 @@ public class CSVFormat implements Serial >> * thrown if the specified character is a line break >> */ >> public CSVFormat withEscape(final char escape) { >> + return withEscape(Character.valueOf(escape)); >> + } >> + >> + /** >> + * Returns a copy of this format using the specified escape character. >> + * >> + * @param escape >> + * the escape character >> + * @return A copy of this format using the specified escape character >> + * @throws IllegalArgumentException >> + * thrown if the specified character is a line break >> + */ >> + public CSVFormat withEscape(final Character escape) { >> if (isLineBreak(escape)) { >> throw new IllegalArgumentException("The escape character cannot >> be a line break"); >> } >> @@ -342,7 +383,7 @@ public class CSVFormat implements Serial >> * @return {@code true} if escapes are processed >> */ >> public boolean isEscaping() { >> - return isEscaping; >> + return escape != null; >> } >> >> /** >> >> Modified: >> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java >> URL: >> http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java?rev=1397783&r1=1397782&r2=1397783&view=diff >> ============================================================================== >> --- commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java >> (original) >> +++ commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java >> Sat Oct 13 06:27:52 2012 >> @@ -32,14 +32,10 @@ import java.io.IOException; >> */ >> abstract class Lexer { >> >> - private final boolean isEncapsulating; >> - private final boolean isEscaping; >> - private final boolean isCommentEnabled; >> - >> - private final char delimiter; >> - private final char escape; >> - private final char encapsulator; >> - private final char commmentStart; >> + private final Character delimiter; >> + private final Character escape; >> + private final Character encapsulator; >> + private final Character commmentStart; >> >> final boolean surroundingSpacesIgnored; >> final boolean emptyLinesIgnored; >> @@ -52,9 +48,6 @@ abstract class Lexer { >> Lexer(final CSVFormat format, final ExtendedBufferedReader in) { >> this.format = format; >> this.in = in; >> - this.isEncapsulating = format.isEncapsulating(); >> - this.isEscaping = format.isEscaping(); >> - this.isCommentEnabled = format.isCommentingEnabled(); >> this.delimiter = format.getDelimiter(); >> this.escape = format.getEscape(); >> this.encapsulator = format.getEncapsulator(); >> @@ -144,14 +137,14 @@ abstract class Lexer { >> } >> >> boolean isEscape(final int c) { >> - return isEscaping && c == escape; >> + return escape != null && c == escape; >> } >> >> boolean isEncapsulator(final int c) { >> - return isEncapsulating && c == encapsulator; >> + return encapsulator != null && c == encapsulator; >> } >> >> boolean isCommentStart(final int c) { >> - return isCommentEnabled && c == commmentStart; >> + return commmentStart != null && c == commmentStart; >> } >> } >> >> Modified: >> commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java >> URL: >> http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java?rev=1397783&r1=1397782&r2=1397783&view=diff >> ============================================================================== >> --- >> commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java >> (original) >> +++ >> commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java >> Sat Oct 13 06:27:52 2012 >> @@ -46,9 +46,9 @@ public class CSVFormatTest { >> format.withIgnoreEmptyLines(false); >> >> assertEquals('!', format.getDelimiter()); >> - assertEquals('!', format.getEncapsulator()); >> - assertEquals('!', format.getCommentStart()); >> - assertEquals('!', format.getEscape()); >> + assertEquals('!', format.getEncapsulator().charValue()); >> + assertEquals('!', format.getCommentStart().charValue()); >> + assertEquals('!', format.getEscape().charValue()); >> assertEquals(CRLF, format.getLineSeparator()); >> >> assertTrue(format.getIgnoreSurroundingSpaces()); >> @@ -60,10 +60,10 @@ public class CSVFormatTest { >> final CSVFormat format = new CSVFormat('!', '!', '!', '!', true, >> true, CRLF, null); >> >> assertEquals('?', format.withDelimiter('?').getDelimiter()); >> - assertEquals('?', format.withEncapsulator('?').getEncapsulator()); >> - assertEquals('?', format.withCommentStart('?').getCommentStart()); >> + assertEquals('?', >> format.withEncapsulator('?').getEncapsulator().charValue()); >> + assertEquals('?', >> format.withCommentStart('?').getCommentStart().charValue()); >> assertEquals("?", format.withLineSeparator("?").getLineSeparator()); >> - assertEquals('?', format.withEscape('?').getEscape()); >> + assertEquals('?', format.withEscape('?').getEscape().charValue()); >> >> >> assertFalse(format.withIgnoreSurroundingSpaces(false).getIgnoreSurroundingSpaces()); >> >> assertFalse(format.withIgnoreEmptyLines(false).getIgnoreEmptyLines()); >> @@ -131,7 +131,7 @@ public class CSVFormatTest { >> // expected >> } >> >> - >> format.withEncapsulator(CSVFormat.DISABLED).withCommentStart(CSVFormat.DISABLED).validate(); >> + format.withEncapsulator(null).withCommentStart(null).validate(); >> >> try { >> format.withEscape('!').withCommentStart('!').validate(); >> @@ -140,7 +140,7 @@ public class CSVFormatTest { >> // expected >> } >> >> - >> format.withEscape(CSVFormat.DISABLED).withCommentStart(CSVFormat.DISABLED).validate(); >> + format.withEscape(null).withCommentStart(null).validate(); >> >> >> try { >> >> > > --------------------------------------------------------------------- > To unsubscribe, e-mail: dev-unsubscr...@commons.apache.org > For additional commands, e-mail: dev-h...@commons.apache.org > --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@commons.apache.org For additional commands, e-mail: dev-h...@commons.apache.org