luehe 2002/11/06 16:50:52 Modified: jasper2/src/share/org/apache/jasper/compiler ParserController.java Log: If autodetection yields UTF-8 as the source encoding, use ISO-8859-1 in the absence of an XML prolog. Revision Changes Path 1.25 +18 -0 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/ParserController.java Index: ParserController.java =================================================================== RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/ParserController.java,v retrieving revision 1.24 retrieving revision 1.25 diff -u -r1.24 -r1.25 --- ParserController.java 6 Nov 2002 20:14:19 -0000 1.24 +++ ParserController.java 7 Nov 2002 00:50:52 -0000 1.25 @@ -280,6 +280,24 @@ if (isEncodingSetInProlog) { // Prolog present only in XML syntax isXml = true; + } else if (sourceEnc.equals("UTF-8")) { + /* + * We don't know if we're dealing with an XML document + * unless isXml is true, but even if isXml is true, we don't + * know if we're dealing with a JSP document that satisfies + * the encoding auto-detection rules (the JSP document may not + * have an XML prolog and start with <jsp:root ...>). + * We need to be careful, because the page may be encoded in + * ISO-8859-1 (or something entirely different), and may + * contain byte sequences that will cause a UTF-8 converter to + * throw exceptions. + * It is safe to use a source encoding of ISO-8859-1 in this + * case, as there are no invalid byte sequences in ISO-8859-1, + * and the byte/character sequences we're looking for are + * identical in either encoding (both UTF-8 and ISO-8859-1 are + * extensions of ASCII). + */ + sourceEnc = "ISO-8859-1"; } }
-- To unsubscribe, e-mail: <mailto:tomcat-dev-unsubscribe@jakarta.apache.org> For additional commands, e-mail: <mailto:tomcat-dev-help@jakarta.apache.org>