luehe 2003/01/27 10:10:48 Modified: jasper2/src/share/org/apache/jasper/compiler PageInfo.java ParserController.java Validator.java jasper2/src/share/org/apache/jasper/resources messages.properties messages_es.properties messages_fr.properties messages_ja.properties jasper2/src/share/org/apache/jasper/xmlparser XMLEncodingDetector.java XercesEncodingDetector.java Log: Fixed 16127 ("Seems to be a problem doing a static include of content when using a different charset.") In addition, the presence of an XML prolog no longer implies a JSP document, as it may actually be used with standard syntax to produce an XML-formatted result (in which case Jasper must treat the XML prolog as template text). Revision Changes Path 1.17 +3 -18 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/PageInfo.java Index: PageInfo.java =================================================================== RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/PageInfo.java,v retrieving revision 1.16 retrieving revision 1.17 diff -u -r1.16 -r1.17 --- PageInfo.java 8 Nov 2002 19:55:47 -0000 1.16 +++ PageInfo.java 27 Jan 2003 18:10:47 -0000 1.17 @@ -89,13 +89,6 @@ private String errorPage = null; private String pageEncoding = null; - /* - * Auto-detected encoding, or encoding specified in XML prolog - * (declaration). - * Only meaningful for XML documents. 
- */ - private String xmlPrologEncoding = null; - private int maxTagNesting = 0; private boolean scriptless = false; private boolean scriptingInvalid = false; @@ -233,14 +226,6 @@ return pageEncoding; } - public void setXmlPrologEncoding(String xmlPrologEncoding) { - this.xmlPrologEncoding = xmlPrologEncoding; - } - - public String getXmlPrologEncoding() { - return xmlPrologEncoding; - } - public int getMaxTagNesting() { return maxTagNesting; } 1.30 +97 -54 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/ParserController.java Index: ParserController.java =================================================================== RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/ParserController.java,v retrieving revision 1.29 retrieving revision 1.30 diff -u -r1.29 -r1.30 --- ParserController.java 22 Jan 2003 20:08:24 -0000 1.29 +++ ParserController.java 27 Jan 2003 18:10:47 -0000 1.30 @@ -67,12 +67,10 @@ /** * Controller for the parsing of a JSP page. * <p> - * A translation unit (JSP source file and any files included via the - * include directive) may involve the processing of JSP pages - * written with different syntaxes (currently the original JSP syntax, - * as well as the XML syntax (as of JSP 1.2)). This class encapsulates - * the behavior related to the selection and invocation of - * the proper parser. + * The same ParserController instance is used for a JSP page and any JSP + * segments included by it (via an include directive), where each segment may + * be provided in standard or XML syntax. This class selects and invokes the + * appropriate parser for the JSP page and its included segments. 
* * @author Pierre Delisle * @author Jan Luehe @@ -110,6 +108,8 @@ */ private boolean isTopFile = true; + private String sourceEnc; + /* * Constructor */ @@ -196,9 +196,27 @@ // Figure out what type of JSP document and encoding type we are // dealing with - String encoding = figureOutJspDocument(absFileName, jarFile); + figureOutJspDocument(absFileName, jarFile); if (isTopFile) { + if (isXml) { + // Make sure the encoding determined from the XML prolog + // matches that in the JSP config element, if present + String jspConfigPageEnc = pageInfo.getPageEncoding(); + if (jspConfigPageEnc != null + && !jspConfigPageEnc.equals(sourceEnc)) { + err.jspError("jsp.error.prolog_config_encoding_mismatch", + sourceEnc, jspConfigPageEnc); + } + // override the encoding that may have been set from JSP config + // info (in Compiler.generateJava()), since that applies to + // standard syntax only + pageInfo.setPageEncoding(sourceEnc); + } else { + if (pageInfo.getPageEncoding() == null) { + pageInfo.setPageEncoding(sourceEnc); + } + } pageInfo.setIsXml(isXml); isTopFile = false; } else { @@ -207,8 +225,18 @@ // Dispatch to the proper parser if (isXml) { + // JSP document (XML syntax) InputStream inStream = null; try { + // XXX Files included using the include directive must be read + // using the character encoding of the including page. + // However, I am wondering how to implement this if an + // included JSP document contains its own XML prolog. Since + // we're handing the included JSP document off to the XML + // (e.g., SAX) parser, we have no control over how it's parsed: + // the parser will determine the encoding from the XML prolog + // on its own, and use it. We can't tell the parser to use + // a different encoding. 
inStream = JspUtil.getInputStream(absFileName, jarFile, ctxt, err); parsedPage = JspDocumentParser.parse(this, absFileName, @@ -224,12 +252,21 @@ } } } else { + // Standard syntax InputStreamReader inStreamReader = null; try { - inStreamReader = JspUtil.getReader(absFileName, encoding, - jarFile, ctxt, err); - JspReader jspReader = new JspReader(ctxt, absFileName, - encoding, inStreamReader, + // Files included using the include directive must be read + // using the character encoding of the including page, which is + // the encoding returned by pageInfo.getPageEncoding(). + inStreamReader = JspUtil.getReader(absFileName, + pageInfo.getPageEncoding(), + jarFile, + ctxt, + err); + JspReader jspReader = new JspReader(ctxt, + absFileName, + pageInfo.getPageEncoding(), + inStreamReader, err); parsedPage = Parser.parse(this, jspReader, parent, isTagFile, directivesOnly, jarFile); @@ -261,30 +298,32 @@ * * If these properties are already specified in the jsp-config element * in web.xml, then they are used. - * - * @return The source encoding */ - private String figureOutJspDocument(String fname, JarFile jarFile) + private void figureOutJspDocument(String fname, JarFile jarFile) throws JasperException, IOException { - boolean isXmlFound = false; + // 'true' if the syntax of the page (XML or standard) is identified by + // external information: either via a JSP configuration element or + // the ".jspx" suffix + boolean isExternal = false; isXml = false; if (pageInfo.isXmlSpecified()) { // If <is-xml> is specified in a <jsp-property-group>, it is used. 
isXml = pageInfo.isXml(); - isXmlFound = true; + isExternal = true; } else if (fname.endsWith(".jspx")) { isXml = true; - isXmlFound = true; + isExternal = true; } - String sourceEnc = null; - if (isXmlFound && !isXml) { + if (isExternal && !isXml) { // JSP syntax if (pageInfo.getPageEncoding() != null) { - // encoding specified in jsp-config (used only by JSP syntax) - return pageInfo.getPageEncoding(); + // Encoding specified in jsp-config (used by standard syntax + // only) + sourceEnc = pageInfo.getPageEncoding(); + return; } else { // We don't know the encoding sourceEnc = "ISO-8859-1"; @@ -294,45 +333,46 @@ Object[] ret = XMLEncodingDetector.getEncoding(fname, jarFile, ctxt, err); sourceEnc = (String) ret[0]; - boolean isEncodingSetInProlog = ((Boolean) ret[1]).booleanValue(); - if (isEncodingSetInProlog) { - // Prolog present only in XML syntax - isXml = true; - if (isTopFile) { - String jspConfigPageEnc = pageInfo.getPageEncoding(); - if (jspConfigPageEnc != null - && !jspConfigPageEnc.equals(sourceEnc)) { - err.jspError( - "jsp.error.page.prolog_config_encoding_conflict", - sourceEnc, jspConfigPageEnc); - } - pageInfo.setXmlPrologEncoding(sourceEnc); - } - } else if (sourceEnc.equals("UTF-8")) { + boolean isFallback = ((Boolean) ret[1]).booleanValue(); + if (isFallback) { /* - * We don't know if we're dealing with an XML document - * unless isXml is true, but even if isXml is true, we don't - * know if we're dealing with a JSP document that satisfies - * the encoding auto-detection rules (the JSP document may not - * have an XML prolog and start with <jsp:root ...>). + * Page does not have any XML prolog, or contains an XML + * prolog that is being used as template text (in standard + * syntax). This means that the page's encoding cannot be + * determined from the 'encoding' attribute of an XML prolog, + * or autodetected from an XML prolog. 
+ * * We need to be careful, because the page may be encoded in * ISO-8859-1 (or something entirely different), and may * contain byte sequences that will cause a UTF-8 converter to * throw exceptions. + * * It is safe to use a source encoding of ISO-8859-1 in this * case, as there are no invalid byte sequences in ISO-8859-1, - * and the byte/character sequences we're looking for are - * identical in either encoding (both UTF-8 and ISO-8859-1 are - * extensions of ASCII). + * and the byte/character sequences we're looking for (i.e., + * <jsp:root>) are identical in either encoding (both UTF-8 + * and ISO-8859-1 are extensions of ASCII). */ sourceEnc = "ISO-8859-1"; } } if (isXml) { - return sourceEnc; + // (This implies 'isExternal' is TRUE.) + // We know we're dealing with a JSP document (via JSP config or + // ".jspx" suffix), so we're done. + return; } + /* + * At this point, 'isExternal' or 'isXml' is FALSE. + * Search for jsp:root action, in order to determine if we're dealing + * with XML or standard syntax (unless we already know what we're + * dealing with, i.e., when 'isExternal' is TRUE and 'isXml' is FALSE). + * No check for XML prolog, since nothing prevents a page from + * outputting XML and still using JSP syntax (in this case, the + * XML prolog is treated as template text). + */ JspReader jspReader = null; try { jspReader = new JspReader(ctxt, fname, sourceEnc, jarFile, err); @@ -341,27 +381,30 @@ } jspReader.setSingleFile(true); Mark startMark = jspReader.mark(); - - if (!isXmlFound) { - // Check for the jsp:root tag - // No check for xml prolog, since nothing prevents a page - // to output XML and still use JSP syntax. + if (!isExternal) { jspReader.reset(startMark); Mark mark = jspReader.skipUntil(JSP_ROOT_TAG); if (mark != null) { isXml = true; - return sourceEnc; + return; } else { isXml = false; } } - // At this point we know it's JSP syntax ... + /* + * At this point, we know we're dealing with JSP syntax. 
+ * If an XML prolog is provided, it's treated as template text. + * Determine the page encoding from the page directive, unless it's + * specified via JSP config. + */ if (pageInfo.getPageEncoding() != null) { - return pageInfo.getPageEncoding(); + sourceEnc = pageInfo.getPageEncoding(); } else { - return getSourceEncodingForJspSyntax(jspReader, startMark); + sourceEnc = getSourceEncodingForJspSyntax(jspReader, startMark); } + + return; } /* 1.69 +16 -27 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/Validator.java Index: Validator.java =================================================================== RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/Validator.java,v retrieving revision 1.68 retrieving revision 1.69 diff -u -r1.68 -r1.69 --- Validator.java 22 Jan 2003 20:08:24 -0000 1.68 +++ Validator.java 27 Jan 2003 18:10:47 -0000 1.69 @@ -94,7 +94,6 @@ private PageInfo pageInfo; private ErrorDispatcher err; - JspConfig.JspProperty jspProperty; private static final JspUtil.ValidAttribute[] pageDirectiveAttrs = { new JspUtil.ValidAttribute("language"), @@ -131,10 +130,6 @@ this.pageInfo = compiler.getPageInfo(); this.err = compiler.getErrorDispatcher(); JspCompilationContext ctxt = compiler.getCompilationContext(); - JspConfig jspConfig = ctxt.getOptions().getJspConfig(); - if (jspConfig != null) { - this.jspProperty = jspConfig.findJspProperty(ctxt.getJspFile()); - } } public void visit(Node.PageDirective n) throws JasperException { @@ -263,28 +258,22 @@ * a JSP configuration element (whose URL pattern matches * the page). * At this point, we've already verified (in - * ParserController.figureOutJspDocument()) that the page - * character encodings specified in a JSP config element - * and XML prolog match. + * ParserController.parse()) that the page character + * encodings specified in a JSP config element and XML + * prolog match. 
*/ - String compareEnc = null; - if (jspProperty != null) { - compareEnc = jspProperty.getPageEncoding(); - if (compareEnc != null && !compareEnc.equals(value)) { - err.jspError( - n, "jsp.error.page.config_pagedir_encoding_conflict", - compareEnc, value); + String compareEnc = pageInfo.getPageEncoding(); + if (!value.equals(compareEnc)) { + if (pageInfo.isXml()) { + err.jspError(n, + "jsp.error.prolog_pagedir_encoding_mismatch", + compareEnc, value); + } else { + err.jspError(n, + "jsp.error.config_pagedir_encoding_mismatch", + compareEnc, value); } } - if (compareEnc == null) { - compareEnc = pageInfo.getXmlPrologEncoding(); - if (compareEnc != null && !compareEnc.equals(value)) { - err.jspError( - n, "jsp.error.page.prolog_pagedir_encoding_conflict", - compareEnc, value); - } - } - pageInfo.setPageEncoding(value); } } 1.84 +4 -4 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/resources/messages.properties Index: messages.properties =================================================================== RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/resources/messages.properties,v retrieving revision 1.83 retrieving revision 1.84 diff -u -r1.83 -r1.84 --- messages.properties 22 Jan 2003 21:06:45 -0000 1.83 +++ messages.properties 27 Jan 2003 18:10:47 -0000 1.84 @@ -306,9 +306,9 @@ jsp.error.not.in.template={0} not allowed in a template text body. jsp.error.badStandardAction=The action is not a recognizable standard action. 
jsp.error.tagdirective.badbodycontent=Invalid body-content ({0}) in tag directive -jsp.error.page.config_pagedir_encoding_conflict=Page-encoding specified in jsp-property-group ({0}) is different from that specified in page directive ({1}) -jsp.error.page.prolog_pagedir_encoding_conflict=Page-encoding specified in XML prolog ({0}) is different from that specified in page directive ({1}) -jsp.error.page.prolog_config_encoding_conflict=Page-encoding specified in XML prolog ({0}) is different from that specified in jsp-property-group ({1}) +jsp.error.config_pagedir_encoding_mismatch=Page-encoding specified in jsp-property-group ({0}) is different from that specified in page directive ({1}) +jsp.error.prolog_pagedir_encoding_mismatch=Page-encoding specified in XML prolog ({0}) is different from that specified in page directive ({1}) +jsp.error.prolog_config_encoding_mismatch=Page-encoding specified in XML prolog ({0}) is different from that specified in jsp-property-group ({1}) jsp.error.attribute.custom.non_rt_with_expr=According to TLD, attribute {0} does not accept any expressions jsp.error.attribute.standard.non_rt_with_expr=The {0} attribute of the {1} standard action does not accept any expressions jsp.error.scripting.variable.missing_name=Unable to determine scripting variable name from attribute {0} 1.30 +4 -4 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/resources/messages_es.properties Index: messages_es.properties =================================================================== RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/resources/messages_es.properties,v retrieving revision 1.29 retrieving revision 1.30 diff -u -r1.29 -r1.30 --- messages_es.properties 22 Jan 2003 20:08:24 -0000 1.29 +++ messages_es.properties 27 Jan 2003 18:10:47 -0000 1.30 @@ -216,9 +216,9 @@ jsp.warning.bad.urlpattern.propertygroup= jsp.error.jspelement.missing.name= jsp.error.tagdirective.badbodycontent= 
-jsp.error.page.config_pagedir_encoding_conflict= -jsp.error.page.prolog_pagedir_encoding_conflict= -jsp.error.page.prolog_config_encoding_conflict= +jsp.error.config_pagedir_encoding_mismatch= +jsp.error.prolog_pagedir_encoding_mismatch= +jsp.error.prolog_config_encoding_mismatch= jsp.error.attribute.custom.non_rt_with_expr= jsp.error.scripting.variable.missing_name= jasper.error.emptybodycontent.nonempty= 1.13 +4 -4 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/resources/messages_fr.properties Index: messages_fr.properties =================================================================== RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/resources/messages_fr.properties,v retrieving revision 1.12 retrieving revision 1.13 diff -u -r1.12 -r1.13 --- messages_fr.properties 22 Jan 2003 20:08:24 -0000 1.12 +++ messages_fr.properties 27 Jan 2003 18:10:47 -0000 1.13 @@ -288,9 +288,9 @@ jsp.error.not.in.template={0} n''est pas autorisé dans le corps de texte de template. jsp.error.badStandardAction=L''action n''est pas reconnue comme une action standard. 
jsp.error.tagdirective.badbodycontent=Contenu de corps (body-content) ({0}) invalide dans la directive tag -jsp.error.page.config_pagedir_encoding_conflict=L''encode de page (Page-encoding) indiqué dans le jsp-property-group ({0}) est différent de celui indiqué dans la directive de page ({1}) -jsp.error.page.prolog_pagedir_encoding_conflict= -jsp.error.page.prolog_config_encoding_conflict= +jsp.error.config_pagedir_encoding_mismatch=L''encode de page (Page-encoding) indiqué dans le jsp-property-group ({0}) est différent de celui indiqué dans la directive de page ({1}) +jsp.error.prolog_pagedir_encoding_mismatch= +jsp.error.prolog_config_encoding_mismatch= jsp.error.attribute.custom.non_rt_with_expr=D''après la TLD, l''attribut {0} n''accepte aucune expression jsp.error.scripting.variable.missing_name=Incapable de déterminer le nom de variable scripting d''après l''attribut {0} jasper.error.emptybodycontent.nonempty=D''après la TLD, le tag {0} doit être vide, mais ne l''est pas 1.30 +4 -4 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/resources/messages_ja.properties Index: messages_ja.properties =================================================================== RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/resources/messages_ja.properties,v retrieving revision 1.29 retrieving revision 1.30 diff -u -r1.29 -r1.30 --- messages_ja.properties 22 Jan 2003 20:08:24 -0000 1.29 +++ messages_ja.properties 27 Jan 2003 18:10:47 -0000 1.30 @@ -247,9 +247,9 @@ jsp.warning.bad.urlpattern.propertygroup= jsp.error.jspelement.missing.name= jsp.error.tagdirective.badbodycontent= -jsp.error.page.config_pagedir_encoding_conflict= -jsp.error.page.prolog_pagedir_encoding_conflict= -jsp.error.page.prolog_config_encoding_conflict= +jsp.error.config_pagedir_encoding_mismatch= +jsp.error.prolog_pagedir_encoding_mismatch= +jsp.error.prolog_config_encoding_mismatch= jsp.error.attribute.custom.non_rt_with_expr= 
jsp.error.scripting.variable.missing_name= jasper.error.emptybodycontent.nonempty= 1.4 +6 -6 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/XMLEncodingDetector.java Index: XMLEncodingDetector.java =================================================================== RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/XMLEncodingDetector.java,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- XMLEncodingDetector.java 9 Jan 2003 17:43:15 -0000 1.3 +++ XMLEncodingDetector.java 27 Jan 2003 18:10:48 -0000 1.4 @@ -90,21 +90,21 @@ { XMLEncodingDetector detector=null; try { - Class.forName( "org.apache.xerces.util.SymbolTable"); + Class.forName("org.apache.xerces.util.SymbolTable"); Class detectorClass=Class.forName("org.apache.jasper.xmlparser.XercesEncodingDetector"); - detector=(XMLEncodingDetector)detectorClass.newInstance(); + detector = (XMLEncodingDetector) detectorClass.newInstance(); } catch(Exception ex ) { - detector=new XMLEncodingDetector(); + detector = new XMLEncodingDetector(); } return detector.getEncodingMethod(fname, jarFile, ctxt, err); } public Object[] getEncodingMethod(String fname, JarFile jarFile, - JspCompilationContext ctxt, - ErrorDispatcher err) + JspCompilationContext ctxt, + ErrorDispatcher err) throws IOException, JasperException { - Object result[]=new Object[]{ "UTF8", new Boolean(false) }; + Object result[] = new Object[] { "UTF8", new Boolean(true) }; return result; } } 1.4 +11 -7 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/XercesEncodingDetector.java Index: XercesEncodingDetector.java =================================================================== RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/XercesEncodingDetector.java,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- XercesEncodingDetector.java 22 Jan 2003 20:08:25 -0000 1.3 +++ XercesEncodingDetector.java 27 Jan 
2003 18:10:48 -0000 1.4 @@ -80,7 +80,7 @@ private InputStream stream; private String encoding; - private boolean isEncodingSetInProlog; + private boolean isFallback; private Boolean isBigEndian; private Reader reader; @@ -134,10 +134,10 @@ * @param err The error dispatcher * * @return Two-element array, where the first element (of type - * java.lang.String) contains the name of the autodetected encoding, and - * the second element (of type java.lang.Boolean) specifies whether the - * encoding was specified by the encoding attribute of an XML declaration - * (prolog). + * java.lang.String) contains the name of the (auto)detected encoding, + * and the second element specifies whether the default encoding + * (UTF-8) is being used as a fallback (because no encoding could be + * detected). */ public Object[] getEncoding(InputStream in, ErrorDispatcher err) throws IOException, JasperException @@ -149,7 +149,7 @@ detector.scanXMLDecl(); return new Object[] { detector.encoding, - new Boolean(detector.isEncodingSetInProlog) }; + new Boolean(detector.isFallback) }; } public Object[] getEncodingMethod(String fname, JarFile jarFile, @@ -319,6 +319,7 @@ private Object[] getEncodingName(byte[] b4, int count) { if (count < 2) { + isFallback = true; return new Object[]{"UTF-8", null}; } @@ -337,6 +338,7 @@ // default to UTF-8 if we don't have enough bytes to make a // good determination of the encoding if (count < 3) { + isFallback = true; return new Object [] {"UTF-8", null}; } @@ -349,6 +351,7 @@ // default to UTF-8 if we don't have enough bytes to make a // good determination of the encoding if (count < 4) { + isFallback = true; return new Object [] {"UTF-8", null}; } @@ -390,6 +393,7 @@ } // default encoding + isFallback = true; return new Object [] {"UTF-8", null}; } @@ -1302,7 +1306,7 @@ // set encoding on reader if (encodingPseudoAttr != null) { - isEncodingSetInProlog = true; + isFallback = false; encoding = encodingPseudoAttr; } }
--
To unsubscribe, e-mail: <mailto:[EMAIL PROTECTED]>
For additional commands, e-mail: <mailto:[EMAIL PROTECTED]>