From: Pilho Kim <[EMAIL PROTECTED]>
Subject: My patches for Tomcat 3.2 wrt multibyte characters
Date: Tue, 5 Dec 2000 09:47:38 +0900 (KST)
Message-ID: <[EMAIL PROTECTED]>
> Try to visit
>
> http://www.javaclue.org/tomcat/patch32/dopatch.html
>
> I hope that those would be adopted in TC 3.2.1.
We are developing similar patches for Japanese users. Your patches
have a few problems:
1, Don't change the DEFAULT_CHAR_ENCODING constant in
src/share/org/apache/tomcat/core/Constants.java
The basic rule of Web i18n is to specify the "right" charset. If you
specify the charset explicitly in Tomcat 3.2, your Web applications work
well except for a few well-known problems (JSP's include charset &
getParameter() etc. - they are resolved in Servlet API 2.3 & JSP 1.2).
And "iso-8859-1" is defined as the default charset in the Servlet API 2.3
final draft (see 4.9, 5.4, javax.servlet.ServletResponse). It isn't
desirable to introduce another i18n concept to Servlet API 2.2.
Of course, some internal modules such as DefaultCMSetter must send a
reply in the platform's native charset because of the native library's
localized messages etc. In this case, it is better to specify the charset
internally.
2, Don't change Jasper's default encoding to a platform native
character encoding.
This spoils the platform independence of JSP files. For example, it is
very popular to serve JSP files encoded in Shift_JIS (this is used on
Windows PCs) on a Unix-variant server whose default charset is EUC-JP.
And JSP files that specify a charset work well except for the "include"
directive.
JSP 1.2 provides the "pageEncoding" attribute for this problem. But there
is no workaround in JSP 1.1. This is a serious problem. I think a
charset-inheritance mechanism would be better, if possible.
We plan to provide a pageEncoding patch (an implementation of the JSP 1.2
feature for JSP 1.1) for this problem, and this patch will be used
at the user's own risk. But it isn't good to provide it officially.
3, Don't use non-IANA charset names.
Java's default encoding name is, in most cases, the converter's name, and
it isn't included in the IANA registry.
But there is no ideal way to convert Java's encoding name to an IANA
charset name in the current JDK & Tomcat.
I think that it is reasonable to use
org.apache.tomcat.util.LocaleToCharsetMap (see ResponseImpl.java).
I am sending a new patch for org.apache.tomcat.context.DefaultCMSetter.
This patch sets the charset to iso-8859-1 in the case that
LocaleToCharsetMap returns null (the specified locale isn't registered),
but there may be a better way to code this.
Kazuhiro Kazama ([EMAIL PROTECTED]) NTT Network Innovation Laboratories
--- DefaultCMSetter.java.orig Tue Dec 5 11:49:48 2000
+++ DefaultCMSetter.java Tue Dec 5 18:04:24 2000
@@ -141,17 +141,24 @@
class NotFoundHandler extends ServletWrapper {
static StringManager sm=StringManager.
getManager("org.apache.tomcat.resources");
+ String charset;
NotFoundHandler() {
initialized=true;
internal=true;
name="tomcat.notFoundHandler";
+ charset = LocaleToCharsetMap.getCharset(Locale.getDefault());
+ if (charset == null)
+ charset = "ISO-8859-1";
}
public void doService(Request req, Response res)
throws Exception
{
- res.setContentType("text/html"); // ISO-8859-1 default
+ if (charset.equalsIgnoreCase("ISO-8859-1"))
+ res.setContentType("text/html");
+ else
+ res.setContentType("text/html; charset=" + charset);
String requestURI = (String)req.
getAttribute("javax.servlet.include.request_uri");
@@ -186,30 +193,28 @@
buf.append("</body>\r\n");
- String body = buf.toString();
+ byte[] body = new String(buf).getBytes(charset);
- res.setContentLength(body.length());
+ res.setContentLength(body.length);
- if( res.isUsingStream() ) {
- ServletOutputStream out = res.getOutputStream();
- out.print(body);
- out.flush();
- } else {
- PrintWriter out = res.getWriter();
- out.print(body);
- out.flush();
- }
+ ServletOutputStream out = res.getOutputStream();
+ out.write(body);
+ out.flush();
}
}
class ExceptionHandler extends ServletWrapper {
static StringManager sm=StringManager.
getManager("org.apache.tomcat.resources");
+ String charset;
ExceptionHandler() {
initialized=true;
internal=true;
name="tomcat.exceptionHandler";
+ charset = LocaleToCharsetMap.getCharset(Locale.getDefault());
+ if (charset == null)
+ charset = "ISO-8859-1";
}
public void doService(Request req, Response res)
@@ -226,7 +231,10 @@
return;
}
- res.setContentType("text/html");
+ if (charset.equalsIgnoreCase("ISO-8859-1"))
+ res.setContentType("text/html");
+ else
+ res.setContentType("text/html; charset=" + charset);
res.setStatus( 500 );
StringBuffer buf = new StringBuffer();
@@ -303,24 +311,26 @@
buf.append("\r\n");
- if( res.isUsingStream() ) {
- ServletOutputStream out = res.getOutputStream();
- out.print(buf.toString());
- } else {
- PrintWriter out = res.getWriter();
- out.print(buf.toString());
- }
+ byte[] body = buf.toString().getBytes(charset);
+
+ ServletOutputStream out = res.getOutputStream();
+ out.write(body);
+ out.flush();
}
}
class StatusHandler extends ServletWrapper {
static StringManager sm=StringManager.
getManager("org.apache.tomcat.resources");
+ String charset;
StatusHandler() {
initialized=true;
internal=true;
name="tomcat.statusHandler";
+ charset = LocaleToCharsetMap.getCharset(Locale.getDefault());
+ if (charset == null)
+ charset = "ISO-8859-1";
}
// We don't want interceptors called for redirect
@@ -331,7 +341,10 @@
String msg=(String)req.getAttribute("javax.servlet.error.message");
String errorURI = res.getErrorURI();
- res.setContentType("text/html");
+ if (charset.equalsIgnoreCase("ISO-8859-1"))
+ res.setContentType("text/html");
+ else
+ res.setContentType("text/html; charset=" + charset);
// res is reset !!!
// status is already set
int sc=res.getStatus();
@@ -399,24 +412,26 @@
buf.append("</body>\r\n");
- if( res.isUsingStream() ) {
- ServletOutputStream out = res.getOutputStream();
- out.print(buf.toString());
- } else {
- PrintWriter out = res.getWriter();
- out.print(buf.toString());
- }
+ byte[] body = new String(buf).getBytes(charset);
+
+ ServletOutputStream out = res.getOutputStream();
+ out.write(body);
+ out.flush();
}
}
class RedirectHandler extends ServletWrapper {
static StringManager sm=StringManager.
getManager("org.apache.tomcat.resources");
+ String charset;
RedirectHandler() {
initialized=true;
internal=true;
name="tomcat.redirectHandler";
+ charset = LocaleToCharsetMap.getCharset(Locale.getDefault());
+ if (charset == null)
+ charset = "ISO-8859-1";
}
// We don't want interceptors called for redirect
@@ -432,7 +447,10 @@
if( debug>0) ctx.log("Redirect " + location + " " + req );
- res.setContentType("text/html"); // ISO-8859-1 default
+ if (charset.equalsIgnoreCase("ISO-8859-1"))
+ res.setContentType("text/html");
+ else
+ res.setContentType("text/html; charset=" + charset);
res.setHeader("Location", location);
StringBuffer buf = new StringBuffer();
@@ -446,19 +464,13 @@
append(location).
append("\">here</a>.<p>\r\n</body>\r\n");
- String body = buf.toString();
+ byte[] body = new String(buf).getBytes(charset);
- res.setContentLength(body.length());
+ res.setContentLength(body.length);
- if( res.isUsingStream() ) {
- ServletOutputStream out = res.getOutputStream();
- out.print(body);
- out.flush();
- } else {
- PrintWriter out = res.getWriter();
- out.print(body);
- out.flush();
- }
+ ServletOutputStream out = res.getOutputStream();
+ out.write(body);
+ out.flush();
}
// XXX Move it to URLUtil !!!