Since characters in Java are Unicode, what does this code do when it encounters a
character whose code point is greater than 0xFF?
My suggestion is to first encode the path as a UTF-8 byte array, then encode the
bytes according to this algorithm.
Tim
[EMAIL PROTECTED] wrote:
> remm 01/01/29 19:50:09
>
> Modified: catalina/src/share/org/apache/catalina/servlets
> DefaultServlet.java
> Log:
> - Will now encode all unsafe characters on the URL.
>
> Revision Changes Path
> 1.22 +84 -28
>jakarta-tomcat-4.0/catalina/src/share/org/apache/catalina/servlets/DefaultServlet.java
>
> Index: DefaultServlet.java
> ===================================================================
> RCS file:
>/home/cvs/jakarta-tomcat-4.0/catalina/src/share/org/apache/catalina/servlets/DefaultServlet.java,v
> retrieving revision 1.21
> retrieving revision 1.22
> diff -u -r1.21 -r1.22
> --- DefaultServlet.java 2001/01/25 05:45:40 1.21
> +++ DefaultServlet.java 2001/01/30 03:50:08 1.22
> @@ -1,7 +1,7 @@
> /*
> - * $Header:
>/home/cvs/jakarta-tomcat-4.0/catalina/src/share/org/apache/catalina/servlets/DefaultServlet.java,v
> 1.21 2001/01/25 05:45:40 remm Exp $
> - * $Revision: 1.21 $
> - * $Date: 2001/01/25 05:45:40 $
> + * $Header:
>/home/cvs/jakarta-tomcat-4.0/catalina/src/share/org/apache/catalina/servlets/DefaultServlet.java,v
> 1.22 2001/01/30 03:50:08 remm Exp $
> + * $Revision: 1.22 $
> + * $Date: 2001/01/30 03:50:08 $
> *
> * ====================================================================
> *
> @@ -76,6 +76,7 @@
> import java.io.Reader;
> import java.io.InputStreamReader;
> import java.io.Writer;
> +import java.io.OutputStreamWriter;
> import java.net.MalformedURLException;
> import java.net.URL;
> import java.net.URLEncoder;
> @@ -87,6 +88,7 @@
> import java.util.Locale;
> import java.util.TimeZone;
> import java.util.Hashtable;
> +import java.util.BitSet;
> import java.text.ParseException;
> import java.text.SimpleDateFormat;
> import java.security.MessageDigest;
> @@ -119,7 +121,7 @@
> *
> * @author Craig R. McClanahan
> * @author Remy Maucherat
> - * @version $Revision: 1.21 $ $Date: 2001/01/25 05:45:40 $
> + * @version $Revision: 1.22 $ $Date: 2001/01/30 03:50:08 $
> */
>
> public class DefaultServlet
> @@ -219,6 +221,40 @@
> StringManager.getManager(Constants.Package);
>
>
> + /**
> + * Array containing the safe characters set.
> + */
> + protected static BitSet safeCharacters;
> +
> +
> + protected static final char[] hexadecimal =
> + {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
> + 'A', 'B', 'C', 'D', 'E', 'F'};
> +
> +
> + // ----------------------------------------------------- Static Initializer
> +
> +
> + static {
> + safeCharacters = new BitSet(256);
> + int i;
> + for (i = 'a'; i <= 'z'; i++) {
> + safeCharacters.set(i);
> + }
> + for (i = 'A'; i <= 'Z'; i++) {
> + safeCharacters.set(i);
> + }
> + for (i = '0'; i <= '9'; i++) {
> + safeCharacters.set(i);
> + }
> + safeCharacters.set('-');
> + safeCharacters.set('_');
> + safeCharacters.set('.');
> + safeCharacters.set('*');
> + safeCharacters.set('/');
> + }
> +
> +
> // --------------------------------------------------------- Public Methods
>
>
> @@ -853,7 +889,7 @@
> replaceChar +
> normalized.substring(index + 3);
> }
> -
> +
> // Normalize the slashes and add leading slash if necessary
> if (normalized.indexOf('\\') >= 0)
> normalized = normalized.replace('\\', '/');
> @@ -902,29 +938,49 @@
> * @param path Path which has to be rewiten
> */
> protected String rewriteUrl(String path) {
> -
> - String normalized = path;
> -
> - // Replace " " with "%20"
> - while (true) {
> - int index = normalized.indexOf(" ");
> - if (index < 0)
> - break;
> - normalized = normalized.substring(0, index) + "%20"
> - + normalized.substring(index + 1);
> - }
> -
> - // Replace "&" with "%26"
> - while (true) {
> - int index = normalized.indexOf("&");
> - if (index < 0)
> - break;
> - normalized = normalized.substring(0, index) + "%26"
> - + normalized.substring(index + 1);
> - }
> -
> - return normalized;
> -
> +
> + /**
> + * Note: This code portion is very similar to URLEncoder.encode.
> + * Unfortunately, there is no way to specify to the URLEncoder which
> + * characters should be encoded. Here, ' ' should be encoded as "%20"
> + * and '/' shouldn't be encoded.
> + */
> +
> + int maxBytesPerChar = 10;
> + int caseDiff = ('a' - 'A');
> + StringBuffer rewrittenPath = new StringBuffer(path.length());
> + ByteArrayOutputStream buf = new ByteArrayOutputStream(maxBytesPerChar);
> + OutputStreamWriter writer = new OutputStreamWriter(buf);
> +
> + for (int i = 0; i < path.length(); i++) {
> + int c = (int) path.charAt(i);
> + if (safeCharacters.get(c)) {
> + rewrittenPath.append((char)c);
> + } else {
> + // convert to external encoding before hex conversion
> + try {
> + writer.write(c);
> + writer.flush();
> + } catch(IOException e) {
> + buf.reset();
> + continue;
> + }
> + byte[] ba = buf.toByteArray();
> + for (int j = 0; j < ba.length; j++) {
> + // Converting each byte in the buffer
> + byte toEncode = ba[j];
> + rewrittenPath.append('%');
> + int low = (int) (toEncode & 0x0f);
> + int high = (int) ((toEncode & 0xf0) >> 4);
> + rewrittenPath.append(hexadecimal[high]);
> + rewrittenPath.append(hexadecimal[low]);
> + }
> + buf.reset();
> + }
> + }
> +
> + return rewrittenPath.toString();
> +
> }
>
>
>
>
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: [EMAIL PROTECTED]
> For additional commands, email: [EMAIL PROTECTED]
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, email: [EMAIL PROTECTED]