Hi, I've found a potential bug in the Basic Authentication module. Some of my users have usernames that contain national characters (encoded in UTF-8). The HTTP header-based authentication fails when the username or the password contains multibyte characters.
The root of the bug is the Base64 decoder, which decodes the Base64 stream into a char array: it converts each byte to an individual char, and this corrupts multibyte characters... Here is the patch: === Index: java/org/apache/catalina/util/Base64.java =================================================================== --- java/org/apache/catalina/util/Base64.java (revision 901368) +++ java/org/apache/catalina/util/Base64.java (working copy) @@ -283,5 +283,84 @@ } } + /** + * Decodes Base64 data into octects + * + * @param base64DataBC Byte array containing Base64 data + * @param decodedDataBC The decoded data bytes + */ + public static void decode( ByteChunk base64DataBC, ByteChunk decodedDataBC) + { + int start = base64DataBC.getStart(); + int end = base64DataBC.getEnd(); + byte[] base64Data = base64DataBC.getBuffer(); + + decodedDataBC.recycle(); + + // handle the edge case, so we don't have to worry about it later + if(end - start == 0) { return; } + int numberQuadruple = (end - start)/FOURBYTE; + byte b1=0,b2=0,b3=0, b4=0, marker0=0, marker1=0; + + // Throw away anything not in base64Data + + int encodedIndex = 0; + int dataIndex = start; + byte[] decodedData = null; + + { + // this sizes the output array properly - rlw + int lastData = end - start; + // ignore the '=' padding + while (base64Data[start+lastData-1] == PAD) + { + if (--lastData == 0) + { + return; + } + } + decodedDataBC.allocate(lastData - numberQuadruple, -1); + decodedDataBC.setEnd(lastData - numberQuadruple); + decodedData = decodedDataBC.getBuffer(); + } + + for (int i = 0; i < numberQuadruple; i++) + { + dataIndex = start + i * 4; + marker0 = base64Data[dataIndex + 2]; + marker1 = base64Data[dataIndex + 3]; + + b1 = base64Alphabet[base64Data[dataIndex]]; + b2 = base64Alphabet[base64Data[dataIndex +1]]; + + if (marker0 != PAD && marker1 != PAD) + { + //No PAD e.g 3cQl + b3 = base64Alphabet[ marker0 ]; + b4 = base64Alphabet[ marker1 ]; + + decodedData[encodedIndex] = (byte) (( b1 <<2 | 
b2>>4 ) & 0xff); + decodedData[encodedIndex + 1] = + (byte) ((((b2 & 0xf)<<4 ) |( (b3>>2) & 0xf) ) & 0xff); + decodedData[encodedIndex + 2] = (byte) (( b3<<6 | b4 ) & 0xff); + } + else if (marker0 == PAD) + { + //Two PAD e.g. 3c[Pad][Pad] + decodedData[encodedIndex] = (byte) (( b1 <<2 | b2>>4 ) & 0xff); + } + else if (marker1 == PAD) + { + //One PAD e.g. 3cQ[Pad] + b3 = base64Alphabet[ marker0 ]; + + decodedData[encodedIndex] = (byte) (( b1 <<2 | b2>>4 ) & 0xff); + decodedData[encodedIndex + 1] = + (byte) ((((b2 & 0xf)<<4 ) |( (b3>>2) & 0xf) ) & 0xff); + } + encodedIndex += 3; + } + } + } Index: java/org/apache/catalina/authenticator/BasicAuthenticator.java =================================================================== --- java/org/apache/catalina/authenticator/BasicAuthenticator.java (revision 901368) +++ java/org/apache/catalina/authenticator/BasicAuthenticator.java (working copy) @@ -161,18 +161,18 @@ // FIXME: Add trimming // authorizationBC.trim(); - CharChunk authorizationCC = authorization.getCharChunk(); - Base64.decode(authorizationBC, authorizationCC); + ByteChunk authorizationBCC = authorization.getByteChunk(); + Base64.decode(authorizationBC, authorizationBCC); // Get username and password - int colon = authorizationCC.indexOf(':'); + int colon = authorizationBCC.indexOf(':',0); if (colon < 0) { - username = authorizationCC.toString(); + username = authorizationBCC.toString(); } else { - char[] buf = authorizationCC.getBuffer(); + byte[] buf = authorizationBCC.getBuffer(); username = new String(buf, 0, colon); password = new String(buf, colon + 1, - authorizationCC.getEnd() - colon - 1); + authorizationBCC.getEnd() - colon - 1); } authorizationBC.setOffset(authorizationBC.getOffset() - 6); === It works, because the byte[] to String conversion supports the multibyte conversion and uses the encoding of the JVM. What do you think about it? 
Best regards, Gábor Auth --------------------------------------------------------------------- To unsubscribe, e-mail: users-unsubscr...@tomcat.apache.org For additional commands, e-mail: users-h...@tomcat.apache.org