The current implementation of the udf_translate_to_linux function does
not support multi-byte characters at all: it counts bytes when
calculating the extension length, and when inserting the CRC into the
name it does not take character boundaries into account, so it can
split the name in the middle of a character.

The most efficient way to properly support multi-byte characters is to
merge the translation operations directly into the conversion function.
This helps to avoid extra passes over the string, or parsing a
multi-byte character back into Unicode just to find out its length.

Signed-off-by: Andrew Gabbasov <andrew_gabba...@mentor.com>
---
 fs/udf/unicode.c | 260 ++++++++++++++++++++++++++++++-------------------------
 1 file changed, 141 insertions(+), 119 deletions(-)

diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index f1cdeac..1dc967d 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -28,9 +28,6 @@
 
 #include "udf_sb.h"
 
-static int udf_translate_to_linux(uint8_t *, int, const uint8_t *, int,
-                                 const uint8_t *, int);
-
 static int udf_uni2char_utf8(wchar_t uni,
                             unsigned char *out,
                             int boundlen)
@@ -114,13 +111,32 @@ static int udf_char2uni_utf8(const unsigned char *in,
        return u_len;
 }
 
+#define ILLEGAL_CHAR_MARK      '_'
+#define EXT_MARK               '.'
+#define CRC_MARK               '#'
+#define EXT_SIZE               5
+/* Number of chars we need to store generated CRC to make filename unique */
+#define CRC_LEN                        5
+
 static int udf_name_from_CS0(uint8_t *str_o, int str_max_len,
                             const uint8_t *ocu, int ocu_len,
-                            int (*conv_f)(wchar_t, unsigned char *, int))
+                            int (*conv_f)(wchar_t, unsigned char *, int),
+                            int translate)
 {
+       uint32_t c;
        uint8_t cmp_id;
        int i, len;
-       int str_o_len = 0;
+       int u_ch;
+       int firstDots = 0, needsCRC = 0, illChar;
+       int ext_i_len, ext_max_len;
+       int str_o_len = 0;      /* Length of resulting output */
+       int ext_o_len = 0;      /* Extension output length */
+       int ext_crc_len = 0;    /* Extension output length if used with CRC */
+       int i_ext = -1;         /* Extension position in input buffer */
+       int o_crc = 0;          /* Rightmost possible output pos for CRC+ext */
+       unsigned short valueCRC;
+       uint8_t ext[EXT_SIZE * NLS_MAX_CHARSET_SIZE + 1];
+       uint8_t crc[CRC_LEN];
 
        if (str_max_len <= 0)
                return 0;
@@ -133,22 +149,134 @@ static int udf_name_from_CS0(uint8_t *str_o, int 
str_max_len,
        cmp_id = ocu[0];
        if (cmp_id != 8 && cmp_id != 16) {
                memset(str_o, 0, str_max_len);
-               pr_err("unknown compression code (%d) stri=%s\n", cmp_id, ocu);
+               pr_err("unknown compression code (%d)\n", cmp_id);
                return -EINVAL;
        }
+       u_ch = cmp_id >> 3;
+
+       ocu++;
+       ocu_len--;
+
+       if (translate) {
+               /* Look for extension */
+               for (i = (ocu_len & ~(u_ch - 1)) - u_ch, ext_i_len = 0;
+                    (i >= 0) && (ext_i_len < EXT_SIZE);
+                    i -= u_ch, ext_i_len++) {
+
+                       c = ocu[i];
+                       if (u_ch > 1)
+                               c = (c << 8) | ocu[i + 1];
+
+                       if (c == EXT_MARK) {
+                               if (ext_i_len)
+                                       i_ext = i;
+                               break;
+                       }
+               }
+               if (i_ext >= 0) {
+                       /* Convert extension */
+                       ext_max_len = min_t(int, sizeof(ext), str_max_len);
+                       ext[ext_o_len++] = EXT_MARK;
+                       illChar = 0;
+                       for (i = i_ext + u_ch; i < ocu_len;) {
+
+                               c = ocu[i++];
+                               if (u_ch > 1)
+                                       c = (c << 8) | ocu[i++];
+
+                               if (c == '/' || c == 0) {
+                                       if (illChar)
+                                               continue;
+                                       illChar = 1;
+                                       needsCRC = 1;
+                                       c = ILLEGAL_CHAR_MARK;
+                               } else {
+                                       illChar = 0;
+                               }
+
+                               len = conv_f(c, &ext[ext_o_len],
+                                            ext_max_len - ext_o_len);
+                               /* Valid character? */
+                               if (len >= 0) {
+                                       ext_o_len += len;
+                               } else {
+                                       ext[ext_o_len++] = '?';
+                                       needsCRC = 1;
+                               }
+                               if ((ext_o_len + CRC_LEN) < str_max_len)
+                                       ext_crc_len = ext_o_len;
+                       }
+               }
+       }
+
+       illChar = 0;
+       for (i = 0; i < ocu_len;) {
+
+               if (str_o_len >= str_max_len) {
+                       needsCRC = 1;
+                       break;
+               }
+
+               if (translate && (i == i_ext)) {
+                       if (str_o_len > (str_max_len - ext_o_len))
+                               needsCRC = 1;
+                       break;
+               }
 
-       for (i = 1; (i < ocu_len) && (str_o_len < str_max_len);) {
                /* Expand OSTA compressed Unicode to Unicode */
-               uint32_t c = ocu[i++];
-               if (cmp_id == 16)
+               c = ocu[i++];
+               if (u_ch > 1)
                        c = (c << 8) | ocu[i++];
 
+               if (translate) {
+                       if ((c == '.') && (firstDots >= 0))
+                               firstDots++;
+                       else
+                               firstDots = -1;
+
+                       if (c == '/' || c == 0) {
+                               if (illChar)
+                                       continue;
+                               illChar = 1;
+                               needsCRC = 1;
+                               c = ILLEGAL_CHAR_MARK;
+                       } else {
+                               illChar = 0;
+                       }
+               }
+
                len = conv_f(c, &str_o[str_o_len], str_max_len - str_o_len);
                /* Valid character? */
-               if (len >= 0)
+               if (len >= 0) {
                        str_o_len += len;
-               else
+               } else {
                        str_o[str_o_len++] = '?';
+                       needsCRC = 1;
+               }
+               if (str_o_len <= (str_max_len - ext_o_len - CRC_LEN))
+                       o_crc = str_o_len;
+       }
+
+       if (translate) {
+               if ((firstDots == 1) || (firstDots == 2))
+                       needsCRC = 1;
+               if (needsCRC) {
+                       str_o_len = o_crc;
+                       valueCRC = crc_itu_t(0, ocu, ocu_len);
+                       crc[0] = CRC_MARK;
+                       crc[1] = hex_asc_upper_hi(valueCRC >> 8);
+                       crc[2] = hex_asc_upper_lo(valueCRC >> 8);
+                       crc[3] = hex_asc_upper_hi(valueCRC);
+                       crc[4] = hex_asc_upper_lo(valueCRC);
+                       len = min_t(int, CRC_LEN, str_max_len - str_o_len);
+                       memcpy(&str_o[str_o_len], crc, len);
+                       str_o_len += len;
+                       ext_o_len = ext_crc_len;
+               }
+               if (ext_o_len > 0) {
+                       memcpy(&str_o[str_o_len], ext, ext_o_len);
+                       str_o_len += ext_o_len;
+               }
        }
 
        return str_o_len;
@@ -202,13 +330,12 @@ try_again:
 int udf_CS0toUTF8(uint8_t *utf_o, int o_len, const uint8_t *ocu_i, int i_len)
 {
        return udf_name_from_CS0(utf_o, o_len, ocu_i, i_len,
-                                udf_uni2char_utf8);
+                                udf_uni2char_utf8, 0);
 }
 
 int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen,
                     uint8_t *dname, int dlen)
 {
-       uint8_t *filename;
        int (*conv_f)(wchar_t, unsigned char *, int);
        int ret;
 
@@ -218,10 +345,6 @@ int udf_get_filename(struct super_block *sb, const uint8_t 
*sname, int slen,
        if (dlen <= 0)
                return 0;
 
-       filename = kmalloc(dlen, GFP_NOFS);
-       if (!filename)
-               return -ENOMEM;
-
        if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
                conv_f = udf_uni2char_utf8;
        } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
@@ -229,18 +352,10 @@ int udf_get_filename(struct super_block *sb, const 
uint8_t *sname, int slen,
        } else
                BUG();
 
-       ret = udf_name_from_CS0(filename, dlen, sname, slen, conv_f);
-       if (ret < 0) {
-               udf_debug("Failed in udf_get_filename: sname = %s\n", sname);
-               goto out2;
-       }
-
-       ret = udf_translate_to_linux(dname, dlen, filename, dlen, sname, slen);
+       ret = udf_name_from_CS0(dname, dlen, sname, slen, conv_f, 1);
        /* Zero length filename isn't valid... */
        if (ret == 0)
                ret = -EINVAL;
-out2:
-       kfree(filename);
        return ret;
 }
 
@@ -259,96 +374,3 @@ int udf_put_filename(struct super_block *sb, const uint8_t 
*sname, int slen,
        return udf_name_to_CS0(dname, dlen, sname, slen, conv_f);
 }
 
-#define ILLEGAL_CHAR_MARK      '_'
-#define EXT_MARK               '.'
-#define CRC_MARK               '#'
-#define EXT_SIZE               5
-/* Number of chars we need to store generated CRC to make filename unique */
-#define CRC_LEN                        5
-
-static int udf_translate_to_linux(uint8_t *newName, int newLen,
-                                 const uint8_t *udfName, int udfLen,
-                                 const uint8_t *fidName, int fidNameLen)
-{
-       int index, newIndex = 0, needsCRC = 0;
-       int extIndex = 0, newExtIndex = 0, hasExt = 0;
-       unsigned short valueCRC;
-       uint8_t curr;
-
-       if (udfName[0] == '.' &&
-           (udfLen == 1 || (udfLen == 2 && udfName[1] == '.'))) {
-               needsCRC = 1;
-               newIndex = udfLen;
-               memcpy(newName, udfName, udfLen);
-       } else {
-               for (index = 0; index < udfLen; index++) {
-                       curr = udfName[index];
-                       if (curr == '/' || curr == 0) {
-                               needsCRC = 1;
-                               curr = ILLEGAL_CHAR_MARK;
-                               while (index + 1 < udfLen &&
-                                               (udfName[index + 1] == '/' ||
-                                                udfName[index + 1] == 0))
-                                       index++;
-                       }
-                       if (curr == EXT_MARK &&
-                                       (udfLen - index - 1) <= EXT_SIZE) {
-                               if (udfLen == index + 1)
-                                       hasExt = 0;
-                               else {
-                                       hasExt = 1;
-                                       extIndex = index;
-                                       newExtIndex = newIndex;
-                               }
-                       }
-                       if (newIndex < newLen)
-                               newName[newIndex++] = curr;
-                       else
-                               needsCRC = 1;
-               }
-       }
-       if (needsCRC) {
-               uint8_t ext[EXT_SIZE];
-               int localExtIndex = 0;
-
-               if (hasExt) {
-                       int maxFilenameLen;
-                       for (index = 0;
-                            index < EXT_SIZE && extIndex + index + 1 < udfLen;
-                            index++) {
-                               curr = udfName[extIndex + index + 1];
-
-                               if (curr == '/' || curr == 0) {
-                                       needsCRC = 1;
-                                       curr = ILLEGAL_CHAR_MARK;
-                                       while (extIndex + index + 2 < udfLen &&
-                                             (index + 1 < EXT_SIZE &&
-                                               (udfName[extIndex + index + 2] 
== '/' ||
-                                                udfName[extIndex + index + 2] 
== 0)))
-                                               index++;
-                               }
-                               ext[localExtIndex++] = curr;
-                       }
-                       maxFilenameLen = newLen - CRC_LEN - localExtIndex;
-                       if (newIndex > maxFilenameLen)
-                               newIndex = maxFilenameLen;
-                       else
-                               newIndex = newExtIndex;
-               } else if (newIndex > newLen - CRC_LEN)
-                       newIndex = newLen - CRC_LEN;
-               newName[newIndex++] = CRC_MARK;
-               valueCRC = crc_itu_t(0, fidName, fidNameLen);
-               newName[newIndex++] = hex_asc_upper_hi(valueCRC >> 8);
-               newName[newIndex++] = hex_asc_upper_lo(valueCRC >> 8);
-               newName[newIndex++] = hex_asc_upper_hi(valueCRC);
-               newName[newIndex++] = hex_asc_upper_lo(valueCRC);
-
-               if (hasExt) {
-                       newName[newIndex++] = EXT_MARK;
-                       for (index = 0; index < localExtIndex; index++)
-                               newName[newIndex++] = ext[index];
-               }
-       }
-
-       return newIndex;
-}
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to