Commit: patch 9.2.0622: str2blob() does not work with wide UTF-16 encoding

Christian Brabandt Fri, 12 Jun 2026 03:15:13 -0700

patch 9.2.0622: str2blob() does not work with wide UTF-16 encoding

Commit: 
https://github.com/vim/vim/commit/26dc90a21079a5f5ae472d98c05770ba2eb7868e
Author: Yasuhiro Matsumoto <[email protected]>
Date:   Fri Jun 12 10:00:37 2026 +0000


    patch 9.2.0622: str2blob() does not work with wide UTF-16 encoding
    
    Problem:  str2blob() does not work with wide UTF-16 encoding
    Solution: Use iconv() and convert the UTF-16 and similar encodings
              directly (Yasuhiro Matsumoto)
    
    str2blob() routed every target encoding through convert_string(), which
    treats all Unicode encodings as utf-8 and therefore left the bytes
    unconverted. As a result str2blob(['Hello'], {'encoding': 'utf-16le'})
    returned 0z48656C6C6F instead of 0z480065006C006C006F00, breaking the
    round-trip with blob2str(). Add the same wide-encoding handling blob2str()
    uses: join the list items with a newline, convert the whole string at once
    with the endianness-preserving encoding name, and append the raw bytes.
    
    closes: #20466
    
    Signed-off-by: Yasuhiro Matsumoto <[email protected]>
    Signed-off-by: Christian Brabandt <[email protected]>

diff --git a/src/strings.c b/src/strings.c
index ff63a3ef4..71f9d83e2 100644
--- a/src/strings.c
+++ b/src/strings.c
@@ -1584,6 +1584,7 @@ f_str2blob(typval_T *argvars, typval_T *rettv)
        return;
 
     char_u     *to_encoding = NULL;
+    char_u     *to_encoding_raw = NULL;  // Encoding name with endianness preserved for iconv
     if (argvars[1].v_type != VAR_UNKNOWN)
     {
        dict_T *d = argvars[1].vval.v_dict;
@@ -1591,50 +1592,144 @@ f_str2blob(typval_T *argvars, typval_T *rettv)
        {
            char_u *enc = dict_get_string(d, "encoding", FALSE);
            if (enc != NULL)
-               to_encoding = enc_canonize(enc_skip(enc));
+           {
+               char_u *enc_skipped = enc_skip(enc);
+               to_encoding = enc_canonize(enc_skipped);
+
+               // For iconv, preserve the endianness suffix by creating a
+               // normalized version with hyphens: "utf16le" -> "utf-16le"
+               to_encoding_raw = normalize_encoding_name(enc_skipped);
+               if (to_encoding_raw == NULL)
+               {
+                   emsg(_(e_out_of_memory));
+                   VIM_CLEAR(to_encoding);
+                   return;
+               }
+           }
        }
     }
 
-    FOR_ALL_LIST_ITEMS(list, li)
+    // Special handling for UTF-16/UCS-2/UTF-32/UCS-4 target encodings: join the
+    // list items with a newline and convert the whole string at once, so that
+    // the wide-encoded newline separators and embedded NUL bytes are preserved
+    // (mirrors blob2str()).  convert_string() cannot be used here because it
+    // treats every Unicode encoding as utf-8, leaving the bytes unconverted.
+    int to_prop = 0;
+    if (to_encoding != NULL)
+       to_prop = enc_canon_props(to_encoding);
+    if (to_encoding != NULL && (to_prop & (ENC_2BYTE | ENC_4BYTE | ENC_2WORD)))
     {
-       if (li->li_tv.v_type != VAR_STRING)
-           continue;
+       garray_T        str_ga;
 
-       string_T    str = {li->li_tv.vval.v_string, 0};
+       ga_init2(&str_ga, 1, 256);
+       FOR_ALL_LIST_ITEMS(list, li)
+       {
+           char_u *s;
 
-       if (str.string == NULL)
-           STR_LITERAL_SET(str, "");
-       else
-           str.length = STRLEN(str.string);
+           if (li->li_tv.v_type != VAR_STRING)
+               continue;
+
+           s = li->li_tv.vval.v_string;
 
-       if (to_encoding != NULL)
+           // Each list string item is separated by a newline in the blob
+           if (li != list->lv_first)
+               ga_append(&str_ga, NL);
+           if (s != NULL && *s != NUL)
+           {
+               int slen = (int)STRLEN(s);
+
+               if (ga_grow(&str_ga, slen) == FAIL)
+               {
+                   ga_clear(&str_ga);
+                   goto done;
+               }
+               mch_memmove((char_u *)str_ga.ga_data + str_ga.ga_len, s,
+                                                               (size_t)slen);
+               str_ga.ga_len += slen;
+           }
+       }
+
+       if (str_ga.ga_len > 0)
        {
-           int         res;
-           string_T    converted;
+           vimconv_T   vimconv;
 
-           res = convert_string(&str, p_enc, to_encoding, &converted);
-           if (res != OK)
+           vimconv.vc_type = CONV_NONE;
+           if (convert_setup_ext(&vimconv, p_enc, FALSE, to_encoding_raw, FALSE)
+                                                                   == FAIL)
            {
+               ga_clear(&str_ga);
                semsg(_(e_str_encoding_to_failed), to_encoding);
                goto done;
            }
-           str.string = converted.string;
-           str.length = converted.length;
+           vimconv.vc_fail = TRUE;
+
+           int         len = str_ga.ga_len;
+           char_u      *converted = string_convert_ext(&vimconv,
+                                   (char_u *)str_ga.ga_data, &len, NULL);
+           convert_setup(&vimconv, NULL, NULL);
+           ga_clear(&str_ga);
+
+           if (converted == NULL)
+           {
+               semsg(_(e_str_encoding_to_failed), to_encoding);
+               goto done;
+           }
+           if (len > 0 && ga_grow(&blob->bv_ga, len) == OK)
+           {
+               mch_memmove((char_u *)blob->bv_ga.ga_data + blob->bv_ga.ga_len,
+                                                   converted, (size_t)len);
+               blob->bv_ga.ga_len += len;
+           }
+           vim_free(converted);
        }
+       else
+           ga_clear(&str_ga);
+    }
+    else
+    {
+       FOR_ALL_LIST_ITEMS(list, li)
+       {
+           if (li->li_tv.v_type != VAR_STRING)
+               continue;
 
-       if (li != list->lv_first)
-           // Each list string item is separated by a newline in the blob
-           ga_append(&blob->bv_ga, NL);
+           string_T    str = {li->li_tv.vval.v_string, 0};
 
-       blob_from_string(str.string, blob);
+           if (str.string == NULL)
+               STR_LITERAL_SET(str, "");
+           else
+               str.length = STRLEN(str.string);
 
-       if (to_encoding != NULL)
-           vim_free(str.string);
+           if (to_encoding != NULL)
+           {
+               int         res;
+               string_T    converted;
+
+               res = convert_string(&str, p_enc, to_encoding, &converted);
+               if (res != OK)
+               {
+                   semsg(_(e_str_encoding_to_failed), to_encoding);
+                   goto done;
+               }
+               str.string = converted.string;
+               str.length = converted.length;
+           }
+
+           if (li != list->lv_first)
+               // Each list string item is separated by a newline in the blob
+               ga_append(&blob->bv_ga, NL);
+
+           blob_from_string(str.string, blob);
+
+           if (to_encoding != NULL)
+               vim_free(str.string);
+       }
     }
 
 done:
     if (to_encoding != NULL)
        vim_free(to_encoding);
+    if (to_encoding_raw != NULL)
+       vim_free(to_encoding_raw);
 }
 
 /*
diff --git a/src/testdir/test_functions.vim b/src/testdir/test_functions.vim
index 375359527..8ca73a62a 100644
--- a/src/testdir/test_functions.vim
+++ b/src/testdir/test_functions.vim
@@ -4513,6 +4513,20 @@ func Test_str2blob()
     call assert_equal(0zABBB0AABBB, str2blob(['«»', '«»'], {'encoding': 'latin1'}))
     call assert_equal(0zC2ABC2BB, str2blob(['«»'], {'encoding': 'utf8'}))
 
+    if has('iconv')
+      call assert_equal(0z480065006C006C006F00, str2blob(['Hello'], {'encoding': 'utf-16le'}))
+      call assert_equal(0z480065006C006C006F00, str2blob(['Hello'], {'encoding': 'utf16le'}))
+      call assert_equal(0z00480065006C006C006F, str2blob(['Hello'], {'encoding': 'utf-16be'}))
+      call assert_equal(0z48006900.0A004200.79006500, str2blob(['Hi', 'Bye'], {'encoding': 'utf-16le'}))
+      call assert_equal(0z61000A006200, str2blob(["a\nb"], {'encoding': 'utf-16le'}))
+      call assert_equal(0z, str2blob([''], {'encoding': 'utf-16le'}))
+      call assert_equal(0z0A00, str2blob(['', ''], {'encoding': 'utf-16le'}))
+      for enc in ['utf-16le', 'utf-16be', 'ucs-2le', 'utf-32le', 'utf-32be']
+        call assert_equal(['Hello', 'World'],
+              \ blob2str(str2blob(['Hello', 'World'], {'encoding': enc}), {'encoding': enc}), enc)
+      endfor
+    endif
+
     call assert_equal(0z62, str2blob(["b"], test_null_dict()))
     call assert_equal(0z63, str2blob(["c"], {'encoding': test_null_string()}))
 
@@ -4581,12 +4595,14 @@ func Test_blob2str()
     call assert_fails("call blob2str(0z6162, {'encoding': []})", 'E730: Using a List as a String')
     call assert_fails("call blob2str(0z6162, {'encoding': 'ab12xy'})", 'E1515: Unable to convert from ''ab12xy'' encoding')
 
-    #" UTF-16LE encoding
-    call assert_equal(['Hello'], blob2str(0z480065006C006C006F00, {'encoding': 'utf-16le'}))
-    call assert_equal(['Hello'], blob2str(0z480065006C006C006F00, {'encoding': 'utf16le'}))
-    #" UCS-2LE encoding
-    call assert_equal(['Hello'], blob2str(0z480065006C006C006F00, {'encoding': 'ucs-2le'}))
-    call assert_equal(['Hello'], blob2str(0z480065006C006C006F00, {'encoding': 'ucs2le'}))
+    if has("iconv")
+      #" UTF-16LE encoding
+      call assert_equal(['Hello'], blob2str(0z480065006C006C006F00, {'encoding': 'utf-16le'}))
+      call assert_equal(['Hello'], blob2str(0z480065006C006C006F00, {'encoding': 'utf16le'}))
+      #" UCS-2LE encoding
+      call assert_equal(['Hello'], blob2str(0z480065006C006C006F00, {'encoding': 'ucs-2le'}))
+      call assert_equal(['Hello'], blob2str(0z480065006C006C006F00, {'encoding': 'ucs2le'}))
+    endif
   END
   call v9.CheckLegacyAndVim9Success(lines)
 endfunc
diff --git a/src/version.c b/src/version.c
index fac15c00f..57bd82493 100644
--- a/src/version.c
+++ b/src/version.c
@@ -754,6 +754,8 @@ static char *(features[]) =
 
 static int included_patches[] =
 {   /* Add new patch number below this line */
+/**/
+    622,
 /**/
     621,
 /**/

-- 
-- 
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php

--- 
You received this message because you are subscribed to the Google Groups 
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion visit 
https://groups.google.com/d/msgid/vim_dev/E1wXyul-00B8CJ-FD%40256bit.org.

Commit: patch 9.2.0622: str2blob() does not work with wide UTF-16 encoding

Raspunde prin e-mail lui