https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=247494

--- Comment #8 from Conrad Meyer <c...@freebsd.org> ---
With this second patch:

(echo 耳 ; echo 脳 ; echo 耳) | LC_CTYPE=ja_JP.UTF-8 LC_COLLATE=C LANG=C sort --radixsort --debug
Using collate rules of C locale
Byte sort is used
sort_method=radixsort
; k1=<耳>(1), k2=<脳>(1); s1=<耳>, s2=<脳>; cmp1=-256
; k1=<脳>(1), k2=<耳>(1); s1=<脳>, s2=<耳>; cmp1=256
; k1=<耳>(1), k2=<耳>(1); s1=<耳>, s2=<耳>; cmp1=0
耳
耳
脳

Which seems correct.  In C mode:

(echo 耳 ; echo 脳 ; echo 耳) | LC_CTYPE=C LC_COLLATE=C LANG=C sort --radixsort --debug
Using collate rules of C locale
Byte sort is used
sort_method=radixsort
; offset=1
; k1=<耳>(3), k2=<脳>(3); offset=1; s1=<耳>, s2=<脳>; cmp1=-4
; offset=1
; k1=<脳>(3), k2=<耳>(3); offset=1; s1=<脳>, s2=<耳>; cmp1=4
; offset=1
; k1=<耳>(3), k2=<耳>(3); offset=1; s1=<耳>, s2=<耳>; cmp1=0
耳
耳
脳

@@ -317,6 +339,7 @@ free_sort_level(struct sort_level *sl)
 static void
 run_sort_level_next(struct sort_level *sl)
 {
+       const size_t wcfact = (MB_CUR_MAX == 1) ? 1 : sizeof(wchar_t);
        struct sort_level *slc;
        size_t i, sln, tosort_num;

@@ -333,8 +360,16 @@ run_sort_level_next(struct sort_level *sl)
                sort_left_dec(1);
                goto end;
        case (2):
+               /*
+                * Radixsort only processes a single byte at a time.  In wchar
+                * mode, this can be a subset of the length of a character.
+                * list_coll_offset() offset is in units of wchar, not bytes.
+                * So to calculate the offset, we must divide by
+                * sizeof(wchar_t) and round down to the index of the first
+                * character this level references.
+                */
                if (list_coll_offset(&(sl->tosort[0]), &(sl->tosort[1]),
-                   sl->level) > 0) {
+                   sl->level / wcfact) > 0) {
                        sl->sorted[sl->start_position++] = sl->tosort[1];
                        sl->sorted[sl->start_position] = sl->tosort[0];
                } else {
@@ -348,7 +383,13 @@ run_sort_level_next(struct sort_level *sl)
                if (TINY_NODE(sl) || (sl->level > 15)) {
                        listcoll_t func;

-                       func = get_list_call_func(sl->level);
+                       /*
+                        * Collate comparison offset is in units of
+                        * character-width, so we must divide the level (bytes)
+                        * by operating character width (wchar_t or char).  See
+                        * longer comment above.
+                        */
+                       func = get_list_call_func(sl->level / wcfact);

                        sl->leaves = sl->tosort;
                        sl->leaves_num = sl->tosort_num;

-- 
You are receiving this mail because:
You are the assignee for the bug.
_______________________________________________
freebsd-bugs@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-bugs
To unsubscribe, send any mail to "freebsd-bugs-unsubscr...@freebsd.org"

Reply via email to