https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=247494
--- Comment #8 from Conrad Meyer <c...@freebsd.org> ---
With this second patch:

(echo 耳 ; echo 脳 ; echo 耳) | LC_CTYPE=ja_JP.UTF-8 LC_COLLATE=C LANG=C sort --radixsort --debug
Using collate rules of C locale
Byte sort is used
sort_method=radixsort
; k1=<耳>(1), k2=<脳>(1); s1=<耳>, s2=<脳>; cmp1=-256
; k1=<脳>(1), k2=<耳>(1); s1=<脳>, s2=<耳>; cmp1=256
; k1=<耳>(1), k2=<耳>(1); s1=<耳>, s2=<耳>; cmp1=0
耳
耳
脳

Which seems correct.

In C mode:

(echo 耳 ; echo 脳 ; echo 耳) | LC_CTYPE=C LC_COLLATE=C LANG=C sort --radixsort --debug
Using collate rules of C locale
Byte sort is used
sort_method=radixsort
; offset=1
; k1=<耳>(3), k2=<脳>(3); offset=1; s1=<耳>, s2=<脳>; cmp1=-4
; offset=1
; k1=<脳>(3), k2=<耳>(3); offset=1; s1=<脳>, s2=<耳>; cmp1=4
; offset=1
; k1=<耳>(3), k2=<耳>(3); offset=1; s1=<耳>, s2=<耳>; cmp1=0
耳
耳
脳

@@ -317,6 +339,7 @@ free_sort_level(struct sort_level *sl)
 static void
 run_sort_level_next(struct sort_level *sl)
 {
+	const size_t wcfact = (MB_CUR_MAX == 1) ? 1 : sizeof(wchar_t);
 	struct sort_level *slc;
 	size_t i, sln, tosort_num;
 
@@ -333,8 +360,16 @@ run_sort_level_next(struct sort_level *sl)
 		sort_left_dec(1);
 		goto end;
 	case (2):
+		/*
+		 * Radixsort only processes a single byte at a time.  In wchar
+		 * mode, this can be a subset of the length of a character.
+		 * list_coll_offset() offset is in units of wchar, not bytes.
+		 * So to calculate the offset, we must divide by
+		 * sizeof(wchar_t) and round down to the index of the first
+		 * character this level references.
+		 */
 		if (list_coll_offset(&(sl->tosort[0]), &(sl->tosort[1]),
-		    sl->level) > 0) {
+		    sl->level / wcfact) > 0) {
 			sl->sorted[sl->start_position++] = sl->tosort[1];
 			sl->sorted[sl->start_position] = sl->tosort[0];
 		} else {
@@ -348,7 +383,13 @@ run_sort_level_next(struct sort_level *sl)
 		if (TINY_NODE(sl) || (sl->level > 15)) {
 			listcoll_t func;
 
-			func = get_list_call_func(sl->level);
+			/*
+			 * Collate comparison offset is in units of
+			 * character-width, so we must divide the level (bytes)
+			 * by operating character width (wchar_t or char).  See
+			 * longer comment above.
+			 */
+			func = get_list_call_func(sl->level / wcfact);
 
 			sl->leaves = sl->tosort;
 			sl->leaves_num = sl->tosort_num;

-- 
You are receiving this mail because:
You are the assignee for the bug.
_______________________________________________
freebsd-bugs@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-bugs
To unsubscribe, send any mail to "freebsd-bugs-unsubscr...@freebsd.org"