Although the behavior of strcasecmp is unspecified for multibyte
encodings (is that right?)
http://pubs.opengroup.org/onlinepubs/9699919799/
I wish the attached test (encoded in UTF-8) would pass,
so I'm also attaching a patch for strcasecmp and strncasecmp that makes
this test pass. It uses only LC_CTYPE (not LC_COLLATE) and a
simple numeric comparison when the strings are different (as the
previous version did).
--
Dios, gracias por tu amor infinito.
--
Vladimir Támara Patiño. http://vtamara.pasosdeJesus.org/
http://www.pasosdejesus.org/dominio_publico_colombia.html
#include <locale.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
/*
 * Regression test: after selecting the es_CO.UTF-8 locale, compare the
 * lowercase and uppercase forms of "ñ" with strcasecmp().
 *
 * Prints "OK" when they compare equal and "Error" otherwise, returning 0 in
 * both cases. If the locale cannot be selected the comparison would say
 * nothing about strcasecmp() itself, so that condition is reported on stderr
 * and the program exits with status 1 instead of printing a misleading
 * "Error".
 */
int
main(void)
{
	/* setlocale() returns NULL when the requested locale is unavailable. */
	if (setlocale(LC_ALL, "es_CO.UTF-8") == NULL) {
		fprintf(stderr, "setlocale: es_CO.UTF-8 is not available\n");
		return 1;
	}

	if (strcasecmp("ñ", "Ñ") == 0) {
		printf("OK");
	} else {
		printf("Error");
	}
	return 0;
}
--- src53/lib/libc/string/strcasecmp.c Mon Mar 25 18:28:29 2013
+++ src/lib/libc/string/strcasecmp.c Tue Apr 9 11:34:44 2013
@@ -30,6 +30,8 @@
*/
#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
typedef unsigned char u_char;
@@ -76,29 +78,36 @@
int
strcasecmp(const char *s1, const char *s2)
{
- const u_char *cm = charmap;
- const u_char *us1 = (const u_char *)s1;
- const u_char *us2 = (const u_char *)s2;
-
- while (cm[*us1] == cm[*us2++])
- if (*us1++ == '\0')
- return (0);
- return (cm[*us1] - cm[*--us2]);
+ return strncasecmp(s1, s2, strlen(s1) + 1);
}
+/** Uses LC_CTYPE but not LC_COLLATE */
int
strncasecmp(const char *s1, const char *s2, size_t n)
{
+ mbstate_t mb1, mb2;
+ bzero(&mb1, sizeof(mb1));
+ bzero(&mb2, sizeof(mb2));
+ mbsinit(&mb1);
+ mbsinit(&mb2);
+
if (n != 0) {
- const u_char *cm = charmap;
const u_char *us1 = (const u_char *)s1;
const u_char *us2 = (const u_char *)s2;
+ size_t lus1 = strlen(us1);
+ size_t lus2 = strlen(us2);
do {
- if (cm[*us1] != cm[*us2++])
- return (cm[*us1] - cm[*--us2]);
- if (*us1++ == '\0')
+ wchar_t w1, w2, l1, l2;
+ size_t d1 = mbrtowc(&w1, us1, lus1, &mb1);
+ size_t d2 = mbrtowc(&w2, us2, lus2, &mb2);
+ if ((l1 = towlower(w1)) != (l2 = towlower(w2))) {
+ return l1 - l2;
+ }
+ if (*us1 == '\0')
break;
+ us2 += d2;
+ us1 += d1;
} while (--n != 0);
}
return (0);