Thanks to Stephan and Matthew for the feedback.
I am updating the patch with all of your feedback, taking into account that
n in strncmp counts bytes of s1 (as stated in the thread from
the POSIX list sent by Matthew).
On Tue, Apr 09, 2013 at 11:17:26AM -0700, Matthew Dempsky wrote:
These strlen() calls are also wrong, because they could read past the
n bytes allowed for strncasecmp().
O
> It seems strncasecmp() cannot return errors according to POSIX.
> Only the _l variants have errors defined.
The convention for functions like these to report errors is that the caller
is required to set "errno = 0;" before the call, and then check for
"errno != 0" after the call. :(
> While I recognize that POSIX mandates the current locale to be used for
> case conversion, I'm not sure this change is worth the extra complexity.
> As you point out, the standard seems to throw its hands up in the air
> when the question about conversion errors with strcasecmp() and strncasecmp()
> is raised, possibly because these functions are older than the locale concept.
The definition of strncasecmp() has actually been a pretty contentious
topic on the POSIX mailing list for the past few weeks, albeit mostly
focused on whether LC_CTYPE=POSIX allows for UTF-8 or not.
E.g., http://austingroupbugs.net/view.php?id=663
I'm inclined to leave the functions as is for now.
--
Dios, gracias por tu amor infinito.
--
Vladimir Támara Patiño. http://vtamara.pasosdeJesus.org/
http://www.pasosdejesus.org/dominio_publico_colombia.html
--- src53/lib/libc/string/strcasecmp.c Mon Mar 25 18:28:29 2013
+++ src/lib/libc/string/strcasecmp.c Wed Apr 10 04:20:02 2013
@@ -29,7 +29,10 @@
* SUCH DAMAGE.
*/
+#include <stdlib.h>
#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
typedef unsigned char u_char;
@@ -76,18 +79,11 @@
int
strcasecmp(const char *s1, const char *s2)
{
- const u_char *cm = charmap;
- const u_char *us1 = (const u_char *)s1;
- const u_char *us2 = (const u_char *)s2;
-
- while (cm[*us1] == cm[*us2++])
- if (*us1++ == '\0')
- return (0);
- return (cm[*us1] - cm[*--us2]);
+ return strncasecmp(s1, s2, strlen(s1) + 1);
}
int
-strncasecmp(const char *s1, const char *s2, size_t n)
+sbstrncasecmp(const char *s1, const char *s2, size_t n)
{
if (n != 0) {
const u_char *cm = charmap;
@@ -100,6 +96,42 @@
if (*us1++ == '\0')
break;
} while (--n != 0);
+ }
+ return (0);
+}
+
+int
+strncasecmp(const char *s1, const char *s2, size_t n)
+{
+ mbstate_t mb1, mb2;
+
+ bzero(&mb1, sizeof(mb1));
+ bzero(&mb2, sizeof(mb2));
+ if (n != 0) {
+ const u_char *us1 = (const u_char *)s1;
+ const u_char *us2 = (const u_char *)s2;
+ size_t d1, d2;
+
+ do {
+ wchar_t w1, w2, l1, l2;
+ size_t ml = n < MB_CUR_MAX ? n : MB_CUR_MAX;
+ d1 = mbrtowc(&w1, us1, ml, &mb1);
+ if (d1 == (size_t)-1 || d1 == (size_t)-2) {
+ return sbstrncasecmp(s1, s2, n);
+ }
+ d2 = mbrtowc(&w2, us2, ml, &mb2);
+ if (d2 == (size_t)-1 || d2 == (size_t)-2) {
+ return sbstrncasecmp(s1, s2, n);
+ }
+ if ((l1 = towlower(w1)) != (l2 = towlower(w2))) {
+ return l1 - l2;
+ }
+ if (*us1 == '\0')
+ break;
+ us2 += d2;
+ us1 += d1;
+ n -= d1;
+ } while (n != 0);
}
return (0);
}