My versions of strspn() & strcspn() use faster and clear algorithm.

Below I provide simple test application, which compares speed and
results of old strspn() with new one & unified diff in the bottom.

===============cut==================
#include <stdio.h>
#include <string.h>
#include <time.h> /* for _rdtsc() */

#define S1_LEN 1024 /* length of test string */
#define S2_LEN 256  /* length of mask. Maximum is 265 */
#define PHPAPI

PHPAPI size_t php_strspn_new(char *s1, char *s2, char *s1_end, char *s2_end)
{
unsigned char *p1, *p2;
char char_table[256];


        if (s1 == s1_end || s2 == s2_end) {
                /* there is no chars in string [s1] or in the mask [s2] */
                return 0;
        }

        /* create a char table from mask [s2] */
        memset(char_table, 0, 256);
        p1 = (unsigned char *) s2;
        p2 = (unsigned char *) s2_end;
        while (p1 < p2) {
                char_table[*p1++] = 1;
        }

        /* compute the length of the initial segment of string [s1] which
           consists entirely of characters in mask [s2]
        */
        p1 = (unsigned char *) s1;
        p2 = (unsigned char *) s1_end;
        while (p1 < p2 && char_table[*p1++]) {
                /* empty cycle */
        }
        if (char_table[*(--p1)]) {
                p1++;
        }

        return ((char *)p1 - s1);
}

PHPAPI size_t php_strspn_old(char *s1, char *s2, char *s1_end, char *s2_end)
{
register const char *p = s1, *spanp;
register char c = *p;


cont:
    for (spanp = s2; p != s1_end && spanp != s2_end;)
        if (*spanp++ == c) {
            c = *(++p);
            goto cont;
        }
    return (p - s1);
}

int main(int argc,char *argv[])
{
        char s1[S1_LEN], s2[S2_LEN], *s1_end = s1 + S1_LEN, *s2_end = s2 + S2_LEN;
        size_t n, i;
        unsigned long long t;

        memset(s1, S2_LEN - 1, S1_LEN); /* create test string */
        for (i = 0; i < S2_LEN; i++) s2[i] = i; /* create test mask */

        t = _rdtsc();
        n = php_strspn_old(s1, s2, s1_end, s2_end);
        printf("Old strspn time=%lld, result=%d\n", _rdtsc() - t, n);

        t = _rdtsc();
        n = php_strspn_new(s1, s2, s1_end, s2_end);
        printf("New strspn time=%lld, result=%d\n", _rdtsc() - t, n);

    return 0;
}
===============cut==================


unified diff ==========cut========== --- string.c Thu May 13 20:44:32 2004 +++ string_strspn.c Tue Jun 15 15:22:26 2004 @@ -1296,16 +1296,36 @@ */ PHPAPI size_t php_strspn(char *s1, char *s2, char *s1_end, char *s2_end) { - register const char *p = s1, *spanp; - register char c = *p; + unsigned char *p1, *p2; + char char_table[256];

-cont:
-   for (spanp = s2; p != s1_end && spanp != s2_end;)
-       if (*spanp++ == c) {
-           c = *(++p);
-           goto cont;
+    /* handling some trivial cases */
+    if (s1 == s1_end || s2 == s2_end) {
+        /* there is no chars in string [s1] or in the mask [s2] */
+        return 0;
+    }
+
+    /* create a char table from mask [s2] */
+    memset(char_table, 0, 256);
+    p1 = (unsigned char *) s2;
+    p2 = (unsigned char *) s2_end;
+    while (p1 < p2) {
+        char_table[*p1++] = 1;
+    }
+
+    /* compute the length of the initial segment of string [s1] which
+       consists entirely of characters in mask [s2]
+    */
+    p1 = (unsigned char *) s1;
+    p2 = (unsigned char *) s1_end;
+    while (p1 < p2 && char_table[*p1++]) {
+        /* empty cycle */
        }
-   return (p - s1);
+    if (char_table[*(--p1)]) {
+        p1++;
+    }
+
+    return ((char *)p1 - s1);
 }
 /* }}} */

@@ -1313,18 +1333,40 @@
  */
 PHPAPI size_t php_strcspn(char *s1, char *s2, char *s1_end, char *s2_end)
 {
-   register const char *p, *spanp;
-   register char c = *s1;
+    unsigned char *p1, *p2;
+    char char_table[256];

-   for (p = s1;;) {
-       spanp = s2;
-       do {
-           if (*spanp == c || p == s1_end)
-               return p - s1;
-       } while (spanp++ < s2_end);
-       c = *++p;
+    /* handling some trivial cases */
+    if (s2 == s2_end) {
+        /* empty mask [s2] */
+        return s1_end - s1;
    }
-   /* NOTREACHED */
+    if (s1 == s1_end) {
+        /* there is no chars in string [s1] */
+        return 0;
+    }
+
+    /* create a char table from mask [s2] */
+    memset(char_table, 1, 256);
+    p1 = (unsigned char *) s2;
+    p2 = (unsigned char *) s2_end;
+    while (p1 < p2) {
+        char_table[*p1++] = 0;
+    }
+
+    /* compute the length of the initial segment of string [s1] which
+       does NOT contain any of the characters in mask [s2]
+    */
+    p1 = (unsigned char *) s1;
+    p2 = (unsigned char *) s1_end;
+    while (p1 < p2 && char_table[*p1++]) {
+        /* empty cycle */
+    }
+    if (char_table[*(--p1)]) {
+        p1++;
+    }
+
+    return ((char *)p1 - s1);
 }
 /* }}} */


==========cut========== -- Using Opera's revolutionary e-mail client: http://www.opera.com/m2/

--
PHP Internals - PHP Runtime Development Mailing List
To unsubscribe, visit: http://www.php.net/unsub.php



Reply via email to