Hi Dave,

This patch fixes a problem in the Boyer-Moore textsearch strategy.
Please apply.

-- 
Pablo
[TEXTSEARCH] Fix broken good shift array calculation in Boyer-Moore

The current logic does not calculate correctly the good shift array:
Let x be the pattern that is being searched. Let y be the block of data. 
The good shift array aligns the segment:

x[i+1 ... m-1] = y[i+j+1 ... j+m-1]

with its rightmost occurrence in x that fulfils x[i] neq y[i+j].

In previous version, the good shift array for the pattern ANPANMAN is:
[1, 8, 3, 8, 8, 8, 8, 8]
and should be:
[1, 8, 3, 6, 6, 6, 6, 6]

Signed-off-by: Pablo Neira Ayuso <[EMAIL PROTECTED]>

Index: net-2.6.git/lib/ts_bm.c
===================================================================
--- net-2.6.git.orig/lib/ts_bm.c        2006-01-29 05:10:25.000000000 +0100
+++ net-2.6.git/lib/ts_bm.c     2006-01-29 05:25:47.000000000 +0100
@@ -94,10 +94,28 @@ next:                       bs = bm->bad_shift[text[shift-i]
        return UINT_MAX;
 }
 
+static int subpattern(u8 *pattern, int i, int j, int g)
+{
+       int x = i+g-1, y = j+g-1, ret = 0;
+
+       while(pattern[x--] == pattern[y--]) {
+               if (y < 0) {
+                       ret = 1;
+                       break;
+               }
+               if (--g == 0) {
+                       ret = pattern[i-1] != pattern[j-1];
+                       break;
+               }
+       }
+
+       return ret;
+}
+
 static void compute_prefix_tbl(struct ts_bm *bm, const u8 *pattern,
                               unsigned int len)
 {
-       int i, j, ended, l[ASIZE];
+       int i, j, g;
 
        for (i = 0; i < ASIZE; i++)
                bm->bad_shift[i] = len;
@@ -106,23 +124,15 @@ static void compute_prefix_tbl(struct ts
 
        /* Compute the good shift array, used to match reocurrences 
         * of a subpattern */
-       for (i = 1; i < bm->patlen; i++) {
-               for (j = 0; j < bm->patlen && bm->pattern[bm->patlen - 1 - j]
-                               == bm->pattern[bm->patlen - 1 - i - j]; j++);
-               l[i] = j;
-       }  
-
        bm->good_shift[0] = 1;
        for (i = 1; i < bm->patlen; i++)
                bm->good_shift[i] = bm->patlen;
-       for (i = bm->patlen - 1; i > 0; i--)
-               bm->good_shift[l[i]] = i;
-       ended = 0;
-       for (i = 0; i < bm->patlen; i++) {
-               if (l[i] == bm->patlen - 1 - i)
-                       ended = i;
-               if (ended)
-                       bm->good_shift[i] = ended;
+        for (i = bm->patlen-1, g = 1; i > 0; g++, i--) {
+               for (j = i-1; j >= 1-g ; j--)
+                       if (subpattern(bm->pattern, i, j, g)) {
+                               bm->good_shift[g] = bm->patlen-j-g;
+                               break;
+                       }
        }
 }
 

Reply via email to