*** varlena.c	2008-08-31 20:31:56.000000000 +0100
--- varlena.c	2008-09-07 02:47:52.000000000 +0100
***************
*** 40,45 ****
--- 40,47 ----
  	pg_wchar   *wstr2;			/* note: these are palloc'd */
  	int			len1;			/* string lengths in logical characters */
  	int			len2;
+ 	int			skiptable[256];	/* for Boyer-Moore-Horspool searching, must be 256 elements. */
+ 	int			skiptablemask;	/* mask for ANDing with skiptable */
  } TextPositionState;
  
  #define DatumGetUnknownP(X)			((unknown *) PG_DETOAST_DATUM(X))
***************
*** 795,800 ****
--- 797,803 ----
  	return result;
  }
  
+ 
  /*
   * text_position_setup, text_position_next, text_position_cleanup -
   *	Component steps of text_position()
***************
*** 804,811 ****
   * called multiple times with increasing values of start_pos, which is
   * the 1-based character position to start the search from.  The "state"
   * variable is normally just a local variable in the caller.
   */
- 
  static void
  text_position_setup(text *t1, text *t2, TextPositionState *state)
  {
--- 807,815 ----
   * called multiple times with increasing values of start_pos, which is
   * the 1-based character position to start the search from.  The "state"
   * variable is normally just a local variable in the caller.
+  *
+  * David Rowley: 2008-09-06 -- Updated for Boyer-Moore-Horspool searching
   */
  static void
  text_position_setup(text *t1, text *t2, TextPositionState *state)
  {
***************
*** 838,902 ****
  		state->len1 = len1;
  		state->len2 = len2;
  	}
! }
! 
! static int
  text_position_next(int start_pos, TextPositionState *state)
  {
! 	int			pos = 0,
! 				p,
! 				px;
! 
! 	Assert(start_pos > 0);		/* else caller error */
! 
! 	if (state->len2 <= 0)
! 		return start_pos;		/* result for empty pattern */
! 
! 	if (!state->use_wchar)
! 	{
! 		/* simple case - single byte encoding */
! 		char	   *p1 = state->str1;
! 		char	   *p2 = state->str2;
! 
! 		/* no use in searching str past point where search_str will fit */
! 		px = (state->len1 - state->len2);
! 
! 		p1 += start_pos - 1;
! 
! 		for (p = start_pos - 1; p <= px; p++)
! 		{
! 			if ((*p1 == *p2) && (strncmp(p1, p2, state->len2) == 0))
! 			{
! 				pos = p + 1;
! 				break;
! 			}
! 			p1++;
! 		}
! 	}
! 	else
! 	{
! 		/* not as simple - multibyte encoding */
! 		pg_wchar   *p1 = state->wstr1;
! 		pg_wchar   *p2 = state->wstr2;
! 
! 		/* no use in searching str past point where search_str will fit */
! 		px = (state->len1 - state->len2);
  
- 		p1 += start_pos - 1;
  
- 		for (p = start_pos - 1; p <= px; p++)
- 		{
- 			if ((*p1 == *p2) && (pg_wchar_strncmp(p1, p2, state->len2) == 0))
- 			{
- 				pos = p + 1;
- 				break;
- 			}
- 			p1++;
- 		}
- 	}
  
- 	return pos;
- }
  
  static void
  text_position_cleanup(TextPositionState *state)
--- 842,1005 ----
  		state->len1 = len1;
  		state->len2 = len2;
  	}
!   /*
!    * If the needle is bigger than the haystack then there
!    * is no point in wasting cycles initalizing anything else.
!    */
!   if (state->len2 < state->len1)
!   {
!     int     ai;
!     int     tablemask;
!     int     lneedle; 
! 
!     /* Prepare the skip table for Boyer-Moore-Horspool searching. First we
!      * must determine how much of the skip table to use.  The declaration of
!      * TextPositionState allows up to 256 elements, but with haystack lengths
!      * of only a few characters we don't really want to have to initialize so
!      * many elements --- it would take too long in comparison to the actual
!      * search time.  So we choose a useful skip table size based on the
!      * haystack length minus the needle length. The closer the needle length
!      * is to the haystack length the less useful skipping becomes.
!      * 
!      * Note: tablemask must be N^2-1 and no more than 255. 
!      */
!     if ((state->len1 - state->len2) < 16)
!       tablemask = 3;
!     else if ((state->len1 - state->len2) < 64)
!       tablemask = 7;
!     else if ((state->len1 - state->len2) < 128)
!       tablemask = 15;
!     else if ((state->len1 - state->len2) < 512)
!       tablemask = 31;
!     else if ((state->len1 - state->len2) < 2048)
!       tablemask = 63;
!     else if ((state->len1 - state->len2) < 4096)
!       tablemask = 127;
!     else
!       tablemask = 255;
! 
!     lneedle = state->len2;
!     state->skiptablemask = tablemask;
! 
!     /* Initalize the skip table. We set all elements to the needle length */
!     for (ai = 0; ai <= tablemask; ai++)
!       state->skiptable[ai] = lneedle;
! 
!     /* We do not process the last character in the needle */
!     lneedle--;
! 
!     if (!state->use_wchar) 
!     {
!       for (ai = 0; ai < lneedle; ai++)
!         state->skiptable[(unsigned char) state->str2[ai] & tablemask] = lneedle - ai;
!     }
!     else 
!     {
!       for (ai = 0; ai < lneedle; ai++)
!         state->skiptable[state->wstr2[ai] & tablemask] = lneedle - ai;
!     }
!   }
! }
! 
! /* text_position_next
!  * David Rowley 2008-09-06
!  * Uses Boyer-Moore-Horspool searching
!  */
! int
  text_position_next(int start_pos, TextPositionState *state)
  {
!   int haystack_len = state->len1;
!   int needle_len = state->len2;
!   int skipmask = state->skiptablemask;
! 
!   Assert(start_pos > 0);		/* else caller error */
! 
!   if (needle_len <= 0)
! 		return start_pos;		/* Empty needle */
! 
!   start_pos--; /* Change for zero based arrays */
!   /*
!    * Eliminate the impossible first. The needle is
!    * too big for the haystack. We've likely not setup
!    * the skip table when this happens anyway.
!    */
!   if (haystack_len + start_pos < needle_len)
!     return 0;
! 
!   Assert((skipmask & 255) == skipmask && skipmask >= 3); /* Must be N^2-1, min size 3 */
! 
!   if (!state->use_wchar)
!   {
!     char *nptr;
!     char *hptr;
!     char *p;
!     char *needle = state->str2;
!     char *haystack = state->str1;
!  
!     /* Start at startpos plus the length of the needle */
!     hptr = &haystack[needle_len - 1 + start_pos];
!     while (hptr < &haystack[haystack_len]) 
!     {
!       nptr = &needle[needle_len - 1]; /* Point to the end of the needle */
!       p = hptr;
! 
!       while (*nptr == *p) 
!       {
!         /* Do we have it? Return 1 based array pos */
!         if (nptr-- == needle) 
!           return p - haystack + 1; 
!         p--;
!       }
!       /* Ask the skiptable where to look next. If it
!        * finds a match then we align the two ~matching~
!        * characters and start another search there.
!        * Else we skip a whole needle length.
!        * Of course the ~matching~ characters only have the
!        * same hash value, the character value may be different.
!        */
!       hptr += state->skiptable[(unsigned char) *hptr & skipmask];
!     }
!     return 0; /* Not found */
!   } 
!   else
!   { 
!     /* The multibyte char version. This works exactly the same way. */
!     pg_wchar *nptr;
!     pg_wchar *hptr;
!     pg_wchar *p;
!     pg_wchar *needle = state->wstr2;
!     pg_wchar *haystack = state->wstr1;
!   
!     /* Start at start_pos plus the length of the needle */
!     hptr = &haystack[needle_len - 1 + start_pos];
!     while (hptr < &haystack[haystack_len]) 
!     {
!       nptr = &needle[needle_len - 1]; /* Point to the end of the needle */
!       p = hptr;
! 
!       while (*nptr == *p) 
!       {
!         /* Do we have it? Return 1 based array pos */
!         if (nptr-- == needle) 
!           return p - haystack + 1; 
!         p--;
!       }
!       /* Ask the skiptable where to look next. If it
!        * finds a match then we align the two ~matching~
!        * characters and start another search there.
!        * Else we skip a whole needle length.
!        * Of course the ~matching~ characters only have the
!        * same hash value, the character value may be different.
!        */
!        hptr += state->skiptable[*hptr & skipmask];
!     }
!     return 0; /* Not found */
!   }
! }
!  
  
  
  
  
  static void
  text_position_cleanup(TextPositionState *state)
