Attached is a patch against 7.2 which adds locale awareness to the
character classes of the regular expression engine. Please consider
including this feature in PostgreSQL.

*** src/backend/regex/	Sun Mar 17 16:39:13 2002
--- src/backend/regex/regcomp.c	Sun Mar 17 16:53:43 2002
*** 47,53 ****
--- 47,64 ----
  #include "regex/regex.h"
  #include "regex/utils.h"
  #include "regex/regex2.h"
+ #ifdef USE_LOCALE
+ struct cclass
+ {
+     char *name;
+     char *chars;
+     char *multis;
+ };
+ static struct cclass* cclasses = NULL;
+ static struct cclass* cclass_init(void);
+ #else
  #include "regex/cclass.h"
+ #endif /* USE_LOCALE */
  #include "regex/cname.h"
*** 174,179 ****
--- 185,195 ----
  	pg_wchar   *wcp;
+ #ifdef USE_LOCALE
+     if ( cclasses == NULL )
+         cclasses = cclass_init();
+ #endif /* USE_LOCALE */
  #ifdef REDEBUG
  #define  GOODFLAGS(f)	 (f)
*** 884,890 ****
  	struct cclass *cp;
  	size_t		len;
  	char	   *u;
! 	char		c;
  	while (MORE() && pg_isalpha(PEEK()))
--- 900,906 ----
  	struct cclass *cp;
  	size_t		len;
  	char	   *u;
! 	unsigned char		c;
  	while (MORE() && pg_isalpha(PEEK()))
*** 905,911 ****
  	u = cp->chars;
  	while ((c = *u++) != '\0')
! 		CHadd(cs, c);
  	for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
  		MCadd(p, cs, u);
--- 921,927 ----
  	u = cp->chars;
  	while ((c = *u++) != '\0')
! 		CHadd(cs, c);   
  	for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
  		MCadd(p, cs, u);
*** 1716,1718 ****
--- 1732,1796 ----
  	return (islower((unsigned char) c));
+ #ifdef USE_LOCALE
+ static struct cclass *
+ cclass_init(void)
+ {
+     struct cclass *cp = NULL;
+     struct cclass *classes = NULL;
+     struct cclass_factory
+     {
+         char *name;
+         int (*func)(int);
+         char *chars;
+     } cclass_factories [] =
+         {
+             { "alnum", isalnum, NULL },
+             { "alpha", isalpha, NULL },
+             { "blank", NULL, " \t" },
+             { "cntrl", iscntrl, NULL },
+             { "digit", NULL, "0123456789" },
+             { "graph", isgraph, NULL },
+             { "lower", islower, NULL },
+             { "print", isprint, NULL },
+             { "punct", ispunct, NULL },
+             { "space", NULL, "\t\n\v\f\r " },
+             { "upper", isupper, NULL },
+             { "xdigit", isxdigit, NULL },
+             { NULL, NULL, NULL }
+         };
+     struct cclass_factory *cf = NULL;
+     classes = malloc(sizeof(struct cclass) * (sizeof(cclass_factories) / sizeof(struct cclass_factory)));
+     if (classes == NULL)
+         elog(ERROR,"cclass_init: out of memory");
+     cp = classes;
+     for(cf = cclass_factories; cf->name != NULL; cf++)
+         {
+             cp->name = strdup(cf->name);
+             if ( cf->chars )
+                 cp->chars = strdup(cf->chars);
+             else
+                 {
+                     int x = 0, y = 0;
+                     cp->chars = malloc(sizeof(char) * 256);
+                     if (cp->chars == NULL)
+                         elog(ERROR,"cclass_init: out of memory");
+                     for (x = 0; x < 256; x++)
+                         {
+                             if((cf->func)(x))
+                                 *(cp->chars + y++) = x;                            
+                         }
+                     *(cp->chars + y) = '\0';
+                 }
+             cp->multis = "";
+             cp++;
+         }
+     cp->name = cp->chars = NULL;
+     cp->multis = "";
+     return classes;
+ }
+ #endif /* USE_LOCALE */

---------------------------(end of broadcast)---------------------------
TIP 1: subscribe and unsubscribe commands go to [EMAIL PROTECTED]

Reply via email to