Control: tags -1 + patch

Please find attached a patch; unfortunately I could not find a way to test it.
Description: Port to PCRE2.
Bug-Debian: https://bugs.debian.org/999954
Author: Yavor Doganov <ya...@gnu.org>
Forwarded: no
Last-Update: 2023-12-19
---
--- squeak-vm.orig/platforms/unix/CMakeLists.txt +++ squeak-vm/platforms/unix/CMakeLists.txt @@ -125,7 +125,7 @@ USE_LIBRARY ("-framework ${fwk}") ENDMACRO (USE_FRAMEWORK) -USE_LIBRARY_SHARED ("-lpcre") +USE_LIBRARY_SHARED ("-lpcre2-8") USE_LIBRARY_SHARED ("-ljpeg") MACRO (CONFIG_DEFINE var) --- squeak-vm.orig/platforms/Cross/plugins/RePlugin/rePlugin.h +++ squeak-vm/platforms/Cross/plugins/RePlugin/rePlugin.h @@ -15,11 +15,9 @@ The instance variables must appear in the preceding order. MatchSpaceObj must be allocated by the calling routine and contain at least 6*(numGroups+1) bytes. */ -#include "pcre.h" -#include "internal.h" +#define PCRE2_CODE_UNIT_WIDTH 8 +#include <pcre2.h> /* Adjust malloc and free routines as used by PCRE */ -static void rePluginFree(void * aPointer); -static void * rePluginMalloc(size_t anInteger); -void *(*pcre_malloc)(size_t) = rePluginMalloc; -void (*pcre_free)(void *) = rePluginFree; +static void rePluginFree(void * aPointer, void * aData); +static void * rePluginMalloc(size_t anInteger, void * aData); --- squeak-vm.orig/platforms/unix/src/vm/intplugins/RePlugin/RePlugin.c +++ squeak-vm/platforms/unix/src/vm/intplugins/RePlugin/RePlugin.c @@ -35,7 +35,6 @@ /*** Constants ***/ /*** Function Prototypes ***/ -static sqInt allocateByteArrayAndSetRcvrExtraPtrFrom(sqInt anExtraPtr); static sqInt allocateByteArrayAndSetRcvrPCREPtrFromPCRE(sqInt aPCREPtr); static sqInt allocateStringAndSetRcvrErrorStrFromCStr(const char *aCStrBuffer); #pragma export on @@ -52,17 +51,19 @@ EXPORT(sqInt) primPCREExecfromto(void); EXPORT(sqInt) primPCRENumSubPatterns(void); #pragma export off -static void rePluginFree(void * aPointer); +static void rePluginFree(void * aPointer, void * aData); #pragma export on -EXPORT(void *) rePluginMalloc(size_t anInteger); +EXPORT(void *) rePluginMalloc(size_t anInteger, void * aData); EXPORT(sqInt) setInterpreter(struct VirtualMachine*anInterpreter); #pragma export off /*** Variables ***/ static sqInt compileFlags; -static 
sqInt errorOffset; +static PCRE2_SIZE errorOffset; static sqInt errorStr; -static const char * errorStrBuffer; -static sqInt extraPtr; +static int errorCode; +static pcre2_general_context * genContext = NULL; +static pcre2_compile_context * compContext = NULL; +static pcre2_match_context * matchContext = NULL; #ifdef SQUEAK_BUILTIN_PLUGIN extern @@ -86,28 +87,6 @@ static sqInt rcvr; -static sqInt allocateByteArrayAndSetRcvrExtraPtrFrom(sqInt anExtraPtr) { - sqInt extraObject; - void *extraByteArrayPtr; - - if (anExtraPtr) { - - /* Allocate a Smalltalk ByteArray -- lastAlloc contains the length */ - - extraObject = interpreterProxy->instantiateClassindexableSize(interpreterProxy->classByteArray(), sizeof(real_pcre_extra)); - /* begin loadRcvrFromStackAt: */ - rcvr = interpreterProxy->stackObjectValue(0); - extraByteArrayPtr = interpreterProxy->arrayValueOf(extraObject); - memcpy(extraByteArrayPtr, (void *) anExtraPtr, sizeof(real_pcre_extra)); - } else { - extraObject = interpreterProxy->nilObject(); - } - /* begin rcvrExtraPtrFrom: */ - interpreterProxy->storePointerofObjectwithValue(3, rcvr, extraObject); - ; - return extraObject; -} - static sqInt allocateByteArrayAndSetRcvrPCREPtrFromPCRE(sqInt aPCREPtr) { sqInt patObject; void *patByteArrayPtr; @@ -184,13 +163,18 @@ /* <rcvr primPCRECompile>, where rcvr is an object with instance variables: - 'patternStr compileFlags pcrePtr extraPtr errorStr errorOffset matchFlags' + 'patternStr compileFlags pcrePtr errorStr errorOffset matchFlags' Compile the regular expression in patternStr, and if the compilation is successful, attempt to optimize the compiled expression. Store the results in <pcrePtr> and <extratr>, or fill errorStr with a meaningful errorString and errorOffset with an indicator where the error was found, applying compileFlags throughout. Answer nil with a clean compile (regardless of whether an optimization is possible, and answer with the string otherwise. 
*/ EXPORT(sqInt) primPCRECompile(void) { sqInt anInteger; + if (!genContext) + genContext = pcre2_general_context_create(rePluginMalloc, + rePluginFree, NULL); + if (!compContext) + compContext = pcre2_compile_context_create(genContext); /* begin loadRcvrFromStackAt: */ rcvr = interpreterProxy->stackObjectValue(0); patternStrPtr = ((char *) (interpreterProxy->fetchArrayofObject(0, rcvr))); @@ -198,24 +182,25 @@ if (interpreterProxy->failed()) { return null; } - pcrePtr = (int) pcre_compile(patternStrPtr, compileFlags, - &errorStrBuffer, &errorOffset, NULL); + pcrePtr = (int) pcre2_compile((PCRE2_SPTR)patternStrPtr, + strlen(patternStrPtr), compileFlags, + &errorCode, &errorOffset, compContext); if (pcrePtr) { allocateByteArrayAndSetRcvrPCREPtrFromPCRE(pcrePtr); - extraPtr = (int) pcre_study((pcre *)pcrePtr, compileFlags, &errorStrBuffer); - allocateByteArrayAndSetRcvrExtraPtrFrom(extraPtr); - rePluginFree(((void *) pcrePtr)); - if (extraPtr) { - rePluginFree(((void *) extraPtr)); - } + rePluginFree(((void *) pcrePtr), NULL); if (interpreterProxy->failed()) { return null; } interpreterProxy->popthenPush(1, interpreterProxy->nilObject()); } else { + const char errorStrBuffer[120]; + + pcre2_get_error_message(errorCode, + (PCRE2_UCHAR *)errorStrBuffer, + sizeof(errorStrBuffer)); errorStr = allocateStringAndSetRcvrErrorStrFromCStr(errorStrBuffer); /* begin rcvrErrorOffsetFrom: */ - anInteger = errorOffset; + anInteger = (int) errorOffset; interpreterProxy->storeIntegerofObjectwithValue(5, rcvr, anInteger); if (interpreterProxy->failed()) { return null; @@ -227,7 +212,7 @@ /* <rcvr primPCREExec: searchObject>, where rcvr is an object with instance variables: - 'patternStr compileFlags pcrePtr extraPtr errorStr errorOffset matchFlags' + 'patternStr compileFlags pcrePtr errorStr errorOffset matchFlags' Apply the regular expression (stored in <pcrePtr> and <extratr>, generated from calls to primPCRECompile), to smalltalk String searchObject using <matchOptions>. 
If there is no match, answer nil. Otherwise answer a ByteArray of offsets representing the results of the match. */ @@ -235,10 +220,8 @@ sqInt searchObject; sqInt length; char *searchBuffer; - sqInt matchSpaceSize; sqInt result; - int *matchSpacePtr; - sqInt extraObj; + pcre2_match_data *matchData; /* Load Parameters */ @@ -249,22 +232,17 @@ /* begin loadRcvrFromStackAt: */ rcvr = interpreterProxy->stackObjectValue(1); pcrePtr = ((int) (interpreterProxy->fetchArrayofObject(2, rcvr))); - /* begin rcvrExtraPtr */ - extraObj = interpreterProxy->fetchPointerofObject(3, rcvr); - if (extraObj == (interpreterProxy->nilObject())) { - extraPtr = NULL; - goto l1; - } - extraPtr = ((int) (interpreterProxy->arrayValueOf(extraObj))); -l1: /* end rcvrExtraPtr */; matchFlags = interpreterProxy->fetchIntegerofObject(6, rcvr); - matchSpacePtr = ((int *) (interpreterProxy->fetchArrayofObject(7, rcvr))); - matchSpaceSize = ((sqInt) (interpreterProxy->byteSizeOf(interpreterProxy->fetchPointerofObject(7, rcvr))) >> 2); if (interpreterProxy->failed()) { return null; } - result = pcre_exec((pcre *)pcrePtr, (pcre_extra *)extraPtr, - searchBuffer, length, 0, matchFlags, matchSpacePtr, matchSpaceSize); + if (!matchContext) + matchContext = pcre2_match_context_create(genContext); + matchData = pcre2_match_data_create_from_pattern((pcre2_code *)pcrePtr, + genContext); + result = pcre2_match((pcre2_code *)pcrePtr, (PCRE2_SPTR)searchBuffer, + length, 0, matchFlags, matchData, matchContext); + pcre2_match_data_free(matchData); interpreterProxy->pop(2); interpreterProxy->pushInteger(result); ; @@ -276,7 +254,7 @@ /* <rcvr primPCREExec: searchObject> from: fromInteger to: toInteger>, where rcvr is an object with instance variables: - 'patternStr compileFlags pcrePtr extraPtr errorStr errorOffset matchFlags' + 'patternStr compileFlags pcrePtr errorStr errorOffset matchFlags' Apply the regular expression (stored in <pcrePtr> and <extratr>, generated from calls to primPCRECompile), to smalltalk 
String searchObject using <matchOptions>, beginning at offset <fromInteger> and continuing until offset <toInteger>. If there is no match, answer nil. Otherwise answer a ByteArray of offsets representing the results of the match. */ @@ -284,12 +262,10 @@ sqInt searchObject; sqInt length; char *searchBuffer; - sqInt matchSpaceSize; sqInt toInteger; sqInt result; sqInt fromInteger; - int *matchSpacePtr; - sqInt extraObj; + pcre2_match_data *matchData; /* Load Parameters */ @@ -314,22 +290,17 @@ searchBuffer += fromInteger; pcrePtr = ((int) (interpreterProxy->fetchArrayofObject(2, rcvr))); - /* begin rcvrExtraPtr */ - extraObj = interpreterProxy->fetchPointerofObject(3, rcvr); - if (extraObj == (interpreterProxy->nilObject())) { - extraPtr = NULL; - goto l1; - } - extraPtr = ((int) (interpreterProxy->arrayValueOf(extraObj))); -l1: /* end rcvrExtraPtr */; matchFlags = interpreterProxy->fetchIntegerofObject(6, rcvr); - matchSpacePtr = ((int *) (interpreterProxy->fetchArrayofObject(7, rcvr))); - matchSpaceSize = ((sqInt) (interpreterProxy->byteSizeOf(interpreterProxy->fetchPointerofObject(7, rcvr))) >> 2); if (interpreterProxy->failed()) { return null; } - result = pcre_exec((pcre *)pcrePtr, (pcre_extra *)extraPtr, - searchBuffer, length, 0, matchFlags, matchSpacePtr, matchSpaceSize); + if (!matchContext) + matchContext = pcre2_match_context_create(genContext); + matchData = pcre2_match_data_create_from_pattern((pcre2_code *)pcrePtr, + genContext); + result = pcre2_match((pcre2_code *)pcrePtr, (PCRE2_SPTR)searchBuffer, + length, 0, matchFlags, matchData, matchContext); + pcre2_match_data_free(matchData); interpreterProxy->pop(2); interpreterProxy->pushInteger(result); ; @@ -341,23 +312,27 @@ /* <rcvr primPCRENumSubPatterns>, where rcvr is an object with instance variables: - 'patternStr compileFlags pcrePtr extraPtr errorStr errorOffset matchFlags' + 'patternStr compileFlags pcrePtr errorStr errorOffset matchFlags' Return the number of subpatterns captured by the 
compiled pattern. */ /* Load Parameters */ EXPORT(sqInt) primPCRENumSubPatterns(void) { + uint32_t capturedSubpatterns; + /* begin loadRcvrFromStackAt: */ rcvr = interpreterProxy->stackObjectValue(0); pcrePtr = ((int) (interpreterProxy->fetchArrayofObject(2, rcvr))); interpreterProxy->pop(1); - interpreterProxy->pushInteger(pcre_info((pcre *)pcrePtr, NULL, NULL)); + pcre2_pattern_info((pcre2_code *)pcrePtr, PCRE2_INFO_CAPTURECOUNT, + &capturedSubpatterns); + interpreterProxy->pushInteger(capturedSubpatterns); } /* Free a block of fixed memory allocated with rePluginMalloc. Instrumented version of C free() to facilitate leak analysis from Smalltalk. OS-specific variations on malloc/free, such as with MacOS, are handled by adding a C macro to the header file redefining malloc/free -- see the class comment */ -static void rePluginFree(void * aPointer) { +static void rePluginFree(void * aPointer, void * aData) { numFrees += 1; if (aPointer) { free(aPointer); @@ -367,7 +342,7 @@ /* Allocate a block of fixed memory using C calls to malloc(). Instrumented to facilitate leak analysis from Smalltalk. Set global lastAlloc to anInteger. OS-specific variations on malloc/free, such as with MacOS, are handled by adding a C macro to the header file redefining malloc/free -- see the class comment */ -EXPORT(void *) rePluginMalloc(size_t anInteger) { +EXPORT(void *) rePluginMalloc(size_t anInteger, void * aData) { void *aPointer; numAllocs += 1; --- squeak-vm.orig/platforms/Cross/plugins/RePlugin/get.c +++ /dev/null @@ -1,227 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* -This is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. See -the file Tech.Notes for some information on the internals. 
- -Written by: Philip Hazel <p...@cam.ac.uk> - - Copyright (c) 1997-2001 University of Cambridge - ------------------------------------------------------------------------------ -Permission is granted to anyone to use this software for any purpose on any -computer system, and to redistribute it freely, subject to the following -restrictions: - -1. This software is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. - -4. If PCRE is embedded in any software that is released under the GNU - General Purpose Licence (GPL), then the terms of that licence shall - supersede any condition above with which it is incompatible. ------------------------------------------------------------------------------ -*/ - -/* This module contains some convenience functions for extracting substrings -from the subject string after a regex match has succeeded. The original idea -for these functions came from Scott Wimer <sco...@cgibuilder.com>. */ - - -/* Include the internals header, which itself includes Standard C headers plus -the external pcre header. */ - -#include "internal.h" - - - -/************************************************* -* Copy captured string to given buffer * -*************************************************/ - -/* This function copies a single captured substring into a given buffer. -Note that we use memcpy() rather than strncpy() in case there are binary zeros -in the string. - -Arguments: - subject the subject string that was matched - ovector pointer to the offsets table - stringcount the number of substrings that were captured - (i.e. 
the yield of the pcre_exec call, unless - that was zero, in which case it should be 1/3 - of the offset table size) - stringnumber the number of the required substring - buffer where to put the substring - size the size of the buffer - -Returns: if successful: - the length of the copied string, not including the zero - that is put on the end; can be zero - if not successful: - PCRE_ERROR_NOMEMORY (-6) buffer too small - PCRE_ERROR_NOSUBSTRING (-7) no such captured substring -*/ - -int -pcre_copy_substring(const char *subject, int *ovector, int stringcount, - int stringnumber, char *buffer, int size) -{ -int yield; -if (stringnumber < 0 || stringnumber >= stringcount) - return PCRE_ERROR_NOSUBSTRING; -stringnumber *= 2; -yield = ovector[stringnumber+1] - ovector[stringnumber]; -if (size < yield + 1) return PCRE_ERROR_NOMEMORY; -memcpy(buffer, subject + ovector[stringnumber], yield); -buffer[yield] = 0; -return yield; -} - - - -/************************************************* -* Copy all captured strings to new store * -*************************************************/ - -/* This function gets one chunk of store and builds a list of pointers and all -of the captured substrings in it. A NULL pointer is put on the end of the list. - -Arguments: - subject the subject string that was matched - ovector pointer to the offsets table - stringcount the number of substrings that were captured - (i.e. 
the yield of the pcre_exec call, unless - that was zero, in which case it should be 1/3 - of the offset table size) - listptr set to point to the list of pointers - -Returns: if successful: 0 - if not successful: - PCRE_ERROR_NOMEMORY (-6) failed to get store -*/ - -int -pcre_get_substring_list(const char *subject, int *ovector, int stringcount, - const char ***listptr) -{ -int i; -int size = sizeof(char *); -int double_count = stringcount * 2; -char **stringlist; -char *p; - -for (i = 0; i < double_count; i += 2) - size += sizeof(char *) + ovector[i+1] - ovector[i] + 1; - -stringlist = (char **)(pcre_malloc)(size); -if (stringlist == NULL) return PCRE_ERROR_NOMEMORY; - -*listptr = (const char **)stringlist; -p = (char *)(stringlist + stringcount + 1); - -for (i = 0; i < double_count; i += 2) - { - int len = ovector[i+1] - ovector[i]; - memcpy(p, subject + ovector[i], len); - *stringlist++ = p; - p += len; - *p++ = 0; - } - -*stringlist = NULL; -return 0; -} - - - -/************************************************* -* Free store obtained by get_substring_list * -*************************************************/ - -/* This function exists for the benefit of people calling PCRE from non-C -programs that can call its functions, but not free() or (pcre_free)() directly. - -Argument: the result of a previous pcre_get_substring_list() -Returns: nothing -*/ - -void -pcre_free_substring_list(const char **pointer) -{ -(pcre_free)((void *)pointer); -} - - - -/************************************************* -* Copy captured string to new store * -*************************************************/ - -/* This function copies a single captured substring into a piece of new -store - -Arguments: - subject the subject string that was matched - ovector pointer to the offsets table - stringcount the number of substrings that were captured - (i.e. 
the yield of the pcre_exec call, unless - that was zero, in which case it should be 1/3 - of the offset table size) - stringnumber the number of the required substring - stringptr where to put a pointer to the substring - -Returns: if successful: - the length of the string, not including the zero that - is put on the end; can be zero - if not successful: - PCRE_ERROR_NOMEMORY (-6) failed to get store - PCRE_ERROR_NOSUBSTRING (-7) substring not present -*/ - -int -pcre_get_substring(const char *subject, int *ovector, int stringcount, - int stringnumber, const char **stringptr) -{ -int yield; -char *substring; -if (stringnumber < 0 || stringnumber >= stringcount) - return PCRE_ERROR_NOSUBSTRING; -stringnumber *= 2; -yield = ovector[stringnumber+1] - ovector[stringnumber]; -substring = (char *)(pcre_malloc)(yield + 1); -if (substring == NULL) return PCRE_ERROR_NOMEMORY; -memcpy(substring, subject + ovector[stringnumber], yield); -substring[yield] = 0; -*stringptr = substring; -return yield; -} - - - -/************************************************* -* Free store obtained by get_substring * -*************************************************/ - -/* This function exists for the benefit of people calling PCRE from non-C -programs that can call its functions, but not free() or (pcre_free)() directly. - -Argument: the result of a previous pcre_get_substring() -Returns: nothing -*/ - -void -pcre_free_substring(const char *pointer) -{ -(pcre_free)((void *)pointer); -} - -/* End of get.c */ --- squeak-vm.orig/platforms/Cross/plugins/RePlugin/chartables.c +++ /dev/null @@ -1,183 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* This file is automatically written by the dftables auxiliary -program. If you edit it by hand, you might like to edit the Makefile to -prevent its ever being regenerated. 
- -This file is #included in the compilation of pcre.c to build the default -character tables which are used when no tables are passed to the compile -function. */ - -static unsigned char pcre_default_tables[] = { - -/* This table is a lower casing table. */ - - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, - 64, 97, 98, 99,100,101,102,103, - 104,105,106,107,108,109,110,111, - 112,113,114,115,116,117,118,119, - 120,121,122, 91, 92, 93, 94, 95, - 96, 97, 98, 99,100,101,102,103, - 104,105,106,107,108,109,110,111, - 112,113,114,115,116,117,118,119, - 120,121,122,123,124,125,126,127, - 128,129,130,131,132,133,134,135, - 136,137,138,139,140,141,142,143, - 144,145,146,147,148,149,150,151, - 152,153,154,155,156,157,158,159, - 160,161,162,163,164,165,166,167, - 168,169,170,171,172,173,174,175, - 176,177,178,179,180,181,182,183, - 184,185,186,187,188,189,190,191, - 192,193,194,195,196,197,198,199, - 200,201,202,203,204,205,206,207, - 208,209,210,211,212,213,214,215, - 216,217,218,219,220,221,222,223, - 224,225,226,227,228,229,230,231, - 232,233,234,235,236,237,238,239, - 240,241,242,243,244,245,246,247, - 248,249,250,251,252,253,254,255, - -/* This table is a case flipping table. 
*/ - - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, - 64, 97, 98, 99,100,101,102,103, - 104,105,106,107,108,109,110,111, - 112,113,114,115,116,117,118,119, - 120,121,122, 91, 92, 93, 94, 95, - 96, 65, 66, 67, 68, 69, 70, 71, - 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, - 88, 89, 90,123,124,125,126,127, - 128,129,130,131,132,133,134,135, - 136,137,138,139,140,141,142,143, - 144,145,146,147,148,149,150,151, - 152,153,154,155,156,157,158,159, - 160,161,162,163,164,165,166,167, - 168,169,170,171,172,173,174,175, - 176,177,178,179,180,181,182,183, - 184,185,186,187,188,189,190,191, - 192,193,194,195,196,197,198,199, - 200,201,202,203,204,205,206,207, - 208,209,210,211,212,213,214,215, - 216,217,218,219,220,221,222,223, - 224,225,226,227,228,229,230,231, - 232,233,234,235,236,237,238,239, - 240,241,242,243,244,245,246,247, - 248,249,250,251,252,253,254,255, - -/* This table contains bit maps for various character classes. -Each map is 32 bytes long and the bits run from the least -significant end of each byte. The classes that have their own -maps are: space, xdigit, digit, upper, lower, word, graph -print, punct, and cntrl. Other classes are built from combinations. 
*/ - - 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, - 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, - 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, - 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, - 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - - 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - -/* This table identifies various classes of character by individual bits: - 0x01 white space character - 0x02 letter - 0x04 decimal digit - 0x08 hexadecimal digit - 0x10 alphanumeric or '_' - 0x80 regular expression metacharacter or binary zero -*/ - - 
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ - 0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ - 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */ - 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */ - 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */ - 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */ - 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */ - 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */ - 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */ - 0x12,0x12,0x12,0x80,0x00,0x00,0x80,0x10, /* X - _ */ - 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */ - 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */ - 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */ - 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ - -/* End of chartables.c */ --- squeak-vm.orig/platforms/Cross/plugins/RePlugin/study.c +++ /dev/null @@ -1,401 +0,0 @@ -/************************************************* -* Perl-Compatible Regular 
Expressions * -*************************************************/ - -/* -This is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. See -the file Tech.Notes for some information on the internals. - -Written by: Philip Hazel <p...@cam.ac.uk> - - Copyright (c) 1997-2001 University of Cambridge - ------------------------------------------------------------------------------ -Permission is granted to anyone to use this software for any purpose on any -computer system, and to redistribute it freely, subject to the following -restrictions: - -1. This software is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. - -4. If PCRE is embedded in any software that is released under the GNU - General Purpose Licence (GPL), then the terms of that licence shall - supersede any condition above with which it is incompatible. ------------------------------------------------------------------------------ -*/ - - -/* Include the internals header, which itself includes Standard C headers plus -the external pcre header. */ - -#include "internal.h" - - - -/************************************************* -* Set a bit and maybe its alternate case * -*************************************************/ - -/* Given a character, set its bit in the table, and also the bit for the other -version of a letter if we are caseless. 
- -Arguments: - start_bits points to the bit map - c is the character - caseless the caseless flag - cd the block with char table pointers - -Returns: nothing -*/ - -static void -set_bit(uschar *start_bits, int c, BOOL caseless, compile_data *cd) -{ -start_bits[c/8] |= (1 << (c&7)); -if (caseless && (cd->ctypes[c] & ctype_letter) != 0) - start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7)); -} - - - -/************************************************* -* Create bitmap of starting chars * -*************************************************/ - -/* This function scans a compiled unanchored expression and attempts to build a -bitmap of the set of initial characters. If it can't, it returns FALSE. As time -goes by, we may be able to get more clever at doing this. - -Arguments: - code points to an expression - start_bits points to a 32-byte table, initialized to 0 - caseless the current state of the caseless flag - cd the block with char table pointers - -Returns: TRUE if table built, FALSE otherwise -*/ - -static BOOL -set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless, - compile_data *cd) -{ -register int c; - -/* This next statement and the later reference to dummy are here in order to -trick the optimizer of the IBM C compiler for OS/2 into generating correct -code. Apparently IBM isn't going to fix the problem, and we would rather not -disable optimization (in this module it actually makes a big difference, and -the pcre module can use all the optimization it can get). */ - -volatile int dummy; - -do - { - const uschar *tcode = code + 3; - BOOL try_next = TRUE; - - while (try_next) - { - /* If a branch starts with a bracket or a positive lookahead assertion, - recurse to set bits from within them. That's all for this branch. 
*/ - - if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT) - { - if (!set_start_bits(tcode, start_bits, caseless, cd)) - return FALSE; - try_next = FALSE; - } - - else switch(*tcode) - { - default: - return FALSE; - - /* Skip over extended extraction bracket number */ - - case OP_BRANUMBER: - tcode += 3; - break; - - /* Skip over lookbehind and negative lookahead assertions */ - - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT); - tcode += 3; - break; - - /* Skip over an option setting, changing the caseless flag */ - - case OP_OPT: - caseless = (tcode[1] & PCRE_CASELESS) != 0; - tcode += 2; - break; - - /* BRAZERO does the bracket, but carries on. */ - - case OP_BRAZERO: - case OP_BRAMINZERO: - if (!set_start_bits(++tcode, start_bits, caseless, cd)) - return FALSE; - dummy = 1; - do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT); - tcode += 3; - break; - - /* Single-char * or ? sets the bit and tries the next item */ - - case OP_STAR: - case OP_MINSTAR: - case OP_QUERY: - case OP_MINQUERY: - set_bit(start_bits, tcode[1], caseless, cd); - tcode += 2; - break; - - /* Single-char upto sets the bit and tries the next */ - - case OP_UPTO: - case OP_MINUPTO: - set_bit(start_bits, tcode[3], caseless, cd); - tcode += 4; - break; - - /* At least one single char sets the bit and stops */ - - case OP_EXACT: /* Fall through */ - tcode++; - - case OP_CHARS: /* Fall through */ - tcode++; - - case OP_PLUS: - case OP_MINPLUS: - set_bit(start_bits, tcode[1], caseless, cd); - try_next = FALSE; - break; - - /* Single character type sets the bits and stops */ - - case OP_NOT_DIGIT: - for (c = 0; c < 32; c++) - start_bits[c] |= ~cd->cbits[c+cbit_digit]; - try_next = FALSE; - break; - - case OP_DIGIT: - for (c = 0; c < 32; c++) - start_bits[c] |= cd->cbits[c+cbit_digit]; - try_next = FALSE; - break; - - case OP_NOT_WHITESPACE: - for (c = 0; c < 32; c++) - start_bits[c] |= 
~cd->cbits[c+cbit_space]; - try_next = FALSE; - break; - - case OP_WHITESPACE: - for (c = 0; c < 32; c++) - start_bits[c] |= cd->cbits[c+cbit_space]; - try_next = FALSE; - break; - - case OP_NOT_WORDCHAR: - for (c = 0; c < 32; c++) - start_bits[c] |= ~cd->cbits[c+cbit_word]; - try_next = FALSE; - break; - - case OP_WORDCHAR: - for (c = 0; c < 32; c++) - start_bits[c] |= cd->cbits[c+cbit_word]; - try_next = FALSE; - break; - - /* One or more character type fudges the pointer and restarts, knowing - it will hit a single character type and stop there. */ - - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - tcode++; - break; - - case OP_TYPEEXACT: - tcode += 3; - break; - - /* Zero or more repeats of character types set the bits and then - try again. */ - - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - tcode += 2; /* Fall through */ - - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - switch(tcode[1]) - { - case OP_NOT_DIGIT: - for (c = 0; c < 32; c++) - start_bits[c] |= ~cd->cbits[c+cbit_digit]; - break; - - case OP_DIGIT: - for (c = 0; c < 32; c++) - start_bits[c] |= cd->cbits[c+cbit_digit]; - break; - - case OP_NOT_WHITESPACE: - for (c = 0; c < 32; c++) - start_bits[c] |= ~cd->cbits[c+cbit_space]; - break; - - case OP_WHITESPACE: - for (c = 0; c < 32; c++) - start_bits[c] |= cd->cbits[c+cbit_space]; - break; - - case OP_NOT_WORDCHAR: - for (c = 0; c < 32; c++) - start_bits[c] |= ~cd->cbits[c+cbit_word]; - break; - - case OP_WORDCHAR: - for (c = 0; c < 32; c++) - start_bits[c] |= cd->cbits[c+cbit_word]; - break; - } - - tcode += 2; - break; - - /* Character class: set the bits and either carry on or not, - according to the repeat count. 
*/ - - case OP_CLASS: - { - tcode++; - for (c = 0; c < 32; c++) start_bits[c] |= tcode[c]; - tcode += 32; - switch (*tcode) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRQUERY: - case OP_CRMINQUERY: - tcode++; - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: - if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5; - else try_next = FALSE; - break; - - default: - try_next = FALSE; - break; - } - } - break; /* End of class handling */ - - } /* End of switch */ - } /* End of try_next loop */ - - code += (code[1] << 8) + code[2]; /* Advance to next branch */ - } -while (*code == OP_ALT); -return TRUE; -} - - - -/************************************************* -* Study a compiled expression * -*************************************************/ - -/* This function is handed a compiled expression that it must study to produce -information that will speed up the matching. It returns a pcre_extra block -which then gets handed back to pcre_exec(). - -Arguments: - re points to the compiled expression - options contains option bits - errorptr points to where to place error messages; - set NULL unless error - -Returns: pointer to a pcre_extra block, - NULL on error or if no optimization possible -*/ - -pcre_extra * -pcre_study(const pcre *external_re, int options, const char **errorptr) -{ -uschar start_bits[32]; -real_pcre_extra *extra; -const real_pcre *re = (const real_pcre *)external_re; -compile_data compile_block; - -*errorptr = NULL; - -if (re == NULL || re->magic_number != MAGIC_NUMBER) - { - *errorptr = "argument is not a compiled regular expression"; - return NULL; - } - -if ((options & ~PUBLIC_STUDY_OPTIONS) != 0) - { - *errorptr = "unknown or incorrect option bit(s) set"; - return NULL; - } - -/* For an anchored pattern, or an unchored pattern that has a first char, or a -multiline pattern that matches only at "line starts", no further processing at -present. 
*/ - -if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0) - return NULL; - -/* Set the character tables in the block which is passed around */ - -compile_block.lcc = re->tables + lcc_offset; -compile_block.fcc = re->tables + fcc_offset; -compile_block.cbits = re->tables + cbits_offset; -compile_block.ctypes = re->tables + ctypes_offset; - -/* See if we can find a fixed set of initial characters for the pattern. */ - -memset(start_bits, 0, 32 * sizeof(uschar)); -if (!set_start_bits(re->code, start_bits, (re->options & PCRE_CASELESS) != 0, - &compile_block)) return NULL; - -/* Get an "extra" block and put the information therein. */ - -extra = (real_pcre_extra *)(pcre_malloc)(sizeof(real_pcre_extra)); - -if (extra == NULL) - { - *errorptr = "failed to get memory"; - return NULL; - } - -extra->options = PCRE_STUDY_MAPPED; -memcpy(extra->start_bits, start_bits, sizeof(start_bits)); - -return (pcre_extra *)extra; -} - -/* End of study.c */