On Thu, Jan 25, 2018 at 07:51:58PM +0300, Arthur Zakirov wrote: > Attached new version of the patch.
Here is a rebased version of the patch, needed because of changes to dict_ispell.c. The patch itself wasn't changed. -- Arthur Zakirov Postgres Professional: http://www.postgrespro.com Russian Postgres Company
diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c index b9fdd77e19..e071994523 100644 --- a/src/backend/tsearch/spell.c +++ b/src/backend/tsearch/spell.c @@ -78,6 +78,8 @@ #define tmpalloc(sz) MemoryContextAlloc(Conf->buildCxt, (sz)) #define tmpalloc0(sz) MemoryContextAllocZero(Conf->buildCxt, (sz)) +#define tmpstrdup(str) MemoryContextStrdup(Conf->buildCxt, (str)) + /* * Prepare for constructing an ISpell dictionary. * @@ -498,7 +500,7 @@ NIAddSpell(IspellDict *Conf, const char *word, const char *flag) Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1); strcpy(Conf->Spell[Conf->nspell]->word, word); Conf->Spell[Conf->nspell]->p.flag = (*flag != '\0') - ? cpstrdup(Conf, flag) : VoidString; + ? tmpstrdup(flag) : VoidString; Conf->nspell++; } @@ -1040,7 +1042,7 @@ setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry, entry->flag.i = i; } else - entry->flag.s = cpstrdup(Conf, s); + entry->flag.s = tmpstrdup(s); entry->flagMode = Conf->flagMode; entry->value = val; @@ -1536,6 +1538,9 @@ nextline: return; isnewformat: + pfree(recoded); + pfree(pstr); + if (oldformat) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR),
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index c45979dee4..725473b7c2 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1364,6 +1364,35 @@ include_dir 'conf.d' </listitem> </varlistentry> + <varlistentry id="guc-max-shared-dictionaries-size" xreflabel="max_shared_dictionaries_size"> + <term><varname>max_shared_dictionaries_size</varname> (<type>integer</type>) + <indexterm> + <primary><varname>max_shared_dictionaries_size</varname> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + Sets the maximum size of all text search dictionaries loaded into shared + memory. The default is 100 megabytes (<literal>100MB</literal>). This + parameter can only be set at server start. + </para> + + <para> + Currently controls only the loading of <application>Ispell</application> + dictionaries (see <xref linkend="textsearch-ispell-dictionary"/>). + After a dictionary is compiled, it is copied into shared memory. + Other backends, on first use of the dictionary, read it from shared + memory, so they do not need to compile the dictionary a second time. + </para> + + <para> + If the total size of simultaneously loaded dictionaries reaches the maximum + allowed size, then a new dictionary will be loaded into the local memory of + a backend. 
+ </para> + </listitem> + </varlistentry> + <varlistentry id="guc-huge-pages" xreflabel="huge_pages"> <term><varname>huge_pages</varname> (<type>enum</type>) <indexterm> diff --git a/src/backend/commands/tsearchcmds.c b/src/backend/commands/tsearchcmds.c index 3a843512d1..b6aeae449b 100644 --- a/src/backend/commands/tsearchcmds.c +++ b/src/backend/commands/tsearchcmds.c @@ -39,6 +39,7 @@ #include "nodes/makefuncs.h" #include "parser/parse_func.h" #include "tsearch/ts_cache.h" +#include "tsearch/ts_shared.h" #include "tsearch/ts_utils.h" #include "utils/builtins.h" #include "utils/fmgroids.h" @@ -396,7 +397,8 @@ verify_dictoptions(Oid tmplId, List *dictoptions) * Call the init method and see if it complains. We don't worry about * it leaking memory, since our command will soon be over anyway. */ - (void) OidFunctionCall1(initmethod, PointerGetDatum(dictoptions)); + (void) OidFunctionCall2(initmethod, PointerGetDatum(dictoptions), + ObjectIdGetDatum(InvalidOid)); } ReleaseSysCache(tup); @@ -513,6 +515,8 @@ RemoveTSDictionaryById(Oid dictId) CatalogTupleDelete(relation, &tup->t_self); + ts_dict_shmem_release(dictId); + ReleaseSysCache(tup); heap_close(relation, RowExclusiveLock); diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 0c86a581c0..c7dce8cac5 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -44,6 +44,7 @@ #include "storage/procsignal.h" #include "storage/sinvaladt.h" #include "storage/spin.h" +#include "tsearch/ts_shared.h" #include "utils/backend_random.h" #include "utils/snapmgr.h" @@ -150,6 +151,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) size = add_size(size, SyncScanShmemSize()); size = add_size(size, AsyncShmemSize()); size = add_size(size, BackendRandomShmemSize()); + size = add_size(size, TsearchShmemSize()); #ifdef EXEC_BACKEND size = add_size(size, ShmemBackendArraySize()); #endif @@ -271,6 +273,11 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) 
AsyncShmemInit(); BackendRandomShmemInit(); + /* + * Set up shared memory to tsearch + */ + TsearchShmemInit(); + #ifdef EXEC_BACKEND /* diff --git a/src/backend/tsearch/Makefile b/src/backend/tsearch/Makefile index 227468ae9e..860cd196e9 100644 --- a/src/backend/tsearch/Makefile +++ b/src/backend/tsearch/Makefile @@ -26,7 +26,7 @@ DICTFILES_PATH=$(addprefix dicts/,$(DICTFILES)) OBJS = ts_locale.o ts_parse.o wparser.o wparser_def.o dict.o \ dict_simple.o dict_synonym.o dict_thesaurus.o \ dict_ispell.o regis.o spell.o \ - to_tsany.o ts_selfuncs.o ts_typanalyze.o ts_utils.o + to_tsany.o ts_selfuncs.o ts_shared.o ts_typanalyze.o ts_utils.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/tsearch/ts_shared.c b/src/backend/tsearch/ts_shared.c new file mode 100644 index 0000000000..7d1f7544cf --- /dev/null +++ b/src/backend/tsearch/ts_shared.c @@ -0,0 +1,366 @@ +/*------------------------------------------------------------------------- + * + * ts_shared.c + * tsearch shared memory management + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/tsearch/ts_shared.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "lib/dshash.h" +#include "storage/lwlock.h" +#include "storage/shmem.h" +#include "tsearch/ts_shared.h" +#include "utils/hashutils.h" +#include "utils/memutils.h" + + +/* + * Hash table structures + */ +typedef struct +{ + Oid dict_id; + dsm_handle dict_dsm; + Size dict_size; + + /* How many backends have DSM mapping */ + uint32 refcnt; +} TsearchDictEntry; + +static dshash_table *dict_table = NULL; + +/* + * Shared struct for locking + */ +typedef struct +{ + dsa_handle area; + dshash_table_handle dict_table_handle; + + /* Total size of loaded dictionaries into shared memory in bytes */ + Size loaded_size; + + LWLock lock; +} TsearchCtlData; + +static TsearchCtlData *tsearch_ctl; + +/* + * GUC variable 
for maximum number of shared dictionaries. Default value is + * 100MB. + */ +int max_shared_dictionaries_size = 100 * 1024; + +static void init_dict_table(void); + +/* Parameters for dict_table */ +static const dshash_parameters dict_table_params ={ + sizeof(Oid), + sizeof(TsearchDictEntry), + dshash_memcmp, + dshash_memhash, + LWTRANCHE_TSEARCH_TABLE +}; + +/* + * Build the dictionary using allocate_cb callback. If there is a space in + * shared memory and max_shared_dictionaries_size is greater than 0 copy the + * dictionary into DSM. + * + * If max_shared_dictionaries_size is greater than 0 then try to find the + * dictionary in shared hash table first. If it was built by someone earlier + * just return its location in DSM. + * + * dictid: Oid of the dictionary. + * arg: an argument to the callback function. + * allocate_cb: function to build the dictionary, if it wasn't found in DSM. + * + * Returns address in the dynamic shared memory segment or in backend memory. + */ +void * +ts_dict_shmem_location(Oid dictid, List *dictoptions, + ispell_build_callback allocate_cb) +{ + TsearchDictEntry *entry; + bool found; + dsm_segment *seg; + void *dict, + *dict_location; + +#define CHECK_SHARED_SPACE() \ + if (entry->dict_size + tsearch_ctl->loaded_size > \ + max_shared_dictionaries_size * 1024L) \ + { \ + LWLockRelease(&tsearch_ctl->lock); \ + ereport(LOG, \ + (errmsg("there is no space in shared memory for text search " \ + "dictionary %u, it will be loaded into backend's memory", \ + dictid))); \ + dshash_delete_entry(dict_table, entry); \ + return dict; \ + } \ + + init_dict_table(); + + /* + * Build the dictionary in backend's memory if a hash table wasn't created + * or dictid is invalid (it may happen if the dicionary's init method was + * called within verify_dictoptions()). 
+ */ + if (!DsaPointerIsValid(tsearch_ctl->dict_table_handle) || + !OidIsValid(dictid)) + { + Size dict_size; + + dict = allocate_cb(dictoptions, &dict_size); + + return dict; + } + + /* Try to find an entry in the hash table */ + entry = (TsearchDictEntry *) dshash_find(dict_table, &dictid, false); + + if (entry) + { + seg = dsm_find_mapping(entry->dict_dsm); + if (!seg) + { + seg = dsm_attach(entry->dict_dsm); + /* Remain attached until end of session */ + dsm_pin_mapping(seg); + } + + entry->refcnt++; + dshash_release_lock(dict_table, entry); + + return dsm_segment_address(seg); + } + + /* Dictionary haven't been loaded into memory yet */ + entry = (TsearchDictEntry *) dshash_find_or_insert(dict_table, &dictid, + &found); + + if (found) + { + /* + * Someone concurrently inserted a dictionary entry since the first time + * we checked. + */ + seg = dsm_attach(entry->dict_dsm); + + /* Remain attached until end of session */ + dsm_pin_mapping(seg); + + entry->refcnt++; + dshash_release_lock(dict_table, entry); + + return dsm_segment_address(seg); + } + + /* Build the dictionary */ + dict = allocate_cb(dictoptions, &entry->dict_size); + + LWLockAcquire(&tsearch_ctl->lock, LW_SHARED); + + /* Before allocating a DSM segment check remaining shared space */ + Assert(max_shared_dictionaries_size); + + CHECK_SHARED_SPACE(); + + LWLockRelease(&tsearch_ctl->lock); + /* If we come here, we need an exclusive lock */ + while (!LWLockAcquireOrWait(&tsearch_ctl->lock, LW_EXCLUSIVE)) + { + /* + * Check again in case if there are no space anymore while we were + * waiting for exclusive lock. 
+ */ + CHECK_SHARED_SPACE(); + } + + tsearch_ctl->loaded_size += entry->dict_size; + + LWLockRelease(&tsearch_ctl->lock); + + /* At least, allocate a DSM segment for the compiled dictionary */ + seg = dsm_create(entry->dict_size, 0); + dict_location = dsm_segment_address(seg); + memcpy(dict_location, dict, entry->dict_size); + + pfree(dict); + + entry->dict_id = dictid; + entry->dict_dsm = dsm_segment_handle(seg); + entry->refcnt++; + + /* Remain attached until end of postmaster */ + dsm_pin_segment(seg); + /* Remain attached until end of session */ + dsm_pin_mapping(seg); + + dshash_release_lock(dict_table, entry); + + return dsm_segment_address(seg); +} + +/* + * Release memory occupied by the dictionary. Function just unpins DSM mapping. + * If nobody else hasn't mapping to this DSM then unping DSM segment. + * + * dictid: Oid of the dictionary. + */ +void +ts_dict_shmem_release(Oid dictid) +{ + TsearchDictEntry *entry; + + /* + * If we didn't attach to a hash table then do nothing. + */ + if (!dict_table) + return; + + /* Try to find an entry in the hash table */ + entry = (TsearchDictEntry *) dshash_find(dict_table, &dictid, true); + + if (entry) + { + dsm_segment *seg; + + seg = dsm_find_mapping(entry->dict_dsm); + /* + * If current backend didn't pin a mapping then we don't need to do + * unpinning. + */ + if (!seg) + { + dshash_release_lock(dict_table, entry); + return; + } + + dsm_unpin_mapping(seg); + dsm_detach(seg); + + entry->refcnt--; + + if (entry->refcnt == 0) + { + dsm_unpin_segment(entry->dict_dsm); + dshash_delete_entry(dict_table, entry); + } + else + dshash_release_lock(dict_table, entry); + } +} + +/* + * Allocate and initialize tsearch-related shared memory. 
+ */ +void +TsearchShmemInit(void) +{ + bool found; + + tsearch_ctl = (TsearchCtlData *) + ShmemInitStruct("Full Text Search Ctl", sizeof(TsearchCtlData), &found); + + if (!found) + { + LWLockRegisterTranche(LWTRANCHE_TSEARCH_DSA, "tsearch_dsa"); + LWLockRegisterTranche(LWTRANCHE_TSEARCH_TABLE, "tsearch_table"); + + LWLockInitialize(&tsearch_ctl->lock, LWTRANCHE_TSEARCH_DSA); + + tsearch_ctl->area = DSM_HANDLE_INVALID; + tsearch_ctl->dict_table_handle = InvalidDsaPointer; + tsearch_ctl->loaded_size = 0; + } +} + +/* + * Report shared memory space needed by TsearchShmemInit. + */ +Size +TsearchShmemSize(void) +{ + Size size = 0; + + /* size of service structure */ + size = add_size(size, MAXALIGN(sizeof(TsearchCtlData))); + + return size; +} + +/* + * Initialize hash table located in DSM. + * + * The hash table should be created and initialized iff + * max_shared_dictionaries_size GUC is greater than zero and it doesn't exist + * yet. + */ +static void +init_dict_table(void) +{ + MemoryContext old_context; + dsa_area *dsa; + + if (max_shared_dictionaries_size == 0) + return; + + if (dict_table) + return; + + old_context = MemoryContextSwitchTo(TopMemoryContext); + +recheck_table: + LWLockAcquire(&tsearch_ctl->lock, LW_SHARED); + + /* Hash table have been created already by someone */ + if (DsaPointerIsValid(tsearch_ctl->dict_table_handle)) + { + Assert(tsearch_ctl->area != DSM_HANDLE_INVALID); + + dsa = dsa_attach(tsearch_ctl->area); + + dict_table = dshash_attach(dsa, + &dict_table_params, + tsearch_ctl->dict_table_handle, + NULL); + } + else + { + /* Try to get exclusive lock */ + LWLockRelease(&tsearch_ctl->lock); + if (!LWLockAcquireOrWait(&tsearch_ctl->lock, LW_EXCLUSIVE)) + { + /* + * The lock was released by another backend and other backend + * has concurrently created the hash table already. 
+ */ + goto recheck_table; + } + + dsa = dsa_create(LWTRANCHE_TSEARCH_DSA); + tsearch_ctl->area = dsa_get_handle(dsa); + + dict_table = dshash_create(dsa, &dict_table_params, NULL); + tsearch_ctl->dict_table_handle = dshash_get_hash_table_handle(dict_table); + + /* Remain attached until end of postmaster */ + dsa_pin(dsa); + } + + LWLockRelease(&tsearch_ctl->lock); + + /* Remain attached until end of session */ + dsa_pin_mapping(dsa); + + MemoryContextSwitchTo(old_context); +} diff --git a/src/backend/utils/cache/ts_cache.c b/src/backend/utils/cache/ts_cache.c index 3d5c194148..c078503111 100644 --- a/src/backend/utils/cache/ts_cache.c +++ b/src/backend/utils/cache/ts_cache.c @@ -39,6 +39,7 @@ #include "catalog/pg_ts_template.h" #include "commands/defrem.h" #include "tsearch/ts_cache.h" +#include "tsearch/ts_shared.h" #include "utils/builtins.h" #include "utils/catcache.h" #include "utils/fmgroids.h" @@ -98,7 +99,16 @@ InvalidateTSCacheCallBack(Datum arg, int cacheid, uint32 hashvalue) hash_seq_init(&status, hash); while ((entry = (TSAnyCacheEntry *) hash_seq_search(&status)) != NULL) + { + if (entry->isvalid && hash == TSDictionaryCacheHash) + { + TSDictionaryCacheEntry *dict_entry = (TSDictionaryCacheEntry *) entry; + + ts_dict_shmem_release(dict_entry->dictId); + } + entry->isvalid = false; + } /* Also invalidate the current-config cache if it's pg_ts_config */ if (hash == TSConfigCacheHash) @@ -334,8 +344,9 @@ lookup_ts_dictionary_cache(Oid dictId) dictoptions = deserialize_deflist(opt); entry->dictData = - DatumGetPointer(OidFunctionCall1(template->tmplinit, - PointerGetDatum(dictoptions))); + DatumGetPointer(OidFunctionCall2(template->tmplinit, + PointerGetDatum(dictoptions), + ObjectIdGetDatum(dictId))); MemoryContextSwitchTo(oldcontext); } diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 87ba67661a..53230bc37f 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -76,6 +76,7 @@ #include 
"storage/predicate.h" #include "tcop/tcopprot.h" #include "tsearch/ts_cache.h" +#include "tsearch/ts_shared.h" #include "utils/builtins.h" #include "utils/bytea.h" #include "utils/guc_tables.h" @@ -2922,6 +2923,20 @@ static struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + {"max_shared_dictionaries_size", PGC_POSTMASTER, RESOURCES_MEM, + gettext_noop("Sets the maximum size of all text search dictionaries loaded into shared memory."), + gettext_noop("Currently controls only loading of Ispell dictionaries. " + "If total size of simultaneously loaded dictionaries " + "reaches the maximum allowed size then a new dictionary " + "will be loaded into local memory of a backend."), + GUC_UNIT_KB, + }, + &max_shared_dictionaries_size, + 100 * 1024, 0, MAX_KILOBYTES, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 9a3535559e..908ccebb52 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -133,6 +133,7 @@ # mmap # use none to disable dynamic shared memory # (change requires restart) +#max_shared_dictionaries_size = 100MB # (change requires restart) # - Disk - diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index c21bfe2f66..16b0858eda 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -219,6 +219,8 @@ typedef enum BuiltinTrancheIds LWTRANCHE_SHARED_TUPLESTORE, LWTRANCHE_TBM, LWTRANCHE_PARALLEL_APPEND, + LWTRANCHE_TSEARCH_DSA, + LWTRANCHE_TSEARCH_TABLE, LWTRANCHE_FIRST_USER_DEFINED } BuiltinTrancheIds; diff --git a/src/include/tsearch/ts_shared.h b/src/include/tsearch/ts_shared.h new file mode 100644 index 0000000000..d6a27c9037 --- /dev/null +++ b/src/include/tsearch/ts_shared.h @@ -0,0 +1,33 @@ +/*------------------------------------------------------------------------- + * 
+ * ts_shared.h + * tsearch shared memory management + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * + * src/include/tsearch/ts_shared.h + * + *------------------------------------------------------------------------- + */ +#ifndef TS_SHARED_H +#define TS_SHARED_H + +#include "c.h" + +#include "nodes/pg_list.h" + +/* + * GUC variable for maximum number of shared dictionaries + */ +extern int max_shared_dictionaries_size; + +typedef void *(*ispell_build_callback) (List *dictoptions, Size *size); + +extern void *ts_dict_shmem_location(Oid dictid, List *dictoptions, + ispell_build_callback allocate_cb); +extern void ts_dict_shmem_release(Oid dictid); + +extern void TsearchShmemInit(void); +extern Size TsearchShmemSize(void); + +#endif /* TS_SHARED_H */
diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml index 610b7bf033..82afe201f8 100644 --- a/doc/src/sgml/textsearch.sgml +++ b/doc/src/sgml/textsearch.sgml @@ -3030,6 +3030,23 @@ CREATE TEXT SEARCH DICTIONARY english_stem ( </sect2> + <sect2 id="textsearch-shared-dictionaries"> + <title>Dictionaries in Shared Memory</title> + + <para> + Some dictionaries, especially <application>Ispell</application>, consume a + noticeable amount of memory. The size of a dictionary can reach tens of megabytes. + Most of them also store their configuration in text files. A dictionary is compiled + on first access in each user session. + </para> + + <para> + To store dictionaries in shared memory, set the <xref linkend="guc-max-shared-dictionaries-size"/> + parameter to a value greater than zero before starting the server. + </para> + + </sect2> + </sect1> <sect1 id="textsearch-configuration"> diff --git a/src/backend/tsearch/dict_ispell.c b/src/backend/tsearch/dict_ispell.c index edc6547700..e7f4d5a48d 100644 --- a/src/backend/tsearch/dict_ispell.c +++ b/src/backend/tsearch/dict_ispell.c @@ -5,6 +5,15 @@ * * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group * + * By default all Ispell dictionaries are stored in DSM. But if the total + * size of loaded dictionaries reaches the maximum allowed value, a new + * dictionary is allocated within its memory context (dictCtx). + * + * All necessary data is built within the dispell_build() function. But + * structures for regular expressions are compiled on first demand and + * stored in the AffixReg array, because regex_t and Regis cannot be + * stored in shared memory. 
+ * * * IDENTIFICATION * src/backend/tsearch/dict_ispell.c @@ -14,8 +23,10 @@ #include "postgres.h" #include "commands/defrem.h" +#include "storage/dsm.h" #include "tsearch/dicts/spell.h" #include "tsearch/ts_locale.h" +#include "tsearch/ts_shared.h" #include "tsearch/ts_utils.h" #include "utils/builtins.h" @@ -26,19 +37,90 @@ typedef struct IspellDict obj; } DictISpell; +static void parse_dictoptions(List *dictoptions, + char **dictfile, char **afffile, char **stopfile); +static void *dispell_build(List *dictoptions, Size *size); + Datum dispell_init(PG_FUNCTION_ARGS) { List *dictoptions = (List *) PG_GETARG_POINTER(0); + Oid dictid = PG_GETARG_OID(1); DictISpell *d; - bool affloaded = false, - dictloaded = false, - stoploaded = false; - ListCell *l; + void *dict_location; + char *stopfile; d = (DictISpell *) palloc0(sizeof(DictISpell)); - NIStartBuild(&(d->obj)); + parse_dictoptions(dictoptions, NULL, NULL, &stopfile); + + if (stopfile) + readstoplist(stopfile, &(d->stoplist), lowerstr); + + dict_location = ts_dict_shmem_location(dictid, dictoptions, dispell_build); + Assert(dict_location); + + d->obj.dict = (IspellDictData *) dict_location; + d->obj.reg = (AffixReg *) palloc0(d->obj.dict->nAffix * + sizeof(AffixReg)); + /* Current memory context is dictionary's private memory context */ + d->obj.dictCtx = CurrentMemoryContext; + + PG_RETURN_POINTER(d); +} + +Datum +dispell_lexize(PG_FUNCTION_ARGS) +{ + DictISpell *d = (DictISpell *) PG_GETARG_POINTER(0); + char *in = (char *) PG_GETARG_POINTER(1); + int32 len = PG_GETARG_INT32(2); + char *txt; + TSLexeme *res; + TSLexeme *ptr, + *cptr; + + if (len <= 0) + PG_RETURN_POINTER(NULL); + + txt = lowerstr_with_len(in, len); + res = NINormalizeWord(&(d->obj), txt); + + if (res == NULL) + PG_RETURN_POINTER(NULL); + + cptr = res; + for (ptr = cptr; ptr->lexeme; ptr++) + { + if (searchstoplist(&(d->stoplist), ptr->lexeme)) + { + pfree(ptr->lexeme); + ptr->lexeme = NULL; + } + else + { + if (cptr != ptr) + memcpy(cptr, ptr, 
sizeof(TSLexeme)); + cptr++; + } + } + cptr->lexeme = NULL; + + PG_RETURN_POINTER(res); +} + +static void +parse_dictoptions(List *dictoptions, char **dictfile, char **afffile, + char **stopfile) +{ + ListCell *l; + + if (dictfile) + *dictfile = NULL; + if (afffile) + *afffile = NULL; + if (stopfile) + *stopfile = NULL; foreach(l, dictoptions) { @@ -46,34 +128,36 @@ dispell_init(PG_FUNCTION_ARGS) if (strcmp(defel->defname, "dictfile") == 0) { - if (dictloaded) + if (!dictfile) + continue; + + if (*dictfile) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("multiple DictFile parameters"))); - NIImportDictionary(&(d->obj), - get_tsearch_config_filename(defGetString(defel), - "dict")); - dictloaded = true; + *dictfile = get_tsearch_config_filename(defGetString(defel), "dict"); } else if (strcmp(defel->defname, "afffile") == 0) { - if (affloaded) + if (!afffile) + continue; + + if (*afffile) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("multiple AffFile parameters"))); - NIImportAffixes(&(d->obj), - get_tsearch_config_filename(defGetString(defel), - "affix")); - affloaded = true; + *afffile = get_tsearch_config_filename(defGetString(defel), "affix"); } else if (strcmp(defel->defname, "stopwords") == 0) { - if (stoploaded) + if (!stopfile) + continue; + + if (*stopfile) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("multiple StopWords parameters"))); - readstoplist(defGetString(defel), &(d->stoplist), lowerstr); - stoploaded = true; + *stopfile = defGetString(defel); } else { @@ -83,66 +167,52 @@ dispell_init(PG_FUNCTION_ARGS) defel->defname))); } } +} - if (affloaded && dictloaded) - { - NISortDictionary(&(d->obj)); - NISortAffixes(&(d->obj)); - } - else if (!affloaded) +/* + * Build the dictionary. + * + * Result is palloc'ed. 
+ */ +static void * +dispell_build(List *dictoptions, Size *size) +{ + IspellDictBuild build; + char *dictfile, + *afffile; + + parse_dictoptions(dictoptions, &dictfile, &afffile, NULL); + + if (!afffile) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("missing AffFile parameter"))); } - else + else if (!dictfile) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("missing DictFile parameter"))); } - NIFinishBuild(&(d->obj)); + MemSet(&build, 0, sizeof(build)); + NIStartBuild(&build); - PG_RETURN_POINTER(d); -} + /* Read files */ + NIImportDictionary(&build, dictfile); + NIImportAffixes(&build, afffile); -Datum -dispell_lexize(PG_FUNCTION_ARGS) -{ - DictISpell *d = (DictISpell *) PG_GETARG_POINTER(0); - char *in = (char *) PG_GETARG_POINTER(1); - int32 len = PG_GETARG_INT32(2); - char *txt; - TSLexeme *res; - TSLexeme *ptr, - *cptr; + /* Build persistent data to use by backends */ + NISortDictionary(&build); + NISortAffixes(&build); - if (len <= 0) - PG_RETURN_POINTER(NULL); + NICopyData(&build); - txt = lowerstr_with_len(in, len); - res = NINormalizeWord(&(d->obj), txt); - - if (res == NULL) - PG_RETURN_POINTER(NULL); + /* Release temporary data */ + NIFinishBuild(&build); - cptr = res; - for (ptr = cptr; ptr->lexeme; ptr++) - { - if (searchstoplist(&(d->stoplist), ptr->lexeme)) - { - pfree(ptr->lexeme); - ptr->lexeme = NULL; - } - else - { - if (cptr != ptr) - memcpy(cptr, ptr, sizeof(TSLexeme)); - cptr++; - } - } - cptr->lexeme = NULL; - - PG_RETURN_POINTER(res); + /* Return the buffer and its size */ + *size = build.dict_size; + return build.dict; } diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c index e071994523..1c560ef56a 100644 --- a/src/backend/tsearch/spell.c +++ b/src/backend/tsearch/spell.c @@ -23,33 +23,35 @@ * Compilation of a dictionary * --------------------------- * - * A compiled dictionary is stored in the IspellDict structure. 
Compilation of - * a dictionary is divided into the several steps: + * A compiled dictionary is stored in the following structures: + * - IspellDictBuild - stores temporary data and IspellDictData + * - IspellDictData - stores permanent data used within NINormalizeWord() + * Compilation of the dictionary is divided into the several steps: * - NIImportDictionary() - stores each word of a .dict file in the * temporary Spell field. - * - NIImportAffixes() - stores affix rules of an .affix file in the - * Affix field (not temporary) if an .affix file has the Ispell format. + * - NIImportAffixes() - stores affix rules of an .affix file in the temporary + * Affix field if an .affix file has the Ispell format. * -> NIImportOOAffixes() - stores affix rules if an .affix file has the * Hunspell format. The AffixData field is initialized if AF parameter * is defined. * - NISortDictionary() - builds a prefix tree (Trie) from the words list - * and stores it in the Dictionary field. The words list is got from the + * and stores it in the DictNodes field. The words list is got from the * Spell field. The AffixData field is initialized if AF parameter is not * defined. * - NISortAffixes(): * - builds a list of compound affixes from the affix list and stores it * in the CompoundAffix. * - builds prefix trees (Trie) from the affix list for prefixes and suffixes - * and stores them in Suffix and Prefix fields. + * and stores them in SuffixNodes and PrefixNodes fields. * The affix list is got from the Affix field. + * Persistent data of the dictionary is copied within NICopyData(). * * Memory management * ----------------- * - * The IspellDict structure has the Spell field which is used only in compile - * time. The Spell field stores a words list. It can take a lot of memory. - * Therefore when a dictionary is compiled this field is cleared by - * NIFinishBuild(). + * The IspellDictBuild structure has the temporary data which is used only in + * compile time. 
It can take a lot of memory. Therefore after compiling the + * dictionary this data is cleared by NIFinishBuild(). * * All resources which should cleared by NIFinishBuild() is initialized using * tmpalloc() and tmpalloc0(). @@ -73,112 +75,147 @@ * after the initialization is done. During initialization, * CurrentMemoryContext is the long-lived memory context associated * with the dictionary cache entry. We keep the short-lived stuff - * in the Conf->buildCxt context. + * in the ConfBuild->buildCxt context. */ -#define tmpalloc(sz) MemoryContextAlloc(Conf->buildCxt, (sz)) -#define tmpalloc0(sz) MemoryContextAllocZero(Conf->buildCxt, (sz)) +#define tmpalloc(sz) MemoryContextAlloc(ConfBuild->buildCxt, (sz)) +#define tmpalloc0(sz) MemoryContextAllocZero(ConfBuild->buildCxt, (sz)) -#define tmpstrdup(str) MemoryContextStrdup(Conf->buildCxt, (str)) +#define tmpstrdup(str) MemoryContextStrdup(ConfBuild->buildCxt, (str)) /* * Prepare for constructing an ISpell dictionary. * - * The IspellDict struct is assumed to be zeroed when allocated. + * The IspellDictBuild struct is assumed to be zeroed when allocated. */ void -NIStartBuild(IspellDict *Conf) +NIStartBuild(IspellDictBuild *ConfBuild) { + uint32 dict_size; + /* * The temp context is a child of CurTransactionContext, so that it will * go away automatically on error. */ - Conf->buildCxt = AllocSetContextCreate(CurTransactionContext, - "Ispell dictionary init context", - ALLOCSET_DEFAULT_SIZES); + ConfBuild->buildCxt = AllocSetContextCreate(CurTransactionContext, + "Ispell dictionary init context", + ALLOCSET_DEFAULT_SIZES); + + /* + * Allocate buffer for the dictionary in current context not in buildCxt. + */ + dict_size = MAXALIGN(IspellDictDataHdrSize); + ConfBuild->dict = palloc0(dict_size); + ConfBuild->dict_size = dict_size; } /* - * Clean up when dictionary construction is complete. + * Copy compiled and persistent data into IspellDictData. 
*/ void -NIFinishBuild(IspellDict *Conf) +NICopyData(IspellDictBuild *ConfBuild) { - /* Release no-longer-needed temp memory */ - MemoryContextDelete(Conf->buildCxt); - /* Just for cleanliness, zero the now-dangling pointers */ - Conf->buildCxt = NULL; - Conf->Spell = NULL; - Conf->firstfree = NULL; - Conf->CompoundAffixFlags = NULL; -} + IspellDictData *dict; + uint32 size; + int i; + uint32 *offsets, + offset; + /* + * Calculate necessary space + */ + size = ConfBuild->nAffixData * sizeof(uint32); + size += ConfBuild->AffixDataEnd; -/* - * "Compact" palloc: allocate without extra palloc overhead. - * - * Since we have no need to free the ispell data items individually, there's - * not much value in the per-chunk overhead normally consumed by palloc. - * Getting rid of it is helpful since ispell can allocate a lot of small nodes. - * - * We currently pre-zero all data allocated this way, even though some of it - * doesn't need that. The cpalloc and cpalloc0 macros are just documentation - * to indicate which allocations actually require zeroing. 
- */ -#define COMPACT_ALLOC_CHUNK 8192 /* amount to get from palloc at once */ -#define COMPACT_MAX_REQ 1024 /* must be < COMPACT_ALLOC_CHUNK */ + size += ConfBuild->nAffix * sizeof(uint32); + size += ConfBuild->AffixSize; -static void * -compact_palloc0(IspellDict *Conf, size_t size) -{ - void *result; + size += ConfBuild->DictNodes.NodesEnd; + size += ConfBuild->PrefixNodes.NodesEnd; + size += ConfBuild->SuffixNodes.NodesEnd; - /* Should only be called during init */ - Assert(Conf->buildCxt != NULL); + size += sizeof(CMPDAffix) * ConfBuild->nCompoundAffix; - /* No point in this for large chunks */ - if (size > COMPACT_MAX_REQ) - return palloc0(size); + /* + * Copy data itself + */ + ConfBuild->dict_size = IspellDictDataHdrSize + size; + ConfBuild->dict = repalloc(ConfBuild->dict, ConfBuild->dict_size); + + dict = ConfBuild->dict; + + /* AffixData */ + dict->nAffixData = ConfBuild->nAffixData; + dict->AffixDataStart = sizeof(uint32) * ConfBuild->nAffixData; + memcpy(DictAffixDataOffset(dict), ConfBuild->AffixDataOffset, + sizeof(uint32) * ConfBuild->nAffixData); + memcpy(DictAffixData(dict), ConfBuild->AffixData, ConfBuild->AffixDataEnd); + + /* Affix array */ + dict->nAffix = ConfBuild->nAffix; + dict->AffixOffsetStart = dict->AffixDataStart + ConfBuild->AffixDataEnd; + dict->AffixStart = dict->AffixOffsetStart + sizeof(uint32) * ConfBuild->nAffix; + offsets = (uint32 *) DictAffixOffset(dict); + offset = 0; + for (i = 0; i < ConfBuild->nAffix; i++) + { + AFFIX *affix; + uint32 size = AffixGetSize(ConfBuild->Affix[i]); - /* Keep everything maxaligned */ - size = MAXALIGN(size); + offsets[i] = offset; + affix = (AFFIX *) DictAffixGet(dict, i); + Assert(affix); - /* Need more space? 
*/ - if (size > Conf->avail) - { - Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK); - Conf->avail = COMPACT_ALLOC_CHUNK; - } + memcpy(affix, ConfBuild->Affix[i], size); - result = (void *) Conf->firstfree; - Conf->firstfree += size; - Conf->avail -= size; + offset += size; + } - return result; + /* DictNodes prefix tree */ + dict->DictNodesStart = dict->AffixStart + offset; + memcpy(DictDictNodes(dict), ConfBuild->DictNodes.Nodes, + ConfBuild->DictNodes.NodesEnd); + + /* PrefixNodes prefix tree */ + dict->PrefixNodesStart = dict->DictNodesStart + ConfBuild->DictNodes.NodesEnd; + memcpy(DictPrefixNodes(dict), ConfBuild->PrefixNodes.Nodes, + ConfBuild->PrefixNodes.NodesEnd); + + /* SuffixNodes prefix tree */ + dict->SuffixNodesStart = dict->PrefixNodesStart + ConfBuild->PrefixNodes.NodesEnd; + memcpy(DictSuffixNodes(dict), ConfBuild->SuffixNodes.Nodes, + ConfBuild->SuffixNodes.NodesEnd); + + /* CompoundAffix array */ + dict->CompoundAffixStart = dict->SuffixNodesStart + + ConfBuild->SuffixNodes.NodesEnd; + memcpy(DictCompoundAffix(dict), ConfBuild->CompoundAffix, + sizeof(CMPDAffix) * ConfBuild->nCompoundAffix); } -#define cpalloc(size) compact_palloc0(Conf, size) -#define cpalloc0(size) compact_palloc0(Conf, size) - -static char * -cpstrdup(IspellDict *Conf, const char *str) +/* + * Clean up when dictionary construction is complete. + */ +void +NIFinishBuild(IspellDictBuild *ConfBuild) { - char *res = cpalloc(strlen(str) + 1); - - strcpy(res, str); - return res; + /* Release no-longer-needed temp memory */ + MemoryContextDelete(ConfBuild->buildCxt); + /* Just for cleanliness, zero the now-dangling pointers */ + ConfBuild->buildCxt = NULL; + ConfBuild->Spell = NULL; + ConfBuild->CompoundAffixFlags = NULL; } - /* * Apply lowerstr(), producing a temporary result (in the buildCxt). 
*/ static char * -lowerstr_ctx(IspellDict *Conf, const char *src) +lowerstr_ctx(IspellDictBuild *ConfBuild, const char *src) { MemoryContext saveCtx; char *dst; - saveCtx = MemoryContextSwitchTo(Conf->buildCxt); + saveCtx = MemoryContextSwitchTo(ConfBuild->buildCxt); dst = lowerstr(src); MemoryContextSwitchTo(saveCtx); @@ -190,7 +227,7 @@ lowerstr_ctx(IspellDict *Conf, const char *src) #define STRNCMP(s,p) strncmp( (s), (p), strlen(p) ) #define GETWCHAR(W,L,N,T) ( ((const uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] ) -#define GETCHAR(A,N,T) GETWCHAR( (A)->repl, (A)->replen, N, T ) +#define GETCHAR(A,N,T) GETWCHAR( AffixFieldRepl(A), (A)->replen, N, T ) static char *VoidString = ""; @@ -311,18 +348,189 @@ strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count) static int cmpaffix(const void *s1, const void *s2) { - const AFFIX *a1 = (const AFFIX *) s1; - const AFFIX *a2 = (const AFFIX *) s2; + const AFFIX *a1 = *((AFFIX *const *) s1); + const AFFIX *a2 = *((AFFIX *const *) s2); if (a1->type < a2->type) return -1; if (a1->type > a2->type) return 1; if (a1->type == FF_PREFIX) - return strcmp(a1->repl, a2->repl); + return strcmp(AffixFieldRepl(a1), AffixFieldRepl(a2)); else - return strbcmp((const unsigned char *) a1->repl, - (const unsigned char *) a2->repl); + return strbcmp((const unsigned char *) AffixFieldRepl(a1), + (const unsigned char *) AffixFieldRepl(a2)); +} + +/* + * Allocate space for AffixData. + */ +static void +InitAffixData(IspellDictBuild *ConfBuild, int numAffixData) +{ + uint32 size; + + size = 8 * 1024 /* Reserve 8KB for data */; + + ConfBuild->AffixData = (char *) tmpalloc(size); + ConfBuild->AffixDataSize = size; + ConfBuild->AffixDataOffset = (uint32 *) tmpalloc(numAffixData * sizeof(uint32)); + ConfBuild->nAffixData = 0; + ConfBuild->mAffixData= numAffixData; + + /* Save offset of the end of data */ + ConfBuild->AffixDataEnd = 0; +} + +/* + * Add affix set of affix flags into IspellDict struct. 
If IspellDict doesn't + * fit new affix set then resize it. + * + * ConfBuild: building structure for the current dictionary. + * AffixSet: set of affix flags. + */ +static void +AddAffixSet(IspellDictBuild *ConfBuild, const char *AffixSet, + uint32 AffixSetLen) +{ + /* + * Check available space for AffixSet. + */ + if (ConfBuild->AffixDataEnd + AffixSetLen + 1 /* \0 */ >= + ConfBuild->AffixDataSize) + { + uint32 newsize = Max(ConfBuild->AffixDataSize + 8 * 1024 /* 8KB */, + ConfBuild->AffixDataSize + AffixSetLen + 1); + + ConfBuild->AffixData = (char *) repalloc(ConfBuild->AffixData, newsize); + ConfBuild->AffixDataSize = newsize; + } + + /* Check available number of offsets */ + if (ConfBuild->nAffixData >= ConfBuild->mAffixData) + { + ConfBuild->mAffixData *= 2; + ConfBuild->AffixDataOffset = (uint32 *) repalloc(ConfBuild->AffixDataOffset, + sizeof(uint32) * ConfBuild->mAffixData); + } + + ConfBuild->AffixDataOffset[ConfBuild->nAffixData] = ConfBuild->AffixDataEnd; + StrNCpy(AffixDataGet(ConfBuild, ConfBuild->nAffixData), + AffixSet, AffixSetLen + 1); + + /* Save offset of the end of data */ + ConfBuild->AffixDataEnd += AffixSetLen + 1; + ConfBuild->nAffixData++; +} + +/* + * Allocate space for prefix tree node. + * + * ConfBuild: building structure for the current dictionary. + * array: NodeArray where to allocate new node. + * length: number of allocated NodeData. + * sizeNodeData: minimum size of each NodeData. + * sizeNodeHeader: size of header of new node. + * + * Returns an offset of new node in NodeArray->Nodes. 
+ */ +static uint32 +AllocateNode(IspellDictBuild *ConfBuild, NodeArray *array, uint32 length, + uint32 sizeNodeData, uint32 sizeNodeHeader) +{ + uint32 node_offset; + uint32 size; + + size = sizeNodeHeader + length * sizeNodeData; + size = MAXALIGN(size); + + if (array->NodesSize == 0) + { + array->NodesSize = size * 32; /* Reserve space for next levels of the + * prefix tree */ + array->Nodes = (char *) tmpalloc(array->NodesSize); + array->NodesEnd = 0; + } + else if (array->NodesEnd + size >= array->NodesSize) + { + array->NodesSize = Max(array->NodesSize * 2, array->NodesSize + size); + array->Nodes = (char *) repalloc(array->Nodes, array->NodesSize); + } + + node_offset = array->NodesEnd; + array->NodesEnd += size; + + return node_offset; +} + +/* + * Allocate space for SPNode. + * + * Returns an offset of new node in ConfBuild->DictNodes->Nodes. + */ +static uint32 +AllocateSPNode(IspellDictBuild *ConfBuild, uint32 length) +{ + uint32 offset; + SPNode *node; + SPNodeData *data; + uint32 i; + + offset = AllocateNode(ConfBuild, &ConfBuild->DictNodes, length, + sizeof(SPNodeData), SPNHDRSZ); + node = (SPNode *) NodeArrayGet(&ConfBuild->DictNodes, offset); + node->length = length; + + /* + * Initialize all SPNodeData with default values. We cannot use memset() + * here because not all fields have 0 as default value. + */ + for (i = 0; i < length; i++) + { + data = &(node->data[i]); + data->val = 0; + data->affix = ISPELL_INVALID_INDEX; + data->compoundflag = 0; + data->isword = 0; + data->node_offset = ISPELL_INVALID_OFFSET; + } + + return offset; +} + +/* + * Allocate space for AffixNode. + * + * Returns an offset of new node in NodeArray->Nodes. 
+ */ +static uint32 +AllocateAffixNode(IspellDictBuild *ConfBuild, NodeArray *array, uint32 length) +{ + uint32 offset; + AffixNode *node; + AffixNodeData *data; + uint32 i; + + offset = AllocateNode(ConfBuild, array, length, sizeof(AffixNodeData), + ANHRDSZ); + node = (AffixNode *) NodeArrayGet(array, offset); + node->length = length; + node->isvoid = 0; + + /* + * Initialize all AffixNodeData with default values. We cannot use memset() + * here because not all fields have 0 as default value. + */ + for (i = 0; i < length; i++) + { + data = &(node->data[i]); + data->val = 0; + data->affstart = ISPELL_INVALID_INDEX; + data->affend = ISPELL_INVALID_INDEX; + data->node_offset = ISPELL_INVALID_OFFSET; + } + + return offset; } /* @@ -333,7 +541,7 @@ cmpaffix(const void *s1, const void *s2) * - 2 characters (FM_LONG). A character may be Unicode. * - numbers from 1 to 65000 (FM_NUM). * - * Depending on the flagMode an affix string can have the following format: + * Depending on the flagmode an affix string can have the following format: * - FM_CHAR: ABCD * Here we have 4 flags: A, B, C and D * - FM_LONG: ABCDE* @@ -341,13 +549,13 @@ cmpaffix(const void *s1, const void *s2) * - FM_NUM: 200,205,50 * Here we have 3 flags: 200, 205 and 50 * - * Conf: current dictionary. + * flagmode: flag mode of the dictionary * sflagset: the set of affix flags. Returns a reference to the start of a next * affix flag. * sflag: returns an affix flag from sflagset. */ static void -getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) +getNextFlagFromString(FlagMode flagmode, char **sflagset, char *sflag) { int32 s; char *next, @@ -356,11 +564,11 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) bool stop = false; bool met_comma = false; - maxstep = (Conf->flagMode == FM_LONG) ? 2 : 1; + maxstep = (flagmode == FM_LONG) ? 
2 : 1; while (**sflagset) { - switch (Conf->flagMode) + switch (flagmode) { case FM_LONG: case FM_CHAR: @@ -422,15 +630,15 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) stop = true; break; default: - elog(ERROR, "unrecognized type of Conf->flagMode: %d", - Conf->flagMode); + elog(ERROR, "unrecognized type of flagmode: %d", + flagmode); } if (stop) break; } - if (Conf->flagMode == FM_LONG && maxstep > 0) + if (flagmode == FM_LONG && maxstep > 0) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("invalid affix flag \"%s\" with \"long\" flag value", @@ -440,31 +648,28 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) } /* - * Checks if the affix set Conf->AffixData[affix] contains affixflag. - * Conf->AffixData[affix] does not contain affixflag if this flag is not used - * actually by the .dict file. + * Checks if the affix set from AffixData contains affixflag. Affix set does + * not contain affixflag if this flag is not used actually by the .dict file. * - * Conf: current dictionary. - * affix: index of the Conf->AffixData array. + * flagmode: flag mode of the dictionary. + * sflagset: the set of affix flags. * affixflag: the affix flag. * - * Returns true if the string Conf->AffixData[affix] contains affixflag, - * otherwise returns false. + * Returns true if the affix set string contains affixflag, otherwise returns + * false. 
*/ static bool -IsAffixFlagInUse(IspellDict *Conf, int affix, const char *affixflag) +IsAffixFlagInUse(FlagMode flagmode, char *sflagset, const char *affixflag) { - char *flagcur; + char *flagcur = sflagset; char flag[BUFSIZ]; if (*affixflag == 0) return true; - flagcur = Conf->AffixData[affix]; - while (*flagcur) { - getNextFlagFromString(Conf, &flagcur, flag); + getNextFlagFromString(flagmode, &flagcur, flag); /* Compare first affix flag in flagcur with affixflag */ if (strcmp(flag, affixflag) == 0) return true; @@ -477,31 +682,33 @@ IsAffixFlagInUse(IspellDict *Conf, int affix, const char *affixflag) /* * Adds the new word into the temporary array Spell. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * word: new word. * flag: set of affix flags. Single flag can be get by getNextFlagFromString(). */ static void -NIAddSpell(IspellDict *Conf, const char *word, const char *flag) +NIAddSpell(IspellDictBuild *ConfBuild, const char *word, const char *flag) { - if (Conf->nspell >= Conf->mspell) + if (ConfBuild->nSpell >= ConfBuild->mSpell) { - if (Conf->mspell) + if (ConfBuild->mSpell) { - Conf->mspell *= 2; - Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *)); + ConfBuild->mSpell *= 2; + ConfBuild->Spell = (SPELL **) repalloc(ConfBuild->Spell, + ConfBuild->mSpell * sizeof(SPELL *)); } else { - Conf->mspell = 1024 * 20; - Conf->Spell = (SPELL **) tmpalloc(Conf->mspell * sizeof(SPELL *)); + ConfBuild->mSpell = 1024 * 20; + ConfBuild->Spell = (SPELL **) tmpalloc(ConfBuild->mSpell * sizeof(SPELL *)); } } - Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1); - strcpy(Conf->Spell[Conf->nspell]->word, word); - Conf->Spell[Conf->nspell]->p.flag = (*flag != '\0') + ConfBuild->Spell[ConfBuild->nSpell] = + (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1); + strcpy(ConfBuild->Spell[ConfBuild->nSpell]->word, word); + ConfBuild->Spell[ConfBuild->nSpell]->p.flag = (*flag != '\0') ? 
tmpstrdup(flag) : VoidString; - Conf->nspell++; + ConfBuild->nSpell++; } /* @@ -509,11 +716,11 @@ NIAddSpell(IspellDict *Conf, const char *word, const char *flag) * * Note caller must already have applied get_tsearch_config_filename. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * filename: path to the .dict file. */ void -NIImportDictionary(IspellDict *Conf, const char *filename) +NIImportDictionary(IspellDictBuild *ConfBuild, const char *filename) { tsearch_readline_state trst; char *line; @@ -564,9 +771,9 @@ NIImportDictionary(IspellDict *Conf, const char *filename) } s += pg_mblen(s); } - pstr = lowerstr_ctx(Conf, line); + pstr = lowerstr_ctx(ConfBuild, line); - NIAddSpell(Conf, pstr, flag); + NIAddSpell(ConfBuild, pstr, flag); pfree(pstr); pfree(line); @@ -590,7 +797,7 @@ NIImportDictionary(IspellDict *Conf, const char *filename) * SFX M 0 's . * is presented here. * - * Conf: current dictionary. + * dict: current dictionary. * word: basic form of word. * affixflag: affix flag, by which a basic form of word was generated. * flag: compound flag used to compare with StopMiddle->compoundflag. @@ -598,9 +805,9 @@ NIImportDictionary(IspellDict *Conf, const char *filename) * Returns 1 if the word was found in the prefix tree, else returns 0. */ static int -FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag) +FindWord(IspellDictData *dict, const char *word, const char *affixflag, int flag) { - SPNode *node = Conf->Dictionary; + SPNode *node = (SPNode *) DictDictNodes(dict); SPNodeData *StopLow, *StopHigh, *StopMiddle; @@ -636,10 +843,14 @@ FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag) * Check if this affix rule is presented in the affix set * with index StopMiddle->affix. 
*/ - if (IsAffixFlagInUse(Conf, StopMiddle->affix, affixflag)) + if (IsAffixFlagInUse(dict->flagMode, + DictAffixDataGet(dict, StopMiddle->affix), + affixflag)) return 1; } - node = StopMiddle->node; + /* Retreive SPNode by the offset */ + node = (SPNode *) DictNodeGet(DictDictNodes(dict), + StopMiddle->node_offset); ptr++; break; } @@ -657,7 +868,8 @@ FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag) /* * Adds a new affix rule to the Affix field. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary, is used to allocate + * temporary data. * flag: affix flag ('\' in the below example). * flagflags: set of flags from the flagval field for this affix rule. This set * is listed after '/' character in the added string (repl). @@ -673,26 +885,54 @@ FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag) * type: FF_SUFFIX or FF_PREFIX. */ static void -NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask, - const char *find, const char *repl, int type) +NIAddAffix(IspellDictBuild *ConfBuild, const char *flag, char flagflags, + const char *mask, const char *find, const char *repl, int type) { AFFIX *Affix; + uint32 size; + uint32 flaglen = strlen(flag), + findlen = strlen(find), + repllen = strlen(repl), + masklen = strlen(mask); + + /* Sanity checks */ + if (flaglen > AF_FLAG_MAXSIZE) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("affix flag \"%s\" too long", flag))); + if (findlen > AF_REPL_MAXSIZE) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("affix find field \"%s\" too long", find))); + if (repllen > AF_REPL_MAXSIZE) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("affix repl field \"%s\" too long", repl))); + if (masklen > AF_REPL_MAXSIZE) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("affix mask field \"%s\" too long", repl))); - if (Conf->naffixes >= Conf->maffixes) + if 
(ConfBuild->nAffix >= ConfBuild->mAffix) { - if (Conf->maffixes) + if (ConfBuild->mAffix) { - Conf->maffixes *= 2; - Conf->Affix = (AFFIX *) repalloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX)); + ConfBuild->mAffix *= 2; + ConfBuild->Affix = (AFFIX **) repalloc(ConfBuild->Affix, + ConfBuild->mAffix * sizeof(AFFIX *)); } else { - Conf->maffixes = 16; - Conf->Affix = (AFFIX *) palloc(Conf->maffixes * sizeof(AFFIX)); + ConfBuild->mAffix = 255; + ConfBuild->Affix = (AFFIX **) tmpalloc(ConfBuild->mAffix * sizeof(AFFIX *)); } } - Affix = Conf->Affix + Conf->naffixes; + size = AFFIXHDRSZ + flaglen + 1 /* \0 */ + findlen + 1 /* \0 */ + + repllen + 1 /* \0 */ + masklen + 1 /* \0 */; + + Affix = (AFFIX *) tmpalloc(size); + ConfBuild->Affix[ConfBuild->nAffix] = Affix; /* This affix rule can be applied for words with any ending */ if (strcmp(mask, ".") == 0 || *mask == '\0') @@ -705,42 +945,12 @@ NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask, { Affix->issimple = 0; Affix->isregis = 1; - RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX), - *mask ? 
mask : VoidString); } /* This affix rule will use regex_t to search word ending */ else { - int masklen; - int wmasklen; - int err; - pg_wchar *wmask; - char *tmask; - Affix->issimple = 0; Affix->isregis = 0; - tmask = (char *) tmpalloc(strlen(mask) + 3); - if (type == FF_SUFFIX) - sprintf(tmask, "%s$", mask); - else - sprintf(tmask, "^%s", mask); - - masklen = strlen(tmask); - wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar)); - wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen); - - err = pg_regcomp(&(Affix->reg.regex), wmask, wmasklen, - REG_ADVANCED | REG_NOSUB, - DEFAULT_COLLATION_OID); - if (err) - { - char errstr[100]; - - pg_regerror(err, &(Affix->reg.regex), errstr, sizeof(errstr)); - ereport(ERROR, - (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), - errmsg("invalid regular expression: %s", errstr))); - } } Affix->flagflags = flagflags; @@ -749,15 +959,22 @@ NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask, if ((Affix->flagflags & FF_COMPOUNDFLAG) == 0) Affix->flagflags |= FF_COMPOUNDFLAG; } - Affix->flag = cpstrdup(Conf, flag); + Affix->type = type; - Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString; - if ((Affix->replen = strlen(repl)) > 0) - Affix->repl = cpstrdup(Conf, repl); - else - Affix->repl = VoidString; - Conf->naffixes++; + Affix->replen = repllen; + StrNCpy(AffixFieldRepl(Affix), repl, repllen + 1); + + Affix->findlen = findlen; + StrNCpy(AffixFieldFind(Affix), find, findlen + 1); + + Affix->masklen = masklen; + StrNCpy(AffixFieldMask(Affix), mask, masklen + 1); + + StrNCpy(AffixFieldFlag(Affix), flag, flaglen + 1); + + ConfBuild->nAffix++; + ConfBuild->AffixSize += size; } /* Parsing states for parse_affentry() and friends */ @@ -1021,10 +1238,10 @@ parse_affentry(char *str, char *mask, char *find, char *repl) * Sets a Hunspell options depending on flag type. 
*/ static void -setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry, +setCompoundAffixFlagValue(IspellDictBuild *ConfBuild, CompoundAffixFlag *entry, char *s, uint32 val) { - if (Conf->flagMode == FM_NUM) + if (ConfBuild->dict->flagMode == FM_NUM) { char *next; int i; @@ -1044,19 +1261,19 @@ setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry, else entry->flag.s = tmpstrdup(s); - entry->flagMode = Conf->flagMode; + entry->flagMode = ConfBuild->dict->flagMode; entry->value = val; } /* * Sets up a correspondence for the affix parameter with the affix flag. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * s: affix flag in string. * val: affix parameter. */ static void -addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val) +addCompoundAffixFlagValue(IspellDictBuild *ConfBuild, char *s, uint32 val) { CompoundAffixFlag *newValue; char sbuf[BUFSIZ]; @@ -1083,29 +1300,29 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val) *sflag = '\0'; /* Resize array or allocate memory for array CompoundAffixFlag */ - if (Conf->nCompoundAffixFlag >= Conf->mCompoundAffixFlag) + if (ConfBuild->nCompoundAffixFlag >= ConfBuild->mCompoundAffixFlag) { - if (Conf->mCompoundAffixFlag) + if (ConfBuild->mCompoundAffixFlag) { - Conf->mCompoundAffixFlag *= 2; - Conf->CompoundAffixFlags = (CompoundAffixFlag *) - repalloc((void *) Conf->CompoundAffixFlags, - Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag)); + ConfBuild->mCompoundAffixFlag *= 2; + ConfBuild->CompoundAffixFlags = (CompoundAffixFlag *) + repalloc((void *) ConfBuild->CompoundAffixFlags, + ConfBuild->mCompoundAffixFlag * sizeof(CompoundAffixFlag)); } else { - Conf->mCompoundAffixFlag = 10; - Conf->CompoundAffixFlags = (CompoundAffixFlag *) - tmpalloc(Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag)); + ConfBuild->mCompoundAffixFlag = 10; + ConfBuild->CompoundAffixFlags = (CompoundAffixFlag *) + 
tmpalloc(ConfBuild->mCompoundAffixFlag * sizeof(CompoundAffixFlag)); } } - newValue = Conf->CompoundAffixFlags + Conf->nCompoundAffixFlag; + newValue = ConfBuild->CompoundAffixFlags + ConfBuild->nCompoundAffixFlag; - setCompoundAffixFlagValue(Conf, newValue, sbuf, val); + setCompoundAffixFlagValue(ConfBuild, newValue, sbuf, val); - Conf->usecompound = true; - Conf->nCompoundAffixFlag++; + ConfBuild->dict->usecompound = true; + ConfBuild->nCompoundAffixFlag++; } /* @@ -1113,7 +1330,7 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val) * flags s. */ static int -getCompoundAffixFlagValue(IspellDict *Conf, char *s) +getCompoundAffixFlagValue(IspellDictBuild *ConfBuild, char *s) { uint32 flag = 0; CompoundAffixFlag *found, @@ -1121,18 +1338,18 @@ getCompoundAffixFlagValue(IspellDict *Conf, char *s) char sflag[BUFSIZ]; char *flagcur; - if (Conf->nCompoundAffixFlag == 0) + if (ConfBuild->nCompoundAffixFlag == 0) return 0; flagcur = s; while (*flagcur) { - getNextFlagFromString(Conf, &flagcur, sflag); - setCompoundAffixFlagValue(Conf, &key, sflag, 0); + getNextFlagFromString(ConfBuild->dict->flagMode, &flagcur, sflag); + setCompoundAffixFlagValue(ConfBuild, &key, sflag, 0); found = (CompoundAffixFlag *) - bsearch(&key, (void *) Conf->CompoundAffixFlags, - Conf->nCompoundAffixFlag, sizeof(CompoundAffixFlag), + bsearch(&key, (void *) ConfBuild->CompoundAffixFlags, + ConfBuild->nCompoundAffixFlag, sizeof(CompoundAffixFlag), cmpcmdflag); if (found != NULL) flag |= found->value; @@ -1144,14 +1361,13 @@ getCompoundAffixFlagValue(IspellDict *Conf, char *s) /* * Returns a flag set using the s parameter. * - * If Conf->useFlagAliases is true then the s parameter is index of the - * Conf->AffixData array and function returns its entry. - * Else function returns the s parameter. + * If useFlagAliases is true then the s parameter is index of the AffixData + * array and function returns its entry. Else function returns the s parameter. 
*/ static char * -getAffixFlagSet(IspellDict *Conf, char *s) +getAffixFlagSet(IspellDictBuild *ConfBuild, char *s) { - if (Conf->useFlagAliases && *s != '\0') + if (ConfBuild->dict->useFlagAliases && *s != '\0') { int curaffix; char *end; @@ -1162,13 +1378,13 @@ getAffixFlagSet(IspellDict *Conf, char *s) (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("invalid affix alias \"%s\"", s))); - if (curaffix > 0 && curaffix <= Conf->nAffixData) + if (curaffix > 0 && curaffix <= ConfBuild->nAffixData) /* * Do not subtract 1 from curaffix because empty string was added * in NIImportOOAffixes */ - return Conf->AffixData[curaffix]; + return AffixDataGet(ConfBuild, curaffix); else return VoidString; } @@ -1179,11 +1395,11 @@ getAffixFlagSet(IspellDict *Conf, char *s) /* * Import an affix file that follows MySpell or Hunspell format. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * filename: path to the .affix file. */ static void -NIImportOOAffixes(IspellDict *Conf, const char *filename) +NIImportOOAffixes(IspellDictBuild *ConfBuild, const char *filename) { char type[BUFSIZ], *ptype = NULL; @@ -1195,17 +1411,16 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) char repl[BUFSIZ], *prepl; bool isSuffix = false; - int naffix = 0, - curaffix = 0; + int naffix = 0; int sflaglen = 0; char flagflags = 0; tsearch_readline_state trst; char *recoded; /* read file to find any flag */ - Conf->usecompound = false; - Conf->useFlagAliases = false; - Conf->flagMode = FM_CHAR; + ConfBuild->dict->usecompound = false; + ConfBuild->dict->useFlagAliases = false; + ConfBuild->dict->flagMode = FM_CHAR; if (!tsearch_readline_begin(&trst, filename)) ereport(ERROR, @@ -1222,30 +1437,36 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) } if (STRNCMP(recoded, "COMPOUNDFLAG") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDFLAG"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("COMPOUNDFLAG"), FF_COMPOUNDFLAG); 
else if (STRNCMP(recoded, "COMPOUNDBEGIN") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDBEGIN"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("COMPOUNDBEGIN"), FF_COMPOUNDBEGIN); else if (STRNCMP(recoded, "COMPOUNDLAST") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDLAST"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("COMPOUNDLAST"), FF_COMPOUNDLAST); /* COMPOUNDLAST and COMPOUNDEND are synonyms */ else if (STRNCMP(recoded, "COMPOUNDEND") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDEND"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("COMPOUNDEND"), FF_COMPOUNDLAST); else if (STRNCMP(recoded, "COMPOUNDMIDDLE") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDMIDDLE"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("COMPOUNDMIDDLE"), FF_COMPOUNDMIDDLE); else if (STRNCMP(recoded, "ONLYINCOMPOUND") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("ONLYINCOMPOUND"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("ONLYINCOMPOUND"), FF_COMPOUNDONLY); else if (STRNCMP(recoded, "COMPOUNDPERMITFLAG") == 0) - addCompoundAffixFlagValue(Conf, + addCompoundAffixFlagValue(ConfBuild, recoded + strlen("COMPOUNDPERMITFLAG"), FF_COMPOUNDPERMITFLAG); else if (STRNCMP(recoded, "COMPOUNDFORBIDFLAG") == 0) - addCompoundAffixFlagValue(Conf, + addCompoundAffixFlagValue(ConfBuild, recoded + strlen("COMPOUNDFORBIDFLAG"), FF_COMPOUNDFORBIDFLAG); else if (STRNCMP(recoded, "FLAG") == 0) @@ -1258,9 +1479,9 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) if (*s) { if (STRNCMP(s, "long") == 0) - Conf->flagMode = FM_LONG; + ConfBuild->dict->flagMode = FM_LONG; else if (STRNCMP(s, "num") == 0) - Conf->flagMode = FM_NUM; + ConfBuild->dict->flagMode = FM_NUM; else if (STRNCMP(s, "default") != 0) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), @@ -1274,8 +1495,8 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) } 
tsearch_readline_end(&trst); - if (Conf->nCompoundAffixFlag > 1) - qsort((void *) Conf->CompoundAffixFlags, Conf->nCompoundAffixFlag, + if (ConfBuild->nCompoundAffixFlag > 1) + qsort((void *) ConfBuild->CompoundAffixFlags, ConfBuild->nCompoundAffixFlag, sizeof(CompoundAffixFlag), cmpcmdflag); if (!tsearch_readline_begin(&trst, filename)) @@ -1295,15 +1516,15 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) if (ptype) pfree(ptype); - ptype = lowerstr_ctx(Conf, type); + ptype = lowerstr_ctx(ConfBuild, type); /* First try to parse AF parameter (alias compression) */ if (STRNCMP(ptype, "af") == 0) { /* First line is the number of aliases */ - if (!Conf->useFlagAliases) + if (!ConfBuild->dict->useFlagAliases) { - Conf->useFlagAliases = true; + ConfBuild->dict->useFlagAliases = true; naffix = atoi(sflag); if (naffix == 0) ereport(ERROR, @@ -1313,21 +1534,15 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) /* Also reserve place for empty flag set */ naffix++; - Conf->AffixData = (char **) palloc0(naffix * sizeof(char *)); - Conf->lenAffixData = Conf->nAffixData = naffix; + InitAffixData(ConfBuild, naffix); /* Add empty flag set into AffixData */ - Conf->AffixData[curaffix] = VoidString; - curaffix++; + AddAffixSet(ConfBuild, VoidString, 0); } /* Other lines is aliases */ else { - if (curaffix < naffix) - { - Conf->AffixData[curaffix] = cpstrdup(Conf, sflag); - curaffix++; - } + AddAffixSet(ConfBuild, sflag, strlen(sflag)); } goto nextline; } @@ -1338,8 +1553,8 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) sflaglen = strlen(sflag); if (sflaglen == 0 - || (sflaglen > 1 && Conf->flagMode == FM_CHAR) - || (sflaglen > 2 && Conf->flagMode == FM_LONG)) + || (sflaglen > 1 && ConfBuild->dict->flagMode == FM_CHAR) + || (sflaglen > 2 && ConfBuild->dict->flagMode == FM_LONG)) goto nextline; /*-------- @@ -1367,21 +1582,21 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) /* Get flags after '/' (flags are case sensitive) */ if ((ptr = 
strchr(repl, '/')) != NULL) - aflg |= getCompoundAffixFlagValue(Conf, - getAffixFlagSet(Conf, + aflg |= getCompoundAffixFlagValue(ConfBuild, + getAffixFlagSet(ConfBuild, ptr + 1)); /* Get lowercased version of string before '/' */ - prepl = lowerstr_ctx(Conf, repl); + prepl = lowerstr_ctx(ConfBuild, repl); if ((ptr = strchr(prepl, '/')) != NULL) *ptr = '\0'; - pfind = lowerstr_ctx(Conf, find); - pmask = lowerstr_ctx(Conf, mask); + pfind = lowerstr_ctx(ConfBuild, find); + pmask = lowerstr_ctx(ConfBuild, mask); if (t_iseq(find, '0')) *pfind = '\0'; if (t_iseq(repl, '0')) *prepl = '\0'; - NIAddAffix(Conf, sflag, flagflags | aflg, pmask, pfind, prepl, + NIAddAffix(ConfBuild, sflag, flagflags | aflg, pmask, pfind, prepl, isSuffix ? FF_SUFFIX : FF_PREFIX); pfree(prepl); pfree(pfind); @@ -1407,7 +1622,7 @@ nextline: * work to NIImportOOAffixes(), which will re-read the whole file. */ void -NIImportAffixes(IspellDict *Conf, const char *filename) +NIImportAffixes(IspellDictBuild *ConfBuild, const char *filename) { char *pstr = NULL; char flag[BUFSIZ]; @@ -1428,9 +1643,9 @@ NIImportAffixes(IspellDict *Conf, const char *filename) errmsg("could not open affix file \"%s\": %m", filename))); - Conf->usecompound = false; - Conf->useFlagAliases = false; - Conf->flagMode = FM_CHAR; + ConfBuild->dict->usecompound = false; + ConfBuild->dict->useFlagAliases = false; + ConfBuild->dict->flagMode = FM_CHAR; while ((recoded = tsearch_readline(&trst)) != NULL) { @@ -1452,10 +1667,8 @@ NIImportAffixes(IspellDict *Conf, const char *filename) s += pg_mblen(s); if (*s && pg_mblen(s) == 1) - { - addCompoundAffixFlagValue(Conf, s, FF_COMPOUNDFLAG); - Conf->usecompound = true; - } + addCompoundAffixFlagValue(ConfBuild, s, FF_COMPOUNDFLAG); + oldformat = true; goto nextline; } @@ -1528,7 +1741,8 @@ NIImportAffixes(IspellDict *Conf, const char *filename) if (!parse_affentry(pstr, mask, find, repl)) goto nextline; - NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? 
FF_SUFFIX : FF_PREFIX); + NIAddAffix(ConfBuild, flag, flagflags, mask, find, repl, + suffixes ? FF_SUFFIX : FF_PREFIX); nextline: pfree(recoded); @@ -1547,53 +1761,48 @@ isnewformat: errmsg("affix file contains both old-style and new-style commands"))); tsearch_readline_end(&trst); - NIImportOOAffixes(Conf, filename); + NIImportOOAffixes(ConfBuild, filename); } /* * Merges two affix flag sets and stores a new affix flag set into - * Conf->AffixData. + * ConfBuild->AffixData. * * Returns index of a new affix flag set. */ static int -MergeAffix(IspellDict *Conf, int a1, int a2) +MergeAffix(IspellDictBuild *ConfBuild, int a1, int a2) { - char **ptr; + char *ptr; + uint32 len; /* Do not merge affix flags if one of affix flags is empty */ - if (*Conf->AffixData[a1] == '\0') + if (*AffixDataGet(ConfBuild, a1) == '\0') return a2; - else if (*Conf->AffixData[a2] == '\0') + else if (*AffixDataGet(ConfBuild, a2) == '\0') return a1; - while (Conf->nAffixData + 1 >= Conf->lenAffixData) - { - Conf->lenAffixData *= 2; - Conf->AffixData = (char **) repalloc(Conf->AffixData, - sizeof(char *) * Conf->lenAffixData); - } - - ptr = Conf->AffixData + Conf->nAffixData; - if (Conf->flagMode == FM_NUM) + if (ConfBuild->dict->flagMode == FM_NUM) { - *ptr = cpalloc(strlen(Conf->AffixData[a1]) + - strlen(Conf->AffixData[a2]) + - 1 /* comma */ + 1 /* \0 */ ); - sprintf(*ptr, "%s,%s", Conf->AffixData[a1], Conf->AffixData[a2]); + len = strlen(AffixDataGet(ConfBuild, a1)) + 1 /* comma */ + + strlen(AffixDataGet(ConfBuild, a2)); + ptr = tmpalloc(len + 1 /* \0 */); + sprintf(ptr, "%s,%s", AffixDataGet(ConfBuild, a1), + AffixDataGet(ConfBuild, a2)); } else { - *ptr = cpalloc(strlen(Conf->AffixData[a1]) + - strlen(Conf->AffixData[a2]) + - 1 /* \0 */ ); - sprintf(*ptr, "%s%s", Conf->AffixData[a1], Conf->AffixData[a2]); + len = strlen(AffixDataGet(ConfBuild, a1)) + + strlen(AffixDataGet(ConfBuild, a2)); + ptr = tmpalloc(len + 1 /* \0 */ ); + sprintf(ptr, "%s%s", AffixDataGet(ConfBuild, a1), + 
AffixDataGet(ConfBuild, a2)); } - ptr++; - *ptr = NULL; - Conf->nAffixData++; - return Conf->nAffixData - 1; + AddAffixSet(ConfBuild, ptr, len); + pfree(ptr); + + return ConfBuild->nAffixData - 1; } /* @@ -1601,66 +1810,87 @@ MergeAffix(IspellDict *Conf, int a1, int a2) * flags with the given index. */ static uint32 -makeCompoundFlags(IspellDict *Conf, int affix) +makeCompoundFlags(IspellDictBuild *ConfBuild, int affix) { - char *str = Conf->AffixData[affix]; + char *str = AffixDataGet(ConfBuild, affix); - return (getCompoundAffixFlagValue(Conf, str) & FF_COMPOUNDFLAGMASK); + return (getCompoundAffixFlagValue(ConfBuild, str) & FF_COMPOUNDFLAGMASK); } /* * Makes a prefix tree for the given level. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * low: lower index of the Conf->Spell array. * high: upper index of the Conf->Spell array. * level: current prefix tree level. + * + * Returns an offset of SPNode in DictNodes. */ -static SPNode * -mkSPNode(IspellDict *Conf, int low, int high, int level) +static uint32 +mkSPNode(IspellDictBuild *ConfBuild, int low, int high, int level) { int i; int nchar = 0; char lastchar = '\0'; + uint32 rs_offset, + new_offset; SPNode *rs; SPNodeData *data; + int data_index = 0; int lownew = low; for (i = low; i < high; i++) - if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level]) + if (ConfBuild->Spell[i]->p.d.len > level && + lastchar != ConfBuild->Spell[i]->word[level]) { nchar++; - lastchar = Conf->Spell[i]->word[level]; + lastchar = ConfBuild->Spell[i]->word[level]; } if (!nchar) - return NULL; + return ISPELL_INVALID_OFFSET; - rs = (SPNode *) cpalloc0(SPNHDRSZ + nchar * sizeof(SPNodeData)); - rs->length = nchar; + rs_offset = AllocateSPNode(ConfBuild, nchar); + rs = (SPNode *) NodeArrayGet(&ConfBuild->DictNodes, rs_offset); data = rs->data; lastchar = '\0'; for (i = low; i < high; i++) - if (Conf->Spell[i]->p.d.len > level) + if (ConfBuild->Spell[i]->p.d.len > 
level) { - if (lastchar != Conf->Spell[i]->word[level]) + if (lastchar != ConfBuild->Spell[i]->word[level]) { if (lastchar) { /* Next level of the prefix tree */ - data->node = mkSPNode(Conf, lownew, i, level + 1); + new_offset = mkSPNode(ConfBuild, lownew, i, level + 1); + + /* + * ConfBuild->DictNodes can be repalloc'ed within + * mkSPNode(), so reinitialize pointers. + */ + rs = (SPNode *) NodeArrayGet(&ConfBuild->DictNodes, rs_offset); + + /* First save offset of the new node */ + data = &(rs->data[data_index]); + data->node_offset = new_offset; + + /* Work with next node */ + data_index++; + Assert(data_index < nchar); + data = &(rs->data[data_index]); + lownew = i; - data++; } - lastchar = Conf->Spell[i]->word[level]; + lastchar = ConfBuild->Spell[i]->word[level]; } - data->val = ((uint8 *) (Conf->Spell[i]->word))[level]; - if (Conf->Spell[i]->p.d.len == level + 1) + data->val = ((uint8 *) (ConfBuild->Spell[i]->word))[level]; + if (ConfBuild->Spell[i]->p.d.len == level + 1) { bool clearCompoundOnly = false; - if (data->isword && data->affix != Conf->Spell[i]->p.d.affix) + if (data->isword && data->affix != ConfBuild->Spell[i]->p.d.affix) { /* * MergeAffix called a few times. If one of word is @@ -1669,15 +1899,17 @@ mkSPNode(IspellDict *Conf, int low, int high, int level) */ clearCompoundOnly = (FF_COMPOUNDONLY & data->compoundflag - & makeCompoundFlags(Conf, Conf->Spell[i]->p.d.affix)) + & makeCompoundFlags(ConfBuild, + ConfBuild->Spell[i]->p.d.affix)) ? 
false : true; - data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix); + data->affix = MergeAffix(ConfBuild, data->affix, + ConfBuild->Spell[i]->p.d.affix); } else - data->affix = Conf->Spell[i]->p.d.affix; + data->affix = ConfBuild->Spell[i]->p.d.affix; data->isword = 1; - data->compoundflag = makeCompoundFlags(Conf, data->affix); + data->compoundflag = makeCompoundFlags(ConfBuild, data->affix); if ((data->compoundflag & FF_COMPOUNDONLY) && (data->compoundflag & FF_COMPOUNDFLAG) == 0) @@ -1689,9 +1921,19 @@ mkSPNode(IspellDict *Conf, int low, int high, int level) } /* Next level of the prefix tree */ - data->node = mkSPNode(Conf, lownew, high, level + 1); + new_offset = mkSPNode(ConfBuild, lownew, high, level + 1); + + /* + * ConfBuild->DictNodes can be repalloc'ed within mkSPNode(), so + * reinitialize pointers. + */ + rs = (SPNode *) NodeArrayGet(&ConfBuild->DictNodes, rs_offset); - return rs; + /* Save offset of the new node */ + data = &(rs->data[data_index]); + data->node_offset = new_offset; + + return rs_offset; } /* @@ -1699,7 +1941,7 @@ mkSPNode(IspellDict *Conf, int low, int high, int level) * and affixes. */ void -NISortDictionary(IspellDict *Conf) +NISortDictionary(IspellDictBuild *ConfBuild) { int i; int naffix = 0; @@ -1708,81 +1950,81 @@ NISortDictionary(IspellDict *Conf) /* compress affixes */ /* - * If we use flag aliases then we need to use Conf->AffixData filled in + * If we use flag aliases then we need to use ConfBuild->AffixData filled in * the NIImportOOAffixes(). 
*/ - if (Conf->useFlagAliases) + if (ConfBuild->dict->useFlagAliases) { - for (i = 0; i < Conf->nspell; i++) + for (i = 0; i < ConfBuild->nSpell; i++) { char *end; - if (*Conf->Spell[i]->p.flag != '\0') + if (*ConfBuild->Spell[i]->p.flag != '\0') { - curaffix = strtol(Conf->Spell[i]->p.flag, &end, 10); - if (Conf->Spell[i]->p.flag == end || errno == ERANGE) + curaffix = strtol(ConfBuild->Spell[i]->p.flag, &end, 10); + if (ConfBuild->Spell[i]->p.flag == end || errno == ERANGE) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("invalid affix alias \"%s\"", - Conf->Spell[i]->p.flag))); + ConfBuild->Spell[i]->p.flag))); } else { /* - * If Conf->Spell[i]->p.flag is empty, then get empty value of - * Conf->AffixData (0 index). + * If ConfBuild->Spell[i]->p.flag is empty, then get empty + * value of ConfBuild->AffixData (0 index). */ curaffix = 0; } - Conf->Spell[i]->p.d.affix = curaffix; - Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word); + ConfBuild->Spell[i]->p.d.affix = curaffix; + ConfBuild->Spell[i]->p.d.len = strlen(ConfBuild->Spell[i]->word); } } - /* Otherwise fill Conf->AffixData here */ + /* Otherwise fill ConfBuild->AffixData here */ else { /* Count the number of different flags used in the dictionary */ - qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), + qsort((void *) ConfBuild->Spell, ConfBuild->nSpell, sizeof(SPELL *), cmpspellaffix); naffix = 0; - for (i = 0; i < Conf->nspell; i++) + for (i = 0; i < ConfBuild->nSpell; i++) { if (i == 0 - || strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag)) + || strcmp(ConfBuild->Spell[i]->p.flag, + ConfBuild->Spell[i - 1]->p.flag)) naffix++; } /* - * Fill in Conf->AffixData with the affixes that were used in the - * dictionary. Replace textual flag-field of Conf->Spell entries with - * indexes into Conf->AffixData array. + * Fill in AffixData with the affixes that were used in the + * dictionary. 
Replace textual flag-field of ConfBuild->Spell entries + * with indexes into ConfBuild->AffixData array. */ - Conf->AffixData = (char **) palloc0(naffix * sizeof(char *)); + InitAffixData(ConfBuild, naffix); curaffix = -1; - for (i = 0; i < Conf->nspell; i++) + for (i = 0; i < ConfBuild->nSpell; i++) { if (i == 0 - || strcmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix])) + || strcmp(ConfBuild->Spell[i]->p.flag, + AffixDataGet(ConfBuild, curaffix))) { curaffix++; Assert(curaffix < naffix); - Conf->AffixData[curaffix] = cpstrdup(Conf, - Conf->Spell[i]->p.flag); + AddAffixSet(ConfBuild, ConfBuild->Spell[i]->p.flag, + strlen(ConfBuild->Spell[i]->p.flag)); } - Conf->Spell[i]->p.d.affix = curaffix; - Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word); + ConfBuild->Spell[i]->p.d.affix = curaffix; + ConfBuild->Spell[i]->p.d.len = strlen(ConfBuild->Spell[i]->word); } - - Conf->lenAffixData = Conf->nAffixData = naffix; } /* Start build a prefix tree */ - qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell); - Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0); + qsort((void *) ConfBuild->Spell, ConfBuild->nSpell, sizeof(SPELL *), cmpspell); + mkSPNode(ConfBuild, 0, ConfBuild->nSpell, 0); } /* @@ -1790,83 +2032,104 @@ NISortDictionary(IspellDict *Conf) * rule. Affixes with empty replace string do not include in the prefix tree. * This affixes are included by mkVoidAffix(). * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * low: lower index of the Conf->Affix array. * high: upper index of the Conf->Affix array. * level: current prefix tree level. * type: FF_SUFFIX or FF_PREFIX. + * + * Returns an offset in nodes array. 
*/ -static AffixNode * -mkANode(IspellDict *Conf, int low, int high, int level, int type) +static uint32 +mkANode(IspellDictBuild *ConfBuild, int low, int high, int level, int type) { int i; int nchar = 0; uint8 lastchar = '\0'; + NodeArray *array; + uint32 rs_offset, + new_offset; AffixNode *rs; AffixNodeData *data; + int data_index = 0; int lownew = low; - int naff; - AFFIX **aff; for (i = low; i < high; i++) - if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type)) + if (ConfBuild->Affix[i]->replen > level && + lastchar != GETCHAR(ConfBuild->Affix[i], level, type)) { nchar++; - lastchar = GETCHAR(Conf->Affix + i, level, type); + lastchar = GETCHAR(ConfBuild->Affix[i], level, type); } if (!nchar) - return NULL; + return ISPELL_INVALID_OFFSET; - aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1)); - naff = 0; + if (type == FF_SUFFIX) + array = &ConfBuild->SuffixNodes; + else + array = &ConfBuild->PrefixNodes; - rs = (AffixNode *) cpalloc0(ANHRDSZ + nchar * sizeof(AffixNodeData)); - rs->length = nchar; - data = rs->data; + rs_offset = AllocateAffixNode(ConfBuild, array, nchar); + rs = (AffixNode *) NodeArrayGet(array, rs_offset); + data = (AffixNodeData *) rs->data; lastchar = '\0'; for (i = low; i < high; i++) - if (Conf->Affix[i].replen > level) + if (ConfBuild->Affix[i]->replen > level) { - if (lastchar != GETCHAR(Conf->Affix + i, level, type)) + if (lastchar != GETCHAR(ConfBuild->Affix[i], level, type)) { if (lastchar) { /* Next level of the prefix tree */ - data->node = mkANode(Conf, lownew, i, level + 1, type); - if (naff) - { - data->naff = naff; - data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff); - memcpy(data->aff, aff, sizeof(AFFIX *) * naff); - naff = 0; - } - data++; + new_offset = mkANode(ConfBuild, lownew, i, level + 1, type); + + /* + * array can be repalloc'ed within mkANode(), so + * reinitialize pointers. 
+ */ + rs = (AffixNode *) NodeArrayGet(array, rs_offset); + + /* First save offset of the new node */ + data = &(rs->data[data_index]); + data->node_offset = new_offset; + + /* Handle next data node */ + data_index++; + Assert(data_index < nchar); + data = &(rs->data[data_index]); + lownew = i; } - lastchar = GETCHAR(Conf->Affix + i, level, type); + lastchar = GETCHAR(ConfBuild->Affix[i], level, type); } - data->val = GETCHAR(Conf->Affix + i, level, type); - if (Conf->Affix[i].replen == level + 1) + data->val = GETCHAR(ConfBuild->Affix[i], level, type); + if (ConfBuild->Affix[i]->replen == level + 1) { /* affix stopped */ - aff[naff++] = Conf->Affix + i; + if (data->affstart == ISPELL_INVALID_INDEX) + { + data->affstart = i; + data->affend = i; + } + else + data->affend = i; } } /* Next level of the prefix tree */ - data->node = mkANode(Conf, lownew, high, level + 1, type); - if (naff) - { - data->naff = naff; - data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff); - memcpy(data->aff, aff, sizeof(AFFIX *) * naff); - naff = 0; - } + new_offset = mkANode(ConfBuild, lownew, high, level + 1, type); + + /* + * array can be repalloc'ed within mkANode(), so reinitialize pointers. + */ + rs = (AffixNode *) NodeArrayGet(array, rs_offset); - pfree(aff); + /* Save offset of the new node */ + data = &(rs->data[data_index]); + data->node_offset = new_offset; - return rs; + return rs_offset; } /* @@ -1874,137 +2137,154 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type) * for affixes which have empty replace string ("repl" field). */ static void -mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix) +mkVoidAffix(IspellDictBuild *ConfBuild, bool issuffix, int startsuffix) { - int i, - cnt = 0; + int i; int start = (issuffix) ? startsuffix : 0; - int end = (issuffix) ? Conf->naffixes : startsuffix; - AffixNode *Affix = (AffixNode *) palloc0(ANHRDSZ + sizeof(AffixNodeData)); - - Affix->length = 1; - Affix->isvoid = 1; + int end = (issuffix) ? 
ConfBuild->nAffix : startsuffix; + uint32 node_offset; + NodeArray *array; + AffixNode *Affix; + AffixNodeData *AffixData; if (issuffix) - { - Affix->data->node = Conf->Suffix; - Conf->Suffix = Affix; - } + array = &ConfBuild->SuffixNodes; else - { - Affix->data->node = Conf->Prefix; - Conf->Prefix = Affix; - } + array = &ConfBuild->PrefixNodes; - /* Count affixes with empty replace string */ - for (i = start; i < end; i++) - if (Conf->Affix[i].replen == 0) - cnt++; - - /* There is not affixes with empty replace string */ - if (cnt == 0) - return; + node_offset = AllocateAffixNode(ConfBuild, array, 1); + Affix = (AffixNode *) NodeArrayGet(array, node_offset); - Affix->data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * cnt); - Affix->data->naff = (uint32) cnt; + Affix->isvoid = 1; + AffixData = (AffixNodeData *) Affix->data; - cnt = 0; for (i = start; i < end; i++) - if (Conf->Affix[i].replen == 0) + if (ConfBuild->Affix[i]->replen == 0) { - Affix->data->aff[cnt] = Conf->Affix + i; - cnt++; + if (AffixData->affstart == ISPELL_INVALID_INDEX) + { + AffixData->affstart = i; + AffixData->affend = i; + } + else + AffixData->affend = i; } } /* - * Checks if the affixflag is used by dictionary. Conf->AffixData does not + * Checks if the affixflag is used by dictionary. AffixData does not * contain affixflag if this flag is not used actually by the .dict file. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * affixflag: affix flag. * - * Returns true if the Conf->AffixData array contains affixflag, otherwise + * Returns true if the ConfBuild->AffixData array contains affixflag, otherwise * returns false. 
*/ static bool -isAffixInUse(IspellDict *Conf, char *affixflag) +isAffixInUse(IspellDictBuild *ConfBuild, char *affixflag) { int i; - for (i = 0; i < Conf->nAffixData; i++) - if (IsAffixFlagInUse(Conf, i, affixflag)) + for (i = 0; i < ConfBuild->nAffixData; i++) + if (IsAffixFlagInUse(ConfBuild->dict->flagMode, + AffixDataGet(ConfBuild, i), affixflag)) return true; return false; } /* - * Builds Conf->Prefix and Conf->Suffix trees from the imported affixes. + * Builds Prefix and Suffix trees from the imported affixes. */ void -NISortAffixes(IspellDict *Conf) +NISortAffixes(IspellDictBuild *ConfBuild) { AFFIX *Affix; + AffixNode *voidPrefix, + *voidSuffix; size_t i; CMPDAffix *ptr; - int firstsuffix = Conf->naffixes; + int firstsuffix = ConfBuild->nAffix; + uint32 prefix_offset, + suffix_offset; - if (Conf->naffixes == 0) + if (ConfBuild->nAffix == 0) return; /* Store compound affixes in the Conf->CompoundAffix array */ - if (Conf->naffixes > 1) - qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix); - Conf->CompoundAffix = ptr = (CMPDAffix *) palloc(sizeof(CMPDAffix) * Conf->naffixes); - ptr->affix = NULL; - - for (i = 0; i < Conf->naffixes; i++) + if (ConfBuild->nAffix > 1) + qsort((void *) ConfBuild->Affix, ConfBuild->nAffix, + sizeof(AFFIX *), cmpaffix); + ConfBuild->nCompoundAffix = ConfBuild->nAffix; + ConfBuild->CompoundAffix = ptr = + (CMPDAffix *) tmpalloc(sizeof(CMPDAffix) * ConfBuild->nCompoundAffix); + ptr->affix = ISPELL_INVALID_INDEX; + + for (i = 0; i < ConfBuild->nAffix; i++) { - Affix = &(((AFFIX *) Conf->Affix)[i]); + Affix = ConfBuild->Affix[i]; if (Affix->type == FF_SUFFIX && i < firstsuffix) firstsuffix = i; if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 && - isAffixInUse(Conf, Affix->flag)) + isAffixInUse(ConfBuild, AffixFieldFlag(Affix))) { - if (ptr == Conf->CompoundAffix || + if (ptr == ConfBuild->CompoundAffix || ptr->issuffix != (ptr - 1)->issuffix || - strbncmp((const unsigned char *) (ptr - 1)->affix, - 
(const unsigned char *) Affix->repl, + strbncmp((const unsigned char *) AffixFieldRepl(ConfBuild->Affix[(ptr - 1)->affix]), + (const unsigned char *) AffixFieldRepl(Affix), (ptr - 1)->len)) { /* leave only unique and minimals suffixes */ - ptr->affix = Affix->repl; + ptr->affix = i; ptr->len = Affix->replen; ptr->issuffix = (Affix->type == FF_SUFFIX); ptr++; } } } - ptr->affix = NULL; - Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1)); + ptr->affix = ISPELL_INVALID_INDEX; + ConfBuild->nCompoundAffix = ptr - ConfBuild->CompoundAffix + 1; + ConfBuild->CompoundAffix = (CMPDAffix *) repalloc(ConfBuild->CompoundAffix, + sizeof(CMPDAffix) * (ConfBuild->nCompoundAffix)); /* Start build a prefix tree */ - Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX); - Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX); - mkVoidAffix(Conf, true, firstsuffix); - mkVoidAffix(Conf, false, firstsuffix); + mkVoidAffix(ConfBuild, true, firstsuffix); + mkVoidAffix(ConfBuild, false, firstsuffix); + + prefix_offset = mkANode(ConfBuild, 0, firstsuffix, 0, FF_PREFIX); + suffix_offset = mkANode(ConfBuild, firstsuffix, ConfBuild->nAffix, 0, + FF_SUFFIX); + + /* Adjust offsets of new nodes for nodes of void affixes */ + voidPrefix = (AffixNode *) NodeArrayGet(&ConfBuild->PrefixNodes, 0); + voidPrefix->data[0].node_offset = prefix_offset; + + voidSuffix = (AffixNode *) NodeArrayGet(&ConfBuild->SuffixNodes, 0); + voidSuffix->data[0].node_offset = suffix_offset; } static AffixNodeData * -FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type) +FindAffixes(IspellDictData *dict, AffixNode *node, const char *word, int wrdlen, + int *level, int type) { + AffixNode *node_start; AffixNodeData *StopLow, *StopHigh, *StopMiddle; uint8 symbol; + if (type == FF_PREFIX) + node_start = (AffixNode *) DictPrefixNodes(dict); + else + node_start = (AffixNode *) DictSuffixNodes(dict); + if 
(node->isvoid) { /* search void affixes */ - if (node->data->naff) + if (node->data->affstart != ISPELL_INVALID_INDEX) return node->data; - node = node->data->node; + node = (AffixNode *) DictNodeGet(node_start, node->data->node_offset); } while (node && *level < wrdlen) @@ -2019,9 +2299,10 @@ FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type) if (StopMiddle->val == symbol) { (*level)++; - if (StopMiddle->naff) + if (StopMiddle->affstart != ISPELL_INVALID_INDEX) return StopMiddle; - node = StopMiddle->node; + node = (AffixNode *) DictNodeGet(node_start, + StopMiddle->node_offset); break; } else if (StopMiddle->val < symbol) @@ -2035,8 +2316,67 @@ FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type) return NULL; } +/* + * Compile regular expression on first use and store it within reg. + */ +static void +CompileAffixReg(AffixReg *reg, bool isregis, int type, + const char *mask, int masklen, MemoryContext dictCtx) +{ + MemoryContext oldcontext; + + Assert(dictCtx); + + /* + * Switch to memory context of the dictionary, so compiled expression can be + * used in other queries. 
+ */ + oldcontext = MemoryContextSwitchTo(dictCtx); + + if (isregis) + RS_compile(&reg->r.regis, (type == FF_SUFFIX), mask); + else + { + int wmasklen; + int err; + pg_wchar *wmask; + char *tmask; + + tmask = (char *) palloc(masklen + 3); + if (type == FF_SUFFIX) + sprintf(tmask, "%s$", mask); + else + sprintf(tmask, "^%s", mask); + + masklen = strlen(tmask); + wmask = (pg_wchar *) palloc((masklen + 1) * sizeof(pg_wchar)); + wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen); + + err = pg_regcomp(&(reg->r.regex), wmask, wmasklen, + REG_ADVANCED | REG_NOSUB, + DEFAULT_COLLATION_OID); + if (err) + { + char errstr[100]; + + pg_regerror(err, &(reg->r.regex), errstr, sizeof(errstr)); + ereport(ERROR, + (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), + errmsg("invalid regular expression: %s", errstr))); + } + + pfree(wmask); + pfree(tmask); + } + + reg->iscompiled = true; + + MemoryContextSwitchTo(oldcontext); +} + static char * -CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *newword, int *baselen) +CheckAffix(const char *word, size_t len, AFFIX *Affix, AffixReg *reg, + int flagflags, char *newword, int *baselen, MemoryContext dictCtx) { /* * Check compound allow flags @@ -2076,7 +2416,7 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww if (Affix->type == FF_SUFFIX) { strcpy(newword, word); - strcpy(newword + len - Affix->replen, Affix->find); + strcpy(newword + len - Affix->replen, AffixFieldFind(Affix)); if (baselen) /* store length of non-changed part of word */ *baselen = len - Affix->replen; } @@ -2086,9 +2426,9 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww * if prefix is an all non-changed part's length then all word * contains only prefix and suffix, so out */ - if (baselen && *baselen + strlen(Affix->find) <= Affix->replen) + if (baselen && *baselen + Affix->findlen <= Affix->replen) return NULL; - strcpy(newword, Affix->find); + strcpy(newword,
AffixFieldFind(Affix)); strcat(newword, word + Affix->replen); } @@ -2099,7 +2439,12 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww return newword; else if (Affix->isregis) { - if (RS_execute(&(Affix->reg.regis), newword)) + /* Compile the regular expression on first demand */ + if (!reg->iscompiled) + CompileAffixReg(reg, Affix->isregis, Affix->type, + AffixFieldMask(Affix), Affix->masklen, dictCtx); + + if (RS_execute(&(reg->r.regis), newword)) return newword; } else @@ -2109,12 +2454,17 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww size_t data_len; int newword_len; + /* Compile the regular expression on first demand */ + if (!reg->iscompiled) + CompileAffixReg(reg, Affix->isregis, Affix->type, + AffixFieldMask(Affix), Affix->masklen, dictCtx); + /* Convert data string to wide characters */ newword_len = strlen(newword); data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar)); data_len = pg_mb2wchar_with_len(newword, data, newword_len); - if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, 0, NULL, 0, NULL, 0))) + if (!(err = pg_regexec(&(reg->r.regex), data, data_len, 0, NULL, 0, NULL, 0))) { pfree(data); return newword; @@ -2153,7 +2503,7 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag) char **cur; char newword[2 * MAXNORMLEN] = ""; char pnewword[2 * MAXNORMLEN] = ""; - AffixNode *snode = Conf->Suffix, + AffixNode *snode = (AffixNode *) DictSuffixNodes(Conf->dict), *pnode; int i, j; @@ -2165,7 +2515,7 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag) /* Check that the word itself is normal form */ - if (FindWord(Conf, word, VoidString, flag)) + if (FindWord(Conf->dict, word, VoidString, flag)) { *cur = pstrdup(word); cur++; @@ -2173,23 +2523,29 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag) } /* Find all other NORMAL forms of the 'word' (check only prefix) */ - pnode = Conf->Prefix; + pnode = (AffixNode *) DictPrefixNodes(Conf->dict); 
plevel = 0; while (pnode) { - prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX); + prefix = FindAffixes(Conf->dict, pnode, word, wrdlen, &plevel, FF_PREFIX); if (!prefix) break; - for (j = 0; j < prefix->naff; j++) + for (j = prefix->affstart; j <= prefix->affend; j++) { - if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL)) + AFFIX *affix = (AFFIX *) DictAffixGet(Conf->dict, j); + AffixReg *reg = &(Conf->reg[j]); + + if (affix && + CheckAffix(word, wrdlen, affix, reg, flag, newword, NULL, + Conf->dictCtx)) { /* prefix success */ - if (FindWord(Conf, newword, prefix->aff[j]->flag, flag)) + if (FindWord(Conf->dict, newword, AffixFieldFlag(affix), flag)) cur += addToResult(forms, cur, newword); } } - pnode = prefix->node; + pnode = (AffixNode *) DictNodeGet(DictPrefixNodes(Conf->dict), + prefix->node_offset); } /* @@ -2201,45 +2557,59 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag) int baselen = 0; /* find possible suffix */ - suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX); + suffix = FindAffixes(Conf->dict, snode, word, wrdlen, &slevel, + FF_SUFFIX); if (!suffix) break; /* foreach suffix check affix */ - for (i = 0; i < suffix->naff; i++) + for (i = suffix->affstart; i <= suffix->affend; i++) { - if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen)) + AFFIX *sufentry = (AFFIX *) DictAffixGet(Conf->dict, i); + AffixReg *sufreg = &(Conf->reg[i]); + + if (sufentry && + CheckAffix(word, wrdlen, sufentry, sufreg, flag, newword, &baselen, + Conf->dictCtx)) { /* suffix success */ - if (FindWord(Conf, newword, suffix->aff[i]->flag, flag)) + if (FindWord(Conf->dict, newword, AffixFieldFlag(sufentry), flag)) cur += addToResult(forms, cur, newword); /* now we will look changed word with prefixes */ - pnode = Conf->Prefix; + pnode = (AffixNode *) DictPrefixNodes(Conf->dict); plevel = 0; swrdlen = strlen(newword); while (pnode) { - prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX); + prefix = 
FindAffixes(Conf->dict, pnode, newword, swrdlen, + &plevel, FF_PREFIX); if (!prefix) break; - for (j = 0; j < prefix->naff; j++) + for (j = prefix->affstart; j <= prefix->affend; j++) { - if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen)) + AFFIX *prefentry = (AFFIX *) DictAffixGet(Conf->dict, j); + AffixReg *prefreg = &(Conf->reg[j]); + + if (prefentry && + CheckAffix(newword, swrdlen, prefentry, prefreg, + flag, pnewword, &baselen, Conf->dictCtx)) { /* prefix success */ - char *ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ? - VoidString : prefix->aff[j]->flag; + char *ff = (prefentry->flagflags & sufentry->flagflags & FF_CROSSPRODUCT) ? + VoidString : AffixFieldFlag(prefentry); - if (FindWord(Conf, pnewword, ff, flag)) + if (FindWord(Conf->dict, pnewword, ff, flag)) cur += addToResult(forms, cur, pnewword); } } - pnode = prefix->node; + pnode = (AffixNode *) DictNodeGet(DictPrefixNodes(Conf->dict), + prefix->node_offset); } } } - snode = suffix->node; + snode = (AffixNode *) DictNodeGet(DictSuffixNodes(Conf->dict), + suffix->node_offset); } if (cur == forms) @@ -2259,7 +2629,8 @@ typedef struct SplitVar } SplitVar; static int -CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace) +CheckCompoundAffixes(IspellDictData *dict, CMPDAffix **ptr, + char *word, int len, bool CheckInPlace) { bool issuffix; @@ -2269,9 +2640,12 @@ CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace) if (CheckInPlace) { - while ((*ptr)->affix) + while ((*ptr)->affix != ISPELL_INVALID_INDEX) { - if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0) + AFFIX *affix = (AFFIX *) DictAffixGet(dict, (*ptr)->affix); + + if (len > (*ptr)->len && + strncmp(AffixFieldRepl(affix), word, (*ptr)->len) == 0) { len = (*ptr)->len; issuffix = (*ptr)->issuffix; @@ -2285,9 +2659,12 @@ CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace) { char *affbegin; - while 
((*ptr)->affix) + while ((*ptr)->affix != ISPELL_INVALID_INDEX) { - if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL) + AFFIX *affix = (AFFIX *) DictAffixGet(dict, (*ptr)->affix); + + if (len > (*ptr)->len && + (affbegin = strstr(word, AffixFieldRepl(affix))) != NULL) { len = (*ptr)->len + (affbegin - word); issuffix = (*ptr)->issuffix; @@ -2339,13 +2716,14 @@ AddStem(SplitVar *v, char *word) } static SplitVar * -SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int wordlen, int startpos, int minpos) +SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, + char *word, int wordlen, int startpos, int minpos) { SplitVar *var = NULL; SPNodeData *StopLow, *StopHigh, *StopMiddle = NULL; - SPNode *node = (snode) ? snode : Conf->Dictionary; + SPNode *node = (snode) ? snode : (SPNode *) DictDictNodes(Conf->dict); int level = (snode) ? minpos : startpos; /* recursive * minpos==level */ int lenaff; @@ -2360,8 +2738,11 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int while (level < wordlen) { /* find word with epenthetic or/and compound affix */ - caff = Conf->CompoundAffix; - while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) >= 0) + caff = (CMPDAffix *) DictCompoundAffix(Conf->dict); + while (level > startpos && + (lenaff = CheckCompoundAffixes(Conf->dict, &caff, + word + level, wordlen - level, + (node) ? 
true : false)) >= 0) { /* * there is one of compound affixes, so check word for existings @@ -2408,7 +2789,8 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int while (ptr->next) ptr = ptr->next; - ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff); + ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, + startpos + lenaff, startpos + lenaff); pfree(new->stem); pfree(new); @@ -2467,13 +2849,14 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int /* we can find next word */ level++; AddStem(var, pnstrdup(word + startpos, level - startpos)); - node = Conf->Dictionary; + node = (SPNode *) DictDictNodes(Conf->dict); startpos = level; continue; } } } - node = StopMiddle->node; + node = (SPNode *) DictNodeGet(DictDictNodes(Conf->dict), + StopMiddle->node_offset); } else node = NULL; @@ -2523,7 +2906,7 @@ NINormalizeWord(IspellDict *Conf, char *word) pfree(res); } - if (Conf->usecompound) + if (Conf->dict->usecompound) { int wordlen = strlen(word); SplitVar *ptr, diff --git a/src/include/tsearch/dicts/spell.h b/src/include/tsearch/dicts/spell.h index 210f97dda9..b40cf379eb 100644 --- a/src/include/tsearch/dicts/spell.h +++ b/src/include/tsearch/dicts/spell.h @@ -18,21 +18,23 @@ #include "tsearch/dicts/regis.h" #include "tsearch/ts_public.h" +#define ISPELL_INVALID_INDEX (0x7FFFF) +#define ISPELL_INVALID_OFFSET (0xFFFFFFFF) + /* * SPNode and SPNodeData are used to represent prefix tree (Trie) to store * a words list. 
*/ -struct SPNode; - typedef struct { uint32 val:8, isword:1, /* Stores compound flags listed below */ compoundflag:4, - /* Reference to an entry of the AffixData field */ + /* Index of an entry of the AffixData field */ affix:19; - struct SPNode *node; + /* Offset to a node of the DictNodes field */ + uint32 node_offset; } SPNodeData; /* @@ -86,21 +88,55 @@ typedef struct spell_struct */ typedef struct aff_struct { - char *flag; /* FF_SUFFIX or FF_PREFIX */ - uint32 type:1, + uint16 type:1, flagflags:7, issimple:1, isregis:1, - replen:14; - char *find; - char *repl; + flaglen:2; + + /* 8 bits could be too much for repl, find and mask, but who knows */ + uint8 replen; + uint8 findlen; + uint8 masklen; + + /* + * fields stores the following data (each ends with \0): + * - repl + * - find + * - mask + * - flag - one character (if FM_CHAR), + * two characters (if FM_LONG), + * number, >= 0 and < 65536 (if FM_NUM). + */ + char fields[FLEXIBLE_ARRAY_MEMBER]; +} AFFIX; + +#define AF_FLAG_MAXSIZE 5 /* strlen(65536) */ +#define AF_REPL_MAXSIZE 255 /* 8 bits */ + +#define AFFIXHDRSZ (offsetof(AFFIX, fields)) + +#define AffixFieldRepl(af) ((af)->fields) +#define AffixFieldFind(af) ((af)->fields + (af)->replen + 1) +#define AffixFieldMask(af) (AffixFieldFind(af) + (af)->findlen + 1) +#define AffixFieldFlag(af) (AffixFieldMask(af) + (af)->masklen + 1) +#define AffixGetSize(af) (AFFIXHDRSZ + (af)->replen + 1 + (af)->findlen + 1 \ + + (af)->masklen + 1 + strlen(AffixFieldFlag(af)) + 1) + +/* + * Stores compiled regular expression of affix. AffixReg uses mask field of + * AFFIX as a regular expression. + */ +typedef struct AffixReg +{ + bool iscompiled; union { regex_t regex; Regis regis; - } reg; -} AFFIX; + } r; +} AffixReg; /* * affixes use dictionary flags too @@ -120,14 +156,13 @@ typedef struct aff_struct * AffixNode and AffixNodeData are used to represent prefix tree (Trie) to store * an affix list.
*/ -struct AffixNode; - typedef struct { - uint32 val:8, - naff:24; - AFFIX **aff; - struct AffixNode *node; + uint8 val; + uint32 affstart; + uint32 affend; + /* Offset to a node of the PrefixNodes or SuffixNodes field */ + uint32 node_offset; } AffixNodeData; typedef struct AffixNode @@ -139,9 +174,19 @@ typedef struct AffixNode #define ANHRDSZ (offsetof(AffixNode, data)) +typedef struct NodeArray +{ + char *Nodes; + uint32 NodesSize; /* allocated size of Nodes */ + uint32 NodesEnd; /* end of data in Nodes */ +} NodeArray; + +#define NodeArrayGet(na, of) (((of) == ISPELL_INVALID_OFFSET) ? NULL : (na)->Nodes + (of)) + typedef struct { - char *affix; + /* Index of an affix of the Affix field */ + uint32 affix; int len; bool issuffix; } CMPDAffix; @@ -176,30 +221,71 @@ typedef struct CompoundAffixFlag #define FLAGNUM_MAXSIZE (1 << 16) -typedef struct +typedef struct IspellDictData { - int maffixes; - int naffixes; - AFFIX *Affix; - - AffixNode *Suffix; - AffixNode *Prefix; + FlagMode flagMode; + bool usecompound; - SPNode *Dictionary; - /* Array of sets of affixes */ - char **AffixData; - int lenAffixData; - int nAffixData; bool useFlagAliases; - CMPDAffix *CompoundAffix; + uint32 nAffixData; + uint32 AffixDataStart; - bool usecompound; - FlagMode flagMode; + uint32 AffixOffsetStart; + uint32 AffixStart; + uint32 nAffix; + + uint32 DictNodesStart; + uint32 PrefixNodesStart; + uint32 SuffixNodesStart; + + uint32 CompoundAffixStart; /* - * All follow fields are actually needed only for initialization + * data stores: + * - AffixData - array of affix sets + * - Affix - sorted array of affixes + * - DictNodes - prefix tree of a word list + * - PrefixNodes - prefix tree of a prefix list + * - SuffixNodes - prefix tree of a suffix list + * - CompoundAffix - array of compound affixes */ + char data[FLEXIBLE_ARRAY_MEMBER]; +} IspellDictData; + +#define IspellDictDataHdrSize (offsetof(IspellDictData, data)) + +#define DictAffixDataOffset(d) ((d)->data) +#define 
DictAffixData(d) ((d)->data + (d)->AffixDataStart) +#define DictAffixDataGet(d, i) (((i) == ISPELL_INVALID_INDEX) ? NULL : \ + DictAffixData(d) + ((uint32 *) DictAffixDataOffset(d))[i]) + +#define DictAffixOffset(d) ((d)->data + (d)->AffixOffsetStart) +#define DictAffix(d) ((d)->data + (d)->AffixStart) +#define DictAffixGet(d, i) (((i) == ISPELL_INVALID_INDEX) ? NULL : \ + DictAffix(d) + ((uint32 *) DictAffixOffset(d))[i]) + +#define DictDictNodes(d) ((d)->data + (d)->DictNodesStart) +#define DictPrefixNodes(d) ((d)->data + (d)->PrefixNodesStart) +#define DictSuffixNodes(d) ((d)->data + (d)->SuffixNodesStart) +#define DictNodeGet(node_start, of) (((of) == ISPELL_INVALID_OFFSET) ? NULL : \ + (char *) (node_start) + (of)) + +#define DictCompoundAffix(d) ((d)->data + (d)->CompoundAffixStart) + +/* + * IspellDictBuild is used to initialize IspellDictData struct. This is a + * temporary structure which is set up by NIStartBuild() and released by + * NIFinishBuild(). + */ +typedef struct IspellDictBuild +{ + MemoryContext buildCxt; /* temp context for construction */ + + IspellDictData *dict; + uint32 dict_size; + + /* Temporary data */ /* Array of Hunspell options in affix file */ CompoundAffixFlag *CompoundAffixFlags; @@ -208,29 +294,73 @@ typedef struct /* allocated length of CompoundAffixFlags array */ int mCompoundAffixFlag; - /* - * Remaining fields are only used during dictionary construction; they are - * set up by NIStartBuild and cleared by NIFinishBuild. 
- */ - MemoryContext buildCxt; /* temp context for construction */ - - /* Temporary array of all words in the dict file */ + /* Array of all words in the dict file */ SPELL **Spell; - int nspell; /* number of valid entries in Spell array */ - int mspell; /* allocated length of Spell array */ + int nSpell; /* number of valid entries in Spell array */ + int mSpell; /* allocated length of Spell array */ + + /* Array of all affixes in the aff file */ + AFFIX **Affix; + int nAffix; /* number of valid entries in Affix array */ + int mAffix; /* allocated length of Affix array */ + uint32 AffixSize; + + /* Data for IspellDictData */ + + /* Array of sets of affixes */ + uint32 *AffixDataOffset; + int nAffixData; /* number of affix sets */ + int mAffixData; /* allocated number of affix sets */ + char *AffixData; + uint32 AffixDataSize; /* allocated size of AffixData */ + uint32 AffixDataEnd; /* end of data in AffixData */ + + /* Prefix tree which stores a word list */ + NodeArray DictNodes; + + /* Prefix tree which stores a prefix list */ + NodeArray PrefixNodes; + + /* Prefix tree which stores a suffix list */ + NodeArray SuffixNodes; - /* These are used to allocate "compact" data without palloc overhead */ - char *firstfree; /* first free address (always maxaligned) */ - size_t avail; /* free space remaining at firstfree */ + /* Array of compound affixes */ + CMPDAffix *CompoundAffix; + int nCompoundAffix; /* number of entries of CompoundAffix */ +} IspellDictBuild; + +#define AffixDataGet(d, i) ((d)->AffixData + (d)->AffixDataOffset[i]) + +/* + * IspellDict is used within NINormalizeWord. + */ +typedef struct IspellDict +{ + /* + * Pointer to a DSM location of IspellDictData. Should be retrieved on + * every dispell_lexize() call. + */ + IspellDictData *dict; + /* + * Array of regular expression of affixes. Each regular expression is + * compiled only on demand. + */ + AffixReg *reg; + /* + * Memory context for compiling regular expressions. 
+ */ + MemoryContext dictCtx; } IspellDict; extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word); -extern void NIStartBuild(IspellDict *Conf); -extern void NIImportAffixes(IspellDict *Conf, const char *filename); -extern void NIImportDictionary(IspellDict *Conf, const char *filename); -extern void NISortDictionary(IspellDict *Conf); -extern void NISortAffixes(IspellDict *Conf); -extern void NIFinishBuild(IspellDict *Conf); +extern void NIStartBuild(IspellDictBuild *ConfBuild); +extern void NIImportAffixes(IspellDictBuild *ConfBuild, const char *filename); +extern void NIImportDictionary(IspellDictBuild *ConfBuild, + const char *filename); +extern void NISortDictionary(IspellDictBuild *ConfBuild); +extern void NISortAffixes(IspellDictBuild *ConfBuild); +extern void NICopyData(IspellDictBuild *ConfBuild); +extern void NIFinishBuild(IspellDictBuild *ConfBuild); #endif
diff --git a/contrib/dict_int/Makefile b/contrib/dict_int/Makefile index f6ae24aa4d..897be348ff 100644 --- a/contrib/dict_int/Makefile +++ b/contrib/dict_int/Makefile @@ -4,7 +4,7 @@ MODULE_big = dict_int OBJS = dict_int.o $(WIN32RES) EXTENSION = dict_int -DATA = dict_int--1.0.sql dict_int--unpackaged--1.0.sql +DATA = dict_int--1.1.sql dict_int--1.0--1.1.sql dict_int--unpackaged--1.0.sql PGFILEDESC = "dict_int - add-on dictionary template for full-text search" REGRESS = dict_int diff --git a/contrib/dict_int/dict_int--1.0--1.1.sql b/contrib/dict_int/dict_int--1.0--1.1.sql new file mode 100644 index 0000000000..3517a5ecd1 --- /dev/null +++ b/contrib/dict_int/dict_int--1.0--1.1.sql @@ -0,0 +1,9 @@ +/* contrib/dict_int/dict_int--1.0--1.1.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION dict_int UPDATE TO '1.1'" to load this file. \quit + +CREATE FUNCTION dintdict_init(internal, internal) + RETURNS internal + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT; diff --git a/contrib/dict_int/dict_int--1.0.sql b/contrib/dict_int/dict_int--1.1.sql similarity index 93% rename from contrib/dict_int/dict_int--1.0.sql rename to contrib/dict_int/dict_int--1.1.sql index acb1461b56..6d3933e3d3 100644 --- a/contrib/dict_int/dict_int--1.0.sql +++ b/contrib/dict_int/dict_int--1.1.sql @@ -3,7 +3,7 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION dict_int" to load this file. 
\quit -CREATE FUNCTION dintdict_init(internal) +CREATE FUNCTION dintdict_init(internal, internal) RETURNS internal AS 'MODULE_PATHNAME' LANGUAGE C STRICT; diff --git a/contrib/dict_int/dict_int.control b/contrib/dict_int/dict_int.control index 6e2d2b351a..51894171f6 100644 --- a/contrib/dict_int/dict_int.control +++ b/contrib/dict_int/dict_int.control @@ -1,5 +1,5 @@ # dict_int extension comment = 'text search dictionary template for integers' -default_version = '1.0' +default_version = '1.1' module_pathname = '$libdir/dict_int' relocatable = true diff --git a/contrib/dict_xsyn/Makefile b/contrib/dict_xsyn/Makefile index 0c401cf3c8..d1cf8d0b5d 100644 --- a/contrib/dict_xsyn/Makefile +++ b/contrib/dict_xsyn/Makefile @@ -4,7 +4,7 @@ MODULE_big = dict_xsyn OBJS = dict_xsyn.o $(WIN32RES) EXTENSION = dict_xsyn -DATA = dict_xsyn--1.0.sql dict_xsyn--unpackaged--1.0.sql +DATA = dict_xsyn--1.1.sql dict_xsyn--1.0--1.1.sql dict_xsyn--unpackaged--1.0.sql DATA_TSEARCH = xsyn_sample.rules PGFILEDESC = "dict_xsyn - add-on dictionary template for full-text search" diff --git a/contrib/dict_xsyn/dict_xsyn--1.0--1.1.sql b/contrib/dict_xsyn/dict_xsyn--1.0--1.1.sql new file mode 100644 index 0000000000..35a576bfee --- /dev/null +++ b/contrib/dict_xsyn/dict_xsyn--1.0--1.1.sql @@ -0,0 +1,9 @@ +/* contrib/dict_xsyn/dict_xsyn--1.0--1.1.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION dict_xsyn UPDATE TO '1.1'" to load this file. 
\quit + +CREATE FUNCTION dxsyn_init(internal, internal) + RETURNS internal + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT; diff --git a/contrib/dict_xsyn/dict_xsyn--1.0.sql b/contrib/dict_xsyn/dict_xsyn--1.1.sql similarity index 93% rename from contrib/dict_xsyn/dict_xsyn--1.0.sql rename to contrib/dict_xsyn/dict_xsyn--1.1.sql index 3d6bb51ca8..d8d1de1aa4 100644 --- a/contrib/dict_xsyn/dict_xsyn--1.0.sql +++ b/contrib/dict_xsyn/dict_xsyn--1.1.sql @@ -3,7 +3,7 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION dict_xsyn" to load this file. \quit -CREATE FUNCTION dxsyn_init(internal) +CREATE FUNCTION dxsyn_init(internal, internal) RETURNS internal AS 'MODULE_PATHNAME' LANGUAGE C STRICT; diff --git a/contrib/dict_xsyn/dict_xsyn.control b/contrib/dict_xsyn/dict_xsyn.control index 3fd465a955..50358374a7 100644 --- a/contrib/dict_xsyn/dict_xsyn.control +++ b/contrib/dict_xsyn/dict_xsyn.control @@ -1,5 +1,5 @@ # dict_xsyn extension comment = 'text search dictionary template for extended synonym processing' -default_version = '1.0' +default_version = '1.1' module_pathname = '$libdir/dict_xsyn' relocatable = true diff --git a/contrib/unaccent/Makefile b/contrib/unaccent/Makefile index f8e3860926..b0ba23ed37 100644 --- a/contrib/unaccent/Makefile +++ b/contrib/unaccent/Makefile @@ -4,7 +4,8 @@ MODULE_big = unaccent OBJS = unaccent.o $(WIN32RES) EXTENSION = unaccent -DATA = unaccent--1.1.sql unaccent--1.0--1.1.sql unaccent--unpackaged--1.0.sql +DATA = unaccent--1.2.sql unaccent--1.1--1.2.sql unaccent--1.0--1.1.sql \ + unaccent--unpackaged--1.0.sql DATA_TSEARCH = unaccent.rules PGFILEDESC = "unaccent - text search dictionary that removes accents" diff --git a/contrib/unaccent/unaccent--1.1--1.2.sql b/contrib/unaccent/unaccent--1.1--1.2.sql new file mode 100644 index 0000000000..eaef37f87e --- /dev/null +++ b/contrib/unaccent/unaccent--1.1--1.2.sql @@ -0,0 +1,9 @@ +/* contrib/unaccent/unaccent--1.1--1.2.sql */ + +-- complain if 
script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION unaccent UPDATE TO '1.2'" to load this file. \quit + +CREATE FUNCTION unaccent_init(internal,internal) + RETURNS internal + AS 'MODULE_PATHNAME', 'unaccent_init' + LANGUAGE C PARALLEL SAFE; diff --git a/contrib/unaccent/unaccent--1.1.sql b/contrib/unaccent/unaccent--1.2.sql similarity index 94% rename from contrib/unaccent/unaccent--1.1.sql rename to contrib/unaccent/unaccent--1.2.sql index ecc8651780..d6ce193e82 100644 --- a/contrib/unaccent/unaccent--1.1.sql +++ b/contrib/unaccent/unaccent--1.2.sql @@ -13,7 +13,7 @@ CREATE FUNCTION unaccent(text) AS 'MODULE_PATHNAME', 'unaccent_dict' LANGUAGE C STABLE STRICT PARALLEL SAFE; -CREATE FUNCTION unaccent_init(internal) +CREATE FUNCTION unaccent_init(internal,internal) RETURNS internal AS 'MODULE_PATHNAME', 'unaccent_init' LANGUAGE C PARALLEL SAFE; diff --git a/contrib/unaccent/unaccent.control b/contrib/unaccent/unaccent.control index a77a65f891..aec53b5ad5 100644 --- a/contrib/unaccent/unaccent.control +++ b/contrib/unaccent/unaccent.control @@ -1,5 +1,5 @@ # unaccent extension comment = 'text search dictionary that removes accents' -default_version = '1.1' +default_version = '1.2' module_pathname = '$libdir/unaccent' relocatable = true diff --git a/src/backend/commands/tsearchcmds.c b/src/backend/commands/tsearchcmds.c index b6aeae449b..32ab98b6a7 100644 --- a/src/backend/commands/tsearchcmds.c +++ b/src/backend/commands/tsearchcmds.c @@ -664,7 +664,7 @@ get_ts_template_func(DefElem *defel, int attnum) switch (attnum) { case Anum_pg_ts_template_tmplinit: - nargs = 1; + nargs = 2; break; case Anum_pg_ts_template_tmpllexize: nargs = 4; diff --git a/src/backend/snowball/snowball_func.sql.in b/src/backend/snowball/snowball_func.sql.in index c02dad43e3..9b85e41ff8 100644 --- a/src/backend/snowball/snowball_func.sql.in +++ b/src/backend/snowball/snowball_func.sql.in @@ -19,7 +19,7 @@ SET search_path = pg_catalog; -CREATE FUNCTION 
dsnowball_init(INTERNAL) +CREATE FUNCTION dsnowball_init(INTERNAL, INTERNAL) RETURNS INTERNAL AS '$libdir/dict_snowball', 'dsnowball_init' LANGUAGE C STRICT; diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 2a5321315a..ecec8f7ff8 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -4881,22 +4881,22 @@ DESCR("(internal)"); DATA(insert OID = 3723 ( ts_lexize PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 1009 "3769 25" _null_ _null_ _null_ _null_ _null_ ts_lexize _null_ _null_ _null_ )); DESCR("normalize one word by dictionary"); -DATA(insert OID = 3725 ( dsimple_init PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2281 "2281" _null_ _null_ _null_ _null_ _null_ dsimple_init _null_ _null_ _null_ )); +DATA(insert OID = 3725 ( dsimple_init PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ _null_ dsimple_init _null_ _null_ _null_ )); DESCR("(internal)"); DATA(insert OID = 3726 ( dsimple_lexize PGNSP PGUID 12 1 0 0 0 f f f f t f i s 4 0 2281 "2281 2281 2281 2281" _null_ _null_ _null_ _null_ _null_ dsimple_lexize _null_ _null_ _null_ )); DESCR("(internal)"); -DATA(insert OID = 3728 ( dsynonym_init PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2281 "2281" _null_ _null_ _null_ _null_ _null_ dsynonym_init _null_ _null_ _null_ )); +DATA(insert OID = 3728 ( dsynonym_init PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ _null_ dsynonym_init _null_ _null_ _null_ )); DESCR("(internal)"); DATA(insert OID = 3729 ( dsynonym_lexize PGNSP PGUID 12 1 0 0 0 f f f f t f i s 4 0 2281 "2281 2281 2281 2281" _null_ _null_ _null_ _null_ _null_ dsynonym_lexize _null_ _null_ _null_ )); DESCR("(internal)"); -DATA(insert OID = 3731 ( dispell_init PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2281 "2281" _null_ _null_ _null_ _null_ _null_ dispell_init _null_ _null_ _null_ )); +DATA(insert OID = 3731 ( dispell_init PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 2281 "2281 2281" 
_null_ _null_ _null_ _null_ _null_ dispell_init _null_ _null_ _null_ )); DESCR("(internal)"); DATA(insert OID = 3732 ( dispell_lexize PGNSP PGUID 12 1 0 0 0 f f f f t f i s 4 0 2281 "2281 2281 2281 2281" _null_ _null_ _null_ _null_ _null_ dispell_lexize _null_ _null_ _null_ )); DESCR("(internal)"); -DATA(insert OID = 3740 ( thesaurus_init PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2281 "2281" _null_ _null_ _null_ _null_ _null_ thesaurus_init _null_ _null_ _null_ )); +DATA(insert OID = 3740 ( thesaurus_init PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ _null_ thesaurus_init _null_ _null_ _null_ )); DESCR("(internal)"); DATA(insert OID = 3741 ( thesaurus_lexize PGNSP PGUID 12 1 0 0 0 f f f f t f i s 4 0 2281 "2281 2281 2281 2281" _null_ _null_ _null_ _null_ _null_ thesaurus_lexize _null_ _null_ _null_ )); DESCR("(internal)");
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 71e20f2740..00faef73ed 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -8228,6 +8228,11 @@ SCRAM-SHA-256$<replaceable><iteration count></replaceable>:<replaceable>&l <entry>time zone names</entry> </row> + <row> + <entry><link linkend="view-pg-ts-shared-dictionaries"><structname>pg_ts_shared_dictionaries</structname></link></entry> + <entry>dictionaries currently in shared memory</entry> + </row> + <row> <entry><link linkend="view-pg-user"><structname>pg_user</structname></link></entry> <entry>database users</entry> @@ -10983,6 +10988,63 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx </sect1> + <sect1 id="view-pg-ts-shared-dictionaries"> + <title><structname>pg_ts_shared_dictionaries</structname></title> + + <indexterm zone="view-pg-ts-shared-dictionaries"> + <primary>pg_ts_shared_dictionaries</primary> + </indexterm> + + <para> + The <structname>pg_ts_shared_dictionaries</structname> view provides a + listing of all text search dictionaries that are currently allocated in + shared memory. The size of available space in shared memory is controlled by + <xref linkend="guc-max-shared-dictionaries-size"/>. A dictionary may have an option which + controls allocation in shared memory (see <xref linkend="textsearch-ispell-dictionary"/>). 
+ </para> + + <table> + <title><structname>pg_ts_shared_dictionaries</structname> Columns</title> + + <tgroup cols="4"> + <thead> + <row> + <entry>Name</entry> + <entry>Type</entry> + <entry>References</entry> + <entry>Description</entry> + </row> + </thead> + <tbody> + <row> + <entry><structfield>dictoid</structfield></entry> + <entry><type>oid</type></entry> + <entry><literal><link linkend="catalog-pg-ts-dict"><structname>pg_ts_dict</structname></link>.oid</literal></entry> + <entry>The OID of the text search dictionary located in shared memory</entry> + </row> + <row> + <entry><structfield>schemaname</structfield></entry> + <entry><type>name</type></entry> + <entry><literal><link linkend="catalog-pg-namespace"><structname>pg_namespace</structname></link>.nspname</literal></entry> + <entry>The name of the schema containing the text search dictionary</entry> + </row> + <row> + <entry><structfield>dictname</structfield></entry> + <entry><type>name</type></entry> + <entry><literal><link linkend="catalog-pg-ts-dict"><structname>pg_ts_dict</structname></link>.dictname</literal></entry> + <entry>The text search dictionary name</entry> + </row> + <row> + <entry><structfield>size</structfield></entry> + <entry><type>bigint</type></entry> + <entry></entry> + <entry>Size of the text search dictionary in bytes</entry> + </row> + </tbody> + </tgroup> + </table> + </sect1> + <sect1 id="view-pg-user"> <title><structname>pg_user</structname></title> diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml index 82afe201f8..78ed082994 100644 --- a/doc/src/sgml/textsearch.sgml +++ b/doc/src/sgml/textsearch.sgml @@ -3045,6 +3045,12 @@ CREATE TEXT SEARCH DICTIONARY english_stem ( parameter value greater than zero before server starting. </para> + <para> + The list of dictionaries currently located in shared memory can be retrieved from the + <link linkend="view-pg-ts-shared-dictionaries"><structname>pg_ts_shared_dictionaries</structname></link> + view. 
+ </para> + </sect2> </sect1> diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 5652e9ee6d..c663db3cf2 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -504,6 +504,9 @@ CREATE VIEW pg_config AS REVOKE ALL on pg_config FROM PUBLIC; REVOKE EXECUTE ON FUNCTION pg_config() FROM PUBLIC; +CREATE VIEW pg_ts_shared_dictionaries AS + SELECT * FROM pg_ts_shared_dictionaries(); + -- Statistics views CREATE VIEW pg_stat_all_tables AS diff --git a/src/backend/tsearch/ts_shared.c b/src/backend/tsearch/ts_shared.c index 7d1f7544cf..ff3127f207 100644 --- a/src/backend/tsearch/ts_shared.c +++ b/src/backend/tsearch/ts_shared.c @@ -13,11 +13,18 @@ */ #include "postgres.h" +#include "funcapi.h" +#include "miscadmin.h" + +#include "access/htup_details.h" +#include "catalog/pg_ts_dict.h" #include "lib/dshash.h" #include "storage/lwlock.h" #include "storage/shmem.h" #include "tsearch/ts_shared.h" +#include "utils/builtins.h" #include "utils/hashutils.h" +#include "utils/lsyscache.h" #include "utils/memutils.h" @@ -364,3 +371,100 @@ recheck_table: MemoryContextSwitchTo(old_context); } + +/* + * pg_ts_shared_dictionaries - SQL SRF showing dictionaries currently in + * shared memory. 
+ */ +Datum +pg_ts_shared_dictionaries(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + MemoryContext oldcontext; + TupleDesc tupdesc; + Tuplestorestate *tupstore; + Relation rel; + HeapTuple tuple; + SysScanDesc scan; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not " \ + "allowed in this context"))); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + /* Build tuplestore to hold the result rows */ + oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupdesc; + + MemoryContextSwitchTo(oldcontext); + + init_dict_table(); + + /* + * If a hash table wasn't created return zero records. 
+ */ + if (!DsaPointerIsValid(tsearch_ctl->dict_table_handle)) + { + tuplestore_donestoring(tupstore); + + PG_RETURN_VOID(); + } + + /* Start to scan pg_ts_dict */ + rel = heap_open(TSDictionaryRelationId, AccessShareLock); + scan = systable_beginscan(rel, InvalidOid, false, NULL, 0, NULL); + + while (HeapTupleIsValid(tuple = systable_getnext(scan))) + { + Datum values[4]; + bool nulls[4]; + Form_pg_ts_dict dict = (Form_pg_ts_dict) GETSTRUCT(tuple); + Oid dictid = HeapTupleGetOid(tuple); + TsearchDictEntry *entry; + NameData dict_name; + + /* If dictionary isn't located in shared memory try following */ + entry = (TsearchDictEntry *) dshash_find(dict_table, &dictid, false); + if (!entry) + continue; + + namecpy(&dict_name, &dict->dictname); + + memset(nulls, 0, sizeof(nulls)); + + values[0] = ObjectIdGetDatum(dictid); + + if (OidIsValid(dict->dictnamespace)) + values[1] = CStringGetDatum(get_namespace_name(dict->dictnamespace)); + else + nulls[1] = true; + + values[2] = NameGetDatum(&dict_name); + values[3] = Int64GetDatum(entry->dict_size); + + dshash_release_lock(dict_table, entry); + + tuplestore_putvalues(tupstore, tupdesc, values, nulls); + } + + systable_endscan(scan); + heap_close(rel, AccessShareLock); + + tuplestore_donestoring(tupstore); + + PG_RETURN_VOID(); +} diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index ecec8f7ff8..71f704fc92 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -4961,6 +4961,9 @@ DESCR("trigger for automatic update of tsvector column"); DATA(insert OID = 3759 ( get_current_ts_config PGNSP PGUID 12 1 0 0 0 f f f f t f s s 0 0 3734 "" _null_ _null_ _null_ _null_ _null_ get_current_ts_config _null_ _null_ _null_ )); DESCR("get current tsearch configuration"); +DATA(insert OID = 4213 ( pg_ts_shared_dictionaries PGNSP PGUID 12 1 10 0 0 f f f f f t s s 0 0 2249 "" "{26,19,19,20}" "{o,o,o,o}" "{dictoid,schemaname,dictname,size}" _null_ _null_ pg_ts_shared_dictionaries _null_ 
_null_ _null_ )); +DESCR("information about text search dictionaries currently in shared memory"); + DATA(insert OID = 3736 ( regconfigin PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 3734 "2275" _null_ _null_ _null_ _null_ _null_ regconfigin _null_ _null_ _null_ )); DESCR("I/O"); DATA(insert OID = 3737 ( regconfigout PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2275 "3734" _null_ _null_ _null_ _null_ _null_ regconfigout _null_ _null_ _null_ )); diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 5433944c6a..235b066119 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2209,6 +2209,11 @@ pg_timezone_names| SELECT pg_timezone_names.name, pg_timezone_names.utc_offset, pg_timezone_names.is_dst FROM pg_timezone_names() pg_timezone_names(name, abbrev, utc_offset, is_dst); +pg_ts_shared_dictionaries| SELECT pg_ts_shared_dictionaries.dictoid, + pg_ts_shared_dictionaries.schemaname, + pg_ts_shared_dictionaries.dictname, + pg_ts_shared_dictionaries.size + FROM pg_ts_shared_dictionaries() pg_ts_shared_dictionaries(dictoid, schemaname, dictname, size); pg_user| SELECT pg_shadow.usename, pg_shadow.usesysid, pg_shadow.usecreatedb,
diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml index 78ed082994..f5e88f7c86 100644 --- a/doc/src/sgml/textsearch.sgml +++ b/doc/src/sgml/textsearch.sgml @@ -2829,6 +2829,7 @@ iconv -f ISO_8859-1 -t UTF-8 -o nn_no.dict nn_NO.dic <programlisting> CREATE TEXT SEARCH DICTIONARY english_hunspell ( TEMPLATE = ispell, + Shareable = false, DictFile = en_us, AffFile = en_us, Stopwords = english); @@ -2843,6 +2844,9 @@ CREATE TEXT SEARCH DICTIONARY english_hunspell ( The stop-words file has the same format explained above for the <literal>simple</literal> dictionary type. The format of the other files is not specified here but is available from the above-mentioned web sites. + <literal>Shareable</literal> controls loading into shared memory. By + default it is <literal>true</literal> (see more in + <xref linkend="textsearch-shared-dictionaries"/>). </para> <para> @@ -3037,7 +3041,8 @@ CREATE TEXT SEARCH DICTIONARY english_stem ( Some dictionaries, especially <application>Ispell</application>, consumes a noticable value of memory. Size of a dictionary can reach tens of megabytes. Most of them also stores configuration in text files. A dictionary is compiled - during first access per a user session. + during first access per a user session. Currently only + <application>Ispell</application> supports loading into shared memory. 
</para> <para> diff --git a/src/backend/tsearch/dict_ispell.c b/src/backend/tsearch/dict_ispell.c index e7f4d5a48d..8a714cec54 100644 --- a/src/backend/tsearch/dict_ispell.c +++ b/src/backend/tsearch/dict_ispell.c @@ -38,7 +38,8 @@ typedef struct } DictISpell; static void parse_dictoptions(List *dictoptions, - char **dictfile, char **afffile, char **stopfile); + char **dictfile, char **afffile, char **stopfile, + bool *isshared); static void *dispell_build(List *dictoptions, Size *size); Datum @@ -49,15 +50,22 @@ dispell_init(PG_FUNCTION_ARGS) DictISpell *d; void *dict_location; char *stopfile; + bool isshared; d = (DictISpell *) palloc0(sizeof(DictISpell)); - parse_dictoptions(dictoptions, NULL, NULL, &stopfile); + parse_dictoptions(dictoptions, NULL, NULL, &stopfile, &isshared); + /* Make stop word list */ if (stopfile) readstoplist(stopfile, &(d->stoplist), lowerstr); - dict_location = ts_dict_shmem_location(dictid, dictoptions, dispell_build); + /* Make or get from shared memory dictionary itself */ + if (isshared) + dict_location = ts_dict_shmem_location(dictid, dictoptions, dispell_build); + else + dict_location = dispell_build(dictoptions, NULL); + Assert(dict_location); d->obj.dict = (IspellDictData *) dict_location; @@ -111,9 +119,10 @@ dispell_lexize(PG_FUNCTION_ARGS) static void parse_dictoptions(List *dictoptions, char **dictfile, char **afffile, - char **stopfile) + char **stopfile, bool *isshared) { ListCell *l; + bool isshared_defined = false; if (dictfile) *dictfile = NULL; @@ -121,6 +130,8 @@ parse_dictoptions(List *dictoptions, char **dictfile, char **afffile, *afffile = NULL; if (stopfile) *stopfile = NULL; + if (isshared) + *isshared = true; foreach(l, dictoptions) { @@ -159,6 +170,19 @@ parse_dictoptions(List *dictoptions, char **dictfile, char **afffile, errmsg("multiple StopWords parameters"))); *stopfile = defGetString(defel); } + else if (pg_strcasecmp(defel->defname, "Shareable") == 0) + { + if (!isshared) + continue; + + if 
(isshared_defined) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("multiple Shareable parameters"))); + + *isshared = defGetBoolean(defel); + isshared_defined = true; + } else { ereport(ERROR, @@ -181,7 +205,7 @@ dispell_build(List *dictoptions, Size *size) char *dictfile, *afffile; - parse_dictoptions(dictoptions, &dictfile, &afffile, NULL); + parse_dictoptions(dictoptions, &dictfile, &afffile, NULL, NULL); if (!afffile) { @@ -213,6 +237,7 @@ dispell_build(List *dictoptions, Size *size) NIFinishBuild(&build); /* Return the buffer and its size */ - *size = build.dict_size; + if (size) + *size = build.dict_size; return build.dict; } diff --git a/src/test/regress/expected/tsdicts.out b/src/test/regress/expected/tsdicts.out index 0c1d7c7675..6f6bca4f42 100644 --- a/src/test/regress/expected/tsdicts.out +++ b/src/test/regress/expected/tsdicts.out @@ -194,6 +194,7 @@ SELECT ts_lexize('hunspell', 'footballyklubber'); -- Test ISpell dictionary with hunspell affix file with FLAG long parameter CREATE TEXT SEARCH DICTIONARY hunspell_long ( Template=ispell, + Shareable=false, DictFile=hunspell_sample_long, AffFile=hunspell_sample_long ); @@ -290,6 +291,7 @@ SELECT ts_lexize('hunspell_long', 'footballyklubber'); -- Test ISpell dictionary with hunspell affix file with FLAG num parameter CREATE TEXT SEARCH DICTIONARY hunspell_num ( Template=ispell, + Shareable=false, DictFile=hunspell_sample_num, AffFile=hunspell_sample_num ); @@ -588,3 +590,58 @@ CREATE TEXT SEARCH DICTIONARY tsdict_case "AffFile" = ispell_sample ); ERROR: unrecognized Ispell parameter: "DictFile" +-- Test shared dictionaries +CREATE TEXT SEARCH DICTIONARY shared_ispell ( + Template=ispell, + DictFile=ispell_sample, + AffFile=ispell_sample +); +-- Make sure that dictionaries in shared memory +SELECT ts_lexize('ispell', 'skies'); + ts_lexize +----------- + {sky} +(1 row) + +SELECT ts_lexize('hunspell', 'skies'); + ts_lexize +----------- + {sky} +(1 row) + +SELECT 
ts_lexize('shared_ispell', 'skies'); + ts_lexize +----------- + {sky} +(1 row) + +SELECT schemaname, dictname FROM pg_ts_shared_dictionaries; + schemaname | dictname +------------+--------------- + public | ispell + public | hunspell + public | shared_ispell +(3 rows) + +-- shared_ispell space should be released in shared memory +DROP TEXT SEARCH DICTIONARY shared_ispell; +-- Make sure that dictionaries in shared memory, DROP invalidates cache +SELECT ts_lexize('ispell', 'skies'); + ts_lexize +----------- + {sky} +(1 row) + +SELECT ts_lexize('hunspell', 'skies'); + ts_lexize +----------- + {sky} +(1 row) + +SELECT schemaname, dictname FROM pg_ts_shared_dictionaries; + schemaname | dictname +------------+---------- + public | ispell + public | hunspell +(2 rows) + diff --git a/src/test/regress/sql/tsdicts.sql b/src/test/regress/sql/tsdicts.sql index 1633c0d066..66a7c37e53 100644 --- a/src/test/regress/sql/tsdicts.sql +++ b/src/test/regress/sql/tsdicts.sql @@ -51,6 +51,7 @@ SELECT ts_lexize('hunspell', 'footballyklubber'); -- Test ISpell dictionary with hunspell affix file with FLAG long parameter CREATE TEXT SEARCH DICTIONARY hunspell_long ( Template=ispell, + Shareable=false, DictFile=hunspell_sample_long, AffFile=hunspell_sample_long ); @@ -75,6 +76,7 @@ SELECT ts_lexize('hunspell_long', 'footballyklubber'); -- Test ISpell dictionary with hunspell affix file with FLAG num parameter CREATE TEXT SEARCH DICTIONARY hunspell_num ( Template=ispell, + Shareable=false, DictFile=hunspell_sample_num, AffFile=hunspell_sample_num ); @@ -196,3 +198,26 @@ CREATE TEXT SEARCH DICTIONARY tsdict_case "DictFile" = ispell_sample, "AffFile" = ispell_sample ); + +-- Test shared dictionaries +CREATE TEXT SEARCH DICTIONARY shared_ispell ( + Template=ispell, + DictFile=ispell_sample, + AffFile=ispell_sample +); + +-- Make sure that dictionaries in shared memory +SELECT ts_lexize('ispell', 'skies'); +SELECT ts_lexize('hunspell', 'skies'); +SELECT ts_lexize('shared_ispell', 'skies'); + 
+SELECT schemaname, dictname FROM pg_ts_shared_dictionaries; + +-- shared_ispell space should be released in shared memory +DROP TEXT SEARCH DICTIONARY shared_ispell; + +-- Make sure that dictionaries in shared memory, DROP invalidates cache +SELECT ts_lexize('ispell', 'skies'); +SELECT ts_lexize('hunspell', 'skies'); + +SELECT schemaname, dictname FROM pg_ts_shared_dictionaries;