On 16.11.2015 15:51, Artur Zakirov wrote:
On 10.11.2015 13:23, Artur Zakirov wrote:

Link to patch in commitfest:

Link to regression tests:

I have made some changes to the documentation in section "12.6. Dictionaries": I have added a description of how to load Ispell and Hunspell dictionaries, and a description of the Ispell and Hunspell file formats.

Patches for the documentation and for the code are attached separately.

Artur Zakirov
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
*** a/src/backend/tsearch/spell.c
--- b/src/backend/tsearch/spell.c
*** 153,159 **** cmpspell(const void *s1, const void *s2)
  static int
  cmpspellaffix(const void *s1, const void *s2)
! 	return (strncmp((*(SPELL *const *) s1)->p.flag, (*(SPELL *const *) s2)->p.flag, MAXFLAGLEN));
  static char *
--- 153,159 ----
  static int
  cmpspellaffix(const void *s1, const void *s2)
! 	return (strncmp((*(SPELL *const *) s1)->flag, (*(SPELL *const *) s2)->flag, MAXFLAGLEN));
  static char *
*** 237,242 **** cmpaffix(const void *s1, const void *s2)
--- 237,309 ----
  					   (const unsigned char *) a2->repl);
+ static unsigned short
+ decodeFlag(IspellDict *Conf, char *sflag, char **sflagnext)
+ {
+ 	unsigned short	s;
+ 	char		   *next;
+ 	switch (Conf->flagMode)
+ 	{
+ 		case FM_LONG:
+ 			s = (int)sflag[0] << 8 | (int)sflag[1];
+ 			if (sflagnext)
+ 				*sflagnext = sflag + 2;
+ 			break;
+ 		case FM_NUM:
+ 			s = (unsigned short) strtol(sflag, &next, 10);
+ 			if (sflagnext)
+ 			{
+ 				if (next)
+ 				{
+ 					*sflagnext = next;
+ 					while (**sflagnext)
+ 					{
+ 						if (**sflagnext == ',')
+ 						{
+ 							*sflagnext = *sflagnext + 1;
+ 							break;
+ 						}
+ 						*sflagnext = *sflagnext + 1;
+ 					}
+ 				}
+ 				else
+ 					*sflagnext = 0;
+ 			}
+ 			break;
+ 		default:
+ 			s = (unsigned short) *((unsigned char *)sflag);
+ 			if (sflagnext)
+ 				*sflagnext = sflag + 1;
+ 	}
+ 	return s;
+ }
+ static bool
+ isAffixFlagInUse(IspellDict *Conf, int affix, unsigned short affixflag)
+ {
+ 	char *flagcur;
+ 	char *flagnext = 0;
+ 	if (affixflag == 0)
+ 		return true;
+ 	flagcur = Conf->AffixData[affix];
+ 	while (*flagcur)
+ 	{
+ 		if (decodeFlag(Conf, flagcur, &flagnext) == affixflag)
+ 			return true;
+ 		if (flagnext)
+ 			flagcur = flagnext;
+ 		else
+ 			break;
+ 	}
+ 	return false;
+ }
  static void
  NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
*** 255,261 **** NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
  	Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1);
  	strcpy(Conf->Spell[Conf->nspell]->word, word);
! 	strlcpy(Conf->Spell[Conf->nspell]->p.flag, flag, MAXFLAGLEN);
--- 322,328 ----
  	Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1);
  	strcpy(Conf->Spell[Conf->nspell]->word, word);
! 	Conf->Spell[Conf->nspell]->flag = (*flag != '\0') ? cpstrdup(Conf, flag) : VoidString;
*** 355,361 **** FindWord(IspellDict *Conf, const char *word, int affixflag, int flag)
  					else if ((flag & StopMiddle->compoundflag) == 0)
  						return 0;
! 					if ((affixflag == 0) || (strchr(Conf->AffixData[StopMiddle->affix], affixflag) != NULL))
  						return 1;
  				node = StopMiddle->node;
--- 422,428 ----
  					else if ((flag & StopMiddle->compoundflag) == 0)
  						return 0;
! 					if (isAffixFlagInUse(Conf, StopMiddle->affix, affixflag))
  						return 1;
  				node = StopMiddle->node;
*** 394,400 **** NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c
  	Affix = Conf->Affix + Conf->naffixes;
! 	if (strcmp(mask, ".") == 0)
  		Affix->issimple = 1;
  		Affix->isregis = 0;
--- 461,467 ----
  	Affix = Conf->Affix + Conf->naffixes;
! 	if (strcmp(mask, ".") == 0 || *mask == '\0')
  		Affix->issimple = 1;
  		Affix->isregis = 0;
*** 429,443 **** NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c
  		err = pg_regcomp(&(Affix->reg.regex), wmask, wmasklen,
  		if (err)
! 		{
! 			char		errstr[100];
! 			pg_regerror(err, &(Affix->reg.regex), errstr, sizeof(errstr));
! 			ereport(ERROR,
! 					 errmsg("invalid regular expression: %s", errstr)));
! 		}
  	Affix->flagflags = flagflags;
--- 496,504 ----
  		err = pg_regcomp(&(Affix->reg.regex), wmask, wmasklen,
+ 		/* Ignore regular expression error and do not add wrong affix */
  		if (err)
! 			return;
  	Affix->flagflags = flagflags;
*** 595,604 **** addFlagValue(IspellDict *Conf, char *s, uint32 val)
  				 errmsg("multibyte flag character is not allowed")));
! 	Conf->flagval[*(unsigned char *) s] = (unsigned char) val;
  	Conf->usecompound = true;
   * Import an affix file that follows MySpell or Hunspell format
--- 656,713 ----
  				 errmsg("multibyte flag character is not allowed")));
! 	Conf->flagval[decodeFlag(Conf, s, (char **)NULL)] = (unsigned char) val;
  	Conf->usecompound = true;
+ static int
+ getFlagValues(IspellDict *Conf, char *s)
+ {
+ 	uint32	 flag = 0;
+ 	char	*flagcur;
+ 	char	*flagnext = 0;
+ 	flagcur = s;
+ 	while (*flagcur)
+ 	{
+ 		flag |= Conf->flagval[decodeFlag(Conf, flagcur, &flagnext)];
+ 		if (flagnext)
+ 			flagcur = flagnext;
+ 		else
+ 			break;
+ 	}
+ 	return flag;
+ }
+ /*
+  * Get flag set from "s".
+  *
+  * Returns flag set from AffixData array if AF parameter used (useFlagAliases is true).
+  * In this case "s" is alias for flag set.
+  *
+  * Otherwise returns "s".
+  */
+ static char *
+ getFlags(IspellDict *Conf, char *s)
+ {
+ 	int curaffix;
+ 	if (Conf->useFlagAliases)
+ 	{
+ 		curaffix = strtol(s, (char **)NULL, 10);
+ 		if (curaffix && curaffix <= Conf->nAffixData)
+ 			/*
+ 			 * Do not subtract 1 from curaffix
+ 			 * because an empty string was added in NIImportOOAffixes
+ 			 */
+ 			return Conf->AffixData[curaffix];
+ 		else
+ 			return VoidString;
+ 	}
+ 	else
+ 		return s;
+ }
   * Import an affix file that follows MySpell or Hunspell format
*** 615,621 **** NIImportOOAffixes(IspellDict *Conf, const char *filename)
  	char		repl[BUFSIZ],
  	bool		isSuffix = false;
! 	int			flag = 0;
  	char		flagflags = 0;
  	tsearch_readline_state trst;
  	int			scanread = 0;
--- 724,734 ----
  	char		repl[BUFSIZ],
  	bool		isSuffix = false;
! 	int			naffix = 0,
! 				curaffix = 0;
! 	int			flag = 0,
! 				flagprev = 0,
! 				sflaglen = 0;
  	char		flagflags = 0;
  	tsearch_readline_state trst;
  	int			scanread = 0;
*** 625,630 **** NIImportOOAffixes(IspellDict *Conf, const char *filename)
--- 738,745 ----
  	/* read file to find any flag */
  	memset(Conf->flagval, 0, sizeof(Conf->flagval));
  	Conf->usecompound = false;
+ 	Conf->useFlagAliases = false;
+ 	Conf->flagMode = FM_CHAR;
  	if (!tsearch_readline_begin(&trst, filename))
*** 672,681 **** NIImportOOAffixes(IspellDict *Conf, const char *filename)
  			while (*s && t_isspace(s))
  				s += pg_mblen(s);
! 			if (*s && STRNCMP(s, "default") != 0)
! 				ereport(ERROR,
! 						 errmsg("Ispell dictionary supports only default flag value")));
--- 787,803 ----
  			while (*s && t_isspace(s))
  				s += pg_mblen(s);
! 			if (*s)
! 			{
! 				if (STRNCMP(s, "long") == 0)
! 					Conf->flagMode = FM_LONG;
! 				else if (STRNCMP(s, "num") == 0)
! 					Conf->flagMode = FM_NUM;
! 				else if (STRNCMP(s, "default") != 0)
! 					ereport(ERROR,
! 						 errmsg("Ispell dictionary supports only default, long and num flag value")));
! 			}
*** 695,725 **** NIImportOOAffixes(IspellDict *Conf, const char *filename)
  		if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
  			goto nextline;
  		scanread = sscanf(recoded, scanbuf, type, sflag, find, repl, mask);
  		if (ptype)
  		ptype = lowerstr_ctx(Conf, type);
  		if (scanread < 4 || (STRNCMP(ptype, "sfx") && STRNCMP(ptype, "pfx")))
  			goto nextline;
! 		if (scanread == 4)
! 			if (strlen(sflag) != 1)
! 				goto nextline;
! 			flag = *sflag;
  			isSuffix = (STRNCMP(ptype, "sfx") == 0) ? true : false;
  			if (t_iseq(find, 'y') || t_iseq(find, 'Y'))
  				flagflags = FF_CROSSPRODUCT;
  				flagflags = 0;
  			char	   *ptr;
  			int			aflg = 0;
! 			if (strlen(sflag) != 1 || flag != *sflag || flag == 0)
  				goto nextline;
  			prepl = lowerstr_ctx(Conf, repl);
  			/* affix flag */
--- 817,891 ----
  		if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
  			goto nextline;
+ 		*find = *repl = *mask = '\0';
  		scanread = sscanf(recoded, scanbuf, type, sflag, find, repl, mask);
  		if (ptype)
  		ptype = lowerstr_ctx(Conf, type);
+ 		/* First try to parse AF parameter (alias compression) */
+ 		if (STRNCMP(ptype, "af") == 0)
+ 		{
+ 			/* First line is the number of aliases */
+ 			if (!Conf->useFlagAliases)
+ 			{
+ 				Conf->useFlagAliases = true;
+ 				naffix = atoi(sflag);
+ 				if (naffix == 0)
+ 					ereport(ERROR,
+ 						(errcode(ERRCODE_CONFIG_FILE_ERROR),
+ 						 errmsg("invalid number of flag vector aliases")));
+ 				/* Also reserve place for empty flag set */
+ 				naffix++;
+ 				Conf->AffixData = (char **) palloc0(naffix * sizeof(char *));
+ 				Conf->lenAffixData = Conf->nAffixData = naffix;
+ 				/* Add empty flag set into AffixData */
+ 				Conf->AffixData[curaffix] = VoidString;
+ 				curaffix++;
+ 			}
+ 			/* Other lines are aliases */
+ 			else
+ 			{
+ 				if (curaffix < naffix)
+ 				{
+ 					Conf->AffixData[curaffix] = cpstrdup(Conf, sflag);
+ 					curaffix++;
+ 				}
+ 			}
+ 			goto nextline;
+ 		}
+ 		/* Else try to parse prefixes and suffixes */
  		if (scanread < 4 || (STRNCMP(ptype, "sfx") && STRNCMP(ptype, "pfx")))
  			goto nextline;
! 		sflaglen = strlen(sflag);
! 		if (sflaglen == 0
! 			|| (sflaglen > 1 && Conf->flagMode == FM_CHAR)
! 			|| (sflaglen > 2 && Conf->flagMode == FM_LONG))
! 			goto nextline;
! 		flag = decodeFlag(Conf, sflag, (char **)NULL);
! 		/* Affix header */
! 		if (flag != flagprev)
! 			flagprev = flag;
  			isSuffix = (STRNCMP(ptype, "sfx") == 0) ? true : false;
  			if (t_iseq(find, 'y') || t_iseq(find, 'Y'))
  				flagflags = FF_CROSSPRODUCT;
  				flagflags = 0;
+ 		/* Affix fields */
  			char	   *ptr;
  			int			aflg = 0;
! 			if (flag == 0)
  				goto nextline;
  			prepl = lowerstr_ctx(Conf, repl);
  			/* affix flag */
*** 727,737 **** NIImportOOAffixes(IspellDict *Conf, const char *filename)
  				*ptr = '\0';
  				ptr = repl + (ptr - prepl) + 1;
! 				while (*ptr)
! 				{
! 					aflg |= Conf->flagval[*(unsigned char *) ptr];
! 					ptr++;
! 				}
  			pfind = lowerstr_ctx(Conf, find);
  			pmask = lowerstr_ctx(Conf, mask);
--- 893,899 ----
  				*ptr = '\0';
  				ptr = repl + (ptr - prepl) + 1;
! 				aflg |= getFlagValues(Conf, getFlags(Conf, ptr));
  			pfind = lowerstr_ctx(Conf, find);
  			pmask = lowerstr_ctx(Conf, mask);
*** 789,794 **** NIImportAffixes(IspellDict *Conf, const char *filename)
--- 951,958 ----
  	memset(Conf->flagval, 0, sizeof(Conf->flagval));
  	Conf->usecompound = false;
+ 	Conf->useFlagAliases = false;
+ 	Conf->flagMode = FM_CHAR;
  	while ((recoded = tsearch_readline(&trst)) != NULL)
*** 931,946 **** MergeAffix(IspellDict *Conf, int a1, int a2)
  static uint32
  makeCompoundFlags(IspellDict *Conf, int affix)
! 	uint32		flag = 0;
! 	char	   *str = Conf->AffixData[affix];
! 	while (str && *str)
! 	{
! 		flag |= Conf->flagval[*(unsigned char *) str];
! 		str++;
! 	}
! 	return (flag & FF_DICTFLAGMASK);
  static SPNode *
--- 1095,1102 ----
  static uint32
  makeCompoundFlags(IspellDict *Conf, int affix)
! 	char *str = Conf->AffixData[affix];
! 	return (getFlagValues(Conf, str) & FF_DICTFLAGMASK);
  static SPNode *
*** 954,960 **** mkSPNode(IspellDict *Conf, int low, int high, int level)
  	int			lownew = low;
  	for (i = low; i < high; i++)
! 		if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level])
  			lastchar = Conf->Spell[i]->word[level];
--- 1110,1116 ----
  	int			lownew = low;
  	for (i = low; i < high; i++)
! 		if (Conf->Spell[i]->d.len > level && lastchar != Conf->Spell[i]->word[level])
  			lastchar = Conf->Spell[i]->word[level];
*** 969,975 **** mkSPNode(IspellDict *Conf, int low, int high, int level)
  	lastchar = '\0';
  	for (i = low; i < high; i++)
! 		if (Conf->Spell[i]->p.d.len > level)
  			if (lastchar != Conf->Spell[i]->word[level])
--- 1125,1131 ----
  	lastchar = '\0';
  	for (i = low; i < high; i++)
! 		if (Conf->Spell[i]->d.len > level)
  			if (lastchar != Conf->Spell[i]->word[level])
*** 982,992 **** mkSPNode(IspellDict *Conf, int low, int high, int level)
  				lastchar = Conf->Spell[i]->word[level];
  			data->val = ((uint8 *) (Conf->Spell[i]->word))[level];
! 			if (Conf->Spell[i]->p.d.len == level + 1)
  				bool		clearCompoundOnly = false;
! 				if (data->isword && data->affix != Conf->Spell[i]->p.d.affix)
  					 * MergeAffix called a few times. If one of word is
--- 1138,1148 ----
  				lastchar = Conf->Spell[i]->word[level];
  			data->val = ((uint8 *) (Conf->Spell[i]->word))[level];
! 			if (Conf->Spell[i]->d.len == level + 1)
  				bool		clearCompoundOnly = false;
! 				if (data->isword && data->affix != Conf->Spell[i]->d.affix)
  					 * MergeAffix called a few times. If one of word is
*** 995,1006 **** mkSPNode(IspellDict *Conf, int low, int high, int level)
  					clearCompoundOnly = (FF_COMPOUNDONLY & data->compoundflag
! 						& makeCompoundFlags(Conf, Conf->Spell[i]->p.d.affix))
  						? false : true;
! 					data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix);
! 					data->affix = Conf->Spell[i]->p.d.affix;
  				data->isword = 1;
  				data->compoundflag = makeCompoundFlags(Conf, data->affix);
--- 1151,1162 ----
  					clearCompoundOnly = (FF_COMPOUNDONLY & data->compoundflag
! 						& makeCompoundFlags(Conf, Conf->Spell[i]->d.affix))
  						? false : true;
! 					data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->d.affix);
! 					data->affix = Conf->Spell[i]->d.affix;
  				data->isword = 1;
  				data->compoundflag = makeCompoundFlags(Conf, data->affix);
*** 1032,1070 **** NISortDictionary(IspellDict *Conf)
  	/* compress affixes */
! 	/* Count the number of different flags used in the dictionary */
! 	qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspellaffix);
! 	naffix = 0;
! 	for (i = 0; i < Conf->nspell; i++)
! 	{
! 		if (i == 0 || strncmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag, MAXFLAGLEN))
! 			naffix++;
! 	}
! 	/*
! 	 * Fill in Conf->AffixData with the affixes that were used in the
! 	 * dictionary. Replace textual flag-field of Conf->Spell entries with
! 	 * indexes into Conf->AffixData array.
! 	Conf->AffixData = (char **) palloc0(naffix * sizeof(char *));
! 	curaffix = -1;
! 	for (i = 0; i < Conf->nspell; i++)
! 		if (i == 0 || strncmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix], MAXFLAGLEN))
! 			curaffix++;
! 			Assert(curaffix < naffix);
! 			Conf->AffixData[curaffix] = cpstrdup(Conf, Conf->Spell[i]->p.flag);
- 		Conf->Spell[i]->p.d.affix = curaffix;
- 		Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
! 	Conf->lenAffixData = Conf->nAffixData = naffix;
  	qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
  	Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
--- 1188,1244 ----
  	/* compress affixes */
! 	/* If we use flag aliases then we need to use Conf->AffixData filled in NIImportOOAffixes.
! 	 * If Conf->Spell[i]->flag is empty, then get empty value of Conf->AffixData (0 index)
! 	if (Conf->useFlagAliases)
! 		for (i = 0; i < Conf->nspell; i++)
! 			curaffix = strtol(Conf->Spell[i]->flag, (char **)NULL, 10);
! 			if (curaffix && curaffix <= Conf->nAffixData)
! 				Conf->Spell[i]->d.affix = curaffix;
! 			else
! 				Conf->Spell[i]->d.affix = 0;
! 			Conf->Spell[i]->d.len = strlen(Conf->Spell[i]->word);
+ 	/* Otherwise fill Conf->AffixData here */
+ 	else
+ 	{
+ 		/* Count the number of different flags used in the dictionary */
+ 		qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspellaffix);
+ 		naffix = 0;
+ 		for (i = 0; i < Conf->nspell; i++)
+ 		{
+ 			if (i == 0 || strcmp(Conf->Spell[i]->flag, Conf->Spell[i - 1]->flag))
+ 				naffix++;
+ 		}
! 		/*
! 		 * Fill in Conf->AffixData with the affixes that were used in the
! 		 * dictionary. Replace textual flag-field of Conf->Spell entries with
! 		 * indexes into Conf->AffixData array.
! 		 */
! 		Conf->AffixData = (char **) palloc0(naffix * sizeof(char *));
! 		curaffix = -1;
! 		for (i = 0; i < Conf->nspell; i++)
! 		{
! 			if (i == 0 || strcmp(Conf->Spell[i]->flag, Conf->AffixData[curaffix]))
! 			{
! 				curaffix++;
! 				Assert(curaffix < naffix);
! 				Conf->AffixData[curaffix] = cpstrdup(Conf, Conf->Spell[i]->flag);
! 			}
! 			Conf->Spell[i]->d.affix = curaffix;
! 			Conf->Spell[i]->d.len = strlen(Conf->Spell[i]->word);
! 		}
! 		Conf->lenAffixData = Conf->nAffixData = naffix;
! 	}
  	qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
  	Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
*** 1185,1196 **** mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
  static bool
! isAffixInUse(IspellDict *Conf, char flag)
  	int			i;
  	for (i = 0; i < Conf->nAffixData; i++)
! 		if (strchr(Conf->AffixData[i], flag) != NULL)
  			return true;
  	return false;
--- 1359,1370 ----
  static bool
! isAffixInUse(IspellDict *Conf, int flag)
  	int			i;
  	for (i = 0; i < Conf->nAffixData; i++)
! 		if (isAffixFlagInUse(Conf, i, flag))
  			return true;
  	return false;
*** 1219,1225 **** NISortAffixes(IspellDict *Conf)
  			firstsuffix = i;
  		if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 &&
! 			isAffixInUse(Conf, (char) Affix->flag))
  			if (ptr == Conf->CompoundAffix ||
  				ptr->issuffix != (ptr - 1)->issuffix ||
--- 1393,1399 ----
  			firstsuffix = i;
  		if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 &&
! 			isAffixInUse(Conf, Affix->flag))
  			if (ptr == Conf->CompoundAffix ||
  				ptr->issuffix != (ptr - 1)->issuffix ||
*** 1685,1691 **** SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int
  		if (StopLow < StopHigh)
! 			if (level == FF_COMPOUNDBEGIN)
  				compoundflag = FF_COMPOUNDBEGIN;
  			else if (level == wordlen - 1)
  				compoundflag = FF_COMPOUNDLAST;
--- 1859,1865 ----
  		if (StopLow < StopHigh)
! 			if (startpos == 0)
  				compoundflag = FF_COMPOUNDBEGIN;
  			else if (level == wordlen - 1)
  				compoundflag = FF_COMPOUNDLAST;
*** a/src/include/tsearch/dicts/spell.h
--- b/src/include/tsearch/dicts/spell.h
*** 57,75 **** typedef struct SPNode
  typedef struct spell_struct
! 	union
! 		/*
! 		 * flag is filled in by NIImportDictionary. After NISortDictionary, d
! 		 * is valid and flag is invalid.
! 		 */
! 		char		flag[MAXFLAGLEN];
! 		struct
! 		{
! 			int			affix;
! 			int			len;
! 		}			d;
! 	}			p;
  } SPELL;
--- 57,72 ----
  typedef struct spell_struct
! 	struct
! 		int			affix;
! 		int			len;
! 	}			d;
! 	/*
! 	 * flag is filled in by NIImportDictionary. After NISortDictionary, d
! 	 * is valid and flag is invalid.
! 	 */
! 	char	   *flag;
  } SPELL;
*** 77,83 **** typedef struct spell_struct
  typedef struct aff_struct
! 	uint32		flag:8,
--- 74,80 ----
  typedef struct aff_struct
! 	uint32		flag:16,
*** 132,137 **** typedef struct
--- 129,141 ----
  	bool		issuffix;
  } CMPDAffix;
+ typedef enum
+ {
+ } FlagMode;
  typedef struct
  	int			maffixes;
*** 145,155 **** typedef struct
  	char	  **AffixData;
  	int			lenAffixData;
  	int			nAffixData;
  	CMPDAffix  *CompoundAffix;
! 	unsigned char flagval[256];
  	bool		usecompound;
  	 * Remaining fields are only used during dictionary construction; they are
--- 149,161 ----
  	char	  **AffixData;
  	int			lenAffixData;
  	int			nAffixData;
+ 	bool		useFlagAliases;
  	CMPDAffix  *CompoundAffix;
! 	unsigned char flagval[65000];
  	bool		usecompound;
+ 	FlagMode	flagMode;
  	 * Remaining fields are only used during dictionary construction; they are
*** a/doc/src/sgml/textsearch.sgml
--- b/doc/src/sgml/textsearch.sgml
*** 2615,2632 **** SELECT plainto_tsquery('supernova star');
!     To create an <application>Ispell</> dictionary, use the built-in
!     <literal>ispell</literal> template and specify several parameters:
      TEMPLATE = ispell,
!     DictFile = english,
!     AffFile = english,
!     StopWords = english
! );
      Here, <literal>DictFile</>, <literal>AffFile</>, and <literal>StopWords</>
--- 2615,2655 ----
!     To create an <application>Ispell</> dictionary perform these steps:
!    <itemizedlist spacing="compact" mark="bullet">
!     <listitem>
!      <para>
!       download the dictionary configuration files. <productname>OpenOffice</>
!       extension files have the <filename>.oxt</> extension. It is necessary
!       to extract the <filename>.aff</> and <filename>.dic</> files and change
!       their extensions to <filename>.affix</> and <filename>.dict</>. For some
!       dictionary files it is also necessary to convert the characters to the
!       UTF-8 encoding with commands such as (for example, for a Norwegian dictionary):
! <programlisting>
! iconv -f ISO_8859-1 -t UTF-8 -o nn_no.affix nn_NO.aff
! iconv -f ISO_8859-1 -t UTF-8 -o nn_no.dict nn_NO.dic
! </programlisting>
!      </para>
!     </listitem>
!     <listitem>
!      <para>
!       copy files to the <filename>$SHAREDIR/tsearch_data</> directory
!      </para>
!     </listitem>
!     <listitem>
!      <para>
!       load files into PostgreSQL with the following command:
! <programlisting>
      TEMPLATE = ispell,
!     DictFile = en_us,
!     AffFile = en_us,
!     Stopwords = english);
+      </para>
+     </listitem>
+    </itemizedlist>
      Here, <literal>DictFile</>, <literal>AffFile</>, and <literal>StopWords</>
*** 2643,2648 **** CREATE TEXT SEARCH DICTIONARY english_ispell (
--- 2666,2720 ----
+     The <filename>.affix</> file of <application>Ispell</> has the following structure:
+ <programlisting>
+ prefixes
+ flag *A:
+     .           >   RE      # As in enter > reenter
+ suffixes
+ flag T:
+     E           >   ST      # As in late > latest
+     [^AEIOU]Y   >   -Y,IEST # As in dirty > dirtiest
+     [AEIOU]Y    >   EST     # As in gray > grayest
+     [^EY]       >   EST     # As in small > smallest
+ </programlisting>
+    </para>
+    <para>
+     And the <filename>.dict</> file has the following structure:
+ <programlisting>
+ lapse/ADGRS
+ lard/DGRS
+ large/PRTY
+ lark/MRS
+ </programlisting>
+    </para>
+    <para>
+     The format of the <filename>.dict</> file is:
+ <programlisting>
+ basic_form/affix_class_name
+ </programlisting>
+    </para>
+    <para>
+     In the <filename>.affix</> file every affix flag is described in the
+     following format:
+ <programlisting>
+ condition > [-stripping_letters,] adding_affix
+ </programlisting>
+    </para>
+    <para>
+     Here, condition has a format similar to the format of regular expressions.
+     It can use groupings <literal>[...]</> and <literal>[^...]</>.
+     For example, <literal>[AEIOU]Y</> means that the last letter of the word
+     is <literal>"y"</> and the penultimate letter is <literal>"a"</>,
+     <literal>"e"</>, <literal>"i"</>, <literal>"o"</> or <literal>"u"</>.
+     <literal>[^EY]</> means that the last letter is neither <literal>"e"</>
+     nor <literal>"y"</>.
+    </para>
+    <para>
      Ispell dictionaries support splitting compound words;
      a useful feature.
      Notice that the affix file should specify a special flag using the
*** 2663,2668 **** SELECT ts_lexize('norwegian_ispell', 'sjokoladefabrikk');
--- 2735,2796 ----
+    <para>
+     <application>MySpell</> is very similar to <application>Hunspell</>.
+     The <filename>.affix</> file of <application>Hunspell</> has the following structure:
+ <programlisting>
+ PFX A Y 1
+ PFX A   0     re         .
+ SFX T N 4
+ SFX T   0     st         e
+ SFX T   y     iest       [^aeiou]y
+ SFX T   0     est        [aeiou]y
+ SFX T   0     est        [^ey]
+ </programlisting>
+    </para>
+    <para>
+     The first line of an affix class is the header. The fields of each affix rule are listed after the header:
+    </para>
+    <itemizedlist spacing="compact" mark="bullet">
+     <listitem>
+      <para>
+       parameter name (PFX or SFX)
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       flag (name of the affix class)
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       characters to strip from the beginning (for a prefix) or the end (for a suffix) of the word
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       affix to add
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       condition that has a format similar to the format of regular expressions.
+      </para>
+     </listitem>
+    </itemizedlist>
+    <para>
+     The <filename>.dict</> file looks like the <filename>.dict</> file of
+     <application>Ispell</>:
+ <programlisting>
+ larder/M
+ lardy/RT
+ large/RSPMYT
+ largehearted
+ </programlisting>
+    </para>
       <application>MySpell</> does not support compound words.
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:

Reply via email to