On Wed, Jul 29, 2009 at 6:59 PM, Andres Freund<and...@anarazel.de> wrote: > Looks nice. The only small gripe I have is that the patch adds trailing > whitespaces at a lot of places... > > Except maybe that I do see no need for changes anymore...
I have fixed this for Sergey in the attached version using "git apply --whitespace=fix". (For those who may be using git to develop patches, I highly recommend "git diff --check" to catch these types of issues before submitting.) I will mark this "Ready for Committer". ...Robert
*** a/contrib/dict_xsyn/dict_xsyn.c --- b/contrib/dict_xsyn/dict_xsyn.c *************** *** 26,31 **** typedef struct --- 26,32 ---- char *key; /* Word */ char *value; /* Unparsed list of synonyms, including the * word itself */ + int pos; /* Position of key word in original string */ } Syn; typedef struct *************** *** 33,39 **** typedef struct --- 34,44 ---- int len; Syn *syn; + bool matchorig; bool keeporig; + bool matchsynonyms; + bool keepsynonyms; + } DictSyn; *************** *** 88,93 **** read_dictionary(DictSyn *d, char *filename) --- 93,99 ---- { char *value; char *key; + char *pos; char *end = NULL; if (*line == '\0') *************** *** 96,121 **** read_dictionary(DictSyn *d, char *filename) value = lowerstr(line); pfree(line); ! key = find_word(value, &end); ! if (!key) ! { ! pfree(value); ! continue; ! } ! if (cur == d->len) { ! d->len = (d->len > 0) ? 2 * d->len : 16; ! if (d->syn) ! d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len); ! else ! d->syn = (Syn *) palloc(sizeof(Syn) * d->len); ! } ! d->syn[cur].key = pnstrdup(key, end - key); ! d->syn[cur].value = value; ! cur++; } tsearch_readline_end(&trst); --- 102,140 ---- value = lowerstr(line); pfree(line); ! pos = value; ! while((key = find_word(pos, &end)) != NULL) { ! if (cur == d->len) ! { ! d->len = (d->len > 0) ? 2 * d->len : 16; ! if (d->syn) ! d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len); ! else ! d->syn = (Syn *) palloc(sizeof(Syn) * d->len); ! } ! ! /* Read first word only if we will match it */ ! if (pos != value || d->matchorig) ! { ! d->syn[cur].key = pnstrdup(key, end - key); ! d->syn[cur].value = pstrdup(value); ! d->syn[cur].pos = key - value; ! ! cur++; ! } ! ! pos = end; ! /* Don't read synonyms if we do not match them */ ! if (!d->matchsynonyms) ! { ! break; ! } ! } ! 
pfree(value); } tsearch_readline_end(&trst); *************** *** 133,155 **** dxsyn_init(PG_FUNCTION_ARGS) List *dictoptions = (List *) PG_GETARG_POINTER(0); DictSyn *d; ListCell *l; d = (DictSyn *) palloc0(sizeof(DictSyn)); d->len = 0; d->syn = NULL; d->keeporig = true; foreach(l, dictoptions) { DefElem *defel = (DefElem *) lfirst(l); ! if (pg_strcasecmp(defel->defname, "KEEPORIG") == 0) { d->keeporig = defGetBoolean(defel); } else if (pg_strcasecmp(defel->defname, "RULES") == 0) { ! read_dictionary(d, defGetString(defel)); } else { --- 152,191 ---- List *dictoptions = (List *) PG_GETARG_POINTER(0); DictSyn *d; ListCell *l; + char *filename = NULL; d = (DictSyn *) palloc0(sizeof(DictSyn)); d->len = 0; d->syn = NULL; + d->matchorig = true; d->keeporig = true; + d->matchsynonyms = false; + d->keepsynonyms = true; foreach(l, dictoptions) { DefElem *defel = (DefElem *) lfirst(l); ! if (pg_strcasecmp(defel->defname, "MATCHORIG") == 0) ! { ! d->matchorig = defGetBoolean(defel); ! } ! else if (pg_strcasecmp(defel->defname, "KEEPORIG") == 0) { d->keeporig = defGetBoolean(defel); } + else if (pg_strcasecmp(defel->defname, "MATCHSYNONYMS") == 0) + { + d->matchsynonyms = defGetBoolean(defel); + } + else if (pg_strcasecmp(defel->defname, "KEEPSYNONYMS") == 0) + { + d->keepsynonyms = defGetBoolean(defel); + } else if (pg_strcasecmp(defel->defname, "RULES") == 0) { ! /* we can't read the rules before parsing all options! */ ! 
filename = pstrdup(defGetString(defel)); } else { *************** *** 160,165 **** dxsyn_init(PG_FUNCTION_ARGS) --- 196,207 ---- } } + if(filename) + { + read_dictionary(d, filename); + pfree(filename); + } + PG_RETURN_POINTER(d); } *************** *** 198,204 **** dxsyn_lexize(PG_FUNCTION_ARGS) int value_length = strlen(value); char *pos = value; int nsyns = 0; - bool is_first = true; res = palloc(0); --- 240,245 ---- *************** *** 214,221 **** dxsyn_lexize(PG_FUNCTION_ARGS) res = repalloc(res, sizeof(TSLexeme) * (nsyns + 2)); res[nsyns].lexeme = NULL; ! /* first word is added to result only if KEEPORIG flag is set */ ! if (d->keeporig || !is_first) { res[nsyns].lexeme = pstrdup(syn); res[nsyns + 1].lexeme = NULL; --- 255,262 ---- res = repalloc(res, sizeof(TSLexeme) * (nsyns + 2)); res[nsyns].lexeme = NULL; ! /* The first word is added only if keeporig=true */ ! if (pos != value || d->keeporig) { res[nsyns].lexeme = pstrdup(syn); res[nsyns + 1].lexeme = NULL; *************** *** 223,231 **** dxsyn_lexize(PG_FUNCTION_ARGS) nsyns++; } - is_first = false; - pos = end + 1; } pfree(value); --- 264,275 ---- nsyns++; } pos = end + 1; + + if(!d->keepsynonyms) + { + break; + } } pfree(value); *** a/contrib/dict_xsyn/expected/dict_xsyn.out --- b/contrib/dict_xsyn/expected/dict_xsyn.out *************** *** 5,14 **** SET client_min_messages = warning; \set ECHO none RESET client_min_messages; ! --configuration ! ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false); --lexize SELECT ts_lexize('xsyn', 'supernova'); ts_lexize ---------------- {sn,sne,1987a} --- 5,80 ---- SET client_min_messages = warning; \set ECHO none RESET client_min_messages; ! -- default configuration - match first word and return it among with all synonyms ! 
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false); --lexize SELECT ts_lexize('xsyn', 'supernova'); + ts_lexize + -------------------------- + {supernova,sn,sne,1987a} + (1 row) + + SELECT ts_lexize('xsyn', 'sn'); + ts_lexize + ----------- + + (1 row) + + SELECT ts_lexize('xsyn', 'grb'); + ts_lexize + ----------- + + (1 row) + + -- the same, but return only synonyms + ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false); + SELECT ts_lexize('xsyn', 'supernova'); + ts_lexize + ---------------- + {sn,sne,1987a} + (1 row) + + SELECT ts_lexize('xsyn', 'sn'); + ts_lexize + ----------- + + (1 row) + + SELECT ts_lexize('xsyn', 'grb'); + ts_lexize + ----------- + + (1 row) + + -- match any word and return all words + ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true); + SELECT ts_lexize('xsyn', 'supernova'); + ts_lexize + -------------------------- + {supernova,sn,sne,1987a} + (1 row) + + SELECT ts_lexize('xsyn', 'sn'); + ts_lexize + -------------------------- + {supernova,sn,sne,1987a} + (1 row) + + SELECT ts_lexize('xsyn', 'grb'); + ts_lexize + ----------- + + (1 row) + + -- match any word and return all words except first one + ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true); + SELECT ts_lexize('xsyn', 'supernova'); + ts_lexize + ---------------- + {sn,sne,1987a} + (1 row) + + SELECT ts_lexize('xsyn', 'sn'); ts_lexize ---------------- {sn,sne,1987a} *************** *** 20,22 **** SELECT ts_lexize('xsyn', 'grb'); --- 86,148 ---- (1 row) + -- match any synonym but not first word, and return first word instead + ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=true); + SELECT ts_lexize('xsyn', 
'supernova'); + ts_lexize + ----------- + + (1 row) + + SELECT ts_lexize('xsyn', 'sn'); + ts_lexize + ------------- + {supernova} + (1 row) + + SELECT ts_lexize('xsyn', 'grb'); + ts_lexize + ----------- + + (1 row) + + -- do not match or return anything + ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=false); + SELECT ts_lexize('xsyn', 'supernova'); + ts_lexize + ----------- + + (1 row) + + SELECT ts_lexize('xsyn', 'sn'); + ts_lexize + ----------- + + (1 row) + + SELECT ts_lexize('xsyn', 'grb'); + ts_lexize + ----------- + + (1 row) + + -- match any word but return nothing + ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=false, MATCHSYNONYMS=true); + SELECT ts_lexize('xsyn', 'supernova'); + ts_lexize + ----------- + {} + (1 row) + + SELECT ts_lexize('xsyn', 'sn'); + ts_lexize + ----------- + {} + (1 row) + + SELECT ts_lexize('xsyn', 'grb'); + ts_lexize + ----------- + + (1 row) + *** a/contrib/dict_xsyn/sql/dict_xsyn.sql --- b/contrib/dict_xsyn/sql/dict_xsyn.sql *************** *** 8,16 **** SET client_min_messages = warning; \set ECHO all RESET client_min_messages; ! --configuration ! ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false); --lexize SELECT ts_lexize('xsyn', 'supernova'); SELECT ts_lexize('xsyn', 'grb'); --- 8,54 ---- \set ECHO all RESET client_min_messages; ! -- default configuration - match first word and return it among with all synonyms ! 
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false); --lexize SELECT ts_lexize('xsyn', 'supernova'); + SELECT ts_lexize('xsyn', 'sn'); SELECT ts_lexize('xsyn', 'grb'); + + -- the same, but return only synonyms + ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false); + SELECT ts_lexize('xsyn', 'supernova'); + SELECT ts_lexize('xsyn', 'sn'); + SELECT ts_lexize('xsyn', 'grb'); + + -- match any word and return all words + ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true); + SELECT ts_lexize('xsyn', 'supernova'); + SELECT ts_lexize('xsyn', 'sn'); + SELECT ts_lexize('xsyn', 'grb'); + + -- match any word and return all words except first one + ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true); + SELECT ts_lexize('xsyn', 'supernova'); + SELECT ts_lexize('xsyn', 'sn'); + SELECT ts_lexize('xsyn', 'grb'); + + -- match any synonym but not first word, and return first word instead + ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=true); + SELECT ts_lexize('xsyn', 'supernova'); + SELECT ts_lexize('xsyn', 'sn'); + SELECT ts_lexize('xsyn', 'grb'); + + -- do not match or return anything + ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=false); + SELECT ts_lexize('xsyn', 'supernova'); + SELECT ts_lexize('xsyn', 'sn'); + SELECT ts_lexize('xsyn', 'grb'); + + -- match any word but return nothing + ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=false, MATCHSYNONYMS=true); + SELECT ts_lexize('xsyn', 'supernova'); + SELECT ts_lexize('xsyn', 'sn'); + SELECT ts_lexize('xsyn', 'grb'); + *** 
a/doc/src/sgml/dict-xsyn.sgml --- b/doc/src/sgml/dict-xsyn.sgml *************** *** 23,35 **** <itemizedlist> <listitem> <para> <literal>keeporig</> controls whether the original word is included (if ! <literal>true</>), or only its synonyms (if <literal>false</>). Default ! is <literal>true</>. </para> </listitem> <listitem> <para> <literal>rules</> is the base name of the file containing the list of synonyms. This file must be stored in <filename>$SHAREDIR/tsearch_data/</> (where <literal>$SHAREDIR</> means --- 23,54 ---- <itemizedlist> <listitem> <para> + <literal>matchorig</> controls whether the original word is accepted by + the dictionary. Default is <literal>true</>. + </para> + </listitem> + <listitem> + <para> <literal>keeporig</> controls whether the original word is included (if ! <literal>true</>) in results, or only its synonyms (if ! <literal>false</>). Default is <literal>true</>. ! </para> ! </listitem> ! <listitem> ! <para> ! <literal>matchsynonyms</> controls whether any of the synonyms is accepted ! by the dictionary (if <literal>true</>). Default is <literal>false</>. </para> </listitem> <listitem> <para> + <literal>keepsynonyms</> controls whether synonyms are returned by the + dictionary (if <literal>true</>). Default is <literal>true</>. + </para> + </listitem> + + <listitem> + <para> <literal>rules</> is the base name of the file containing the list of synonyms. 
This file must be stored in <filename>$SHAREDIR/tsearch_data/</> (where <literal>$SHAREDIR</> means *************** *** 90,96 **** ALTER TEXT SEARCH DICTIONARY --- 109,139 ---- mydb=# SELECT ts_lexize('xsyn', 'word'); ts_lexize ----------------------- + {syn1,syn2,syn3} + + mydb# ALTER TEXT SEARCH DICTIONARY xsyn (RULES='my_rules', KEEPORIG=true); + ALTER TEXT SEARCH DICTIONARY + + mydb=# SELECT ts_lexize('xsyn', 'word'); + ts_lexize + ----------------------- {word,syn1,syn2,syn3} + + mydb# ALTER TEXT SEARCH DICTIONARY xsyn (RULES='my_rules', KEEPORIG=false, MATCHSYNONYMS=true); + ALTER TEXT SEARCH DICTIONARY + + mydb=# SELECT ts_lexize('xsyn', 'syn1'); + ts_lexize + ----------------------- + {syn1,syn2,syn3} + + mydb# ALTER TEXT SEARCH DICTIONARY xsyn (RULES='my_rules', KEEPORIG=true, MATCHORIG=false, KEEPSYNONYMS=false); + ALTER TEXT SEARCH DICTIONARY + + mydb=# SELECT ts_lexize('xsyn', 'syn1'); + ts_lexize + ----------------------- + {word} </programlisting> but real-world usage will involve including it in a text search
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers