On 01.02.2016 20:12, Artur Zakirov wrote:
I have changed the patch: 1 - trgm2.data was corrected, duplicates were deleted. 2 - I have added operators <<-> and <->> with GiST index supporting. A regression test will pass only with the patch http://www.postgresql.org/message-id/capphfdt19fwqxaryjkzxb3oxmv-kan3fluzrooare_u3h3c...@mail.gmail.com 3 - the function substring_similarity() was renamed to subword_similarity(). But there is not a function substring_similarity_pos() yet. It is not trivial.
Sorry, in the previous patch was a typo. Here is the fixed patch. -- Artur Zakirov Postgres Professional: http://www.postgrespro.com Russian Postgres Company
*** a/contrib/pg_trgm/pg_trgm--1.2.sql --- b/contrib/pg_trgm/pg_trgm--1.2.sql *************** *** 3,13 **** --- 3,15 ---- -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION pg_trgm" to load this file. \quit + -- Deprecated function CREATE FUNCTION set_limit(float4) RETURNS float4 AS 'MODULE_PATHNAME' LANGUAGE C STRICT VOLATILE; + -- Deprecated function CREATE FUNCTION show_limit() RETURNS float4 AS 'MODULE_PATHNAME' *************** *** 26,32 **** LANGUAGE C STRICT IMMUTABLE; CREATE FUNCTION similarity_op(text,text) RETURNS bool AS 'MODULE_PATHNAME' ! LANGUAGE C STRICT STABLE; -- stable because depends on trgm_limit CREATE OPERATOR % ( LEFTARG = text, --- 28,34 ---- CREATE FUNCTION similarity_op(text,text) RETURNS bool AS 'MODULE_PATHNAME' ! LANGUAGE C STRICT STABLE; -- stable because depends on pg_trgm.sml_limit CREATE OPERATOR % ( LEFTARG = text, *** a/contrib/pg_trgm/trgm.h --- b/contrib/pg_trgm/trgm.h *************** *** 105,111 **** typedef char *BITVECP; typedef struct TrgmPackedGraph TrgmPackedGraph; ! extern float4 trgm_limit; extern uint32 trgm2int(trgm *ptr); extern void compact_trigram(trgm *tptr, char *str, int bytelen); --- 105,111 ---- typedef struct TrgmPackedGraph TrgmPackedGraph; ! extern double trgm_sml_limit; extern uint32 trgm2int(trgm *ptr); extern void compact_trigram(trgm *tptr, char *str, int bytelen); *** a/contrib/pg_trgm/trgm_gin.c --- b/contrib/pg_trgm/trgm_gin.c *************** *** 206,212 **** gin_trgm_consistent(PG_FUNCTION_ARGS) * similarity is just c / len1. * So, independly on DIVUNION the upper bound formula is the same. */ ! res = (nkeys == 0) ? false : ((((((float4) ntrue) / ((float4) nkeys))) >= trgm_limit) ? true : false); break; case ILikeStrategyNumber: #ifndef IGNORECASE --- 206,213 ---- * similarity is just c / len1. * So, independly on DIVUNION the upper bound formula is the same. */ ! res = (nkeys == 0) ? false : ! ((((((float4) ntrue) / ((float4) nkeys))) >= trgm_sml_limit) ? true : false); break; case ILikeStrategyNumber: #ifndef IGNORECASE *************** *** 283,289 **** gin_trgm_triconsistent(PG_FUNCTION_ARGS) /* * See comment in gin_trgm_consistent() about * upper bound formula */ ! res = (nkeys == 0) ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= trgm_limit) ? GIN_MAYBE : GIN_FALSE); break; case ILikeStrategyNumber: #ifndef IGNORECASE --- 284,291 ---- /* * See comment in gin_trgm_consistent() about * upper bound formula */ ! res = (nkeys == 0) ? GIN_FALSE : ! (((((float4) ntrue) / ((float4) nkeys)) >= trgm_sml_limit) ? GIN_MAYBE : GIN_FALSE); break; case ILikeStrategyNumber: #ifndef IGNORECASE *** a/contrib/pg_trgm/trgm_gist.c --- b/contrib/pg_trgm/trgm_gist.c *************** *** 294,300 **** gtrgm_consistent(PG_FUNCTION_ARGS) float4 tmpsml = cnt_sml(key, qtrg); /* strange bug at freebsd 5.2.1 and gcc 3.3.3 */ ! res = (*(int *) &tmpsml == *(int *) &trgm_limit || tmpsml > trgm_limit) ? true : false; } else if (ISALLTRUE(key)) { /* non-leaf contains signature */ --- 294,301 ---- float4 tmpsml = cnt_sml(key, qtrg); /* strange bug at freebsd 5.2.1 and gcc 3.3.3 */ ! res = (*(int *) &tmpsml == *(int *) &trgm_sml_limit ! || tmpsml > trgm_sml_limit) ? true : false; } else if (ISALLTRUE(key)) { /* non-leaf contains signature */ *************** *** 308,314 **** gtrgm_consistent(PG_FUNCTION_ARGS) if (len == 0) res = false; else ! res = (((((float8) count) / ((float8) len))) >= trgm_limit) ? true : false; } break; case ILikeStrategyNumber: --- 309,315 ---- if (len == 0) res = false; else ! res = (((((float8) count) / ((float8) len))) >= trgm_sml_limit) ? true : false; } break; case ILikeStrategyNumber: *** a/contrib/pg_trgm/trgm_op.c --- b/contrib/pg_trgm/trgm_op.c *************** *** 14,20 **** PG_MODULE_MAGIC; ! float4 trgm_limit = 0.3f; PG_FUNCTION_INFO_V1(set_limit); PG_FUNCTION_INFO_V1(show_limit); --- 14,23 ---- PG_MODULE_MAGIC; ! /* GUC variables */ ! double trgm_sml_limit = 0.3f; ! ! void _PG_init(void); PG_FUNCTION_INFO_V1(set_limit); PG_FUNCTION_INFO_V1(show_limit); *************** *** 23,44 **** PG_FUNCTION_INFO_V1(similarity); PG_FUNCTION_INFO_V1(similarity_dist); PG_FUNCTION_INFO_V1(similarity_op); Datum set_limit(PG_FUNCTION_ARGS) { float4 nlimit = PG_GETARG_FLOAT4(0); if (nlimit < 0 || nlimit > 1.0) ! elog(ERROR, "wrong limit, should be between 0 and 1"); ! trgm_limit = nlimit; ! PG_RETURN_FLOAT4(trgm_limit); } Datum show_limit(PG_FUNCTION_ARGS) { ! PG_RETURN_FLOAT4(trgm_limit); } static int --- 26,77 ---- PG_FUNCTION_INFO_V1(similarity_dist); PG_FUNCTION_INFO_V1(similarity_op); + /* + * Module load callback + */ + void + _PG_init(void) + { + /* Define custom GUC variables. */ + DefineCustomRealVariable("pg_trgm.sml_limit", + "Sets the threshold used by the %% operator.", + "Valid range is 0.0 .. 1.0.", + &trgm_sml_limit, + 0.3, + 0.0, + 1.0, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + } + /* + * Deprecated function. + * Use "pg_trgm.sml_limit" GUC variable instead of this function + */ Datum set_limit(PG_FUNCTION_ARGS) { float4 nlimit = PG_GETARG_FLOAT4(0); if (nlimit < 0 || nlimit > 1.0) ! ereport(ERROR, ! (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), ! errmsg("wrong limit, should be between 0 and 1"))); ! trgm_sml_limit = nlimit; ! PG_RETURN_FLOAT4(trgm_sml_limit); } + /* + * Deprecated function. + * Use "pg_trgm.sml_limit" GUC variable instead of this function + */ Datum show_limit(PG_FUNCTION_ARGS) { ! PG_RETURN_FLOAT4(trgm_sml_limit); } static int *************** *** 720,724 **** similarity_op(PG_FUNCTION_ARGS) PG_GETARG_DATUM(0), PG_GETARG_DATUM(1))); ! PG_RETURN_BOOL(res >= trgm_limit); } --- 753,757 ---- PG_GETARG_DATUM(0), PG_GETARG_DATUM(1))); ! PG_RETURN_BOOL(res >= trgm_sml_limit); } *** a/doc/src/sgml/pgtrgm.sgml --- b/doc/src/sgml/pgtrgm.sgml *************** *** 99,105 **** Returns the current similarity threshold used by the <literal>%</> operator. This sets the minimum similarity between two words for them to be considered similar enough to ! be misspellings of each other, for example. </entry> </row> <row> --- 99,106 ---- Returns the current similarity threshold used by the <literal>%</> operator. This sets the minimum similarity between two words for them to be considered similar enough to ! be misspellings of each other, for example ! (<emphasis>deprecated</emphasis>). </entry> </row> <row> *************** *** 108,114 **** <entry> Sets the current similarity threshold that is used by the <literal>%</> operator. The threshold must be between 0 and 1 (default is 0.3). ! Returns the same value passed in. </entry> </row> </tbody> --- 109,115 ---- <entry> Sets the current similarity threshold that is used by the <literal>%</> operator. The threshold must be between 0 and 1 (default is 0.3). ! Returns the same value passed in (<emphasis>deprecated</emphasis>). </entry> </row> </tbody> *************** *** 133,139 **** <entry> Returns <literal>true</> if its arguments have a similarity that is greater than the current similarity threshold set by ! <function>set_limit</>. </entry> </row> <row> --- 134,140 ---- <entry> Returns <literal>true</> if its arguments have a similarity that is greater than the current similarity threshold set by ! <varname>pg_trgm.sml_limit</>. </entry> </row> <row> *************** *** 150,155 **** --- 151,177 ---- </sect2> <sect2> + <title>GUC Parameters</title> + + <variablelist> + <varlistentry id="guc-pgtrgm-sml-limit" xreflabel="pg_trgm.sml_limit"> + <term> + <varname>pg_trgm.sml_limit</> (<type>real</type>) + <indexterm> + <primary><varname>pg_trgm.sml_limit</> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + Sets the current similarity threshold that is used by the <literal>%</> + operator. The threshold must be between 0 and 1 (default is 0.3). + </para> + </listitem> + </varlistentry> + </variablelist> + </sect2> + + <sect2> <title>Index Support</title> <para>
*** a/contrib/pg_trgm/Makefile --- b/contrib/pg_trgm/Makefile *************** *** 7,13 **** EXTENSION = pg_trgm DATA = pg_trgm--1.2.sql pg_trgm--1.0--1.1.sql pg_trgm--1.1--1.2.sql pg_trgm--unpackaged--1.0.sql PGFILEDESC = "pg_trgm - trigram matching" ! REGRESS = pg_trgm ifdef USE_PGXS PG_CONFIG = pg_config --- 7,13 ---- DATA = pg_trgm--1.2.sql pg_trgm--1.0--1.1.sql pg_trgm--1.1--1.2.sql pg_trgm--unpackaged--1.0.sql PGFILEDESC = "pg_trgm - trigram matching" ! REGRESS = pg_trgm pg_subword_trgm ifdef USE_PGXS PG_CONFIG = pg_config *** /dev/null --- b/contrib/pg_trgm/data/trgm2.data *************** *** 0 **** --- 1,696 ---- + Baikal + Baikaluobbal + Lake Baikal + Baikalakko + Baikal Business Centre + Baikal Listvyanka Hotel + Baikal Airfield + Baikalovo + Transbaikalia + Baikal Mountains + Baikal Hotel Moscow + Zabaikalie + Pribaikalskaya + Baikal Plaza + Rubaikale + Tandobai Algad + Daikalay + Bakall + Stubaital + Neustift im Stubaital + Anonyme Appartments Stubaital + Barkaladja Pool + Awabakal Nature Reserve + Awabakal Field Studies Centre + Barkala + Bailallie + Barkala Park + Purba Kalaujan + Nabakalas + Barkal + Baikanthapur + Baikarjhuti + Baika + Baikari + Bakalia Char + Dakshin Bakalia + Purba Kalmegha + Efreytor-Bakalovo + Baykalsko + Baykal + Baskaltsi + Bakalite + Bajkal + Efrejtor Bakalovo + Kampong Bakaladong + Riacho do Sambaibal + Sambaibal + Barkalabava + Zabaykal + Bakalar Lake + Kaikalahun Indian Reserve 25 + Tumba-Kalamba + Kamba-Kalele + Boyagbakala + Bombakalo + Batikalengbe + Bakalukudu + Bakalawa + Bakala + Matamba-Kalenge + Kusu-Bakali + Kambakala + Bakali + Abakalu + Bonagbakala + Bakalua + Bikala Madila + Bikala + Bumba-Kaloki + Tumba-Kalunga + Kabankala + Mambakala + Tumba-Kalumba + Kabakala + Bikalabwa + Bomba-Kalende + Mwalaba-Kalamba + Matamba-Kalenga + Bumba-Kalumba + Bikalange + Kabikala + Mubikale + Kanampumba-Kalawa + Tshiabakale + Bakaly + Bakalongo + Bakale + Bakala Koupi + Bambakala + Bakalou + Tsibakala + Kimbakala + Dabakalakoro + Dabakala + Bakalafoulou + Ngao Bakala + Mobaika + Baimalou + Xibaitaling + Baikai + Baikang + Baitaling + Baikan + Baimaling Linchang + Baimalong + Baikanzui + Baiyali + Baimaling + Baimalang Donggang + Baikangshuoma + Baitaliao + Taikale + Babainale + Bailale + Baibale + Baiwale + Baikangnei + Baitali + Xiabaikan + Bailalong + Baimaluo + Baikacun + Baisala + Bailalin + Baimala + Baidalong + Dabaika + Caikalong + Cuobaikacun + Baikadangcun + Baimalin + Subaika + Gabakkale + Barkallou + Embatkala + Bodega Tabaibal + Golba Kalo + Haikala + Kaikale + Waikaloulevu + Waikalou Creek + Waikalou + Ndelaikalou + Ndelaikalokalo + Bay of Backaland + Bankali + Ker Samba Kalla + Demba Kali + Bakalarr + Baipal + Kalibakalako + Dalabakala + Bikal + Sembaikan + Praikalogu + Tanjung Ompaikalio + Bonebabakal + Tanjung Batikala + Pulau Bakalanpauno + Teluk Bakalan + Bakaltua Bank + Bakalrejo + Bakalan + Sungai Bakaladiyan + Bakal + Buku Baikole + Pulau Baika + Tanjung Bakalinga + Pulau Bakalan + Desa Bakalan + Kebakkalang + Ngambakalang + Mota Sabakal + Bakalan Lor + Babakalo + Buyu Rapanbakalai + Kalimundubakalan + Bakalpokok + Bakaldukuh + Tanabakal + Tanjung Aikaluin + Desa Bakalrejo + Bakalan Kidul + Desa Kebakalan + Kebakalan + Bakalan Kulon + Gunung Bakalan + Kalibakal + Bakaljaya + Trobakal + Bakalan Wetan + Desa Bakal + Alue Bakkala + Uruk Bakal + Bakalbuah + Kwala Bakala + Bakal Lama + Bakal Julu + Bakal Batu + Moncong Baika + Sampangbakalan + Bakalam + Desa Bakalankrapyak + Lebakkalapa Tonggoh + Trembakal + Bakalan Tengah + Kali Bakalan + Desa Cemengbakalan + Desa Bakalanpule + Gunung Bakal + Desa Tambakkalisogo + Tambakkalisogo + Desa Bakalanrayung + Salu Bakalaeng + Bakalaeng + Danau Bakalan + Selat Bakalan + Selat Bakalanpauno + Laikalanda + Bakalinga + Tanjung Mbakalang + Desa Bakalankrajan + Bakalan Dua + Kali Purbakala + Desa Bakalanwringinpitu + Tukad Kubakal + Praikalangga + Banjar Kubakal + Eat Bakal + Sungai Bakala + Kombakalada + Sori Rabakalo + Kahambikalela + Baikarara + Baikapaka + Tukad Bakalan + Teluk Haludubakal + Yabakalewa + Praikalumbang + Waikalowo + Praikalubu + Loko Praikalubu + Ramuk Ombakalada + Praikalebung + Praikaleka + Andabakal + Praikalau + Praikalokat + Praikalimbung + Bambakalo + Leubakkalian + Pematang Baitalimbangan + Lebakalil + Gereba Kaler + Krajan Bakalan + Bakalan Barat + Muarabakal + Umbulan Maharobakal + Bakaldalam + Talang Bakal + Pematang Bakalpanang + Baidaloen + Jatibakal + Tubu Bakalekuk + Dola Peimambakal + Bakalang + Teluk Bakalang + Salu Baidale + Bakalerek + Ile Bakalibu + Parbakalan + Praikalembu + Palindi Laikali + Praikalu + Sori Labakalate + Air Bakal-kecil + Sungaikalung + Sungaikalong + Pematang Bakalpanjang + Payabakal + Waikala + Sungaikali + Sungai Pebakalan + Parit Membakal + Bakalpakebo + Baikat Abu Jaraban + Maikalganj + Maikala Range + Bakalha + Baitalpur + Baikanthpur + Baihal + Barkala Reserved Forest + Babaipalli + Kaikalapettai + Kambainallur + Bakkalale + Kaikalui + Baijalpur + Nehalla Bankalah Reserved Forest + Barkala Rao + Barkali + Baidal + Barkaleh + Darreh Pumba Kal + Bahkalleh + Wibakale + Gaikali + Gagaba Kalo + Barkalare + Bakkalmal + Gora Bakalyadyr + Rodnik Bakalybulak + Urochishche Bakaly + Sopka Bakaly + Gory Bakaly + Bugor Arba-Kalgan + Ozero Baykal + Kolodets Tabakkalgan + Walangivattu Vaikal + Vattevaikal Anicut + Vaikali Tevar Kulam + Vaikalitevan Kulam + Vaikaladichchenai + Uchchodaikallu + Sellapattu Vaikal + Savata Vaikal + Puttadivali Vaikal + Palukadu Vaikal + Mulaikallu Kulam + Koraikallimadu + Koraikalapu Kulam + Karaiyamullivaikal + Karaivaikal Kulam + Kanawali Vaikal + Habakkala + Chalam Vaikal Aru + Ambakala Wewa + Alaikallupoddakulam + Alaikallupodda Alankulam + Akamadi Vaikal + Alaikalluppodda Kulam + Vaikaliththevakulam + Baikole + Sidi Mohammed el Bakali + Sidi Mohammed Bakkal + Sidi Bakal + Oulad el Bakkal + Zaouia Oulad Bakal + Azib el Bakkali + Tombakala + Malaikaly + Ambadikala + Bakalica + Bakalnica + Abankala + Kombakala + Bawkalut + Bakaleko + Bawkalut Chaung + Baukala + Cerro Bainaltzin + Sungai Bakal + Bukit Ubaibalih + Kampong Sombakal + Kampung Lebai Ali + Batikal + Bakalalan Airport + Maikali + Bakalum + Bakalambani + Abakaliki + Tsaunin Maikalaji + Baikaha + Llano Limbaika + Barkald + Barkald stasjon + Barkaleitet + Barkaldfossen + Barkaldvola + Bakkalegskardet + Baikajavri + Barkalden + Bakkalia + Siljabaika + Aikaluokta + Blombakkali + Bavkalasis + Baikajohka + Bakkalykkja + Bakalauri + Bakalauri1 + Bakalauri2 + Bakalauri3 + Bakalauri4 + Bakalauri5 + Bakalauri6 + Bakalauri7 + Bakalauri8 + Bakalauri9 + Bakalsen + Baiyaldi + Naikala + Baikanda + Barkalne + Bakalipur + Bakaldum + Raikal + Baikatte + Maikal + Bakalbhar + Waikalabubu Bay + Baikai Island + Abikal + Boikalakalawa Bay + Maikal River + Bakalao Asibi Point + Bankal + Bakalod Island + Bakalao Point + Bakalan River + Bakal Dos + Bakal Uno + Daang Bakal + Bankal School + Bakal Tres + Kabankalan City Public Plaza + Ranra Tabai Algad + Bairkal Jabal + Bairkal Dhora + Bairkal + Zaibai Algad + Gulba Kalle + Ragha Bakalzai + Dabbarkal Sar + Tabai Algad + Haikalzai + Wuchobai Algad + Jabba Kalai + Goth Soba Kaloi + Baikar Tsarai + Dudgaikal + Baixale Kamar + Zebai Algad + Bakal Khel + Goth Haikal + Haikal + Jaba Kalle + Bakalovina + Salabaikasy + Guba Kalita + Guba Kalgalaksha + Guba Kaldo + Bakalovo + Baykalovo + Baskalino + Sopka Barkaleptskaya + Bakalovskaya Ferma + Bakalinskiy Rayon + Sovkhoz Bakalinskiy + Bakalinskiy + Bakaldy + Bakaldinskoye + Urochishche Bakaldikha + Zabaykalovskiy + Barkalova + Barkalovka + Gora Barkalova + Gora Barkalyu + Bikalamakhi + Stantsiya Bakal + Baykalovskiy Rayon + Baykalovskiy + Baykalovsk + Bakalda + Boloto Malyy Baykal + Boloto Baykal + Zabaykalka + Stantsiya Baykal + Baykalo-Amurskaya Zheleznaya Doroga + Kolkhoz Krasnyy Baykal + Zaliv Baykal + Bakalino + Ovrag Bakalda + Bakaldovshchina + Prud Novyy Baykal + Bakaleyka + Bakalka + Bakaly TV Mast + Urochishche Bakalovo + Kambaika + Maloye Baykalovo + Bakalinskiy Leskhoz + Bikalikha + Kordon Barkalo + Sanatoriy Baykal + Port Baykal + Baykalikha + Polevoy Stan Baykal + Bakalovka + Ramada Makkah Shubaika + Mount Tohebakala + Tambakale Island + Mbanitambaika Island + Mbakalaka Island + Kumbakale + Kaikaloka + Kelesaikal + Nasb Gabakallah + Jabal Barkal + Jabal Abakallah + Al Barkali + Shabakal Abbass + Mabaikuli + Bambakalema + Bambakalia + Baiwala + Babakalia + Baikama + Bankalol + Kundebakali + Yumbaikamadu + Tabakali + Daba Kalharereh + Barkale + Bakalshile + Bakaloolay + Buur Bakaley + Bakaley + Buur Bakale + Bakalaale + Jabal Mobakali + Khor Bakallii + Korombaital + Ambakali + Ba Kaliin + Mbay Bakala + Tagobikala + Fayzabadkala + Aghbai Allazy + Aghbai Alikagar + Gora Fayzabadkala + Daraikalot + Aghbai Alakisirak + Beikala + Foho Berbakalau + Mota Caicabaisala + Sungai Utabailale + Urochishche Bakalarnyn-Ayasy + Urochishche Batkali + Khrebet Batkali + Ras Barkallah + Babakale + Fabrikalar + Bakalukalu Shan + Bakalukalu + Laikala + Waikalakaka + Columbus Bakalar Municipal Airport + Bakalar Library + Bakkala Cemetery + Clifton T Barkalow Elementary School + Barkalow Hollow + Kailuapuhi Waikalua Homesteads + Kawaikalia Gulch + Waikalae + Waikaloa Stream + Waikalua-Loko Fish Pond + Halekou Waikaluakai Homesteads + East Waikalua + Omar Haikal Islamic Academy + Bakalar Air Force Base (historical) + Koshbakaly + Bagkalen + Gora Baikara + Mfumbaika + Mbakalungu + Chumbaika + Ntombankala School + Bakalabwa Pans + Khobai al Janhra + Holiday Inn Dubai Al Barsha + Novotel Dubai Al Barsha + Doubletree Res.Dubai-Al Barsha + Doubletree By Hilton Hotel and Apartments Dubai Al Barsha + Doubletree By Hilton Dubai Al Barsha Hotel and Res + Park Inn By Radisson Dubai Al Barsha + Ramee Rose Hotel Dubai Al Barsha + Aparthotel Adagio Premium Dubai Al Barsha + Ataikala + Selman Marrakech + Riad Ain Marrakech + Taj Palace Marrakech + Delano Marrakech + Pullman Marrakech Palmeraie Resort And Spa + Lalla Calipau Marrakech + Hotel Fashion Marrakech + Four Seasons Resort Marrakech + Adama Resort Marrakech + Pullman Marrakech Palmeraie Re + Ramada Resort Marrakech Douar Al Hana + Hotel Zahia Marrakech + Hotel Marrakech Le Tichka + Le Chems Marrakech + Beachcomber Royal Palm Marrakech + Residence Marrakech + Riad Hermes Marrakech + Riad La Lune De Marrakech + Hotel Marrakech Le Sangho Privilege + Tempoo Hotel Marrakech + Ag Hotel & Spa Marrakech + Palm Appart Club Marrakech + Hotel Ibis Moussafir Marrakech Palmeraie + Ibis Marrakech Gare Voyageurs + Marrakech Ryads Parc And Spa + Terra Mia Marrakech Riad + Residence Dar Lamia Marrakech + Pullman Marrakech Palmeraie Rs + Moussaf Marrakech Centre Gare + Tempoo Hotel Marrakech Adults Only + Sahara Palace Marrakech + Moroccan House Marrakech + El Andalouss And Spa Marrakech + Suite Novotel Marrakech Rs + Dar Catalina Marrakech Hotel Non Refundable Room + Marrakech Hotel + Oued Tammarrakech + Tammarrakech + Cercle de Marrakech-Banlieue + Marrakech-Tensift-Al Haouz + Koudia Marrakech + Hotel Tichka Salam Marrakech + L'Atlas Marrakech + Royal Mirage Deluxe Marrakech + Golden Tulip Farah Marrakech + Ryad Mogador Marrakech + Coralia Club Marrakech Palmariva + La Sultana Marrakech + Marrakech-Medina + Marrakech + Museum of Marrakech + Douar Marrakechiyinc + Ibis Marrakech Centre Gare + Golden Tulip Rawabi Marrakech + Murano Resort Marrakech + Marrakech Garden Hotel + Pullman Marrakech Palmerai Resort & Spa + The Pearl Marrakech + Palais Calipau Marrakech + Hostal Equity Point Marrakech + Sofitel Marrakech Lounge And Spa + Pullman Marrakech Hotel And Spa + Sofitel Marrakech Palais Imperial + Hotel Ibis Moussafir Marrakech Centre Gare + Red Hotel Marrakech + Riad Zenith Marrakech + Ksar Catalina Marrakech Hotel + Blue Sea Hotel Marrakech Ryads Parc & Spa + Bluebay Marrakech + Pullman Marrakech Palmeraie Resort & Spa Hotel + Riad Litzy Marrakech + Sultana Hotel & Spa Marrakech + Albatros Club Marrakech + Hotel Sangho Club Marrakech + Suite Novotel Marrakech Hotel + Riad Utopia Suites & Spa Marrakech + Riad Fatinat Marrakech + Riad Dar El Aila Marrakech + Es Saadi And Casino De Marrakech + Dar Catalina Marrakech Hotel + Grace Marrakech + Marrakesh Apartments + Marrakesh Country Club + Koudiat Lmerrakechiyine + Sidi Mohammed el Marrakchi + Marrakesh + Marrakchien + Marrakchia + Marrakesh Menara Airport + Marrakesh Hua Hin Resort & Spa + Marrakesh Hua Hin Resort And Spa + Marrakesh Resort And Spa (Pool Suite) + Marrakesh Huahin Resort & Spa + Ibis Moussafir Marrakesh Centre Gare Hotel + Maerak-chi + Dar Hammou Ben Merrakchi + Lalla el Marakchia + Khrebet Marrakh + Sungai Maru Kechil + Marrache + Goth Marracha + Maramech Hill + Maramech Woods Nature Preserve + Oued Karakech + Samarra School + Jangal-e Marakeh Sar *** /dev/null --- b/contrib/pg_trgm/expected/pg_subword_trgm.out *************** *** 0 **** --- 1,1044 ---- + CREATE TABLE test_trgm2(t text COLLATE "C"); + \copy test_trgm2 from 'data/trgm2.data' + select t,subword_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; + t | sml + -------------------------------------+---------- + Baykal | 1 + Boloto Baykal | 1 + Boloto Malyy Baykal | 1 + Kolkhoz Krasnyy Baykal | 1 + Ozero Baykal | 1 + Polevoy Stan Baykal | 1 + Port Baykal | 1 + Prud Novyy Baykal | 1 + Sanatoriy Baykal | 1 + Stantsiya Baykal | 1 + Zaliv Baykal | 1 + Baykalikha | 0.857143 + Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 + Baykalovo | 0.857143 + Baykalovsk | 0.857143 + Baykalovskiy | 0.857143 + Baykalovskiy Rayon | 0.857143 + Baykalsko | 0.857143 + Maloye Baykalovo | 0.857143 + Zabaykal | 0.714286 + (20 rows) + + select t,subword_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; + t | sml + ------------------------------+----- + Kabankala | 1 + Kabankalan City Public Plaza | 0.9 + Abankala | 0.7 + Ntombankala School | 0.6 + (4 rows) + + select t,subword_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; + t | sml + -------------------------------------+---------- + Baykal | 1 + Boloto Baykal | 1 + Boloto Malyy Baykal | 1 + Kolkhoz Krasnyy Baykal | 1 + Ozero Baykal | 1 + Polevoy Stan Baykal | 1 + Port Baykal | 1 + Prud Novyy Baykal | 1 + Sanatoriy Baykal | 1 + Stantsiya Baykal | 1 + Zaliv Baykal | 1 + Baykalikha | 0.857143 + Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 + Baykalovo | 0.857143 + Baykalovsk | 0.857143 + Baykalovskiy | 0.857143 + Baykalovskiy Rayon | 0.857143 + Baykalsko | 0.857143 + Maloye Baykalovo | 0.857143 + Zabaykal | 0.714286 + (20 rows) + + select t,subword_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; + t | sml + ------------------------------+----- + Kabankala | 1 + Kabankalan City Public Plaza | 0.9 + Abankala | 0.7 + Ntombankala School | 0.6 + (4 rows) + + select t <->> 'Kabankala', t from test_trgm2 order by t <->> 'Kabankala' limit 7; + ?column? | t + ----------+---------------------------------- + 0 | Kabankala + 0.1 | Kabankalan City Public Plaza + 0.3 | Abankala + 0.4 | Ntombankala School + 0.416667 | Kabakala + 0.5 | Nehalla Bankalah Reserved Forest + 0.538462 | Kabikala + (7 rows) + + create index trgm_idx2 on test_trgm2 using gist (t gist_trgm_ops); + set enable_seqscan=off; + select t,subword_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; + t | sml + -------------------------------------+---------- + Baykal | 1 + Boloto Baykal | 1 + Boloto Malyy Baykal | 1 + Kolkhoz Krasnyy Baykal | 1 + Ozero Baykal | 1 + Polevoy Stan Baykal | 1 + Port Baykal | 1 + Prud Novyy Baykal | 1 + Sanatoriy Baykal | 1 + Stantsiya Baykal | 1 + Zaliv Baykal | 1 + Baykalikha | 0.857143 + Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 + Baykalovo | 0.857143 + Baykalovsk | 0.857143 + Baykalovskiy | 0.857143 + Baykalovskiy Rayon | 0.857143 + Baykalsko | 0.857143 + Maloye Baykalovo | 0.857143 + Zabaykal | 0.714286 + (20 rows) + + select t,subword_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; + t | sml + ------------------------------+----- + Kabankala | 1 + Kabankalan City Public Plaza | 0.9 + Abankala | 0.7 + Ntombankala School | 0.6 + (4 rows) + + select t,subword_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; + t | sml + -------------------------------------+---------- + Baykal | 1 + Boloto Baykal | 1 + Boloto Malyy Baykal | 1 + Kolkhoz Krasnyy Baykal | 1 + Ozero Baykal | 1 + Polevoy Stan Baykal | 1 + Port Baykal | 1 + Prud Novyy Baykal | 1 + Sanatoriy Baykal | 1 + Stantsiya Baykal | 1 + Zaliv Baykal | 1 + Baykalikha | 0.857143 + Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 + Baykalovo | 0.857143 + Baykalovsk | 0.857143 + Baykalovskiy | 0.857143 + Baykalovskiy Rayon | 0.857143 + Baykalsko | 0.857143 + Maloye Baykalovo | 0.857143 + Zabaykal | 0.714286 + (20 rows) + + select t,subword_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; + t | sml + ------------------------------+----- + Kabankala | 1 + Kabankalan City Public Plaza | 0.9 + Abankala | 0.7 + Ntombankala School | 0.6 + (4 rows) + + explain (costs off) + select t <->> 'Kabankala', t from test_trgm2 order by t <->> 'Kabankala' limit 7; + QUERY PLAN + ------------------------------------------------ + Limit + -> Index Scan using trgm_idx2 on test_trgm2 + Order By: (t <->> 'Kabankala'::text) + (3 rows) + + select t <->> 'Kabankala', t from test_trgm2 order by t <->> 'Kabankala' limit 7; + ?column? | t + ----------+---------------------------------- + 0 | Kabankala + 0.1 | Kabankalan City Public Plaza + 0.3 | Abankala + 0.4 | Ntombankala School + 0.416667 | Kabakala + 0.5 | Nehalla Bankalah Reserved Forest + 0.538462 | Kabikala + (7 rows) + + drop index trgm_idx2; + create index trgm_idx2 on test_trgm2 using gin (t gin_trgm_ops); + set enable_seqscan=off; + select t,subword_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; + t | sml + -------------------------------------+---------- + Baykal | 1 + Boloto Baykal | 1 + Boloto Malyy Baykal | 1 + Kolkhoz Krasnyy Baykal | 1 + Ozero Baykal | 1 + Polevoy Stan Baykal | 1 + Port Baykal | 1 + Prud Novyy Baykal | 1 + Sanatoriy Baykal | 1 + Stantsiya Baykal | 1 + Zaliv Baykal | 1 + Baykalikha | 0.857143 + Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 + Baykalovo | 0.857143 + Baykalovsk | 0.857143 + Baykalovskiy | 0.857143 + Baykalovskiy Rayon | 0.857143 + Baykalsko | 0.857143 + Maloye Baykalovo | 0.857143 + Zabaykal | 0.714286 + (20 rows) + + select t,subword_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; + t | sml + ------------------------------+----- + Kabankala | 1 + Kabankalan City Public Plaza | 0.9 + Abankala | 0.7 + Ntombankala School | 0.6 + (4 rows) + + select t,subword_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; + t | sml + -------------------------------------+---------- + Baykal | 1 + Boloto Baykal | 1 + Boloto Malyy Baykal | 1 + Kolkhoz Krasnyy Baykal | 1 + Ozero Baykal | 1 + Polevoy Stan Baykal | 1 + Port Baykal | 1 + Prud Novyy Baykal | 1 + Sanatoriy Baykal | 1 + Stantsiya Baykal | 1 + Zaliv Baykal | 1 + Baykalikha | 0.857143 + Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 + Baykalovo | 0.857143 + Baykalovsk | 0.857143 + Baykalovskiy | 0.857143 + Baykalovskiy Rayon | 0.857143 + Baykalsko | 0.857143 + Maloye Baykalovo | 0.857143 + Zabaykal | 0.714286 + (20 rows) + + select t,subword_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; + t | sml + ------------------------------+----- + Kabankala | 1 + Kabankalan City Public Plaza | 0.9 + Abankala | 0.7 + Ntombankala School | 0.6 + (4 rows) + + set "pg_trgm.subword_limit" to 0.5; + select t,subword_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; + t | sml + -------------------------------------+---------- + Baykal | 1 + Boloto Baykal | 1 + Boloto Malyy Baykal | 1 + Kolkhoz Krasnyy Baykal | 1 + Ozero Baykal | 1 + Polevoy Stan Baykal | 1 + Port Baykal | 1 + Prud Novyy Baykal | 1 + Sanatoriy Baykal | 1 + Stantsiya Baykal | 1 + Zaliv Baykal | 1 + Baykalikha | 0.857143 + Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 + Baykalovo | 0.857143 + Baykalovsk | 0.857143 + Baykalovskiy | 0.857143 + Baykalovskiy Rayon | 0.857143 + Baykalsko | 0.857143 + Maloye Baykalovo | 0.857143 + Zabaykal | 0.714286 + Bakal Batu | 0.571429 + Zabaykalka | 0.571429 + Zabaykalovskiy | 0.571429 + (23 rows) + + select t,subword_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; + t | sml + ----------------------------------+---------- + Kabankala | 1 + Kabankalan City Public Plaza | 0.9 + Abankala | 0.7 + Ntombankala School | 0.6 + Kabakala | 0.583333 + Nehalla Bankalah Reserved Forest | 0.5 + (6 rows) + + select t,subword_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; + t | sml + -------------------------------------+---------- + Baykal | 1 + Boloto Baykal | 1 + Boloto Malyy Baykal | 1 + Kolkhoz Krasnyy Baykal | 1 + Ozero Baykal | 1 + Polevoy Stan Baykal | 1 + Port Baykal | 1 + Prud Novyy Baykal | 1 + Sanatoriy Baykal | 1 + Stantsiya Baykal | 1 + Zaliv Baykal | 1 + Baykalikha | 0.857143 + Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 + Baykalovo | 0.857143 + Baykalovsk | 0.857143 + Baykalovskiy | 0.857143 + Baykalovskiy Rayon | 0.857143 + Baykalsko | 0.857143 + Maloye Baykalovo | 0.857143 + Zabaykal | 0.714286 + Bakal Batu | 0.571429 + Zabaykalka | 0.571429 + Zabaykalovskiy | 0.571429 + (23 rows) + + select t,subword_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; + t | sml + ----------------------------------+---------- + Kabankala | 1 + Kabankalan City Public Plaza | 0.9 + Abankala | 0.7 + Ntombankala School | 0.6 + Kabakala | 0.583333 + Nehalla Bankalah Reserved Forest | 0.5 + (6 rows) + + set "pg_trgm.subword_limit" to 0.3; + select t,subword_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; + t | sml + -----------------------------------------------------------+---------- + Baykal | 1 + Boloto Baykal | 1 + Boloto Malyy Baykal | 1 + Kolkhoz Krasnyy Baykal | 1 + Ozero Baykal | 1 + Polevoy Stan Baykal | 1 + Port Baykal | 1 + Prud Novyy Baykal | 1 + Sanatoriy Baykal | 1 + Stantsiya Baykal | 1 + Zaliv Baykal | 1 + Baykalikha | 0.857143 + Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 + Baykalovo | 0.857143 + Baykalovsk | 0.857143 + Baykalovskiy | 0.857143 + Baykalovskiy Rayon | 0.857143 + Baykalsko | 0.857143 + Maloye Baykalovo | 0.857143 + Zabaykal | 0.714286 + Bakal Batu | 0.571429 + Zabaykalka | 0.571429 + Zabaykalovskiy | 0.571429 + Air Bakal-kecil | 0.444444 + Bakal | 0.444444 + Bakal Dos | 0.444444 + Bakal Julu | 0.444444 + Bakal Khel | 0.444444 + Bakal Lama | 0.444444 + Bakal Tres | 0.444444 + Bakal Uno | 0.444444 + Daang Bakal | 0.444444 + Desa Bakal | 0.444444 + Eat Bakal | 0.444444 + Gunung Bakal | 0.444444 + Sidi Bakal | 0.444444 + Stantsiya Bakal | 0.444444 + Sungai Bakal | 0.444444 + Talang Bakal | 0.444444 + Uruk Bakal | 0.444444 + Zaouia Oulad Bakal | 0.444444 + Al Barkali | 0.428571 + Aparthotel Adagio Premium Dubai Al Barsha | 0.428571 + Baikal Business Centre | 0.428571 + Bay of Backaland | 0.428571 + Boikalakalawa Bay | 0.428571 + Doubletree By Hilton Dubai Al Barsha Hotel and Res | 0.428571 + Doubletree By Hilton Hotel and Apartments Dubai Al Barsha | 0.428571 + Doubletree Res.Dubai-Al Barsha | 0.428571 + Holiday Inn Dubai Al Barsha | 0.428571 + Jabal Barkal | 0.428571 + Novotel Dubai Al Barsha | 0.428571 + Park Inn By Radisson Dubai Al Barsha | 0.428571 + Ramee Rose Hotel Dubai Al Barsha | 0.428571 + Waikalabubu Bay | 0.428571 + Baikal | 0.4 + Baikal Airfield | 0.4 + Baikal Hotel Moscow | 0.4 + Baikal Listvyanka Hotel | 0.4 + Baikal Mountains | 0.4 + Baikal Plaza | 0.4 + Bajkal | 0.4 + Bankal | 0.4 + Bankal School | 0.4 + Barkal | 0.4 + Lake Baikal | 0.4 + Mbay Bakala | 0.4 + Oulad el Bakkal | 0.4 + Sidi Mohammed Bakkal | 0.4 + Bairkal | 0.363636 + Bairkal Dhora | 0.363636 + Bairkal Jabal | 0.363636 + Batikal | 0.363636 + Bakala | 0.333333 + Bakala Koupi | 0.333333 + Bakalaale | 0.333333 + Bakalabwa Pans | 0.333333 + Bakalaeng | 0.333333 + Bakalafoulou | 0.333333 + Bakalalan Airport | 0.333333 + Bakalam | 0.333333 + Bakalambani | 0.333333 + Bakalan | 0.333333 + Bakalan Barat | 0.333333 + Bakalan Dua | 0.333333 + Bakalan Kidul | 0.333333 + Bakalan Kulon | 0.333333 + Bakalan Lor | 0.333333 + Bakalan River | 0.333333 + Bakalan Tengah | 0.333333 + Bakalan Wetan | 0.333333 + Bakalang | 0.333333 + Bakalao Asibi Point | 0.333333 + Bakalao Point | 0.333333 + Bakalar Air Force Base (historical) | 0.333333 + Bakalar Lake | 0.333333 + Bakalar Library | 0.333333 + Bakalarr | 0.333333 + Bakalauri | 0.333333 + Bakalauri1 | 0.333333 + Bakalauri2 | 0.333333 + Bakalauri3 | 0.333333 + Bakalauri4 | 0.333333 + Bakalauri5 | 0.333333 + Bakalauri6 | 0.333333 + Bakalauri7 | 0.333333 + Bakalauri8 | 0.333333 + Bakalauri9 | 0.333333 + Bakalawa | 0.333333 + Bakalbhar | 0.333333 + Bakalbuah | 0.333333 + Bakalda | 0.333333 + Bakaldalam | 0.333333 + Bakaldinskoye | 0.333333 + Bakaldovshchina | 0.333333 + Bakaldukuh | 0.333333 + Bakaldum | 0.333333 + Bakaldy | 0.333333 + Bakale | 0.333333 + Bakaleko | 0.333333 + Bakalerek | 0.333333 + Bakaley | 0.333333 + Bakaleyka | 0.333333 + Bakalha | 0.333333 + Bakali | 0.333333 + Bakalia Char | 0.333333 + Bakalica | 0.333333 + Bakalinga | 0.333333 + Bakalino | 0.333333 + Bakalinskiy | 0.333333 + Bakalinskiy Leskhoz | 0.333333 + Bakalinskiy Rayon | 0.333333 + Bakalipur | 0.333333 + Bakalite | 0.333333 + Bakaljaya | 0.333333 + Bakalka | 0.333333 + Bakall | 0.333333 + Bakalnica | 0.333333 + Bakalod Island | 0.333333 + Bakalongo | 0.333333 + Bakaloolay | 0.333333 + Bakalou | 0.333333 + Bakalovina | 0.333333 + Bakalovka | 0.333333 + Bakalovo | 0.333333 + Bakalovskaya Ferma | 0.333333 + Bakalpakebo | 0.333333 + Bakalpokok | 0.333333 + Bakalrejo | 0.333333 + Bakalsen | 0.333333 + Bakalshile | 0.333333 + Bakaltua Bank | 0.333333 + Bakalua | 0.333333 + Bakalukalu | 0.333333 + Bakalukalu Shan | 0.333333 + Bakalukudu | 0.333333 + Bakalum | 0.333333 + Bakaly | 0.333333 + Bakaly TV Mast | 0.333333 + Buur Bakale | 0.333333 + Buur Bakaley | 0.333333 + Columbus Bakalar Municipal Airport | 0.333333 + Dakshin Bakalia | 0.333333 + Danau Bakalan | 0.333333 + Desa Bakalan | 0.333333 + Desa Bakalankrajan | 0.333333 + Desa Bakalankrapyak | 0.333333 + Desa Bakalanpule | 0.333333 + Desa Bakalanrayung | 0.333333 + Desa Bakalanwringinpitu | 0.333333 + Desa Bakalrejo | 0.333333 + Efrejtor Bakalovo | 0.333333 + Efreytor-Bakalovo | 0.333333 + Gora Bakalyadyr | 0.333333 + Gory Bakaly | 0.333333 + Gunung Bakalan | 0.333333 + Ile Bakalibu | 0.333333 + Kali Bakalan | 0.333333 + Kampong Bakaladong | 0.333333 + Khor Bakallii | 0.333333 + Krajan Bakalan | 0.333333 + Kusu-Bakali | 0.333333 + Kwala Bakala | 0.333333 + Ngao Bakala | 0.333333 + Ovrag Bakalda | 0.333333 + Pematang Bakalpanang | 0.333333 + Pematang Bakalpanjang | 0.333333 + Pulau Bakalan | 0.333333 + Pulau Bakalanpauno | 0.333333 + Ragha Bakalzai | 0.333333 + Rodnik Bakalybulak | 0.333333 + Salu Bakalaeng | 0.333333 + Selat Bakalan | 0.333333 + Selat Bakalanpauno | 0.333333 + Sidi Mohammed el Bakali | 0.333333 + Sopka Bakaly | 0.333333 + Sovkhoz Bakalinskiy | 0.333333 + Sungai Bakala | 0.333333 + Sungai Bakaladiyan | 0.333333 + Tanjung Bakalinga | 0.333333 + Teluk Bakalan | 0.333333 + Teluk Bakalang | 0.333333 + Tubu Bakalekuk | 0.333333 + Tukad Bakalan | 0.333333 + Urochishche Bakalarnyn-Ayasy | 0.333333 + Urochishche Bakaldikha | 0.333333 + Urochishche Bakalovo | 0.333333 + Urochishche Bakaly | 0.333333 + Bakkalmal | 0.307692 + Alue Bakkala | 0.3 + Azib el Bakkali | 0.3 + Ba Kaliin | 0.3 + Bagkalen | 0.3 + Bahkalleh | 0.3 + Baikalakko | 0.3 + Baikalovo | 0.3 + Baikaluobbal | 0.3 + Bakkala Cemetery | 0.3 + Bakkalale | 0.3 + Bakkalegskardet | 0.3 + Bakkalia | 0.3 + Bakkalykkja | 0.3 + Bankali | 0.3 + Bankalol | 0.3 + Barkala | 0.3 + Barkala Park | 0.3 + Barkala Rao | 0.3 + Barkala Reserved Forest | 0.3 + Barkalabava | 0.3 + Barkaladja Pool | 0.3 + Barkalare | 0.3 + Barkald | 0.3 + Barkald stasjon | 0.3 + Barkalden | 0.3 + Barkaldfossen | 0.3 + Barkaldvola | 0.3 + Barkale | 0.3 + Barkaleh | 0.3 + Barkaleitet | 0.3 + Barkali | 0.3 + Barkallou | 0.3 + Barkalne | 0.3 + Barkalova | 0.3 + Barkalovka | 0.3 + Barkalow Hollow | 0.3 + Baskalino | 0.3 + Baskaltsi | 0.3 + Baukala | 0.3 + Bavkalasis | 0.3 + Bawkalut | 0.3 + Bawkalut Chaung | 0.3 + Bikal | 0.3 + Clifton T Barkalow Elementary School | 0.3 + Gora Barkalova | 0.3 + Gora Barkalyu | 0.3 + Khrebet Batkali | 0.3 + Kordon Barkalo | 0.3 + Nehalla Bankalah Reserved Forest | 0.3 + Ras Barkallah | 0.3 + Sopka Barkaleptskaya | 0.3 + Urochishche Batkali | 0.3 + (261 rows) + + select t,subword_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; + t | sml + ----------------------------------+---------- + Kabankala | 1 + Kabankalan City Public Plaza | 0.9 + Abankala | 0.7 + Ntombankala School | 0.6 + Kabakala | 0.583333 + Nehalla Bankalah Reserved Forest | 0.5 + Kabikala | 0.461538 + Mwalaba-Kalamba | 0.454545 + Bakala Koupi | 0.4 + Bankal | 0.4 + Bankal School | 0.4 + Bankali | 0.4 + Bankalol | 0.4 + Jabba Kalai | 0.4 + Kanampumba-Kalawa | 0.4 + Purba Kalaujan | 0.4 + Tumba-Kalamba | 0.4 + Daba Kalharereh | 0.363636 + Gagaba Kalo | 0.363636 + Jaba Kalle | 0.363636 + Dabakala | 0.333333 + Dalabakala | 0.333333 + Kambakala | 0.333333 + Ker Samba Kalla | 0.333333 + Fayzabadkala | 0.307692 + Gora Fayzabadkala | 0.307692 + Guba Kalgalaksha | 0.307692 + Habakkala | 0.307692 + Kaikalahun Indian Reserve 25 | 0.307692 + Kaikalapettai | 0.307692 + Alue Bakkala | 0.3 + Ambadikala | 0.3 + Ambakala Wewa | 0.3 + Ataikala | 0.3 + Ba Kaliin | 0.3 + Bakala | 0.3 + Bakkala Cemetery | 0.3 + Bambakala | 0.3 + Barkala | 0.3 + Barkala Park | 0.3 + Barkala Rao | 0.3 + Barkala Reserved Forest | 0.3 + Baukala | 0.3 + Beikala | 0.3 + Bikala | 0.3 + Bikala Madila | 0.3 + Bomba-Kalende | 0.3 + Bonagbakala | 0.3 + Boyagbakala | 0.3 + Bugor Arba-Kalgan | 0.3 + Bumba-Kaloki | 0.3 + Bumba-Kalumba | 0.3 + Darreh Pumba Kal | 0.3 + Demba Kali | 0.3 + Embatkala | 0.3 + Gereba Kaler | 0.3 + Golba Kalo | 0.3 + Goth Soba Kaloi | 0.3 + Guba Kaldo | 0.3 + Guba Kalita | 0.3 + Gulba Kalle | 0.3 + Haikala | 0.3 + Kali Bakalan | 0.3 + Kali Purbakala | 0.3 + Kalibakal | 0.3 + Kalibakalako | 0.3 + Kalimundubakalan | 0.3 + Kamba-Kalele | 0.3 + Kimbakala | 0.3 + Kombakala | 0.3 + Kwala Bakala | 0.3 + Laikala | 0.3 + Maikala Range | 0.3 + Mambakala | 0.3 + Matamba-Kalenga | 0.3 + Matamba-Kalenge | 0.3 + Mbay Bakala | 0.3 + Mount Tohebakala | 0.3 + Naikala | 0.3 + Ngao Bakala | 0.3 + Purba Kalmegha | 0.3 + Sungai Bakala | 0.3 + Tagobikala | 0.3 + Tanjung Batikala | 0.3 + Tombakala | 0.3 + Tsibakala | 0.3 + Tumba-Kalumba | 0.3 + Tumba-Kalunga | 0.3 + Waikala | 0.3 + (89 rows) + + select t,subword_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; + t | sml + -----------------------------------------------------------+---------- + Baykal | 1 + Boloto Baykal | 1 + Boloto Malyy Baykal | 1 + Kolkhoz Krasnyy Baykal | 1 + Ozero Baykal | 1 + Polevoy Stan Baykal | 1 + Port Baykal | 1 + Prud Novyy Baykal | 1 + Sanatoriy Baykal | 1 + Stantsiya Baykal | 1 + Zaliv Baykal | 1 + Baykalikha | 0.857143 + Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 + Baykalovo | 0.857143 + Baykalovsk | 0.857143 + Baykalovskiy | 0.857143 + Baykalovskiy Rayon | 0.857143 + Baykalsko | 0.857143 + Maloye Baykalovo | 0.857143 + Zabaykal | 0.714286 + Bakal Batu | 0.571429 + Zabaykalka | 0.571429 + Zabaykalovskiy | 0.571429 + Air Bakal-kecil | 0.444444 + Bakal | 0.444444 + Bakal Dos | 0.444444 + Bakal Julu | 0.444444 + Bakal Khel | 0.444444 + Bakal Lama | 0.444444 + Bakal Tres | 0.444444 + Bakal Uno | 0.444444 + Daang Bakal | 0.444444 + Desa Bakal | 0.444444 + Eat Bakal | 0.444444 + Gunung Bakal | 0.444444 + Sidi Bakal | 0.444444 + Stantsiya Bakal | 0.444444 + Sungai Bakal | 0.444444 + Talang Bakal | 0.444444 + Uruk Bakal | 0.444444 + Zaouia Oulad Bakal | 0.444444 + Al Barkali | 0.428571 + Aparthotel Adagio Premium Dubai Al Barsha | 0.428571 + Baikal Business Centre | 0.428571 + Bay of Backaland | 0.428571 + Boikalakalawa Bay | 0.428571 + Doubletree By Hilton Dubai Al Barsha Hotel and Res | 0.428571 + Doubletree By Hilton Hotel and Apartments Dubai Al Barsha | 0.428571 + Doubletree Res.Dubai-Al Barsha | 0.428571 + Holiday Inn Dubai Al Barsha | 0.428571 + Jabal Barkal | 0.428571 + Novotel Dubai Al Barsha | 0.428571 + Park Inn By Radisson Dubai Al Barsha | 0.428571 + Ramee Rose Hotel Dubai Al Barsha | 0.428571 + Waikalabubu Bay | 0.428571 + Baikal | 0.4 + Baikal Airfield | 0.4 + Baikal Hotel Moscow | 0.4 + Baikal Listvyanka Hotel | 0.4 + Baikal Mountains | 0.4 + Baikal Plaza | 0.4 + Bajkal | 0.4 + Bankal | 0.4 + Bankal School | 0.4 + Barkal | 0.4 + Lake Baikal | 0.4 + Mbay Bakala | 0.4 + Oulad el Bakkal | 0.4 + Sidi Mohammed Bakkal | 0.4 + Bairkal | 0.363636 + Bairkal Dhora | 0.363636 + Bairkal Jabal | 0.363636 + Batikal | 0.363636 + Bakala | 0.333333 + Bakala Koupi | 0.333333 + Bakalaale | 0.333333 + Bakalabwa Pans | 0.333333 + Bakalaeng | 0.333333 + Bakalafoulou | 0.333333 + Bakalalan Airport | 0.333333 + Bakalam | 0.333333 + Bakalambani | 0.333333 + Bakalan | 0.333333 + Bakalan Barat | 0.333333 + Bakalan Dua | 0.333333 + Bakalan Kidul | 0.333333 + Bakalan Kulon | 0.333333 + Bakalan Lor | 0.333333 + Bakalan River | 0.333333 + Bakalan Tengah | 0.333333 + Bakalan Wetan | 0.333333 + Bakalang | 0.333333 + Bakalao Asibi Point | 0.333333 + Bakalao Point | 0.333333 + Bakalar Air Force Base (historical) | 0.333333 + Bakalar Lake | 0.333333 + Bakalar Library | 0.333333 + Bakalarr | 0.333333 + Bakalauri | 0.333333 + Bakalauri1 | 0.333333 + Bakalauri2 | 0.333333 + Bakalauri3 | 0.333333 + Bakalauri4 | 0.333333 + Bakalauri5 | 0.333333 + Bakalauri6 | 0.333333 + Bakalauri7 | 0.333333 + Bakalauri8 | 0.333333 + Bakalauri9 | 0.333333 + Bakalawa | 0.333333 + Bakalbhar | 0.333333 + Bakalbuah | 0.333333 + Bakalda | 0.333333 + Bakaldalam | 0.333333 + Bakaldinskoye | 0.333333 + Bakaldovshchina | 0.333333 + Bakaldukuh | 0.333333 + Bakaldum | 0.333333 + Bakaldy | 0.333333 + Bakale | 0.333333 + Bakaleko | 0.333333 + Bakalerek | 0.333333 + Bakaley | 0.333333 + Bakaleyka | 0.333333 + Bakalha | 0.333333 + Bakali | 0.333333 + Bakalia Char | 0.333333 + Bakalica | 0.333333 + Bakalinga | 0.333333 + Bakalino | 0.333333 + Bakalinskiy | 0.333333 + Bakalinskiy Leskhoz | 0.333333 + Bakalinskiy Rayon | 0.333333 + Bakalipur | 0.333333 + Bakalite | 0.333333 + Bakaljaya | 0.333333 + Bakalka | 0.333333 + Bakall | 0.333333 + Bakalnica | 0.333333 + Bakalod Island | 0.333333 + Bakalongo | 0.333333 + Bakaloolay | 0.333333 + Bakalou | 0.333333 + Bakalovina | 0.333333 + Bakalovka | 0.333333 + Bakalovo | 0.333333 + Bakalovskaya Ferma | 0.333333 + Bakalpakebo | 0.333333 + Bakalpokok | 0.333333 + Bakalrejo | 0.333333 + Bakalsen | 0.333333 + Bakalshile | 0.333333 + Bakaltua Bank | 0.333333 + Bakalua | 0.333333 + Bakalukalu | 0.333333 + Bakalukalu Shan | 0.333333 + Bakalukudu | 0.333333 + Bakalum | 0.333333 + Bakaly | 0.333333 + Bakaly TV Mast | 0.333333 + Buur Bakale | 0.333333 + Buur Bakaley | 0.333333 + Columbus Bakalar Municipal Airport | 0.333333 + Dakshin Bakalia | 0.333333 + Danau Bakalan | 0.333333 + Desa Bakalan | 0.333333 + Desa Bakalankrajan | 0.333333 + Desa Bakalankrapyak | 0.333333 + Desa Bakalanpule | 0.333333 + Desa Bakalanrayung | 0.333333 + Desa Bakalanwringinpitu | 0.333333 + Desa Bakalrejo | 0.333333 + Efrejtor Bakalovo | 0.333333 + Efreytor-Bakalovo | 0.333333 + Gora Bakalyadyr | 0.333333 + Gory Bakaly | 0.333333 + Gunung Bakalan | 0.333333 + Ile Bakalibu | 0.333333 + Kali Bakalan | 0.333333 + Kampong Bakaladong | 0.333333 + Khor Bakallii | 0.333333 + Krajan Bakalan | 0.333333 + Kusu-Bakali | 0.333333 + Kwala Bakala | 0.333333 + Ngao Bakala | 0.333333 + Ovrag Bakalda | 0.333333 + Pematang Bakalpanang | 0.333333 + Pematang Bakalpanjang | 0.333333 + Pulau Bakalan | 0.333333 + Pulau Bakalanpauno | 0.333333 + Ragha Bakalzai | 0.333333 + Rodnik Bakalybulak | 0.333333 + Salu Bakalaeng | 0.333333 + Selat Bakalan | 0.333333 + Selat Bakalanpauno | 0.333333 + Sidi Mohammed el Bakali | 0.333333 + Sopka Bakaly | 0.333333 + Sovkhoz Bakalinskiy | 0.333333 + Sungai Bakala | 0.333333 + Sungai Bakaladiyan | 0.333333 + Tanjung Bakalinga | 0.333333 + Teluk Bakalan | 0.333333 + Teluk Bakalang | 0.333333 + Tubu Bakalekuk | 0.333333 + Tukad Bakalan | 0.333333 + Urochishche Bakalarnyn-Ayasy | 0.333333 + Urochishche Bakaldikha | 0.333333 + Urochishche Bakalovo | 0.333333 + Urochishche Bakaly | 0.333333 + Bakkalmal | 0.307692 + Alue Bakkala | 0.3 + Azib el Bakkali | 0.3 + Ba Kaliin | 0.3 + Bagkalen | 0.3 + Bahkalleh | 0.3 + Baikalakko | 0.3 + Baikalovo | 0.3 + Baikaluobbal | 0.3 + Bakkala Cemetery | 0.3 + Bakkalale | 0.3 + Bakkalegskardet | 0.3 + Bakkalia | 0.3 + Bakkalykkja | 0.3 + Bankali | 0.3 + Bankalol | 0.3 + Barkala | 0.3 + Barkala Park | 0.3 + Barkala Rao | 0.3 + Barkala Reserved Forest | 0.3 + Barkalabava | 0.3 + Barkaladja Pool | 0.3 + Barkalare | 0.3 + Barkald | 0.3 + Barkald stasjon | 0.3 + Barkalden | 0.3 + Barkaldfossen | 0.3 + Barkaldvola | 0.3 + Barkale | 0.3 + Barkaleh | 0.3 + Barkaleitet | 0.3 + Barkali | 0.3 + Barkallou | 0.3 + Barkalne | 0.3 + Barkalova | 0.3 + Barkalovka | 0.3 + Barkalow Hollow | 0.3 + Baskalino | 0.3 + Baskaltsi | 0.3 + Baukala | 0.3 + Bavkalasis | 0.3 + Bawkalut | 0.3 + Bawkalut Chaung | 0.3 + Bikal | 0.3 + Clifton T Barkalow Elementary School | 0.3 + Gora Barkalova | 0.3 + Gora Barkalyu | 0.3 + Khrebet Batkali | 0.3 + Kordon Barkalo | 0.3 + Nehalla Bankalah Reserved Forest | 0.3 + Ras Barkallah | 0.3 + Sopka Barkaleptskaya | 0.3 + Urochishche Batkali | 0.3 + (261 rows) + + select t,subword_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; + t | sml + ----------------------------------+---------- + Kabankala | 1 + Kabankalan City Public Plaza | 0.9 + Abankala | 0.7 + Ntombankala School | 0.6 + Kabakala | 0.583333 + Nehalla Bankalah Reserved Forest | 0.5 + Kabikala | 0.461538 + Mwalaba-Kalamba | 0.454545 + Bakala Koupi | 0.4 + Bankal | 0.4 + Bankal School | 0.4 + Bankali | 0.4 + Bankalol | 0.4 + Jabba Kalai | 0.4 + Kanampumba-Kalawa | 0.4 + Purba Kalaujan | 0.4 + Tumba-Kalamba | 0.4 + Daba Kalharereh | 0.363636 + Gagaba Kalo | 0.363636 + Jaba Kalle | 0.363636 + Dabakala | 0.333333 + Dalabakala | 0.333333 + Kambakala | 0.333333 + Ker Samba Kalla | 0.333333 + Fayzabadkala | 0.307692 + Gora Fayzabadkala | 0.307692 + Guba Kalgalaksha | 0.307692 + Habakkala | 0.307692 + Kaikalahun Indian Reserve 25 | 0.307692 + Kaikalapettai | 0.307692 + Alue Bakkala | 0.3 + Ambadikala | 0.3 + Ambakala Wewa | 0.3 + Ataikala | 0.3 + Ba Kaliin | 0.3 + Bakala | 0.3 + Bakkala Cemetery | 0.3 + Bambakala | 0.3 + Barkala | 0.3 + Barkala Park | 0.3 + Barkala Rao | 0.3 + Barkala Reserved Forest | 0.3 + Baukala | 0.3 + Beikala | 0.3 + Bikala | 0.3 + Bikala Madila | 0.3 + Bomba-Kalende | 0.3 + Bonagbakala | 0.3 + Boyagbakala | 0.3 + Bugor Arba-Kalgan | 0.3 + Bumba-Kaloki | 0.3 + Bumba-Kalumba | 0.3 + Darreh Pumba Kal | 0.3 + Demba Kali | 0.3 + Embatkala | 0.3 + Gereba Kaler | 0.3 + Golba Kalo | 0.3 + Goth Soba Kaloi | 0.3 + Guba Kaldo | 0.3 + Guba Kalita | 0.3 + Gulba Kalle | 0.3 + Haikala | 0.3 + Kali Bakalan | 0.3 + Kali Purbakala | 0.3 + Kalibakal | 0.3 + Kalibakalako | 0.3 + Kalimundubakalan | 0.3 + Kamba-Kalele | 0.3 + Kimbakala | 0.3 + Kombakala | 0.3 + Kwala Bakala | 0.3 + Laikala | 0.3 + Maikala Range | 0.3 + Mambakala | 0.3 + Matamba-Kalenga | 0.3 + Matamba-Kalenge | 0.3 + Mbay Bakala | 0.3 + Mount Tohebakala | 0.3 + Naikala | 0.3 + Ngao Bakala | 0.3 + Purba Kalmegha | 0.3 + Sungai Bakala | 0.3 + Tagobikala | 0.3 + Tanjung Batikala | 0.3 + Tombakala | 0.3 + Tsibakala | 0.3 + Tumba-Kalumba | 0.3 + Tumba-Kalunga | 0.3 + Waikala | 0.3 + (89 rows) + *** a/contrib/pg_trgm/expected/pg_trgm.out --- b/contrib/pg_trgm/expected/pg_trgm.out *************** *** 59,65 **** select similarity('---', '####---'); 0 (1 row) ! CREATE TABLE test_trgm(t text); \copy test_trgm from 'data/trgm.data' select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t; t | sml --- 59,65 ---- 0 (1 row) ! CREATE TABLE test_trgm(t text COLLATE "C"); \copy test_trgm from 'data/trgm.data' select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t; t | sml *************** *** 3467,3473 **** select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu198 qwertyu0988 | 0.333333 (1 row) ! create table test2(t text); insert into test2 values ('abcdef'); insert into test2 values ('quark'); insert into test2 values (' z foo bar'); --- 3467,3473 ---- qwertyu0988 | 0.333333 (1 row) ! create table test2(t text COLLATE "C"); insert into test2 values ('abcdef'); insert into test2 values ('quark'); insert into test2 values (' z foo bar'); *** a/contrib/pg_trgm/pg_trgm--1.1--1.2.sql --- b/contrib/pg_trgm/pg_trgm--1.1--1.2.sql *************** *** 3,12 **** -- complain if script is sourced in psql, rather than via ALTER EXTENSION \echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.2'" to load this file. \quit CREATE FUNCTION gin_trgm_triconsistent(internal, int2, text, int4, internal, internal, internal) RETURNS "char" AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT; ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD ! FUNCTION 6 (text, text) gin_trgm_triconsistent (internal, int2, text, int4, internal, internal, internal); --- 3,74 ---- -- complain if script is sourced in psql, rather than via ALTER EXTENSION \echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.2'" to load this file. \quit + CREATE FUNCTION subword_similarity(text,text) + RETURNS float4 + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT IMMUTABLE; + + CREATE FUNCTION subword_similarity_op(text,text) + RETURNS bool + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT STABLE; -- stable because depends on pg_trgm.subword_limit + + CREATE FUNCTION subword_similarity_commutator_op(text,text) + RETURNS bool + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT STABLE; -- stable because depends on pg_trgm.subword_limit + + CREATE FUNCTION subword_similarity_dist_op(text,text) + RETURNS float4 + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT IMMUTABLE; + + CREATE FUNCTION subword_similarity_dist_commutator_op(text,text) + RETURNS float4 + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT IMMUTABLE; + + CREATE OPERATOR <% ( + LEFTARG = text, + RIGHTARG = text, + PROCEDURE = subword_similarity_op, + COMMUTATOR = '%>', + RESTRICT = contsel, + JOIN = contjoinsel + ); + + CREATE OPERATOR %> ( + LEFTARG = text, + RIGHTARG = text, + PROCEDURE = subword_similarity_commutator_op, + COMMUTATOR = '<%', + RESTRICT = contsel, + JOIN = contjoinsel + ); + + CREATE OPERATOR <<-> ( + LEFTARG = text, + RIGHTARG = text, + PROCEDURE = subword_similarity_dist_op, + COMMUTATOR = '<->>' + ); + + CREATE OPERATOR <->> ( + LEFTARG = text, + RIGHTARG = text, + PROCEDURE = subword_similarity_dist_commutator_op, + COMMUTATOR = '<<->' + ); + CREATE FUNCTION gin_trgm_triconsistent(internal, int2, text, int4, internal, internal, internal) RETURNS "char" AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT; + ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD + OPERATOR 7 %> (text, text), + OPERATOR 8 <->> (text, text) FOR ORDER BY pg_catalog.float_ops; + ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD ! OPERATOR 7 %> (text, text), ! FUNCTION 6 (text, text) gin_trgm_triconsistent (internal, int2, text, int4, internal, internal, internal); *** a/contrib/pg_trgm/pg_trgm--1.2.sql --- b/contrib/pg_trgm/pg_trgm--1.2.sql *************** *** 39,44 **** CREATE OPERATOR % ( --- 39,77 ---- JOIN = contjoinsel ); + CREATE FUNCTION subword_similarity(text,text) + RETURNS float4 + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT IMMUTABLE; + + CREATE FUNCTION subword_similarity_op(text,text) + RETURNS bool + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT STABLE; -- stable because depends on pg_trgm.subword_limit + + CREATE FUNCTION subword_similarity_commutator_op(text,text) + RETURNS bool + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT STABLE; -- stable because depends on pg_trgm.subword_limit + + CREATE OPERATOR <% ( + LEFTARG = text, + RIGHTARG = text, + PROCEDURE = subword_similarity_op, + COMMUTATOR = '%>', + RESTRICT = contsel, + JOIN = contjoinsel + ); + + CREATE OPERATOR %> ( + LEFTARG = text, + RIGHTARG = text, + PROCEDURE = subword_similarity_commutator_op, + COMMUTATOR = '<%', + RESTRICT = contsel, + JOIN = contjoinsel + ); + CREATE FUNCTION similarity_dist(text,text) RETURNS float4 AS 'MODULE_PATHNAME' *************** *** 51,56 **** CREATE OPERATOR <-> ( --- 84,113 ---- COMMUTATOR = '<->' ); + CREATE FUNCTION subword_similarity_dist_op(text,text) + RETURNS float4 + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT IMMUTABLE; + + CREATE FUNCTION subword_similarity_dist_commutator_op(text,text) + RETURNS float4 + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT IMMUTABLE; + + CREATE OPERATOR <<-> ( + LEFTARG = text, + RIGHTARG = text, + PROCEDURE = subword_similarity_dist_op, + COMMUTATOR = '<->>' + ); + + CREATE OPERATOR <->> ( + LEFTARG = text, + RIGHTARG = text, + PROCEDURE = subword_similarity_dist_commutator_op, + COMMUTATOR = '<<->' + ); + -- gist key CREATE FUNCTION gtrgm_in(cstring) RETURNS gtrgm *************** *** 140,145 **** ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD --- 197,208 ---- OPERATOR 5 pg_catalog.~ (text, text), OPERATOR 6 pg_catalog.~* (text, text); + -- Add operators that are new in 9.6 (pg_trgm 1.2). + + ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD + OPERATOR 7 %> (text, text), + OPERATOR 8 <->> (text, text) FOR ORDER BY pg_catalog.float_ops; + -- support functions for gin CREATE FUNCTION gin_extract_value_trgm(text, internal) RETURNS internal *************** *** 187,190 **** AS 'MODULE_PATHNAME' --- 250,254 ---- LANGUAGE C IMMUTABLE STRICT; ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD + OPERATOR 7 %> (text, text), FUNCTION 6 (text,text) gin_trgm_triconsistent (internal, int2, text, int4, internal, internal, internal); *** /dev/null --- b/contrib/pg_trgm/sql/pg_subword_trgm.sql *************** *** 0 **** --- 1,42 ---- + CREATE TABLE test_trgm2(t text COLLATE "C"); + + \copy test_trgm2 from 'data/trgm2.data' + + select t,subword_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; + select t,subword_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; + select t,subword_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; + select t,subword_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; + select t <->> 'Kabankala', t from test_trgm2 order by t <->> 'Kabankala' limit 7; + + create index trgm_idx2 on test_trgm2 using gist (t gist_trgm_ops); + set enable_seqscan=off; + + select t,subword_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; + select t,subword_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; + select t,subword_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; + select t,subword_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; + + explain (costs off) + select t <->> 'Kabankala', t from test_trgm2 order by t <->> 'Kabankala' limit 7; + select t <->> 'Kabankala', t from test_trgm2 order by t <->> 'Kabankala' limit 7; + + drop index trgm_idx2; + create index trgm_idx2 on test_trgm2 using gin (t gin_trgm_ops); + set enable_seqscan=off; + + select t,subword_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; + select t,subword_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; + select t,subword_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; + select t,subword_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; + + set "pg_trgm.subword_limit" to 0.5; + select t,subword_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; + select t,subword_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; + select t,subword_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; + select t,subword_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; + + set "pg_trgm.subword_limit" to 0.3; + select t,subword_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; + select t,subword_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; + select t,subword_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; + select t,subword_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; *** a/contrib/pg_trgm/sql/pg_trgm.sql --- b/contrib/pg_trgm/sql/pg_trgm.sql *************** *** 13,19 **** select similarity('wow',' WOW '); select similarity('---', '####---'); ! CREATE TABLE test_trgm(t text); \copy test_trgm from 'data/trgm.data' --- 13,19 ---- select similarity('---', '####---'); ! CREATE TABLE test_trgm(t text COLLATE "C"); \copy test_trgm from 'data/trgm.data' *************** *** 40,46 **** select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu098 select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t; select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t; ! create table test2(t text); insert into test2 values ('abcdef'); insert into test2 values ('quark'); insert into test2 values (' z foo bar'); --- 40,46 ---- select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t; select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t; ! create table test2(t text COLLATE "C"); insert into test2 values ('abcdef'); insert into test2 values ('quark'); insert into test2 values (' z foo bar'); *** a/contrib/pg_trgm/trgm.h --- b/contrib/pg_trgm/trgm.h *************** *** 26,38 **** #define DIVUNION /* operator strategy numbers */ ! #define SimilarityStrategyNumber 1 ! #define DistanceStrategyNumber 2 ! #define LikeStrategyNumber 3 ! #define ILikeStrategyNumber 4 ! #define RegExpStrategyNumber 5 ! #define RegExpICaseStrategyNumber 6 ! typedef char trgm[3]; --- 26,39 ---- #define DIVUNION /* operator strategy numbers */ ! #define SimilarityStrategyNumber 1 ! #define DistanceStrategyNumber 2 ! #define LikeStrategyNumber 3 ! #define ILikeStrategyNumber 4 ! #define RegExpStrategyNumber 5 ! #define RegExpICaseStrategyNumber 6 ! #define SubwordSimilarityStrategyNumber 7 ! #define SubwordDistanceStrategyNumber 8 typedef char trgm[3]; *************** *** 103,117 **** typedef char *BITVECP; #define GETARR(x) ( (trgm*)( (char*)x+TRGMHDRSIZE ) ) #define ARRNELEM(x) ( ( VARSIZE(x) - TRGMHDRSIZE )/sizeof(trgm) ) typedef struct TrgmPackedGraph TrgmPackedGraph; extern double trgm_sml_limit; extern uint32 trgm2int(trgm *ptr); extern void compact_trigram(trgm *tptr, char *str, int bytelen); extern TRGM *generate_trgm(char *str, int slen); extern TRGM *generate_wildcard_trgm(const char *str, int slen); ! extern float4 cnt_sml(TRGM *trg1, TRGM *trg2); extern bool trgm_contained_by(TRGM *trg1, TRGM *trg2); extern bool *trgm_presence_map(TRGM *query, TRGM *key); extern TRGM *createTrgmNFA(text *text_re, Oid collation, --- 104,131 ---- #define GETARR(x) ( (trgm*)( (char*)x+TRGMHDRSIZE ) ) #define ARRNELEM(x) ( ( VARSIZE(x) - TRGMHDRSIZE )/sizeof(trgm) ) + /* + * If DIVUNION is defined then similarity formula is: + * count / (len1 + len2 - count) + * else if DIVUNION is not defined then similarity formula is: + * count / max(len1, len2) + */ + #ifdef DIVUNION + #define CALCSML(count, len1, len2) ((float4) (count)) / ((float4) ((len1) + (len2) - (count))) + #else + #define CALCSML(count, len1, len2) ((float4) (count)) / ((float4) (((len1) > (len2)) ? (len1) : (len2))) + #endif + typedef struct TrgmPackedGraph TrgmPackedGraph; extern double trgm_sml_limit; + extern double trgm_subword_limit; extern uint32 trgm2int(trgm *ptr); extern void compact_trigram(trgm *tptr, char *str, int bytelen); extern TRGM *generate_trgm(char *str, int slen); extern TRGM *generate_wildcard_trgm(const char *str, int slen); ! extern float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact); extern bool trgm_contained_by(TRGM *trg1, TRGM *trg2); extern bool *trgm_presence_map(TRGM *query, TRGM *key); extern TRGM *createTrgmNFA(text *text_re, Oid collation, *** a/contrib/pg_trgm/trgm_gin.c --- b/contrib/pg_trgm/trgm_gin.c *************** *** 89,94 **** gin_extract_query_trgm(PG_FUNCTION_ARGS) --- 89,95 ---- switch (strategy) { case SimilarityStrategyNumber: + case SubwordSimilarityStrategyNumber: trg = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ); break; case ILikeStrategyNumber: *************** *** 176,181 **** gin_trgm_consistent(PG_FUNCTION_ARGS) --- 177,183 ---- bool res; int32 i, ntrue; + double nlimit; /* All cases served by this function are inexact */ *recheck = true; *************** *** 183,188 **** gin_trgm_consistent(PG_FUNCTION_ARGS) --- 185,194 ---- switch (strategy) { case SimilarityStrategyNumber: + case SubwordSimilarityStrategyNumber: + nlimit = (strategy == SimilarityStrategyNumber) ? + trgm_sml_limit : trgm_subword_limit; + /* Count the matches */ ntrue = 0; for (i = 0; i < nkeys; i++) *************** *** 207,213 **** gin_trgm_consistent(PG_FUNCTION_ARGS) * So, independly on DIVUNION the upper bound formula is the same. */ res = (nkeys == 0) ? false : ! ((((((float4) ntrue) / ((float4) nkeys))) >= trgm_sml_limit) ? true : false); break; case ILikeStrategyNumber: #ifndef IGNORECASE --- 213,219 ---- * So, independly on DIVUNION the upper bound formula is the same. */ res = (nkeys == 0) ? false : ! (((((float4) ntrue) / ((float4) nkeys))) >= nlimit); break; case ILikeStrategyNumber: #ifndef IGNORECASE *************** *** 269,278 **** gin_trgm_triconsistent(PG_FUNCTION_ARGS) --- 275,289 ---- int32 i, ntrue; bool *boolcheck; + double nlimit; switch (strategy) { case SimilarityStrategyNumber: + case SubwordSimilarityStrategyNumber: + nlimit = (strategy == SimilarityStrategyNumber) ? + trgm_sml_limit : trgm_subword_limit; + /* Count the matches */ ntrue = 0; for (i = 0; i < nkeys; i++) *************** *** 285,291 **** gin_trgm_triconsistent(PG_FUNCTION_ARGS) * See comment in gin_trgm_consistent() about * upper bound formula */ res = (nkeys == 0) ? GIN_FALSE : ! (((((float4) ntrue) / ((float4) nkeys)) >= trgm_sml_limit) ? GIN_MAYBE : GIN_FALSE); break; case ILikeStrategyNumber: #ifndef IGNORECASE --- 296,302 ---- * See comment in gin_trgm_consistent() about * upper bound formula */ res = (nkeys == 0) ? GIN_FALSE : ! (((((float4) ntrue) / ((float4) nkeys)) >= nlimit) ? GIN_MAYBE : GIN_FALSE); break; case ILikeStrategyNumber: #ifndef IGNORECASE *** a/contrib/pg_trgm/trgm_gist.c --- b/contrib/pg_trgm/trgm_gist.c *************** *** 191,196 **** gtrgm_consistent(PG_FUNCTION_ARGS) --- 191,197 ---- bool res; Size querysize = VARSIZE(query); gtrgm_consistent_cache *cache; + double nlimit; /* * We keep the extracted trigrams in cache, because trigram extraction is *************** *** 218,223 **** gtrgm_consistent(PG_FUNCTION_ARGS) --- 219,225 ---- switch (strategy) { case SimilarityStrategyNumber: + case SubwordSimilarityStrategyNumber: qtrg = generate_trgm(VARDATA(query), querysize - VARHDRSZ); break; *************** *** 286,301 **** gtrgm_consistent(PG_FUNCTION_ARGS) switch (strategy) { case SimilarityStrategyNumber: ! /* Similarity search is exact */ ! *recheck = false; if (GIST_LEAF(entry)) { /* all leafs contains orig trgm */ ! float4 tmpsml = cnt_sml(key, qtrg); /* strange bug at freebsd 5.2.1 and gcc 3.3.3 */ ! res = (*(int *) &tmpsml == *(int *) &trgm_sml_limit ! || tmpsml > trgm_sml_limit) ? true : false; } else if (ISALLTRUE(key)) { /* non-leaf contains signature */ --- 288,305 ---- switch (strategy) { case SimilarityStrategyNumber: ! case SubwordSimilarityStrategyNumber: ! /* Similarity search is exact. Subword similarity search is inexact */ ! *recheck = (strategy == SubwordSimilarityStrategyNumber); ! nlimit = (strategy == SimilarityStrategyNumber) ? ! trgm_sml_limit : trgm_subword_limit; if (GIST_LEAF(entry)) { /* all leafs contains orig trgm */ ! float4 tmpsml = cnt_sml(qtrg, key, *recheck); /* strange bug at freebsd 5.2.1 and gcc 3.3.3 */ ! res = (*(int *) &tmpsml == *(int *) &nlimit || tmpsml > nlimit); } else if (ISALLTRUE(key)) { /* non-leaf contains signature */ *************** *** 309,315 **** gtrgm_consistent(PG_FUNCTION_ARGS) if (len == 0) res = false; else ! res = (((((float8) count) / ((float8) len))) >= trgm_sml_limit) ? true : false; } break; case ILikeStrategyNumber: --- 313,319 ---- if (len == 0) res = false; else ! res = (((((float8) count) / ((float8) len))) >= nlimit); } break; case ILikeStrategyNumber: *************** *** 427,432 **** gtrgm_distance(PG_FUNCTION_ARGS) --- 431,437 ---- StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); /* Oid subtype = PG_GETARG_OID(3); */ + bool *recheck = (bool *) PG_GETARG_POINTER(4); TRGM *key = (TRGM *) DatumGetPointer(entry->key); TRGM *qtrg; float8 res; *************** *** 462,470 **** gtrgm_distance(PG_FUNCTION_ARGS) switch (strategy) { case DistanceStrategyNumber: if (GIST_LEAF(entry)) { /* all leafs contains orig trgm */ ! res = 1.0 - cnt_sml(key, qtrg); } else if (ISALLTRUE(key)) { /* all leafs contains orig trgm */ --- 467,477 ---- switch (strategy) { case DistanceStrategyNumber: + case SubwordDistanceStrategyNumber: + *recheck = strategy == SubwordDistanceStrategyNumber; if (GIST_LEAF(entry)) { /* all leafs contains orig trgm */ ! res = 1.0 - cnt_sml(qtrg, key, *recheck); } else if (ISALLTRUE(key)) { /* all leafs contains orig trgm */ *** a/contrib/pg_trgm/trgm_op.c --- b/contrib/pg_trgm/trgm_op.c *************** *** 15,21 **** PG_MODULE_MAGIC; /* GUC variables */ ! double trgm_sml_limit = 0.3f; void _PG_init(void); --- 15,22 ---- PG_MODULE_MAGIC; /* GUC variables */ ! double trgm_sml_limit = 0.3f; ! double trgm_subword_limit = 0.6f; void _PG_init(void); *************** *** 23,30 **** PG_FUNCTION_INFO_V1(set_limit); --- 24,43 ---- PG_FUNCTION_INFO_V1(show_limit); PG_FUNCTION_INFO_V1(show_trgm); PG_FUNCTION_INFO_V1(similarity); + PG_FUNCTION_INFO_V1(subword_similarity); PG_FUNCTION_INFO_V1(similarity_dist); PG_FUNCTION_INFO_V1(similarity_op); + PG_FUNCTION_INFO_V1(subword_similarity_op); + PG_FUNCTION_INFO_V1(subword_similarity_commutator_op); + PG_FUNCTION_INFO_V1(subword_similarity_dist_op); + PG_FUNCTION_INFO_V1(subword_similarity_dist_commutator_op); + + /* Trigram with position */ + typedef struct + { + trgm trg; + int index; + } pos_trgm; /* * Module load callback *************** *** 45,50 **** _PG_init(void) --- 58,75 ---- NULL, NULL, NULL); + DefineCustomRealVariable("pg_trgm.subword_limit", + "Sets the threshold used by the <%% operator.", + "Valid range is 0.0 .. 1.0.", + &trgm_subword_limit, + 0.6, + 0.0, + 1.0, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); } /* *************** *** 199,236 **** make_trigrams(trgm *tptr, char *str, int bytelen, int charlen) return tptr; } ! TRGM * ! generate_trgm(char *str, int slen) { ! TRGM *trg; char *buf; ! trgm *tptr; ! int len, ! charlen, bytelen; char *bword, *eword; - /* - * Guard against possible overflow in the palloc requests below. (We - * don't worry about the additive constants, since palloc can detect - * requests that are a little above MaxAllocSize --- we just need to - * prevent integer overflow in the multiplications.) - */ - if ((Size) (slen / 2) >= (MaxAllocSize / (sizeof(trgm) * 3)) || - (Size) slen >= (MaxAllocSize / pg_database_encoding_max_length())) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("out of memory"))); - - trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) *3); - trg->flag = ARRKEY; - SET_VARSIZE(trg, TRGMHDRSIZE); - if (slen + LPADDING + RPADDING < 3 || slen == 0) ! return trg; ! tptr = GETARR(trg); /* Allocate a buffer for case-folded, blank-padded words */ buf = (char *) palloc(slen * pg_database_encoding_max_length() + 4); --- 224,251 ---- return tptr; } ! /* ! * Make array of trigrams without sorting and removing duplicate items. ! * ! * trg: where to return the array of trigrams. ! * str: source string, of length slen bytes. ! * ! * Returns length of the generated array. ! */ ! static int ! generate_trgm_only(trgm *trg, char *str, int slen) { ! trgm *tptr; char *buf; ! int charlen, bytelen; char *bword, *eword; if (slen + LPADDING + RPADDING < 3 || slen == 0) ! return 0; ! tptr = trg; /* Allocate a buffer for case-folded, blank-padded words */ buf = (char *) palloc(slen * pg_database_encoding_max_length() + 4); *************** *** 270,276 **** generate_trgm(char *str, int slen) pfree(buf); ! if ((len = tptr - GETARR(trg)) == 0) return trg; /* --- 285,331 ---- pfree(buf); ! return tptr - trg; ! } ! ! /* ! * Guard against possible overflow in the palloc requests below. (We ! * don't worry about the additive constants, since palloc can detect ! * requests that are a little above MaxAllocSize --- we just need to ! * prevent integer overflow in the multiplications.) ! */ ! static void ! protect_out_of_mem(int slen) ! { ! if ((Size) (slen / 2) >= (MaxAllocSize / (sizeof(trgm) * 3)) || ! (Size) slen >= (MaxAllocSize / pg_database_encoding_max_length())) ! ereport(ERROR, ! (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), ! errmsg("out of memory"))); ! } ! ! /* ! * Make array of trigrams with sorting and removing duplicate items. ! * ! * str: source string, of length slen bytes. ! * ! * Returns the sorted array of unique trigrams. ! */ ! TRGM * ! generate_trgm(char *str, int slen) ! { ! TRGM *trg; ! int len; ! ! protect_out_of_mem(slen); ! ! trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) *3); ! trg->flag = ARRKEY; ! ! len = generate_trgm_only(GETARR(trg), str, slen); ! SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len)); ! ! if (len == 0) return trg; /* *************** *** 288,293 **** generate_trgm(char *str, int slen) --- 343,625 ---- } /* + * Make array of positional trigrams from two trigram arrays trg1 and trg2. + * + * trg1: trigram array of search pattern, of length len1. trg1 is required + * word which positions don't matter and replaced with -1. + * trg2: trigram array of text, of length len2. trg2 is haystack where we + * search and have to store its positions. + * + * Returns concatenated trigram array. + */ + static pos_trgm * + make_positional_trgm(trgm *trg1, int len1, trgm *trg2, int len2) + { + pos_trgm *result; + int i, len = len1 + len2; + + result = (pos_trgm *) palloc(sizeof(pos_trgm) * len); + + for (i = 0; i < len1; i++) + { + memcpy(&result[i].trg, &trg1[i], sizeof(trgm)); + result[i].index = -1; + } + + for (i = 0; i < len2; i++) + { + memcpy(&result[i + len1].trg, &trg2[i], sizeof(trgm)); + result[i + len1].index = i; + } + + return result; + } + + /* + * Compare position trigrams: compare trigrams first and position second. + */ + static int + comp_ptrgm(const void *v1, const void *v2) + { + const pos_trgm *p1 = (const pos_trgm *)v1; + const pos_trgm *p2 = (const pos_trgm *)v2; + int cmp; + + cmp = CMPTRGM(p1->trg, p2->trg); + if (cmp != 0) + return cmp; + + if (p1->index < p2->index) + return -1; + else if (p1->index == p2->index) + return 0; + else + return 1; + } + + /* + * Iterative search function which calculates maximum similarity with word in + * the string. But maximum similarity is calculated only if check_only == false. + * + * trg2indexes: array which stores indexes of the array "found". + * found: array which stores true of false values. + * ulen1: count of unique trigrams of array "trg1". + * len2: length of array "trg2" and array "trg2indexes". + * len: length of the array "found". + * check_only: if true then only check existaince of similar search pattern in text + * + * Returns subword similarity. + */ + static float4 + iterate_subword_similarity(int *trg2indexes, + bool *found, + int ulen1, + int len2, + int len, + bool check_only) + { + int *lastpos, + i, + ulen2 = 0, + count = 0, + upper = -1, + lower = -1; + float4 smlr_cur, + smlr_max = 0.0f; + + /* Memorise last position of each trigram */ + lastpos = (int *) palloc(sizeof(int) * len); + memset(lastpos, -1, sizeof(int) * len); + + for (i = 0; i < len2; i++) + { + /* Get index of next trigram */ + int trgindex = trg2indexes[i]; + + /* Update last position of this trigram */ + if (lower >= 0 || found[trgindex]) + { + if (lastpos[trgindex] < 0) + { + ulen2++; + if (found[trgindex]) + count++; + } + lastpos[trgindex] = i; + } + + /* Adjust lower bound if this trigram is present in required substing */ + if (found[trgindex]) + { + int prev_lower, + tmp_ulen2, + tmp_lower, + tmp_count; + + upper = i; + if (lower == -1) + { + lower = i; + ulen2 = 1; + } + + smlr_cur = CALCSML(count, ulen1, ulen2); + + /* Also try to adjust upper bound for greater similarity */ + tmp_count = count; + tmp_ulen2 = ulen2; + prev_lower = lower; + for (tmp_lower = lower; tmp_lower <= upper; tmp_lower++) + { + float smlr_tmp = CALCSML(tmp_count, ulen1, tmp_ulen2); + int tmp_trgindex; + + if (smlr_tmp > smlr_cur) + { + smlr_cur = smlr_tmp; + ulen2 = tmp_ulen2; + lower = tmp_lower; + count = tmp_count; + } + /* + * if we only check that subword similarity is greater than + * pg_trgm.subword_limit we do not need to calculate a + * maximum similarity + */ + if (check_only && smlr_cur >= trgm_subword_limit) + break; + + tmp_trgindex = trg2indexes[tmp_lower]; + if (lastpos[tmp_trgindex] == tmp_lower) + { + tmp_ulen2--; + if (found[tmp_trgindex]) + tmp_count--; + } + } + + smlr_max = Max(smlr_max, smlr_cur); + /* + * if we only check that subword similarity is greater than + * pg_trgm.subword_limit we do not need to calculate a + * maximum similarity + */ + if (check_only && smlr_max >= trgm_subword_limit) + break; + + for (tmp_lower = prev_lower; tmp_lower < lower; tmp_lower++) + { + int tmp_trgindex; + tmp_trgindex = trg2indexes[tmp_lower]; + if (lastpos[tmp_trgindex] == tmp_lower) + lastpos[tmp_trgindex] = -1; + } + } + } + + pfree(lastpos); + + return smlr_max; + } + + /* + * Calculate subword similarity. + * This function prepare two arrays: "trg2indexes" and "found". Then this arrays + * are used to calculate subword similarity using iterate_subword_similarity(). + * + * "trg2indexes" is array which stores indexes of the array "found". + * In other words: + * trg2indexes[j] = i; + * found[i] = true (or false); + * If found[i] == true then there is trigram trg2[j] in array "trg1". + * If found[i] == false then there is not trigram trg2[j] in array "trg1". + * + * str1: search pattern string, of length slen1 bytes. + * str2: text in which we are looking for a word, of length slen2 bytes. + * check_only: if true then only check existaince of similar search pattern in text + * + * Returns subword similarity. + */ + static float4 + calc_subword_similarity(char *str1, int slen1, char *str2, int slen2, + bool check_only) + { + bool *found; + pos_trgm *ptrg; + trgm *trg1; + trgm *trg2; + int len1, + len2, + len, + i, + j, + ulen1; + int *trg2indexes; + float4 result; + + protect_out_of_mem(slen1 + slen2); + + /* Make positional trigrams */ + trg1 = (trgm *) palloc(sizeof(trgm) * (slen1 / 2 + 1) * 3); + trg2 = (trgm *) palloc(sizeof(trgm) * (slen2 / 2 + 1) * 3); + + len1 = generate_trgm_only(trg1, str1, slen1); + len2 = generate_trgm_only(trg2, str2, slen2); + + ptrg = make_positional_trgm(trg1, len1, trg2, len2); + len = len1 + len2; + qsort(ptrg, len, sizeof(pos_trgm), comp_ptrgm); + + pfree(trg1); + pfree(trg2); + + /* + * Merge positional trigrams array: enumerate each trigram and find its + * presence in required word. + */ + trg2indexes = (int *) palloc(sizeof(int) * len2); + found = (bool *) palloc0(sizeof(bool) * len); + + ulen1 = 0; + j = 0; + for (i = 0; i < len; i++) + { + if (i > 0) + { + int cmp = CMPTRGM(ptrg[i - 1].trg, ptrg[i].trg); + if (cmp != 0) + { + if (found[j]) + ulen1++; + j++; + } + } + + if (ptrg[i].index >= 0) + { + trg2indexes[ptrg[i].index] = j; + } + else + { + found[j] = true; + } + } + if (found[j]) + ulen1++; + + /* Run iterative procedure to find maximum similarity with subword */ + result = iterate_subword_similarity(trg2indexes, found, ulen1, len2, len, + check_only); + + pfree(trg2indexes); + pfree(found); + pfree(ptrg); + + return result; + } + + + /* * Extract the next non-wildcard part of a search string, ie, a word bounded * by '_' or '%' meta-characters, non-word characters or string end. * *************** *** 459,475 **** generate_wildcard_trgm(const char *str, int slen) bytelen; const char *eword; ! /* ! * Guard against possible overflow in the palloc requests below. (We ! * don't worry about the additive constants, since palloc can detect ! * requests that are a little above MaxAllocSize --- we just need to ! * prevent integer overflow in the multiplications.) ! */ ! if ((Size) (slen / 2) >= (MaxAllocSize / (sizeof(trgm) * 3)) || ! (Size) slen >= (MaxAllocSize / pg_database_encoding_max_length())) ! ereport(ERROR, ! (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), ! errmsg("out of memory"))); trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) *3); trg->flag = ARRKEY; --- 791,797 ---- bytelen; const char *eword; ! protect_out_of_mem(slen); trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) *3); trg->flag = ARRKEY; *************** *** 590,596 **** show_trgm(PG_FUNCTION_ARGS) } float4 ! cnt_sml(TRGM *trg1, TRGM *trg2) { trgm *ptr1, *ptr2; --- 912,918 ---- } float4 ! cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact) { trgm *ptr1, *ptr2; *************** *** 624,637 **** cnt_sml(TRGM *trg1, TRGM *trg2) } } ! #ifdef DIVUNION ! return ((float4) count) / ((float4) (len1 + len2 - count)); ! #else ! return ((float4) count) / ((float4) ((len1 > len2) ? len1 : len2)); ! #endif ! } /* * Returns whether trg2 contains all trigrams in trg1. * This relies on the trigram arrays being sorted. --- 946,960 ---- } } ! /* ! * If inexact then len2 is equal to count, because we don't know actual ! * length of second string in inexact search and we can assume that count ! * is a lower bound of len2. ! */ ! return CALCSML(count, len1, inexact ? count : len2); } + /* * Returns whether trg2 contains all trigrams in trg1. * This relies on the trigram arrays being sorted. *************** *** 726,732 **** similarity(PG_FUNCTION_ARGS) trg1 = generate_trgm(VARDATA(in1), VARSIZE(in1) - VARHDRSZ); trg2 = generate_trgm(VARDATA(in2), VARSIZE(in2) - VARHDRSZ); ! res = cnt_sml(trg1, trg2); pfree(trg1); pfree(trg2); --- 1049,1055 ---- trg1 = generate_trgm(VARDATA(in1), VARSIZE(in1) - VARHDRSZ); trg2 = generate_trgm(VARDATA(in2), VARSIZE(in2) - VARHDRSZ); ! res = cnt_sml(trg1, trg2, false); pfree(trg1); pfree(trg2); *************** *** 737,742 **** similarity(PG_FUNCTION_ARGS) --- 1060,1081 ---- } Datum + subword_similarity(PG_FUNCTION_ARGS) + { + text *in1 = PG_GETARG_TEXT_PP(0); + text *in2 = PG_GETARG_TEXT_PP(1); + float4 res; + + res = calc_subword_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), + VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), + false); + + PG_FREE_IF_COPY(in1, 0); + PG_FREE_IF_COPY(in2, 1); + PG_RETURN_FLOAT4(res); + } + + Datum similarity_dist(PG_FUNCTION_ARGS) { float4 res = DatumGetFloat4(DirectFunctionCall2(similarity, *************** *** 755,757 **** similarity_op(PG_FUNCTION_ARGS) --- 1094,1160 ---- PG_RETURN_BOOL(res >= trgm_sml_limit); } + + Datum + subword_similarity_op(PG_FUNCTION_ARGS) + { + text *in1 = PG_GETARG_TEXT_PP(0); + text *in2 = PG_GETARG_TEXT_PP(1); + float4 res; + + res = calc_subword_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), + VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), + true); + + PG_FREE_IF_COPY(in1, 0); + PG_FREE_IF_COPY(in2, 1); + PG_RETURN_BOOL(res >= trgm_subword_limit); + } + + Datum + subword_similarity_commutator_op(PG_FUNCTION_ARGS) + { + text *in1 = PG_GETARG_TEXT_PP(0); + text *in2 = PG_GETARG_TEXT_PP(1); + float4 res; + + res = calc_subword_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), + VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), + true); + + PG_FREE_IF_COPY(in1, 0); + PG_FREE_IF_COPY(in2, 1); + PG_RETURN_BOOL(res >= trgm_subword_limit); + } + + Datum + subword_similarity_dist_op(PG_FUNCTION_ARGS) + { + text *in1 = PG_GETARG_TEXT_PP(0); + text *in2 = PG_GETARG_TEXT_PP(1); + float4 res; + + res = calc_subword_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), + VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), + false); + + PG_FREE_IF_COPY(in1, 0); + PG_FREE_IF_COPY(in2, 1); + PG_RETURN_FLOAT4(1.0 - res); + } + + Datum + subword_similarity_dist_commutator_op(PG_FUNCTION_ARGS) + { + text *in1 = PG_GETARG_TEXT_PP(0); + text *in2 = PG_GETARG_TEXT_PP(1); + float4 res; + + res = calc_subword_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), + VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), + false); + + PG_FREE_IF_COPY(in1, 0); + PG_FREE_IF_COPY(in2, 1); + PG_RETURN_FLOAT4(1.0 - res); + } *** a/doc/src/sgml/pgtrgm.sgml --- b/doc/src/sgml/pgtrgm.sgml *************** *** 85,90 **** --- 85,101 ---- </entry> </row> <row> + <entry><function>subword_similarity(text, text)</function><indexterm><primary>subword_similarity</primary></indexterm></entry> + <entry><type>real</type></entry> + <entry> + Returns a number that indicates how similar the first string + to the most similar word of the second string. The range of + the result is zero (indicating that the two strings are completely + dissimilar) to one (indicating that the first string is identical + to one of the word of the second string). + </entry> + </row> + <row> <entry><function>show_trgm(text)</function><indexterm><primary>show_trgm</primary></indexterm></entry> <entry><type>text[]</type></entry> <entry> *************** *** 138,143 **** --- 149,163 ---- </entry> </row> <row> + <entry><type>text</> <literal><%</literal> <type>text</></entry> + <entry><type>boolean</type></entry> + <entry> + Returns <literal>true</> if its arguments have a subword similarity + that is greater than the current subword similarity threshold set by + <varname>pg_trgm.subword_limit</> parameter. + </entry> + </row> + <row> <entry><type>text</> <literal><-></literal> <type>text</></entry> <entry><type>real</type></entry> <entry> *************** *** 145,150 **** --- 165,178 ---- one minus the <function>similarity()</> value. </entry> </row> + <row> + <entry><type>text</> <literal><<-></literal> <type>text</></entry> + <entry><type>real</type></entry> + <entry> + Returns the <quote>distance</> between the arguments, that is + one minus the <function>subword_similarity()</> value. + </entry> + </row> </tbody> </tgroup> </table> *************** *** 168,173 **** --- 196,217 ---- </para> </listitem> </varlistentry> + + <varlistentry id="guc-pgtrgm-subword-limit" xreflabel="pg_trgm.subword_limit"> + <term> + <varname>pg_trgm.subword_limit</> (<type>real</type>) + <indexterm> + <primary><varname>pg_trgm.subword_limit</> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + Sets the current subword similarity threshold that is used by + the <literal><%</> operator. The threshold must be between + 0 and 1 (default is 0.6). + </para> + </listitem> + </varlistentry> </variablelist> </sect2> *************** *** 226,231 **** SELECT t, t <-> '<replaceable>word</>' AS dist --- 270,301 ---- </para> <para> + Also you can use an index on the <structfield>t</> column for subword + similarity. For example: + <programlisting> + SELECT t, subword_similarity('<replaceable>word</>', t) AS sml + FROM test_trgm + WHERE '<replaceable>word</>' <% t + ORDER BY sml DESC, t; + </programlisting> + This will return all values in the text column that have a word + which sufficiently similar to <replaceable>word</>, sorted from best + match to worst. The index will be used to make this a fast operation + even over very large data sets. + </para> + + <para> + A variant of the above query is + <programlisting> + SELECT t, '<replaceable>word</>' <<-> t AS dist + FROM test_trgm + ORDER BY dist LIMIT 10; + </programlisting> + This can be implemented quite efficiently by GiST indexes, but not + by GIN indexes. + </para> + + <para> Beginning in <productname>PostgreSQL</> 9.1, these index types also support index searches for <literal>LIKE</> and <literal>ILIKE</>, for example <programlisting>
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers