Stephen Frost <sfr...@snowman.net> writes: > * Tom Lane (t...@sss.pgh.pa.us) wrote: >> Stephen Frost <sfr...@snowman.net> writes: >>> * Tom Lane (t...@sss.pgh.pa.us) wrote: >>>> We could hard-code a rule like that, or we could introduce a new >>>> explicit parameter for the maximum cover length. The latter would be >>>> more flexible, but we need something back-patchable and I'm concerned >>>> about the compatibility hazards of adding a new parameter in minor >>>> releases. So on the whole I propose hard-wiring a multiplier of, >>>> say, 10 for both these cases.
>>> That sounds alright to me, though I do think we should probably still >>> toss a CFI (or two) in this path somewhere as we don't know how long >>> some of these functions might take... >> Yeah, of course. I'm still leaning to doing that in TS_execute_recurse. > Works for me. Here's a proposed patch along that line. regards, tom lane
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c index 76b6f9aef0..1df1c0362d 100644 --- a/src/backend/tsearch/wparser_def.c +++ b/src/backend/tsearch/wparser_def.c @@ -2028,8 +2028,10 @@ hlFirstIndex(HeadlineParsedText *prs, TSQuery query, int pos) /* * hlCover: try to find a substring of prs' word list that satisfies query * - * At entry, *p must be the first word index to consider (initialize this to - * zero, or to the next index after a previous successful search). + * At entry, *p must be the first word index to consider (initialize this + * to zero, or to the next index after a previous successful search). + * We will consider all substrings starting at or after that word, and + * containing no more than max_cover words. * * On success, sets *p to first word index and *q to last word index of the * cover substring, and returns true. @@ -2038,7 +2040,8 @@ hlFirstIndex(HeadlineParsedText *prs, TSQuery query, int pos) * words used in the query. */ static bool -hlCover(HeadlineParsedText *prs, TSQuery query, int *p, int *q) +hlCover(HeadlineParsedText *prs, TSQuery query, int max_cover, + int *p, int *q) { int pmin, pmax, @@ -2084,7 +2087,7 @@ hlCover(HeadlineParsedText *prs, TSQuery query, int *p, int *q) nextpmin = nextpmax; pmax = nextpmax; } - while (pmax >= 0); + while (pmax >= 0 && pmax - pmin < max_cover); /* No luck here, so try next feasible startpoint */ pmin = nextpmin; } @@ -2186,7 +2189,7 @@ get_next_fragment(HeadlineParsedText *prs, int *startpos, int *endpos, static void mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, bool highlightall, int shortword, int min_words, - int max_words, int max_fragments) + int max_words, int max_fragments, int max_cover) { int32 poslen, curlen, @@ -2213,7 +2216,7 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, bool highlightall, covers = palloc(maxcovers * sizeof(CoverPos)); /* get all covers */ - while (hlCover(prs, query, &p, &q)) + while (hlCover(prs, query, max_cover, &p, &q)) { startpos = p; endpos = q; @@ -2368,7 +2371,7 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, bool highlightall, */ static void mark_hl_words(HeadlineParsedText *prs, TSQuery query, bool highlightall, - int shortword, int min_words, int max_words) + int shortword, int min_words, int max_words, int max_cover) { int p = 0, q = 0; @@ -2386,7 +2389,7 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, bool highlightall, if (!highlightall) { /* examine all covers, select a headline using the best one */ - while (hlCover(prs, query, &p, &q)) + while (hlCover(prs, query, max_cover, &p, &q)) { /* * Count words (curlen) and interesting words (poslen) within @@ -2542,6 +2545,7 @@ prsd_headline(PG_FUNCTION_ARGS) int shortword = 3; int max_fragments = 0; bool highlightall = false; + int max_cover; ListCell *l; /* Extract configuration option values */ @@ -2581,6 +2585,15 @@ prsd_headline(PG_FUNCTION_ARGS) defel->defname))); } + /* + * We might eventually make max_cover a user-settable parameter, but for + * now, just compute a reasonable value based on max_words and + * max_fragments. + */ + max_cover = Max(max_words * 10, 100); + if (max_fragments > 0) + max_cover *= max_fragments; + /* in HighlightAll mode these parameters are ignored */ if (!highlightall) { @@ -2605,10 +2618,10 @@ prsd_headline(PG_FUNCTION_ARGS) /* Apply appropriate headline selector */ if (max_fragments == 0) mark_hl_words(prs, query, highlightall, shortword, - min_words, max_words); + min_words, max_words, max_cover); else mark_hl_fragments(prs, query, highlightall, shortword, - min_words, max_words, max_fragments); + min_words, max_words, max_fragments, max_cover); /* Fill in default values for string options */ if (!prs->startsel) diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c index f01b1ee253..756a48a167 100644 --- a/src/backend/utils/adt/tsvector_op.c +++ b/src/backend/utils/adt/tsvector_op.c @@ -1868,6 +1868,9 @@ TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags, /* since this function recurses, it could be driven to stack overflow */ check_stack_depth(); + /* ... and let's check for query cancel while we're at it */ + CHECK_FOR_INTERRUPTS(); + if (curitem->type == QI_VAL) return chkcond(arg, (QueryOperand *) curitem, NULL /* don't need position info */ );