In the same vein, here's a patch to remove the hard-coded line length limit for tsearch dictionary files.
regards, tom lane
diff --git a/src/backend/tsearch/dict_thesaurus.c b/src/backend/tsearch/dict_thesaurus.c
index cb0835982d..64c979086d 100644
--- a/src/backend/tsearch/dict_thesaurus.c
+++ b/src/backend/tsearch/dict_thesaurus.c
@@ -286,11 +286,6 @@ thesaurusRead(const char *filename, DictThesaurus *d)
 						(errcode(ERRCODE_CONFIG_FILE_ERROR),
 						 errmsg("unexpected end of line")));
 
-		/*
-		 * Note: currently, tsearch_readline can't return lines exceeding 4KB,
-		 * so overflow of the word counts is impossible. But that may not
-		 * always be true, so let's check.
-		 */
 		if (nwrd != (uint16) nwrd || posinsubst != (uint16) posinsubst)
 			ereport(ERROR,
 					(errcode(ERRCODE_CONFIG_FILE_ERROR),
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index a916dd6cb6..247180d56e 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -14,6 +14,8 @@
 #include "postgres.h"
 
 #include "catalog/pg_collation.h"
+#include "common/string.h"
+#include "lib/stringinfo.h"
 #include "storage/fd.h"
 #include "tsearch/ts_locale.h"
 #include "tsearch/ts_public.h"
@@ -204,29 +206,41 @@ tsearch_readline_callback(void *arg)
 char *
 t_readline(FILE *fp)
 {
+	StringInfoData buf;
 	int			len;
 	char	   *recoded;
-	char		buf[4096];		/* lines must not be longer than this */
 
-	if (fgets(buf, sizeof(buf), fp) == NULL)
+	initStringInfo(&buf);
+
+	if (!pg_get_line_buf(fp, &buf))
+	{
+		pfree(buf.data);
 		return NULL;
+	}
 
-	len = strlen(buf);
+	len = buf.len;
 
 	/* Make sure the input is valid UTF-8 */
-	(void) pg_verify_mbstr(PG_UTF8, buf, len, false);
+	(void) pg_verify_mbstr(PG_UTF8, buf.data, len, false);
 
 	/* And convert */
-	recoded = pg_any_to_server(buf, len, PG_UTF8);
-	if (recoded == buf)
+	recoded = pg_any_to_server(buf.data, len, PG_UTF8);
+	if (recoded == buf.data)
 	{
 		/*
 		 * conversion didn't pstrdup, so we must. We can use the length of the
 		 * original string, because no conversion was done.
+		 *
+		 * Note: it might seem attractive to just return buf.data, and in most
+		 * usages that'd be fine. But a few callers save the returned string
+		 * as long-term data, so returning a palloc chunk that's bigger than
+		 * necessary is a bad idea.
 		 */
 		recoded = pnstrdup(recoded, len);
 	}
 
+	pfree(buf.data);
+
 	return recoded;
 }
 