In the same vein, here's a patch to remove the hard-coded line length
limit for tsearch dictionary files (the fixed 4KB buffer in t_readline()).

                        regards, tom lane

diff --git a/src/backend/tsearch/dict_thesaurus.c b/src/backend/tsearch/dict_thesaurus.c
index cb0835982d..64c979086d 100644
--- a/src/backend/tsearch/dict_thesaurus.c
+++ b/src/backend/tsearch/dict_thesaurus.c
@@ -286,11 +286,6 @@ thesaurusRead(const char *filename, DictThesaurus *d)
 					(errcode(ERRCODE_CONFIG_FILE_ERROR),
 					 errmsg("unexpected end of line")));
 
-		/*
-		 * Note: currently, tsearch_readline can't return lines exceeding 4KB,
-		 * so overflow of the word counts is impossible.  But that may not
-		 * always be true, so let's check.
-		 */
 		if (nwrd != (uint16) nwrd || posinsubst != (uint16) posinsubst)
 			ereport(ERROR,
 					(errcode(ERRCODE_CONFIG_FILE_ERROR),
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index a916dd6cb6..247180d56e 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -14,6 +14,8 @@
 #include "postgres.h"
 
 #include "catalog/pg_collation.h"
+#include "common/string.h"
+#include "lib/stringinfo.h"
 #include "storage/fd.h"
 #include "tsearch/ts_locale.h"
 #include "tsearch/ts_public.h"
@@ -204,29 +206,41 @@ tsearch_readline_callback(void *arg)
 char *
 t_readline(FILE *fp)
 {
+	StringInfoData buf;
 	int			len;
 	char	   *recoded;
-	char		buf[4096];		/* lines must not be longer than this */
 
-	if (fgets(buf, sizeof(buf), fp) == NULL)
+	initStringInfo(&buf);
+
+	if (!pg_get_line_buf(fp, &buf))
+	{
+		pfree(buf.data);
 		return NULL;
+	}
 
-	len = strlen(buf);
+	len = buf.len;
 
 	/* Make sure the input is valid UTF-8 */
-	(void) pg_verify_mbstr(PG_UTF8, buf, len, false);
+	(void) pg_verify_mbstr(PG_UTF8, buf.data, len, false);
 
 	/* And convert */
-	recoded = pg_any_to_server(buf, len, PG_UTF8);
-	if (recoded == buf)
+	recoded = pg_any_to_server(buf.data, len, PG_UTF8);
+	if (recoded == buf.data)
 	{
 		/*
 		 * conversion didn't pstrdup, so we must. We can use the length of the
 		 * original string, because no conversion was done.
+		 *
+		 * Note: it might seem attractive to just return buf.data, and in most
+		 * usages that'd be fine.  But a few callers save the returned string
+		 * as long-term data, so returning a palloc chunk that's bigger than
+		 * necessary is a bad idea.
 		 */
 		recoded = pnstrdup(recoded, len);
 	}
 
+	pfree(buf.data);
+
 	return recoded;
 }
 

Reply via email to