From 6e75d02598594e55930d4441724e136f4273d0bd Mon Sep 17 00:00:00 2001
From: J Smith <dark.panda@gmail.com>
Date: Sun, 6 Nov 2011 16:48:20 -0500
Subject: [PATCH] Fix weirdness when dealing with UTF-8 in buggy libc
 implementations.

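The OS X libc inherits a bug from an older FreeBSD libc that causes it to
incorrectly treat certain characters as whitespace when using UTF-8.

Work around this when reading the unaccent rules file by converting each
line to wide characters, parsing it with swscanf() instead of sscanf(),
and converting the two parsed fields back to multibyte strings.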
---
 contrib/unaccent/unaccent.c |   10 +++++++++-
 1 files changed, 9 insertions(+), 1 deletions(-)

diff --git a/contrib/unaccent/unaccent.c b/contrib/unaccent/unaccent.c
index d9c2eac..f5ab8b2 100644
--- a/contrib/unaccent/unaccent.c
+++ b/contrib/unaccent/unaccent.c
@@ -93,6 +93,9 @@ initSuffixTree(char *filename)
 	{
 		char		src[4096];
 		char		trg[4096];
+		wchar_t	wsrc[4096];
+		wchar_t	wtrg[4096];
+		wchar_t	wline[4096];
 		int			srclen;
 		int			trglen;
 		char	   *line = NULL;
@@ -108,9 +111,14 @@ initSuffixTree(char *filename)
 			 */
 			while ((line = tsearch_readline(&trst)) != NULL)
 			{
-				if (sscanf(line, "%s\t%s\n", src, trg) != 2)
+				char2wchar(wline, 4096, line, strlen(line) + 1, (pg_locale_t) 0);
+
+				if (swscanf(wline, L"%ls\t%ls\n", wsrc, wtrg) != 2)
 					continue;
 
+				wchar2char(src, wsrc, 4096, (pg_locale_t) 0);
+				wchar2char(trg, wtrg, 4096, (pg_locale_t) 0);
+
 				srclen = strlen(src);
 				trglen = strlen(trg);
 
-- 
1.7.7.2

