Re: [HACKERS] Radix tree for character conversion

Kyotaro HORIGUCHI Thu, 02 Feb 2017 20:20:08 -0800

Thanks to Heikki having pushed the first two patches and a
part of the third, only one patch remains now.


# Sorry for not separating out the KOI8 stuff.

At Tue, 31 Jan 2017 19:06:09 +0900 (Tokyo Standard Time), Kyotaro HORIGUCHI 
<horiguchi.kyot...@lab.ntt.co.jp> wrote in 
<20170131.190609.254672218.horiguchi.kyot...@lab.ntt.co.jp>
> > Thanks for the new version, I'll look at it once I am done with the
> > cleanup of the current CF. For now I have moved it to the CF 2017-03.
> 
> Agreed. Thank you.

Attached is the latest version on the current master (555494d).

Note: since this patch was created with git diff --irreversible-delete,
the three files mb/Unicode/*.(txt|xml) that are to be deleted are left alone.

regards,

-- 
Kyotaro Horiguchi
NTT Open Source Software Center

>From 68d75100b7e8aaab7706ea780a1e23557c676c87 Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horiguchi.kyot...@lab.ntt.co.jp>
Date: Tue, 10 Jan 2017 20:02:00 +0900
Subject: [PATCH] Use radix tree for character conversion

This patch adds a multibyte character converter using a radix tree,
based on Heikki's rework of my previous patch.
---
 src/backend/utils/mb/Makefile                      |     2 +
 src/backend/utils/mb/Unicode/.gitignore            |    11 +
 src/backend/utils/mb/Unicode/Makefile              |    72 +-
 src/backend/utils/mb/Unicode/UCS_to_BIG5.pl        |     9 +-
 src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl      |     9 +-
 .../utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl        |    19 +-
 src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl      |     6 +-
 src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl      |    13 +-
 src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl      |     9 +-
 src/backend/utils/mb/Unicode/UCS_to_GB18030.pl     |     9 +-
 src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl       |    11 +-
 .../utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl      |    14 +-
 src/backend/utils/mb/Unicode/UCS_to_SJIS.pl        |    29 +-
 src/backend/utils/mb/Unicode/UCS_to_UHC.pl         |    11 +-
 src/backend/utils/mb/Unicode/UCS_to_most.pl        |     5 +-
 src/backend/utils/mb/Unicode/convutils.pm          |   679 +-
 src/backend/utils/mb/Unicode/euc-jis-2004-std.txt  | 11549 -------
 src/backend/utils/mb/Unicode/gb-18030-2000.xml     | 30916 -------------------
 src/backend/utils/mb/Unicode/make_mapchecker.pl    |    78 +
 src/backend/utils/mb/Unicode/map_checker.c         |    94 +
 .../utils/mb/Unicode/sjis-0213-2004-std.txt        | 11549 -------
 src/backend/utils/mb/char_converter.c              |   116 +
 src/backend/utils/mb/conv.c                        |   137 +-
 .../conversion_procs/utf8_and_big5/utf8_and_big5.c |     8 +-
 .../utf8_and_cyrillic/utf8_and_cyrillic.c          |    16 +-
 .../utf8_and_euc2004/utf8_and_euc2004.c            |     8 +-
 .../utf8_and_euc_cn/utf8_and_euc_cn.c              |     8 +-
 .../utf8_and_euc_jp/utf8_and_euc_jp.c              |     8 +-
 .../utf8_and_euc_kr/utf8_and_euc_kr.c              |     8 +-
 .../utf8_and_euc_tw/utf8_and_euc_tw.c              |     8 +-
 .../utf8_and_gb18030/utf8_and_gb18030.c            |     8 +-
 .../conversion_procs/utf8_and_gbk/utf8_and_gbk.c   |     8 +-
 .../utf8_and_iso8859/utf8_and_iso8859.c            |   127 +-
 .../utf8_and_johab/utf8_and_johab.c                |     8 +-
 .../conversion_procs/utf8_and_sjis/utf8_and_sjis.c |     8 +-
 .../utf8_and_sjis2004/utf8_and_sjis2004.c          |     8 +-
 .../conversion_procs/utf8_and_uhc/utf8_and_uhc.c   |     8 +-
 .../conversion_procs/utf8_and_win/utf8_and_win.c   |    98 +-
 src/include/mb/pg_wchar.h                          |    56 +-
 39 files changed, 1355 insertions(+), 54385 deletions(-)
 create mode 100644 src/backend/utils/mb/Unicode/.gitignore
 delete mode 100644 src/backend/utils/mb/Unicode/euc-jis-2004-std.txt
 delete mode 100644 src/backend/utils/mb/Unicode/gb-18030-2000.xml
 create mode 100755 src/backend/utils/mb/Unicode/make_mapchecker.pl
 create mode 100644 src/backend/utils/mb/Unicode/map_checker.c
 delete mode 100644 src/backend/utils/mb/Unicode/sjis-0213-2004-std.txt
 create mode 100644 src/backend/utils/mb/char_converter.c

diff --git a/src/backend/utils/mb/Makefile b/src/backend/utils/mb/Makefile
index 89bec21..d48e729 100644
--- a/src/backend/utils/mb/Makefile
+++ b/src/backend/utils/mb/Makefile
@@ -14,6 +14,8 @@ include $(top_builddir)/src/Makefile.global
 
 OBJS = encnames.o conv.o mbutils.o wchar.o wstrcmp.o wstrncmp.o
 
+conv.o: conv.c char_converter.c
+
 include $(top_srcdir)/src/backend/common.mk
 
 clean distclean maintainer-clean:
diff --git a/src/backend/utils/mb/Unicode/.gitignore b/src/backend/utils/mb/Unicode/.gitignore
new file mode 100644
index 0000000..3908cc3
--- /dev/null
+++ b/src/backend/utils/mb/Unicode/.gitignore
@@ -0,0 +1,11 @@
+# ignore backup files of editors
+/*[~#]
+
+# ignore authority files
+/*.TXT
+/*.txt
+/*.xml
+
+# ignore generated files
+/map_checker
+/map_checker.h
diff --git a/src/backend/utils/mb/Unicode/Makefile b/src/backend/utils/mb/Unicode/Makefile
index 10708b3..6706157 100644
--- a/src/backend/utils/mb/Unicode/Makefile
+++ b/src/backend/utils/mb/Unicode/Makefile
@@ -52,12 +52,17 @@ SPECIALMAPS = euc_cn_to_utf8.map utf8_to_euc_cn.map \
 	big5_to_utf8.map utf8_to_big5.map \
 	johab_to_utf8.map utf8_to_johab.map \
 	uhc_to_utf8.map utf8_to_uhc.map \
-	euc_jis_2004_to_utf8.map euc_jis_2004_to_utf8_combined.map \
-	utf8_to_euc_jis_2004.map utf8_to_euc_jis_2004_combined.map \
-	shift_jis_2004_to_utf8.map shift_jis_2004_to_utf8_combined.map \
-	utf8_to_shift_jis_2004.map utf8_to_shift_jis_2004_combined.map
+	euc_jis_2004_to_utf8.map utf8_to_euc_jis_2004.map \
+	shift_jis_2004_to_utf8.map utf8_to_shift_jis_2004.map
 
-MAPS = $(GENERICMAPS) $(SPECIALMAPS)
+COMBINEDMAPS = euc_jis_2004_to_utf8_combined.map \
+	utf8_to_euc_jis_2004_combined.map \
+	shift_jis_2004_to_utf8_combined.map \
+	utf8_to_shift_jis_2004_combined.map
+
+RADIXGENERICMAPS = $(subst .map,_radix.map,$(GENERICMAPS))
+RADIXMAPS = $(subst .map,_radix.map,$(GENERICMAPS) $(SPECIALMAPS))
+MAPS = $(GENERICMAPS) $(SPECIALMAPS) $(COMBINEDMAPS)
 
 ISO8859TEXTS = 8859-2.TXT 8859-3.TXT 8859-4.TXT 8859-5.TXT \
 	8859-6.TXT 8859-7.TXT 8859-8.TXT 8859-9.TXT \
@@ -69,53 +74,76 @@ WINTEXTS = CP866.TXT CP874.TXT CP936.TXT \
 	CP1252.TXT CP1253.TXT CP1254.TXT CP1255.TXT \
 	CP1256.TXT CP1257.TXT CP1258.TXT
 
+SPECIALTEXTS = BIG5.TXT CNS11643.TXT \
+	CP932.TXT CP950.TXT \
+	JIS0201.TXT JIS0208.TXT JIS0212.TXT SHIFTJIS.TXT \
+	JOHAB.TXT KSX1001.TXT windows-949-2000.xml \
+	euc-jis-2004-std.txt sjis-0213-2004-std.txt \
+	gb-18030-2000.xml
+
 GENERICTEXTS = $(ISO8859TEXTS) $(WINTEXTS) \
 	KOI8-R.TXT KOI8-U.TXT
 
-all: $(MAPS)
+TEXTS = $(GENERICTEXTS) $(WINTEXTS) $(ISO8859TEXTS) $(SPECIALTEXTS)
+
+OBJS = map_checker.o
+
+BINS = map_checker
+
+all: $(MAPS) $(RADIXMAPS) $(BINS)
+
+map_checker.h: make_mapchecker.pl $(MAPS) $(RADIXMAPS)
+	$(PERL) $<
+
+map_checker.o: map_checker.c map_checker.h ../char_converter.c
+
+map_checker: map_checker.o
 
-$(GENERICMAPS): UCS_to_most.pl $(GENERICTEXTS)
+$(GENERICMAPS) $(RADIXGENERICMAPS): UCS_to_most.pl $(GENERICTEXTS)
 	$(PERL) $<
 
-johab_to_utf8.map utf8_to_johab.map: UCS_to_JOHAB.pl JOHAB.TXT
+johab_to_utf8.map utf8_to_johab.map johab_to_utf8_radix.map utf8_to_johab_radix.map: UCS_to_JOHAB.pl JOHAB.TXT
 	$(PERL) $<
 
-uhc_to_utf8.map utf8_to_uhc.map: UCS_to_UHC.pl windows-949-2000.xml
+uhc_to_utf8.map utf8_to_uhc.map uhc_to_utf8_radix.map utf8_to_uhc_radix.map: UCS_to_UHC.pl windows-949-2000.xml
 	$(PERL) $<
 
-euc_jp_to_utf8.map utf8_to_euc_jp.map: UCS_to_EUC_JP.pl CP932.TXT JIS0212.TXT
+euc_jp_to_utf8.map utf8_to_euc_jp.map euc_jp_to_utf8_radix.map utf8_to_euc_jp_radix.map: UCS_to_EUC_JP.pl CP932.TXT JIS0212.TXT
 	$(PERL) $<
 
-euc_cn_to_utf8.map utf8_to_euc_cn.map: UCS_to_EUC_CN.pl gb-18030-2000.xml
+euc_cn_to_utf8.map utf8_to_euc_cn.map euc_cn_to_utf8_radix.map utf8_to_euc_cn_radix.map: UCS_to_EUC_CN.pl gb-18030-2000.xml
 	$(PERL) $<
 
-euc_kr_to_utf8.map utf8_to_euc_kr.map: UCS_to_EUC_KR.pl KSX1001.TXT
+euc_kr_to_utf8.map utf8_to_euc_kr.map euc_kr_to_utf8_radix.map utf8_to_euc_kr_radix.map: UCS_to_EUC_KR.pl KSX1001.TXT
 	$(PERL) $<
 
-euc_tw_to_utf8.map utf8_to_euc_tw.map: UCS_to_EUC_TW.pl CNS11643.TXT
+euc_tw_to_utf8.map utf8_to_euc_tw.map euc_tw_to_utf8_radix.map utf8_to_euc_tw_radix.map: UCS_to_EUC_TW.pl CNS11643.TXT
 	$(PERL) $<
 
-sjis_to_utf8.map utf8_to_sjis.map: UCS_to_SJIS.pl CP932.TXT
+sjis_to_utf8.map utf8_to_sjis.map sjis_to_utf8_radix.map utf8_to_sjis_radix.map: UCS_to_SJIS.pl CP932.TXT
 	$(PERL) $<
 
-gb18030_to_utf8.map utf8_to_gb18030.map: UCS_to_GB18030.pl gb-18030-2000.xml
+gb18030_to_utf8.map utf8_to_gb18030.map gb18030_to_utf8_radix.map utf8_to_gb18030_radix.map: UCS_to_GB18030.pl gb-18030-2000.xml
 	$(PERL) $<
 
-big5_to_utf8.map utf8_to_big5.map: UCS_to_BIG5.pl BIG5.TXT CP950.TXT
+big5_to_utf8.map utf8_to_big5.map big5_to_utf8_radix.map utf8_to_big5_radix.map: UCS_to_BIG5.pl BIG5.TXT CP950.TXT
 	$(PERL) $<
 
-euc_jis_2004_to_utf8.map euc_jis_2004_to_utf8_combined.map utf8_to_euc_jis_2004.map utf8_to_euc_jis_2004_combined.map: UCS_to_EUC_JIS_2004.pl euc-jis-2004-std.txt
+euc_jis_2004_to_utf8.map euc_jis_2004_to_utf8_radix.map euc_jis_2004_to_utf8_combined.map utf8_to_euc_jis_2004.map utf8_to_euc_jis_2004_radix.map utf8_to_euc_jis_2004_combined.map: UCS_to_EUC_JIS_2004.pl euc-jis-2004-std.txt
 	$(PERL) $<
 
-shift_jis_2004_to_utf8.map shift_jis_2004_to_utf8_combined.map utf8_to_shift_jis_2004.map utf8_to_shift_jis_2004_combined.map: UCS_to_SHIFT_JIS_2004.pl sjis-0213-2004-std.txt
+shift_jis_2004_to_utf8.map shift_jis_2004_to_utf8_radix.map shift_jis_2004_to_utf8_combined.map utf8_to_shift_jis_2004.map utf8_to_shift_jis_2004_radix.map utf8_to_shift_jis_2004_combined.map: UCS_to_SHIFT_JIS_2004.pl sjis-0213-2004-std.txt
 	$(PERL) $<
 
-distclean: clean
-	rm -f $(TEXTS)
+distclean:
+	rm -f $(TEXTS) $(GENERICMAPS) $(SPECIALMAPS) $(OBJS) $(BINS) map_checker.h
 
-maintainer-clean: distclean
-	rm -f $(MAPS)
+# maintainer-clean intentionally leaves $(TEXTS)
+maintainer-clean:
+	rm -f $(MAPS) $(RADIXMAPS) $(GENERICMAPS) $(SPECIALMAPS) $(OBJS) $(BINS) map_checker.h
 
+mapcheck: $(MAPS) $(RADIXMAPS) map_checker
+	./map_checker
 
 DOWNLOAD = wget -O $@ --no-use-server-timestamps
 #DOWNLOAD = curl -o $@
diff --git a/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl b/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl
index 20f6c70..65c6955 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl
@@ -27,6 +27,8 @@
 use strict;
 require convutils;
 
+my $this_script = $0;
+
 # Load BIG5.TXT
 my $all = &read_source("BIG5.TXT");
 
@@ -47,7 +49,9 @@ foreach my $i (@$cp950txt) {
 		push @$all, {code => $code,
 					 ucs => $ucs,
 					 comment => $i->{comment},
-					 direction => "both"};
+					 direction => "both",
+					 f => $i->{f},
+					 l => $i->{l} };
 	}
 }
 
@@ -65,4 +69,5 @@ foreach my $i (@$all) {
 }
 
 # Output
-print_tables("BIG5", $all);
+print_tables($this_script, "BIG5", $all, 1);
+print_radix_trees($this_script, "BIG5", $all);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl
index 4f4375e..600d3ce 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl
@@ -16,6 +16,8 @@
 use strict;
 require convutils;
 
+my $this_script = $0;
+
 # Read the input
 
 my $in_file = "gb-18030-2000.xml";
@@ -68,9 +70,12 @@ while (<$in>)
 	push @mapping, {
 		ucs => $ucs,
 		code => $code,
-		direction => 'both'
+		direction => 'both',
+		f		  => $in_file,
+		l		  => $.
 	};
 }
 close($in);
 
-print_tables("EUC_CN", \@mapping);
+print_tables($this_script, "EUC_CN", \@mapping, 1);
+print_radix_trees($this_script, "EUC_CN", \@mapping);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
index cbe2a1e..6066139 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
@@ -10,6 +10,8 @@
 use strict;
 require convutils;
 
+my $this_script = $0;
+
 # first generate UTF-8 --> EUC_JIS_2004 table
 
 my $in_file = "euc-jis-2004-std.txt";
@@ -33,8 +35,10 @@ while (my $line = <$in>)
 					 ucs => $ucs1,
 					 ucs_second => $ucs2,
 					 code => $code,
-					 comment => $rest };
-		next;
+					 comment => $rest,
+					 f => $in_file,
+					 l => $.
+		};
 	}
 	elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
 	{
@@ -45,9 +49,16 @@ while (my $line = <$in>)
 
 		next if ($code < 0x80 && $ucs < 0x80);
 
-		push @all, { direction => 'both', ucs => $ucs, code => $code, comment => $rest };
+		push @all, { direction => 'both',
+					 ucs => $ucs,
+					 code => $code,
+					 comment => $rest,
+					 f => $in_file,
+					 l => $.
+		};
 	}
 }
 close($in);
 
-print_tables("EUC_JIS_2004", \@all, 1);
+print_tables($this_script, "EUC_JIS_2004", \@all, 1);
+print_radix_trees($this_script, "EUC_JIS_2004", \@all);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
index 926d2d8..681b0d9 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
@@ -14,6 +14,8 @@
 use strict;
 require convutils;
 
+my $this_script = $0;
+
 # Load JIS0212.TXT
 my $jis0212 = &read_source("JIS0212.TXT");
 
@@ -191,7 +193,9 @@ push @mapping, (
 	 {direction => 'to_unicode', ucs => 0x3231, code => 0x8ff4ab, comment => '# PARENTHESIZED IDEOGRAPH STOCK'}
 	);
 
-print_tables("EUC_JP", \@mapping);
+print_tables($this_script, "EUC_JP", \@mapping, 1);
+print_radix_trees($this_script, "EUC_JP", \@mapping);
+
 
 #######################################################################
 # sjis2jis ; SJIS => JIS conversion
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl
index 228fc4d..a032a27 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl
@@ -19,6 +19,8 @@
 use strict;
 require convutils;
 
+my $this_script = $0;
+
 # Load the source file.
 
 my $mapping = &read_source("KSX1001.TXT");
@@ -29,10 +31,11 @@ foreach my $i (@$mapping)
 }
 
 # Some extra characters that are not in KSX1001.TXT
-push @$mapping, (
-	{direction => 'both', ucs => 0x20AC, code => 0xa2e6, comment => '# EURO SIGN'},
-	{direction => 'both', ucs => 0x00AE, code => 0xa2e7, comment => '# REGISTERED SIGN'},
-	{direction => 'both', ucs => 0x327E, code => 0xa2e8, comment => '# CIRCLED HANGUL IEUNG U'}
+push @$mapping,(
+	{direction => 'both', ucs => 0x20AC, code => 0xa2e6, comment => '# EURO SIGN', f => $this_script, l => __LINE__},
+	{direction => 'both', ucs => 0x00AE, code => 0xa2e7, comment => '# REGISTERED SIGN', f => $this_script, l => __LINE__ },
+	{direction => 'both', ucs => 0x327E, code => 0xa2e8, comment => '# CIRCLED HANGUL IEUNG U', f => $this_script, l => __LINE__ }
 	);
 
-print_tables("EUC_KR", $mapping);
+print_tables($this_script, "EUC_KR", $mapping, 1);
+print_radix_trees($this_script, "EUC_KR", $mapping);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl
index 296ed2b..0b73218 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl
@@ -20,6 +20,8 @@
 use strict;
 require convutils;
 
+my $this_script = $0;
+
 my $mapping = &read_source("CNS11643.TXT");
 
 my @extras;
@@ -54,11 +56,14 @@ foreach my $i (@$mapping)
 			ucs => $i->{ucs},
 			code => ($i->{code} + 0x8ea10000),
 			rest => $i->{rest},
-			direction => 'to_unicode'
+			direction => 'to_unicode',
+		    f		  => $i->{f},
+		    l		  => $i->{l}
 		};
 	}
 }
 
 push @$mapping, @extras;
 
-print_tables("EUC_TW", $mapping);
+print_tables($this_script, "EUC_TW", $mapping, 1);
+print_radix_trees($this_script, "EUC_TW", $mapping);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl b/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
index f754611..3c57fd6 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
@@ -16,6 +16,8 @@
 use strict;
 require convutils;
 
+my $this_script = $0;
+
 # Read the input
 
 my $in_file = "gb-18030-2000.xml";
@@ -36,10 +38,13 @@ while (<$in>)
 		push @mapping, {
 			ucs => $ucs,
 			code => $code,
-			direction => 'both'
+			direction => 'both',
+		    f => $in_file,
+		    l => $.
 		};
 	}
 }
 close($in);
 
-print_tables("GB18030", \@mapping);
+print_tables($this_script, "GB18030", \@mapping, 1);
+print_radix_trees($this_script, "GB18030", \@mapping);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl
index b84d589..b3447ff 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl
@@ -18,15 +18,18 @@
 use strict;
 require convutils;
 
+my $this_script = $0;
+
 # Load the source file.
 
 my $mapping = &read_source("JOHAB.TXT");
 
 # Some extra characters that are not in JOHAB.TXT
 push @$mapping, (
-	{direction => 'both', ucs => 0x20AC, code => 0xd9e6, comment => '# EURO SIGN'},
-	{direction => 'both', ucs => 0x00AE, code => 0xd9e7, comment => '# REGISTERED SIGN'},
-	{direction => 'both', ucs => 0x327E, code => 0xd9e8, comment => '# CIRCLED HANGUL IEUNG U'}
+	{direction => 'both', ucs => 0x20AC, code => 0xd9e6, comment => '# EURO SIGN', f => $this_script, l =>  __LINE__ },
+	{direction => 'both', ucs => 0x00AE, code => 0xd9e7, comment => '# REGISTERED SIGN', f => $this_script, l =>  __LINE__ },
+	{direction => 'both', ucs => 0x327E, code => 0xd9e8, comment => '# CIRCLED HANGUL IEUNG U', f => $this_script, l =>  __LINE__ }
 	);
 
-print_tables("JOHAB", $mapping);
+print_tables($this_script, "JOHAB", $mapping, 1);
+print_radix_trees($this_script, "JOHAB", $mapping);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl b/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
index 67b6ef6..a6d5483 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
@@ -12,6 +12,8 @@ require convutils;
 
 # first generate UTF-8 --> SHIFT_JIS_2004 table
 
+my $this_script = $0;
+
 my $in_file = "sjis-0213-2004-std.txt";
 
 open(my $in, '<', $in_file) || die("cannot open $in_file");
@@ -34,9 +36,10 @@ while (my $line = <$in>)
 			ucs => $ucs1,
 			ucs_second => $ucs2,
 			comment => $rest,
-			direction => 'both'
+			direction => 'both',
+			f => $in_file,
+			l => $.
 		};
-		next;
 	}
 	elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
 	{
@@ -67,10 +70,13 @@ while (my $line = <$in>)
 			code => $code,
 			ucs => $ucs,
 			comment => $rest,
-			direction => $direction
+			direction => $direction,
+			f => $in_file,
+			l => $.
 		};
 	}
 }
 close($in);
 
-print_tables("SHIFT_JIS_2004", \@mapping, 1);
+print_tables($this_script, "SHIFT_JIS_2004", \@mapping, 1);
+print_radix_trees($this_script, "SHIFT_JIS_2004", \@mapping);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl b/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl
index 74e206f..0dd9798 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl
@@ -13,7 +13,9 @@
 use strict;
 require convutils;
 
-my $charset = read_source("CP932.TXT");
+my $this_script = $0;
+
+my $mapping = read_source("CP932.TXT");
 
 # Drop these SJIS codes from the source for UTF8=>SJIS conversion
 my @reject_sjis =(
@@ -22,7 +24,7 @@ my @reject_sjis =(
 	0x879a..0x879c
 );
 
-foreach my $i (@$charset)
+foreach my $i (@$mapping)
 {
 	my $code = $i->{code};
 	my $ucs = $i->{ucs};
@@ -34,15 +36,16 @@ foreach my $i (@$charset)
 }
 
 # Add these UTF8->SJIS pairs to the table.
-push @$charset, (
-	{direction => "from_unicode", ucs => 0x00a2,   code => 0x8191, comment => '# CENT SIGN'},
-	{direction => "from_unicode", ucs => 0x00a3,   code => 0x8192, comment => '# POUND SIGN'},
-	{direction => "from_unicode", ucs => 0x00a5,   code => 0x5c,   comment => '# YEN SIGN'},
-	{direction => "from_unicode", ucs => 0x00ac,   code => 0x81ca, comment => '# NOT SIGN'},
-	{direction => "from_unicode", ucs => 0x2016, code => 0x8161, comment => '# DOUBLE VERTICAL LINE'},
-	{direction => "from_unicode", ucs => 0x203e, code => 0x7e,   comment => '# OVERLINE'},
-	{direction => "from_unicode", ucs => 0x2212, code => 0x817c, comment => '# MINUS SIGN'},
-	{direction => "from_unicode", ucs => 0x301c, code => 0x8160, comment => '# WAVE DASH'}
-);
+push @$mapping, (
+	{direction => "from_unicode", ucs => 0x00a2, code => 0x8191, comment => '# CENT SIGN', f => $this_script, l => __LINE__ },
+	{direction => "from_unicode", ucs => 0x00a3, code => 0x8192, comment => '# POUND SIGN', f => $this_script, l => __LINE__ },
+	{direction => "from_unicode", ucs => 0x00a5, code => 0x5c,   comment => '# YEN SIGN', f => $this_script, l => __LINE__ },
+	{direction => "from_unicode", ucs => 0x00ac, code => 0x81ca, comment => '# NOT SIGN', f => $this_script, l => __LINE__ },
+	{direction => "from_unicode", ucs => 0x2016, code => 0x8161, comment => '# DOUBLE VERTICAL LINE', f => $this_script, l => __LINE__ },
+	{direction => "from_unicode", ucs => 0x203e, code => 0x7e,   comment => '# OVERLINE', f => $this_script, l => __LINE__ },
+	{direction => "from_unicode", ucs => 0x2212, code => 0x817c, comment => '# MINUS SIGN', f => $this_script, l => __LINE__ },
+	{direction => "from_unicode", ucs => 0x301c, code => 0x8160, comment => '# WAVE DASH', f => $this_script, l => __LINE__ }
+	);
 
-print_tables("SJIS", $charset);
+print_tables($this_script, "SJIS", $mapping, 1);
+print_radix_trees($this_script, "SJIS", $mapping);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_UHC.pl b/src/backend/utils/mb/Unicode/UCS_to_UHC.pl
index a65c537..d1297b8 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_UHC.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_UHC.pl
@@ -16,6 +16,8 @@
 use strict;
 require convutils;
 
+my $this_script = $0;
+
 # Read the input
 
 my $in_file = "windows-949-2000.xml";
@@ -39,13 +41,16 @@ while (<$in>)
 		push @mapping, {
 			ucs => $ucs,
 			code => $code,
-			direction => 'both'
+			direction => 'both',
+			f => $in_file,
+		    l => $.
 		};
 	}
 }
 close($in);
 
 # One extra character that's not in the source file.
-push @mapping, { direction => 'both', code => 0xa2e8, ucs => 0x327e, comment => 'CIRCLED HANGUL IEUNG U' };
+push @mapping, { direction => 'both', code => 0xa2e8, ucs => 0x327e, comment => 'CIRCLED HANGUL IEUNG U', f => $this_script, l => __LINE__ };
 
-print_tables("UHC", \@mapping);
+print_tables($this_script, "UHC", \@mapping, 1);
+print_radix_trees($this_script, "UHC", \@mapping);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_most.pl b/src/backend/utils/mb/Unicode/UCS_to_most.pl
index acc03e3..799a0a1 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_most.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_most.pl
@@ -18,6 +18,8 @@
 use strict;
 require convutils;
 
+my $this_script = $0;
+
 my %filename = (
 	'WIN866'     => 'CP866.TXT',
 	'WIN874'     => 'CP874.TXT',
@@ -54,5 +56,6 @@ foreach my $charset (@charsets)
 {
 	my $mapping = &read_source($filename{$charset});
 
-	print_tables($charset, $mapping);
+	print_tables($this_script, $charset, $mapping, 1);
+	print_radix_trees($this_script, $charset, $mapping);
 }
diff --git a/src/backend/utils/mb/Unicode/convutils.pm b/src/backend/utils/mb/Unicode/convutils.pm
index 0ae79a2..7808c32 100644
--- a/src/backend/utils/mb/Unicode/convutils.pm
+++ b/src/backend/utils/mb/Unicode/convutils.pm
@@ -67,7 +67,9 @@ sub read_source
 				   code => hex($1),
 				   ucs => hex($2),
 				   comment => $4,
-				   direction => "both"
+				   direction => "both",
+				   f => $fname,
+				   l => $.
 				};
 
 		# Ignore pure ASCII mappings. PostgreSQL character conversion code
@@ -85,6 +87,7 @@ sub read_source
 # print_tables : output mapping tables
 #
 # Arguments:
+#  this_script - the name of the *caller script* of this feature
 #  charset - string name of the character set.
 #  table   - mapping table (see format below)
 #  verbose - if 1, output comment on each line,
@@ -106,7 +109,7 @@ sub read_source
 #
 sub print_tables
 {
-	my ($charset, $table, $verbose) = @_;
+	my ($this_script, $charset, $table, $verbose) = @_;
 
 	# Build an array with only the to-UTF8 direction mappings
 	my @to_unicode;
@@ -149,76 +152,96 @@ sub print_tables
 		}
 	}
 
-	print_to_utf8_map($charset, \@to_unicode, $verbose);
-	print_to_utf8_combined_map($charset, \@to_unicode_combined, $verbose) if (scalar @to_unicode_combined > 0);
-	print_from_utf8_map($charset, \@from_unicode, $verbose);
-	print_from_utf8_combined_map($charset, \@from_unicode_combined, $verbose) if (scalar @from_unicode_combined > 0);
+	print_to_utf8_map($this_script, $charset, \@to_unicode, $verbose);
+	if (scalar @to_unicode_combined > 0)
+	{
+		print_to_utf8_combined_map($this_script, $charset,
+			\@to_unicode_combined, $verbose);
+	}
+	print_from_utf8_map($this_script, $charset, \@from_unicode, $verbose);
+	if (scalar @from_unicode_combined > 0)
+	{
+		print_from_utf8_combined_map($this_script, $charset,
+			\@from_unicode_combined, $verbose);
+	}
 }
 
 sub print_from_utf8_map
 {
-	my ($charset, $table, $verbose) = @_;
+	my ($this_script, $charset, $table, $verbose) = @_;
 
 	my $last_comment = "";
 
 	my $fname = lc("utf8_to_${charset}.map");
 	print "- Writing UTF8=>${charset} conversion table: $fname\n";
 	open(my $out, '>', $fname) || die "cannot open output file : $fname\n";
-	printf($out "/* src/backend/utils/mb/Unicode/$fname */\n\n".
-		   "static const pg_utf_to_local ULmap${charset}[ %d ] = {",
-		   scalar(@$table));
+	printf $out "/* src/backend/utils/mb/Unicode/$fname */\n"
+	  . "/* This file is generated by $this_script */\n\n"
+	  . "static const pg_utf_to_local ULmap${charset}[ %d ] = {",
+	  scalar(@$table);
 	my $first = 1;
 	foreach my $i (sort {$a->{utf8} <=> $b->{utf8}} @$table)
     {
 		print($out ",") if (!$first);
 		$first = 0;
-		print($out "\t/* $last_comment */") if ($verbose);
+		print $out "\t/* $last_comment */" if ($verbose && $last_comment ne "");
 
 		printf($out "\n  {0x%04x, 0x%04x}", $i->{utf8}, $i->{code});
 		if ($verbose >= 2)
 		{
-			$last_comment = "$i->{f}:$i->{l} $i->{comment}";
+			$last_comment =
+			  sprintf("%s:%d %s", $i->{f}, $i->{l}, $i->{comment});
 		}
-		else
+		elsif ($verbose >= 1)
 		{
 			$last_comment = $i->{comment};
 		}
 	}
-	print($out "\t/* $last_comment */") if ($verbose);
+	print $out "\t/* $last_comment */" if ($verbose && $last_comment ne "");
 	print $out "\n};\n";
 	close($out);
 }
 
 sub print_from_utf8_combined_map
 {
-	my ($charset, $table, $verbose) = @_;
+	my ($this_script, $charset, $table, $verbose) = @_;
 
 	my $last_comment = "";
 
 	my $fname = lc("utf8_to_${charset}_combined.map");
 	print "- Writing UTF8=>${charset} conversion table: $fname\n";
 	open(my $out, '>', $fname) || die "cannot open output file : $fname\n";
-	printf($out "/* src/backend/utils/mb/Unicode/$fname */\n\n".
-		   "static const pg_utf_to_local_combined ULmap${charset}_combined[ %d ] = {",
-		   scalar(@$table));
+	printf $out "/* src/backend/utils/mb/Unicode/$fname */\n"
+	  . "/* This file is generated by $this_script */\n\n"
+	  . "static const pg_utf_to_local_combined ULmap${charset}_combined[ %d ] = {",
+	  scalar(@$table);
 	my $first = 1;
 	foreach my $i (sort {$a->{utf8} <=> $b->{utf8}} @$table)
     {
 		print($out ",") if (!$first);
 		$first = 0;
-		print($out "\t/* $last_comment */") if ($verbose);
+		print $out "\t/* $last_comment */" if ($verbose && $last_comment ne "");
 
-		printf($out "\n  {0x%08x, 0x%08x, 0x%04x}", $i->{utf8}, $i->{utf8_second}, $i->{code});
-		$last_comment = "$i->{comment}";
+		printf $out "\n  {0x%08x, 0x%08x, 0x%04x}",
+		  $i->{utf8}, $i->{utf8_second}, $i->{code};
+		if ($verbose >= 2)
+		{
+			$last_comment =
+			  sprintf("%s:%d %s", $i->{f}, $i->{l}, $i->{comment});
+		}
+		elsif ($verbose >= 1)
+		{
+			$last_comment = $i->{comment};
+		}
 	}
-	print($out "\t/* $last_comment */") if ($verbose);
+	print $out "\t/* $last_comment */" if ($verbose && $last_comment ne "");
 	print $out "\n};\n";
 	close($out);
 }
 
 sub print_to_utf8_map
 {
-	my ($charset, $table, $verbose) = @_;
+	my ($this_script, $charset, $table, $verbose) = @_;
 
 	my $last_comment = "";
 
@@ -226,34 +249,37 @@ sub print_to_utf8_map
 
 	print "- Writing ${charset}=>UTF8 conversion table: $fname\n";
 	open(my $out, '>', $fname) || die "cannot open output file : $fname\n";
-	printf($out "/* src/backend/utils/mb/Unicode/${fname} */\n\n".
-		   "static const pg_local_to_utf LUmap${charset}[ %d ] = {",
-		   scalar(@$table));
+	printf $out "/* src/backend/utils/mb/Unicode/$fname */\n"
+	  . "/* This file is generated by $this_script */\n\n"
+	  . "static const pg_local_to_utf LUmap${charset}[ %d ] = {",
+	  scalar(@$table);
+
 	my $first = 1;
 	foreach my $i (sort {$a->{code} <=> $b->{code}} @$table)
     {
 		print($out ",") if (!$first);
 		$first = 0;
-		print($out "\t/* $last_comment */") if ($verbose);
+		print $out "\t/* $last_comment */" if ($verbose && $last_comment ne "");
 
 		printf($out "\n  {0x%04x, 0x%x}", $i->{code}, $i->{utf8});
 		if ($verbose >= 2)
 		{
-			$last_comment = "$i->{f}:$i->{l} $i->{comment}";
+			$last_comment =
+			  sprintf("%s:%d %s", $i->{f}, $i->{l}, $i->{comment});
 		}
-		else
+		elsif ($verbose >= 1)
 		{
 			$last_comment = $i->{comment};
 		}
 	}
-	print($out "\t/* $last_comment */") if ($verbose);
+	print $out "\t/* $last_comment */" if ($verbose && $last_comment ne "");
 	print $out "\n};\n";
 	close($out);
 }
 
 sub print_to_utf8_combined_map
 {
-	my ($charset, $table, $verbose) = @_;
+	my ($this_script, $charset, $table, $verbose) = @_;
 
 	my $last_comment = "";
 
@@ -261,22 +287,599 @@ sub print_to_utf8_combined_map
 
 	print "- Writing ${charset}=>UTF8 conversion table: $fname\n";
 	open(my $out, '>', $fname) || die "cannot open output file : $fname\n";
-	printf($out "/* src/backend/utils/mb/Unicode/${fname} */\n\n".
-		   "static const pg_local_to_utf_combined LUmap${charset}_combined[ %d ] = {",
-		   scalar(@$table));
+	printf $out "/* src/backend/utils/mb/Unicode/$fname */\n"
+	  . "/* This file is generated by $this_script */\n\n"
+	  . "static const pg_local_to_utf_combined LUmap${charset}_combined[ %d ] = {",
+	  scalar(@$table);
+
 	my $first = 1;
 	foreach my $i (sort {$a->{code} <=> $b->{code}} @$table)
     {
 		print($out ",") if (!$first);
 		$first = 0;
-		print($out "\t/* $last_comment */") if ($verbose);
+		print $out "\t/* $last_comment */" if ($verbose && $last_comment ne "");
+
+		printf $out "\n  {0x%04x, 0x%08x, 0x%08x}",
+		  $i->{code}, $i->{utf8}, $i->{utf8_second};
 
-		printf($out "\n  {0x%04x, 0x%08x, 0x%08x}", $i->{code}, $i->{utf8}, $i->{utf8_second});
-		$last_comment = "$i->{comment}";
+		if ($verbose >= 2)
+		{
+			$last_comment =
+				sprintf("%s:%d %s", $i->{f}, $i->{l}, $i->{comment});
+		}
+		elsif ($verbose >= 1)
+		{
+			$last_comment = $i->{comment};
+		}
 	}
-	print($out "\t/* $last_comment */") if ($verbose);
+	print $out "\t/* $last_comment */" if ($verbose && $last_comment ne "");
 	print $out "\n};\n";
 	close($out);
 }
 
+#############################################################################
+# RADIX TREE STUFF
+
+#########################################
+# print_radix_table($out, $tblname, \%charmap)
+#
+# Input: An output filehandle, the name of the C variable to emit, and a
+# hash mapping an input character to an output character.
+#
+# Constructs a radix tree from the hash, and prints it out as a C-struct.
+
+sub print_radix_table
+{
+	my ($out, $tblname, $c) = @_;
+
+	###
+	### Build radix trees in memory, for 1-, 2-, 3- and 4-byte inputs. Each
+	### radix tree is represented as a nested hash, each hash indexed by
+	### input byte
+	###
+	my %b1map;
+	my %b2map;
+	my %b3map;
+	my %b4map;
+	foreach my $in (keys %$c)
+	{
+		my $mapped = $c->{$in};
+
+		if ($in < 0x100)
+		{
+			$b1map{$in} = $mapped;
+		}
+		elsif ($in < 0x10000)
+		{
+			my $b1     = $in >> 8;
+			my $b2     = $in & 0xff;
+
+			$b2map{$b1}{$b2} = $mapped;
+		}
+		elsif ($in < 0x1000000)
+		{
+			my $b1     = $in >> 16;
+			my $b2     = ($in >> 8) & 0xff;
+			my $b3     = $in & 0xff;
+
+			$b3map{$b1}{$b2}{$b3} = $mapped;
+		}
+		elsif ($in < 0x100000000)
+		{
+			my $b1     = $in >> 24;
+			my $b2     = ($in >> 16) & 0xff;
+			my $b3     = ($in >> 8) & 0xff;
+			my $b4     = $in & 0xff;
+
+			$b4map{$b1}{$b2}{$b3}{$b4} = $mapped;
+		}
+		else
+		{
+			die sprintf("up to 4 byte code is supported: %x", $in);
+		}
+	}
+
+	my @segments;
+
+	###
+	### Build a linear list of "segments", from the nested hashes.
+	###
+	### Each segment is a lookup table, keyed by the next byte in the input.
+	### The segments are written out physically to one big array in the final
+	### step, but logically, they form a radix tree. Or rather, four radix
+	### trees: one for 1-byte inputs, another for 2-byte inputs, 3-byte
+	### inputs, and 4-byte inputs.
+	###
+	### Each segment is represented by a hash with following fields:
+	###
+	### header => <string to output as a comment>
+	### label => <label that can be used to refer to this segment from elsewhere>
+	### values => <a hash, keyed by byte, 0-0xff>
+	###
+	### Entries in 'values' can be integers (for leaf-level segments), or
+	### string labels, pointing to a segment with that label. Any missing
+	### values are treated as zeros. If 'values' hash is missing altogether,
+	###  it's treated as all-zeros.
+	###
+	### Subsequent steps will enrich the segments with more fields.
+	###
+
+	# Add the segments for the radix trees themselves.
+	push @segments, build_segments_from_tree("Single byte table", "1-byte", 1, \%b1map);
+	push @segments, build_segments_from_tree("Two byte table", "2-byte", 2, \%b2map);
+	push @segments, build_segments_from_tree("Three byte table", "3-byte", 3, \%b3map);
+	push @segments, build_segments_from_tree("Four byte table", "4-byte", 4, \%b4map);
+
+	###
+	### Find min and max index used in each level of each tree.
+	###
+	### These are stored separately, and we can then leave out the unused
+	### parts of every segment. (When using the resulting tree, you must
+	### check each input byte against the min and max.)
+	###
+	my %min_idx;
+	my %max_idx;
+	foreach my $seg (@segments)
+	{
+		my $this_min = $min_idx{$seg->{depth}}->{$seg->{level}};
+		my $this_max = $max_idx{$seg->{depth}}->{$seg->{level}};
+
+		foreach my $i (keys %{$seg->{values}})
+		{
+			$this_min = $i if (!defined $this_min || $i < $this_min);
+			$this_max = $i if (!defined $this_max || $i > $this_max);
+		}
+
+		$min_idx{$seg->{depth}}{$seg->{level}} = $this_min;
+		$max_idx{$seg->{depth}}{$seg->{level}} = $this_max;
+	}
+	# Copy the mins and max's back to every segment, for convenience
+	foreach my $seg (@segments)
+	{
+		$seg->{min_idx} = $min_idx{$seg->{depth}}{$seg->{level}};
+		$seg->{max_idx} = $max_idx{$seg->{depth}}{$seg->{level}};
+	}
+
+	###
+	### Prepend a dummy all-zeros map to the beginning.
+	###
+	### A 0 is an invalid value anywhere in the table, and this allows us to
+	### point to 0 offset anywhere else in the tables, to get a 0 result.
+
+	# Find the max range between min and max indexes in any of the segments.
+	my $widest_range = 0;
+	foreach my $seg (@segments)
+	{
+		my $this_range = $seg->{max_idx} - $seg->{min_idx};
+		$widest_range = $this_range if ($this_range > $widest_range);
+	}
+
+	unshift @segments, {
+		header => "Dummy map, for invalid values",
+		min_idx => 0,
+		max_idx => $widest_range
+	};
+
+	###
+	### Eliminate overlapping zeros
+	###
+	### For each segment, if there are zero values at the end of it, and there
+	### are also zero values at the beginning of the next segment, we can
+	### overlay the tail of this segment with the head of next segment, to
+	### save space.
+	###
+	### To achieve that, we subtract the 'max_idx' of each segment with the
+	### amount of zeros that can be overlaid.
+	###
+	for (my $j = 0; $j < $#segments - 1; $j++)
+	{
+		my $seg = $segments[$j];
+		my $nextseg = $segments[$j + 1];
+
+		# Count the number of zero values at the end of this segment.
+		my $this_trail_zeros = 0;
+		for (my $i = $seg->{max_idx}; $i >= $seg->{min_idx} && !$seg->{values}->{$i}; $i--)
+		{
+			$this_trail_zeros++;
+		}
+
+		# Count the number of zeros at the beginning of next segment.
+		my $next_lead_zeros = 0;
+		for (my $i = $nextseg->{min_idx}; $i <= $nextseg->{max_idx} && !$nextseg->{values}->{$i}; $i++)
+		{
+			$next_lead_zeros++;
+		}
+
+		# How many zeros in common?
+		my $overlaid_trail_zeros =
+			($this_trail_zeros > $next_lead_zeros) ? $next_lead_zeros : $this_trail_zeros;
+
+		$seg->{overlaid_trail_zeros} = $overlaid_trail_zeros;
+		$seg->{max_idx} = $seg->{max_idx} - $overlaid_trail_zeros;
+	}
+
+	###
+	### Replace label references with real offsets.
+	###
+	### So far, the non-leaf segments have referred to other segments by
+	### their labels. Replace them with numerical offsets from the beginning
+	### of the final array. You cannot move, add, or remove segments after
+	### this step, as that would invalidate the offsets calculated here!
+	###
+	my $flatoff = 0;
+	my %segmap;
+
+	# First pass: assign offsets to each segment, and build hash
+	# of label => offset. (The dummy segment has no label.)
+	foreach my $seg (@segments)
+	{
+		$seg->{offset} = $flatoff;
+		$segmap{$seg->{label}} = $flatoff if (defined $seg->{label});
+		$flatoff += $seg->{max_idx} - $seg->{min_idx} + 1;
+	}
+	my $tblsize = $flatoff;
+
+	# Second pass: look up the offset of each label reference in the hash.
+	foreach my $seg (@segments)
+	{
+		while (my ($i, $val) = each %{$seg->{values}})
+		{
+			if (!($val =~ /^[0-9,.E]+$/ ))
+			{
+				my $segoff = $segmap{$val};
+				if ($segoff)
+				{
+					$seg->{values}->{$i} = $segoff;
+				}
+				else
+				{
+					die "no segment with label $val";
+				}
+			}
+		}
+	}
+
+	# Also look up the positions of the roots in the table.
+	my $b1root = $segmap{"1-byte"};
+	my $b2root = $segmap{"2-byte"};
+	my $b3root = $segmap{"3-byte"};
+	my $b4root = $segmap{"4-byte"};
+
+	# And the lower-upper values of each level in each radix tree.
+	my $b1_lower = $min_idx{1}{1};
+	my $b1_upper = $max_idx{1}{1};
+
+	my $b2_1_lower = $min_idx{2}{1};
+	my $b2_1_upper = $max_idx{2}{1};
+	my $b2_2_lower = $min_idx{2}{2};
+	my $b2_2_upper = $max_idx{2}{2};
+
+	my $b3_1_lower = $min_idx{3}{1};
+	my $b3_1_upper = $max_idx{3}{1};
+	my $b3_2_lower = $min_idx{3}{2};
+	my $b3_2_upper = $max_idx{3}{2};
+	my $b3_3_lower = $min_idx{3}{3};
+	my $b3_3_upper = $max_idx{3}{3};
+
+	my $b4_1_lower = $min_idx{4}{1};
+	my $b4_1_upper = $max_idx{4}{1};
+	my $b4_2_lower = $min_idx{4}{2};
+	my $b4_2_upper = $max_idx{4}{2};
+	my $b4_3_lower = $min_idx{4}{3};
+	my $b4_3_upper = $max_idx{4}{3};
+	my $b4_4_lower = $min_idx{4}{4};
+	my $b4_4_upper = $max_idx{4}{4};
+
+	###
+	### Find the maximum value in the whole table, to determine if we can
+	### use uint16 or if we need to use uint32.
+	###
+	my $max_val = 0;
+	foreach my $seg (@segments)
+	{
+		foreach my $val (values %{$seg->{values}})
+		{
+			$max_val = $val if ($val > $max_val);
+		}
+	}
+
+	my $datatype = ($max_val <= 0xffff) ? "uint16" : "uint32";
+
+	# For formatting, determine how many values we can fit on a single
+	# line, and how wide each value needs to be to align nicely.
+	my $vals_per_line;
+	my $colwidth;
+
+	if ($max_val <= 0xffff)
+	{
+		$vals_per_line = 8;
+		$colwidth = 4;
+	}
+	elsif ($max_val <= 0xffffff)
+	{
+		$vals_per_line = 4;
+		$colwidth = 6;
+	}
+	else
+	{
+		$vals_per_line = 4;
+		$colwidth = 8;
+	}
+
+	###
+	### Print the struct and array. The forward declaration carries the array
+	### size: a static object may not be declared with an incomplete type.
+	print $out "static const $datatype ${tblname}_table[$tblsize];\n";
+	printf $out "\n";
+	print $out "static const pg_mb_radix_tree $tblname =\n";
+	printf $out "{\n";
+	if ($datatype eq "uint16")
+	{
+		print $out "  ${tblname}_table,\n";
+		print $out "  NULL, /* 32-bit table not used */\n";
+	}
+	if ($datatype eq "uint32")
+	{
+		print $out "  NULL, /* 16-bit table not used */\n";
+		print $out "  ${tblname}_table,\n";
+	}
+	printf $out "\n";
+	printf $out "  0x%04x, /* offset of table for 1-byte inputs */\n", $b1root;
+	printf $out "  0x%02x, /* b1_lower */\n", $b1_lower;
+	printf $out "  0x%02x, /* b1_upper */\n", $b1_upper;
+	printf $out "\n";
+	printf $out "  0x%04x, /* offset of table for 2-byte inputs */\n", $b2root;
+	printf $out "  0x%02x, /* b2_1_lower */\n", $b2_1_lower;
+	printf $out "  0x%02x, /* b2_1_upper */\n", $b2_1_upper;
+	printf $out "  0x%02x, /* b2_2_lower */\n", $b2_2_lower;
+	printf $out "  0x%02x, /* b2_2_upper */\n", $b2_2_upper;
+	printf $out "\n";
+	printf $out "  0x%04x, /* offset of table for 3-byte inputs */\n", $b3root;
+	printf $out "  0x%02x, /* b3_1_lower */\n", $b3_1_lower;
+	printf $out "  0x%02x, /* b3_1_upper */\n", $b3_1_upper;
+	printf $out "  0x%02x, /* b3_2_lower */\n", $b3_2_lower;
+	printf $out "  0x%02x, /* b3_2_upper */\n", $b3_2_upper;
+	printf $out "  0x%02x, /* b3_3_lower */\n", $b3_3_lower;
+	printf $out "  0x%02x, /* b3_3_upper */\n", $b3_3_upper;
+	printf $out "\n";
+	printf $out "  0x%04x, /* offset of table for 4-byte inputs */\n", $b4root;
+	printf $out "  0x%02x, /* b4_1_lower */\n", $b4_1_lower;
+	printf $out "  0x%02x, /* b4_1_upper */\n", $b4_1_upper;
+	printf $out "  0x%02x, /* b4_2_lower */\n", $b4_2_lower;
+	printf $out "  0x%02x, /* b4_2_upper */\n", $b4_2_upper;
+	printf $out "  0x%02x, /* b4_3_lower */\n", $b4_3_lower;
+	printf $out "  0x%02x, /* b4_3_upper */\n", $b4_3_upper;
+	printf $out "  0x%02x, /* b4_4_lower */\n", $b4_4_lower;
+	printf $out "  0x%02x  /* b4_4_upper */\n", $b4_4_upper;
+	print $out "};\n";
+	print $out "\n";
+	print $out "static const $datatype ${tblname}_table[$tblsize] =\n";
+	print $out "{";
+	my $off = 0;
+	foreach my $seg (@segments)
+	{
+		printf $out "\n";
+		printf $out "  /*** %s - offset 0x%05x ***/\n", $seg->{header}, $off;
+		printf $out "\n";
+
+		for (my $i=$seg->{min_idx}; $i <= $seg->{max_idx};)
+		{
+			# Print the next line's worth of values.
+			# XXX pad to begin at a nice boundary
+			printf $out "  /* %02x */ ", $i;
+			for (my $j = 0; $j < $vals_per_line && $i <= $seg->{max_idx}; $j++)
+			{
+				my $val = $seg->{values}->{$i};
+
+				printf $out " 0x%0*x", $colwidth, $val;
+				$off++;
+				if ($off != $tblsize)
+				{
+					print $out ",";
+				}
+				$i++;
+			}
+			print $out "\n";
+		}
+		if ($seg->{overlaid_trail_zeros})
+		{
+			print $out "    /* $seg->{overlaid_trail_zeros} trailing zero values shared with next segment */\n";
+		}
+	}
+
+	# Sanity check.
+	if ($off != $tblsize) { die "table size didn't match!"; }
+
+	print $out "};\n";
+}
+
+### Build the list of segments for one radix tree, in breadth-first order.
+sub build_segments_from_tree
+{
+	# $header: comment text, $rootlabel: label for the root segment,
+	# $depth: input length in bytes, $map: nested hash keyed by input byte.
+	my ($header, $rootlabel, $depth, $map) = @_;
+	my @segments;
+
+	if (%{$map})
+	{
+		@segments = build_segments_recurse($header, $rootlabel, "", 1, $depth, $map);
+
+		# Sort the segments into "breadth-first" order. Not strictly required,
+		# but makes the maps nicer to read. 'level' is a number, so compare
+		# it numerically rather than as a string.
+		@segments = sort { $a->{level} <=> $b->{level} or $a->{path} cmp $b->{path} } @segments;
+	}
+
+	return @segments;
+}
+
+### Recursively turn one (sub)tree into segments; children come before parent.
+sub build_segments_recurse
+{
+	my ($header, $label, $path, $level, $depth, $map) = @_;
+
+	my @result;
+	my $title;
+	my $values;
+
+	if ($level != $depth)
+	{
+		# Inner node: emit the segments of every child subtree, and remember
+		# each child's label under the byte that selects it.
+		my %refs;
+		foreach my $byte (keys %$map)
+		{
+			my $subpath = $path . sprintf("%02x", $byte);
+			my $sublabel = "$depth-level-$level-$subpath";
+			push @result,
+			  build_segments_recurse($header, $sublabel, $subpath,
+									 $level + 1, $depth, $map->{$byte});
+			$refs{$byte} = $sublabel;
+		}
+		$title  = $header . ", byte #$level: ${path}xx";
+		$values = \%refs;
+	}
+	else
+	{
+		# Leaf: the map itself holds the output values, keyed by last byte.
+		$title  = $header . ", leaf: ${path}xx";
+		$values = $map;
+	}
+
+	push @result, {
+		header => $title,
+		label => $label,
+		level => $level,
+		depth => $depth,
+		path => $path,
+		values => $values
+	};
+	return @result;
+}
+
+######################################################
+# make_charmap - convert charset table to charmap hash
+#     with checking duplicate source code
+#
+# make_charmap(\@charset, $direction)
+# charset     - ref to charset table : see print_tables
+# direction   - conversion direction, "to_unicode" or "from_unicode"
+#
+# Returns a hash ref keyed by source code. Unicode code points are passed
+# through ucs2utf() on whichever side of the mapping they appear.
+
+sub make_charmap
+{
+	my ($charset, $direction) = @_;
+
+	die "unacceptable direction : $direction"
+	  if ($direction ne "to_unicode" && $direction ne "from_unicode");
+
+	my %charmap;
+	foreach my $c (@$charset)
+	{
+		next if ($c->{direction} ne $direction && $c->{direction} ne "both");
+
+		# don't generate entries for combined characters
+		next if (defined $c->{ucs_second});
+
+		# Compute the key and value exactly as they are stored, so that
+		# the duplicate check below looks at the real key.
+		my ($src, $dst) =
+		  $direction eq "to_unicode"
+		  ? ($c->{code}, ucs2utf($c->{ucs}))
+		  : (ucs2utf($c->{ucs}), $c->{code});
+
+		# Look for the duplicate in the map being built, not in the
+		# charset entry itself, which has no key named after a code.
+		if (defined $charmap{$src})
+		{
+			printf STDERR
+			  "Error: duplicate source code: 0x%04x => 0x%04x, 0x%04x\n",
+			  $src, $charmap{$src}, $dst;
+			# Exit non-zero so that a calling build step can notice.
+			exit 1;
+		}
+		$charmap{$src} = $dst;
+	}
+
+	return \%charmap;
+}
+
+
+#########################################
+# print_radix_map - write the whole content of C source of radix tree
+#
+# print_radix_map($this_script, $csname, $direction, \@charset, $tblwidth)
+#
+# this_script - the name of the *caller script* of this feature
+# csname      - character set name other than ucs
+# direction   - desired direction "to_unicode" or "from_unicode"
+# charset     - ref to character set array
+# tblwidth    - width in characters of output source file (currently unused)
+
+sub print_radix_map
+{
+	my ($this_script, $csname, $direction, $charset, $tblwidth) = @_;
+
+	my $charmap = make_charmap($charset, $direction);
+	my $fname =
+	  $direction eq "to_unicode"
+	  ? lc("${csname}_to_utf8_radix.map")
+	  : lc("utf8_to_${csname}_radix.map");
+
+	# Name handed to print_radix_table for the variable it emits.
+	my $tblname = lc("${csname}_${direction}_tree");
+
+	if ($direction eq "to_unicode")
+	{
+		print "- Writing ${csname}=>UTF8 conversion radix index: $fname\n";
+	}
+	else
+	{
+		print "- Writing UTF8=>${csname} conversion radix index: $fname\n";
+	}
+
+	open(my $out, '>', $fname) || die("cannot open $fname: $!");
+
+	print $out "/* src/backend/utils/mb/Unicode/$fname */\n"
+	  . "/* This file is generated by $this_script */\n\n";
+
+	print_radix_table($out, $tblname, $charmap);
+
+	close($out) || die("cannot close $fname: $!");
+}
+
+
+###################################################################
+# print_radix_trees - emit the radix tree map files, one per direction
+#
+# print_radix_trees($this_script, $csname, \@charset)
+# this_script - script name, passed on to print_radix_map
+# csname      - character set name, passed on to print_radix_map
+# charset     - ref to the character set array, passed on likewise
+sub print_radix_trees
+{
+	my ($this_script, $csname, $charset) = @_;
+
+	foreach my $direction ("from_unicode", "to_unicode")
+	{
+		print_radix_map($this_script, $csname, $direction, $charset, 78);
+	}
+}
+
+# Print every entry of @$list that the optional filter sub $filt accepts.
+sub dump_charset
+{
+	my ($list, $filt) = @_;
+
+	foreach my $entry (@$list)
+	{
+		next if (defined $filt && !$filt->($entry));
+		$entry->{ucs} = &utf2ucs($entry->{utf8}) if (!defined $entry->{ucs});
+		printf "ucs=%x, code=%x, direction=%s %s:%d %s\n",
+		  @{$entry}{qw(ucs code direction f l comment)};
+	}
+}
+
 1;
diff --git a/src/backend/utils/mb/Unicode/euc-jis-2004-std.txt b/src/backend/utils/mb/Unicode/euc-jis-2004-std.txt
deleted file mode 100644
index 8657e7f..0000000
diff --git a/src/backend/utils/mb/Unicode/gb-18030-2000.xml b/src/backend/utils/mb/Unicode/gb-18030-2000.xml
deleted file mode 100644
index fbbc9e3..0000000
diff --git a/src/backend/utils/mb/Unicode/make_mapchecker.pl b/src/backend/utils/mb/Unicode/make_mapchecker.pl
new file mode 100755
index 0000000..b912d83
--- /dev/null
+++ b/src/backend/utils/mb/Unicode/make_mapchecker.pl
@@ -0,0 +1,78 @@
+#! /usr/bin/perl
+#
+# make_mapchecker.pl - Generates map_checker.h file included by map_checker.c
+#
+
+use strict;
+
+# collect all radix mapfiles
+opendir(my $dh, ".") || die "failed to open directory \".\": $!";
+my @radixmaps = grep { /_radix\.map$/ } readdir($dh);
+closedir($dh);
+
+my %plainmaps;
+
+# check that every radix map has a corresponding plain map
+foreach my $rmap (@radixmaps)
+{
+	my $pmap = $rmap;
+	$pmap =~ s/_radix\.map$/.map/;
+	if (!-e $pmap)
+	{
+		die("radix map \"$rmap\" has no corresponding plain map\n");
+	}
+	$plainmaps{$rmap} = $pmap;
+}
+
+# generate sanity checker source
+my $out;
+open($out, '>', "map_checker.h")
+  || die "cannot open file to write: map_checker.h: $!";
+
+# add #include lines for all radix maps and corresponding plain maps
+foreach my $i (sort @radixmaps)
+{
+	print $out "#include \"$i\"\n";
+	print $out "#include \"$plainmaps{$i}\"\n";
+}
+
+print $out <<'EOF';
+
+struct mappair
+{
+	const char			   *name;
+	int						len;
+	const pg_local_to_utf  *lu;
+	const pg_utf_to_local  *ul;
+	const pg_mb_radix_tree *rt;
+} mappairs[] = {
+EOF
+
+# generate variable names for the array of mappair, in a stable order
+my @mapnames = map { my $m = $_; $m =~ s/\.map$//; $m } sort values %plainmaps;
+
+# write the content of mappairs array.
+foreach my $m (@mapnames)
+{
+	if ($m =~ /^utf8_to_(.*)$/)
+	{
+		my $e = uc($1);
+		print $out
+		"	{\"$m\", lengthof(ULmap$e), NULL, ULmap$e, &$1_from_unicode_tree}";
+	}
+	elsif ($m =~ /^(.*)_to_utf8$/)
+	{
+		my $e = uc($1);
+		print $out
+		  "	{\"$m\", lengthof(LUmap$e), LUmap$e, NULL, &$1_to_unicode_tree}";
+	}
+	else
+	{
+		die "Unrecognizable map name: $m";
+	}
+	print $out ",\n";
+}
+
+print $out "	{NULL, 0, NULL, NULL, NULL}\n};\n";
+
+close($out) || die "cannot close map_checker.h: $!";
diff --git a/src/backend/utils/mb/Unicode/map_checker.c b/src/backend/utils/mb/Unicode/map_checker.c
new file mode 100644
index 0000000..dec0716
--- /dev/null
+++ b/src/backend/utils/mb/Unicode/map_checker.c
@@ -0,0 +1,94 @@
+/*-------------------------------------------------------------------------
+ *
+ *	  Radix map checker
+ *
+ * Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/mb/Unicode/map_checker.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+#include "mb/pg_wchar.h"
+
+#include "map_checker.h"
+
+#include "../char_converter.c"
+
+/*
+ * The old-style plain map files are hard to get wrong, because they are
+ * generated in a straightforward way from the authority files. In contrast,
+ * the radix tree maps are generated by a rather complex calculation and have
+ * a complex, hard-to-verify format.
+ *
+ * This program sanity-checks the radix tree maps by confirming that every
+ * character in a plain map file is converted to the same code by the
+ * corresponding radix tree map.
+ *
+ * All map files are included by map_checker.h, which is generated by the
+ * script make_mapchecker.pl and lists them in the variable mappairs.
+ *
+ */
+int main(void)
+{
+	struct mappair *mp;
+
+	for (mp = mappairs ; mp->name ; mp++)
+	{
+		int i;
+
+		printf("Checking \"%s_radix.map\" against \"%s.map\"(%d chars)..", mp->name, mp->name, mp->len);
+		for (i = 0 ; i < mp->len ; i++)
+		{
+			uint32 s, c, d;
+			unsigned char b1;
+			unsigned char b2;
+			unsigned char b3;
+			unsigned char b4;
+			int l;
+
+			/* s is the source code, d the code the plain map gives for it */
+			if (mp->ul)
+			{
+				s = mp->ul[i].utf;
+				d = mp->ul[i].code;
+			}
+			else
+			{
+				s = mp->lu[i].code;
+				d = mp->lu[i].utf;
+			}
+			if (s < 0x80)
+			{
+				fprintf(stderr, "\nASCII character ? (%x)", s);
+				exit(1);
+			}
+
+			/* split s into bytes; l counts from the first nonzero byte */
+			b1 = s >> 24;
+			b2 = s >> 16;
+			b3 = s >> 8;
+			b4 = s;
+			if (b1 != 0)
+				l = 4;
+			else if (b2 != 0)
+				l = 3;
+			else if (b3 != 0)
+				l = 2;
+			else
+				l = 1;
+
+			c = pg_mb_radix_conv(mp->rt, l, b1, b2, b3, b4);
+
+			if (c != d)
+			{
+				fprintf(stderr, "\nConversion failure in \"%s\": %x => %x, expected %x\n",
+						mp->name, s, c, d);
+				exit(1);
+			}
+		}
+		printf("Ok.\n");
+	}
+	printf("All radix trees are perfect!\n");
+	return 0;	/* report success explicitly; C89 does not imply it */
+}
diff --git a/src/backend/utils/mb/Unicode/sjis-0213-2004-std.txt b/src/backend/utils/mb/Unicode/sjis-0213-2004-std.txt
deleted file mode 100644
index 4b12bce..0000000
diff --git a/src/backend/utils/mb/char_converter.c b/src/backend/utils/mb/char_converter.c
new file mode 100644
index 0000000..3795b7d
--- /dev/null
+++ b/src/backend/utils/mb/char_converter.c
@@ -0,0 +1,116 @@
+/*-------------------------------------------------------------------------
+ *
+ *	  Character converter function using radix tree
+ *
+ * Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/mb/char_converter.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* Convert one l-byte character, right-aligned in b1..b4, via radix tree rt. */
+static inline uint32
+pg_mb_radix_conv(const pg_mb_radix_tree *rt,
+				 int l,
+				 unsigned char b1,
+				 unsigned char b2,
+				 unsigned char b3,
+				 unsigned char b4)
+{
+	switch (l)
+	{
+		case 4:
+			/* 4-byte code: bytes are b1, b2, b3, b4 in that order */
+
+			/* every byte must lie within the range stored for its level */
+			if (b1 < rt->b4_1_lower || b1 > rt->b4_1_upper ||
+				b2 < rt->b4_2_lower || b2 > rt->b4_2_upper ||
+				b3 < rt->b4_3_lower || b3 > rt->b4_3_upper ||
+				b4 < rt->b4_4_lower || b4 > rt->b4_4_upper)
+				return 0;
+
+			if (!rt->chars32)
+			{
+				/* 16-bit table: each lookup but the last yields an offset */
+				uint16		off = rt->b4root;
+
+				off = rt->chars16[off + b1 - rt->b4_1_lower];
+				off = rt->chars16[off + b2 - rt->b4_2_lower];
+				off = rt->chars16[off + b3 - rt->b4_3_lower];
+				return rt->chars16[off + b4 - rt->b4_4_lower];
+			}
+			else
+			{
+				uint32		off = rt->b4root;
+
+				off = rt->chars32[off + b1 - rt->b4_1_lower];
+				off = rt->chars32[off + b2 - rt->b4_2_lower];
+				off = rt->chars32[off + b3 - rt->b4_3_lower];
+				return rt->chars32[off + b4 - rt->b4_4_lower];
+			}
+
+		case 3:
+			/* 3-byte code: bytes are b2, b3, b4 */
+
+			if (b2 < rt->b3_1_lower || b2 > rt->b3_1_upper ||
+				b3 < rt->b3_2_lower || b3 > rt->b3_2_upper ||
+				b4 < rt->b3_3_lower || b4 > rt->b3_3_upper)
+				return 0;
+
+			if (!rt->chars32)
+			{
+				uint16		off = rt->b3root;
+
+				off = rt->chars16[off + b2 - rt->b3_1_lower];
+				off = rt->chars16[off + b3 - rt->b3_2_lower];
+				return rt->chars16[off + b4 - rt->b3_3_lower];
+			}
+			else
+			{
+				uint32		off = rt->b3root;
+
+				off = rt->chars32[off + b2 - rt->b3_1_lower];
+				off = rt->chars32[off + b3 - rt->b3_2_lower];
+				return rt->chars32[off + b4 - rt->b3_3_lower];
+			}
+
+		case 2:
+			/* 2-byte code: bytes are b3, b4; both are range-checked */
+
+			if (b3 < rt->b2_1_lower || b3 > rt->b2_1_upper ||
+				b4 < rt->b2_2_lower || b4 > rt->b2_2_upper)
+				return 0;
+
+			if (!rt->chars32)
+			{
+				uint16		off = rt->b2root;
+
+				off = rt->chars16[off + b3 - rt->b2_1_lower];
+				return rt->chars16[off + b4 - rt->b2_2_lower];
+			}
+			else
+			{
+				uint32		off = rt->b2root;
+
+				off = rt->chars32[off + b3 - rt->b2_1_lower];
+				return rt->chars32[off + b4 - rt->b2_2_lower];
+			}
+
+		case 1:
+			/* 1-byte code: b4 indexes the root segment directly */
+
+			if (b4 < rt->b1_lower || b4 > rt->b1_upper)
+				return 0;
+
+			if (!rt->chars32)
+				return rt->chars16[b4 + rt->b1root - rt->b1_lower];
+			else
+				return rt->chars32[b4 + rt->b1root - rt->b1_lower];
+
+		default:
+			break;
+	}
+	return 0; /* shouldn't happen */
+}
diff --git a/src/backend/utils/mb/conv.c b/src/backend/utils/mb/conv.c
index 9014a57..feaf8ef 100644
--- a/src/backend/utils/mb/conv.c
+++ b/src/backend/utils/mb/conv.c
@@ -13,6 +13,7 @@
 #include "postgres.h"
 #include "mb/pg_wchar.h"
 
+#include "char_converter.c"
 
 /*
  * local2local: a generic single byte charset encoding
@@ -284,36 +285,6 @@ mic2latin_with_table(const unsigned char *mic,
 
 /*
  * comparison routine for bsearch()
- * this routine is intended for UTF8 -> local code
- */
-static int
-compare1(const void *p1, const void *p2)
-{
-	uint32		v1,
-				v2;
-
-	v1 = *(const uint32 *) p1;
-	v2 = ((const pg_utf_to_local *) p2)->utf;
-	return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
-}
-
-/*
- * comparison routine for bsearch()
- * this routine is intended for local code -> UTF8
- */
-static int
-compare2(const void *p1, const void *p2)
-{
-	uint32		v1,
-				v2;
-
-	v1 = *(const uint32 *) p1;
-	v2 = ((const pg_local_to_utf *) p2)->code;
-	return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
-}
-
-/*
- * comparison routine for bsearch()
  * this routine is intended for combined UTF8 -> local code
  */
 static int
@@ -371,7 +342,6 @@ store_coded_char(unsigned char *dest, uint32 code)
  * iso: pointer to the output area (must be large enough!)
 		  (output string will be null-terminated)
  * map: conversion map for single characters
- * mapsize: number of entries in the conversion map
  * cmap: conversion map for combined characters
  *		  (optional, pass NULL if none)
  * cmapsize: number of entries in the conversion map for combined characters
@@ -389,14 +359,13 @@ store_coded_char(unsigned char *dest, uint32 code)
 void
 UtfToLocal(const unsigned char *utf, int len,
 		   unsigned char *iso,
-		   const pg_utf_to_local *map, int mapsize,
+		   const pg_mb_radix_tree *map,
 		   const pg_utf_to_local_combined *cmap, int cmapsize,
 		   utf_local_conversion_func conv_func,
 		   int encoding)
 {
 	uint32		iutf;
 	int			l;
-	const pg_utf_to_local *p;
 	const pg_utf_to_local_combined *cp;
 
 	if (!PG_VALID_ENCODING(encoding))
@@ -406,6 +375,11 @@ UtfToLocal(const unsigned char *utf, int len,
 
 	for (; len > 0; len -= l)
 	{
+		unsigned char b1 = 0;
+		unsigned char b2 = 0;
+		unsigned char b3 = 0;
+		unsigned char b4 = 0;
+
 		/* "break" cases all represent errors */
 		if (*utf == '\0')
 			break;
@@ -427,27 +401,28 @@ UtfToLocal(const unsigned char *utf, int len,
 		/* collect coded char of length l */
 		if (l == 2)
 		{
-			iutf = *utf++ << 8;
-			iutf |= *utf++;
+			b3 = *utf++;
+			b4 = *utf++;
 		}
 		else if (l == 3)
 		{
-			iutf = *utf++ << 16;
-			iutf |= *utf++ << 8;
-			iutf |= *utf++;
+			b2 = *utf++;
+			b3 = *utf++;
+			b4 = *utf++;
 		}
 		else if (l == 4)
 		{
-			iutf = *utf++ << 24;
-			iutf |= *utf++ << 16;
-			iutf |= *utf++ << 8;
-			iutf |= *utf++;
+			b1 = *utf++;
+			b2 = *utf++;
+			b3 = *utf++;
+			b4 = *utf++;
 		}
 		else
 		{
 			elog(ERROR, "unsupported character length %d", l);
 			iutf = 0;			/* keep compiler quiet */
 		}
+		iutf = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
 
 		/* First, try with combined map if possible */
 		if (cmap && len > l)
@@ -516,13 +491,14 @@ UtfToLocal(const unsigned char *utf, int len,
 		}
 
 		/* Now check ordinary map */
-		p = bsearch(&iutf, map, mapsize,
-					sizeof(pg_utf_to_local), compare1);
-
-		if (p)
+		if (map)
 		{
-			iso = store_coded_char(iso, p->code);
-			continue;
+			uint32 converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
+			if (converted)
+			{
+				iso = store_coded_char(iso, converted);
+				continue;
+			}
 		}
 
 		/* if there's a conversion function, try that */
@@ -557,7 +533,6 @@ UtfToLocal(const unsigned char *utf, int len,
  * utf: pointer to the output area (must be large enough!)
 		  (output string will be null-terminated)
  * map: conversion map for single characters
- * mapsize: number of entries in the conversion map
  * cmap: conversion map for combined characters
  *		  (optional, pass NULL if none)
  * cmapsize: number of entries in the conversion map for combined characters
@@ -575,14 +550,13 @@ UtfToLocal(const unsigned char *utf, int len,
 void
 LocalToUtf(const unsigned char *iso, int len,
 		   unsigned char *utf,
-		   const pg_local_to_utf *map, int mapsize,
+		   const pg_mb_radix_tree *map,
 		   const pg_local_to_utf_combined *cmap, int cmapsize,
 		   utf_local_conversion_func conv_func,
 		   int encoding)
 {
 	uint32		iiso;
 	int			l;
-	const pg_local_to_utf *p;
 	const pg_local_to_utf_combined *cp;
 
 	if (!PG_VALID_ENCODING(encoding))
@@ -592,6 +566,11 @@ LocalToUtf(const unsigned char *iso, int len,
 
 	for (; len > 0; len -= l)
 	{
+		unsigned char b1 = 0;
+		unsigned char b2 = 0;
+		unsigned char b3 = 0;
+		unsigned char b4 = 0;
+
 		/* "break" cases all represent errors */
 		if (*iso == '\0')
 			break;
@@ -610,53 +589,55 @@ LocalToUtf(const unsigned char *iso, int len,
 
 		/* collect coded char of length l */
 		if (l == 1)
-			iiso = *iso++;
+			b4 = *iso++;
 		else if (l == 2)
 		{
-			iiso = *iso++ << 8;
-			iiso |= *iso++;
+			b3 = *iso++;
+			b4 = *iso++;
 		}
 		else if (l == 3)
 		{
-			iiso = *iso++ << 16;
-			iiso |= *iso++ << 8;
-			iiso |= *iso++;
+			b2 = *iso++;
+			b3 = *iso++;
+			b4 = *iso++;
 		}
 		else if (l == 4)
 		{
-			iiso = *iso++ << 24;
-			iiso |= *iso++ << 16;
-			iiso |= *iso++ << 8;
-			iiso |= *iso++;
+			b1 = *iso++;
+			b2 = *iso++;
+			b3 = *iso++;
+			b4 = *iso++;
 		}
 		else
 		{
 			elog(ERROR, "unsupported character length %d", l);
 			iiso = 0;			/* keep compiler quiet */
 		}
+		iiso = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
 
-		/* First check ordinary map */
-		p = bsearch(&iiso, map, mapsize,
-					sizeof(pg_local_to_utf), compare2);
-
-		if (p)
-		{
-			utf = store_coded_char(utf, p->utf);
-			continue;
-		}
-
-		/* If there's a combined character map, try that */
-		if (cmap)
+		if (map)
 		{
-			cp = bsearch(&iiso, cmap, cmapsize,
-						 sizeof(pg_local_to_utf_combined), compare4);
+			uint32 converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
 
-			if (cp)
+			if (converted)
 			{
-				utf = store_coded_char(utf, cp->utf1);
-				utf = store_coded_char(utf, cp->utf2);
+				utf = store_coded_char(utf, converted);
 				continue;
 			}
+
+			/* If there's a combined character map, try that */
+			if (cmap)
+			{
+				cp = bsearch(&iiso, cmap, cmapsize,
+							 sizeof(pg_local_to_utf_combined), compare4);
+
+				if (cp)
+				{
+					utf = store_coded_char(utf, cp->utf1);
+					utf = store_coded_char(utf, cp->utf2);
+					continue;
+				}
+			}
 		}
 
 		/* if there's a conversion function, try that */
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c b/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c
index 746ed35..66e36d4 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c
@@ -14,8 +14,8 @@
 #include "postgres.h"
 #include "fmgr.h"
 #include "mb/pg_wchar.h"
-#include "../../Unicode/big5_to_utf8.map"
-#include "../../Unicode/utf8_to_big5.map"
+#include "../../Unicode/big5_to_utf8_radix.map"
+#include "../../Unicode/utf8_to_big5_radix.map"
 
 PG_MODULE_MAGIC;
 
@@ -42,7 +42,7 @@ big5_to_utf8(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_UTF8);
 
 	LocalToUtf(src, len, dest,
-			   LUmapBIG5, lengthof(LUmapBIG5),
+			   &big5_to_unicode_tree,
 			   NULL, 0,
 			   NULL,
 			   PG_BIG5);
@@ -60,7 +60,7 @@ utf8_to_big5(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_BIG5);
 
 	UtfToLocal(src, len, dest,
-			   ULmapBIG5, lengthof(ULmapBIG5),
+			   &big5_from_unicode_tree,
 			   NULL, 0,
 			   NULL,
 			   PG_BIG5);
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c b/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c
index d568c63..1a6402a 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c
@@ -14,10 +14,10 @@
 #include "postgres.h"
 #include "fmgr.h"
 #include "mb/pg_wchar.h"
-#include "../../Unicode/utf8_to_koi8r.map"
-#include "../../Unicode/koi8r_to_utf8.map"
-#include "../../Unicode/utf8_to_koi8u.map"
-#include "../../Unicode/koi8u_to_utf8.map"
+#include "../../Unicode/utf8_to_koi8r_radix.map"
+#include "../../Unicode/koi8r_to_utf8_radix.map"
+#include "../../Unicode/utf8_to_koi8u_radix.map"
+#include "../../Unicode/koi8u_to_utf8_radix.map"
 
 PG_MODULE_MAGIC;
 
@@ -48,7 +48,7 @@ utf8_to_koi8r(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8R);
 
 	UtfToLocal(src, len, dest,
-			   ULmapKOI8R, lengthof(ULmapKOI8R),
+			   &koi8r_from_unicode_tree,
 			   NULL, 0,
 			   NULL,
 			   PG_KOI8R);
@@ -66,7 +66,7 @@ koi8r_to_utf8(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_UTF8);
 
 	LocalToUtf(src, len, dest,
-			   LUmapKOI8R, lengthof(LUmapKOI8R),
+			   &koi8r_to_unicode_tree,
 			   NULL, 0,
 			   NULL,
 			   PG_KOI8R);
@@ -84,7 +84,7 @@ utf8_to_koi8u(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8U);
 
 	UtfToLocal(src, len, dest,
-			   ULmapKOI8U, lengthof(ULmapKOI8U),
+			   &koi8u_from_unicode_tree,
 			   NULL, 0,
 			   NULL,
 			   PG_KOI8U);
@@ -102,7 +102,7 @@ koi8u_to_utf8(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8U, PG_UTF8);
 
 	LocalToUtf(src, len, dest,
-			   LUmapKOI8U, lengthof(LUmapKOI8U),
+			   &koi8u_to_unicode_tree,
 			   NULL, 0,
 			   NULL,
 			   PG_KOI8U);
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c
index ebf5f23..ec27841 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c
@@ -14,8 +14,8 @@
 #include "postgres.h"
 #include "fmgr.h"
 #include "mb/pg_wchar.h"
-#include "../../Unicode/euc_jis_2004_to_utf8.map"
-#include "../../Unicode/utf8_to_euc_jis_2004.map"
+#include "../../Unicode/euc_jis_2004_to_utf8_radix.map"
+#include "../../Unicode/utf8_to_euc_jis_2004_radix.map"
 #include "../../Unicode/euc_jis_2004_to_utf8_combined.map"
 #include "../../Unicode/utf8_to_euc_jis_2004_combined.map"
 
@@ -44,7 +44,7 @@ euc_jis_2004_to_utf8(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_UTF8);
 
 	LocalToUtf(src, len, dest,
-			   LUmapEUC_JIS_2004, lengthof(LUmapEUC_JIS_2004),
+			   &euc_jis_2004_to_unicode_tree,
 			LUmapEUC_JIS_2004_combined, lengthof(LUmapEUC_JIS_2004_combined),
 			   NULL,
 			   PG_EUC_JIS_2004);
@@ -62,7 +62,7 @@ utf8_to_euc_jis_2004(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JIS_2004);
 
 	UtfToLocal(src, len, dest,
-			   ULmapEUC_JIS_2004, lengthof(ULmapEUC_JIS_2004),
+			   &euc_jis_2004_from_unicode_tree,
 			ULmapEUC_JIS_2004_combined, lengthof(ULmapEUC_JIS_2004_combined),
 			   NULL,
 			   PG_EUC_JIS_2004);
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c
index cb0751c..a6b156d 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c
@@ -14,8 +14,8 @@
 #include "postgres.h"
 #include "fmgr.h"
 #include "mb/pg_wchar.h"
-#include "../../Unicode/euc_cn_to_utf8.map"
-#include "../../Unicode/utf8_to_euc_cn.map"
+#include "../../Unicode/euc_cn_to_utf8_radix.map"
+#include "../../Unicode/utf8_to_euc_cn_radix.map"
 
 PG_MODULE_MAGIC;
 
@@ -42,7 +42,7 @@ euc_cn_to_utf8(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_CN, PG_UTF8);
 
 	LocalToUtf(src, len, dest,
-			   LUmapEUC_CN, lengthof(LUmapEUC_CN),
+			   &euc_cn_to_unicode_tree,
 			   NULL, 0,
 			   NULL,
 			   PG_EUC_CN);
@@ -60,7 +60,7 @@ utf8_to_euc_cn(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_CN);
 
 	UtfToLocal(src, len, dest,
-			   ULmapEUC_CN, lengthof(ULmapEUC_CN),
+			   &euc_cn_from_unicode_tree,
 			   NULL, 0,
 			   NULL,
 			   PG_EUC_CN);
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c
index 6512eee..75d190a 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c
@@ -14,8 +14,8 @@
 #include "postgres.h"
 #include "fmgr.h"
 #include "mb/pg_wchar.h"
-#include "../../Unicode/euc_jp_to_utf8.map"
-#include "../../Unicode/utf8_to_euc_jp.map"
+#include "../../Unicode/euc_jp_to_utf8_radix.map"
+#include "../../Unicode/utf8_to_euc_jp_radix.map"
 
 PG_MODULE_MAGIC;
 
@@ -42,7 +42,7 @@ euc_jp_to_utf8(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_UTF8);
 
 	LocalToUtf(src, len, dest,
-			   LUmapEUC_JP, lengthof(LUmapEUC_JP),
+			   &euc_jp_to_unicode_tree,
 			   NULL, 0,
 			   NULL,
 			   PG_EUC_JP);
@@ -60,7 +60,7 @@ utf8_to_euc_jp(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JP);
 
 	UtfToLocal(src, len, dest,
-			   ULmapEUC_JP, lengthof(ULmapEUC_JP),
+			   &euc_jp_from_unicode_tree,
 			   NULL, 0,
 			   NULL,
 			   PG_EUC_JP);
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c
index f85720f..84302d3 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c
@@ -14,8 +14,8 @@
 #include "postgres.h"
 #include "fmgr.h"
 #include "mb/pg_wchar.h"
-#include "../../Unicode/euc_kr_to_utf8.map"
-#include "../../Unicode/utf8_to_euc_kr.map"
+#include "../../Unicode/euc_kr_to_utf8_radix.map"
+#include "../../Unicode/utf8_to_euc_kr_radix.map"
 
 PG_MODULE_MAGIC;
 
@@ -42,7 +42,7 @@ euc_kr_to_utf8(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_KR, PG_UTF8);
 
 	LocalToUtf(src, len, dest,
-			   LUmapEUC_KR, lengthof(LUmapEUC_KR),
+			   &euc_kr_to_unicode_tree,
 			   NULL, 0,
 			   NULL,
 			   PG_EUC_KR);
@@ -60,7 +60,7 @@ utf8_to_euc_kr(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_KR);
 
 	UtfToLocal(src, len, dest,
-			   ULmapEUC_KR, lengthof(ULmapEUC_KR),
+			   &euc_kr_from_unicode_tree,
 			   NULL, 0,
 			   NULL,
 			   PG_EUC_KR);
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c
index 1ce4099..0dc3c1f 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c
@@ -14,8 +14,8 @@
 #include "postgres.h"
 #include "fmgr.h"
 #include "mb/pg_wchar.h"
-#include "../../Unicode/euc_tw_to_utf8.map"
-#include "../../Unicode/utf8_to_euc_tw.map"
+#include "../../Unicode/euc_tw_to_utf8_radix.map"
+#include "../../Unicode/utf8_to_euc_tw_radix.map"
 
 PG_MODULE_MAGIC;
 
@@ -42,7 +42,7 @@ euc_tw_to_utf8(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_UTF8);
 
 	LocalToUtf(src, len, dest,
-			   LUmapEUC_TW, lengthof(LUmapEUC_TW),
+			   &euc_tw_to_unicode_tree,
 			   NULL, 0,
 			   NULL,
 			   PG_EUC_TW);
@@ -60,7 +60,7 @@ utf8_to_euc_tw(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_TW);
 
 	UtfToLocal(src, len, dest,
-			   ULmapEUC_TW, lengthof(ULmapEUC_TW),
+			   &euc_tw_from_unicode_tree,
 			   NULL, 0,
 			   NULL,
 			   PG_EUC_TW);
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c b/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c
index 22dd642..836ef72 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c
@@ -14,8 +14,8 @@
 #include "postgres.h"
 #include "fmgr.h"
 #include "mb/pg_wchar.h"
-#include "../../Unicode/gb18030_to_utf8.map"
-#include "../../Unicode/utf8_to_gb18030.map"
+#include "../../Unicode/gb18030_to_utf8_radix.map"
+#include "../../Unicode/utf8_to_gb18030_radix.map"
 
 PG_MODULE_MAGIC;
 
@@ -197,7 +197,7 @@ gb18030_to_utf8(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_GB18030, PG_UTF8);
 
 	LocalToUtf(src, len, dest,
-			   LUmapGB18030, lengthof(LUmapGB18030),
+			   &gb18030_to_unicode_tree,
 			   NULL, 0,
 			   conv_18030_to_utf8,
 			   PG_GB18030);
@@ -215,7 +215,7 @@ utf8_to_gb18030(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GB18030);
 
 	UtfToLocal(src, len, dest,
-			   ULmapGB18030, lengthof(ULmapGB18030),
+			   &gb18030_from_unicode_tree,
 			   NULL, 0,
 			   conv_utf8_to_18030,
 			   PG_GB18030);
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c b/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c
index 1238e3d..a3b97b9 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c
@@ -14,8 +14,8 @@
 #include "postgres.h"
 #include "fmgr.h"
 #include "mb/pg_wchar.h"
-#include "../../Unicode/gbk_to_utf8.map"
-#include "../../Unicode/utf8_to_gbk.map"
+#include "../../Unicode/gbk_to_utf8_radix.map"
+#include "../../Unicode/utf8_to_gbk_radix.map"
 
 PG_MODULE_MAGIC;
 
@@ -42,7 +42,7 @@ gbk_to_utf8(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_GBK, PG_UTF8);
 
 	LocalToUtf(src, len, dest,
-			   LUmapGBK, lengthof(LUmapGBK),
+			   &gbk_to_unicode_tree,
 			   NULL, 0,
 			   NULL,
 			   PG_GBK);
@@ -60,7 +60,7 @@ utf8_to_gbk(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GBK);
 
 	UtfToLocal(src, len, dest,
-			   ULmapGBK, lengthof(ULmapGBK),
+			   &gbk_from_unicode_tree,
 			   NULL, 0,
 			   NULL,
 			   PG_GBK);
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c
index 48acd3f..ca8ada5 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c
@@ -14,32 +14,32 @@
 #include "postgres.h"
 #include "fmgr.h"
 #include "mb/pg_wchar.h"
-#include "../../Unicode/iso8859_10_to_utf8.map"
-#include "../../Unicode/iso8859_13_to_utf8.map"
-#include "../../Unicode/iso8859_14_to_utf8.map"
-#include "../../Unicode/iso8859_15_to_utf8.map"
-#include "../../Unicode/iso8859_2_to_utf8.map"
-#include "../../Unicode/iso8859_3_to_utf8.map"
-#include "../../Unicode/iso8859_4_to_utf8.map"
-#include "../../Unicode/iso8859_5_to_utf8.map"
-#include "../../Unicode/iso8859_6_to_utf8.map"
-#include "../../Unicode/iso8859_7_to_utf8.map"
-#include "../../Unicode/iso8859_8_to_utf8.map"
-#include "../../Unicode/iso8859_9_to_utf8.map"
-#include "../../Unicode/utf8_to_iso8859_10.map"
-#include "../../Unicode/utf8_to_iso8859_13.map"
-#include "../../Unicode/utf8_to_iso8859_14.map"
-#include "../../Unicode/utf8_to_iso8859_15.map"
-#include "../../Unicode/utf8_to_iso8859_16.map"
-#include "../../Unicode/utf8_to_iso8859_2.map"
-#include "../../Unicode/utf8_to_iso8859_3.map"
-#include "../../Unicode/utf8_to_iso8859_4.map"
-#include "../../Unicode/utf8_to_iso8859_5.map"
-#include "../../Unicode/utf8_to_iso8859_6.map"
-#include "../../Unicode/utf8_to_iso8859_7.map"
-#include "../../Unicode/utf8_to_iso8859_8.map"
-#include "../../Unicode/utf8_to_iso8859_9.map"
-#include "../../Unicode/iso8859_16_to_utf8.map"
+#include "../../Unicode/iso8859_10_to_utf8_radix.map"
+#include "../../Unicode/iso8859_13_to_utf8_radix.map"
+#include "../../Unicode/iso8859_14_to_utf8_radix.map"
+#include "../../Unicode/iso8859_15_to_utf8_radix.map"
+#include "../../Unicode/iso8859_2_to_utf8_radix.map"
+#include "../../Unicode/iso8859_3_to_utf8_radix.map"
+#include "../../Unicode/iso8859_4_to_utf8_radix.map"
+#include "../../Unicode/iso8859_5_to_utf8_radix.map"
+#include "../../Unicode/iso8859_6_to_utf8_radix.map"
+#include "../../Unicode/iso8859_7_to_utf8_radix.map"
+#include "../../Unicode/iso8859_8_to_utf8_radix.map"
+#include "../../Unicode/iso8859_9_to_utf8_radix.map"
+#include "../../Unicode/utf8_to_iso8859_10_radix.map"
+#include "../../Unicode/utf8_to_iso8859_13_radix.map"
+#include "../../Unicode/utf8_to_iso8859_14_radix.map"
+#include "../../Unicode/utf8_to_iso8859_15_radix.map"
+#include "../../Unicode/utf8_to_iso8859_16_radix.map"
+#include "../../Unicode/utf8_to_iso8859_2_radix.map"
+#include "../../Unicode/utf8_to_iso8859_3_radix.map"
+#include "../../Unicode/utf8_to_iso8859_4_radix.map"
+#include "../../Unicode/utf8_to_iso8859_5_radix.map"
+#include "../../Unicode/utf8_to_iso8859_6_radix.map"
+#include "../../Unicode/utf8_to_iso8859_7_radix.map"
+#include "../../Unicode/utf8_to_iso8859_8_radix.map"
+#include "../../Unicode/utf8_to_iso8859_9_radix.map"
+#include "../../Unicode/iso8859_16_to_utf8_radix.map"
 
 PG_MODULE_MAGIC;
 
@@ -60,52 +60,37 @@ PG_FUNCTION_INFO_V1(utf8_to_iso8859);
 typedef struct
 {
 	pg_enc		encoding;
-	const pg_local_to_utf *map1;	/* to UTF8 map name */
-	const pg_utf_to_local *map2;	/* from UTF8 map name */
-	int			size1;			/* size of map1 */
-	int			size2;			/* size of map2 */
+	const pg_mb_radix_tree *map1;	/* radix tree passed to LocalToUtf (to UTF8) */
+	const pg_mb_radix_tree *map2;	/* radix tree passed to UtfToLocal (from UTF8) */
 } pg_conv_map;
 
 static const pg_conv_map maps[] = {
-	{PG_LATIN2, LUmapISO8859_2, ULmapISO8859_2,
-		lengthof(LUmapISO8859_2),
-	lengthof(ULmapISO8859_2)},	/* ISO-8859-2 Latin 2 */
-	{PG_LATIN3, LUmapISO8859_3, ULmapISO8859_3,
-		lengthof(LUmapISO8859_3),
-	lengthof(ULmapISO8859_3)},	/* ISO-8859-3 Latin 3 */
-	{PG_LATIN4, LUmapISO8859_4, ULmapISO8859_4,
-		lengthof(LUmapISO8859_4),
-	lengthof(ULmapISO8859_4)},	/* ISO-8859-4 Latin 4 */
-	{PG_LATIN5, LUmapISO8859_9, ULmapISO8859_9,
-		lengthof(LUmapISO8859_9),
-	lengthof(ULmapISO8859_9)},	/* ISO-8859-9 Latin 5 */
-	{PG_LATIN6, LUmapISO8859_10, ULmapISO8859_10,
-		lengthof(LUmapISO8859_10),
-	lengthof(ULmapISO8859_10)}, /* ISO-8859-10 Latin 6 */
-	{PG_LATIN7, LUmapISO8859_13, ULmapISO8859_13,
-		lengthof(LUmapISO8859_13),
-	lengthof(ULmapISO8859_13)}, /* ISO-8859-13 Latin 7 */
-	{PG_LATIN8, LUmapISO8859_14, ULmapISO8859_14,
-		lengthof(LUmapISO8859_14),
-	lengthof(ULmapISO8859_14)}, /* ISO-8859-14 Latin 8 */
-	{PG_LATIN9, LUmapISO8859_15, ULmapISO8859_15,
-		lengthof(LUmapISO8859_15),
-	lengthof(ULmapISO8859_15)}, /* ISO-8859-15 Latin 9 */
-	{PG_LATIN10, LUmapISO8859_16, ULmapISO8859_16,
-		lengthof(LUmapISO8859_16),
-	lengthof(ULmapISO8859_16)}, /* ISO-8859-16 Latin 10 */
-	{PG_ISO_8859_5, LUmapISO8859_5, ULmapISO8859_5,
-		lengthof(LUmapISO8859_5),
-	lengthof(ULmapISO8859_5)},	/* ISO-8859-5 */
-	{PG_ISO_8859_6, LUmapISO8859_6, ULmapISO8859_6,
-		lengthof(LUmapISO8859_6),
-	lengthof(ULmapISO8859_6)},	/* ISO-8859-6 */
-	{PG_ISO_8859_7, LUmapISO8859_7, ULmapISO8859_7,
-		lengthof(LUmapISO8859_7),
-	lengthof(ULmapISO8859_7)},	/* ISO-8859-7 */
-	{PG_ISO_8859_8, LUmapISO8859_8, ULmapISO8859_8,
-		lengthof(LUmapISO8859_8),
-	lengthof(ULmapISO8859_8)},	/* ISO-8859-8 */
+	{PG_LATIN2, &iso8859_2_to_unicode_tree,
+	 &iso8859_2_from_unicode_tree},	/* ISO-8859-2 Latin 2 */
+	{PG_LATIN3, &iso8859_3_to_unicode_tree,
+	 &iso8859_3_from_unicode_tree},	/* ISO-8859-3 Latin 3 */
+	{PG_LATIN4, &iso8859_4_to_unicode_tree,
+	 &iso8859_4_from_unicode_tree},	/* ISO-8859-4 Latin 4 */
+	{PG_LATIN5, &iso8859_9_to_unicode_tree,
+	 &iso8859_9_from_unicode_tree},	/* ISO-8859-9 Latin 5 */
+	{PG_LATIN6, &iso8859_10_to_unicode_tree,
+	 &iso8859_10_from_unicode_tree}, /* ISO-8859-10 Latin 6 */
+	{PG_LATIN7, &iso8859_13_to_unicode_tree,
+	 &iso8859_13_from_unicode_tree}, /* ISO-8859-13 Latin 7 */
+	{PG_LATIN8, &iso8859_14_to_unicode_tree,
+	 &iso8859_14_from_unicode_tree}, /* ISO-8859-14 Latin 8 */
+	{PG_LATIN9, &iso8859_15_to_unicode_tree,
+	 &iso8859_15_from_unicode_tree}, /* ISO-8859-15 Latin 9 */
+	{PG_LATIN10, &iso8859_16_to_unicode_tree,
+	 &iso8859_16_from_unicode_tree}, /* ISO-8859-16 Latin 10 */
+	{PG_ISO_8859_5, &iso8859_5_to_unicode_tree,
+	 &iso8859_5_from_unicode_tree},	/* ISO-8859-5 */
+	{PG_ISO_8859_6, &iso8859_6_to_unicode_tree,
+	 &iso8859_6_from_unicode_tree},	/* ISO-8859-6 */
+	{PG_ISO_8859_7, &iso8859_7_to_unicode_tree,
+	 &iso8859_7_from_unicode_tree},	/* ISO-8859-7 */
+	{PG_ISO_8859_8, &iso8859_8_to_unicode_tree,
+	 &iso8859_8_from_unicode_tree},	/* ISO-8859-8 */
 };
 
 Datum
@@ -124,7 +109,7 @@ iso8859_to_utf8(PG_FUNCTION_ARGS)
 		if (encoding == maps[i].encoding)
 		{
 			LocalToUtf(src, len, dest,
-					   maps[i].map1, maps[i].size1,
+					   maps[i].map1,
 					   NULL, 0,
 					   NULL,
 					   encoding);
@@ -156,7 +141,7 @@ utf8_to_iso8859(PG_FUNCTION_ARGS)
 		if (encoding == maps[i].encoding)
 		{
 			UtfToLocal(src, len, dest,
-					   maps[i].map2, maps[i].size2,
+					   maps[i].map2,
 					   NULL, 0,
 					   NULL,
 					   encoding);
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c b/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c
index 51690b9..7e3a3cb 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c
@@ -14,8 +14,8 @@
 #include "postgres.h"
 #include "fmgr.h"
 #include "mb/pg_wchar.h"
-#include "../../Unicode/johab_to_utf8.map"
-#include "../../Unicode/utf8_to_johab.map"
+#include "../../Unicode/johab_to_utf8_radix.map"
+#include "../../Unicode/utf8_to_johab_radix.map"
 
 PG_MODULE_MAGIC;
 
@@ -42,7 +42,7 @@ johab_to_utf8(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_JOHAB, PG_UTF8);
 
 	LocalToUtf(src, len, dest,
-			   LUmapJOHAB, lengthof(LUmapJOHAB),
+			   &johab_to_unicode_tree,
 			   NULL, 0,
 			   NULL,
 			   PG_JOHAB);
@@ -60,7 +60,7 @@ utf8_to_johab(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_JOHAB);
 
 	UtfToLocal(src, len, dest,
-			   ULmapJOHAB, lengthof(ULmapJOHAB),
+			   &johab_from_unicode_tree,
 			   NULL, 0,
 			   NULL,
 			   PG_JOHAB);
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c b/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c
index 605fe40..4bd5ea5 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c
@@ -14,8 +14,8 @@
 #include "postgres.h"
 #include "fmgr.h"
 #include "mb/pg_wchar.h"
-#include "../../Unicode/sjis_to_utf8.map"
-#include "../../Unicode/utf8_to_sjis.map"
+#include "../../Unicode/sjis_to_utf8_radix.map"
+#include "../../Unicode/utf8_to_sjis_radix.map"
 
 PG_MODULE_MAGIC;
 
@@ -42,7 +42,7 @@ sjis_to_utf8(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_UTF8);
 
 	LocalToUtf(src, len, dest,
-			   LUmapSJIS, lengthof(LUmapSJIS),
+			   &sjis_to_unicode_tree,
 			   NULL, 0,
 			   NULL,
 			   PG_SJIS);
@@ -60,7 +60,7 @@ utf8_to_sjis(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_SJIS);
 
 	UtfToLocal(src, len, dest,
-			   ULmapSJIS, lengthof(ULmapSJIS),
+			   &sjis_from_unicode_tree,
 			   NULL, 0,
 			   NULL,
 			   PG_SJIS);
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c b/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c
index 8d8f508..5a1e8c6 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c
@@ -14,8 +14,8 @@
 #include "postgres.h"
 #include "fmgr.h"
 #include "mb/pg_wchar.h"
-#include "../../Unicode/shift_jis_2004_to_utf8.map"
-#include "../../Unicode/utf8_to_shift_jis_2004.map"
+#include "../../Unicode/shift_jis_2004_to_utf8_radix.map"
+#include "../../Unicode/utf8_to_shift_jis_2004_radix.map"
 #include "../../Unicode/shift_jis_2004_to_utf8_combined.map"
 #include "../../Unicode/utf8_to_shift_jis_2004_combined.map"
 
@@ -44,7 +44,7 @@ shift_jis_2004_to_utf8(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_UTF8);
 
 	LocalToUtf(src, len, dest,
-			   LUmapSHIFT_JIS_2004, lengthof(LUmapSHIFT_JIS_2004),
+			   &shift_jis_2004_to_unicode_tree,
 		LUmapSHIFT_JIS_2004_combined, lengthof(LUmapSHIFT_JIS_2004_combined),
 			   NULL,
 			   PG_SHIFT_JIS_2004);
@@ -62,7 +62,7 @@ utf8_to_shift_jis_2004(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_SHIFT_JIS_2004);
 
 	UtfToLocal(src, len, dest,
-			   ULmapSHIFT_JIS_2004, lengthof(ULmapSHIFT_JIS_2004),
+			   &shift_jis_2004_from_unicode_tree,
 		ULmapSHIFT_JIS_2004_combined, lengthof(ULmapSHIFT_JIS_2004_combined),
 			   NULL,
 			   PG_SHIFT_JIS_2004);
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c b/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c
index 97e895c..dd7a788 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c
@@ -14,8 +14,8 @@
 #include "postgres.h"
 #include "fmgr.h"
 #include "mb/pg_wchar.h"
-#include "../../Unicode/uhc_to_utf8.map"
-#include "../../Unicode/utf8_to_uhc.map"
+#include "../../Unicode/uhc_to_utf8_radix.map"
+#include "../../Unicode/utf8_to_uhc_radix.map"
 
 PG_MODULE_MAGIC;
 
@@ -42,7 +42,7 @@ uhc_to_utf8(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_UHC, PG_UTF8);
 
 	LocalToUtf(src, len, dest,
-			   LUmapUHC, lengthof(LUmapUHC),
+			   &uhc_to_unicode_tree,
 			   NULL, 0,
 			   NULL,
 			   PG_UHC);
@@ -60,7 +60,7 @@ utf8_to_uhc(PG_FUNCTION_ARGS)
 	CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_UHC);
 
 	UtfToLocal(src, len, dest,
-			   ULmapUHC, lengthof(ULmapUHC),
+			   &uhc_from_unicode_tree,
 			   NULL, 0,
 			   NULL,
 			   PG_UHC);
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c b/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c
index ab6e624..9ee72c9 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c
@@ -14,28 +14,28 @@
 #include "postgres.h"
 #include "fmgr.h"
 #include "mb/pg_wchar.h"
-#include "../../Unicode/utf8_to_win1250.map"
-#include "../../Unicode/utf8_to_win1251.map"
-#include "../../Unicode/utf8_to_win1252.map"
-#include "../../Unicode/utf8_to_win1253.map"
-#include "../../Unicode/utf8_to_win1254.map"
-#include "../../Unicode/utf8_to_win1255.map"
-#include "../../Unicode/utf8_to_win1256.map"
-#include "../../Unicode/utf8_to_win1257.map"
-#include "../../Unicode/utf8_to_win1258.map"
-#include "../../Unicode/utf8_to_win866.map"
-#include "../../Unicode/utf8_to_win874.map"
-#include "../../Unicode/win1250_to_utf8.map"
-#include "../../Unicode/win1251_to_utf8.map"
-#include "../../Unicode/win1252_to_utf8.map"
-#include "../../Unicode/win1253_to_utf8.map"
-#include "../../Unicode/win1254_to_utf8.map"
-#include "../../Unicode/win1255_to_utf8.map"
-#include "../../Unicode/win1256_to_utf8.map"
-#include "../../Unicode/win1257_to_utf8.map"
-#include "../../Unicode/win866_to_utf8.map"
-#include "../../Unicode/win874_to_utf8.map"
-#include "../../Unicode/win1258_to_utf8.map"
+#include "../../Unicode/utf8_to_win1250_radix.map"
+#include "../../Unicode/utf8_to_win1251_radix.map"
+#include "../../Unicode/utf8_to_win1252_radix.map"
+#include "../../Unicode/utf8_to_win1253_radix.map"
+#include "../../Unicode/utf8_to_win1254_radix.map"
+#include "../../Unicode/utf8_to_win1255_radix.map"
+#include "../../Unicode/utf8_to_win1256_radix.map"
+#include "../../Unicode/utf8_to_win1257_radix.map"
+#include "../../Unicode/utf8_to_win1258_radix.map"
+#include "../../Unicode/utf8_to_win866_radix.map"
+#include "../../Unicode/utf8_to_win874_radix.map"
+#include "../../Unicode/win1250_to_utf8_radix.map"
+#include "../../Unicode/win1251_to_utf8_radix.map"
+#include "../../Unicode/win1252_to_utf8_radix.map"
+#include "../../Unicode/win1253_to_utf8_radix.map"
+#include "../../Unicode/win1254_to_utf8_radix.map"
+#include "../../Unicode/win1255_to_utf8_radix.map"
+#include "../../Unicode/win1256_to_utf8_radix.map"
+#include "../../Unicode/win1257_to_utf8_radix.map"
+#include "../../Unicode/win866_to_utf8_radix.map"
+#include "../../Unicode/win874_to_utf8_radix.map"
+#include "../../Unicode/win1258_to_utf8_radix.map"
 
 PG_MODULE_MAGIC;
 
@@ -56,46 +56,22 @@ PG_FUNCTION_INFO_V1(utf8_to_win);
 typedef struct
 {
 	pg_enc		encoding;
-	const pg_local_to_utf *map1;	/* to UTF8 map name */
-	const pg_utf_to_local *map2;	/* from UTF8 map name */
-	int			size1;			/* size of map1 */
-	int			size2;			/* size of map2 */
+	const pg_mb_radix_tree *map1;	/* radix tree passed to LocalToUtf (to UTF8) */
+	const pg_mb_radix_tree *map2;	/* radix tree passed to UtfToLocal (from UTF8) */
 } pg_conv_map;
 
 static const pg_conv_map maps[] = {
-	{PG_WIN866, LUmapWIN866, ULmapWIN866,
-		lengthof(LUmapWIN866),
-	lengthof(ULmapWIN866)},
-	{PG_WIN874, LUmapWIN874, ULmapWIN874,
-		lengthof(LUmapWIN874),
-	lengthof(ULmapWIN874)},
-	{PG_WIN1250, LUmapWIN1250, ULmapWIN1250,
-		lengthof(LUmapWIN1250),
-	lengthof(ULmapWIN1250)},
-	{PG_WIN1251, LUmapWIN1251, ULmapWIN1251,
-		lengthof(LUmapWIN1251),
-	lengthof(ULmapWIN1251)},
-	{PG_WIN1252, LUmapWIN1252, ULmapWIN1252,
-		lengthof(LUmapWIN1252),
-	lengthof(ULmapWIN1252)},
-	{PG_WIN1253, LUmapWIN1253, ULmapWIN1253,
-		lengthof(LUmapWIN1253),
-	lengthof(ULmapWIN1253)},
-	{PG_WIN1254, LUmapWIN1254, ULmapWIN1254,
-		lengthof(LUmapWIN1254),
-	lengthof(ULmapWIN1254)},
-	{PG_WIN1255, LUmapWIN1255, ULmapWIN1255,
-		lengthof(LUmapWIN1255),
-	lengthof(ULmapWIN1255)},
-	{PG_WIN1256, LUmapWIN1256, ULmapWIN1256,
-		lengthof(LUmapWIN1256),
-	lengthof(ULmapWIN1256)},
-	{PG_WIN1257, LUmapWIN1257, ULmapWIN1257,
-		lengthof(LUmapWIN1257),
-	lengthof(ULmapWIN1257)},
-	{PG_WIN1258, LUmapWIN1258, ULmapWIN1258,
-		lengthof(LUmapWIN1258),
-	lengthof(ULmapWIN1258)},
+	{PG_WIN866,  &win866_to_unicode_tree,  &win866_from_unicode_tree},
+	{PG_WIN874,  &win874_to_unicode_tree,  &win874_from_unicode_tree},
+	{PG_WIN1250, &win1250_to_unicode_tree, &win1250_from_unicode_tree},
+	{PG_WIN1251, &win1251_to_unicode_tree, &win1251_from_unicode_tree},
+	{PG_WIN1252, &win1252_to_unicode_tree, &win1252_from_unicode_tree},
+	{PG_WIN1253, &win1253_to_unicode_tree, &win1253_from_unicode_tree},
+	{PG_WIN1254, &win1254_to_unicode_tree, &win1254_from_unicode_tree},
+	{PG_WIN1255, &win1255_to_unicode_tree, &win1255_from_unicode_tree},
+	{PG_WIN1256, &win1256_to_unicode_tree, &win1256_from_unicode_tree},
+	{PG_WIN1257, &win1257_to_unicode_tree, &win1257_from_unicode_tree},
+	{PG_WIN1258, &win1258_to_unicode_tree, &win1258_from_unicode_tree},
 };
 
 Datum
@@ -114,7 +90,7 @@ win_to_utf8(PG_FUNCTION_ARGS)
 		if (encoding == maps[i].encoding)
 		{
 			LocalToUtf(src, len, dest,
-					   maps[i].map1, maps[i].size1,
+					   maps[i].map1,
 					   NULL, 0,
 					   NULL,
 					   encoding);
@@ -146,7 +122,7 @@ utf8_to_win(PG_FUNCTION_ARGS)
 		if (encoding == maps[i].encoding)
 		{
 			UtfToLocal(src, len, dest,
-					   maps[i].map2, maps[i].size2,
+					   maps[i].map2,
 					   NULL, 0,
 					   NULL,
 					   encoding);
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index ceb5695..5ab93cb 100644
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -383,6 +383,58 @@ typedef struct
 	uint32		code;			/* local code */
 } pg_utf_to_local;
 
+typedef struct pg_mb_radix_index
+{
+	uint8		lower;
+	uint8		upper;                           /* index range of b2idx */
+} pg_mb_radix_index;
+
+/*
+ * Radix tree structs for faster conversion
+ */
+typedef struct
+{
+	/*
+	 * Array containing all the values. Only one of chars16 or chars32 is
+	 * used, depending on how wide the values we need to represent are.
+	 */
+	const uint16 *chars16;					/* 16 bit character table */
+	const uint32 *chars32;					/* 32 bit character table */
+
+	/* Radix tree for 1-byte inputs */
+	uint32		b1root;		/* offset of table in the chars[16|32] array */
+	uint8		b1_lower;	/* min allowed value for a single byte input */
+	uint8		b1_upper;	/* max allowed value for a single byte input */
+
+	/* Radix tree for 2-byte inputs */
+	uint32		b2root;		/* offset of 1st byte's table */
+	uint8		b2_1_lower; /* min/max allowed value for 1st input byte */
+	uint8		b2_1_upper;
+	uint8		b2_2_lower; /* min/max allowed value for 2nd input byte */
+	uint8		b2_2_upper;
+
+	/* Radix tree for 3-byte inputs */
+	uint32		b3root;		/* offset of 1st byte's table */
+	uint8		b3_1_lower; /* min/max allowed value for 1st input byte */
+	uint8		b3_1_upper;
+	uint8		b3_2_lower; /* min/max allowed value for 2nd input byte */
+	uint8		b3_2_upper;
+	uint8		b3_3_lower; /* min/max allowed value for 3rd input byte */
+	uint8		b3_3_upper;
+
+	/* Radix tree for 4-byte inputs */
+	uint32		b4root;		/* offset of 1st byte's table */
+	uint8		b4_1_lower; /* min/max allowed value for 1st input byte */
+	uint8		b4_1_upper;
+	uint8		b4_2_lower; /* min/max allowed value for 2nd input byte */
+	uint8		b4_2_upper;
+	uint8		b4_3_lower; /* min/max allowed value for 3rd input byte */
+	uint8		b4_3_upper;
+	uint8		b4_4_lower; /* min/max allowed value for 4th input byte */
+	uint8		b4_4_upper;
+
+} pg_mb_radix_tree;
+
 /*
  * local code to UTF-8 conversion map
  */
@@ -510,13 +562,13 @@ extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc);
 
 extern void UtfToLocal(const unsigned char *utf, int len,
 		   unsigned char *iso,
-		   const pg_utf_to_local *map, int mapsize,
+		   const pg_mb_radix_tree *map,
 		   const pg_utf_to_local_combined *cmap, int cmapsize,
 		   utf_local_conversion_func conv_func,
 		   int encoding);
 extern void LocalToUtf(const unsigned char *iso, int len,
 		   unsigned char *utf,
-		   const pg_local_to_utf *map, int mapsize,
+		   const pg_mb_radix_tree *map,
 		   const pg_local_to_utf_combined *cmap, int cmapsize,
 		   utf_local_conversion_func conv_func,
 		   int encoding);
-- 
2.9.2

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] Radix tree for character conversion

Reply via email to