* lib/unictype/joininggroup_byname.gperf: Add Rohingya Yeh joining group name.
* lib/unictype/joininggroup_name.h: Likewise.
* lib/unilbrk/lbrktables.h (LBP_HL): New enumeration value. (unilbrk_table): Adjust table size. * lib/unilbrk/lbrktables.c (unilbrk_table): Add a row and column for LBP_HL. * lib/gen-uni-tables.c (UC_JOINING_GROUP_ROHINGYA_YEH): New enumeration value. (fill_arabicshaping, joining_group_as_c_identifier): Support UC_JOINING_GROUP_ROHINGYA_YEH. (is_property_default_ignorable_code_point): Reject U+0604. (LBP_HL): New enumeration value. (get_lbp, debug_output_lbp, fill_org_lbp, debug_output_org_lbp) (output_lbp): Support LBP_HL. (fill_org_lbp): Resolve CJ as NS, for backward compatibility. --- lib/gen-uni-tables.c | 42 +++++++++++++++++--------- lib/unictype.in.h | 3 +- lib/unictype/joininggroup_byname.gperf | 2 ++ lib/unictype/joininggroup_name.h | 1 + lib/unilbrk/lbrktables.c | 55 +++++++++++++++++----------------- lib/unilbrk/lbrktables.h | 22 +++++++------- 6 files changed, 73 insertions(+), 52 deletions(-) diff --git a/lib/gen-uni-tables.c b/lib/gen-uni-tables.c index 1af832e..ec1aba5 100644 --- a/lib/gen-uni-tables.c +++ b/lib/gen-uni-tables.c @@ -32,7 +32,7 @@ /usr/local/share/Unidata/CompositionExclusions.txt \ /usr/local/share/Unidata/SpecialCasing.txt \ /usr/local/share/Unidata/CaseFolding.txt \ - 6.0.0 + 6.1.0 */ #include <stdbool.h> @@ -2868,7 +2868,7 @@ is_property_default_ignorable_code_point (unsigned int ch) bool result1 = (is_category_Cf (ch) && !(ch >= 0xFFF9 && ch <= 0xFFFB) /* Annotations */ - && !((ch >= 0x0600 && ch <= 0x0603) || ch == 0x06DD || ch == 0x070F) + && !((ch >= 0x0600 && ch <= 0x0604) || ch == 0x06DD || ch == 0x070F) /* For some reason, the following are not listed as having property Default_Ignorable_Code_Point. 
*/ && !(ch == 0x110BD)) @@ -3746,7 +3746,8 @@ enum UC_JOINING_GROUP_YUDH, /* Yudh */ UC_JOINING_GROUP_YUDH_HE, /* Yudh_He */ UC_JOINING_GROUP_ZAIN, /* Zain */ - UC_JOINING_GROUP_ZHAIN /* Zhain */ + UC_JOINING_GROUP_ZHAIN, /* Zhain */ + UC_JOINING_GROUP_ROHINGYA_YEH /* Rohingya_Yeh */ }; static uint8_t unicode_joining_group[0x110000]; @@ -3886,6 +3887,7 @@ fill_arabicshaping (const char *arabicshaping_filename) TRY(UC_JOINING_GROUP_YUDH_HE, "YUDH HE") TRY(UC_JOINING_GROUP_ZAIN, "ZAIN") TRY(UC_JOINING_GROUP_ZHAIN, "ZHAIN") + TRY(UC_JOINING_GROUP_ROHINGYA_YEH, "ROHINGYA YEH") #undef TRY else { @@ -3987,7 +3989,7 @@ output_joining_type (const char *filename, const char *version) } fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n"); - fprintf (stream, "/* Arabic joining group of Unicode characters. */\n"); + fprintf (stream, "/* Arabic joining type of Unicode characters. */\n"); fprintf (stream, "/* Generated automatically by gen-uni-tables.c for Unicode %s. */\n", version); @@ -4167,6 +4169,7 @@ joining_group_as_c_identifier (int joining_group) TRY(UC_JOINING_GROUP_YUDH_HE) TRY(UC_JOINING_GROUP_ZAIN) TRY(UC_JOINING_GROUP_ZHAIN) + TRY(UC_JOINING_GROUP_ROHINGYA_YEH) #undef TRY abort (); } @@ -6210,22 +6213,22 @@ output_width_property_test (const char *filename) enum { - /* Values >= 25 are resolved at run time. */ - LBP_BK = 25, /* mandatory break */ + /* Values >= 26 are resolved at run time. 
*/ + LBP_BK = 26, /* mandatory break */ /*LBP_CR, carriage return - not used here because it's a DOSism */ /*LBP_LF, line feed - not used here because it's a DOSism */ - LBP_CM = 26, /* attached characters and combining marks */ + LBP_CM = 27, /* attached characters and combining marks */ /*LBP_NL, next line - not used here because it's equivalent to LBP_BK */ /*LBP_SG, surrogates - not used here because they are not characters */ LBP_WJ = 0, /* word joiner */ - LBP_ZW = 27, /* zero width space */ + LBP_ZW = 28, /* zero width space */ LBP_GL = 1, /* non-breaking (glue) */ - LBP_SP = 28, /* space */ + LBP_SP = 29, /* space */ LBP_B2 = 2, /* break opportunity before and after */ LBP_BA = 3, /* break opportunity after */ LBP_BB = 4, /* break opportunity before */ LBP_HY = 5, /* hyphen */ - LBP_CB = 29, /* contingent break opportunity */ + LBP_CB = 30, /* contingent break opportunity */ LBP_CL = 6, /* closing punctuation */ LBP_CP = 7, /* closing parenthesis */ LBP_EX = 8, /* exclamation/interrogation */ @@ -6238,16 +6241,18 @@ enum LBP_PO = 15, /* postfix (numeric) */ LBP_PR = 16, /* prefix (numeric) */ LBP_SY = 17, /* symbols allowing breaks */ - LBP_AI = 30, /* ambiguous (alphabetic or ideograph) */ + LBP_AI = 31, /* ambiguous (alphabetic or ideograph) */ LBP_AL = 18, /* ordinary alphabetic and symbol characters */ +/*LBP_CJ, conditional Japanese starter, resolved to NS */ LBP_H2 = 19, /* Hangul LV syllable */ LBP_H3 = 20, /* Hangul LVT syllable */ + LBP_HL = 25, /* Hebrew letter */ LBP_ID = 21, /* ideographic */ LBP_JL = 22, /* Hangul L Jamo */ LBP_JV = 23, /* Hangul V Jamo */ LBP_JT = 24, /* Hangul T Jamo */ - LBP_SA = 31, /* complex context (South East Asian) */ - LBP_XX = 32 /* unknown */ + LBP_SA = 32, /* complex context (South East Asian) */ + LBP_XX = 33 /* unknown */ }; /* Returns the line breaking classification for ch, as a bit mask. 
*/ @@ -6692,6 +6697,10 @@ get_lbp (unsigned int ch) if (ch >= 0xAC00 && ch <= 0xD7A3 && ((ch - 0xAC00) % 28) != 0) attr |= (int64_t) 1 << LBP_H3; + if ((ch >= 0x05D0 && ch <= 0x05F2) || ch == 0xFB1D + || (ch >= 0xFB1F && ch <= 0xFB28) || (ch >= 0xFB2A && ch <= 0xFB4F)) + attr |= (int64_t) 1 << LBP_HL; + if ((ch >= 0x1100 && ch <= 0x115F) || (ch >= 0xA960 && ch <= 0xA97C)) attr |= (int64_t) 1 << LBP_JL; @@ -6853,7 +6862,7 @@ get_lbp (unsigned int ch) || ch == 0x2064 /* INVISIBLE PLUS */ /* Extra characters for compatibility with Unicode LineBreak.txt. */ || ch == 0x110BD /* KAITHI NUMBER SIGN */) - if (!(attr & (((int64_t) 1 << LBP_GL) | ((int64_t) 1 << LBP_B2) | ((int64_t) 1 << LBP_BA) | ((int64_t) 1 << LBP_BB) | ((int64_t) 1 << LBP_HY) | ((int64_t) 1 << LBP_CB) | ((int64_t) 1 << LBP_CL) | ((int64_t) 1 << LBP_CP) | ((int64_t) 1 << LBP_EX) | ((int64_t) 1 << LBP_IN) | ((int64_t) 1 << LBP_NS) | ((int64_t) 1 << LBP_OP) | ((int64_t) 1 << LBP_QU) | ((int64_t) 1 << LBP_IS) | ((int64_t) 1 << LBP_NU) | ((int64_t) 1 << LBP_PO) | ((int64_t) 1 << LBP_PR) | ((int64_t) 1 << LBP_SY) | ((int64_t) 1 << LBP_H2) | ((int64_t) 1 << LBP_H3) | ((int64_t) 1 << LBP_JL) | ((int64_t) 1 << LBP_JV) | ((int64_t) 1 << LBP_JT) | ((int64_t) 1 << LBP_SA) | ((int64_t) 1 << LBP_ID)))) + if (!(attr & (((int64_t) 1 << LBP_GL) | ((int64_t) 1 << LBP_B2) | ((int64_t) 1 << LBP_BA) | ((int64_t) 1 << LBP_BB) | ((int64_t) 1 << LBP_HY) | ((int64_t) 1 << LBP_CB) | ((int64_t) 1 << LBP_CL) | ((int64_t) 1 << LBP_CP) | ((int64_t) 1 << LBP_EX) | ((int64_t) 1 << LBP_IN) | ((int64_t) 1 << LBP_NS) | ((int64_t) 1 << LBP_OP) | ((int64_t) 1 << LBP_QU) | ((int64_t) 1 << LBP_IS) | ((int64_t) 1 << LBP_NU) | ((int64_t) 1 << LBP_PO) | ((int64_t) 1 << LBP_PR) | ((int64_t) 1 << LBP_SY) | ((int64_t) 1 << LBP_H2) | ((int64_t) 1 << LBP_H3) | ((int64_t) 1 << LBP_HL) | ((int64_t) 1 << LBP_JL) | ((int64_t) 1 << LBP_JV) | ((int64_t) 1 << LBP_JT) | ((int64_t) 1 << LBP_SA) | ((int64_t) 1 << LBP_ID)))) { /* ambiguous (alphabetic) ? 
*/ if ((unicode_width[ch] != NULL @@ -6973,6 +6982,7 @@ debug_output_lbp (FILE *stream) PRINT_BIT(attr,LBP_AL); PRINT_BIT(attr,LBP_H2); PRINT_BIT(attr,LBP_H3); + PRINT_BIT(attr,LBP_HL); PRINT_BIT(attr,LBP_ID); PRINT_BIT(attr,LBP_JL); PRINT_BIT(attr,LBP_JV); @@ -7087,6 +7097,7 @@ fill_org_lbp (const char *linebreak_filename) TRY(LBP_AL) TRY(LBP_H2) TRY(LBP_H3) + TRY(LBP_HL) TRY(LBP_ID) TRY(LBP_JL) TRY(LBP_JV) @@ -7098,6 +7109,7 @@ fill_org_lbp (const char *linebreak_filename) else if (strcmp (field1, "CR") == 0) value = LBP_BK; else if (strcmp (field1, "NL") == 0) value = LBP_BK; else if (strcmp (field1, "SG") == 0) value = LBP_XX; + else if (strcmp (field1, "CJ") == 0) value = LBP_NS; else { fprintf (stderr, "unknown property value \"%s\" in '%s':%d\n", @@ -7167,6 +7179,7 @@ debug_output_org_lbp (FILE *stream) PRINT_BIT(attr,LBP_AL); PRINT_BIT(attr,LBP_H2); PRINT_BIT(attr,LBP_H3); + PRINT_BIT(attr,LBP_HL); PRINT_BIT(attr,LBP_ID); PRINT_BIT(attr,LBP_JL); PRINT_BIT(attr,LBP_JV); @@ -7340,6 +7353,7 @@ output_lbp (FILE *stream1, FILE *stream2) CASE(LBP_AL); CASE(LBP_H2); CASE(LBP_H3); + CASE(LBP_HL); CASE(LBP_ID); CASE(LBP_JL); CASE(LBP_JV); diff --git a/lib/unictype.in.h b/lib/unictype.in.h index 5125e96..30c71aa 100644 --- a/lib/unictype.in.h +++ b/lib/unictype.in.h @@ -518,7 +518,8 @@ enum UC_JOINING_GROUP_YUDH, /* Yudh */ UC_JOINING_GROUP_YUDH_HE, /* Yudh_He */ UC_JOINING_GROUP_ZAIN, /* Zain */ - UC_JOINING_GROUP_ZHAIN /* Zhain */ + UC_JOINING_GROUP_ZHAIN, /* Zhain */ + UC_JOINING_GROUP_ROHINGYA_YEH /* Rohingya_Yeh */ }; /* Return the name of a joining group. 
*/ diff --git a/lib/unictype/joininggroup_byname.gperf b/lib/unictype/joininggroup_byname.gperf index bc2fbc8..90be16e 100644 --- a/lib/unictype/joininggroup_byname.gperf +++ b/lib/unictype/joininggroup_byname.gperf @@ -83,3 +83,5 @@ Yudh He, UC_JOINING_GROUP_YUDH_HE YudhHe, UC_JOINING_GROUP_YUDH_HE Zain, UC_JOINING_GROUP_ZAIN Zhain, UC_JOINING_GROUP_ZHAIN +Rohingya Yeh, UC_JOINING_GROUP_ROHINGYA_YEH +RohingyaYeh, UC_JOINING_GROUP_ROHINGYA_YEH diff --git a/lib/unictype/joininggroup_name.h b/lib/unictype/joininggroup_name.h index 78d4a10..681f1a5 100644 --- a/lib/unictype/joininggroup_name.h +++ b/lib/unictype/joininggroup_name.h @@ -72,3 +72,4 @@ ELEM (YUDH, "Yudh") ELEM (YUDH_HE, "Yudh He") ELEM (ZAIN, "Zain") ELEM (ZHAIN, "Zhain") +ELEM (ROHINGYA_YEH, "Rohingya Yeh") diff --git a/lib/unilbrk/lbrktables.c b/lib/unilbrk/lbrktables.c index f0b3d59..d60321d 100644 --- a/lib/unilbrk/lbrktables.c +++ b/lib/unilbrk/lbrktables.c @@ -23,35 +23,36 @@ /* Define unilbrkprop, table of line breaking properties. 
*/ #include "unilbrk/lbrkprop2.h" -const unsigned char unilbrk_table[25][25] = +const unsigned char unilbrk_table[26][26] = { /* after */ - /* WJ GL B2 BA BB HY CL CP EX IN NS OP QU IS NU PO PR SY AL H2 H3 ID JL JV JT */ -/* WJ */ { P, I, I, I, I, I, P, P, P, I, I, I, I, P, I, I, I, P, I, I, I, I, I, I, I, }, -/* GL */ { P, I, I, I, I, I, P, P, P, I, I, I, I, P, I, I, I, P, I, I, I, I, I, I, I, }, -/* B2 */ { P, I, P, I, D, I, P, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, }, -/* BA */ { P, D, D, I, D, I, P, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, }, -/* BB */ { P, I, I, I, I, I, P, P, P, I, I, I, I, P, I, I, I, P, I, I, I, I, I, I, I, }, -/* HY */ { P, D, D, I, D, I, P, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D, D, D, D, }, -/* CL */ { P, I, D, I, D, I, P, P, P, D, P, D, I, P, D, I, I, P, D, D, D, D, D, D, D, }, -/* CP */ { P, I, D, I, D, I, P, P, P, D, P, D, I, P, I, I, I, P, I, D, D, D, D, D, D, }, -/* EX */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, }, -/* IN */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, }, -/* NS */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, }, -/* OP */ { P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, }, -/* QU */ { P, I, I, I, I, I, P, P, P, I, I, P, I, P, I, I, I, P, I, I, I, I, I, I, I, }, -/* IS */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D, D, D, D, }, -/* NU */ { P, I, D, I, D, I, P, P, P, I, I, I, I, P, I, I, I, P, I, D, D, D, D, D, D, }, -/* PO */ { P, I, D, I, D, I, P, P, P, D, I, I, I, P, I, D, D, P, I, D, D, D, D, D, D, }, -/* PR */ { P, I, D, I, D, I, P, P, P, D, I, I, I, P, I, D, D, P, I, I, I, I, I, I, I, }, -/* SY */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D, D, D, D, }, -/* AL */ { P, I, D, I, D, I, P, P, P, I, I, I, I, P, I, D, D, P, I, D, D, D, D, D, D, }, -/* H2 */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, 
D, D, D, D, I, I, }, -/* H3 */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, D, I, }, -/* ID */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, D, D, }, -/* JL */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, I, I, D, I, I, D, }, -/* JV */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, I, I, }, -/* JT */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, D, I, }, + /* WJ GL B2 BA BB HY CL CP EX IN NS OP QU IS NU PO PR SY AL H2 H3 ID JL JV JT HL */ +/* WJ */ { P, I, I, I, I, I, P, P, P, I, I, I, I, P, I, I, I, P, I, I, I, I, I, I, I, I, }, +/* GL */ { P, I, I, I, I, I, P, P, P, I, I, I, I, P, I, I, I, P, I, I, I, I, I, I, I, I, }, +/* B2 */ { P, I, P, I, D, I, P, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, D, }, +/* BA */ { P, D, D, I, D, I, P, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, D, }, +/* BB */ { P, I, I, I, I, I, P, P, P, I, I, I, I, P, I, I, I, P, I, I, I, I, I, I, I, I, }, +/* HY */ { P, D, D, I, D, I, P, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D, D, D, D, D, }, +/* CL */ { P, I, D, I, D, I, P, P, P, D, P, D, I, P, D, I, I, P, D, D, D, D, D, D, D, D, }, +/* CP */ { P, I, D, I, D, I, P, P, P, D, P, D, I, P, I, I, I, P, I, D, D, D, D, D, D, I, }, +/* EX */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, D, }, +/* IN */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, D, }, +/* NS */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, D, }, +/* OP */ { P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, }, +/* QU */ { P, I, I, I, I, I, P, P, P, I, I, P, I, P, I, I, I, P, I, I, I, I, I, I, I, I, }, +/* IS */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D, D, D, D, D, }, +/* NU */ { P, I, D, I, D, I, P, P, P, I, I, I, I, P, I, I, I, P, I, D, D, D, D, D, D, I, }, +/* PO */ { P, I, D, I, D, I, P, P, P, D, I, I, 
I, P, I, D, D, P, I, D, D, D, D, D, D, I, }, +/* PR */ { P, I, D, I, D, I, P, P, P, D, I, I, I, P, I, D, D, P, I, I, I, I, I, I, I, I, }, +/* SY */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D, D, D, D, D, }, +/* AL */ { P, I, D, I, D, I, P, P, P, I, I, I, I, P, I, D, D, P, I, D, D, D, D, D, D, I, }, +/* H2 */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, I, I, D, }, +/* H3 */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, D, I, D, }, +/* ID */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, D, D, D, }, +/* JL */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, I, I, D, I, I, D, D, }, +/* JV */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, I, I, D, }, +/* JT */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, D, I, D, }, +/* HL */ { P, I, D, I, D, I, P, P, P, I, I, I, I, P, I, D, D, P, I, D, D, D, D, D, D, I, }, /* "" */ /* before */ }; diff --git a/lib/unilbrk/lbrktables.h b/lib/unilbrk/lbrktables.h index e651d71..95bb502 100644 --- a/lib/unilbrk/lbrktables.h +++ b/lib/unilbrk/lbrktables.h @@ -21,22 +21,22 @@ enum { - /* Values >= 25 are resolved at run time. */ - LBP_BK = 25, /* mandatory break */ + /* Values >= 26 are resolved at run time. 
*/ + LBP_BK = 26, /* mandatory break */ /*LBP_CR, carriage return - not used here because it's a DOSism */ /*LBP_LF, line feed - not used here because it's a DOSism */ - LBP_CM = 26, /* attached characters and combining marks */ + LBP_CM = 27, /* attached characters and combining marks */ /*LBP_NL, next line - not used here because it's equivalent to LBP_BK */ /*LBP_SG, surrogates - not used here because they are not characters */ LBP_WJ = 0, /* word joiner */ - LBP_ZW = 27, /* zero width space */ + LBP_ZW = 28, /* zero width space */ LBP_GL = 1, /* non-breaking (glue) */ - LBP_SP = 28, /* space */ + LBP_SP = 29, /* space */ LBP_B2 = 2, /* break opportunity before and after */ LBP_BA = 3, /* break opportunity after */ LBP_BB = 4, /* break opportunity before */ LBP_HY = 5, /* hyphen */ - LBP_CB = 29, /* contingent break opportunity */ + LBP_CB = 30, /* contingent break opportunity */ LBP_CL = 6, /* closing punctuation */ LBP_CP = 7, /* closing parenthesis */ LBP_EX = 8, /* exclamation/interrogation */ @@ -49,16 +49,18 @@ enum LBP_PO = 15, /* postfix (numeric) */ LBP_PR = 16, /* prefix (numeric) */ LBP_SY = 17, /* symbols allowing breaks */ - LBP_AI = 30, /* ambiguous (alphabetic or ideograph) */ + LBP_AI = 31, /* ambiguous (alphabetic or ideograph) */ LBP_AL = 18, /* ordinary alphabetic and symbol characters */ +/*LBP_CJ, conditional Japanese starters, resolved to NS */ LBP_H2 = 19, /* Hangul LV syllable */ LBP_H3 = 20, /* Hangul LVT syllable */ + LBP_HL = 25, /* Hebrew letter */ LBP_ID = 21, /* ideographic */ LBP_JL = 22, /* Hangul L Jamo */ LBP_JV = 23, /* Hangul V Jamo */ LBP_JT = 24, /* Hangul T Jamo */ - LBP_SA = 31, /* complex context (South East Asian) */ - LBP_XX = 32 /* unknown */ + LBP_SA = 32, /* complex context (South East Asian) */ + LBP_XX = 33 /* unknown */ }; #include "lbrkprop1.h" @@ -89,7 +91,7 @@ unilbrkprop_lookup (ucs4_t uc) #define I 2 /* indirect break opportunity, '%' in table 7.3 of UTR #14 */ #define P 3 /* prohibited break, '^' in table 
7.3 of UTR #14 */ -extern const unsigned char unilbrk_table[25][25]; +extern const unsigned char unilbrk_table[26][26]; /* We don't support line breaking of complex-context dependent characters (Thai, Lao, Myanmar, Khmer) yet, because it requires dictionary lookup. */ -- 1.9.3