Changeset: 5ac94c0d2fa5 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/5ac94c0d2fa5
Modified Files:
	gdk/gdk_string.c
Branch: ascii-flag
Log Message:
Implemented special case upper(ß) = SS. Also some layout changes. diffs (truncated from 3919 to 300 lines): diff --git a/gdk/gdk_string.c b/gdk/gdk_string.c --- a/gdk/gdk_string.c +++ b/gdk/gdk_string.c @@ -1472,595 +1472,596 @@ GDKanalytical_str_group_concat(BAT *r, B * last byte of a sequence, it is the converted codepoint, and otherwise * a (new) offset into the same table. */ static const int lowercase[4288] = { - [0x0] = 0x0, /* U+0000: <control> */ - [0x1] = 0x1, /* U+0001: <control> */ - [0x2] = 0x2, /* U+0002: <control> */ - [0x3] = 0x3, /* U+0003: <control> */ - [0x4] = 0x4, /* U+0004: <control> */ - [0x5] = 0x5, /* U+0005: <control> */ - [0x6] = 0x6, /* U+0006: <control> */ - [0x7] = 0x7, /* U+0007: <control> */ - [0x8] = 0x8, /* U+0008: <control> */ - [0x9] = 0x9, /* U+0009: <control> */ - [0xA] = 0xA, /* U+000A: <control> */ - [0xB] = 0xB, /* U+000B: <control> */ - [0xC] = 0xC, /* U+000C: <control> */ - [0xD] = 0xD, /* U+000D: <control> */ - [0xE] = 0xE, /* U+000E: <control> */ - [0xF] = 0xF, /* U+000F: <control> */ - [0x10] = 0x10, /* U+0010: <control> */ - [0x11] = 0x11, /* U+0011: <control> */ - [0x12] = 0x12, /* U+0012: <control> */ - [0x13] = 0x13, /* U+0013: <control> */ - [0x14] = 0x14, /* U+0014: <control> */ - [0x15] = 0x15, /* U+0015: <control> */ - [0x16] = 0x16, /* U+0016: <control> */ - [0x17] = 0x17, /* U+0017: <control> */ - [0x18] = 0x18, /* U+0018: <control> */ - [0x19] = 0x19, /* U+0019: <control> */ - [0x1A] = 0x1A, /* U+001A: <control> */ - [0x1B] = 0x1B, /* U+001B: <control> */ - [0x1C] = 0x1C, /* U+001C: <control> */ - [0x1D] = 0x1D, /* U+001D: <control> */ - [0x1E] = 0x1E, /* U+001E: <control> */ - [0x1F] = 0x1F, /* U+001F: <control> */ - [0x20] = 0x20, /* U+0020: SPACE */ - [0x21] = 0x21, /* U+0021: EXCLAMATION MARK */ - [0x22] = 0x22, /* U+0022: QUOTATION MARK */ - [0x23] = 0x23, /* U+0023: NUMBER SIGN */ - [0x24] = 0x24, /* U+0024: DOLLAR SIGN */ - [0x25] = 0x25, /* U+0025: PERCENT SIGN */ - [0x26] = 0x26, /* U+0026: AMPERSAND 
*/ - [0x27] = 0x27, /* U+0027: APOSTROPHE */ - [0x28] = 0x28, /* U+0028: LEFT PARENTHESIS */ - [0x29] = 0x29, /* U+0029: RIGHT PARENTHESIS */ - [0x2A] = 0x2A, /* U+002A: ASTERISK */ - [0x2B] = 0x2B, /* U+002B: PLUS SIGN */ - [0x2C] = 0x2C, /* U+002C: COMMA */ - [0x2D] = 0x2D, /* U+002D: HYPHEN-MINUS */ - [0x2E] = 0x2E, /* U+002E: FULL STOP */ - [0x2F] = 0x2F, /* U+002F: SOLIDUS */ - [0x30] = 0x30, /* U+0030: DIGIT ZERO */ - [0x31] = 0x31, /* U+0031: DIGIT ONE */ - [0x32] = 0x32, /* U+0032: DIGIT TWO */ - [0x33] = 0x33, /* U+0033: DIGIT THREE */ - [0x34] = 0x34, /* U+0034: DIGIT FOUR */ - [0x35] = 0x35, /* U+0035: DIGIT FIVE */ - [0x36] = 0x36, /* U+0036: DIGIT SIX */ - [0x37] = 0x37, /* U+0037: DIGIT SEVEN */ - [0x38] = 0x38, /* U+0038: DIGIT EIGHT */ - [0x39] = 0x39, /* U+0039: DIGIT NINE */ - [0x3A] = 0x3A, /* U+003A: COLON */ - [0x3B] = 0x3B, /* U+003B: SEMICOLON */ - [0x3C] = 0x3C, /* U+003C: LESS-THAN SIGN */ - [0x3D] = 0x3D, /* U+003D: EQUALS SIGN */ - [0x3E] = 0x3E, /* U+003E: GREATER-THAN SIGN */ - [0x3F] = 0x3F, /* U+003F: QUESTION MARK */ - [0x40] = 0x40, /* U+0040: COMMERCIAL AT */ - [0x41] = 0x61, /* U+0041: LATIN CAPITAL LETTER A */ - [0x42] = 0x62, /* U+0042: LATIN CAPITAL LETTER B */ - [0x43] = 0x63, /* U+0043: LATIN CAPITAL LETTER C */ - [0x44] = 0x64, /* U+0044: LATIN CAPITAL LETTER D */ - [0x45] = 0x65, /* U+0045: LATIN CAPITAL LETTER E */ - [0x46] = 0x66, /* U+0046: LATIN CAPITAL LETTER F */ - [0x47] = 0x67, /* U+0047: LATIN CAPITAL LETTER G */ - [0x48] = 0x68, /* U+0048: LATIN CAPITAL LETTER H */ - [0x49] = 0x69, /* U+0049: LATIN CAPITAL LETTER I */ - [0x4A] = 0x6A, /* U+004A: LATIN CAPITAL LETTER J */ - [0x4B] = 0x6B, /* U+004B: LATIN CAPITAL LETTER K */ - [0x4C] = 0x6C, /* U+004C: LATIN CAPITAL LETTER L */ - [0x4D] = 0x6D, /* U+004D: LATIN CAPITAL LETTER M */ - [0x4E] = 0x6E, /* U+004E: LATIN CAPITAL LETTER N */ - [0x4F] = 0x6F, /* U+004F: LATIN CAPITAL LETTER O */ - [0x50] = 0x70, /* U+0050: LATIN CAPITAL LETTER P */ - [0x51] = 0x71, /* 
U+0051: LATIN CAPITAL LETTER Q */ - [0x52] = 0x72, /* U+0052: LATIN CAPITAL LETTER R */ - [0x53] = 0x73, /* U+0053: LATIN CAPITAL LETTER S */ - [0x54] = 0x74, /* U+0054: LATIN CAPITAL LETTER T */ - [0x55] = 0x75, /* U+0055: LATIN CAPITAL LETTER U */ - [0x56] = 0x76, /* U+0056: LATIN CAPITAL LETTER V */ - [0x57] = 0x77, /* U+0057: LATIN CAPITAL LETTER W */ - [0x58] = 0x78, /* U+0058: LATIN CAPITAL LETTER X */ - [0x59] = 0x79, /* U+0059: LATIN CAPITAL LETTER Y */ - [0x5A] = 0x7A, /* U+005A: LATIN CAPITAL LETTER Z */ - [0x5B] = 0x5B, /* U+005B: LEFT SQUARE BRACKET */ - [0x5C] = 0x5C, /* U+005C: REVERSE SOLIDUS */ - [0x5D] = 0x5D, /* U+005D: RIGHT SQUARE BRACKET */ - [0x5E] = 0x5E, /* U+005E: CIRCUMFLEX ACCENT */ - [0x5F] = 0x5F, /* U+005F: LOW LINE */ - [0x60] = 0x60, /* U+0060: GRAVE ACCENT */ - [0x61] = 0x61, /* U+0061: LATIN SMALL LETTER A */ - [0x62] = 0x62, /* U+0062: LATIN SMALL LETTER B */ - [0x63] = 0x63, /* U+0063: LATIN SMALL LETTER C */ - [0x64] = 0x64, /* U+0064: LATIN SMALL LETTER D */ - [0x65] = 0x65, /* U+0065: LATIN SMALL LETTER E */ - [0x66] = 0x66, /* U+0066: LATIN SMALL LETTER F */ - [0x67] = 0x67, /* U+0067: LATIN SMALL LETTER G */ - [0x68] = 0x68, /* U+0068: LATIN SMALL LETTER H */ - [0x69] = 0x69, /* U+0069: LATIN SMALL LETTER I */ - [0x6A] = 0x6A, /* U+006A: LATIN SMALL LETTER J */ - [0x6B] = 0x6B, /* U+006B: LATIN SMALL LETTER K */ - [0x6C] = 0x6C, /* U+006C: LATIN SMALL LETTER L */ - [0x6D] = 0x6D, /* U+006D: LATIN SMALL LETTER M */ - [0x6E] = 0x6E, /* U+006E: LATIN SMALL LETTER N */ - [0x6F] = 0x6F, /* U+006F: LATIN SMALL LETTER O */ - [0x70] = 0x70, /* U+0070: LATIN SMALL LETTER P */ - [0x71] = 0x71, /* U+0071: LATIN SMALL LETTER Q */ - [0x72] = 0x72, /* U+0072: LATIN SMALL LETTER R */ - [0x73] = 0x73, /* U+0073: LATIN SMALL LETTER S */ - [0x74] = 0x74, /* U+0074: LATIN SMALL LETTER T */ - [0x75] = 0x75, /* U+0075: LATIN SMALL LETTER U */ - [0x76] = 0x76, /* U+0076: LATIN SMALL LETTER V */ - [0x77] = 0x77, /* U+0077: LATIN SMALL LETTER W */ 
- [0x78] = 0x78, /* U+0078: LATIN SMALL LETTER X */ - [0x79] = 0x79, /* U+0079: LATIN SMALL LETTER Y */ - [0x7A] = 0x7A, /* U+007A: LATIN SMALL LETTER Z */ - [0x7B] = 0x7B, /* U+007B: LEFT CURLY BRACKET */ - [0x7C] = 0x7C, /* U+007C: VERTICAL LINE */ - [0x7D] = 0x7D, /* U+007D: RIGHT CURLY BRACKET */ - [0x7E] = 0x7E, /* U+007E: TILDE */ - [0x7F] = 0x7F, /* U+007F: <control> */ + [0x00] = 0x0000, /* U+0000: <control> */ + [0x01] = 0x0001, /* U+0001: <control> */ + [0x02] = 0x0002, /* U+0002: <control> */ + [0x03] = 0x0003, /* U+0003: <control> */ + [0x04] = 0x0004, /* U+0004: <control> */ + [0x05] = 0x0005, /* U+0005: <control> */ + [0x06] = 0x0006, /* U+0006: <control> */ + [0x07] = 0x0007, /* U+0007: <control> */ + [0x08] = 0x0008, /* U+0008: <control> */ + [0x09] = 0x0009, /* U+0009: <control> */ + [0x0A] = 0x000A, /* U+000A: <control> */ + [0x0B] = 0x000B, /* U+000B: <control> */ + [0x0C] = 0x000C, /* U+000C: <control> */ + [0x0D] = 0x000D, /* U+000D: <control> */ + [0x0E] = 0x000E, /* U+000E: <control> */ + [0x0F] = 0x000F, /* U+000F: <control> */ + [0x10] = 0x0010, /* U+0010: <control> */ + [0x11] = 0x0011, /* U+0011: <control> */ + [0x12] = 0x0012, /* U+0012: <control> */ + [0x13] = 0x0013, /* U+0013: <control> */ + [0x14] = 0x0014, /* U+0014: <control> */ + [0x15] = 0x0015, /* U+0015: <control> */ + [0x16] = 0x0016, /* U+0016: <control> */ + [0x17] = 0x0017, /* U+0017: <control> */ + [0x18] = 0x0018, /* U+0018: <control> */ + [0x19] = 0x0019, /* U+0019: <control> */ + [0x1A] = 0x001A, /* U+001A: <control> */ + [0x1B] = 0x001B, /* U+001B: <control> */ + [0x1C] = 0x001C, /* U+001C: <control> */ + [0x1D] = 0x001D, /* U+001D: <control> */ + [0x1E] = 0x001E, /* U+001E: <control> */ + [0x1F] = 0x001F, /* U+001F: <control> */ + [0x20] = 0x0020, /* U+0020: SPACE */ + [0x21] = 0x0021, /* U+0021: EXCLAMATION MARK */ + [0x22] = 0x0022, /* U+0022: QUOTATION MARK */ + [0x23] = 0x0023, /* U+0023: NUMBER SIGN */ + [0x24] = 0x0024, /* U+0024: DOLLAR SIGN */ + [0x25] = 
0x0025, /* U+0025: PERCENT SIGN */ + [0x26] = 0x0026, /* U+0026: AMPERSAND */ + [0x27] = 0x0027, /* U+0027: APOSTROPHE */ + [0x28] = 0x0028, /* U+0028: LEFT PARENTHESIS */ + [0x29] = 0x0029, /* U+0029: RIGHT PARENTHESIS */ + [0x2A] = 0x002A, /* U+002A: ASTERISK */ + [0x2B] = 0x002B, /* U+002B: PLUS SIGN */ + [0x2C] = 0x002C, /* U+002C: COMMA */ + [0x2D] = 0x002D, /* U+002D: HYPHEN-MINUS */ + [0x2E] = 0x002E, /* U+002E: FULL STOP */ + [0x2F] = 0x002F, /* U+002F: SOLIDUS */ + [0x30] = 0x0030, /* U+0030: DIGIT ZERO */ + [0x31] = 0x0031, /* U+0031: DIGIT ONE */ + [0x32] = 0x0032, /* U+0032: DIGIT TWO */ + [0x33] = 0x0033, /* U+0033: DIGIT THREE */ + [0x34] = 0x0034, /* U+0034: DIGIT FOUR */ + [0x35] = 0x0035, /* U+0035: DIGIT FIVE */ + [0x36] = 0x0036, /* U+0036: DIGIT SIX */ + [0x37] = 0x0037, /* U+0037: DIGIT SEVEN */ + [0x38] = 0x0038, /* U+0038: DIGIT EIGHT */ + [0x39] = 0x0039, /* U+0039: DIGIT NINE */ + [0x3A] = 0x003A, /* U+003A: COLON */ + [0x3B] = 0x003B, /* U+003B: SEMICOLON */ + [0x3C] = 0x003C, /* U+003C: LESS-THAN SIGN */ + [0x3D] = 0x003D, /* U+003D: EQUALS SIGN */ + [0x3E] = 0x003E, /* U+003E: GREATER-THAN SIGN */ + [0x3F] = 0x003F, /* U+003F: QUESTION MARK */ + [0x40] = 0x0040, /* U+0040: COMMERCIAL AT */ + [0x41] = 0x0061, /* U+0041: LATIN CAPITAL LETTER A */ + [0x42] = 0x0062, /* U+0042: LATIN CAPITAL LETTER B */ + [0x43] = 0x0063, /* U+0043: LATIN CAPITAL LETTER C */ + [0x44] = 0x0064, /* U+0044: LATIN CAPITAL LETTER D */ + [0x45] = 0x0065, /* U+0045: LATIN CAPITAL LETTER E */ + [0x46] = 0x0066, /* U+0046: LATIN CAPITAL LETTER F */ + [0x47] = 0x0067, /* U+0047: LATIN CAPITAL LETTER G */ + [0x48] = 0x0068, /* U+0048: LATIN CAPITAL LETTER H */ + [0x49] = 0x0069, /* U+0049: LATIN CAPITAL LETTER I */ + [0x4A] = 0x006A, /* U+004A: LATIN CAPITAL LETTER J */ + [0x4B] = 0x006B, /* U+004B: LATIN CAPITAL LETTER K */ + [0x4C] = 0x006C, /* U+004C: LATIN CAPITAL LETTER L */ + [0x4D] = 0x006D, /* U+004D: LATIN CAPITAL LETTER M */ + [0x4E] = 0x006E, /* U+004E: 
LATIN CAPITAL LETTER N */ + [0x4F] = 0x006F, /* U+004F: LATIN CAPITAL LETTER O */ + [0x50] = 0x0070, /* U+0050: LATIN CAPITAL LETTER P */ + [0x51] = 0x0071, /* U+0051: LATIN CAPITAL LETTER Q */ + [0x52] = 0x0072, /* U+0052: LATIN CAPITAL LETTER R */ + [0x53] = 0x0073, /* U+0053: LATIN CAPITAL LETTER S */ + [0x54] = 0x0074, /* U+0054: LATIN CAPITAL LETTER T */ + [0x55] = 0x0075, /* U+0055: LATIN CAPITAL LETTER U */ + [0x56] = 0x0076, /* U+0056: LATIN CAPITAL LETTER V */ + [0x57] = 0x0077, /* U+0057: LATIN CAPITAL LETTER W */ + [0x58] = 0x0078, /* U+0058: LATIN CAPITAL LETTER X */ + [0x59] = 0x0079, /* U+0059: LATIN CAPITAL LETTER Y */ + [0x5A] = 0x007A, /* U+005A: LATIN CAPITAL LETTER Z */ + [0x5B] = 0x005B, /* U+005B: LEFT SQUARE BRACKET */ + [0x5C] = 0x005C, /* U+005C: REVERSE SOLIDUS */ + [0x5D] = 0x005D, /* U+005D: RIGHT SQUARE BRACKET */ + [0x5E] = 0x005E, /* U+005E: CIRCUMFLEX ACCENT */ + [0x5F] = 0x005F, /* U+005F: LOW LINE */ + [0x60] = 0x0060, /* U+0060: GRAVE ACCENT */ + [0x61] = 0x0061, /* U+0061: LATIN SMALL LETTER A */ + [0x62] = 0x0062, /* U+0062: LATIN SMALL LETTER B */ + [0x63] = 0x0063, /* U+0063: LATIN SMALL LETTER C */ + [0x64] = 0x0064, /* U+0064: LATIN SMALL LETTER D */ + [0x65] = 0x0065, /* U+0065: LATIN SMALL LETTER E */ + [0x66] = 0x0066, /* U+0066: LATIN SMALL LETTER F */ + [0x67] = 0x0067, /* U+0067: LATIN SMALL LETTER G */ + [0x68] = 0x0068, /* U+0068: LATIN SMALL LETTER H */ + [0x69] = 0x0069, /* U+0069: LATIN SMALL LETTER I */ + [0x6A] = 0x006A, /* U+006A: LATIN SMALL LETTER J */ + [0x6B] = 0x006B, /* U+006B: LATIN SMALL LETTER K */ + [0x6C] = 0x006C, /* U+006C: LATIN SMALL LETTER L */ + [0x6D] = 0x006D, /* U+006D: LATIN SMALL LETTER M */ + [0x6E] = 0x006E, /* U+006E: LATIN SMALL LETTER N */ + [0x6F] = 0x006F, /* U+006F: LATIN SMALL LETTER O */ + [0x70] = 0x0070, /* U+0070: LATIN SMALL LETTER P */ + [0x71] = 0x0071, /* U+0071: LATIN SMALL LETTER Q */ + [0x72] = 0x0072, /* U+0072: LATIN SMALL LETTER R */ + [0x73] = 0x0073, /* U+0073: 
LATIN SMALL LETTER S */ + [0x74] = 0x0074, /* U+0074: LATIN SMALL LETTER T */ + [0x75] = 0x0075, /* U+0075: LATIN SMALL LETTER U */ + [0x76] = 0x0076, /* U+0076: LATIN SMALL LETTER V */ + [0x77] = 0x0077, /* U+0077: LATIN SMALL LETTER W */ + [0x78] = 0x0078, /* U+0078: LATIN SMALL LETTER X */ + [0x79] = 0x0079, /* U+0079: LATIN SMALL LETTER Y */ + [0x7A] = 0x007A, /* U+007A: LATIN SMALL LETTER Z */ + [0x7B] = 0x007B, /* U+007B: LEFT CURLY BRACKET */ + [0x7C] = 0x007C, /* U+007C: VERTICAL LINE */ + [0x7D] = 0x007D, /* U+007D: RIGHT CURLY BRACKET */ + [0x7E] = 0x007E, /* U+007E: TILDE */ + [0x7F] = 0x007F, /* U+007F: <control> */ [0xC3] = 256 - 0x80, /* 303 ... */ - [256+0x0] = 0xE0, /* U+00C0: LATIN CAPITAL LETTER A WITH GRAVE */ - [256+0x1] = 0xE1, /* U+00C1: LATIN CAPITAL LETTER A WITH ACUTE */ - [256+0x2] = 0xE2, /* U+00C2: LATIN CAPITAL LETTER A WITH CIRCUMFLEX */ - [256+0x3] = 0xE3, /* U+00C3: LATIN CAPITAL LETTER A WITH TILDE */ - [256+0x4] = 0xE4, /* U+00C4: LATIN CAPITAL LETTER A WITH DIAERESIS */ - [256+0x5] = 0xE5, /* U+00C5: LATIN CAPITAL LETTER A WITH RING ABOVE */ - [256+0x6] = 0xE6, /* U+00C6: LATIN CAPITAL LETTER AE */ - [256+0x7] = 0xE7, /* U+00C7: LATIN CAPITAL LETTER C WITH CEDILLA */ - [256+0x8] = 0xE8, /* U+00C8: LATIN CAPITAL LETTER E WITH GRAVE */ - [256+0x9] = 0xE9, /* U+00C9: LATIN CAPITAL LETTER E WITH ACUTE */ - [256+0xA] = 0xEA, /* U+00CA: LATIN CAPITAL LETTER E WITH CIRCUMFLEX */ - [256+0xB] = 0xEB, /* U+00CB: LATIN CAPITAL LETTER E WITH DIAERESIS */ - [256+0xC] = 0xEC, /* U+00CC: LATIN CAPITAL LETTER I WITH GRAVE */ - [256+0xD] = 0xED, /* U+00CD: LATIN CAPITAL LETTER I WITH ACUTE */ - [256+0xE] = 0xEE, /* U+00CE: LATIN CAPITAL LETTER I WITH CIRCUMFLEX */ - [256+0xF] = 0xEF, /* U+00CF: LATIN CAPITAL LETTER I WITH DIAERESIS */ - [256+0x10] = 0xF0, /* U+00D0: LATIN CAPITAL LETTER ETH */ - [256+0x11] = 0xF1, /* U+00D1: LATIN CAPITAL LETTER N WITH TILDE */ - [256+0x12] = 0xF2, /* U+00D2: LATIN CAPITAL LETTER O WITH GRAVE */ - [256+0x13] = 
0xF3, /* U+00D3: LATIN CAPITAL LETTER O WITH ACUTE */ - [256+0x14] = 0xF4, /* U+00D4: LATIN CAPITAL LETTER O WITH CIRCUMFLEX */ - [256+0x15] = 0xF5, /* U+00D5: LATIN CAPITAL LETTER O WITH TILDE */ - [256+0x16] = 0xF6, /* U+00D6: LATIN CAPITAL LETTER O WITH DIAERESIS */ - [256+0x18] = 0xF8, /* U+00D8: LATIN CAPITAL LETTER O WITH STROKE */ - [256+0x19] = 0xF9, /* U+00D9: LATIN CAPITAL LETTER U WITH GRAVE */ - [256+0x1A] = 0xFA, /* U+00DA: LATIN CAPITAL LETTER U WITH ACUTE */ - [256+0x1B] = 0xFB, /* U+00DB: LATIN CAPITAL LETTER U WITH CIRCUMFLEX */ - [256+0x1C] = 0xFC, /* U+00DC: LATIN CAPITAL LETTER U WITH DIAERESIS */ - [256+0x1D] = 0xFD, /* U+00DD: LATIN CAPITAL LETTER Y WITH ACUTE */ - [256+0x1E] = 0xFE, /* U+00DE: LATIN CAPITAL LETTER THORN */ + [256+0x00] = 0x00E0, /* U+00C0: LATIN CAPITAL LETTER A WITH GRAVE */ + [256+0x01] = 0x00E1, /* U+00C1: LATIN CAPITAL LETTER A WITH ACUTE */ + [256+0x02] = 0x00E2, /* U+00C2: LATIN CAPITAL LETTER A WITH CIRCUMFLEX */ + [256+0x03] = 0x00E3, /* U+00C3: LATIN CAPITAL LETTER A WITH TILDE */ + [256+0x04] = 0x00E4, /* U+00C4: LATIN CAPITAL LETTER A WITH DIAERESIS */ + [256+0x05] = 0x00E5, /* U+00C5: LATIN CAPITAL LETTER A WITH RING ABOVE */ _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org