Changeset: 52d49237cf83 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/52d49237cf83 Added Files: misc/python/unicaseconvtabs.py Modified Files: gdk/gdk_string.c Branch: default Log Message:
Add script to create case conversion tables. Also, add indexes to specialcase table for easy reference. diffs (truncated from 527 to 300 lines): diff --git a/gdk/gdk_string.c b/gdk/gdk_string.c --- a/gdk/gdk_string.c +++ b/gdk/gdk_string.c @@ -1477,153 +1477,155 @@ GDKanalytical_str_group_concat(BAT *r, B * Only for the casefold table, if the converted codepoint is negative, * it is actually an escape into the specialcase table. The absolute * value is the index. */ + +/* These tables were created using the code in unicaseconvtabs.py */ static const char *const specialcase[] = { NULL, - "ss", - "i\xCC\x87", - "\xCA\xBCn", - "j\xCC\x8C", - "\xCE\xB9\xCC\x88\xCC\x81", - "\xCF\x85\xCC\x88\xCC\x81", - "\xD5\xA5\xD6\x82", - "h\xCC\xB1", - "t\xCC\x88", - "w\xCC\x8A", - "y\xCC\x8A", - "a\xCA\xBE", - "\xCF\x85\xCC\x93", - "\xCF\x85\xCC\x93\xCC\x80", - "\xCF\x85\xCC\x93\xCC\x81", - "\xCF\x85\xCC\x93\xCD\x82", - "\xE1\xBC\x80\xCE\xB9", - "\xE1\xBC\x81\xCE\xB9", - "\xE1\xBC\x82\xCE\xB9", - "\xE1\xBC\x83\xCE\xB9", - "\xE1\xBC\x84\xCE\xB9", - "\xE1\xBC\x85\xCE\xB9", - "\xE1\xBC\x86\xCE\xB9", - "\xE1\xBC\x87\xCE\xB9", - "\xE1\xBC\xA0\xCE\xB9", - "\xE1\xBC\xA1\xCE\xB9", - "\xE1\xBC\xA2\xCE\xB9", - "\xE1\xBC\xA3\xCE\xB9", - "\xE1\xBC\xA4\xCE\xB9", - "\xE1\xBC\xA5\xCE\xB9", - "\xE1\xBC\xA6\xCE\xB9", - "\xE1\xBC\xA7\xCE\xB9", - "\xE1\xBD\xA0\xCE\xB9", - "\xE1\xBD\xA1\xCE\xB9", - "\xE1\xBD\xA2\xCE\xB9", - "\xE1\xBD\xA3\xCE\xB9", - "\xE1\xBD\xA4\xCE\xB9", - "\xE1\xBD\xA5\xCE\xB9", - "\xE1\xBD\xA6\xCE\xB9", - "\xE1\xBD\xA7\xCE\xB9", - "\xE1\xBD\xB0\xCE\xB9", - "\xCE\xB1\xCE\xB9", - "\xCE\xAC\xCE\xB9", - "\xCE\xB1\xCD\x82", - "\xCE\xB1\xCD\x82\xCE\xB9", - "\xE1\xBD\xB4\xCE\xB9", - "\xCE\xB7\xCE\xB9", - "\xCE\xAE\xCE\xB9", - "\xCE\xB7\xCD\x82", - "\xCE\xB7\xCD\x82\xCE\xB9", - "\xCE\xB9\xCC\x88\xCC\x80", - "\xCE\xB9\xCD\x82", - "\xCE\xB9\xCC\x88\xCD\x82", - "\xCF\x85\xCC\x88\xCC\x80", - "\xCF\x81\xCC\x93", - "\xCF\x85\xCD\x82", - "\xCF\x85\xCC\x88\xCD\x82", - "\xE1\xBD\xBC\xCE\xB9", - "\xCF\x89\xCE\xB9", - "\xCF\x8E\xCE\xB9", - "\xCF\x89\xCD\x82", - "\xCF\x89\xCD\x82\xCE\xB9", - "ff", - "fi", - "fl", - "ffi", - "ffl", - "st", - "\xD5\xB4\xD5\xB6", - "\xD5\xB4\xD5\xA5", - "\xD5\xB4\xD5\xAB", - "\xD5\xBE\xD5\xB6", - "\xD5\xB4\xD5\xAD", - "SS", - "FF", - "FI", - "FL", - "FFI", - "FFL", - "ST", - "\xD4\xB5\xD5\x92", - "\xD5\x84\xD5\x86", - "\xD5\x84\xD4\xB5", - "\xD5\x84\xD4\xBB", - "\xD5\x8E\xD5\x86", - "\xD5\x84\xD4\xBD", - "\xCA\xBCN", - "\xCE\x99\xCC\x88\xCC\x81", - "\xCE\xA5\xCC\x88\xCC\x81", - "J\xCC\x8C", - "H\xCC\xB1", - "T\xCC\x88", - "W\xCC\x8A", - "Y\xCC\x8A", - "A\xCA\xBE", - "\xCE\xA5\xCC\x93", - "\xCE\xA5\xCC\x93\xCC\x80", - "\xCE\xA5\xCC\x93\xCC\x81", - "\xCE\xA5\xCC\x93\xCD\x82", - "\xCE\x91\xCD\x82", - "\xCE\x97\xCD\x82", - "\xCE\x99\xCC\x88\xCC\x80", - "\xCE\x99\xCD\x82", - "\xCE\x99\xCC\x88\xCD\x82", - "\xCE\xA5\xCC\x88\xCC\x80", - "\xCE\xA1\xCC\x93", - "\xCE\xA5\xCD\x82", - "\xCE\xA5\xCC\x88\xCD\x82", - "\xCE\xA9\xCD\x82", - "\xE1\xBC\x88\xCE\x99", - "\xE1\xBC\x89\xCE\x99", - "\xE1\xBC\x8A\xCE\x99", - "\xE1\xBC\x8B\xCE\x99", - "\xE1\xBC\x8C\xCE\x99", - "\xE1\xBC\x8D\xCE\x99", - "\xE1\xBC\x8E\xCE\x99", - "\xE1\xBC\x8F\xCE\x99", - "\xE1\xBC\xA8\xCE\x99", - "\xE1\xBC\xA9\xCE\x99", - "\xE1\xBC\xAA\xCE\x99", - "\xE1\xBC\xAB\xCE\x99", - "\xE1\xBC\xAC\xCE\x99", - "\xE1\xBC\xAD\xCE\x99", - "\xE1\xBC\xAE\xCE\x99", - "\xE1\xBC\xAF\xCE\x99", - "\xE1\xBD\xA8\xCE\x99", - "\xE1\xBD\xA9\xCE\x99", - "\xE1\xBD\xAA\xCE\x99", - "\xE1\xBD\xAB\xCE\x99", - "\xE1\xBD\xAC\xCE\x99", - "\xE1\xBD\xAD\xCE\x99", - "\xE1\xBD\xAE\xCE\x99", - "\xE1\xBD\xAF\xCE\x99", - "\xCE\x91\xCE\x99", - "\xCE\x97\xCE\x99", - "\xCE\xA9\xCE\x99", - "\xE1\xBE\xBA\xCE\x99", - "\xCE\x86\xCE\x99", - "\xE1\xBF\x8A\xCE\x99", - "\xCE\x89\xCE\x99", - "\xE1\xBF\xBA\xCE\x99", - "\xCE\x8F\xCE\x99", - "\xCE\x91\xCD\x82\xCE\x99", - "\xCE\x97\xCD\x82\xCE\x99", - "\xCE\xA9\xCD\x82\xCE\x99", + [1] = "ss", + [2] = "i\xCC\x87", + [3] = "\xCA\xBCn", + [4] = "j\xCC\x8C", + [5] = "\xCE\xB9\xCC\x88\xCC\x81", + [6] = "\xCF\x85\xCC\x88\xCC\x81", + [7] = "\xD5\xA5\xD6\x82", + [8] = "h\xCC\xB1", + [9] = "t\xCC\x88", + [10] = "w\xCC\x8A", + [11] = "y\xCC\x8A", + [12] = "a\xCA\xBE", + [13] = "\xCF\x85\xCC\x93", + [14] = "\xCF\x85\xCC\x93\xCC\x80", + [15] = "\xCF\x85\xCC\x93\xCC\x81", + [16] = "\xCF\x85\xCC\x93\xCD\x82", + [17] = "\xE1\xBC\x80\xCE\xB9", + [18] = "\xE1\xBC\x81\xCE\xB9", + [19] = "\xE1\xBC\x82\xCE\xB9", + [20] = "\xE1\xBC\x83\xCE\xB9", + [21] = "\xE1\xBC\x84\xCE\xB9", + [22] = "\xE1\xBC\x85\xCE\xB9", + [23] = "\xE1\xBC\x86\xCE\xB9", + [24] = "\xE1\xBC\x87\xCE\xB9", + [25] = "\xE1\xBC\xA0\xCE\xB9", + [26] = "\xE1\xBC\xA1\xCE\xB9", + [27] = "\xE1\xBC\xA2\xCE\xB9", + [28] = "\xE1\xBC\xA3\xCE\xB9", + [29] = "\xE1\xBC\xA4\xCE\xB9", + [30] = "\xE1\xBC\xA5\xCE\xB9", + [31] = "\xE1\xBC\xA6\xCE\xB9", + [32] = "\xE1\xBC\xA7\xCE\xB9", + [33] = "\xE1\xBD\xA0\xCE\xB9", + [34] = "\xE1\xBD\xA1\xCE\xB9", + [35] = "\xE1\xBD\xA2\xCE\xB9", + [36] = "\xE1\xBD\xA3\xCE\xB9", + [37] = "\xE1\xBD\xA4\xCE\xB9", + [38] = "\xE1\xBD\xA5\xCE\xB9", + [39] = "\xE1\xBD\xA6\xCE\xB9", + [40] = "\xE1\xBD\xA7\xCE\xB9", + [41] = "\xE1\xBD\xB0\xCE\xB9", + [42] = "\xCE\xB1\xCE\xB9", + [43] = "\xCE\xAC\xCE\xB9", + [44] = "\xCE\xB1\xCD\x82", + [45] = "\xCE\xB1\xCD\x82\xCE\xB9", + [46] = "\xE1\xBD\xB4\xCE\xB9", + [47] = "\xCE\xB7\xCE\xB9", + [48] = "\xCE\xAE\xCE\xB9", + [49] = "\xCE\xB7\xCD\x82", + [50] = "\xCE\xB7\xCD\x82\xCE\xB9", + [51] = "\xCE\xB9\xCC\x88\xCC\x80", + [52] = "\xCE\xB9\xCD\x82", + [53] = "\xCE\xB9\xCC\x88\xCD\x82", + [54] = "\xCF\x85\xCC\x88\xCC\x80", + [55] = "\xCF\x81\xCC\x93", + [56] = "\xCF\x85\xCD\x82", + [57] = "\xCF\x85\xCC\x88\xCD\x82", + [58] = "\xE1\xBD\xBC\xCE\xB9", + [59] = "\xCF\x89\xCE\xB9", + [60] = "\xCF\x8E\xCE\xB9", + [61] = "\xCF\x89\xCD\x82", + [62] = "\xCF\x89\xCD\x82\xCE\xB9", + [63] = "ff", + [64] = "fi", + [65] = "fl", + [66] = "ffi", + [67] = "ffl", + [68] = "st", + [69] = "\xD5\xB4\xD5\xB6", + [70] = "\xD5\xB4\xD5\xA5", + [71] = "\xD5\xB4\xD5\xAB", + [72] = "\xD5\xBE\xD5\xB6", + [73] = "\xD5\xB4\xD5\xAD", + [74] = "SS", + [75] = "FF", + [76] = "FI", + [77] = "FL", + [78] = "FFI", + [79] = "FFL", + [80] = "ST", + [81] = "\xD4\xB5\xD5\x92", + [82] = "\xD5\x84\xD5\x86", + [83] = "\xD5\x84\xD4\xB5", + [84] = "\xD5\x84\xD4\xBB", + [85] = "\xD5\x8E\xD5\x86", + [86] = "\xD5\x84\xD4\xBD", + [87] = "\xCA\xBCN", + [88] = "\xCE\x99\xCC\x88\xCC\x81", + [89] = "\xCE\xA5\xCC\x88\xCC\x81", + [90] = "J\xCC\x8C", + [91] = "H\xCC\xB1", + [92] = "T\xCC\x88", + [93] = "W\xCC\x8A", + [94] = "Y\xCC\x8A", + [95] = "A\xCA\xBE", + [96] = "\xCE\xA5\xCC\x93", + [97] = "\xCE\xA5\xCC\x93\xCC\x80", + [98] = "\xCE\xA5\xCC\x93\xCC\x81", + [99] = "\xCE\xA5\xCC\x93\xCD\x82", + [100] = "\xCE\x91\xCD\x82", + [101] = "\xCE\x97\xCD\x82", + [102] = "\xCE\x99\xCC\x88\xCC\x80", + [103] = "\xCE\x99\xCD\x82", + [104] = "\xCE\x99\xCC\x88\xCD\x82", + [105] = "\xCE\xA5\xCC\x88\xCC\x80", + [106] = "\xCE\xA1\xCC\x93", + [107] = "\xCE\xA5\xCD\x82", + [108] = "\xCE\xA5\xCC\x88\xCD\x82", + [109] = "\xCE\xA9\xCD\x82", + [110] = "\xE1\xBC\x88\xCE\x99", + [111] = "\xE1\xBC\x89\xCE\x99", + [112] = "\xE1\xBC\x8A\xCE\x99", + [113] = "\xE1\xBC\x8B\xCE\x99", + [114] = "\xE1\xBC\x8C\xCE\x99", + [115] = "\xE1\xBC\x8D\xCE\x99", + [116] = "\xE1\xBC\x8E\xCE\x99", + [117] = "\xE1\xBC\x8F\xCE\x99", + [118] = "\xE1\xBC\xA8\xCE\x99", + [119] = "\xE1\xBC\xA9\xCE\x99", + [120] = "\xE1\xBC\xAA\xCE\x99", + [121] = "\xE1\xBC\xAB\xCE\x99", + [122] = "\xE1\xBC\xAC\xCE\x99", + [123] = "\xE1\xBC\xAD\xCE\x99", + [124] = "\xE1\xBC\xAE\xCE\x99", + [125] = "\xE1\xBC\xAF\xCE\x99", + [126] = "\xE1\xBD\xA8\xCE\x99", + [127] = "\xE1\xBD\xA9\xCE\x99", + [128] = "\xE1\xBD\xAA\xCE\x99", + [129] = "\xE1\xBD\xAB\xCE\x99", + [130] = "\xE1\xBD\xAC\xCE\x99", + [131] = "\xE1\xBD\xAD\xCE\x99", + [132] = "\xE1\xBD\xAE\xCE\x99", + [133] = "\xE1\xBD\xAF\xCE\x99", + [134] = "\xCE\x91\xCE\x99", + [135] = "\xCE\x97\xCE\x99", + [136] = "\xCE\xA9\xCE\x99", + [137] = "\xE1\xBE\xBA\xCE\x99", + [138] = "\xCE\x86\xCE\x99", + [139] = "\xE1\xBF\x8A\xCE\x99", + [140] = "\xCE\x89\xCE\x99", + [141] = "\xE1\xBF\xBA\xCE\x99", + [142] = "\xCE\x8F\xCE\x99", + [143] = "\xCE\x91\xCD\x82\xCE\x99", + [144] = "\xCE\x97\xCD\x82\xCE\x99", _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org