Changeset: 52d49237cf83 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/52d49237cf83
Added Files:
        misc/python/unicaseconvtabs.py
Modified Files:
        gdk/gdk_string.c
Branch: default
Log Message:

Add a script to create the case conversion tables.
Also, add explicit array indexes to the entries of the specialcase table for easy reference.


diffs (truncated from 527 to 300 lines):

diff --git a/gdk/gdk_string.c b/gdk/gdk_string.c
--- a/gdk/gdk_string.c
+++ b/gdk/gdk_string.c
@@ -1477,153 +1477,155 @@ GDKanalytical_str_group_concat(BAT *r, B
  * Only for the casefold table, if the converted codepoint is negative,
  * it is actually an escape into the specialcase table.  The absolute
  * value is the index. */
+
+/* These tables were created using the code in misc/python/unicaseconvtabs.py */
 static const char *const specialcase[] = {
        NULL,
-       "ss",
-       "i\xCC\x87",
-       "\xCA\xBCn",
-       "j\xCC\x8C",
-       "\xCE\xB9\xCC\x88\xCC\x81",
-       "\xCF\x85\xCC\x88\xCC\x81",
-       "\xD5\xA5\xD6\x82",
-       "h\xCC\xB1",
-       "t\xCC\x88",
-       "w\xCC\x8A",
-       "y\xCC\x8A",
-       "a\xCA\xBE",
-       "\xCF\x85\xCC\x93",
-       "\xCF\x85\xCC\x93\xCC\x80",
-       "\xCF\x85\xCC\x93\xCC\x81",
-       "\xCF\x85\xCC\x93\xCD\x82",
-       "\xE1\xBC\x80\xCE\xB9",
-       "\xE1\xBC\x81\xCE\xB9",
-       "\xE1\xBC\x82\xCE\xB9",
-       "\xE1\xBC\x83\xCE\xB9",
-       "\xE1\xBC\x84\xCE\xB9",
-       "\xE1\xBC\x85\xCE\xB9",
-       "\xE1\xBC\x86\xCE\xB9",
-       "\xE1\xBC\x87\xCE\xB9",
-       "\xE1\xBC\xA0\xCE\xB9",
-       "\xE1\xBC\xA1\xCE\xB9",
-       "\xE1\xBC\xA2\xCE\xB9",
-       "\xE1\xBC\xA3\xCE\xB9",
-       "\xE1\xBC\xA4\xCE\xB9",
-       "\xE1\xBC\xA5\xCE\xB9",
-       "\xE1\xBC\xA6\xCE\xB9",
-       "\xE1\xBC\xA7\xCE\xB9",
-       "\xE1\xBD\xA0\xCE\xB9",
-       "\xE1\xBD\xA1\xCE\xB9",
-       "\xE1\xBD\xA2\xCE\xB9",
-       "\xE1\xBD\xA3\xCE\xB9",
-       "\xE1\xBD\xA4\xCE\xB9",
-       "\xE1\xBD\xA5\xCE\xB9",
-       "\xE1\xBD\xA6\xCE\xB9",
-       "\xE1\xBD\xA7\xCE\xB9",
-       "\xE1\xBD\xB0\xCE\xB9",
-       "\xCE\xB1\xCE\xB9",
-       "\xCE\xAC\xCE\xB9",
-       "\xCE\xB1\xCD\x82",
-       "\xCE\xB1\xCD\x82\xCE\xB9",
-       "\xE1\xBD\xB4\xCE\xB9",
-       "\xCE\xB7\xCE\xB9",
-       "\xCE\xAE\xCE\xB9",
-       "\xCE\xB7\xCD\x82",
-       "\xCE\xB7\xCD\x82\xCE\xB9",
-       "\xCE\xB9\xCC\x88\xCC\x80",
-       "\xCE\xB9\xCD\x82",
-       "\xCE\xB9\xCC\x88\xCD\x82",
-       "\xCF\x85\xCC\x88\xCC\x80",
-       "\xCF\x81\xCC\x93",
-       "\xCF\x85\xCD\x82",
-       "\xCF\x85\xCC\x88\xCD\x82",
-       "\xE1\xBD\xBC\xCE\xB9",
-       "\xCF\x89\xCE\xB9",
-       "\xCF\x8E\xCE\xB9",
-       "\xCF\x89\xCD\x82",
-       "\xCF\x89\xCD\x82\xCE\xB9",
-       "ff",
-       "fi",
-       "fl",
-       "ffi",
-       "ffl",
-       "st",
-       "\xD5\xB4\xD5\xB6",
-       "\xD5\xB4\xD5\xA5",
-       "\xD5\xB4\xD5\xAB",
-       "\xD5\xBE\xD5\xB6",
-       "\xD5\xB4\xD5\xAD",
-       "SS",
-       "FF",
-       "FI",
-       "FL",
-       "FFI",
-       "FFL",
-       "ST",
-       "\xD4\xB5\xD5\x92",
-       "\xD5\x84\xD5\x86",
-       "\xD5\x84\xD4\xB5",
-       "\xD5\x84\xD4\xBB",
-       "\xD5\x8E\xD5\x86",
-       "\xD5\x84\xD4\xBD",
-       "\xCA\xBCN",
-       "\xCE\x99\xCC\x88\xCC\x81",
-       "\xCE\xA5\xCC\x88\xCC\x81",
-       "J\xCC\x8C",
-       "H\xCC\xB1",
-       "T\xCC\x88",
-       "W\xCC\x8A",
-       "Y\xCC\x8A",
-       "A\xCA\xBE",
-       "\xCE\xA5\xCC\x93",
-       "\xCE\xA5\xCC\x93\xCC\x80",
-       "\xCE\xA5\xCC\x93\xCC\x81",
-       "\xCE\xA5\xCC\x93\xCD\x82",
-       "\xCE\x91\xCD\x82",
-       "\xCE\x97\xCD\x82",
-       "\xCE\x99\xCC\x88\xCC\x80",
-       "\xCE\x99\xCD\x82",
-       "\xCE\x99\xCC\x88\xCD\x82",
-       "\xCE\xA5\xCC\x88\xCC\x80",
-       "\xCE\xA1\xCC\x93",
-       "\xCE\xA5\xCD\x82",
-       "\xCE\xA5\xCC\x88\xCD\x82",
-       "\xCE\xA9\xCD\x82",
-       "\xE1\xBC\x88\xCE\x99",
-       "\xE1\xBC\x89\xCE\x99",
-       "\xE1\xBC\x8A\xCE\x99",
-       "\xE1\xBC\x8B\xCE\x99",
-       "\xE1\xBC\x8C\xCE\x99",
-       "\xE1\xBC\x8D\xCE\x99",
-       "\xE1\xBC\x8E\xCE\x99",
-       "\xE1\xBC\x8F\xCE\x99",
-       "\xE1\xBC\xA8\xCE\x99",
-       "\xE1\xBC\xA9\xCE\x99",
-       "\xE1\xBC\xAA\xCE\x99",
-       "\xE1\xBC\xAB\xCE\x99",
-       "\xE1\xBC\xAC\xCE\x99",
-       "\xE1\xBC\xAD\xCE\x99",
-       "\xE1\xBC\xAE\xCE\x99",
-       "\xE1\xBC\xAF\xCE\x99",
-       "\xE1\xBD\xA8\xCE\x99",
-       "\xE1\xBD\xA9\xCE\x99",
-       "\xE1\xBD\xAA\xCE\x99",
-       "\xE1\xBD\xAB\xCE\x99",
-       "\xE1\xBD\xAC\xCE\x99",
-       "\xE1\xBD\xAD\xCE\x99",
-       "\xE1\xBD\xAE\xCE\x99",
-       "\xE1\xBD\xAF\xCE\x99",
-       "\xCE\x91\xCE\x99",
-       "\xCE\x97\xCE\x99",
-       "\xCE\xA9\xCE\x99",
-       "\xE1\xBE\xBA\xCE\x99",
-       "\xCE\x86\xCE\x99",
-       "\xE1\xBF\x8A\xCE\x99",
-       "\xCE\x89\xCE\x99",
-       "\xE1\xBF\xBA\xCE\x99",
-       "\xCE\x8F\xCE\x99",
-       "\xCE\x91\xCD\x82\xCE\x99",
-       "\xCE\x97\xCD\x82\xCE\x99",
-       "\xCE\xA9\xCD\x82\xCE\x99",
+       [1] = "ss",
+       [2] = "i\xCC\x87",
+       [3] = "\xCA\xBCn",
+       [4] = "j\xCC\x8C",
+       [5] = "\xCE\xB9\xCC\x88\xCC\x81",
+       [6] = "\xCF\x85\xCC\x88\xCC\x81",
+       [7] = "\xD5\xA5\xD6\x82",
+       [8] = "h\xCC\xB1",
+       [9] = "t\xCC\x88",
+       [10] = "w\xCC\x8A",
+       [11] = "y\xCC\x8A",
+       [12] = "a\xCA\xBE",
+       [13] = "\xCF\x85\xCC\x93",
+       [14] = "\xCF\x85\xCC\x93\xCC\x80",
+       [15] = "\xCF\x85\xCC\x93\xCC\x81",
+       [16] = "\xCF\x85\xCC\x93\xCD\x82",
+       [17] = "\xE1\xBC\x80\xCE\xB9",
+       [18] = "\xE1\xBC\x81\xCE\xB9",
+       [19] = "\xE1\xBC\x82\xCE\xB9",
+       [20] = "\xE1\xBC\x83\xCE\xB9",
+       [21] = "\xE1\xBC\x84\xCE\xB9",
+       [22] = "\xE1\xBC\x85\xCE\xB9",
+       [23] = "\xE1\xBC\x86\xCE\xB9",
+       [24] = "\xE1\xBC\x87\xCE\xB9",
+       [25] = "\xE1\xBC\xA0\xCE\xB9",
+       [26] = "\xE1\xBC\xA1\xCE\xB9",
+       [27] = "\xE1\xBC\xA2\xCE\xB9",
+       [28] = "\xE1\xBC\xA3\xCE\xB9",
+       [29] = "\xE1\xBC\xA4\xCE\xB9",
+       [30] = "\xE1\xBC\xA5\xCE\xB9",
+       [31] = "\xE1\xBC\xA6\xCE\xB9",
+       [32] = "\xE1\xBC\xA7\xCE\xB9",
+       [33] = "\xE1\xBD\xA0\xCE\xB9",
+       [34] = "\xE1\xBD\xA1\xCE\xB9",
+       [35] = "\xE1\xBD\xA2\xCE\xB9",
+       [36] = "\xE1\xBD\xA3\xCE\xB9",
+       [37] = "\xE1\xBD\xA4\xCE\xB9",
+       [38] = "\xE1\xBD\xA5\xCE\xB9",
+       [39] = "\xE1\xBD\xA6\xCE\xB9",
+       [40] = "\xE1\xBD\xA7\xCE\xB9",
+       [41] = "\xE1\xBD\xB0\xCE\xB9",
+       [42] = "\xCE\xB1\xCE\xB9",
+       [43] = "\xCE\xAC\xCE\xB9",
+       [44] = "\xCE\xB1\xCD\x82",
+       [45] = "\xCE\xB1\xCD\x82\xCE\xB9",
+       [46] = "\xE1\xBD\xB4\xCE\xB9",
+       [47] = "\xCE\xB7\xCE\xB9",
+       [48] = "\xCE\xAE\xCE\xB9",
+       [49] = "\xCE\xB7\xCD\x82",
+       [50] = "\xCE\xB7\xCD\x82\xCE\xB9",
+       [51] = "\xCE\xB9\xCC\x88\xCC\x80",
+       [52] = "\xCE\xB9\xCD\x82",
+       [53] = "\xCE\xB9\xCC\x88\xCD\x82",
+       [54] = "\xCF\x85\xCC\x88\xCC\x80",
+       [55] = "\xCF\x81\xCC\x93",
+       [56] = "\xCF\x85\xCD\x82",
+       [57] = "\xCF\x85\xCC\x88\xCD\x82",
+       [58] = "\xE1\xBD\xBC\xCE\xB9",
+       [59] = "\xCF\x89\xCE\xB9",
+       [60] = "\xCF\x8E\xCE\xB9",
+       [61] = "\xCF\x89\xCD\x82",
+       [62] = "\xCF\x89\xCD\x82\xCE\xB9",
+       [63] = "ff",
+       [64] = "fi",
+       [65] = "fl",
+       [66] = "ffi",
+       [67] = "ffl",
+       [68] = "st",
+       [69] = "\xD5\xB4\xD5\xB6",
+       [70] = "\xD5\xB4\xD5\xA5",
+       [71] = "\xD5\xB4\xD5\xAB",
+       [72] = "\xD5\xBE\xD5\xB6",
+       [73] = "\xD5\xB4\xD5\xAD",
+       [74] = "SS",
+       [75] = "FF",
+       [76] = "FI",
+       [77] = "FL",
+       [78] = "FFI",
+       [79] = "FFL",
+       [80] = "ST",
+       [81] = "\xD4\xB5\xD5\x92",
+       [82] = "\xD5\x84\xD5\x86",
+       [83] = "\xD5\x84\xD4\xB5",
+       [84] = "\xD5\x84\xD4\xBB",
+       [85] = "\xD5\x8E\xD5\x86",
+       [86] = "\xD5\x84\xD4\xBD",
+       [87] = "\xCA\xBCN",
+       [88] = "\xCE\x99\xCC\x88\xCC\x81",
+       [89] = "\xCE\xA5\xCC\x88\xCC\x81",
+       [90] = "J\xCC\x8C",
+       [91] = "H\xCC\xB1",
+       [92] = "T\xCC\x88",
+       [93] = "W\xCC\x8A",
+       [94] = "Y\xCC\x8A",
+       [95] = "A\xCA\xBE",
+       [96] = "\xCE\xA5\xCC\x93",
+       [97] = "\xCE\xA5\xCC\x93\xCC\x80",
+       [98] = "\xCE\xA5\xCC\x93\xCC\x81",
+       [99] = "\xCE\xA5\xCC\x93\xCD\x82",
+       [100] = "\xCE\x91\xCD\x82",
+       [101] = "\xCE\x97\xCD\x82",
+       [102] = "\xCE\x99\xCC\x88\xCC\x80",
+       [103] = "\xCE\x99\xCD\x82",
+       [104] = "\xCE\x99\xCC\x88\xCD\x82",
+       [105] = "\xCE\xA5\xCC\x88\xCC\x80",
+       [106] = "\xCE\xA1\xCC\x93",
+       [107] = "\xCE\xA5\xCD\x82",
+       [108] = "\xCE\xA5\xCC\x88\xCD\x82",
+       [109] = "\xCE\xA9\xCD\x82",
+       [110] = "\xE1\xBC\x88\xCE\x99",
+       [111] = "\xE1\xBC\x89\xCE\x99",
+       [112] = "\xE1\xBC\x8A\xCE\x99",
+       [113] = "\xE1\xBC\x8B\xCE\x99",
+       [114] = "\xE1\xBC\x8C\xCE\x99",
+       [115] = "\xE1\xBC\x8D\xCE\x99",
+       [116] = "\xE1\xBC\x8E\xCE\x99",
+       [117] = "\xE1\xBC\x8F\xCE\x99",
+       [118] = "\xE1\xBC\xA8\xCE\x99",
+       [119] = "\xE1\xBC\xA9\xCE\x99",
+       [120] = "\xE1\xBC\xAA\xCE\x99",
+       [121] = "\xE1\xBC\xAB\xCE\x99",
+       [122] = "\xE1\xBC\xAC\xCE\x99",
+       [123] = "\xE1\xBC\xAD\xCE\x99",
+       [124] = "\xE1\xBC\xAE\xCE\x99",
+       [125] = "\xE1\xBC\xAF\xCE\x99",
+       [126] = "\xE1\xBD\xA8\xCE\x99",
+       [127] = "\xE1\xBD\xA9\xCE\x99",
+       [128] = "\xE1\xBD\xAA\xCE\x99",
+       [129] = "\xE1\xBD\xAB\xCE\x99",
+       [130] = "\xE1\xBD\xAC\xCE\x99",
+       [131] = "\xE1\xBD\xAD\xCE\x99",
+       [132] = "\xE1\xBD\xAE\xCE\x99",
+       [133] = "\xE1\xBD\xAF\xCE\x99",
+       [134] = "\xCE\x91\xCE\x99",
+       [135] = "\xCE\x97\xCE\x99",
+       [136] = "\xCE\xA9\xCE\x99",
+       [137] = "\xE1\xBE\xBA\xCE\x99",
+       [138] = "\xCE\x86\xCE\x99",
+       [139] = "\xE1\xBF\x8A\xCE\x99",
+       [140] = "\xCE\x89\xCE\x99",
+       [141] = "\xE1\xBF\xBA\xCE\x99",
+       [142] = "\xCE\x8F\xCE\x99",
+       [143] = "\xCE\x91\xCD\x82\xCE\x99",
+       [144] = "\xCE\x97\xCD\x82\xCE\x99",
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to