Changeset: e3930423f7df for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/e3930423f7df
Added Files:
	misc/bash/uniwidthtab.sh
Modified Files:
	common/utils/mutf8.c
Branch: default
Log Message:

Add script to create charwidth table.


diffs (73 lines):

diff --git a/common/utils/mutf8.c b/common/utils/mutf8.c
--- a/common/utils/mutf8.c
+++ b/common/utils/mutf8.c
@@ -19,6 +19,7 @@ struct interval {
 	int width;
 };
 
+/* this table was created using the script uniwidthtab.sh */
 static const struct interval intervals[] = {
 	/* sorted list of non-overlapping ranges:
 	 * ranges with width==0 represent all codepoints with
diff --git a/misc/bash/uniwidthtab.sh b/misc/bash/uniwidthtab.sh
new file mode 100755
--- /dev/null
+++ b/misc/bash/uniwidthtab.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# SPDX-License-Identifier: MPL-2.0
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Copyright 2024 MonetDB Foundation;
+# Copyright August 2008 - 2023 MonetDB B.V.;
+# Copyright 1997 - July 2008 CWI.
+
+{
+    sed -n -e '/# Mn/d' \
+        -e 's/^\([0-9A-F][0-9A-F]*\)\.\.\([0-9A-F][0-9A-F]*\) *; *[FW].*/0x\1 0x\2/p' \
+        -e 's/^\([0-9A-F][0-9A-F]*\) *; [FW].*/0x\1 0x\1/p' \
+        < /usr/share/unicode/ucd/EastAsianWidth.txt | {
+        while read line; do
+            line=($line)
+            f=$((${line[0]%}))
+            l=$((${line[1]%}))
+            if [[ -n $prevl ]]; then
+                if (($prevl+1 == $f)); then
+                    prevl=$l
+                else
+                    printf '\t{ 0x%05X, 0x%05X, 2 },\n' $prevf $prevl
+                    prevf=$f
+                    prevl=$l
+                fi
+            else
+                prevf=$f
+                prevl=$l
+            fi
+        done
+        printf '\t{ 0x%05X, 0x%05X, 2 },\n' $prevf $prevl
+    }
+
+    sed -n '/^00AD/d;s/^\([0-9A-F][0-9A-F]*\);[^;]*;\(Me\|Mn\|Cf\);.*/0x\1/p' \
+        < /usr/share/unicode/ucd/UnicodeData.txt | {
+        while read line; do
+            u=$(($line))
+            if [[ -n $prevf ]]; then
+                if (($prevl+1 == $u)); then
+                    prevl=$u
+                else
+                    printf '\t{ 0x%05X, 0x%05X, 0 },\n' $prevf $prevl
+                    prevf=$u
+                    prevl=$u
+                fi
+            else
+                prevf=$u
+                prevl=$u
+            fi
+        done
+        printf '\t{ 0x%05X, 0x%05X, 0 },\n' $prevf $prevl
+    }
+} | sort | sed 's/0x0\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\)/0x\1/g'
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org