On 2019-03-12 8:32 am, Aaron Hill wrote:
#(define (is-combining-mark? ucp)
"Returns whether a code-point is a Unicode Combining Character."
(or (<= #x0300 ucp #x03ff)
(<= #x1ab0 ucp #x1aff)
(<= #x1dc0 ucp #x1dff)
(<= #x20d0 ucp #x20ff)
(<= #xfe20 ucp #xfe2f)))
Typo in the above function: the first range of combining characters
should only extend to U+036F:
%%%%
#(define (is-combining-mark? ucp)
"Returns whether a code-point is a Unicode Combining Character."
(or (<= #x0300 ucp #x036f)
(<= #x1ab0 ucp #x1aff)
(<= #x1dc0 ucp #x1dff)
(<= #x20d0 ucp #x20ff)
(<= #xfe20 ucp #xfe2f)))
%%%%
-- Aaron Hill
\version "2.19.82"
#(define (utf8->utf32 lst)
"Converts a list of UTF8-encoded characters into UTF32."
(if (null? lst) '()
(let ((ch (char->integer (car lst))))
(cond
;; Characters 0x00-0x7F
((< ch #b10000000) (cons ch (utf8->utf32 (cdr lst))))
;; Characters 0x80-0x7FF
((eqv? (logand ch #b11100000) #b11000000)
(cons (let ((ch2 (char->integer (cadr lst))))
(logior (ash (logand ch #b11111) 6)
(logand ch2 #b111111)))
(utf8->utf32 (cddr lst))))
;; Characters 0x800-0xFFFF
((eqv? (logand ch #b11110000) #b11100000)
(cons (let ((ch2 (char->integer (cadr lst)))
(ch3 (char->integer (caddr lst))))
(logior (ash (logand ch #b1111) 12)
(ash (logand ch2 #b111111) 6)
(logand ch3 #b111111)))
(utf8->utf32 (cdddr lst))))
;; Characters 0x10000-0x10FFFF
((eqv? (logand ch #b111110000) #b11110000)
(cons (let ((ch2 (char->integer (cadr lst)))
(ch3 (char->integer (caddr lst)))
(ch4 (char->integer (cadddr lst))))
(logior (ash (logand ch #b111) 18)
(ash (logand ch2 #b111111) 12)
(ash (logand ch3 #b111111) 6)
(logand ch4 #b111111)))
(utf8->utf32 (cddddr lst))))
;; Ignore orphaned continuation characters
((eqv? (logand ch #b11000000) #b10000000) (utf8->utf32 (cdr lst)))
;; Error on all else
(else (error "Unexpected character:" ch))))))
#(define (utf32->utf8 lst)
"Converts a list of UTF32-encoded characters into UTF8."
(if (null? lst) '()
(let ((ch (car lst)))
(append (cond
;; Characters 0x00-0x7F
((< ch #x80) (list (integer->char ch)))
;; Characters 0x80-0x7FF
((< ch #x800) (list
(integer->char (logior #b11000000 (logand (ash ch -6) #b11111)))
(integer->char (logior #b10000000 (logand ch #b111111)))))
;; Characters 0x800-0xFFFF
((< ch #x10000) (list
(integer->char (logior #b11100000 (logand (ash ch -12) #b1111)))
(integer->char (logior #b10000000 (logand (ash ch -6) #b111111)))
(integer->char (logior #b10000000 (logand ch #b111111)))))
;; Characters 0x10000-0x10FFFF
(else (list
(integer->char (logior #b11110000 (logand (ash ch -18) #b111)))
(integer->char (logior #b10000000 (logand (ash ch -12) #b111111)))
(integer->char (logior #b10000000 (logand (ash ch -6) #b111111)))
(integer->char (logior #b10000000 (logand ch #b111111))))))
(utf32->utf8 (cdr lst))))))
#(define (string->utf32 s) (utf8->utf32 (string->list s)))
#(define (utf32->string l) (list->string (utf32->utf8 l)))
#(define (is-combining-mark? ucp)
"Returns whether a code-point is a Unicode Combining Character."
(or (<= #x0300 ucp #x036f)
(<= #x1ab0 ucp #x1aff)
(<= #x1dc0 ucp #x1dff)
(<= #x20d0 ucp #x20ff)
(<= #xfe20 ucp #xfe2f)))
#(define (utf32->graphemes lst)
"Splits the UTF32-encoded characters into graphemes."
(if (null? lst) '()
(let* ((marks (take-while is-combining-mark? (cdr lst)))
(rest (drop (cdr lst) (length marks))))
(cons (cons (car lst) marks) (utf32->graphemes rest)))))
#(define-markup-command (letter-spaced layout props arg) (string?)
(interpret-markup layout props #{ \markup {
$@(map utf32->string (utf32->graphemes (string->utf32 arg)))
} #}))
\markup { \column {
\letter-spaced "!@#123abc"
\letter-spaced "aÌb᪰cá·dâeï¸ "
\override #'(word-space . 3) {
\letter-spaced "!@#123abc"
\letter-spaced "aÌb᪰cá·dâeï¸ "
}
} }
_______________________________________________
lilypond-user mailing list
lilypond-user@gnu.org
https://lists.gnu.org/mailman/listinfo/lilypond-user