On 2019-03-12 8:32 am, Aaron Hill wrote:
#(define (is-combining-mark? ucp)
  "Returns whether a code-point is a Unicode Combining Character."
  (or (<= #x0300 ucp #x03ff)
      (<= #x1ab0 ucp #x1aff)
      (<= #x1dc0 ucp #x1dff)
      (<= #x20d0 ucp #x20ff)
      (<= #xfe20 ucp #xfe2f)))

Typo in the above function: the first range of combining characters should only extend to U+036F:

%%%%
#(define (is-combining-mark? ucp)
  "Returns whether a code-point is a Unicode Combining Character."
  (or (<= #x0300 ucp #x036f)
      (<= #x1ab0 ucp #x1aff)
      (<= #x1dc0 ucp #x1dff)
      (<= #x20d0 ucp #x20ff)
      (<= #xfe20 ucp #xfe2f)))
%%%%


-- Aaron Hill
\version "2.19.82"

#(define (utf8->utf32 lst)
  "Converts a list of UTF8-encoded characters into UTF32."
  (if (null? lst) '()
    (let ((ch (char->integer (car lst))))
      (cond
        ;; Characters 0x00-0x7F
        ((< ch #b10000000) (cons ch (utf8->utf32 (cdr lst))))
        ;; Characters 0x80-0x7FF
        ((eqv? (logand ch #b11100000) #b11000000)
          (cons (let ((ch2 (char->integer (cadr lst))))
              (logior (ash (logand ch #b11111) 6)
                      (logand ch2 #b111111)))
            (utf8->utf32 (cddr lst))))
        ;; Characters 0x800-0xFFFF
        ((eqv? (logand ch #b11110000) #b11100000)
          (cons (let ((ch2 (char->integer (cadr lst)))
                      (ch3 (char->integer (caddr lst))))
              (logior (ash (logand ch #b1111) 12)
                      (ash (logand ch2 #b111111) 6)
                      (logand ch3 #b111111)))
            (utf8->utf32 (cdddr lst))))
        ;; Characters 0x10000-0x10FFFF
        ((eqv? (logand ch #b111110000) #b11110000)
          (cons (let ((ch2 (char->integer (cadr lst)))
                      (ch3 (char->integer (caddr lst)))
                      (ch4 (char->integer (cadddr lst))))
              (logior (ash (logand ch #b111) 18)
                      (ash (logand ch2 #b111111) 12)
                      (ash (logand ch3 #b111111) 6)
                      (logand ch4 #b111111)))
            (utf8->utf32 (cddddr lst))))
        ;; Ignore orphaned continuation characters
        ((eqv? (logand ch #b11000000) #b10000000) (utf8->utf32 (cdr lst)))
        ;; Error on all else
        (else (error "Unexpected character:" ch))))))

#(define (utf32->utf8 lst)
  "Converts a list of UTF32-encoded characters into UTF8."
  (if (null? lst) '()
    (let ((ch (car lst)))
      (append (cond
          ;; Characters 0x00-0x7F
          ((< ch #x80) (list (integer->char ch)))
          ;; Characters 0x80-0x7FF
          ((< ch #x800) (list
            (integer->char (logior #b11000000 (logand (ash ch -6) #b11111)))
            (integer->char (logior #b10000000 (logand ch #b111111)))))
          ;; Characters 0x800-0xFFFF
          ((< ch #x10000) (list
            (integer->char (logior #b11100000 (logand (ash ch -12) #b1111)))
            (integer->char (logior #b10000000 (logand (ash ch -6) #b111111)))
            (integer->char (logior #b10000000 (logand ch #b111111)))))
          ;; Characters 0x10000-0x10FFFF
          (else (list
            (integer->char (logior #b11110000 (logand (ash ch -18) #b111)))
            (integer->char (logior #b10000000 (logand (ash ch -12) #b111111)))
            (integer->char (logior #b10000000 (logand (ash ch -6) #b111111)))
            (integer->char (logior #b10000000 (logand ch #b111111))))))
        (utf32->utf8 (cdr lst))))))

#(define (string->utf32 s) (utf8->utf32 (string->list s)))
#(define (utf32->string l) (list->string (utf32->utf8 l)))

#(define (is-combining-mark? ucp)
  "Returns whether a code-point is a Unicode Combining Character."
  (or (<= #x0300 ucp #x036f)
      (<= #x1ab0 ucp #x1aff)
      (<= #x1dc0 ucp #x1dff)
      (<= #x20d0 ucp #x20ff)
      (<= #xfe20 ucp #xfe2f)))
#(define (utf32->graphemes lst)
  "Splits the UTF32-encoded characters into graphemes."
  (if (null? lst) '()
    (let* ((marks (take-while is-combining-mark? (cdr lst)))
           (rest (drop (cdr lst) (length marks))))
      (cons (cons (car lst) marks) (utf32->graphemes rest)))))

#(define-markup-command (letter-spaced layout props arg) (string?)
  (interpret-markup layout props #{ \markup {
    $@(map utf32->string (utf32->graphemes (string->utf32 arg)))
  } #}))

\markup { \column {
  \letter-spaced "!@#123abc"
  \letter-spaced "àb᪰c᷀d⃐e︠"
  \override #'(word-space . 3) {
    \letter-spaced "!@#123abc"
    \letter-spaced "àb᪰c᷀d⃐e︠"
  }
} }
_______________________________________________
lilypond-user mailing list
lilypond-user@gnu.org
https://lists.gnu.org/mailman/listinfo/lilypond-user

Reply via email to