See the R6RS Libraries document page 10. The differences: - R6RS supports reading a BOM.
- R6RS mandates an endianness argument to specify the behavior at the absence of a BOM. - R6RS allows an optional third argument 'endianness-mandatory' to explicitly ignore any possible BOM. Here's a quick patch on top of master to implement the R6RS procedures in terms of the Guile procedures and export them with a rename from (rnrs bytevectors). ===File /home/taylan/src/guile/guile-master/0001-Fix-R6RS-utf16-string-and-utf32-string.patch=== >From f51cd1d4884caafb1ed0072cd77c0e3145f34576 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Taylan=20Ulrich=20Bay=C4=B1rl=C4=B1/Kammer?= <taylanbayi...@gmail.com> Date: Fri, 10 Mar 2017 22:36:55 +0100 Subject: [PATCH] Fix R6RS utf16->string and utf32->string. * module/rnrs/bytevectors.scm (read-bom16, read-bom32): New procedures. (r6rs-utf16->string, r6rs-utf32->string): Ditto. --- module/rnrs/bytevectors.scm | 52 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/module/rnrs/bytevectors.scm b/module/rnrs/bytevectors.scm index 9744359f0..997a8c9cb 100644 --- a/module/rnrs/bytevectors.scm +++ b/module/rnrs/bytevectors.scm @@ -69,7 +69,9 @@ bytevector-ieee-double-native-set! string->utf8 string->utf16 string->utf32 - utf8->string utf16->string utf32->string)) + utf8->string + (r6rs-utf16->string . utf16->string) + (r6rs-utf32->string . utf32->string))) (load-extension (string-append "libguile-" (effective-version)) @@ -80,4 +82,52 @@ `(quote ,sym) (error "unsupported endianness" sym))) +(define (read-bom16 bv) + (let ((c0 (bytevector-u8-ref bv 0)) + (c1 (bytevector-u8-ref bv 1))) + (cond + ((and (= c0 #xFE) (= c1 #xFF)) + 'big) + ((and (= c0 #xFF) (= c1 #xFE)) + 'little) + (else + #f)))) + +(define r6rs-utf16->string + (case-lambda + ((bv default-endianness) + (let ((bom-endianness (read-bom16 bv))) + (if (not bom-endianness) + (utf16->string bv default-endianness) + (substring/shared (utf16->string bv bom-endianness) 1)))) + ((bv endianness endianness-mandatory?) + (if endianness-mandatory? + (utf16->string bv endianness) + (r6rs-utf16->string bv endianness))))) + +(define (read-bom32 bv) + (let ((c0 (bytevector-u8-ref bv 0)) + (c1 (bytevector-u8-ref bv 1)) + (c2 (bytevector-u8-ref bv 2)) + (c3 (bytevector-u8-ref bv 3))) + (cond + ((and (= c0 #x00) (= c1 #x00) (= c2 #xFE) (= c3 #xFF)) + 'big) + ((and (= c0 #xFF) (= c1 #xFE) (= c2 #x00) (= c3 #x00)) + 'little) + (else + #f)))) + +(define r6rs-utf32->string + (case-lambda + ((bv default-endianness) + (let ((bom-endianness (read-bom32 bv))) + (if (not bom-endianness) + (utf32->string bv default-endianness) + (substring/shared (utf32->string bv bom-endianness) 1)))) + ((bv endianness endianness-mandatory?) + (if endianness-mandatory? + (utf32->string bv endianness) + (r6rs-utf32->string bv endianness))))) + ;;; bytevector.scm ends here -- 2.11.0 ============================================================