janneke pushed a commit to branch core-packages-team
in repository guix.

commit 3f0f24ec76002fefd0a20e9aea33ed646664f153
Author: Tomas Volf <~@wolfsden.cz>
AuthorDate: Thu Jan 23 23:57:06 2025 +0100

    gexp: Improve support of Unicode characters.
    
    Support for non-ASCII characters was mixed.  Some gexp forms did support
    them, while others did not.  Combined with the current value of
    %default-port-conversion-strategy, that sometimes led to unpleasant
    surprises.
    For example:
    
        (scheme-file "utf8" #~(with-output-to-file #$output
                                (λ _ (display "猫"))))
    
    Was written to the store as:
    
        ((? _ (display "\u732b")))
    
    No, that is not a font issue on your part; that is an actual #\? instead
    of the lambda character.  Which, surprisingly, does not do what it should
    when executed.
    
    The solution is to switch LC_CTYPE to C.UTF-8 where possible, since it is
    now always available, or otherwise to explicitly set the port encoding.
    
    No tests are provided, since the majority of tests/gexp.scm uses Guile 2,
    and things tend to work under it.  The issues occur mostly with Guile 3.
    
    I did test it locally using:
    
          #!/bin/sh
          set -eu
          set -x
    
          [ -f guix.scm ] || { echo >&2 Run from root of Guix repo.; exit 1; }
          [ -f gnu.scm  ] || { echo >&2 Run from root of Guix repo.; exit 1; }
    
          cat >猫.scm <<'EOF'
          (define-module (猫)
            #:export (say))
    
          (define (say)
            "nyaaaa~~~~!")
          EOF
    
          mkdir -p dir-with-utf8-file
          cp 猫.scm dir-with-utf8-file/
    
          cat >repro.scm <<'EOF'
          (use-modules (guix build utils)
                       (guix derivations)
                       (guix gexp)
                       (guix store)
                       (ice-9 ftw)
                       (ice-9 textual-ports))
    
          (define cat "猫")
    
          (define (drv-content drv)
            (call-with-input-file (derivation->output-path drv)
              get-string-all))
    
          (define (out-content out)
            (call-with-input-file out
              get-string-all))
    
          (define (drv-listing drv)
            (scandir (derivation->output-path drv)))
    
          (define (dir-listing dir)
            (scandir dir))
    
          (define-macro (test exp lower? report)
            (let ((type (car exp)))
              `(false-if-exception
                (let ((drv (with-store %store
                             (run-with-store %store
                               (,(if lower? lower-object identity) ,exp)))))
                  (format #t "~%~a:~%" ',type)
                  (when (with-store %store
                          (build-derivations %store (list drv)))
                    (format #t "~a~%" (,report drv)))))))
    
          (test (computed-file "utf8"
                               #~(with-output-to-file #$output
                                   (λ _ (display #$cat))))
                #t drv-content)
    
          (test (program-file "utf8"
                              #~((λ _ (display #$cat))))
                #t drv-content)
    
          (test (scheme-file "utf8"
                             #~((λ _ (display #$cat))))
                #t drv-content)
    
          (test (text-file* "utf8" cat cat cat)
                #f drv-content)
    
          (test (compiled-modules '((猫)))
                #f drv-listing)
    
          (test (file-union "utf8" `((,cat ,(plain-file "utf8" cat))))
                #t drv-listing)
    
          ;;; No fix needed:
          (test (imported-modules '((猫)))
                #f dir-listing)
    
          (test (local-file "dir-with-utf8-file" #:recursive? #t)
                #t dir-listing)
    
          (test (plain-file "utf8" cat)
                #t out-content)
    
          (test (mixed-text-file "utf8" cat cat cat)
                #t drv-content)
    
          (test (directory-union "utf8" (list (local-file "dir-with-utf8-file"
                                                          #:recursive? #t)))
                #t dir-listing)
          EOF
    
          guix shell -CWN -D guix glibc-locales -- \
               env LANG=C.UTF-8 ./pre-inst-env guix repl -- ./repro.scm
    
    Before this series, the output is:
    
          + '[' -f guix.scm ']'
          + '[' -f gnu.scm ']'
          + cat
          + mkdir -p dir-with-utf8-file
          + cp 猫.scm dir-with-utf8-file/
          + cat
          + guix shell -CWN -D guix glibc-locales -- env LANG=C.UTF-8 ./pre-inst-env guix repl -- ./repro.scm
    
          computed-file:
          ?
    
          program-file:
          #!/gnu/store/mfkz7fvlfpv3ppwbkv0imb19nrf95akf-guile-3.0.9/bin/guile --no-auto-compile
          !#
          ((? _ (display "\u732b")))
    
          scheme-file:
          ((? _ (display "\u732b")))
    
          text-file*:
          ???
    
          compiled-modules:
          building path(s) `/gnu/store/ay3jifyvliigfgnz67jf0kgngzpya5a5-module-import-compiled'
          Backtrace:
                     5 (primitive-load "/gnu/store/rn7b0dq6iqfmmqyqzamix2mjmfy?")
          In ice-9/eval.scm:
              619:8  4 (_ #f)
          In srfi/srfi-1.scm:
             460:18  3 (fold #<procedure 7ffff79245e0 at ice-9/eval.scm:336:1?> ?)
          In ice-9/eval.scm:
             245:16  2 (_ #(#(#<directory (guix build utils) 7ffff779f320>) # ?))
          In ice-9/boot-9.scm:
            1982:24  1 (_ _)
          In unknown file:
                     0 (stat "./???.scm" #<undefined>)
    
          ERROR: In procedure stat:
          In procedure stat: No such file or directory: "./???.scm"
          builder for `/gnu/store/dxg87135zcd6a1c92dlrkyvxlbhfwfld-module-import-compiled.drv' failed with exit code 1
    
          file-union:
          (. .. ?)
    
          imported-modules:
          (. .. 猫.scm)
    
          local-file:
          (. .. 猫.scm)
    
          plain-file:
          猫
    
          mixed-text-file:
          猫猫猫
    
          directory-union:
          (. .. 猫.scm)
    
    Which I think you will agree is far from optimal.  After these fixes the
    output changes to:
    
          + '[' -f guix.scm ']'
          + '[' -f gnu.scm ']'
          + cat
          + mkdir -p dir-with-utf8-file
          + cp 猫.scm dir-with-utf8-file/
          + cat
          + guix shell -CWN -D guix glibc-locales -- env LANG=C.UTF-8 ./pre-inst-env guix repl -- ./repro.scm
    
          computed-file:
          猫
    
          program-file:
          #!/gnu/store/8kbmn359jqkgsbqgqxnmiryvd9ynz8w7-guile-3.0.9/bin/guile --no-auto-compile
          !#
          ((λ _ (display "猫")))
    
          scheme-file:
          ((λ _ (display "猫")))
    
          text-file*:
          猫猫猫
    
          compiled-modules:
          (. .. 猫.go)
    
          file-union:
          (. .. 猫)
    
          imported-modules:
          (. .. 猫.scm)
    
          local-file:
          (. .. 猫.scm)
    
          plain-file:
          猫
    
          mixed-text-file:
          猫猫猫
    
          directory-union:
          (. .. 猫.scm)
    
    Which is actually what the user would expect.
    
    * guix/gexp.scm (gexp->derivation): Default LC_CTYPE to C.UTF-8.
    (gexp->script, text-file*): Set port encoding to UTF-8.
    
    Change-Id: Ie92a57fe1c3b45d1c7a5e8865fcf291c5f590c11
    Signed-off-by: Janneke Nieuwenhuizen <jann...@gnu.org>
---
 guix/gexp.scm | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/guix/gexp.scm b/guix/gexp.scm
index ad51bc55b7..15cf13addd 100644
--- a/guix/gexp.scm
+++ b/guix/gexp.scm
@@ -5,6 +5,7 @@
 ;;; Copyright © 2019, 2020 Mathieu Othacehe <m.othac...@gmail.com>
 ;;; Copyright © 2020 Maxim Cournoyer <maxim.courno...@gmail.com>
 ;;; Copyright © 2021, 2022 Maxime Devos <maximede...@telenet.be>
+;;; Copyright © 2025 Tomas Volf <~@wolfsden.cz>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -1149,7 +1150,7 @@ derivations--e.g., code evaluated for its side effects."
                            #:key
                            system (target 'current)
                            hash hash-algo recursive?
-                           (env-vars '())
+                           (env-vars '(("LC_CTYPE" . "C.UTF-8")))
                            (modules '())
                            (module-path %load-path)
                            (guile-for-build (%guile-for-build))
@@ -2018,6 +2019,8 @@ imported modules in its search path.  Look up EXP's modules in MODULE-PATH."
                       (gexp
                        (call-with-output-file (ungexp output)
                          (lambda (port)
+                           (set-port-encoding! port "UTF-8")
+
                            ;; Note: that makes a long shebang.  When the store
                            ;; is /gnu/store, that fits within the 128-byte
                            ;; limit imposed by Linux, but that may go beyond
@@ -2116,6 +2119,7 @@ resulting store file holds references to all these."
   (define builder
     (gexp (call-with-output-file (ungexp output "out")
             (lambda (port)
+              (set-port-encoding! port "UTF-8")
               (display (string-append (ungexp-splicing text)) port)))))
 
   (gexp->derivation name builder

Reply via email to