Fixes <https://issues.guix.gnu.org/55499>. Reported by Ricardo Wurmus <rek...@elephly.net>.
With this change, the manifest file created for: guix install r r-seurat r-cistopic r-monocle3 r-cicero-monocle3 r-assertthat goes from 5.6M to 192K. Likewise, on this profile, wall-clock time of: GUIX_PROFILING=gc guix package -I goes from 0.7s to 0.1s, with heap usage going from 55M to 9M. * guix/profiles.scm (manifest->gexp)[entry->gexp]: Turn into a monadic procedure. Return a 'repeated' sexp if ENTRY was already visited before. Adjust caller accordingly. Bump manifest version. (sexp->manifest)[sexp->manifest-entry]: Turn into a monadic procedure. Add case for 'repeated' nodes. Add each entry to the current state vhash. Add clause for version 4 manifests. * tests/profiles.scm ("deduplication of repeated entries"): New test. * guix/build/profiles.scm (manifest-sexp->inputs+search-paths): Expect version 4. Add clause for 'repeated' nodes. --- guix/build/profiles.scm | 4 +- guix/profiles.scm | 127 ++++++++++++++++++++++++++++------------ tests/profiles.scm | 42 +++++++++++++ 3 files changed, 134 insertions(+), 39 deletions(-) diff --git a/guix/build/profiles.scm b/guix/build/profiles.scm index f9875ca92e..c4460f624b 100644 --- a/guix/build/profiles.scm +++ b/guix/build/profiles.scm @@ -150,7 +150,7 @@ (define (manifest-sexp->inputs+search-paths manifest) values: the list of store items of its manifest entries, and the list of search path specifications." (match manifest ;this must match 'manifest->gexp' - (('manifest ('version 3) + (('manifest ('version 4) ('packages (entries ...))) (let loop ((entries entries) (inputs '()) @@ -162,6 +162,8 @@ (define (manifest-sexp->inputs+search-paths manifest) (loop (append rest deps) ;breadth-first traversal (cons item inputs) (append paths search-paths))) + ((('repeated name version item) . rest) + (loop rest inputs search-paths)) (() (values (reverse inputs) (delete-duplicates diff --git a/guix/profiles.scm b/guix/profiles.scm index bf50c00a1e..44ff37e75b 100644 --- a/guix/profiles.scm +++ b/guix/profiles.scm @@ -455,31 +455,53 @@ (define (inferior->entry) (define (manifest->gexp manifest) "Return a representation of MANIFEST as a gexp." (define (entry->gexp entry) - (match entry - (($ <manifest-entry> name version output (? string? path) - (deps ...) (search-paths ...) _ (properties ...)) - #~(#$name #$version #$output #$path - (propagated-inputs #$(map entry->gexp deps)) - (search-paths #$(map search-path-specification->sexp - search-paths)) - #$@(if (null? properties) - #~() - #~((properties . #$properties))))) - (($ <manifest-entry> name version output package - (deps ...) (search-paths ...) _ (properties ...)) - #~(#$name #$version #$output - (ungexp package (or output "out")) - (propagated-inputs #$(map entry->gexp deps)) - (search-paths #$(map search-path-specification->sexp - search-paths)) - #$@(if (null? properties) - #~() - #~((properties . #$properties))))))) + ;; Maintain in state monad a vhash of visited entries, indexed by their + ;; item, usually package objects (we cannot use the entry itself as an + ;; index since identical entries are usually not 'eq?'). Use that vhash + ;; to avoid repeating duplicate entries. This is particularly useful in + ;; the presence of propagated inputs, where we could otherwise end up + ;; repeating large trees. + (mlet %state-monad ((visited (current-state))) + (if (match (vhash-assq (manifest-entry-item entry) visited) + ((_ . previous-entry) + (manifest-entry=? previous-entry entry)) + (#f #f)) + (return #~(repeated #$(manifest-entry-name entry) + #$(manifest-entry-version entry) + #$(manifest-entry-item entry))) + (mbegin %state-monad + (set-current-state (vhash-consq (manifest-entry-item entry) + entry visited)) + (mlet %state-monad ((deps (mapm %state-monad entry->gexp + (manifest-entry-dependencies entry)))) + (return + (match entry + (($ <manifest-entry> name version output (? string? path) + (_deps ...) (search-paths ...) _ (properties ...)) + #~(#$name #$version #$output #$path + (propagated-inputs #$deps) + (search-paths #$(map search-path-specification->sexp + search-paths)) + #$@(if (null? properties) + #~() + #~((properties . #$properties))))) + (($ <manifest-entry> name version output package + (_deps ...) (search-paths ...) _ (properties ...)) + #~(#$name #$version #$output + (ungexp package (or output "out")) + (propagated-inputs #$deps) + (search-paths #$(map search-path-specification->sexp + search-paths)) + #$@(if (null? properties) + #~() + #~((properties . #$properties)))))))))))) (match manifest (($ <manifest> (entries ...)) - #~(manifest (version 3) - (packages #$(map entry->gexp entries)))))) + #~(manifest (version 4) + (packages #$(run-with-state + (mapm %state-monad entry->gexp entries) + vlist-null)))))) (define (find-package name version) "Return a package from the distro matching NAME and possibly VERSION. This @@ -522,25 +544,44 @@ (define (infer-dependency item parent) (define* (sexp->manifest-entry sexp #:optional (parent (delay #f))) (match sexp + (('repeated name version path) + ;; This entry is the same as another one encountered earlier; look it + ;; up and return it. + (mlet %state-monad ((visited (current-state)) + (key -> (list name version path))) + (match (vhash-assoc key visited) + (#f + (raise (formatted-message + (G_ "invalid repeated entry in profile: ~s") + sexp))) + ((_ . entry) + (return entry))))) ((name version output path ('propagated-inputs deps) ('search-paths search-paths) extra-stuff ...) - ;; For each of DEPS, keep a promise pointing to ENTRY. - (letrec* ((deps* (map (cut sexp->manifest-entry <> (delay entry)) - deps)) - (entry (manifest-entry - (name name) - (version version) - (output output) - (item path) - (dependencies deps*) - (search-paths (map sexp->search-path-specification - search-paths)) - (parent parent) - (properties (or (assoc-ref extra-stuff 'properties) - '()))))) - entry)))) + (mlet* %state-monad + ((entry -> #f) + (deps* (mapm %state-monad + (cut sexp->manifest-entry <> (delay entry)) + deps)) + (visited (current-state)) + (key -> (list name version path))) + (set! entry ;XXX: emulate 'letrec*' + (manifest-entry + (name name) + (version version) + (output output) + (item path) + (dependencies deps*) + (search-paths (map sexp->search-path-specification + search-paths)) + (parent parent) + (properties (or (assoc-ref extra-stuff 'properties) + '())))) + (mbegin %state-monad + (set-current-state (vhash-cons key entry visited)) + (return entry)))))) (match sexp (('manifest ('version 0) @@ -608,7 +649,17 @@ (define* (sexp->manifest-entry sexp #:optional (parent (delay #f))) ;; Version 3 represents DEPS as full-blown manifest entries. (('manifest ('version 3 minor-version ...) ('packages (entries ...))) - (manifest (map sexp->manifest-entry entries))) + (manifest (run-with-state + (mapm %state-monad sexp->manifest-entry entries) + vlist-null))) + + ;; Version 4 deduplicates repeated entries, as can happen with deep + ;; propagated input trees. + (('manifest ('version 4 minor-version ...) + ('packages (entries ...))) + (manifest (run-with-state + (mapm %state-monad sexp->manifest-entry entries) + vlist-null))) (_ (raise (condition (&message (message "unsupported manifest format"))))))) diff --git a/tests/profiles.scm b/tests/profiles.scm index 7e51d37ab9..3838d971c9 100644 --- a/tests/profiles.scm +++ b/tests/profiles.scm @@ -586,6 +586,48 @@ (define (entry->sexp entry) #:locales? #f))) (return #f))))) +(test-assertm "deduplication of repeated entries" + ;; Make sure the 'manifest' file does not duplicate identical entries. + ;; See <https://issues.guix.gnu.org/55499>. + (mlet* %store-monad ((p0 -> (dummy-package "p0" + (build-system trivial-build-system) + (arguments + `(#:guile ,%bootstrap-guile + #:builder (mkdir (assoc-ref %outputs "out")))) + (propagated-inputs + `(("guile" ,%bootstrap-guile))))) + (p1 -> (package + (inherit p0) + (name "p1"))) + (drv (profile-derivation (packages->manifest + (list p0 p1)) + #:hooks '() + #:locales? #f))) + (mbegin %store-monad + (built-derivations (list drv)) + (let ((file (string-append (derivation->output-path drv) + "/manifest")) + (manifest (profile-manifest (derivation->output-path drv)))) + (define (contains-repeated? sexp) + (match sexp + (('repeated _ ...) #t) + ((lst ...) (any contains-repeated? sexp)) + (_ #f))) + + (return (and (contains-repeated? (call-with-input-file file read)) + + ;; MANIFEST has two entries for %BOOTSTRAP-GUILE since + ;; it's propagated both from P0 and from P1. When + ;; reading a 'repeated' node, 'read-manifest' should + ;; reuse the previously-read entry so the two + ;; %BOOTSTRAP-GUILE entries must be 'eq?'. + (match (manifest-entries manifest) + (((= manifest-entry-dependencies (dep0)) + (= manifest-entry-dependencies (dep1))) + (and (string=? (manifest-entry-name dep0) + (package-name %bootstrap-guile)) + (eq? dep0 dep1)))))))))) + (test-assertm "no collision" ;; Here we have an entry that is "lowered" (its 'item' field is a store file ;; name) and another entry (its 'item' field is a package) that is -- 2.36.1