Howdy Ricardo,

Ricardo Wurmus <rek...@elephly.net> skribis:
>> Unfortunately, this is not fully deterministic: when running --check >> several times in a row, I occasionally get different results. I suspect >> GDBM’s output is not fully deterministic. > > Hmm, I dumped the contents of the generated databases with gdbm_dump and > couldn’t find any difference aside from the header (which is produced by > gdbm_dump itself). Diffoscope shows a lot of differences, though. > > I thought that maybe the difference comes from the fact that upon adding > new entries gdbm grows the hash table. After setting the initial size > to a multiple of the number of entries I haven’t been able to generate a > non-reproducible database. > > My only change is in “write-mandb-database”: > > (gdbm-open file GDBM_WRCREAT #:block-size (* 512 (length entries))) > > I tried this: > > ./pre-inst-env guix package -p foo -i coreutils guile > for i in `seq 30`; do ./pre-inst-env guix build --check -K > /gnu/store/pg3684khpj69py40v7p76b90r9q4j2lv-manual-database.drv; done > > Seems fine. Coincidence or did I get lucky? I checked with the program below. It helps, but does not entirely fix it:
;; Reproducer: regenerate the man-db index over and over, printing the size
;; and SHA-256 of each result so that run-to-run differences are visible.
(use-modules (guix man-db)
             (guix hash)
             (guix base32))

;; Scratch database file, rewritten on every iteration.
(define %database "/tmp/index.db")

(let again ()
  ;; Remove the previous database; any error from 'delete-file' (such as the
  ;; file not existing yet) is ignored.
  (false-if-exception (delete-file %database))

  ;; Rebuild the database from the man pages of the profile.
  (let ((entries (mandb-entries "/home/ludo/.guix-profile/share/man")))
    (write-mandb-database %database entries))

  ;; Report the file size and its nix-base32-encoded SHA-256 hash.
  (let* ((size   (stat:size (stat %database)))
         (digest (bytevector->nix-base32-string (file-sha256 %database))))
    (pk size digest))

  ;; Loop forever; interrupt the program to stop it.
  (again))
Valgrind reports this: --8<---------------cut here---------------start------------->8--- ==8395== Syscall param write(buf) points to uninitialised byte(s) ==8395== at 0x53E4A8D: ??? (in /gnu/store/3h31zsqxjjg52da5gp3qmhkh4x8klhah-glibc-2.25/lib/libpthread-2.25.so) ==8395== by 0xACAF44D: _gdbm_full_write (in /gnu/store/kg8ffb14msfnc9aivxj6djrl51g9b3zz-gdbm-1.13/lib/libgdbm.so.4.0.0) ==8395== by 0xACAC6AD: gdbm_fd_open (in /gnu/store/kg8ffb14msfnc9aivxj6djrl51g9b3zz-gdbm-1.13/lib/libgdbm.so.4.0.0) ==8395== by 0x55FA0BF: ffi_call_unix64 (in /gnu/store/kvi64k387hqdrn59gsgd09brxh65jxjj-libffi-3.2.1/lib/libffi.so.6.0.4) ==8395== by 0x55F8EE0: ffi_call (in /gnu/store/kvi64k387hqdrn59gsgd09brxh65jxjj-libffi-3.2.1/lib/libffi.so.6.0.4) ==8395== by 0x4E8C23C: scm_i_foreign_call (in /gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0) ==8395== by 0x4EF9243: vm_regular_engine (in /gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0) ==8395== by 0x4EFC7B9: scm_call_n (in /gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0) ==8395== by 0x4E80A06: scm_primitive_eval (in /gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0) ==8395== by 0x4E80A62: scm_eval (in /gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0) ==8395== by 0x4ECBA6F: scm_shell (in /gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0) ==8395== by 0x4E974AC: invoke_main_func (in /gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0) ==8395== Address 0xced0044 is 4 bytes inside a block of size 8,388,608 alloc'd ==8395== at 0x4C2AAD6: malloc (in /gnu/store/p2b1rzqlpdqbhn42g76xzgykbivwc063-valgrind-3.12.0/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==8395== by 0xACAC5E6: gdbm_fd_open (in /gnu/store/kg8ffb14msfnc9aivxj6djrl51g9b3zz-gdbm-1.13/lib/libgdbm.so.4.0.0) ==8395== by 0x55FA0BF: ffi_call_unix64 (in 
/gnu/store/kvi64k387hqdrn59gsgd09brxh65jxjj-libffi-3.2.1/lib/libffi.so.6.0.4) ==8395== by 0x55F8EE0: ffi_call (in /gnu/store/kvi64k387hqdrn59gsgd09brxh65jxjj-libffi-3.2.1/lib/libffi.so.6.0.4) ==8395== by 0x4E8C23C: scm_i_foreign_call (in /gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0) --8<---------------cut here---------------end--------------->8--- >> +(define (entry->string entry) >> + "Return the wire format for ENTRY as a string." >> + (match entry >> + (($ <mandb-entry> file name section synopsis) >> + (string-append (abbreviate-file-name file) "\t" >> + (number->string section) "\t" >> + (number->string section) >> + >> + ;; Timestamps, that we always set to the epoch. >> + "\t0\t0" >> + >> + ;; XXX: Weird things. >> + "\tB\t-\t-\tgz\t" > > What’s that? In db_store.c it’s done like this: --8<---------------cut here---------------start------------->8--- MYDBM_SET (cont, xasprintf ( "%s\t%s\t%s\t%ld\t%ld\t%c\t%s\t%s\t%s\t%s", dash_if_unset (in->name), in->ext, in->sec, (long) in->mtime.tv_sec, in->mtime.tv_nsec, in->id, in->pointer, in->filter, in->comp, in->whatis)); --8<---------------cut here---------------end--------------->8--- and db_storage.h says: --8<---------------cut here---------------start------------->8--- struct mandata { struct mandata *next; /* ptr to next structure, if any */ char *addr; /* ptr to memory containing the fields */ char *name; /* Name of page, if != key */ /* The following are all const because they should be pointers to * parts of strings allocated elsewhere (often the addr field above) * and should not be written through or freed themselves. 
*/ const char *ext; /* Filename ext w/o comp ext */ const char *sec; /* Section name/number */ char id; /* id for this entry */ const char *pointer; /* id related file pointer */ const char *comp; /* Compression extension */ const char *filter; /* filters needed for the page */ const char *whatis; /* whatis description for page */ struct timespec mtime; /* mod time for file */ }; --8<---------------cut here---------------end--------------->8--- The ‘B’ part gives the kind of manual page: --8<---------------cut here---------------start------------->8--- /* These definitions give an inherent precedence to each particular type of manual page: ULT_MAN: ultimate manual page, the full source nroff file. SO_MAN: source nroff file containing .so request to an ULT_MAN. WHATIS_MAN: virtual `whatis referenced' page pointing to an ULT_MAN. STRAY_CAT: pre-formatted manual page with no source. WHATIS_CAT: virtual `whatis referenced' page pointing to a STRAY_CAT. */ --8<---------------cut here---------------end--------------->8--- I’ve updated man-db.scm to handle that better. Thanks, Ludo’.
diff --git a/guix/man-db.scm b/guix/man-db.scm index b42558b06..3ce268547 100644 --- a/guix/man-db.scm +++ b/guix/man-db.scm @@ -29,6 +29,7 @@ mandb-entry-name mandb-entry-section mandb-entry-synopsis + mandb-entry-kind mandb-entries write-mandb-database)) @@ -47,12 +48,13 @@ (module-use! (current-module) (resolve-interface '(gdbm))) (define-record-type <mandb-entry> - (mandb-entry file-name name section synopsis) + (mandb-entry file-name name section synopsis kind) mandb-entry? (file-name mandb-entry-file-name) ;e.g., "../abiword.1.gz" (name mandb-entry-name) ;e.g., "ABIWORD" (section mandb-entry-section) ;number - (synopsis mandb-entry-synopsis)) ;string + (synopsis mandb-entry-synopsis) ;string + (kind mandb-entry-kind)) ;'ultimate | 'link (define (mandb-entry<? entry1 entry2) (match entry1 @@ -74,16 +76,26 @@ (define (entry->string entry) "Return the wire format for ENTRY as a string." (match entry - (($ <mandb-entry> file name section synopsis) + (($ <mandb-entry> file name section synopsis kind) + ;; See db_store.c:make_content in man-db for the format. (string-append (abbreviate-file-name file) "\t" (number->string section) "\t" (number->string section) - ;; Timestamps, that we always set to the epoch. + ;; Timestamp that we always set to the epoch. "\t0\t0" - ;; XXX: Weird things. - "\tB\t-\t-\tgz\t" + ;; See "db_storage.h" in man-db for the different kinds. + "\t" + (case kind + ((ultimate) "A") ;ultimate man page + ((link) "B") ;".so" link to other man page + (else "A")) ;something that doesn't matter much + + "\t-\t-\t" + + (if (string-suffix? ".gz" file) "gz" "") + "\t" synopsis "\x00")))) @@ -94,7 +106,8 @@ (define (write-mandb-database file entries) "Write ENTRIES to FILE as a man-db database. FILE is usually \".../index.db\", and is a GDBM database." - (let ((db (gdbm-open file GDBM_WRCREAT))) + (let ((db (gdbm-open file GDBM_WRCREAT + #:block-size (* 512 (length entries))))) (gdbm-set! 
db %version-key %version-value) ;; Write ENTRIES in sorted order so we get deterministic output. @@ -141,33 +154,37 @@ (string->number (string-drop (string-drop-right str 1) 1)) (string->number str))) + ;; Note: This works for both gzipped and uncompressed files. (call-with-gzip-input-port (open-file file "r0") (lambda (port) (let loop ((name #f) (section #f) - (synopsis #f)) + (synopsis #f) + (kind 'ultimate)) (if (and name section synopsis) - (mandb-entry file name section synopsis) + (mandb-entry file name section synopsis kind) (let ((line (read-line port))) (if (eof-object? line) - (mandb-entry file name (or section 0) (or synopsis "")) + (mandb-entry file name (or section 0) (or synopsis "") + kind) (match (string-tokenize line) ((".TH" name (= string->number* section) _ ...) - (loop name section synopsis)) + (loop name section synopsis kind)) ((".SH" (or "NAME" "\"NAME\"")) - (loop name section (read-synopsis port))) + (loop name section (read-synopsis port) kind)) ((".so" link) (match (and=> (resolve link) (cut man-page->entry <> resolve)) (#f - (loop name section synopsis)) + (loop name section synopsis 'link)) (alias (mandb-entry file (mandb-entry-name alias) (mandb-entry-section alias) - (mandb-entry-synopsis alias))))) + (mandb-entry-synopsis alias) + 'link)))) (_ - (loop name section synopsis)))))))))) + (loop name section synopsis kind)))))))))) (define (man-files directory) "Return the list of man pages found under DIRECTORY, recursively."