Jim Meyering <[EMAIL PROTECTED]> wrote: > Eric Blake <[EMAIL PROTECTED]> wrote: > >> Contrast the following: >> >> $ mkdir a b c >> $ echo 1 > a/f >> $ echo 2 > b/f >> $ cp -v a/f b/f c --remove-destination >> `a/f' -> `c/f' >> cp: will not overwrite just-created `c/f' with `b/f' >> >> with the similar: >> >> $ rm c/f >> $ ln -vf a/f b/f c >> `c/f' => `a/f' >> `c/f' => `b/f' >> $ cat c/f >> 2 >> >> Oops - we overwrote the just-created c/f with a link to b/f. Similarly >> with ln -s, where we lose the just-created link to a/f. > > Good catch. > Fixing this is important to avoid loss that would otherwise happen like this: > > ln -f a/f b/f c && rm -rf a b > > If ln succeeds, the user should be assured that removing the > just-linked files is ok, since there should be hard links in c/. > If ln exits successfully but leaves no link to one of the source > files, then a user running the above will lose whatever is in "a/f".
Thanks for spotting that. I've pushed this fix: Don't let ln be a party to destroying user data. * src/ln.c: Include "file-set.h", "hash.h" and "hash-triple.h". (dest_set, DEST_INFO_INITIAL_CAPACITY): New globals. (do_link): Refuse to remove a just-created link. Record a name,dev,ino triple for each link we create. (main): Initialize dest_set, if needed. * tests/mv/childproof: Test for the above fix. * NEWS: Document this. Reported by Eric Blake. Move functions from copy.c into new modules, since ln needs them, too. * bootstrap.conf (gnulib_modules): Add file-set. * gl/lib/file-set.c (record_file, seen_file): Functions from copy.c. * gl/lib/file-set.h: Add prototypes. * gl/lib/hash-triple.c (triple_hash, triple_hash_no_name): (triple_compare, triple_free): Functions from copy.c. * gl/lib/hash-triple.h (struct F_triple): Define. From copy.c. Add prototypes. * gl/modules/file-set: New module. * gl/modules/hash-triple: New module. * src/Makefile.am (copy_sources): New variable. (ginstall_SOURCES, cp_SOURCES, mv_SOURCES): Use it. * src/copy.c: Include hash-triple.h. No longer include hash-pjw.h. (copy_internal): Don't pass a NULL third argument to record_file, since that function no longer accepts that. (record_file): Move this function to file-set.c. Along the way, remove the code to allow a NULL stat-buffer pointer. Adjust sole caller. (seen_file): Move this function to file-set.c. (struct F_triple): Move declaration to hash-triple.h. (triple_compare, triple_free, triple_hash, triple_hash_no_name): Move these functions to hash-triple.c. --------------------------------------------- commit d02e4e77753f580ab91afc5915333222edc82104 Author: Jim Meyering <[EMAIL PROTECTED]> Date: Thu Aug 23 11:51:01 2007 +0200 Don't let ln be a party to destroying user data. * src/ln.c: Include "file-set.h", "hash.h" and "hash-triple.h". (dest_set, DEST_INFO_INITIAL_CAPACITY): New globals. (do_link): Refuse to remove a just-created link. Record a name,dev,ino triple for each link we create. (main): Initialize dest_set, if needed. * tests/mv/childproof: Test for the above fix. * NEWS: Document this. Reported by Eric Blake. Signed-off-by: Jim Meyering <[EMAIL PROTECTED]> diff --git a/ChangeLog b/ChangeLog index e20e96f..4947123 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,15 @@ 2007-08-23 Jim Meyering <[EMAIL PROTECTED]> + Don't let ln be a party to destroying user data. + * src/ln.c: Include "file-set.h", "hash.h" and "hash-triple.h". + (dest_set, DEST_INFO_INITIAL_CAPACITY): New globals. + (do_link): Refuse to remove a just-created link. + Record a name,dev,ino triple for each link we create. + (main): Initialize dest_set, if needed. + * tests/mv/childproof: Test for the above fix. + * NEWS: Document this. + Reported by Eric Blake. + Move functions from copy.c into new modules, since ln needs them, too. * bootstrap.conf (gnulib_modules): Add file-set. * gl/lib/file-set.c (record_file, seen_file): Functions from copy.c. diff --git a/NEWS b/NEWS index c548c0b..6a0f18d 100644 --- a/NEWS +++ b/NEWS @@ -56,6 +56,18 @@ GNU coreutils NEWS -*- outline -*- ptx longer accepts the --copyright option. who no longer accepts -i or --idle. +** Improved robustness + + ln -f can no longer silently clobber a just-created hard link. + In some cases, ln could be seen as being responsible for data loss. + For example, given directories a, b, c, and files a/f and b/f, we + should be able to do this safely: ln -f a/f b/f c && rm -f a/f b/f + However, before this change, ln would succeed, and thus cause the + loss of the contents of a/f. + + stty no longer silently accepts certain invalid hex values + in its 35-colon commmand-line argument + ** Bug fixes cp attempts to read a regular file, even if stat says it is empty. @@ -130,11 +142,6 @@ GNU coreutils NEWS -*- outline -*- tr -c no longer aborts when translating with Set2 larger than the complement of Set1. [introduced with the original version, in 1992] -** Improved robustness - - stty no longer silently accepts certain invalid hex values - in its 35-colon commmand-line argument - * Noteworthy changes in release 6.9 (2007-03-22) [stable] diff --git a/src/ln.c b/src/ln.c index 3ddcfdf..aa0e473 100644 --- a/src/ln.c +++ b/src/ln.c @@ -22,11 +22,14 @@ #include <getopt.h> #include "system.h" -#include "same.h" #include "backupfile.h" #include "error.h" #include "filenamecat.h" +#include "file-set.h" +#include "hash.h" +#include "hash-triple.h" #include "quote.h" +#include "same.h" #include "yesno.h" /* The official name of this program (e.g., no `g' prefix). */ @@ -82,6 +85,15 @@ static bool hard_dir_link; symlink-to-dir before creating the new link. */ static bool dereference_dest_dir_symlinks = true; +/* This is a set of destination name/inode/dev triples for hard links + created by ln. Use this data structure to avoid data loss via a + sequence of commands like this: + rm -rf a b c; mkdir a b c; touch a/f b/f; ln -f a/f b/f c && rm -r a b */ +static Hash_table *dest_set; + +/* Initial size of the dest_set hash table. */ +enum { DEST_INFO_INITIAL_CAPACITY = 61 }; + static struct option const long_options[] = { {"backup", optional_argument, NULL, 'b'}, @@ -178,6 +190,18 @@ do_link (const char *source, const char *dest) } } + /* If the current target was created as a hard link to another + source file, then refuse to unlink it. */ + if (dest_lstat_ok + && dest_set != NULL + && seen_file (dest_set, dest, &dest_stats)) + { + error (0, 0, + _("will not overwrite just-created %s with %s"), + quote_n (0, dest), quote_n (1, source)); + return false; + } + /* If --force (-f) has been specified without --backup, then before making a link ln must remove the destination file if it exists. (with --backup, it just renames any existing destination file) @@ -278,6 +302,10 @@ do_link (const char *source, const char *dest) if (ok) { + /* Right after creating a hard link, do this: (note dest name and + source_stats, which are also the just-linked-destinations stats) */ + record_file (dest_set, dest, &source_stats); + if (verbose) { if (dest_backup) @@ -514,6 +542,29 @@ main (int argc, char **argv) if (target_directory) { int i; + + /* Create the data structure we'll use to record which hard links we + create. Used to ensure that ln detects an obscure corner case that + might result in user data loss. Create it only if needed. */ + if (2 <= n_files + && remove_existing_files + /* Don't bother trying to protect symlinks, since ln clobbering + a just-created symlink won't ever lead to real data loss. */ + && ! symbolic_link + /* No destination hard link can be clobbered when making + numbered backups. */ + && backup_type != numbered_backups) + + { + dest_set = hash_initialize (DEST_INFO_INITIAL_CAPACITY, + NULL, + triple_hash, + triple_compare, + triple_free); + if (dest_set == NULL) + xalloc_die (); + } + ok = true; for (i = 0; i < n_files; ++i) { diff --git a/tests/mv/childproof b/tests/mv/childproof index 092afc8..e35afb6 100755 --- a/tests/mv/childproof +++ b/tests/mv/childproof @@ -1,8 +1,9 @@ #!/bin/sh -# Ensure that cp/mv don't clobber a just-copied file. -# With fileutils-4.1 and earlier, this test would fail. +# Ensure that cp/mv/ln don't clobber a just-copied/moved/linked file. +# With fileutils-4.1 and earlier, this test would fail for cp and mv. +# With coreutils-6.9 and earlier, this test would fail for ln. -# Copyright (C) 2001, 2004, 2006 Free Software Foundation, Inc. +# Copyright (C) 2001, 2004, 2006-2007 Free Software Foundation, Inc. # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -21,6 +22,7 @@ if test "$VERBOSE" = yes; then set -x cp --version mv --version + ln --version fi . $srcdir/../envvar-check @@ -87,4 +89,15 @@ test -f b/g && fail=1 # b/g should have been moved test -f c/f || fail=1 test -f c/g || fail=1 +# Test ln -f. + +rm -f a/f b/f c/f +echo a > a/f || fail=1 +echo b > b/f || fail=1 +ln -f a/f b/f c 2> /dev/null && fail=1 +# a/f and c/f must be linked +test `stat --format %i a/f` = `stat --format %i c/f` || fail=1 +# b/f and c/f must not be linked +test `stat --format %i b/f` = `stat --format %i c/f` && fail=1 + (exit $fail); exit $fail commit 22ed81c410c197003782ba379cb3148306b0cd8a Author: Jim Meyering <[EMAIL PROTECTED]> Date: Thu Aug 23 10:47:16 2007 +0200 Move functions from copy.c into new modules, since ln needs them, too. * bootstrap.conf (gnulib_modules): Add file-set. * gl/lib/file-set.c (record_file, seen_file): Functions from copy.c. * gl/lib/file-set.h: Add prototypes. * gl/lib/hash-triple.c (triple_hash, triple_hash_no_name): (triple_compare, triple_free): Functions from copy.c. * gl/lib/hash-triple.h (struct F_triple): Define. From copy.c. Add prototypes. * gl/modules/file-set: New module. * gl/modules/hash-triple: New module. * src/Makefile.am (copy_sources): New variable. (ginstall_SOURCES, cp_SOURCES, mv_SOURCES): Use it. * src/copy.c: Include hash-triple.h. No longer include hash-pjw.h. (copy_internal): Don't pass a NULL third argument to record_file, since that function no longer accepts that. (record_file): Move this function to file-set.c. Along the way, remove the code to allow a NULL stat-buffer pointer. Adjust sole caller. (seen_file): Move this function to file-set.c. (struct F_triple): Move declaration to hash-triple.h. (triple_compare, triple_free, triple_hash, triple_hash_no_name): Move these functions to hash-triple.c. Signed-off-by: Jim Meyering <[EMAIL PROTECTED]> diff --git a/ChangeLog b/ChangeLog index ea436cc..e20e96f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,29 @@ 2007-08-23 Jim Meyering <[EMAIL PROTECTED]> + Move functions from copy.c into new modules, since ln needs them, too. + * bootstrap.conf (gnulib_modules): Add file-set. + * gl/lib/file-set.c (record_file, seen_file): Functions from copy.c. + * gl/lib/file-set.h: Add prototypes. + * gl/lib/hash-triple.c (triple_hash, triple_hash_no_name): + (triple_compare, triple_free): Functions from copy.c. + * gl/lib/hash-triple.h (struct F_triple): Define. From copy.c. + Add prototypes. + * gl/modules/file-set: New module. + * gl/modules/hash-triple: New module. + * src/Makefile.am (copy_sources): New variable. + (ginstall_SOURCES, cp_SOURCES, mv_SOURCES): Use it. + * src/copy.c: Include hash-triple.h. + No longer include hash-pjw.h. + (copy_internal): Don't pass a NULL third argument to record_file, + since that function no longer accepts that. + (record_file): Move this function to file-set.c. + Along the way, remove the code to allow a NULL stat-buffer pointer. + Adjust sole caller. + (seen_file): Move this function to file-set.c. + (struct F_triple): Move declaration to hash-triple.h. + (triple_compare, triple_free, triple_hash, triple_hash_no_name): + Move these functions to hash-triple.c. + bootstrap: generate more ignorable names * bootstrap (slurp): When generating ignorable names, also map .sin to .sed, .gperf to .c, and .y to .c. diff --git a/bootstrap.conf b/bootstrap.conf index 68896c7..3fe0497 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -45,6 +45,7 @@ gnulib_modules=" cycle-check d-ino d-type diacrit dirfd dirname dup2 error euidaccess exclude exitfail fchdir fcntl fcntl-safer fdl + file-set file-type fileblocks filemode filenamecat fnmatch-gnu fopen-safer fprintftime diff --git a/gl/lib/file-set.c b/gl/lib/file-set.c new file mode 100644 index 0000000..ad03f90 --- /dev/null +++ b/gl/lib/file-set.c @@ -0,0 +1,56 @@ +#include <config.h> +#include "file-set.h" + +#include "hash-triple.h" +#include "xalloc.h" + +/* Record destination file, FILE, and dev/ino from *STATS, + in the hash table, HT. If HT is NULL, return immediately. + If memory allocation fails, exit immediately. */ +void +record_file (Hash_table *ht, char const *file, struct stat const *stats) +{ + struct F_triple *ent; + + if (ht == NULL) + return; + + ent = xmalloc (sizeof *ent); + ent->name = xstrdup (file); + ent->st_ino = stats->st_ino; + ent->st_dev = stats->st_dev; + + { + struct F_triple *ent_from_table = hash_insert (ht, ent); + if (ent_from_table == NULL) + { + /* Insertion failed due to lack of memory. */ + xalloc_die (); + } + + if (ent_from_table != ent) + { + /* There was alread a matching entry in the table, so ENT was + not inserted. Free it. */ + triple_free (ent); + } + } +} + +/* Return true if there is an entry in hash table, HT, + for the file described by FILE and STATS. */ +bool +seen_file (Hash_table const *ht, char const *file, + struct stat const *stats) +{ + struct F_triple new_ent; + + if (ht == NULL) + return false; + + new_ent.name = (char *) file; + new_ent.st_ino = stats->st_ino; + new_ent.st_dev = stats->st_dev; + + return !!hash_lookup (ht, &new_ent); +} diff --git a/gl/lib/file-set.h b/gl/lib/file-set.h new file mode 100644 index 0000000..a5a159e --- /dev/null +++ b/gl/lib/file-set.h @@ -0,0 +1,12 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <stdbool.h> + +#include "hash.h" + +extern void record_file (Hash_table *ht, char const *file, + struct stat const *stats) + __attribute__((nonnull(2, 3))); + +extern bool seen_file (Hash_table const *ht, char const *file, + struct stat const *stats); diff --git a/gl/lib/hash-triple.c b/gl/lib/hash-triple.c new file mode 100644 index 0000000..dfe2a59 --- /dev/null +++ b/gl/lib/hash-triple.c @@ -0,0 +1,57 @@ +#include <config.h> + +#include "hash-triple.h" + +#include <stdlib.h> + +#include "hash-pjw.h" +#include "same.h" +#include "same-inode.h" + +/* Hash an F_triple. */ +size_t +triple_hash (void const *x, size_t table_size) +{ + struct F_triple const *p = x; + + /* Also take the name into account, so that when moving N hard links to the + same file (all listed on the command line) all into the same directory, + we don't experience any N^2 behavior. */ + /* FIXME-maybe: is it worth the overhead of doing this + just to avoid N^2 in such an unusual case? N would have + to be very large to make the N^2 factor noticable, and + one would probably encounter a limit on the length of + a command line before it became a problem. */ + size_t tmp = hash_pjw (p->name, table_size); + + /* Ignoring the device number here should be fine. */ + return (tmp | p->st_ino) % table_size; +} + +/* Hash an F_triple. */ +size_t +triple_hash_no_name (void const *x, size_t table_size) +{ + struct F_triple const *p = x; + + /* Ignoring the device number here should be fine. */ + return p->st_ino % table_size; +} + +/* Compare two F_triple structs. */ +bool +triple_compare (void const *x, void const *y) +{ + struct F_triple const *a = x; + struct F_triple const *b = y; + return (SAME_INODE (*a, *b) && same_name (a->name, b->name)) ? true : false; +} + +/* Free an F_triple. */ +void +triple_free (void *x) +{ + struct F_triple *a = x; + free (a->name); + free (a); +} diff --git a/gl/lib/hash-triple.h b/gl/lib/hash-triple.h new file mode 100644 index 0000000..7abe970 --- /dev/null +++ b/gl/lib/hash-triple.h @@ -0,0 +1,21 @@ +#ifndef HASH_TRIPLE_H +#define HASH_TRIPLE_H + +#include <sys/types.h> +#include <sys/stat.h> +#include <stdbool.h> + +/* Describe a just-created or just-renamed destination file. */ +struct F_triple +{ + char *name; + ino_t st_ino; + dev_t st_dev; +}; + +extern size_t triple_hash (void const *x, size_t table_size); +extern size_t triple_hash_no_name (void const *x, size_t table_size); +extern bool triple_compare (void const *x, void const *y); +extern void triple_free (void *x); + +#endif diff --git a/gl/modules/file-set b/gl/modules/file-set new file mode 100644 index 0000000..7895cda --- /dev/null +++ b/gl/modules/file-set @@ -0,0 +1,27 @@ +Description: +Very specialized set-of-files code. + +Files: +lib/file-set.c +lib/file-set.h + +Depends-on: +hash +hash-triple +stdbool +xalloc +xalloc-die + +configure.ac: + +Makefile.am: +lib_SOURCES += file-set.c + +Include: +"file-set.h" + +License: +GPL + +Maintainer: +Jim Meyering diff --git a/gl/modules/hash-triple b/gl/modules/hash-triple new file mode 100644 index 0000000..b746d47 --- /dev/null +++ b/gl/modules/hash-triple @@ -0,0 +1,25 @@ +Description: +Hash functions for file-related triples: name, device, inode. + +Files: +lib/hash-triple.c +lib/hash-triple.h + +Depends-on: +hash-pjw +same +same-inode + +configure.ac: + +Makefile.am: +lib_SOURCES += hash-triple.c + +Include: +"hash-triple.h" + +License: +GPL + +Maintainer: +Jim Meyering diff --git a/src/Makefile.am b/src/Makefile.am index 7e481ad..43f138c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -200,24 +200,27 @@ uninstall-local: rm -f $(installed_su); \ else :; fi +copy_sources = copy.c cp-hash.c + # Use `ginstall' in the definition of PROGRAMS and in dependencies to avoid # confusion with the `install' target. The install rule transforms `ginstall' # to install before applying any user-specified name transformations. transform = s/ginstall/install/; @program_transform_name@ -ginstall_SOURCES = install.c copy.c cp-hash.c +ginstall_SOURCES = install.c $(copy_sources) # This is for the '[' program. Automake transliterates '[' to '_'. __SOURCES = lbracket.c -cp_SOURCES = cp.c copy.c cp-hash.c +cp_SOURCES = cp.c $(copy_sources) dir_SOURCES = ls.c ls-dir.c vdir_SOURCES = ls.c ls-vdir.c +ln_SOURCES = ln.c ls_SOURCES = ls.c ls-ls.c chown_SOURCES = chown.c chown-core.c chgrp_SOURCES = chgrp.c chown-core.c -mv_SOURCES = mv.c copy.c cp-hash.c remove.c +mv_SOURCES = mv.c remove.c $(copy_sources) rm_SOURCES = rm.c remove.c uname_SOURCES = uname.c uname-uname.c diff --git a/src/copy.c b/src/copy.c index b7bf92a..c1d8519 100644 --- a/src/copy.c +++ b/src/copy.c @@ -44,7 +44,7 @@ #include "full-write.h" #include "getpagesize.h" #include "hash.h" -#include "hash-pjw.h" +#include "hash-triple.h" #include "lchmod.h" #include "quote.h" #include "same.h" @@ -77,14 +77,6 @@ struct dir_list dev_t dev; }; -/* Describe a just-created or just-renamed destination file. */ -struct F_triple -{ - char *name; - ino_t st_ino; - dev_t st_dev; -}; - /* Initial size of the cp.dest_info hash table. */ #define DEST_INFO_INITIAL_CAPACITY 61 @@ -904,54 +896,6 @@ overwrite_prompt (char const *dst_name, struct stat const *dst_sb) } } -/* Hash an F_triple. */ -static size_t -triple_hash (void const *x, size_t table_size) -{ - struct F_triple const *p = x; - - /* Also take the name into account, so that when moving N hard links to the - same file (all listed on the command line) all into the same directory, - we don't experience any N^2 behavior. */ - /* FIXME-maybe: is it worth the overhead of doing this - just to avoid N^2 in such an unusual case? N would have - to be very large to make the N^2 factor noticable, and - one would probably encounter a limit on the length of - a command line before it became a problem. */ - size_t tmp = hash_pjw (p->name, table_size); - - /* Ignoring the device number here should be fine. */ - return (tmp | p->st_ino) % table_size; -} - -/* Hash an F_triple. */ -static size_t -triple_hash_no_name (void const *x, size_t table_size) -{ - struct F_triple const *p = x; - - /* Ignoring the device number here should be fine. */ - return p->st_ino % table_size; -} - -/* Compare two F_triple structs. */ -static bool -triple_compare (void const *x, void const *y) -{ - struct F_triple const *a = x; - struct F_triple const *b = y; - return (SAME_INODE (*a, *b) && same_name (a->name, b->name)) ? true : false; -} - -/* Free an F_triple. */ -static void -triple_free (void *x) -{ - struct F_triple *a = x; - free (a->name); - free (a); -} - /* Initialize the hash table implementing a set of F_triple entries corresponding to destination files. */ extern void @@ -1941,7 +1885,11 @@ copy_internal (char const *src_name, char const *dst_name, } if (command_line_arg) - record_file (x->dest_info, dst_name, NULL); + { + struct stat sb; + if (lstat (dst_name, &sb) == 0) + record_file (x->dest_info, dst_name, &sb); + } if ( ! preserve_metadata) return true; _______________________________________________ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils