The new test where `split -n 1/2 /dev/zero` exhausts /tmp
will trigger other false positive failures in the test suite
which happens often when running tests with RUN_VERY_EXPENSIVE_TESTS set.
The attached patches tackle this by consolidating temp file handling
for tac and split, so that have a configurable $TMPDIR.
We then use $TMPDIR in an adjusted test to point to
a mounted file system to provide better isolation.
cheers,
Pádraig
From 47e1388d6fe4a8015de8f3c5521ed851aef6a193 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com>
Date: Tue, 18 Jul 2023 18:38:42 +0100
Subject: [PATCH 1/4] maint: add syntax check to ensure safe mkstemp usage
One needs to include stdlib--.h if using mkstemp()
lest one hits esoteric bugs with closed stdin etc.
* cfg.mk (sc_require_stdlib_safer): Add a new syntax check.
(sc_require_stdio_safer): Fix this; broken since commit fa7ed969c3.
---
cfg.mk | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/cfg.mk b/cfg.mk
index 2d6856e15..9c399595f 100644
--- a/cfg.mk
+++ b/cfg.mk
@@ -561,7 +561,7 @@ sc_prohibit_short_facl_mode_spec:
# Ensure that "stdio--.h" is used where appropriate.
sc_require_stdio_safer:
@if $(VC_LIST_EXCEPT) | grep -l '\.[ch]$$' > /dev/null; then \
- files=$$(grep -l '$(begword)freopen \?(' $$($(VC_LIST_EXCEPT) \
+ files=$$(grep -El '$(begword)freopen ?\(' $$($(VC_LIST_EXCEPT)\
| grep '\.[ch]$$')); \
test -n "$$files" && grep -LE 'include "stdio--.h"' $$files \
| grep . && \
@@ -570,6 +570,18 @@ sc_require_stdio_safer:
else :; \
fi
+# Ensure that "stdlib--.h" is used where appropriate.
+sc_require_stdlib_safer:
+ @if $(VC_LIST_EXCEPT) | grep -l '\.[ch]$$' > /dev/null; then \
+ files=$$(grep -El '$(begword)mkstemp ?\(' $$($(VC_LIST_EXCEPT)\
+ | grep '\.[ch]$$')); \
+ test -n "$$files" && grep -LE 'include "stdlib--.h"' $$files \
+ | grep . && \
+ { echo '$(ME): the above files should use "stdlib--.h"' \
+ 1>&2; exit 1; } || :; \
+ else :; \
+ fi
+
sc_prohibit_perl_hash_quotes:
@prohibit="\{'[A-Z_]+' *[=}]" \
halt="in Perl code, write \$$hash{KEY}, not \$$hash{'K''EY'}" \
--
2.41.0
From 0487b0bdc5eaf987e13affe8a0d08fa6820133ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com>
Date: Tue, 18 Jul 2023 15:39:05 +0100
Subject: [PATCH 2/4] tac: fall back to /tmp if $TMPDIR is unavailable
This also refactors temp_stream() to its own module,
in preparation for use by split.
* src/tac.c: Refactor temp_stream() out to ...
* src/temp-stream.c: ... A new module mostly refactored from tac,
but uses tmpdir to more robustly support $TMPDIR,
while falling back to /tmp if not available.
* src/temp-stream.h: The new module interface.
* src/local.mk: Reference the new module from tac.
* po/POTFILES.in: Reference the new module with translatable strings.
* NEWS: Mention the user visible improvements to tac TMPDIR handling.
---
NEWS | 2 +
po/POTFILES.in | 1 +
src/local.mk | 3 +
src/tac.c | 121 +---------------------------------
src/temp-stream.c | 165 ++++++++++++++++++++++++++++++++++++++++++++++
src/temp-stream.h | 6 ++
tests/tac/tac.pl | 4 +-
7 files changed, 179 insertions(+), 123 deletions(-)
create mode 100644 src/temp-stream.c
create mode 100644 src/temp-stream.h
diff --git a/NEWS b/NEWS
index b21a2b74d..2708371c6 100644
--- a/NEWS
+++ b/NEWS
@@ -68,6 +68,8 @@ GNU coreutils NEWS -*- outline -*-
split now uses more tuned access patterns for its potentially large input.
This was seen to improve throughput by 5% when reading from SSD.
+ tac now falls back to '/tmp' if a configured $TMPDIR is unavailable.
+
* Noteworthy changes in release 9.3 (2023-04-18) [stable]
diff --git a/po/POTFILES.in b/po/POTFILES.in
index 9dbbe32f3..695bf6fa8 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -125,6 +125,7 @@ src/tac-pipe.c
src/tac.c
src/tail.c
src/tee.c
+src/temp-stream.c
src/test.c
src/timeout.c
src/touch.c
diff --git a/src/local.mk b/src/local.mk
index 3e3d3f696..dd50ba7ae 100644
--- a/src/local.mk
+++ b/src/local.mk
@@ -59,6 +59,7 @@ noinst_HEADERS = \
src/set-fields.h \
src/statx.h \
src/system.h \
+ src/temp-stream.h \
src/uname.h
EXTRA_DIST += \
@@ -395,6 +396,8 @@ src_arch_SOURCES = src/uname.c src/uname-arch.c
src_cut_SOURCES = src/cut.c src/set-fields.c
src_numfmt_SOURCES = src/numfmt.c src/set-fields.c
+src_tac_SOURCES = src/tac.c src/temp-stream.c
+
src_tail_SOURCES = src/tail.c src/iopoll.c
src_tee_SOURCES = src/tee.c src/iopoll.c
diff --git a/src/tac.c b/src/tac.c
index e52d4b7f2..285f99a74 100644
--- a/src/tac.c
+++ b/src/tac.c
@@ -45,7 +45,7 @@ tac -r -s '.\|
#include "filenamecat.h"
#include "safe-read.h"
-#include "stdlib--.h"
+#include "temp-stream.h"
#include "xbinary-io.h"
/* The official name of this program (e.g., no 'g' prefix). */
@@ -55,18 +55,6 @@ tac -r -s '.\|
proper_name ("Jay Lepreau"), \
proper_name ("David MacKenzie")
-#if defined __MSDOS__ || defined _WIN32
-/* Define this to non-zero on systems for which the regular mechanism
- (of unlinking an open file and expecting to be able to write, seek
- back to the beginning, then reread it) doesn't work. E.g., on Windows
- and DOS systems. */
-# define DONT_UNLINK_WHILE_OPEN 1
-#endif
-
-
-#ifndef DEFAULT_TMPDIR
-# define DEFAULT_TMPDIR "/tmp"
-#endif
/* The number of bytes per atomic read. */
#define INITIAL_READSIZE 8192
@@ -381,113 +369,6 @@ tac_seekable (int input_fd, char const *file, off_t file_pos)
}
}
-#if DONT_UNLINK_WHILE_OPEN
-
-/* FIXME-someday: remove all of this DONT_UNLINK_WHILE_OPEN junk.
- Using atexit like this is wrong, since it can fail
- when called e.g. 32 or more times.
- But this isn't a big deal, since the code is used only on WOE/DOS
- systems, and few people invoke tac on that many nonseekable files. */
-
-static char const *file_to_remove;
-static FILE *fp_to_close;
-
-static void
-unlink_tempfile (void)
-{
- fclose (fp_to_close);
- unlink (file_to_remove);
-}
-
-static void
-record_or_unlink_tempfile (char const *fn, FILE *fp)
-{
- if (!file_to_remove)
- {
- file_to_remove = fn;
- fp_to_close = fp;
- atexit (unlink_tempfile);
- }
-}
-
-#else
-
-static void
-record_or_unlink_tempfile (char const *fn, MAYBE_UNUSED FILE *fp)
-{
- unlink (fn);
-}
-
-#endif
-
-/* A wrapper around mkstemp that gives us both an open stream pointer,
- FP, and the corresponding FILE_NAME. Always return the same FP/name
- pair, rewinding/truncating it upon each reuse. */
-static bool
-temp_stream (FILE **fp, char **file_name)
-{
- static char *tempfile = nullptr;
- static FILE *tmp_fp;
- if (tempfile == nullptr)
- {
- char const *t = getenv ("TMPDIR");
- char const *tempdir = t ? t : DEFAULT_TMPDIR;
- tempfile = mfile_name_concat (tempdir, "tacXXXXXX", nullptr);
- if (tempdir == nullptr)
- {
- error (0, 0, _("memory exhausted"));
- return false;
- }
-
- /* FIXME: there's a small window between a successful mkstemp call
- and the unlink that's performed by record_or_unlink_tempfile.
- If we're interrupted in that interval, this code fails to remove
- the temporary file. On systems that define DONT_UNLINK_WHILE_OPEN,
- the window is much larger -- it extends to the atexit-called
- unlink_tempfile.
- FIXME: clean up upon fatal signal. Don't block them, in case
- $TMPFILE is a remote file system. */
-
- int fd = mkstemp (tempfile);
- if (fd < 0)
- {
- error (0, errno, _("failed to create temporary file in %s"),
- quoteaf (tempdir));
- goto Reset;
- }
-
- tmp_fp = fdopen (fd, (O_BINARY ? "w+b" : "w+"));
- if (! tmp_fp)
- {
- error (0, errno, _("failed to open %s for writing"),
- quoteaf (tempfile));
- close (fd);
- unlink (tempfile);
- Reset:
- free (tempfile);
- tempfile = nullptr;
- return false;
- }
-
- record_or_unlink_tempfile (tempfile, tmp_fp);
- }
- else
- {
- clearerr (tmp_fp);
- if (fseeko (tmp_fp, 0, SEEK_SET) < 0
- || ftruncate (fileno (tmp_fp), 0) < 0)
- {
- error (0, errno, _("failed to rewind stream for %s"),
- quoteaf (tempfile));
- return false;
- }
- }
-
- *fp = tmp_fp;
- *file_name = tempfile;
- return true;
-}
-
/* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
and file name. Return the number of bytes copied, or -1 on error. */
diff --git a/src/temp-stream.c b/src/temp-stream.c
new file mode 100644
index 000000000..993e44c46
--- /dev/null
+++ b/src/temp-stream.c
@@ -0,0 +1,165 @@
+/* temp-stream.c -- provide a stream to a per process temp file
+
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "stdlib--.h" /* For mkstemp that returns safer FDs. */
+#include "system.h"
+#include "tmpdir.h"
+
+#include "temp-stream.h"
+
+
+#if defined __MSDOS__ || defined _WIN32
+/* Define this to non-zero on systems for which the regular mechanism
+ (of unlinking an open file and expecting to be able to write, seek
+ back to the beginning, then reread it) doesn't work. E.g., on Windows
+ and DOS systems. */
+# define DONT_UNLINK_WHILE_OPEN 1
+#endif
+
+#if DONT_UNLINK_WHILE_OPEN
+
+/* FIXME-someday: remove all of this DONT_UNLINK_WHILE_OPEN junk.
+ Using atexit like this is wrong, since it can fail
+ when called e.g. 32 or more times.
+ But this isn't a big deal, since the code is used only on WOE/DOS
+ systems, and few people invoke tac on that many nonseekable files. */
+
+static char const *file_to_remove;
+static FILE *fp_to_close;
+
+static void
+unlink_tempfile (void)
+{
+ fclose (fp_to_close);
+ unlink (file_to_remove);
+}
+
+static void
+record_or_unlink_tempfile (char const *fn, FILE *fp)
+{
+ if (!file_to_remove)
+ {
+ file_to_remove = fn;
+ fp_to_close = fp;
+ atexit (unlink_tempfile);
+ }
+}
+
+#else
+
+static void
+record_or_unlink_tempfile (char const *fn, MAYBE_UNUSED FILE *fp)
+{
+ unlink (fn);
+}
+
+#endif
+
+/* A wrapper around mkstemp that gives us both an open stream pointer,
+ FP, and the corresponding FILE_NAME. Always return the same FP/name
+ pair, rewinding/truncating it upon each reuse.
+
+ Note this honors $TMPDIR, unlike the standard defined tmpfile().
+
+ Returns TRUE on success. */
+bool
+temp_stream (FILE **fp, char **file_name)
+{
+ static char *tempfile = nullptr;
+ static FILE *tmp_fp;
+ if (tempfile == nullptr)
+ {
+ char *tempbuf = NULL;
+ size_t tempbuf_len = 128;
+
+ while (true)
+ {
+ if (! (tempbuf = realloc (tempbuf, tempbuf_len)))
+ {
+ error (0, errno, _("failed to make temporary file name"));
+ return false;
+ }
+
+ if (path_search (tempbuf, tempbuf_len, NULL, "cutmp", true) == 0)
+ break;
+
+ if (errno != EINVAL || PATH_MAX / 2 < tempbuf_len)
+ {
+ error (0, errno == EINVAL ? ENAMETOOLONG : errno,
+ _("failed to make temporary file name"));
+ return false;
+ }
+
+ tempbuf_len *= 2;
+ }
+
+ tempfile = tempbuf;
+
+ /* FIXME: there's a small window between a successful mkstemp call
+ and the unlink that's performed by record_or_unlink_tempfile.
+ If we're interrupted in that interval, this code fails to remove
+ the temporary file. On systems that define DONT_UNLINK_WHILE_OPEN,
+ the window is much larger -- it extends to the atexit-called
+ unlink_tempfile.
+ FIXME: clean up upon fatal signal. Don't block them, in case
+ $TMPDIR is a remote file system. */
+
+ int fd = mkstemp (tempfile);
+ if (fd < 0)
+ {
+ error (0, errno, _("failed to create temporary file %s"),
+ quoteaf (tempfile));
+ goto Reset;
+ }
+
+ tmp_fp = fdopen (fd, (O_BINARY ? "w+b" : "w+"));
+ if (! tmp_fp)
+ {
+ error (0, errno, _("failed to open %s for writing"),
+ quoteaf (tempfile));
+ close (fd);
+ unlink (tempfile);
+ Reset:
+ free (tempfile);
+ tempfile = nullptr;
+ return false;
+ }
+
+ record_or_unlink_tempfile (tempfile, tmp_fp);
+ }
+ else
+ {
+ clearerr (tmp_fp);
+ if (fseeko (tmp_fp, 0, SEEK_SET) < 0
+ || ftruncate (fileno (tmp_fp), 0) < 0)
+ {
+ error (0, errno, _("failed to rewind stream for %s"),
+ quoteaf (tempfile));
+ return false;
+ }
+ }
+
+ *fp = tmp_fp;
+ if (file_name)
+ *file_name = tempfile;
+ return true;
+}
diff --git a/src/temp-stream.h b/src/temp-stream.h
new file mode 100644
index 000000000..6c32e0c8d
--- /dev/null
+++ b/src/temp-stream.h
@@ -0,0 +1,6 @@
+/* A wrapper around mkstemp that gives us both an open stream pointer,
+ FP, and the corresponding FILE_NAME. Always return the same FP/name
+ pair, rewinding/truncating it upon each reuse.
+
+ Note this honors $TMPDIR, unlike the standard defined tmpfile(). */
+extern bool temp_stream (FILE **fp, char **file_name);
diff --git a/tests/tac/tac.pl b/tests/tac/tac.pl
index 1c188e41c..dd67c80ee 100755
--- a/tests/tac/tac.pl
+++ b/tests/tac/tac.pl
@@ -74,9 +74,7 @@ my @Tests =
['pipe-bad-tmpdir',
{ENV => "TMPDIR=$bad_dir"},
{IN_PIPE => "a\n"},
- {ERR_SUBST => "s,'$bad_dir': .*,...,"},
- {ERR => "$prog: failed to create temporary file in ...\n"},
- {EXIT => 1}],
+ {OUT=>"a\n"}],
# coreutils-8.5's tac would double-free its primary buffer.
['double-free', {IN=>$long_line}, {OUT=>$long_line}],
--
2.41.0
From d310200d138e0d14e5525936ef5bb19f2594969e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com>
Date: Tue, 18 Jul 2023 18:55:30 +0100
Subject: [PATCH 3/4] split: honor $TMPDIR for temp files
* bootstrap.conf: Depend on tmpdir rather than tmpfile,
as the standard tmpfile() doesn't honor $TMPDIR.
* src/split.c (copy_to_tmpfile): Adjust to call temp_stream() rather
than tmpfile();
* NEWS: Mention the improvement.
---
NEWS | 2 ++
bootstrap.conf | 2 +-
src/local.mk | 1 +
src/split.c | 5 +++--
4 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/NEWS b/NEWS
index 2708371c6..7d2ca7f6b 100644
--- a/NEWS
+++ b/NEWS
@@ -68,6 +68,8 @@ GNU coreutils NEWS -*- outline -*-
split now uses more tuned access patterns for its potentially large input.
This was seen to improve throughput by 5% when reading from SSD.
+ split now supports a configurable $TMPDIR for handling any temporary files.
+
tac now falls back to '/tmp' if a configured $TMPDIR is unavailable.
diff --git a/bootstrap.conf b/bootstrap.conf
index aaec1fb03..6ce27d5dd 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -274,7 +274,7 @@ gnulib_modules="
time_rz
timer-time
timespec
- tmpfile
+ tmpdir
tzset
uname
unicodeio
diff --git a/src/local.mk b/src/local.mk
index dd50ba7ae..cb9b39274 100644
--- a/src/local.mk
+++ b/src/local.mk
@@ -396,6 +396,7 @@ src_arch_SOURCES = src/uname.c src/uname-arch.c
src_cut_SOURCES = src/cut.c src/set-fields.c
src_numfmt_SOURCES = src/numfmt.c src/set-fields.c
+src_split_SOURCES = src/split.c src/temp-stream.c
src_tac_SOURCES = src/tac.c src/temp-stream.c
src_tail_SOURCES = src/tail.c src/iopoll.c
diff --git a/src/split.c b/src/split.c
index bfe391728..cf776364c 100644
--- a/src/split.c
+++ b/src/split.c
@@ -40,6 +40,7 @@
#include "quote.h"
#include "sig2str.h"
#include "sys-limits.h"
+#include "temp-stream.h"
#include "xbinary-io.h"
#include "xdectoint.h"
#include "xstrtol.h"
@@ -279,8 +280,8 @@ CHUNKS may be:\n\
static off_t
copy_to_tmpfile (int fd, char *buf, idx_t bufsize)
{
- FILE *tmp = tmpfile ();
- if (!tmp)
+ FILE *tmp;
+ if (!temp_stream (&tmp, nullptr))
return -1;
off_t copied = 0;
off_t r;
--
2.41.0
From b1728a881f1fc08ff969ca527057a9005402a327 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com>
Date: Tue, 18 Jul 2023 12:40:57 +0100
Subject: [PATCH 4/4] tests: split: provide more isolated /tmp handling
* tests/split/l-chunk.sh: Move the "expensive" portion to ...
* tests/split/l-chunk-root.sh: .. A new test split from l-chunk.sh
which uses an isolated TMPDIR, rather than exhausting /tmp,
as that gives false positive failures with some other coreutils tests
like tac-2-nonseekable.sh and shuf-reservoir.sh at least.
* tests/local.mk: Reference the new test.
---
tests/local.mk | 1 +
tests/split/l-chunk-root.sh | 44 +++++++++++++++++++++++++++++++++++++
tests/split/l-chunk.sh | 12 ----------
3 files changed, 45 insertions(+), 12 deletions(-)
create mode 100755 tests/split/l-chunk-root.sh
diff --git a/tests/local.mk b/tests/local.mk
index f35728229..a7088afbc 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -137,6 +137,7 @@ all_root_tests = \
tests/rm/one-file-system.sh \
tests/rm/read-only.sh \
tests/rm/empty-immutable-skip.sh \
+ tests/split/l-chunk-root.sh \
tests/tail/append-only.sh \
tests/tail/end-of-device.sh \
tests/touch/now-owned-by-other.sh
diff --git a/tests/split/l-chunk-root.sh b/tests/split/l-chunk-root.sh
new file mode 100755
index 000000000..352b3d28a
--- /dev/null
+++ b/tests/split/l-chunk-root.sh
@@ -0,0 +1,44 @@
+#!/bin/sh
+# test splitting into newline delineated chunks from infinite imput
+
+# Copyright (C) 2023 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ split
+require_root_
+
+cwd=$(pwd)
+cleanup_() { cd /; umount "$cwd/mnt"; }
+
+# Create a file system to provide an isolated $TMPDIR
+dd if=/dev/zero of=blob bs=8192 count=200 &&
+mkdir mnt &&
+mkfs -t ext2 -F blob &&
+mount -oloop blob mnt ||
+ skip_ "insufficient mount/ext2 support"
+export TMPDIR="$cwd/mnt"
+
+# 'split' should fail eventially when
+# creating an infinitely long output file.
+
+returns_ 1 split -n l/2 /dev/zero || fail=1
+rm x??
+
+# Repeat the above, but with 1/2, not l/2:
+returns_ 1 split -n 1/2 /dev/zero || fail=1
+rm x??
+
+Exit $fail
diff --git a/tests/split/l-chunk.sh b/tests/split/l-chunk.sh
index bc9ad122c..23c787871 100755
--- a/tests/split/l-chunk.sh
+++ b/tests/split/l-chunk.sh
@@ -35,18 +35,6 @@ split -n l/10 /dev/null || fail=1
test "$(stat -c %s x* | uniq -c | sed 's/^ *//; s/ /x/')" = "10x0" || fail=1
rm x??
-# 'split' should reject any attempt to create an infinitely
-# long output file.
-# This test is very expensive as it runs out of /tmp space.
-if test "${RUN_VERY_EXPENSIVE_TESTS+set}" = set; then
- returns_ 1 split -n l/2 /dev/zero || fail=1
- rm x??
-
- # Repeat the above, but with 1/2, not l/2:
- returns_ 1 split -n 1/2 /dev/zero || fail=1
- rm x??
-fi
-
# Ensure --elide-empty-files is honored
split -e -n l/10 /dev/null || fail=1
returns_ 1 stat x?? 2>/dev/null || fail=1
--
2.41.0