Hi,
Am Mittwoch, den 27.03.2019, 21:31 +0100 schrieb Fabien COELHO:
> > > I still think that the speed should compute a double to avoid integer
> > > rounding errors within the computation. ISTM that rounding should only be
> > > done for display in the end.
> >
> > Ok, also done this way. I decided to print only full MB and not any
> > further digits as those don't seem very relevant.
>
> For the computation to be in double, you must convert to double somewhere
> in the formula, otherwise it is computed as ints and only cast as a double
> afterwards. Maybe:
>
> current_speed = (1000.0 / MEGABYTES) * current_size / elapsed;
>
> Or anything which converts to double early.
Are you sure, seeing elapsed is a double already? If I print out two
additional fractional digits for current_speed, I get two non-zero
numbers, are those garbage?
Anyway, I've done that now as it doesn't hurt.
> Instead of casting percent to int, maybe use "%.0f" as well, just like
> current_speed?
Ok.
> + if ((blockno % 100 == 0) && show_progress)
>
> I'd invert the condition to avoid a modulo when not needed.
Ok.
> > > I'm okay with calling the report on each file even if this means every few
> > > GB...
> >
> > For my I/O throttling branch I've backed off to only call the report
> > every 100 blocks (and at the end of the file). I've added that logic to
> > this patch as well.
>
> The 100 blocks = 800 KiB looks arbitrary. What is the rational? ISTM that
> postgres will tend to store a power of two number of pages on full
> segments, so maybe there could be a rational for 1024. Not sure.
It was arbitrary. I've changed it to 1024 now which looks a bit better.
> > > Someone suggested ETA, and it seems rather simple to do. What about
> > > adding it?
> >
> > I don't know, just computing the ETA is easy of course. But you'd have
> > to fiddle with seconds/minutes/hours conversions cause the runtime could
> > be hours. Then, you don't want to have it bumping around weirdly when
> > your I/O rate changes, cf. https://xkcd.com/612/ That doesn't sounds
> > simpler than the signal trigger we might get rid of.
>
> Ok, it is more complicated that it looks for large sizes if second is not
> the right display unit.
Right, new version attached.
Michael
--
Michael Banck
Projektleiter / Senior Berater
Tel.: +49 2166 9901-171
Fax: +49 2166 9901-100
Email: michael.ba...@credativ.de
credativ GmbH, HRB Mönchengladbach 12080
USt-ID-Nummer: DE204566209
Trompeterallee 108, 41189 Mönchengladbach
Geschäftsführung: Dr. Michael Meskes, Jörg Folz, Sascha Heuer
Unser Umgang mit personenbezogenen Daten unterliegt
folgenden Bestimmungen: https://www.credativ.de/datenschutz
diff --git a/doc/src/sgml/ref/pg_checksums.sgml b/doc/src/sgml/ref/pg_checksums.sgml
index 1f4d4ab8b4..d18072a067 100644
--- a/doc/src/sgml/ref/pg_checksums.sgml
+++ b/doc/src/sgml/ref/pg_checksums.sgml
@@ -136,6 +136,19 @@ PostgreSQL documentation
</varlistentry>
<varlistentry>
+ <term><option>-P</option></term>
+ <term><option>--progress</option></term>
+ <listitem>
+ <para>
+ Enable progress reporting. Turning this on will deliver an approximate
+ progress report during the checksum verification. Sending the
+ <systemitem>SIGUSR1</systemitem> signal will toggle progress reporting
+ on or off during the verification run (not available on Windows).
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><option>-V</option></term>
<term><option>--version</option></term>
<listitem>
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 5c185ebed8..6fb7bc0b64 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -15,6 +15,8 @@
#include "postgres_fe.h"
#include <dirent.h>
+#include <signal.h>
+#include <time.h>
#include <sys/stat.h>
#include <unistd.h>
@@ -24,6 +26,7 @@
#include "common/file_utils.h"
#include "getopt_long.h"
#include "pg_getopt.h"
+#include "portability/instr_time.h"
#include "storage/bufpage.h"
#include "storage/checksum.h"
#include "storage/checksum_impl.h"
@@ -37,6 +40,7 @@ static ControlFileData *ControlFile;
static char *only_relfilenode = NULL;
static bool do_sync = true;
static bool verbose = false;
+static bool show_progress = false;
typedef enum
{
@@ -59,6 +63,14 @@ static PgChecksumMode mode = PG_MODE_CHECK;
static const char *progname;
+/*
+ * Progress status information.
+ */
+int64 total_size = 0;
+int64 current_size = 0;
+instr_time last_progress_update;
+instr_time scan_started;
+
static void
usage(void)
{
@@ -73,6 +85,7 @@ usage(void)
printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n"));
printf(_(" -v, --verbose output verbose messages\n"));
printf(_(" -r RELFILENODE check only relation with specified relfilenode\n"));
+ printf(_(" -P, --progress show progress information\n"));
printf(_(" -V, --version output version information, then exit\n"));
printf(_(" -?, --help show this help, then exit\n"));
printf(_("\nIf no data directory (DATADIR) is specified, "
@@ -97,6 +110,83 @@ static const char *const skip[] = {
NULL,
};
+static void
+toggle_progress_report(int signum)
+{
+
+ /* we handle SIGUSR1 only, and toggle the value of show_progress */
+ if (signum == SIGUSR1)
+ {
+ /*
+ * Skip to the next line in order to avoid overwriting half of
+ * the progress report line with the final output.
+ */
+ if (show_progress && isatty(fileno(stderr)))
+ fprintf(stderr, "\n");
+ show_progress = !show_progress;
+ }
+}
+
+/*
+ * Report current progress status. Parts borrowed from
+ * PostgreSQLs' src/bin/pg_basebackup.c
+ */
+static void
+report_progress(bool force)
+{
+ instr_time now;
+ double elapsed;
+ double total_percent;
+ double current_speed;
+
+ char totalstr[32];
+ char currentstr[32];
+
+ INSTR_TIME_SET_CURRENT(now);
+
+ /* Make sure we report at most once every 250 milliseconds */
+ if ((INSTR_TIME_GET_MILLISEC(now) -
+ INSTR_TIME_GET_MILLISEC(last_progress_update) < 250) && !force)
+ return;
+
+ /* Save current time */
+ last_progress_update = now;
+
+ /* Elapsed time in milliseconds since start of scan */
+ elapsed = (INSTR_TIME_GET_MILLISEC(now) -
+ INSTR_TIME_GET_MILLISEC(scan_started));
+
+ /* Adjust total size if current_size is larger */
+ if (current_size > total_size)
+ total_size = current_size;
+
+ /* Calculate current percent done */
+ total_percent = total_size ? 100.0 * current_size / total_size : 0.0;
+
+#define MEGABYTES (1024 * 1024)
+
+ /*
+ * Calculate current speed, converting current_size from bytes to
+ * megabytes and elapsed from milliseconds to seconds.
+ */
+ current_speed = (current_size / MEGABYTES) / (elapsed / 1000.0);
+
+ snprintf(totalstr, sizeof(totalstr), INT64_FORMAT,
+ total_size / MEGABYTES);
+ snprintf(currentstr, sizeof(currentstr), INT64_FORMAT,
+ current_size / MEGABYTES);
+
+ /*
+ * Print five blanks at the end so the end of previous lines which were
+ * longer don't remain partly visible.
+ */
+ fprintf(stderr, "%s/%s MB (%.0f%%, %.0f MB/s)%5s",
+ currentstr, totalstr, total_percent, current_speed, "");
+
+ /* Stay on the same line if reporting to a terminal */
+ fprintf(stderr, isatty(fileno(stderr)) ? "\r" : "\n");
+}
+
static bool
skipfile(const char *fn)
{
@@ -153,6 +243,7 @@ scan_file(const char *fn, BlockNumber segmentno)
continue;
csum = pg_checksum_page(buf.data, blockno + segmentno * RELSEG_SIZE);
+ current_size += r;
if (mode == PG_MODE_CHECK)
{
if (csum != header->pd_checksum)
@@ -183,6 +274,13 @@ scan_file(const char *fn, BlockNumber segmentno)
exit(1);
}
}
+
+ /*
+ * Call report_progress() every 1024 blocks if progress
+ * reporting is requested.
+ */
+ if (show_progress && (blockno % 1024 == 0))
+ report_progress(false);
}
if (verbose)
@@ -195,12 +293,17 @@ scan_file(const char *fn, BlockNumber segmentno)
_("%s: checksums enabled in file \"%s\"\n"), progname, fn);
}
+ /* Make sure progress is reported at least once per file */
+ if (show_progress)
+ report_progress(false);
+
close(f);
}
-static void
-scan_directory(const char *basedir, const char *subdir)
+static int64
+scan_directory(const char *basedir, const char *subdir, bool sizeonly)
{
+ int64 dirsize = 0;
char path[MAXPGPATH];
DIR *dir;
struct dirent *de;
@@ -279,16 +382,20 @@ scan_directory(const char *basedir, const char *subdir)
/* Relfilenode not to be included */
continue;
- scan_file(fn, segmentno);
+ dirsize += st.st_size;
+
+ if (!sizeonly)
+ scan_file(fn, segmentno);
}
#ifndef WIN32
else if (S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode))
#else
else if (S_ISDIR(st.st_mode) || pgwin32_is_junction(fn))
#endif
- scan_directory(path, de->d_name);
+ dirsize += scan_directory(path, de->d_name, sizeonly);
}
closedir(dir);
+ return dirsize;
}
int
@@ -300,6 +407,7 @@ main(int argc, char *argv[])
{"disable", no_argument, NULL, 'd'},
{"enable", no_argument, NULL, 'e'},
{"no-sync", no_argument, NULL, 'N'},
+ {"progress", no_argument, NULL, 'P'},
{"verbose", no_argument, NULL, 'v'},
{NULL, 0, NULL, 0}
};
@@ -327,7 +435,7 @@ main(int argc, char *argv[])
}
}
- while ((c = getopt_long(argc, argv, "cD:deNr:v", long_options, &option_index)) != -1)
+ while ((c = getopt_long(argc, argv, "cD:deNPr:v", long_options, &option_index)) != -1)
{
switch (c)
{
@@ -357,6 +465,9 @@ main(int argc, char *argv[])
}
only_relfilenode = pstrdup(optarg);
break;
+ case 'P':
+ show_progress = true;
+ break;
default:
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
@@ -436,6 +547,29 @@ main(int argc, char *argv[])
exit(1);
}
+#ifndef WIN32
+ /*
+ * Assign SIGUSR1 signal handler to toggle progress status information
+ */
+ pqsignal(SIGUSR1, toggle_progress_report);
+#endif
+
+ /*
+ * As progress status information may be requested even after start of
+ * operation, we need to scan the directory tree(s) twice, once to get
+ * the idea how much data we need to scan and finally to do the real
+ * legwork.
+ */
+ total_size = scan_directory(DataDir, "global", true);
+ total_size += scan_directory(DataDir, "base", true);
+ total_size += scan_directory(DataDir, "pg_tblspc", true);
+
+ /*
+ * Remember start time. Required to calculate the current speed in
+ * report_progress()
+ */
+ INSTR_TIME_SET_CURRENT(scan_started);
+
if (ControlFile->data_checksum_version == 0 &&
mode == PG_MODE_DISABLE)
{
@@ -453,9 +587,20 @@ main(int argc, char *argv[])
/* Operate on all files if checking or enabling checksums */
if (mode == PG_MODE_CHECK || mode == PG_MODE_ENABLE)
{
- scan_directory(DataDir, "global");
- scan_directory(DataDir, "base");
- scan_directory(DataDir, "pg_tblspc");
+ scan_directory(DataDir, "global", false);
+ scan_directory(DataDir, "base", false);
+ scan_directory(DataDir, "pg_tblspc", false);
+
+ /*
+ * Done. Move to next line in case progress information was shown.
+ * Otherwise we clutter the summary output.
+ */
+ if (show_progress)
+ {
+ report_progress(true);
+ if (isatty(fileno(stderr)))
+ fprintf(stderr, "\n");
+ }
printf(_("Checksum operation completed\n"));
printf(_("Files scanned: %s\n"), psprintf(INT64_FORMAT, files));