Hello,

I've created a patch for tar, which adds support for lz4 and zstd compression. Both compression tools have become increasingly popular and offer high-speed compression and decompression, making them interesting alternatives to previous compression tools.

lz4 is similar to lzo, but offers faster decompression. zstd is similar to gzip, but offers much faster compression and decompression.

See also:

    http://lz4.github.io/lz4/

    http://facebook.github.io/zstd/

For example, here are the results of creating an archive from a 1.12 GB directory containing binary and ASCII files ("/usr/local/gnu/"):

Compression

name         time     size       ratio
gnu.tar.gz     57.2s  534077354  44.3%
gnu.tar.bz2  1:53.9s  509401491  42.3%
gnu.tar.xz   7:15.6s  423886520  35.2%
gnu.tar.lz   7:37.2s  425162846  35.3%
gnu.tar.lzo     4.7s  676569472  56.2%
gnu.tar.lz4     4.9s  676600069  56.2%
gnu.tar.zst    10.7s  517897416  43.0%

Decompression

name         time
gnu.tar.gz    7.1s
gnu.tar.bz2  45.2s
gnu.tar.xz   26.3s
gnu.tar.lz   30.4s
gnu.tar.lzo   3.5s
gnu.tar.lz4   2.0s
gnu.tar.zst   3.2s

A second example using the Linux kernel source code (4.12.4), containing 697 MB of data:

Compression

name           time     size       ratio
linux.tar.gz     28.2s  156988489  21.4%
linux.tar.bz2  1:05.5s  120659282  16.5%
linux.tar.xz   4:26.9s  102214892  13.9%
linux.tar.lz   4:18.4s  104105244  14.2%
linux.tar.lzo     3.6s  261745137  35.7%
linux.tar.lz4     3.9s  252707015  34.5%
linux.tar.zst     5.9s  150448272  20.5%

Decompression

name           time
linux.tar.gz    3.6s
linux.tar.bz2  19.4s
linux.tar.xz    7.2s
linux.tar.lz    8.1s
linux.tar.lzo   2.5s
linux.tar.lz4   1.5s
linux.tar.zst   2.1s

(The tools were used with their default arguments. The hardware used was an AMD FX8350 4GHz.)

Attached files are:

patch-part-1 - diffs of buffer.c config.h.in configure.ac suffix.c tar.c
patch-part-2 - diffs of tar.texi tar.1 cs.po de.po fi.po ga.po id.po ru.po sl.po zh_CN.po

Cheers

--- tar-1.29/src/buffer.c       2016-03-14 20:58:16.000000000 +0000
+++ tar-1.29.1/src/buffer.c     2017-08-05 11:51:57.457112307 +0100
@@ -270,6 +270,8 @@
   ct_lzip,
   ct_lzma,
   ct_lzop,
+  ct_lz4,
+  ct_zstd,
   ct_xz
 };
 
@@ -298,6 +300,8 @@
   { ct_lzip,     4, "LZIP" },
   { ct_lzma,     6, "\xFFLZMA" },
   { ct_lzop,     4, "\211LZO" },
+  { ct_lz4,      4, "\x04\x22\x4D\x18" },
+  { ct_zstd,     4, "\x28\xB5\x2F\xFD" },
   { ct_xz,       6, "\xFD" "7zXZ" },
 };
 
@@ -313,6 +317,8 @@
   { ct_lzma,     LZMA_PROGRAM,     "--lzma" },
   { ct_lzma,     XZ_PROGRAM,       "-J" },
   { ct_lzop,     LZOP_PROGRAM,     "--lzop" },
+  { ct_lz4,      LZ4_PROGRAM,      "--lz4" },
+  { ct_zstd,     ZSTD_PROGRAM,     "--zstd" },
   { ct_xz,       XZ_PROGRAM,       "-J" },
   { ct_none }
 };
--- tar-1.29/config.h.in        2016-05-16 09:52:47.000000000 +0100
+++ tar-1.29.1/config.h.in      2017-08-05 13:17:22.327294420 +0100
@@ -2190,6 +2190,9 @@
    slash. */
 #undef LSTAT_FOLLOWS_SLASHED_SYMLINK
 
+/* Define to the program name of lz4 compressor program */
+#undef LZ4_PROGRAM
+
 /* Define to the program name of lzip compressor program */
 #undef LZIP_PROGRAM
 
@@ -2454,6 +2457,9 @@
 /* Define to the program name of xz compressor program */
 #undef XZ_PROGRAM
 
+/* Define to the program name of zstd compressor program */
+#undef ZSTD_PROGRAM
+
 /* Enable large inode numbers on Mac OS X 10.5. */
 #undef _DARWIN_USE_64_BIT_INODE
 
--- tar-1.29/configure.ac       2016-05-16 09:51:12.000000000 +0100
+++ tar-1.29.1/configure.ac     2017-08-05 12:02:08.206629003 +0100
@@ -249,6 +249,8 @@
 TAR_COMPR_PROGRAM(lzip)
 TAR_COMPR_PROGRAM(lzma)
 TAR_COMPR_PROGRAM(lzop)
+TAR_COMPR_PROGRAM(lz4)
+TAR_COMPR_PROGRAM(zstd)
 TAR_COMPR_PROGRAM(xz)
 
 AC_MSG_CHECKING(for default archive format)
--- tar-1.29/src/suffix.c       2016-01-20 09:26:32.000000000 +0000
+++ tar-1.29.1/src/suffix.c     2017-08-05 12:57:33.987681075 +0100
@@ -43,6 +43,8 @@
   { S(lzma, LZMA) },
   { S(tlz,  LZMA) },
   { S(lzo,  LZOP) },
+  { S(lz4,  LZ4) },
+  { S(zst,  ZSTD) },
   { S(xz,   XZ) },
   { S(txz,  XZ) }, /* Slackware */
   { NULL }
--- tar-1.29/src/tar.c  2016-03-24 05:42:14.000000000 +0000
+++ tar-1.29.1/src/tar.c        2017-08-05 11:31:35.793518014 +0100
@@ -293,6 +293,8 @@
   LZIP_OPTION,
   LZMA_OPTION,
   LZOP_OPTION,
+  LZ4_OPTION,
+  ZSTD_OPTION,
   MODE_OPTION,
   MTIME_OPTION,
   NEWER_MTIME_OPTION,
@@ -681,6 +683,8 @@
   {"lzip", LZIP_OPTION, 0, 0, NULL, GRID+1 },
   {"lzma", LZMA_OPTION, 0, 0, NULL, GRID+1 },
   {"lzop", LZOP_OPTION, 0, 0, NULL, GRID+1 },
+  {"lz4", LZ4_OPTION, 0, 0, NULL, GRID+1 },
+  {"zstd", ZSTD_OPTION, 0, 0, NULL, GRID+1 },
   {"xz", 'J', 0, 0, NULL, GRID+1 },
 #undef GRID
 
@@ -1125,6 +1129,15 @@
     case LZOP_OPTION:
       s = xasprintf (_("filter the archive through %s"), LZOP_PROGRAM);
+      break;
 
+    case LZ4_OPTION:
+      s = xasprintf (_("filter the archive through %s"), LZ4_PROGRAM);
+      break;
+
+    case ZSTD_OPTION:
+      s = xasprintf (_("filter the archive through %s"), ZSTD_PROGRAM);
+      break;
+
     case 'J':
       s = xasprintf (_("filter the archive through %s"), XZ_PROGRAM);
       break;
@@ -1496,6 +1509,14 @@
       set_use_compress_program_option (LZOP_PROGRAM, args->loc);
       break;
 
+    case LZ4_OPTION:
+      set_use_compress_program_option (LZ4_PROGRAM, args->loc);
+      break;
+
+    case ZSTD_OPTION:
+      set_use_compress_program_option (ZSTD_PROGRAM, args->loc);
+      break;
+
     case 'm':
       touch_option = true;
       break;
--- tar-1.29/doc/tar.texi       2016-04-14 09:50:55.000000000 +0100
+++ tar-1.29.1/doc/tar.texi     2017-08-05 13:11:30.062498623 +0100
@@ -2971,6 +2971,16 @@
 This option tells @command{tar} to read or write archives through
 @command{lzop}.  @xref{gzip}.
 
+@item --lz4
+
+This option tells @command{tar} to read or write archives through
+@command{lz4}.  @xref{gzip}.
+
+@item --zstd
+
+This option tells @command{tar} to read or write archives through
+@command{zstd}.  @xref{gzip}.
+
 @opsummary{mode}
 @item --mode=@var{permissions}
 
@@ -9554,14 +9564,17 @@
 @cindex lzip
 @cindex lzma
 @cindex lzop
+@cindex lz4
+@cindex zstd
 @cindex compress
 @GNUTAR{} is able to create and read compressed archives.  It supports
 a wide variety of compression programs, namely: @command{gzip},
 @command{bzip2}, @command{lzip}, @command{lzma}, @command{lzop},
-@command{xz} and traditional @command{compress}. The latter is
-supported mostly for backward compatibility, and we recommend
-against using it, because it is by far less effective than the other
-compression programs@footnote{It also had patent problems in the past.}.
+@command{lz4}, @command{zstd}, @command{xz} and traditional
+@command{compress}.  The latter is supported mostly for backward
+compatibility, and we recommend against using it, because it is by far
+less effective than the other compression programs@footnote{It also
+had patent problems in the past.}.
 
 Creating a compressed archive is simple: you just specify a
 @dfn{compression option} along with the usual archive creation
@@ -9570,10 +9583,11 @@
 (@option{--bzip2}) to create a @command{bzip2} compressed archive,
 @option{--lzip} to create an @asis{lzip} compressed archive,
 @option{-J} (@option{--xz}) to create an @asis{XZ} archive,
-@option{--lzma} to create an @asis{LZMA} compressed
-archive, @option{--lzop} to create an @asis{LSOP} archive, and
-@option{-Z} (@option{--compress}) to use @command{compress} program.
-For example:
+@option{--lzma} to create an @asis{LZMA} compressed archive,
+@option{--lzop} to create an @asis{LZOP} archive, @option{--lz4} to
+create an @asis{LZ4} archive, @option{--zstd} to create a @asis{ZSTD}
+archive, and @option{-Z} (@option{--compress}) to use
+@command{compress} program.  For example:
 
 @smallexample
 $ @kbd{tar czf archive.tar.gz .}
@@ -9693,6 +9707,14 @@
 @item --lzop
 Filter the archive through @command{lzop}.
 
+@opindex lz4
+@item --lz4
+Filter the archive through @command{lz4}.
+
+@opindex zstd
+@item --zstd
+Filter the archive through @command{zstd}.
+
 @opindex compress
 @opindex uncompress
 @item -Z
@@ -9764,6 +9786,8 @@
 @item @samp{.lzma} @tab @command{lzma}
 @item @samp{.tlz} @tab @command{lzma}
 @item @samp{.lzo} @tab @command{lzop}
+@item @samp{.lz4} @tab @command{lz4}
+@item @samp{.zst} @tab @command{zstd}
 @item @samp{.xz} @tab @command{xz}
 @end multitable
 
--- tar-1.29/doc/tar.1  2016-03-23 14:34:31.000000000 +0000
+++ tar-1.29.1/doc/tar.1        2017-08-05 12:21:04.050226023 +0100
@@ -13,7 +13,7 @@
 .\"
 .\" You should have received a copy of the GNU General Public License
 .\" along with this program.  If not, see <http://www.gnu.org/licenses/>.
-.TH TAR 1 "March 23, 2016" "TAR" "GNU TAR Manual"
+.TH TAR 1 "August 5, 2017" "TAR" "GNU TAR Manual"
 .SH NAME
 tar \- an archiving utility
 .SH SYNOPSIS
@@ -815,6 +815,14 @@
 Filter the archive through
 .BR lzop (1).
 .TP
+\fB\-\-lz4\fR
+Filter the archive through
+.BR lz4 (1).
+.TP
+\fB\-\-zstd\fR
+Filter the archive through
+.BR zstd (1).
+.TP
 \fB\-\-no\-auto\-compress\fR
 Do not use archive suffix to determine the compression program.
 .TP
@@ -1282,6 +1290,8 @@
 .BR gzip (1),
 .BR lzma (1),
 .BR lzop (1),
+.BR lz4 (1),
+.BR zstd (1),
 .BR rmt (8),
 .BR symlink (7),
 .BR tar (5),
--- tar-1.29/po/cs.po   2016-05-16 07:55:11.000000000 +0100
+++ tar-1.29.1/po/cs.po 2017-08-05 12:45:28.982138853 +0100
@@ -3085,6 +3085,12 @@
 #~ msgid "filter the archive through lzop"
 #~ msgstr "archiv protáhne skrze lzop"
 
+#~ msgid "filter the archive through lz4"
+#~ msgstr "archiv protáhne skrze lz4"
+
+#~ msgid "filter the archive through zstd"
+#~ msgstr "archiv protáhne skrze zstd"
+
 #~ msgid "block size"
 #~ msgstr "velikost bloku"
 
--- tar-1.29/po/de.po   2016-05-16 07:55:11.000000000 +0100
+++ tar-1.29.1/po/de.po 2017-08-05 12:38:34.827664336 +0100
@@ -3101,6 +3101,12 @@
 #~ msgid "filter the archive through lzop"
 #~ msgstr "Archiv durch lzop filtern"
 
+#~ msgid "filter the archive through lz4"
+#~ msgstr "Archiv durch lz4 filtern"
+
+#~ msgid "filter the archive through zstd"
+#~ msgstr "Archiv durch zstd filtern"
+
 #~ msgid "Warning: the -I option is not supported; perhaps you meant -j or -T?"
 #~ msgstr ""
 #~ "Warnung: Die Option -I ist nicht unterstützt, meinen Sie -j oder -T?"
--- tar-1.29/po/fi.po   2016-05-16 07:55:12.000000000 +0100
+++ tar-1.29.1/po/fi.po 2017-08-05 12:41:31.745588733 +0100
@@ -3019,6 +3019,14 @@
 #~ msgid "filter the archive through lzop"
 #~ msgstr "ohjaa arkisto gzip-ohjelman läpi"
 
+#, fuzzy
+#~ msgid "filter the archive through lz4"
+#~ msgstr "ohjaa arkisto lz4-ohjelman läpi"
+
+#, fuzzy
+#~ msgid "filter the archive through zstd"
+#~ msgstr "ohjaa arkisto zstd-ohjelman läpi"
+
 #~ msgid "rmtd: Cannot allocate buffer space\n"
 #~ msgstr "rmtd: Puskuritilaa ei voi varata\n"
 
--- tar-1.29/po/ga.po   2016-05-16 07:55:12.000000000 +0100
+++ tar-1.29.1/po/ga.po 2017-08-05 12:39:14.036426387 +0100
@@ -3122,6 +3122,12 @@
 #~ msgid "filter the archive through lzop"
 #~ msgstr "scag an chartlann le lzop"
 
+#~ msgid "filter the archive through lz4"
+#~ msgstr "scag an chartlann le lz4"
+
+#~ msgid "filter the archive through zstd"
+#~ msgstr "scag an chartlann le zstd"
+
 #~ msgid "%s: Invalid group"
 #~ msgstr "%s: Grúpa neamhbhailí"
 
--- tar-1.29/po/id.po   2016-05-16 07:55:12.000000000 +0100
+++ tar-1.29.1/po/id.po 2017-08-05 12:39:49.570210535 +0100
@@ -3061,6 +3061,12 @@
 #~ msgid "filter the archive through lzop"
 #~ msgstr "saring archive melalui lzop"
 
+#~ msgid "filter the archive through lz4"
+#~ msgstr "saring archive melalui lz4"
+
+#~ msgid "filter the archive through zstd"
+#~ msgstr "saring archive melalui zstd"
+
 #~ msgid "Warning: the -I option is not supported; perhaps you meant -j or -T?"
 #~ msgstr ""
 #~ "Peringatan: option -l tidak didukung; mungkin maksud anda -j atau -T?"
--- tar-1.29/po/ru.po   2016-05-16 07:55:13.000000000 +0100
+++ tar-1.29.1/po/ru.po 2017-08-05 12:45:00.316314491 +0100
@@ -3097,3 +3097,9 @@
 
 #~ msgid "filter the archive through lzop"
 #~ msgstr "пропустить архив через lzop"
+
+#~ msgid "filter the archive through lz4"
+#~ msgstr "пропустить архив через lz4"
+
+#~ msgid "filter the archive through zstd"
+#~ msgstr "пропустить архив через zstd"
--- tar-1.29/po/sl.po   2016-05-16 07:55:13.000000000 +0100
+++ tar-1.29.1/po/sl.po 2017-08-05 12:44:13.786599325 +0100
@@ -3095,6 +3095,14 @@
 #~ msgid "filter the archive through lzop"
 #~ msgstr "filtriraj arhiv skozi gzip"
 
+#, fuzzy
+#~ msgid "filter the archive through lz4"
+#~ msgstr "filtriraj arhiv skozi lz4"
+
+#, fuzzy
+#~ msgid "filter the archive through zstd"
+#~ msgstr "filtriraj arhiv skozi zstd"
+
 #~ msgid "[.]NUMBER"
 #~ msgstr "[.]N"
 
--- tar-1.29/po/zh_CN.po        2016-05-16 07:55:14.000000000 +0100
+++ tar-1.29.1/po/zh_CN.po      2017-08-05 12:46:07.721901805 +0100
@@ -2982,6 +2982,12 @@
 #~ msgid "filter the archive through lzop"
 #~ msgstr "通过 lzop 过滤归档"
 
+#~ msgid "filter the archive through lz4"
+#~ msgstr "通过 lz4 过滤归档"
+
+#~ msgid "filter the archive through zstd"
+#~ msgstr "通过 zstd 过滤归档"
+
 #~ msgid "Warning: the -I option is not supported; perhaps you meant -j or -T?"
 #~ msgstr "警告:不支持 -I 选项,或许您的意思是 -j 或者 -T ?"
 

Reply via email to