On Thu, Sep 29, 2022 at 08:57:04AM +0000, Job Snijders wrote:
> Hi all,
>
> I often find myself piping data through ... | awk '{print length}' | ...
> I figured there should be a more direct way that requires less typing.
> Perhaps other developers have a similar itch?
>
> The FreeBSD, NetBSD, Dragonfly, and GNU variants of the wc(1) utility
> have a similar -L feature.
That other systems have the flag isn't an argument for its merit or
good taste. It is an argument for the choice of flag letter, sure.
wc counts items in files. Finding the longest item indeed sounds
like a task better suited to awk.
>
> Kind regards,
>
> Job
>
> Index: wc.1
> ===================================================================
> RCS file: /cvs/src/usr.bin/wc/wc.1,v
> retrieving revision 1.27
> diff -u -p -r1.27 wc.1
> --- wc.1 24 Oct 2016 13:46:58 -0000 1.27
> +++ wc.1 21 Sep 2022 15:47:29 -0000
> @@ -41,7 +41,7 @@
> .Sh SYNOPSIS
> .Nm wc
> .Op Fl c | m
> -.Op Fl hlw
> +.Op Fl hLlw
> .Op Ar
> .Sh DESCRIPTION
> The
> @@ -68,6 +68,14 @@ is written to the standard output.
> Use unit suffixes: Byte, Kilobyte, Megabyte, Gigabyte, Terabyte,
> Petabyte, and Exabyte in order to reduce the number of digits to four or
> fewer
> using powers of 2 for sizes (K=1024, M=1048576, etc.).
> +.It Fl L
> +Write the length of the longest line to the standard output.
> +Length is the number of bytes counted, or the number of characters if the
> +.Fl m
> +flag is specified.
> +If more than one input file is specified,
> +the length of the longest line of all files is reported as the value of
> +.Qq total .
> .It Fl l
> The number of lines in each input file
> is written to the standard output.
> @@ -128,9 +136,9 @@ utility is compliant with the
> .St -p1003.1-2008
> specification.
> .Pp
> -The flag
> -.Op Fl h
> -is an extension to that specification.
> +The flags
> +.Op Fl Lh
> +are extensions to that specification.
> .Sh HISTORY
> A
> .Nm
> Index: wc.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/wc/wc.c,v
> retrieving revision 1.30
> diff -u -p -r1.30 wc.c
> --- wc.c 2 Sep 2022 15:21:40 -0000 1.30
> +++ wc.c 21 Sep 2022 15:47:29 -0000
> @@ -44,12 +44,12 @@
>
> #define _MAXBSIZE (64 * 1024)
>
> -int64_t tlinect, twordct, tcharct;
> -int doline, doword, dochar, humanchar, multibyte;
> +int64_t tlinect, twordct, tcharct, tlongest;
> +int doline, doword, dochar, dolongest, humanchar, multibyte;
> int rval;
> extern char *__progname;
>
> -static void print_counts(int64_t, int64_t, int64_t, const char *);
> +static void print_counts(int64_t, int64_t, int64_t, int64_t, const char *);
> static void format_and_print(int64_t);
> static void cnt(const char *);
>
> @@ -63,8 +63,11 @@ main(int argc, char *argv[])
> if (pledge("stdio rpath", NULL) == -1)
> err(1, "pledge");
>
> - while ((ch = getopt(argc, argv, "lwchm")) != -1)
> + while ((ch = getopt(argc, argv, "Llwchm")) != -1)
> switch(ch) {
> + case 'L':
> + dolongest = 1;
> + break;
> case 'l':
> doline = 1;
> break;
> @@ -84,7 +87,7 @@ main(int argc, char *argv[])
> case '?':
> default:
> fprintf(stderr,
> - "usage: %s [-c | -m] [-hlw] [file ...]\n",
> + "usage: %s [-c | -m] [-hLlw] [file ...]\n",
> __progname);
> return 1;
> }
> @@ -96,7 +99,7 @@ main(int argc, char *argv[])
> * if you don't get any arguments, you have to turn them
> * all on.
> */
> - if (!doline && !doword && !dochar)
> + if (!doline && !doword && !dochar && !dolongest)
> doline = doword = dochar = 1;
>
> if (!*argv) {
> @@ -109,7 +112,8 @@ main(int argc, char *argv[])
> } while(*++argv);
>
> if (dototal)
> - print_counts(tlinect, twordct, tcharct, "total");
> + print_counts(tlinect, twordct, tcharct, tlongest,
> + "total");
> }
>
> return rval;
> @@ -127,11 +131,11 @@ cnt(const char *path)
> wchar_t wc;
> short gotsp;
> ssize_t len;
> - int64_t linect, wordct, charct;
> + uint64_t linect, wordct, charct, longct, tmpll;
> struct stat sbuf;
> int fd;
>
> - linect = wordct = charct = 0;
> + linect = wordct = charct = longct = tmpll = 0;
> stream = NULL;
> if (path != NULL) {
> file = path;
> @@ -180,12 +184,19 @@ cnt(const char *path)
> * faster to get lines than to get words, since
> * the word count requires some logic.
> */
> - else if (doline) {
> + else if (doline || dolongest) {
> while ((len = read(fd, buf, _MAXBSIZE)) > 0) {
> charct += len;
> - for (C = buf; len--; ++C)
> - if (*C == '\n')
> + for (C = buf; len--; ++C) {
> + if (*C == '\n') {
> + if (tmpll > longct)
> + longct = tmpll;
> + tmpll = 0;
> ++linect;
> + } else {
> + ++tmpll;
> + }
> + }
> }
> if (len == -1) {
> warn("%s", file);
> @@ -243,10 +254,16 @@ cnt(const char *path)
> wc = L'?';
> } else if (len == 0)
> len = 1;
> + if (wc != L'\n')
> + ++tmpll;
> if (iswspace(wc)) {
> gotsp = 1;
> - if (wc == L'\n')
> + if (wc == L'\n') {
> + if (tmpll > longct)
> + longct = tmpll;
> + tmpll = 0;
> ++linect;
> + }
> } else if (gotsp) {
> gotsp = 0;
> ++wordct;
> @@ -259,7 +276,7 @@ cnt(const char *path)
> }
> }
>
> - print_counts(linect, wordct, charct, path);
> + print_counts(linect, wordct, charct, longct, path);
>
> /*
> * Don't bother checking doline, doword, or dochar -- speeds
> @@ -268,6 +285,8 @@ cnt(const char *path)
> tlinect += linect;
> twordct += wordct;
> tcharct += charct;
> + if (dolongest && longct > tlongest)
> + tlongest = longct;
>
> if ((stream == NULL ? close(fd) : fclose(stream)) != 0) {
> warn("%s", file);
> @@ -289,7 +308,8 @@ format_and_print(int64_t v)
> }
>
> static void
> -print_counts(int64_t lines, int64_t words, int64_t chars, const char *name)
> +print_counts(int64_t lines, int64_t words, int64_t chars, int64_t longest,
> + const char *name)
> {
> if (doline)
> format_and_print(lines);
> @@ -297,6 +317,8 @@ print_counts(int64_t lines, int64_t word
> format_and_print(words);
> if (dochar)
> format_and_print(chars);
> + if (dolongest)
> + format_and_print(longest);
>
> if (name)
> printf(" %s\n", name);
>
>