Module Name:    src
Committed By:   kre
Date:           Tue Aug  6 14:55:48 UTC 2024

Modified Files:
        src/usr.bin/printf: printf.1 printf.c

Log Message:
Add %C format conversion and -L option to printf(1)

%C does what everyone always thought %c should do, but doesn't,
and operates rather like the %c conversion in printf(3) (to be
more precise, like %lc).   It takes a code point integer value
in the current locale's LC_CTYPE and prints the character designated.

-L (this printf's first, and only, option) makes the floating conversions
use long double instead of double.

In the manual (printf.1) document both of those, and also be more
precise as to when things are affecting bytes, and when they're
manipulating characters (which makes no difference if LC_ALL=C).


To generate a diff of this commit:
cvs rdiff -u -r1.37 -r1.38 src/usr.bin/printf/printf.1
cvs rdiff -u -r1.56 -r1.57 src/usr.bin/printf/printf.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/usr.bin/printf/printf.1
diff -u src/usr.bin/printf/printf.1:1.37 src/usr.bin/printf/printf.1:1.38
--- src/usr.bin/printf/printf.1:1.37	Mon Feb 13 23:02:27 2023
+++ src/usr.bin/printf/printf.1	Tue Aug  6 14:55:48 2024
@@ -1,4 +1,4 @@
-.\"	$NetBSD: printf.1,v 1.37 2023/02/13 23:02:27 andvar Exp $
+.\"	$NetBSD: printf.1,v 1.38 2024/08/06 14:55:48 kre Exp $
 .\"
 .\" Copyright (c) 1989, 1990, 1993
 .\"	The Regents of the University of California.  All rights reserved.
@@ -32,7 +32,7 @@
 .\"
 .\"	from: @(#)printf.1	8.1 (Berkeley) 6/6/93
 .\"
-.Dd May 19, 2021
+.Dd August 6, 2024
 .Dt PRINTF 1
 .Os
 .Sh NAME
@@ -40,6 +40,7 @@
 .Nd formatted output
 .Sh SYNOPSIS
 .Nm
+.Op Fl L
 .Ar format
 .Op Ar arguments  ...
 .Sh DESCRIPTION
@@ -56,6 +57,14 @@ each of which causes printing of the nex
 .Ar argument  .
 .Pp
 The
+.Fl L
+option causes all floating point values resulting from format
+conversions to be printed using
+.Em long double
+formats, rather than the default
+.Em double .
+.Pp
+The
 .Ar arguments
 after the first are treated as strings if the corresponding format is
 either
@@ -69,8 +78,9 @@ otherwise it is evaluated as a C\~consta
 .It
 A leading plus or minus sign is allowed.
 .It
-If the leading character is a single or double quote, the value is the ASCII
+If the leading character is a single or double quote, the value is the
 code of the next character.
+No further characters are permitted.
 .El
 .Pp
 The format string is reused as often as necessary to satisfy the
@@ -154,6 +164,7 @@ character specifying that the value shou
 For
 .Cm b ,
 .Cm c ,
+.Cm C ,
 .Cm d ,
 and
 .Cm s
@@ -219,7 +230,7 @@ if both are used;
 .It Field Width :
 An optional digit string specifying a
 .Em field width ;
-if the output string has fewer characters than the field width it will
+if the output string has fewer bytes than the field width it will
 be space-padded on the left (or right, if the left-adjustment indicator
 has been given) to make up the field width (note that a leading zero
 is a flag, but an embedded zero is part of a field width);
@@ -233,7 +244,7 @@ for
 .Cm e
 and
 .Cm f
-formats, or the maximum number of characters to be printed
+formats, or the maximum number of bytes to be printed
 from a string
 .Sm off
 .Pf ( Cm b ,
@@ -245,7 +256,7 @@ formats); if the digit string is missing
 as zero;
 .It Format :
 A character which indicates the type of format to use (one of
-.Cm diouxXfFeEgGaAbBcs ) .
+.Cm diouxXfFeEgGaAbBcCs ) .
 .El
 .Pp
 A field width or precision may be
@@ -396,10 +407,16 @@ formats described above.
 The first character of
 .Ar argument
 is printed.
+.It Cm C
+The
+.Ar argument ,
+which must represent an integer constant,
+with an optional leading plus or minus sign,
+is treated as a wide character code point, and printed.
 .It Cm s
 Characters from the string
 .Ar argument
-are printed until the end is reached or until the number of characters
+are printed until the end is reached or until the number of bytes
 indicated by the precision specification is reached; if the
 precision is omitted, all characters in the string are printed.
 .El
@@ -416,6 +433,8 @@ must be preceded by a word consisting of
 .Pq Sq Fl Fl
 to prevent it
 from being interpreted as an option string.
+See
+.Xr getopt 3 .
 .Sh EXIT STATUS
 .Ex -std
 .Sh SEE ALSO
@@ -436,7 +455,9 @@ are optional in POSIX.
 .Pp
 The behaviour of the
 .Cm \&%B
-format and the
+and
+.Cm \&%C
+formats and the
 .Cm \e\(aq ,
 .Cm \e\*q ,
 .Cm \ee ,
@@ -466,7 +487,9 @@ One might expect the
 format to do likewise, but in fact it does not.
 .Pp
 To convert a string representation of a decimal, octal, or hexadecimal
-number into the corresponding character, two nested
+number into the corresponding character,
+using a portable invocation,
+two nested
 .Nm
 invocations may be used, in which the inner invocation
 converts the input to an octal string, and the outer
@@ -475,3 +498,9 @@ For example, the following command outpu
 is 0x0a, which is a newline in ASCII:
 .Pp
 .Dl printf \*q$(printf \(aq\e\e%o\(aq 0x0a)\*q
+.Pp
+In this implementation of
+.Nm
+it is possible to achieve the same result using one invocation:
+.Pp
+.Dl printf %C 0x0a

Index: src/usr.bin/printf/printf.c
diff -u src/usr.bin/printf/printf.c:1.56 src/usr.bin/printf/printf.c:1.57
--- src/usr.bin/printf/printf.c:1.56	Tue Aug  6 07:48:16 2024
+++ src/usr.bin/printf/printf.c	Tue Aug  6 14:55:48 2024
@@ -1,4 +1,4 @@
-/*	$NetBSD: printf.c,v 1.56 2024/08/06 07:48:16 kre Exp $	*/
+/*	$NetBSD: printf.c,v 1.57 2024/08/06 14:55:48 kre Exp $	*/
 
 /*
  * Copyright (c) 1989, 1993
@@ -41,7 +41,7 @@ __COPYRIGHT("@(#) Copyright (c) 1989, 19
 #if 0
 static char sccsid[] = "@(#)printf.c	8.2 (Berkeley) 3/22/95";
 #else
-__RCSID("$NetBSD: printf.c,v 1.56 2024/08/06 07:48:16 kre Exp $");
+__RCSID("$NetBSD: printf.c,v 1.57 2024/08/06 14:55:48 kre Exp $");
 #endif
 #endif /* not lint */
 
@@ -68,13 +68,13 @@ __RCSID("$NetBSD: printf.c,v 1.56 2024/0
 static void	 conv_escape_str(char *, void (*)(int), int);
 static char	*conv_escape(char *, char *, int);
 static char	*conv_expand(const char *);
-static char	 getchr(void);
-static double	 getdouble(void);
+static wchar_t	 getchr(void);
+static long double getdouble(void);
 static int	 getwidth(void);
 static intmax_t	 getintmax(void);
 static char	*getstr(void);
-static char	*mklong(const char *, char);
-static intmax_t	 wide_char(const char *);
+static char	*mklong(const char *, char, char);
+static intmax_t	 wide_char(const char *, int);
 static void      check_conversion(const char *, const char *);
 static void	 usage(void);
 
@@ -85,6 +85,7 @@ static char	*b_fmt;
 
 static int	rval;
 static char  **gargv;
+static int	long_double;
 
 #ifdef BUILTIN		/* csh builtin */
 #define main progprintf
@@ -142,6 +143,7 @@ main(int argc, char *argv[])
 #endif
 
 	rval = 0;	/* clear for builtin versions (avoid holdover) */
+	long_double = 0;
 	clearerr(stdout);	/* for the builtin version */
 
 	if (argc > 2 && strchr(argv[1], '%') == NULL) {
@@ -173,8 +175,11 @@ main(int argc, char *argv[])
 		 * the strchr() test above.
 		 */
 
-		while ((o = getopt(argc, argv, "")) != -1) {
+		while ((o = getopt(argc, argv, "L")) != -1) {
 			switch (o) {
+			case 'L':
+				long_double = 1;
+				break;
 			case '?':
 			default:
 				usage();
@@ -318,10 +323,20 @@ main(int argc, char *argv[])
 				printf("%s", b_fmt);
 				break;
 			}
+			case 'C': {
+				wchar_t p = (wchar_t)getintmax();
+				char *f = mklong(start, 'c', 'l');
+
+				PF(f, p);
+				if (error < 0)
+					goto out;
+				break;
+			}
 			case 'c': {
-				char p = getchr();
+				wchar_t p = getchr();
+				char *f = mklong(start, ch, 'l');
 
-				PF(start, p);
+				PF(f, p);
 				if (error < 0)
 					goto out;
 				break;
@@ -337,7 +352,7 @@ main(int argc, char *argv[])
 			case 'd':
 			case 'i': {
 				intmax_t p = getintmax();
-				char *f = mklong(start, ch);
+				char *f = mklong(start, ch, 'j');
 
 				PF(f, p);
 				if (error < 0)
@@ -349,7 +364,7 @@ main(int argc, char *argv[])
 			case 'x':
 			case 'X': {
 				uintmax_t p = (uintmax_t)getintmax();
-				char *f = mklong(start, ch);
+				char *f = mklong(start, ch, 'j');
 
 				PF(f, p);
 				if (error < 0)
@@ -364,9 +379,15 @@ main(int argc, char *argv[])
 			case 'F':
 			case 'g':
 			case 'G': {
-				double p = getdouble();
+				long double p = getdouble();
 
-				PF(start, p);
+				if (long_double) {
+					char * f = mklong(start, ch, 'L');
+					PF(f, p);
+				} else {
+					double pp = (double)p;
+					PF(start, pp);
+				}
 				if (error < 0)
 					goto out;
 				break;
@@ -639,7 +660,7 @@ conv_expand(const char *str)
 }
 
 static char *
-mklong(const char *str, char ch)
+mklong(const char *str, char ch, char longer)
 {
 	static char copy[64];
 	size_t len;	
@@ -651,18 +672,18 @@ mklong(const char *str, char ch)
 		rval = 1;
 	}
 	(void)memmove(copy, str, len - 3);
-	copy[len - 3] = 'j';
+	copy[len - 3] = longer;
 	copy[len - 2] = ch;
 	copy[len - 1] = '\0';
 	return copy;	
 }
 
-static char
+static wchar_t
 getchr(void)
 {
 	if (!*gargv)
 		return 0;
-	return **gargv++;
+	return (wchar_t)wide_char(*gargv++, 0);
 }
 
 static char *
@@ -710,7 +731,7 @@ getintmax(void)
 	gargv++;
 
 	if (*cp == '\"' || *cp == '\'')
-		return wide_char(cp);
+		return wide_char(cp, 1);
 
 	errno = 0;
 	val = strtoimax(cp, &ep, 0);
@@ -718,10 +739,10 @@ getintmax(void)
 	return val;
 }
 
-static double
+static long double
 getdouble(void)
 {
-	double val;
+	long double val;
 	char *ep;
 
 	if (!*gargv)
@@ -729,36 +750,44 @@ getdouble(void)
 
 	/* This is a NetBSD extension, not required by POSIX (it is useless) */
 	if (*(ep = *gargv) == '\"' || *ep == '\'')
-		return (double)wide_char(ep);
+		return (long double)wide_char(ep, 1);
 
 	errno = 0;
-	val = strtod(*gargv, &ep);
+	val = strtold(*gargv, &ep);
 	check_conversion(*gargv++, ep);
 	return val;
 }
 
 /*
- * XXX This is just a placeholder for a later version which
- *     will do mbtowc() on p+1 (and after checking that all of the
- *     string has been consumed) return that value.
+ * Fetch a wide character from the string given
  *
- * This (mbtowc) behaviour is required by POSIX (as is the check
- * that the whole arg is consumed).
+ * if all is set, that character must consume the entire string
+ * after an initial leading byte (ascii char) is ignored,
+ * (used for parsing integer args using the 'X syntax)
  *
- * What follows is actually correct if we assume that LC_CTYPE=C
- * (or something else similar that is a single byte charset).
+ * if !all then there is no requirement that the whole
+ * string be consumed (remaining characters are just ignored)
+ * but the character is to start at *p.
+ * (used for fetching the first character of a string arg for %c)
  */
 static intmax_t
-wide_char(const char *p)
+wide_char(const char *p, int all)
 {
-	intmax_t ch = (intmax_t)(unsigned char)p[1];
-
-	if (ch != 0 && p[2] != '\0') {
+	wchar_t wch;
+	size_t len;
+	int n;
+
+	(void)mbtowc(NULL, NULL, 0);
+	n = mbtowc(&wch, p + all, len = strlen(p + all));
+	if (n < 0) {
+		warn("%s", p);
+		rval = -1;
+	} else if (all && (size_t)n != len) {
 		warnx("%s: not completely converted", p);
 		rval = 1;
 	}
 
-	return ch;
+	return (intmax_t) wch;
 }
 
 static void
@@ -779,5 +808,6 @@ check_conversion(const char *s, const ch
 static void
 usage(void)
 {
-	(void)fprintf(stderr, "Usage: %s format [arg ...]\n", getprogname());
+	(void)fprintf(stderr,
+	    "Usage: %s [-L] format [arg ...]\n", getprogname());
 }

Reply via email to