From 2efd906f8f4ae21d76dfa1c9e2e37468dec68560 Mon Sep 17 00:00:00 2001
From: David Christensen <david.christensen@crunchydata.com>
Date: Wed, 28 Apr 2021 10:38:37 -0500
Subject: [PATCH 2/2] Expand the supported units in pg_size_bytes to cover all
 units

---
 src/backend/utils/adt/dbsize.c       |  63 ++++++++++-----
 src/include/catalog/pg_proc.dat      |   2 +-
 src/test/regress/expected/dbsize.out | 117 ++++++++++++++++-----------
 src/test/regress/sql/dbsize.sql      |  14 ++--
 4 files changed, 120 insertions(+), 76 deletions(-)

diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c
index f25ec5c1af..ffa6488da8 100644
--- a/src/backend/utils/adt/dbsize.c
+++ b/src/backend/utils/adt/dbsize.c
@@ -708,7 +708,6 @@ pg_size_bytes(PG_FUNCTION_ARGS)
 			   *endptr;
 	char		saved_char;
 	Numeric		num;
-	int64		result;
 	bool		have_digits = false;
 
 	str = text_to_cstring(arg);
@@ -760,8 +759,9 @@ pg_size_bytes(PG_FUNCTION_ARGS)
 		char	   *cp;
 
 		/*
-		 * Note we might one day support EB units, so if what follows 'E'
-		 * isn't a number, just treat it all as a unit to be parsed.
+		 * If what follows 'e' isn't a number, we just treat it all as a unit
+		 * to be parsed; this allows us to support both exponential notation
+		 * and EB units.
 		 */
 		exponent = strtol(endptr + 1, &cp, 10);
 		(void) exponent;		/* Silence -Wunused-result warnings */
@@ -791,7 +791,20 @@ pg_size_bytes(PG_FUNCTION_ARGS)
 	/* Handle possible unit */
 	if (*strptr != '\0')
 	{
-		int64		multiplier = 0;
+		int64		multiplier = 1;
+		int			i;
+		static const char *const units[] = {
+			"bytes",
+			"kb",
+			"mb",
+			"gb",
+			"tb",
+			"pb",
+			"eb",
+			"zb",
+			"yb",
+		};
+		const int	unit_count = lengthof(units);	/* tracks units[] size */
 
 		/* Trim any trailing whitespace */
 		endptr = str + VARSIZE_ANY_EXHDR(arg) - 1;
@@ -802,26 +815,26 @@ pg_size_bytes(PG_FUNCTION_ARGS)
 		endptr++;
 		*endptr = '\0';
 
-		/* Parse the unit case-insensitively */
-		if (pg_strcasecmp(strptr, "bytes") == 0)
-			multiplier = (int64) 1;
-		else if (pg_strcasecmp(strptr, "kb") == 0)
-			multiplier = (int64) 1024;
-		else if (pg_strcasecmp(strptr, "mb") == 0)
-			multiplier = ((int64) 1024) * 1024;
-
-		else if (pg_strcasecmp(strptr, "gb") == 0)
-			multiplier = ((int64) 1024) * 1024 * 1024;
-
-		else if (pg_strcasecmp(strptr, "tb") == 0)
-			multiplier = ((int64) 1024) * 1024 * 1024 * 1024;
+		for (i = 0; i < unit_count; i++)
+		{
+			if (pg_strcasecmp(strptr, units[i]) == 0)
+				break;
+			/*
+			 * Note: int64 cannot hold the full multiplier beyond ~9EB.  Each
+			 * unit step is a fixed factor, so accumulate 2 ** 5 = 32 per step
+			 * here and apply the result twice when converting to numeric,
+			 * which yields the intended 2 ** 10 = 1024 per step.
+			 */
+			multiplier *= 32;
+		}
 
-		else
+		if (i == unit_count)
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 					 errmsg("invalid size: \"%s\"", text_to_cstring(arg)),
 					 errdetail("Invalid size unit: \"%s\".", strptr),
-					 errhint("Valid units are \"bytes\", \"kB\", \"MB\", \"GB\", and \"TB\".")));
+					 errhint("Valid units are \"bytes\", \"kB\", \"MB\", \"GB\", \"TB\", "
+						 "\"PB\", \"EB\", \"ZB\", and \"YB\".")));
 
 		if (multiplier > 1)
 		{
@@ -832,13 +845,19 @@ pg_size_bytes(PG_FUNCTION_ARGS)
 			num = DatumGetNumeric(DirectFunctionCall2(numeric_mul,
 													  NumericGetDatum(mul_num),
 													  NumericGetDatum(num)));
+
+			/* second application to get around int64 limitations in unit multipliers */
+			num = DatumGetNumeric(DirectFunctionCall2(numeric_mul,
+													  NumericGetDatum(mul_num),
+													  NumericGetDatum(num)));
 		}
 	}
 
-	result = DatumGetInt64(DirectFunctionCall1(numeric_int8,
-											   NumericGetDatum(num)));
+	/* finally round up (ceil) to a whole number of bytes; sizes are integral */
+	num = DatumGetNumeric(DirectFunctionCall1(numeric_ceil,
+											  NumericGetDatum(num)));
 
-	PG_RETURN_INT64(result);
+	PG_RETURN_NUMERIC(num);
 }
 
 /*
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index acbcae4607..628dcd69a8 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -7192,7 +7192,7 @@
   prosrc => 'pg_size_pretty_numeric' },
 { oid => '3334',
   descr => 'convert a size in human-readable format with size units into bytes',
-  proname => 'pg_size_bytes', prorettype => 'int8', proargtypes => 'text',
+  proname => 'pg_size_bytes', prorettype => 'numeric', proargtypes => 'text',
   prosrc => 'pg_size_bytes' },
 { oid => '2997',
   descr => 'disk space usage for the specified table, including TOAST, free space and visibility map',
diff --git a/src/test/regress/expected/dbsize.out b/src/test/regress/expected/dbsize.out
index b93d40fb02..8993522904 100644
--- a/src/test/regress/expected/dbsize.out
+++ b/src/test/regress/expected/dbsize.out
@@ -64,53 +64,65 @@ SELECT size, pg_size_pretty(size), pg_size_pretty(-1 * size) FROM
 
 SELECT size, pg_size_bytes(size) FROM
     (VALUES ('1'), ('123bytes'), ('1kB'), ('1MB'), (' 1 GB'), ('1.5 GB '),
-            ('1TB'), ('3000 TB'), ('1e6 MB')) x(size);
-   size   |  pg_size_bytes   
-----------+------------------
- 1        |                1
- 123bytes |              123
- 1kB      |             1024
- 1MB      |          1048576
-  1 GB    |       1073741824
- 1.5 GB   |       1610612736
- 1TB      |    1099511627776
- 3000 TB  | 3298534883328000
- 1e6 MB   |    1048576000000
-(9 rows)
+            ('1TB'), ('3000 TB'), ('1e6 MB'), ('99 PB'), ('45 EB'), ('5.1ZB'),
+            ('1.17 YB')) x(size);
+   size   |       pg_size_bytes       
+----------+---------------------------
+ 1        |                         1
+ 123bytes |                       123
+ 1kB      |                      1024
+ 1MB      |                   1048576
+  1 GB    |                1073741824
+ 1.5 GB   |                1610612736
+ 1TB      |             1099511627776
+ 3000 TB  |          3298534883328000
+ 1e6 MB   |             1048576000000
+ 99 PB    |        111464090777419776
+ 45 EB    |      51881467707308113920
+ 5.1ZB    |    6021017265658797647463
+ 1.17 YB  | 1414443208949116134406226
+(13 rows)
 
 -- case-insensitive units are supported
 SELECT size, pg_size_bytes(size) FROM
     (VALUES ('1'), ('123bYteS'), ('1kb'), ('1mb'), (' 1 Gb'), ('1.5 gB '),
-            ('1tb'), ('3000 tb'), ('1e6 mb')) x(size);
-   size   |  pg_size_bytes   
-----------+------------------
- 1        |                1
- 123bYteS |              123
- 1kb      |             1024
- 1mb      |          1048576
-  1 Gb    |       1073741824
- 1.5 gB   |       1610612736
- 1tb      |    1099511627776
- 3000 tb  | 3298534883328000
- 1e6 mb   |    1048576000000
-(9 rows)
+            ('1tb'), ('3000 tb'), ('1e6 mb'), ('99 pb'), ('45 eB'), ('5.1Zb'),
+            ('1.17 yb')) x(size);
+   size   |       pg_size_bytes       
+----------+---------------------------
+ 1        |                         1
+ 123bYteS |                       123
+ 1kb      |                      1024
+ 1mb      |                   1048576
+  1 Gb    |                1073741824
+ 1.5 gB   |                1610612736
+ 1tb      |             1099511627776
+ 3000 tb  |          3298534883328000
+ 1e6 mb   |             1048576000000
+ 99 pb    |        111464090777419776
+ 45 eB    |      51881467707308113920
+ 5.1Zb    |    6021017265658797647463
+ 1.17 yb  | 1414443208949116134406226
+(13 rows)
 
 -- negative numbers are supported
 SELECT size, pg_size_bytes(size) FROM
     (VALUES ('-1'), ('-123bytes'), ('-1kb'), ('-1mb'), (' -1 Gb'), ('-1.5 gB '),
-            ('-1tb'), ('-3000 TB'), ('-10e-1 MB')) x(size);
-   size    |   pg_size_bytes   
------------+-------------------
- -1        |                -1
- -123bytes |              -123
- -1kb      |             -1024
- -1mb      |          -1048576
-  -1 Gb    |       -1073741824
- -1.5 gB   |       -1610612736
- -1tb      |    -1099511627776
- -3000 TB  | -3298534883328000
- -10e-1 MB |          -1048576
-(9 rows)
+            ('-1tb'), ('-3000 TB'), ('-10e-1 MB'), ('-19e-4eb'), ('-18YB')) x(size);
+   size    |        pg_size_bytes        
+-----------+-----------------------------
+ -1        |                          -1
+ -123bytes |                        -123
+ -1kb      |                       -1024
+ -1mb      |                    -1048576
+  -1 Gb    |                 -1073741824
+ -1.5 gB   |                 -1610612736
+ -1tb      |              -1099511627776
+ -3000 TB  |           -3298534883328000
+ -10e-1 MB |                    -1048576
+ -19e-4eb  |           -2190550858753009
+ -18YB     | -21760664753063325144711168
+(11 rows)
 
 -- different cases with allowed points
 SELECT size, pg_size_bytes(size) FROM
@@ -128,29 +140,38 @@ SELECT size, pg_size_bytes(size) FROM
  -.0 gb |             0
 (8 rows)
 
+-- valid inputs outside bigint range (previous errors)
+SELECT pg_size_bytes('9223372036854775807.9');
+    pg_size_bytes    
+---------------------
+ 9223372036854775808
+(1 row)
+
+SELECT pg_size_bytes('1e100');
+                                             pg_size_bytes                                             
+-------------------------------------------------------------------------------------------------------
+ 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
+(1 row)
+
 -- invalid inputs
 SELECT pg_size_bytes('1 AB');
 ERROR:  invalid size: "1 AB"
 DETAIL:  Invalid size unit: "AB".
-HINT:  Valid units are "bytes", "kB", "MB", "GB", and "TB".
+HINT:  Valid units are "bytes", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", and "YB".
 SELECT pg_size_bytes('1 AB A');
 ERROR:  invalid size: "1 AB A"
 DETAIL:  Invalid size unit: "AB A".
-HINT:  Valid units are "bytes", "kB", "MB", "GB", and "TB".
+HINT:  Valid units are "bytes", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", and "YB".
 SELECT pg_size_bytes('1 AB A    ');
 ERROR:  invalid size: "1 AB A    "
 DETAIL:  Invalid size unit: "AB A".
-HINT:  Valid units are "bytes", "kB", "MB", "GB", and "TB".
-SELECT pg_size_bytes('9223372036854775807.9');
-ERROR:  bigint out of range
-SELECT pg_size_bytes('1e100');
-ERROR:  bigint out of range
+HINT:  Valid units are "bytes", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", and "YB".
 SELECT pg_size_bytes('1e1000000000000000000');
 ERROR:  value overflows numeric format
 SELECT pg_size_bytes('1 byte');  -- the singular "byte" is not supported
 ERROR:  invalid size: "1 byte"
 DETAIL:  Invalid size unit: "byte".
-HINT:  Valid units are "bytes", "kB", "MB", "GB", and "TB".
+HINT:  Valid units are "bytes", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", and "YB".
 SELECT pg_size_bytes('');
 ERROR:  invalid size: ""
 SELECT pg_size_bytes('kb');
@@ -168,6 +189,6 @@ ERROR:  invalid size: ".+912"
 SELECT pg_size_bytes('+912+ kB');
 ERROR:  invalid size: "+912+ kB"
 DETAIL:  Invalid size unit: "+ kB".
-HINT:  Valid units are "bytes", "kB", "MB", "GB", and "TB".
+HINT:  Valid units are "bytes", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", and "YB".
 SELECT pg_size_bytes('++123 kB');
 ERROR:  invalid size: "++123 kB"
diff --git a/src/test/regress/sql/dbsize.sql b/src/test/regress/sql/dbsize.sql
index a97dacbf7b..2a4abcfc65 100644
--- a/src/test/regress/sql/dbsize.sql
+++ b/src/test/regress/sql/dbsize.sql
@@ -30,29 +30,33 @@ SELECT size, pg_size_pretty(size), pg_size_pretty(-1 * size) FROM
 
 SELECT size, pg_size_bytes(size) FROM
     (VALUES ('1'), ('123bytes'), ('1kB'), ('1MB'), (' 1 GB'), ('1.5 GB '),
-            ('1TB'), ('3000 TB'), ('1e6 MB')) x(size);
+            ('1TB'), ('3000 TB'), ('1e6 MB'), ('99 PB'), ('45 EB'), ('5.1ZB'),
+            ('1.17 YB')) x(size);
 
 -- case-insensitive units are supported
 SELECT size, pg_size_bytes(size) FROM
     (VALUES ('1'), ('123bYteS'), ('1kb'), ('1mb'), (' 1 Gb'), ('1.5 gB '),
-            ('1tb'), ('3000 tb'), ('1e6 mb')) x(size);
+            ('1tb'), ('3000 tb'), ('1e6 mb'), ('99 pb'), ('45 eB'), ('5.1Zb'),
+            ('1.17 yb')) x(size);
 
 -- negative numbers are supported
 SELECT size, pg_size_bytes(size) FROM
     (VALUES ('-1'), ('-123bytes'), ('-1kb'), ('-1mb'), (' -1 Gb'), ('-1.5 gB '),
-            ('-1tb'), ('-3000 TB'), ('-10e-1 MB')) x(size);
+            ('-1tb'), ('-3000 TB'), ('-10e-1 MB'), ('-19e-4eb'), ('-18YB')) x(size);
 
 -- different cases with allowed points
 SELECT size, pg_size_bytes(size) FROM
      (VALUES ('-1.'), ('-1.kb'), ('-1. kb'), ('-0. gb'),
              ('-.1'), ('-.1kb'), ('-.1 kb'), ('-.0 gb')) x(size);
 
+-- valid inputs outside bigint range (previous errors)
+SELECT pg_size_bytes('9223372036854775807.9');
+SELECT pg_size_bytes('1e100');
+
 -- invalid inputs
 SELECT pg_size_bytes('1 AB');
 SELECT pg_size_bytes('1 AB A');
 SELECT pg_size_bytes('1 AB A    ');
-SELECT pg_size_bytes('9223372036854775807.9');
-SELECT pg_size_bytes('1e100');
 SELECT pg_size_bytes('1e1000000000000000000');
 SELECT pg_size_bytes('1 byte');  -- the singular "byte" is not supported
 SELECT pg_size_bytes('');
-- 
2.30.1 (Apple Git-130)

