Hi 2016-01-04 12:46 GMT+01:00 Shulgin, Oleksandr <oleksandr.shul...@zalando.de> :
> On Wed, Dec 30, 2015 at 8:28 PM, Pavel Stehule <pavel.steh...@gmail.com> > wrote: > >> >> >> 2015-12-30 17:33 GMT+01:00 Robert Haas <robertmh...@gmail.com>: >> >>> On Mon, Dec 28, 2015 at 8:45 AM, Shulgin, Oleksandr >>> <oleksandr.shul...@zalando.de> wrote: >>> > I didn't check out earlier versions of this patch, but the latest one >>> still >>> > changes pg_size_pretty() to emit PB suffix. >>> > >>> > I don't think it is worth it to throw a number of changes together like >>> > that. We should focus on adding pg_size_bytes() first and make it >>> > compatible with both pg_size_pretty() and existing GUC units: that is >>> > support suffixes up to TB and make sure they have the meaning of >>> powers of >>> > 2^10, not 10^3. Re-using the table present in guc.c would be a plus. >>> > >>> > Next, we could think about adding handling of PB suffix on input and >>> output, >>> > but I don't see a big problem if that is emitted as 1024TB or the user >>> has >>> > to specify it as 1024TB in a GUC or argument to pg_size_bytes(): an >>> minor >>> > inconvenience only. >>> >>> +1 to everything in this email. >>> >> >> so I removed support for PB and SI units. Now the >> memory_unit_conversion_table is shared. >> > > Looks better, thanks. > > I'm not sure why the need to touch the regression test for > pg_size_pretty(): > > ! 10.5 | 10.5 bytes | -10.5 bytes > ! 1000.5 | 1000.5 bytes | -1000.5 bytes > ! 1000000.5 | 977 kB | -977 kB > ! 1000000000.5 | 954 MB | -954 MB > ! 1000000000000.5 | 931 GB | -931 GB > ! 
1000000000000000.5 | 909 TB | -909 TB > > fixed > A nitpick, this loop: > > + while (*cp) > + { > + if ((isdigit(*cp) || *cp == '.') && ndigits < MAX_DIGITS) > + digits[ndigits++] = *cp++; > + else > + break; > + } > > would be a bit easier to parse if spelled as: > > + while (*cp && (isdigit(*cp) || *cp == '.') && ndigits < MAX_DIGITS) > + digits[ndigits++] = *cp++; > fixed > > On the other hand, this seems to truncate the digits silently: > > + digits[ndigits] = '\0'; > > I don't think we want that, e.g.: > > postgres=# select pg_size_bytes('9223372036854775807.9'); > ERROR: invalid unit "9" > HINT: Valid units for this parameter are "kB", "MB", "GB", and "TB". > > I think making a mutable copy of the input string and truncating it before > passing to numeric_in() would make more sense--no need to hard-code > MAX_DIGITS. The same goes for hard-coding MAX_UNIT_LEN, e.g. compare the > following two outputs: > > postgres=# select pg_size_bytes('1 KiB'); > ERROR: invalid unit "KiB" > HINT: Valid units for this parameter are "kB", "MB", "GB", and "TB". > > postgres=# select pg_size_bytes('1024 bytes'); > ERROR: invalid format > > fixed > I believe we should see a similar error message and a hint in the latter > case. (No, I don't think we should add support for 'bytes' as a unit, not > even for "compatibility" with pg_size_pretty()--for one, I don't think it > would be wise to expect pg_size_bytes() to be able to deparse *every* > possible output produced by pg_size_pretty() as its purpose is > human-readable display; also, pg_size_pretty() can easily produce output > that doesn't fit into bigint type, or is just negative) > > Code comments and doc change need proof-reading by a native English > speaker, which I am not. > Regards Pavel > > -- > Alex > >
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml new file mode 100644 index 8ef9fce..6b921ae *** a/doc/src/sgml/func.sgml --- b/doc/src/sgml/func.sgml *************** postgres=# SELECT * FROM pg_xlogfile_nam *** 17607,17612 **** --- 17607,17615 ---- <primary>pg_relation_size</primary> </indexterm> <indexterm> + <primary>pg_size_bytes</primary> + </indexterm> + <indexterm> <primary>pg_size_pretty</primary> </indexterm> <indexterm> *************** postgres=# SELECT * FROM pg_xlogfile_nam *** 17677,17682 **** --- 17680,17696 ---- </entry> </row> <row> + <entry> + <literal><function>pg_size_bytes(<type>text</type>)</function></literal> + </entry> + <entry><type>bigint</type></entry> + <entry> + Converts a size in human-readable format with size units + into bytes. The parameter is a case-insensitive string. The following + units are supported: kB, MB, GB, TB. + </entry> + </row> + <row> <entry> <literal><function>pg_size_pretty(<type>bigint</type>)</function></literal> </entry> diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c new file mode 100644 index 2084692..ce97467 *** a/src/backend/utils/adt/dbsize.c --- b/src/backend/utils/adt/dbsize.c *************** *** 25,30 **** --- 25,31 ---- #include "storage/fd.h" #include "utils/acl.h" #include "utils/builtins.h" + #include "utils/guc.h" #include "utils/numeric.h" #include "utils/rel.h" #include "utils/relfilenodemap.h" *************** pg_size_pretty_numeric(PG_FUNCTION_ARGS) *** 700,705 **** --- 701,811 ---- } /* + * Convert human readable size to long int. + * + * Because decimal values and case-insensitive units must be supported, + * the function parse_int cannot be used. 
+ */ + Datum + pg_size_bytes(PG_FUNCTION_ARGS) + { + text *arg = PG_GETARG_TEXT_PP(0); + const char *str = text_to_cstring(arg); + const char *strptr = str; + char *buffer; + char *bufptr; + Numeric num; + int64 result; + + /* working buffer cannot be longer than original string */ + buffer = (char *) palloc(VARSIZE_ANY_EXHDR(arg) + 1); + bufptr = buffer; + + /* Skip leading spaces */ + while (isspace((unsigned char) *strptr)) + strptr++; + + switch (*strptr) + { + /* ignore plus symbol */ + case '+': + strptr++; + break; + case '-': + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("size cannot be negative"))); + } + + /* copy digits to working buffer */ + while (*strptr && (isdigit(*strptr) || *strptr == '.')) + *bufptr++ = *strptr++; + *bufptr = '\0'; + + /* don't allow empty string */ + if (*buffer == '\0') + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("\"%s\" is not number", str))); + + num = DatumGetNumeric(DirectFunctionCall3(numeric_in, + CStringGetDatum(buffer), 0, -1)); + + /* allow whitespace between integer and unit */ + while (isspace(*strptr)) + strptr++; + + /* Handle possible unit */ + if (*strptr != '\0') + { + int multiplier; + Numeric mul_num; + const char *hintmsg; + const char *unitstr = strptr; + + bufptr = buffer; + + /* copy chars to buffer and stop on space */ + while (*strptr && !isspace(*strptr)) + *bufptr++ = *strptr++; + *bufptr = '\0'; + + /* + * Use buffer as unit if there are not any nonspace char, + * else use a original unit string. + */ + while (isspace(*strptr)) + strptr++; + if (*strptr == '\0') + unitstr = buffer; + + if (!parse_memory_unit(unitstr, &multiplier, &hintmsg)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid unit: \"%s\"", unitstr), + errhint("%s", _(hintmsg)))); + + /* + * Now, the multiplier is in KB unit. 
It should be multiplied by 1024 + * before usage + */ + mul_num = DatumGetNumeric(DirectFunctionCall1(int8_numeric, + Int64GetDatum(multiplier * 1024L))); + + num = DatumGetNumeric(DirectFunctionCall2(numeric_mul, + NumericGetDatum(mul_num), + NumericGetDatum(num))); + } + + result = DatumGetInt64(DirectFunctionCall1(numeric_int8, NumericGetDatum(num))); + + pfree(buffer); + pfree(str); + + PG_RETURN_INT64(result); + } + + /* * Get the filenode of a relation * * This is expected to be used in queries like diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c new file mode 100644 index 38ba82f..00021fd *** a/src/backend/utils/misc/guc.c --- b/src/backend/utils/misc/guc.c *************** convert_from_base_unit(int64 base_value, *** 5238,5243 **** --- 5238,5272 ---- /* + * Look up a memory unit name and return its multiplier relative to kB. + * Used by the pg_size_bytes function. Unlike convert_to_base_unit, the string + * comparison is case insensitive. + */ + bool + parse_memory_unit(const char *unit, int *multiplier, + const char **hintmsg) + { + int i; + + for (i = 0; *memory_unit_conversion_table[i].unit; i++) + { + const unit_conversion *conv = &memory_unit_conversion_table[i]; + + if ( conv->base_unit == GUC_UNIT_KB && + strcasecmp(unit, conv->unit) == 0) + { + *multiplier = conv->multiplier; + return true; + } + } + + *hintmsg = memory_units_hint; + + return false; + } + + + /* * Try to parse value as an integer. The accepted formats are the * usual decimal, octal, or hexadecimal formats, optionally followed by * a unit name if "flags" indicates a unit is allowed. 
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h new file mode 100644 index e5d6c77..4b73e72 *** a/src/include/catalog/pg_proc.h --- b/src/include/catalog/pg_proc.h *************** DATA(insert OID = 2286 ( pg_total_relati *** 3662,3667 **** --- 3662,3669 ---- DESCR("total disk space usage for the specified table and associated indexes"); DATA(insert OID = 2288 ( pg_size_pretty PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 25 "20" _null_ _null_ _null_ _null_ _null_ pg_size_pretty _null_ _null_ _null_ )); DESCR("convert a long int to a human readable text using size units"); + DATA(insert OID = 3317 ( pg_size_bytes PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 20 "25" _null_ _null_ _null_ _null_ _null_ pg_size_bytes _null_ _null_ _null_ )); + DESCR("convert a human readable text with size units to long int bytes"); DATA(insert OID = 3166 ( pg_size_pretty PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 25 "1700" _null_ _null_ _null_ _null_ _null_ pg_size_pretty_numeric _null_ _null_ _null_ )); DESCR("convert a numeric to a human readable text using size units"); DATA(insert OID = 2997 ( pg_table_size PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 20 "2205" _null_ _null_ _null_ _null_ _null_ pg_table_size _null_ _null_ _null_ )); diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h new file mode 100644 index bbaa2ce..5b08a6a *** a/src/include/utils/builtins.h --- b/src/include/utils/builtins.h *************** extern Datum pg_relation_size(PG_FUNCTIO *** 462,467 **** --- 462,468 ---- extern Datum pg_total_relation_size(PG_FUNCTION_ARGS); extern Datum pg_size_pretty(PG_FUNCTION_ARGS); extern Datum pg_size_pretty_numeric(PG_FUNCTION_ARGS); + extern Datum pg_size_bytes(PG_FUNCTION_ARGS); extern Datum pg_table_size(PG_FUNCTION_ARGS); extern Datum pg_indexes_size(PG_FUNCTION_ARGS); extern Datum pg_relation_filenode(PG_FUNCTION_ARGS); diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h new file mode 100644 index e1de1a5..3bfe0f4 *** 
a/src/include/utils/guc.h --- b/src/include/utils/guc.h *************** extern int NewGUCNestLevel(void); *** 357,362 **** --- 357,364 ---- extern void AtEOXact_GUC(bool isCommit, int nestLevel); extern void BeginReportingGUCOptions(void); extern void ParseLongOption(const char *string, char **name, char **value); + extern bool parse_memory_unit(const char *unit, int *multiplier, + const char **hintmsg); extern bool parse_int(const char *value, int *result, int flags, const char **hintmsg); extern bool parse_real(const char *value, double *result); diff --git a/src/test/regress/expected/dbsize.out b/src/test/regress/expected/dbsize.out new file mode 100644 index aa513e7..89a2308 *** a/src/test/regress/expected/dbsize.out --- b/src/test/regress/expected/dbsize.out *************** SELECT size, pg_size_pretty(size), pg_si *** 35,37 **** --- 35,77 ---- 1000000000000000.5 | 909 TB | -909 TB (12 rows) + SELECT pg_size_bytes(size) FROM + (VALUES('1'), ('1kB'), ('1MB'), (' 1 GB'), ('1.5 GB '), + ('1TB'), ('3000 TB')) x(size); + pg_size_bytes + ------------------ + 1 + 1024 + 1048576 + 1073741824 + 1610612736 + 1099511627776 + 3298534883328000 + (7 rows) + + -- case insensitive units are supported + SELECT pg_size_bytes(size) FROM + (VALUES('1'), ('1kb'), ('1mb'), (' 1 Gb'), ('1.5 gB '), + ('1tb')) x(size); + pg_size_bytes + --------------- + 1 + 1024 + 1048576 + 1073741824 + 1610612736 + 1099511627776 + (6 rows) + + --should fail + SELECT pg_size_bytes('1 AB'); + ERROR: invalid unit: "AB" + HINT: Valid units for this parameter are "kB", "MB", "GB", and "TB". + SELECT pg_size_bytes('1 AB A'); + ERROR: invalid unit: "AB A" + HINT: Valid units for this parameter are "kB", "MB", "GB", and "TB". + select pg_size_bytes('9223372036854775807.9'); + ERROR: bigint out of range + select pg_size_bytes('1024 bytes'); + ERROR: invalid unit: "bytes" + HINT: Valid units for this parameter are "kB", "MB", "GB", and "TB". 
diff --git a/src/test/regress/sql/dbsize.sql b/src/test/regress/sql/dbsize.sql new file mode 100644 index c118090..5caab78 *** a/src/test/regress/sql/dbsize.sql --- b/src/test/regress/sql/dbsize.sql *************** SELECT size, pg_size_pretty(size), pg_si *** 10,12 **** --- 10,27 ---- (10.5::numeric), (1000.5::numeric), (1000000.5::numeric), (1000000000.5::numeric), (1000000000000.5::numeric), (1000000000000000.5::numeric)) x(size); + + SELECT pg_size_bytes(size) FROM + (VALUES('1'), ('1kB'), ('1MB'), (' 1 GB'), ('1.5 GB '), + ('1TB'), ('3000 TB')) x(size); + + -- case insensitive units are supported + SELECT pg_size_bytes(size) FROM + (VALUES('1'), ('1kb'), ('1mb'), (' 1 Gb'), ('1.5 gB '), + ('1tb')) x(size); + + --should fail + SELECT pg_size_bytes('1 AB'); + SELECT pg_size_bytes('1 AB A'); + select pg_size_bytes('9223372036854775807.9'); + select pg_size_bytes('1024 bytes');
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers