Re: [HACKERS] Reducing the overhead of NUMERIC data

Simon Riggs Wed, 02 Nov 2005 10:39:24 -0800

On Tue, 2005-11-01 at 17:55 -0500, Tom Lane wrote:
> "Jim C. Nasby" <[EMAIL PROTECTED]> writes:
> > FWIW, most databases I've used limit NUMERIC to 38 digits, presumably to
> > fit length info into 1 or 2 bytes. So there's something to be said for a
> > small numeric type that has less overhead and a large numeric (what we
> > have today).
> 
> I don't think it'd be worth having 2 types.  Remember that the weight is
> measured in base-10k digits.  Suppose for instance
>       sign            1 bit
>       weight          7 bits (-64 .. +63)
>       dscale          8 bits (0..255)
> This gives us a dynamic range of 1e-256 to 1e255 as well as the ability
> to represent up to 255 displayable fraction digits.  Does anyone know
> any real database applications where that's not enough?
> 
> (I'm neglecting NaN here in supposing we need only 1 bit for sign,
> but we could have a special encoding for NaN.  Perhaps disallow the
> weight = -64 case and use that to signal NaN.)


I've coded a short patch to do this, which is the result of two
alternate patches and some thinking, but maybe not enough yet.

The patch given here is different on two counts from above:

This sets...
#define NUMERIC_MAX_PRECISION           64

since

#define NUMERIC_MAX_RESULT_SCALE        (NUMERIC_MAX_PRECISION * 2)

We don't seem to be able to use all of the bits actually available to us
in the format. Perhaps we need to decouple these now? Previously, we had
room for 14 bits, which gave a maximum of 16384. We were using
NUMERIC_MAX of 1000, so doubling it didn't give problems.

The above on-disk format showed sign & weight together, whereas the
current code has sign and dscale together. Trying to put sign and weight
together is somewhat difficult, since weight is itself a signed value. 

I coded it up that way around, which is reasonably straightforward but
harder than the patch enclosed here. But AFAICS - which isn't that far
normally I grant you, doing things that way around would require some
twos-complement work to get things correct when weight is negative. That
worries me.

IMHO we should accept the step down in maximum numeric precision (down
to "only" 64 digits) rather than put extra processing into every
manipulation of a NUMERIC datatype. With luck, I've misunderstood and we
can have both performance and precision. 

If not, I commend 64 digits to you as sufficient for every imaginable
purpose - saving 2 bytes off every numeric column. (And still 28 decimal
places more accurate than Oracle).

Best Regards, Simon Riggs

Index: src/include/utils/numeric.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/utils/numeric.h,v
retrieving revision 1.20
diff -c -c -r1.20 numeric.h
*** src/include/utils/numeric.h	1 Jan 2005 05:43:09 -0000	1.20
--- src/include/utils/numeric.h	2 Nov 2005 18:06:03 -0000
***************
*** 15,24 ****
  #define _PG_NUMERIC_H_
  
  /*
!  * Hardcoded precision limit - arbitrary, but must be small enough that
!  * dscale values will fit in 14 bits.
   */
! #define NUMERIC_MAX_PRECISION		1000
  
  /*
   * Internal limits on the scales chosen for calculation results
--- 15,24 ----
  #define _PG_NUMERIC_H_
  
  /*
!  * Hardcoded precision limit - must be small enough that
!  * dscale values will fit into the number of bits available in NumericData
   */
! #define NUMERIC_MAX_PRECISION		64
  
  /*
   * Internal limits on the scales chosen for calculation results
***************
*** 39,49 ****
  /*
   * Sign values and macros to deal with packing/unpacking n_sign_dscale
   */
! #define NUMERIC_SIGN_MASK	0xC000
! #define NUMERIC_POS			0x0000
! #define NUMERIC_NEG			0x4000
! #define NUMERIC_NAN			0xC000
! #define NUMERIC_DSCALE_MASK 0x3FFF
  #define NUMERIC_SIGN(n)		((n)->n_sign_dscale & NUMERIC_SIGN_MASK)
  #define NUMERIC_DSCALE(n)	((n)->n_sign_dscale & NUMERIC_DSCALE_MASK)
  #define NUMERIC_IS_NAN(n)	(NUMERIC_SIGN(n) != NUMERIC_POS &&	\
--- 39,49 ----
  /*
   * Sign values and macros to deal with packing/unpacking n_sign_dscale
   */
! #define NUMERIC_SIGN_MASK	0xC0
! #define NUMERIC_POS			0x00
! #define NUMERIC_NEG			0x40
! #define NUMERIC_NAN			0xC0
! #define NUMERIC_DSCALE_MASK 0x3F
  #define NUMERIC_SIGN(n)		((n)->n_sign_dscale & NUMERIC_SIGN_MASK)
  #define NUMERIC_DSCALE(n)	((n)->n_sign_dscale & NUMERIC_DSCALE_MASK)
  #define NUMERIC_IS_NAN(n)	(NUMERIC_SIGN(n) != NUMERIC_POS &&	\
***************
*** 61,74 ****
  typedef struct NumericData
  {
  	int32		varlen;			/* Variable size (std varlena header) */
! 	int16		n_weight;		/* Weight of 1st digit	*/
! 	uint16		n_sign_dscale;	/* Sign + display scale */
  	char		n_data[1];		/* Digits (really array of NumericDigit) */
  } NumericData;
  
  typedef NumericData *Numeric;
  
! #define NUMERIC_HDRSZ	(sizeof(int32) + sizeof(int16) + sizeof(uint16))
  
  
  /*
--- 61,74 ----
  typedef struct NumericData
  {
  	int32		varlen;			/* Variable size (std varlena header) */
! 	int8		n_weight;		/* Weight of 1st digit	*/
! 	uint8		n_sign_dscale;	/* Sign + display scale */
  	char		n_data[1];		/* Digits (really array of NumericDigit) */
  } NumericData;
  
  typedef NumericData *Numeric;
  
! #define NUMERIC_HDRSZ	(sizeof(int32) + sizeof(int8) + sizeof(uint8))
  
  
  /*
Index: src/test/regress/expected/numeric.out
===================================================================
RCS file: /projects/cvsroot/pgsql/src/test/regress/expected/numeric.out,v
retrieving revision 1.17
diff -c -c -r1.17 numeric.out
*** src/test/regress/expected/numeric.out	26 Jun 2005 03:04:18 -0000	1.17
--- src/test/regress/expected/numeric.out	2 Nov 2005 18:06:04 -0000
***************
*** 1,16 ****
  --
  -- NUMERIC
  --
! CREATE TABLE num_data (id int4, val numeric(210,10));
! CREATE TABLE num_exp_add (id1 int4, id2 int4, expected numeric(210,10));
! CREATE TABLE num_exp_sub (id1 int4, id2 int4, expected numeric(210,10));
! CREATE TABLE num_exp_div (id1 int4, id2 int4, expected numeric(210,10));
! CREATE TABLE num_exp_mul (id1 int4, id2 int4, expected numeric(210,10));
! CREATE TABLE num_exp_sqrt (id int4, expected numeric(210,10));
! CREATE TABLE num_exp_ln (id int4, expected numeric(210,10));
! CREATE TABLE num_exp_log10 (id int4, expected numeric(210,10));
! CREATE TABLE num_exp_power_10_ln (id int4, expected numeric(210,10));
! CREATE TABLE num_result (id1 int4, id2 int4, result numeric(210,10));
  -- ******************************
  -- * The following EXPECTED results are computed by bc(1)
  -- * with a scale of 200
--- 1,16 ----
  --
  -- NUMERIC
  --
! CREATE TABLE num_data (id int4, val numeric(64,10));
! CREATE TABLE num_exp_add (id1 int4, id2 int4, expected numeric(64,10));
! CREATE TABLE num_exp_sub (id1 int4, id2 int4, expected numeric(64,10));
! CREATE TABLE num_exp_div (id1 int4, id2 int4, expected numeric(64,10));
! CREATE TABLE num_exp_mul (id1 int4, id2 int4, expected numeric(64,10));
! CREATE TABLE num_exp_sqrt (id int4, expected numeric(64,10));
! CREATE TABLE num_exp_ln (id int4, expected numeric(64,10));
! CREATE TABLE num_exp_log10 (id int4, expected numeric(64,10));
! CREATE TABLE num_exp_power_10_ln (id int4, expected numeric(64,10));
! CREATE TABLE num_result (id1 int4, id2 int4, result numeric(64,10));
  -- ******************************
  -- * The following EXPECTED results are computed by bc(1)
  -- * with a scale of 200
***************
*** 590,602 ****
  (0 rows)
  
  DELETE FROM num_result;
! INSERT INTO num_result SELECT t1.id, t2.id, round(t1.val / t2.val, 80)
      FROM num_data t1, num_data t2
      WHERE t2.val != '0.0';
! SELECT t1.id1, t1.id2, t1.result, round(t2.expected, 80) as expected
      FROM num_result t1, num_exp_div t2
      WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2
!     AND t1.result != round(t2.expected, 80);
   id1 | id2 | result | expected 
  -----+-----+--------+----------
  (0 rows)
--- 590,602 ----
  (0 rows)
  
  DELETE FROM num_result;
! INSERT INTO num_result SELECT t1.id, t2.id, round(t1.val / t2.val, 60)
      FROM num_data t1, num_data t2
      WHERE t2.val != '0.0';
! SELECT t1.id1, t1.id2, t1.result, round(t2.expected, 60) as expected
      FROM num_result t1, num_exp_div t2
      WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2
!     AND t1.result != round(t2.expected, 60);
   id1 | id2 | result | expected 
  -----+-----+--------+----------
  (0 rows)
***************
*** 649,655 ****
  -- * POWER(10, LN(value)) check
  -- ******************************
  DELETE FROM num_result;
! INSERT INTO num_result SELECT id, 0, POWER(numeric '10', LN(ABS(round(val,200))))
      FROM num_data
      WHERE val != '0.0';
  SELECT t1.id1, t1.result, t2.expected
--- 649,655 ----
  -- * POWER(10, LN(value)) check
  -- ******************************
  DELETE FROM num_result;
! INSERT INTO num_result SELECT id, 0, POWER(numeric '10', LN(ABS(round(val,60))))
      FROM num_data
      WHERE val != '0.0';
  SELECT t1.id1, t1.result, t2.expected
Index: src/test/regress/sql/numeric.sql
===================================================================
RCS file: /projects/cvsroot/pgsql/src/test/regress/sql/numeric.sql,v
retrieving revision 1.12
diff -c -c -r1.12 numeric.sql
*** src/test/regress/sql/numeric.sql	26 Jun 2005 03:04:37 -0000	1.12
--- src/test/regress/sql/numeric.sql	2 Nov 2005 18:06:04 -0000
***************
*** 2,18 ****
  -- NUMERIC
  --
  
! CREATE TABLE num_data (id int4, val numeric(210,10));
! CREATE TABLE num_exp_add (id1 int4, id2 int4, expected numeric(210,10));
! CREATE TABLE num_exp_sub (id1 int4, id2 int4, expected numeric(210,10));
! CREATE TABLE num_exp_div (id1 int4, id2 int4, expected numeric(210,10));
! CREATE TABLE num_exp_mul (id1 int4, id2 int4, expected numeric(210,10));
! CREATE TABLE num_exp_sqrt (id int4, expected numeric(210,10));
! CREATE TABLE num_exp_ln (id int4, expected numeric(210,10));
! CREATE TABLE num_exp_log10 (id int4, expected numeric(210,10));
! CREATE TABLE num_exp_power_10_ln (id int4, expected numeric(210,10));
  
! CREATE TABLE num_result (id1 int4, id2 int4, result numeric(210,10));
  
  
  -- ******************************
--- 2,18 ----
  -- NUMERIC
  --
  
! CREATE TABLE num_data (id int4, val numeric(64,10));
! CREATE TABLE num_exp_add (id1 int4, id2 int4, expected numeric(64,10));
! CREATE TABLE num_exp_sub (id1 int4, id2 int4, expected numeric(64,10));
! CREATE TABLE num_exp_div (id1 int4, id2 int4, expected numeric(64,10));
! CREATE TABLE num_exp_mul (id1 int4, id2 int4, expected numeric(64,10));
! CREATE TABLE num_exp_sqrt (id int4, expected numeric(64,10));
! CREATE TABLE num_exp_ln (id int4, expected numeric(64,10));
! CREATE TABLE num_exp_log10 (id int4, expected numeric(64,10));
! CREATE TABLE num_exp_power_10_ln (id int4, expected numeric(64,10));
  
! CREATE TABLE num_result (id1 int4, id2 int4, result numeric(64,10));
  
  
  -- ******************************
***************
*** 579,591 ****
      AND t1.result != t2.expected;
  
  DELETE FROM num_result;
! INSERT INTO num_result SELECT t1.id, t2.id, round(t1.val / t2.val, 80)
      FROM num_data t1, num_data t2
      WHERE t2.val != '0.0';
! SELECT t1.id1, t1.id2, t1.result, round(t2.expected, 80) as expected
      FROM num_result t1, num_exp_div t2
      WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2
!     AND t1.result != round(t2.expected, 80);
  
  -- ******************************
  -- * Square root check
--- 579,591 ----
      AND t1.result != t2.expected;
  
  DELETE FROM num_result;
! INSERT INTO num_result SELECT t1.id, t2.id, round(t1.val / t2.val, 60)
      FROM num_data t1, num_data t2
      WHERE t2.val != '0.0';
! SELECT t1.id1, t1.id2, t1.result, round(t2.expected, 60) as expected
      FROM num_result t1, num_exp_div t2
      WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2
!     AND t1.result != round(t2.expected, 60);
  
  -- ******************************
  -- * Square root check
***************
*** 626,632 ****
  -- * POWER(10, LN(value)) check
  -- ******************************
  DELETE FROM num_result;
! INSERT INTO num_result SELECT id, 0, POWER(numeric '10', LN(ABS(round(val,200))))
      FROM num_data
      WHERE val != '0.0';
  SELECT t1.id1, t1.result, t2.expected
--- 626,632 ----
  -- * POWER(10, LN(value)) check
  -- ******************************
  DELETE FROM num_result;
! INSERT INTO num_result SELECT id, 0, POWER(numeric '10', LN(ABS(round(val,60))))
      FROM num_data
      WHERE val != '0.0';
  SELECT t1.id1, t1.result, t2.expected

---------------------------(end of broadcast)---------------------------
TIP 2: Don't 'kill -9' the postmaster

Re: [HACKERS] Reducing the overhead of NUMERIC data

Reply via email to