Hi!

We have found another performance problem in this transform -- very slow
conversion via I/O from PostgreSQL numerics (which are used for the
representation of jsonb numbers) to Python Decimals.


Attached patch with fix.

We are simply trying first to convert numeric to int64 if is does not have
digits after the decimal point, and then construct Python Int instead of
Decimal.  Standard Python json.loads() does the same for exact integers.

A special function numeric_to_exact_int64() was added to numeric.c.  Existing
numeric_int8() can't be used here because it rounds input numeric.



Performance results (see the second attached file jsonb_plplython_tests.sql
for the function definitions):

 - calculating the length of the passed jsonb object (input transformation):

   py_jsonb_length_trans opt            2761,873 ms
   py_jsonb_length_trans               10419,230 ms
   py_jsonb_length_json                 8691,201 ms

 - returning integer arrays (output transformation):

   py_jsonb_ret_int_array_trans opt     3284,810 ms
   py_jsonb_ret_int_array_trans         4540,063 ms
   py_jsonb_ret_int_array_raw           5100,793 ms
   py_jsonb_ret_int_array_json          9887,821 ms

 - returning float arrays (output transformation):

   py_jsonb_ret_float_array_trans opt   5699,360 ms
   py_jsonb_ret_float_array_trans       5735,854 ms
   py_jsonb_ret_float_array_raw         6516,514 ms
   py_jsonb_ret_float_array_json       10869,213 ms


--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

>From ad090568f4eedf4cf0177f6ea5b7bb282f583bfb Mon Sep 17 00:00:00 2001
From: Nikita Glukhov <n.glu...@postgrespro.ru>
Date: Thu, 21 Jun 2018 18:11:57 +0300
Subject: [PATCH] Optimize numeric conversions in jsonb_plpython

---
 contrib/jsonb_plpython/jsonb_plpython.c | 29 +++++++++++++++++++++++++++--
 src/backend/utils/adt/numeric.c         | 21 +++++++++++++++++++++
 src/include/utils/numeric.h             |  1 +
 3 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/contrib/jsonb_plpython/jsonb_plpython.c b/contrib/jsonb_plpython/jsonb_plpython.c
index d6d6eeb..33ef15a 100644
--- a/contrib/jsonb_plpython/jsonb_plpython.c
+++ b/contrib/jsonb_plpython/jsonb_plpython.c
@@ -110,6 +110,13 @@ PLyObject_FromJsonbValue(JsonbValue *jsonbValue)
 			{
 				Datum		num;
 				char	   *str;
+				int64		intval;
+
+				if (numeric_to_exact_int64(jsonbValue->val.numeric, &intval))
+#ifndef HAVE_LONG_INT_64
+					if ((long) intval == intval)
+#endif
+						return PyInt_FromLong((long) intval);
 
 				num = NumericGetDatum(jsonbValue->val.numeric);
 				str = DatumGetCString(DirectFunctionCall1(numeric_out, num));
@@ -324,7 +331,26 @@ static JsonbValue *
 PLyNumber_ToJsonbValue(PyObject *obj, JsonbValue *jbvNum)
 {
 	Numeric		num;
-	char	   *str = PLyObject_AsString(obj);
+	char	   *str;
+
+	jbvNum->type = jbvNumeric;
+
+	if (PyInt_Check(obj))
+	{
+		long		val = PyInt_AsLong(obj);
+
+		if (val != -1 || !PyErr_Occurred())
+		{
+			jbvNum->val.numeric =
+				DatumGetNumeric(DirectFunctionCall1(int8_numeric,
+													Int64GetDatum((int64) val)));
+			return jbvNum;
+		}
+
+		PyErr_Clear();
+	}
+
+	str = PLyObject_AsString(obj);
 
 	PG_TRY();
 	{
@@ -356,7 +382,6 @@ PLyNumber_ToJsonbValue(PyObject *obj, JsonbValue *jbvNum)
 				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
 				 (errmsg("cannot convert NaN to jsonb"))));
 
-	jbvNum->type = jbvNumeric;
 	jbvNum->val.numeric = num;
 
 	return jbvNum;
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index 82a1429..eefaa6d 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -3348,6 +3348,27 @@ numeric_float4(PG_FUNCTION_ARGS)
 	PG_RETURN_DATUM(result);
 }
 
+/*
+ * numeric_get_int64() -
+ *
+ *	Try to convert numeric to int64 if it is an exact integer (i.e. it does
+ *	not have digits after the decimal point).  Return true if okay.
+ */
+bool
+numeric_to_exact_int64(Numeric num, int64 *result)
+{
+	NumericVar	var;
+
+	if (NUMERIC_IS_NAN(num))
+		return false;
+
+	if (NUMERIC_DSCALE(num) != 0)
+		return false;	/* digits after the decimal point are not allowed */
+
+	init_var_from_num(num, &var);
+
+	return numericvar_to_int64(&var, result);
+}
 
 /* ----------------------------------------------------------------------
  *
diff --git a/src/include/utils/numeric.h b/src/include/utils/numeric.h
index cd8da8b..ab49c0e 100644
--- a/src/include/utils/numeric.h
+++ b/src/include/utils/numeric.h
@@ -60,5 +60,6 @@ extern bool numeric_is_nan(Numeric num);
 int32		numeric_maximum_size(int32 typmod);
 extern char *numeric_out_sci(Numeric num, int scale);
 extern char *numeric_normalize(Numeric num);
+extern bool numeric_to_exact_int64(Numeric value, int64 *out);
 
 #endif							/* _PG_NUMERIC_H_ */
-- 
2.7.4

Attachment: jsonb_plpython_tests.sql
Description: application/sql

Reply via email to