[issue4258] Use 30-bit digits instead of 15-bit digits for Python integers.

STINNER Victor Thu, 06 Nov 2008 04:47:29 -0800

STINNER Victor <[EMAIL PROTECTED]> added the comment:

I wrote a patch to collect statistics about PyLong function calls.


make (use setup.py):

PyLong_FromLong: 168572 calls, min=( 0,  ), avg=(1.4,    ), max=(  3,    )
long_bool:        48682 calls, min=( 0,  ), avg=(0.2,    ), max=(  2,    )
long_add:         39527 calls, min=( 0, 0), avg=(0.9, 1.0), max=(  2,   3)
long_compare:     39145 calls, min=( 0, 0), avg=(1.2, 1.1), max=(  3,   3)
PyLong_AsLong:    33689 calls, min=( 0,  ), avg=(0.9,    ), max=( 45,    )
long_sub:         13091 calls, min=( 0, 0), avg=(0.9, 0.8), max=(  1,   1)
long_bitwise:      4636 calls, min=( 0, 0), avg=(0.8, 0.6), max=(  2,   2)
long_hash:         1097 calls, min=( 0,  ), avg=(0.9,    ), max=(  3,    )
long_mul:           221 calls, min=( 0, 0), avg=(0.8, 1.1), max=(  2,   2)
long_invert:        204 calls, min=( 0,  ), avg=(1.0,    ), max=(  1,    )
long_neg:            35 calls, min=( 1,  ), avg=(1.0,    ), max=(  1,    )
long_format:          3 calls, min=( 0,  ), avg=(0.7,    ), max=(  1,    )
long_mod:             3 calls, min=( 1, 1), avg=(1.0, 1.0), max=(  1,   1)
long_pow:             1 calls, min=( 1, 1), avg=(1.0, 1.0), max=(  1,   1)

pystone:

PyLong_FromLong:1587652 calls, min=( 0,  ), avg=(1.0,    ), max=(  3,    )
long_add:        902487 calls, min=( 0, 0), avg=(1.0, 1.0), max=(  2,   2)
long_compare:    651165 calls, min=( 0, 0), avg=(1.0, 1.0), max=(  3,   3)
PyLong_AsLong:   252476 calls, min=( 0,  ), avg=(1.0,    ), max=(  2,    )
long_sub:        250032 calls, min=( 1, 0), avg=(1.0, 1.0), max=(  1,   1)
long_bool:       102655 calls, min=( 0,  ), avg=(0.5,    ), max=(  1,    )
long_mul:        100015 calls, min=( 0, 0), avg=(1.0, 1.0), max=(  1,   2)
long_div:         50000 calls, min=( 1, 1), avg=(1.0, 1.0), max=(  1,   1)
long_hash:          382 calls, min=( 0,  ), avg=(1.1,    ), max=(  2,    )
long_bitwise:       117 calls, min=( 0, 0), avg=(1.0, 1.0), max=(  1,   2)
long_format:          1 calls, min=( 2,  ), avg=(2.0,    ), max=(  2,    )

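min/avg/max are the integer digit counts (minimum, average, maximum); each pair gives the first and second integer recorded for the call, the second being blank when there is only one.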

What can we learn from these numbers?

PyLong_FromLong(), long_add() and long_compare() are the 3 most common 
operations on integers. 

Except for PyLong_FromLong(), long_compare() and long_format(), the arguments 
of the functions are mostly in the range [-2^15; 2^15].

The biggest number has 45 digits: maybe just one call, which the table above 
lists under PyLong_AsLong(). Apart from this number/call, the biggest numbers 
have between 2 and 3 digits. 

long_bool() is never called with a number bigger than 2 digits.

long_sub() is never called with a number bigger than 1 digit!

Added file: http://bugs.python.org/file11952/long_stat.patch

_______________________________________
Python tracker <[EMAIL PROTECTED]>
<http://bugs.python.org/issue4258>
_______________________________________

diff --git a/Include/graminit.h b/Include/graminit.h
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 8f7ad4c..b45f809 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -29,6 +29,128 @@ static PyLongObject small_ints[NSMALLNEGINTS + NSMALLPOSINTS];
 int quick_int_allocs, quick_neg_int_allocs;
 #endif
 
+typedef enum {  /* One id per instrumented function; update_stat() uses it to index _PyLong_stat[]. */
+       STAT_FROMLONG = 0,
+       STAT_BOOL,
+       STAT_COMPARE,
+       STAT_RICHCOMPARE,
+       STAT_ADD,
+       STAT_ASLONG,
+       STAT_SUB,
+       STAT_BITWISE,
+       STAT_HASH,
+       STAT_INVERT,
+       STAT_NEG,
+       STAT_FORMAT,
+       STAT_MUL,
+       STAT_MOD,
+       STAT_POW,
+       STAT_DIV,
+       STAT_TRUEDIV,
+       STAT_RSHIFT,
+       STAT_LSHIFT,
+       STAT_LONG,
+       STAT_FLOAT,
+       STAT_ABS,
+       STAT_DIVMOD,
+       STAT_FORMAT_ADV,
+       STAT_ROUND,
+
+       STAT_COUNT  /* not a real id: the number of ids above, used to size _PyLong_stat[] */
+} stat_id_t;
+
+typedef struct {  /* Running digit-count figures for one of the two objects of a recorded call. */
+       int min;    /* smallest count recorded; init_stat() starts it at INT_MAX */
+       int max;    /* largest count recorded; init_stat() starts it at INT_MIN */
+       int total;  /* sum of all recorded counts; dump_stat() divides it by calls for the average */
+} stat_digits_t;
+
+typedef struct {  /* Counters for one instrumented function. */
+       int id;           /* set by init_stat() to the entry's array index; printed by dump_stat() */
+       int calls;        /* incremented by update_stat() on every recorded call */
+       stat_digits_t a;  /* figures for the first object passed to update_stat() */
+       stat_digits_t b;  /* figures for the second object (a NULL object is recorded as -1) */
+} stat_data_t;
+
+stat_data_t _PyLong_stat[STAT_COUNT];  /* one entry per stat_id_t; reset by init_stat(), sorted in place by dump_stat() */
+
+void init_stat(void)  /* Reset every _PyLong_stat[] entry to its "nothing recorded yet" state. */
+{
+       unsigned int id;
+       stat_data_t* data;
+       stat_digits_t *a, *b;
+       for (id=0; id<STAT_COUNT; id++) {
+               data = &_PyLong_stat[id];
+               data->id = id;  /* store the slot number inside the entry itself */
+               data->calls = 0;
+               a = &data->a;
+               a->min = INT_MAX;  /* extreme sentinels so that a recorded count can replace them */
+               a->max = INT_MIN;
+               a->total = 0;
+               b = &data->b;
+               b->min = INT_MAX;  /* same reset for the second object's figures */
+               b->max = INT_MIN;
+               b->total = 0;
+       }
+}
+
+int cmp_stat(const void *va, const void *vb)  /* qsort() comparator over stat_data_t: more calls sorts first. */
+{
+       const stat_data_t* a = (const stat_data_t*)va;
+       const stat_data_t* b = (const stat_data_t*)vb;
+       if (a->calls < b->calls)
+               return 1;   /* a has fewer calls: place it after b */
+       else if (a->calls > b->calls)
+               return -1;  /* a has more calls: place it before b */
+       else
+               return 0;   /* equal call counts: no ordering preference */
+}
+
+void dump_stat(void)  /* Print one line per _PyLong_stat[] entry to stdout, most-called first. */
+{
+       unsigned int id;
+       stat_data_t* data;
+       stat_digits_t *a, *b;
+       double avga, avgb;
+       qsort(_PyLong_stat, STAT_COUNT, sizeof(_PyLong_stat[0]), cmp_stat);  /* in place: slots no longer match stat ids afterwards */
+       for (id=0; id<STAT_COUNT; id++) {
+               data = &_PyLong_stat[id];
+               printf("[%-2i] %-7i calls", data->id, data->calls);  /* calls is an int, so %i rather than %u */
+               if (!data->calls) {  /* never called: nothing more to print, and avoids dividing by zero */
+                       printf("\n");
+                       continue;
+               }
+               a = &data->a;
+               avga = (double)a->total / data->calls;
+               b = &data->b;
+               avgb = (double)b->total / data->calls;
+               printf(", min=(% 2i,% 2i), avg=(%+.1f, %+.1f), max=(% 3i, % 3i)\n", a->min, b->min, avga, avgb, a->max, b->max);
+       }
+       printf("\n");
+}
+
+void update_digits(stat_digits_t* stat, PyLongObject* v)  /* Fold the digit count of v into stat (total, min, max). */
+{
+       int n;
+       if (v)
+               n = ABS(Py_SIZE(v));  /* NOTE(review): assumes |Py_SIZE(v)| is the digit count and fits in an int -- confirm */
+       else
+               n = -1;  /* NULL object: recorded as -1, which also enters total, min and max below */
+       stat->total += n;
+       if (n < stat->min)
+               stat->min = n;
+       if (n > stat->max)
+               stat->max = n;
+}
+
+void update_stat(stat_id_t id, PyLongObject* a, PyLongObject* b)  /* Record one call of function `id` with objects a and b. */
+{
+       stat_data_t* data = &_PyLong_stat[id];  /* id is used directly as the array index, without a range check */
+       data->calls += 1;
+       update_digits(&data->a, a);
+       update_digits(&data->b, b);  /* callers with a single object pass NULL here */
+}
+
 static PyObject *
 get_small_int(int ival)
 {
@@ -47,7 +169,7 @@ get_small_int(int ival)
                return get_small_int(ival); \
        } while(0)
 
-static PyLongObject * 
+static PyLongObject *
 maybe_small_long(PyLongObject *v)
 {
        if (v && ABS(Py_SIZE(v)) <= 1) {
@@ -133,7 +255,7 @@ _PyLong_New(Py_ssize_t size)
           This computation would be incorrect on systems
           which have padding before the digits; with 16-bit
           digits this should not happen. */
-       result = PyObject_MALLOC(sizeof(PyVarObject) + 
+       result = PyObject_MALLOC(sizeof(PyVarObject) +
                                 size*sizeof(digit));
        if (!result) {
                PyErr_NoMemory();
@@ -171,8 +293,8 @@ _PyLong_Copy(PyLongObject *src)
 
 /* Create a new long int object from a C long int */
 
-PyObject *
-PyLong_FromLong(long ival)
+static PyObject *
+_PyLong_FromLong(long ival)
 {
        PyLongObject *v;
         unsigned long abs_ival;
@@ -233,6 +355,14 @@ PyLong_FromLong(long ival)
        return (PyObject *)v;
 }
 
+PyObject *
+PyLong_FromLong(long ival)  /* Public entry point: build the int via _PyLong_FromLong(), then record it. */
+{
+       PyObject* v = _PyLong_FromLong(ival);
+       update_stat(STAT_FROMLONG, (PyLongObject*)v, NULL);  /* v is passed on as is, even when it is NULL */
+       return v;
+}
+
 /* Create a new long int object from a C unsigned long int */
 
 PyObject *
@@ -396,7 +526,7 @@ PyLong_AsLongAndOverflow(PyObject *vv, int *overflow)
                else {
                        *overflow = Py_SIZE(v) > 0 ? 1 : -1;
                        /* res is already set to -1 */
-               }       
+               }
        }
  exit:
        if (do_decref) {
@@ -405,13 +535,14 @@ PyLong_AsLongAndOverflow(PyObject *vv, int *overflow)
        return res;
 }
 
-long 
+long
 PyLong_AsLong(PyObject *obj)
 {
        int overflow;
+       update_stat(STAT_ASLONG, (PyLongObject*)obj, NULL);
        long result = PyLong_AsLongAndOverflow(obj, &overflow);
        if (overflow) {
-               /* XXX: could be cute and give a different 
+               /* XXX: could be cute and give a different
                   message for overflow == -1 */
                PyErr_SetString(PyExc_OverflowError,
                                "Python int too large to convert to C long");
@@ -1498,6 +1629,8 @@ _PyLong_Format(PyObject *aa, int base)
        assert(base >= 2 && base <= 36);
        size_a = ABS(Py_SIZE(a));
 
+       update_stat(STAT_FORMAT, (PyLongObject*)aa, NULL);
+
        /* Compute a rough upper bound for the length of the string */
        i = base;
        bits = 0;
@@ -2211,6 +2344,8 @@ long_compare(PyLongObject *a, PyLongObject *b)
 {
        Py_ssize_t sign;
 
+       update_stat(STAT_COMPARE, a, b);
+
        if (Py_SIZE(a) != Py_SIZE(b)) {
                if (ABS(Py_SIZE(a)) == 0 && ABS(Py_SIZE(b)) == 0)
                        sign = 0;
@@ -2237,7 +2372,8 @@ long_richcompare(PyObject *self, PyObject *other, int op)
 {
        PyObject *result;
        CHECK_BINOP(self, other);
-       result = Py_CmpToRich(op, long_compare((PyLongObject*)self, 
+       update_stat(STAT_RICHCOMPARE, (PyLongObject*)self, 
(PyLongObject*)other);
+       result = Py_CmpToRich(op, long_compare((PyLongObject*)self,
                                               (PyLongObject*)other));
        return result;
 }
@@ -2249,6 +2385,8 @@ long_hash(PyLongObject *v)
        Py_ssize_t i;
        int sign;
 
+       update_stat(STAT_HASH, v, NULL);
+
        /* This is designed so that Python ints and longs with the
           same value hash to the same value, otherwise comparisons
           of mapping keys will turn out weird */
@@ -2381,6 +2519,7 @@ long_add(PyLongObject *a, PyLongObject *b)
        PyLongObject *z;
 
        CHECK_BINOP(a, b);
+       update_stat(STAT_ADD, a, b);
 
        if (ABS(Py_SIZE(a)) <= 1 && ABS(Py_SIZE(b)) <= 1) {
                PyObject *result = PyLong_FromLong(MEDIUM_VALUE(a) +
@@ -2411,6 +2550,7 @@ long_sub(PyLongObject *a, PyLongObject *b)
        PyLongObject *z;
 
        CHECK_BINOP(a, b);
+       update_stat(STAT_SUB, a, b);
 
        if (ABS(Py_SIZE(a)) <= 1 && ABS(Py_SIZE(b)) <= 1) {
                PyObject* r;
@@ -2840,6 +2980,7 @@ long_mul(PyLongObject *a, PyLongObject *b)
        PyLongObject *z;
 
        CHECK_BINOP(a, b);
+       update_stat(STAT_MUL, a, b);
 
        if (ABS(Py_SIZE(a)) <= 1 && ABS(Py_SIZE(b)) <= 1) {
                PyObject *r;
@@ -2925,6 +3066,7 @@ long_div(PyObject *a, PyObject *b)
        PyLongObject *div;
 
        CHECK_BINOP(a, b);
+       update_stat(STAT_DIV, (PyLongObject*)a, (PyLongObject*)b);
        if (l_divmod((PyLongObject*)a, (PyLongObject*)b, &div, NULL) < 0)
                div = NULL;
        return (PyObject *)div;
@@ -2937,6 +3079,8 @@ long_true_divide(PyObject *a, PyObject *b)
        int failed, aexp = -1, bexp = -1;
 
        CHECK_BINOP(a, b);
+       update_stat(STAT_TRUEDIV, (PyLongObject*)a, (PyLongObject*)b);
+
        ad = _PyLong_AsScaledDouble((PyObject *)a, &aexp);
        bd = _PyLong_AsScaledDouble((PyObject *)b, &bexp);
        failed = (ad == -1.0 || bd == -1.0) && PyErr_Occurred();
@@ -2977,8 +3121,9 @@ static PyObject *
 long_mod(PyObject *a, PyObject *b)
 {
        PyLongObject *mod;
-       
+
        CHECK_BINOP(a, b);
+       update_stat(STAT_MOD, (PyLongObject*)a, (PyLongObject*)b);
 
        if (l_divmod((PyLongObject*)a, (PyLongObject*)b, NULL, &mod) < 0)
                mod = NULL;
@@ -2992,6 +3137,7 @@ long_divmod(PyObject *a, PyObject *b)
        PyObject *z;
 
        CHECK_BINOP(a, b);
+       update_stat(STAT_DIVMOD, (PyLongObject*)a, (PyLongObject*)b);
 
        if (l_divmod((PyLongObject*)a, (PyLongObject*)b, &div, &mod) < 0) {
                return NULL;
@@ -3027,6 +3173,7 @@ long_pow(PyObject *v, PyObject *w, PyObject *x)
 
        /* a, b, c = v, w, x */
        CHECK_BINOP(v, w);
+       update_stat(STAT_POW, (PyLongObject*)v, (PyLongObject*)w);
        a = (PyLongObject*)v; Py_INCREF(a);
        b = (PyLongObject*)w; Py_INCREF(b);
        if (PyLong_Check(x)) {
@@ -3199,6 +3346,7 @@ long_invert(PyLongObject *v)
        /* Implement ~x as -(x+1) */
        PyLongObject *x;
        PyLongObject *w;
+       update_stat(STAT_INVERT, v, NULL);
        if (ABS(Py_SIZE(v)) <=1)
                return PyLong_FromLong(-(MEDIUM_VALUE(v)+1));
        w = (PyLongObject *)PyLong_FromLong(1L);
@@ -3216,6 +3364,7 @@ static PyObject *
 long_neg(PyLongObject *v)
 {
        PyLongObject *z;
+       update_stat(STAT_NEG, v, NULL);
        if (ABS(Py_SIZE(v)) <= 1)
                return PyLong_FromLong(-MEDIUM_VALUE(v));
        z = (PyLongObject *)_PyLong_Copy(v);
@@ -3227,6 +3376,7 @@ long_neg(PyLongObject *v)
 static PyObject *
 long_abs(PyLongObject *v)
 {
+       update_stat(STAT_ABS, v, NULL);
        if (Py_SIZE(v) < 0)
                return long_neg(v);
        else
@@ -3236,6 +3386,7 @@ long_abs(PyLongObject *v)
 static int
 long_bool(PyLongObject *v)
 {
+       update_stat(STAT_BOOL, v, NULL);
        return ABS(Py_SIZE(v)) != 0;
 }
 
@@ -3248,6 +3399,7 @@ long_rshift(PyLongObject *a, PyLongObject *b)
        digit lomask, himask;
 
        CHECK_BINOP(a, b);
+       update_stat(STAT_RSHIFT, a, b);
 
        if (Py_SIZE(a) < 0) {
                /* Right shifting negative numbers is harder */
@@ -3310,6 +3462,7 @@ long_lshift(PyObject *v, PyObject *w)
        twodigits accum;
 
        CHECK_BINOP(a, b);
+       update_stat(STAT_LSHIFT, a, b);
 
        shiftby = PyLong_AsLong((PyObject *)b);
        if (shiftby == -1L && PyErr_Occurred())
@@ -3369,6 +3522,8 @@ long_bitwise(PyLongObject *a,
        digit diga, digb;
        PyObject *v;
 
+       update_stat(STAT_BITWISE, a, b);
+
        if (Py_SIZE(a) < 0) {
                a = (PyLongObject *) long_invert(a);
                if (a == NULL)
@@ -3492,6 +3647,7 @@ long_or(PyObject *a, PyObject *b)
 static PyObject *
 long_long(PyObject *v)
 {
+       update_stat(STAT_LONG, (PyLongObject*)v, NULL);
        if (PyLong_CheckExact(v))
                Py_INCREF(v);
        else
@@ -3503,6 +3659,7 @@ static PyObject *
 long_float(PyObject *v)
 {
        double result;
+       update_stat(STAT_FLOAT, (PyLongObject*)v, NULL);
        result = PyLong_AsDouble(v);
        if (result == -1.0 && PyErr_Occurred())
                return NULL;
@@ -3608,6 +3765,7 @@ long__format__(PyObject *self, PyObject *args)
 
        if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
                return NULL;
+       update_stat(STAT_FORMAT_ADV, (PyLongObject*)self, NULL);
        return _PyLong_FormatAdvanced(self,
                                      PyUnicode_AS_UNICODE(format_spec),
                                      PyUnicode_GET_SIZE(format_spec));
@@ -3621,7 +3779,7 @@ long_round(PyObject *self, PyObject *args)
        int ndigits = UNDEF_NDIGITS;
        double x;
        PyObject *res;
-       
+
        if (!PyArg_ParseTuple(args, "|i", &ndigits))
                return NULL;
 
@@ -3629,6 +3787,7 @@ long_round(PyObject *self, PyObject *args)
                return long_long(self);
 
        /* If called with two args, defer to float.__round__(). */
+       update_stat(STAT_ROUND, (PyLongObject*)self, NULL);
        x = PyLong_AsDouble(self);
        if (x == -1.0 && PyErr_Occurred())
                return NULL;
@@ -3682,19 +3841,19 @@ static PyMethodDef long_methods[] = {
 };
 
 static PyGetSetDef long_getset[] = {
-    {"real", 
+    {"real",
      (getter)long_long, (setter)NULL,
      "the real part of a complex number",
      NULL},
-    {"imag", 
+    {"imag",
      (getter)long_getN, (setter)NULL,
      "the imaginary part of a complex number",
      (void*)0},
-    {"numerator", 
+    {"numerator",
      (getter)long_long, (setter)NULL,
      "the numerator of a rational number in lowest terms",
      NULL},
-    {"denominator", 
+    {"denominator",
      (getter)long_getN, (setter)NULL,
      "the denominator of a rational number in lowest terms",
      (void*)1},
@@ -3812,7 +3971,7 @@ _PyLong_Init(void)
                        _Py_NewReference(op);
                        /* _Py_NewReference sets the ref count to 1 but
                         * the ref count might be larger. Set the refcnt
-                        * to the original refcnt + 1 */         
+                        * to the original refcnt + 1 */
                        Py_REFCNT(op) = refcnt + 1;
                        assert(Py_SIZE(op) == size);
                        assert(v->ob_digit[0] == abs(ival));
@@ -3824,6 +3983,7 @@ _PyLong_Init(void)
                v->ob_digit[0] = abs(ival);
        }
 #endif
+       init_stat();
        return 1;
 }
 
@@ -3841,4 +4001,5 @@ PyLong_Fini(void)
                _Py_ForgetReference((PyObject*)v);
        }
 #endif
+       dump_stat();
 }
diff --git a/Python/graminit.c b/Python/graminit.c

_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

[issue4258] Use 30-bit digits instead of 15-bit digits for Python integers.

Reply via email to