The pure Python in-tree JSON parser is *much* slower than the in-tree C JSON parser. A local test parsing a 100Mb JSON file showed the Python version taking 270 seconds. With the C wrapper, it took under 4 seconds.
The C extension will be used automatically if it can be built. If the extension fails to build, a warning is displayed and the build is restarted without the extension. The Serializer class is replaced with Python's built-in JSON library since the ability to process chunked data is not needed in that case. The extension should work with both Python 2.7 and Python 3.3+. --- Makefile.am | 2 +- python/automake.mk | 3 + python/ovs/_json.c | 268 +++++++++++++++++++++++++++++++++++++++++++++++++++++ python/ovs/json.py | 11 +++ python/setup.py | 51 +++++++++- 5 files changed, 332 insertions(+), 3 deletions(-) create mode 100644 python/ovs/_json.c diff --git a/Makefile.am b/Makefile.am index 69dbe3d..8cb8523 100644 --- a/Makefile.am +++ b/Makefile.am @@ -251,7 +251,7 @@ config-h-check: @cd $(srcdir); \ if test -e .git && (git --version) >/dev/null 2>&1 && \ git --no-pager grep -L '#include <config\.h>' `git ls-files | grep '\.c$$' | \ - grep -vE '^datapath|^lib/sflow|^third-party|^datapath-windows'`; \ + grep -vE '^datapath|^lib/sflow|^third-party|^datapath-windows|^python'`; \ then \ echo "See above for list of violations of the rule that"; \ echo "every C source file must #include <config.h>."; \ diff --git a/python/automake.mk b/python/automake.mk index dc62422..ecad39d 100644 --- a/python/automake.mk +++ b/python/automake.mk @@ -46,6 +46,9 @@ EXTRA_DIST += \ python/README.rst \ python/setup.py +# C extension support. +EXTRA_DIST += python/ovs/_json.c + PYFILES = $(ovs_pyfiles) python/ovs/dirs.py $(ovstest_pyfiles) EXTRA_DIST += $(PYFILES) PYCOV_CLEAN_FILES += $(PYFILES:.py=.py,cover) diff --git a/python/ovs/_json.c b/python/ovs/_json.c new file mode 100644 index 0000000..c4e2af3 --- /dev/null +++ b/python/ovs/_json.c @@ -0,0 +1,268 @@ +#include "Python.h" +#include <openvswitch/lib/json.h> +#include "structmember.h" + +#if PY_MAJOR_VERSION >= 3 +#define IS_PY3K +#endif + +typedef struct { + PyObject_HEAD + struct json_parser *_parser; +} json_ParserObject; + +static void +Parser_dealloc(json_ParserObject * p) +{ + json_parser_abort(p->_parser); + Py_TYPE(p)->tp_free(p); +} + +static PyObject * +Parser_new(PyTypeObject * type, PyObject * args, PyObject * kwargs) +{ + json_ParserObject *self; + static char *kwlist[] = { "check_trailer", NULL }; + PyObject *check_trailer = NULL; + int ct_int = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O", kwlist, + &check_trailer)) { + return NULL; + } + + if (check_trailer != NULL) { + ct_int = PyObject_IsTrue(check_trailer); + if (ct_int < 0) { + return NULL; + } else if (ct_int) { + ct_int = JSPF_TRAILER; + } + } + + self = (json_ParserObject *) type->tp_alloc(type, 0); + if (self != NULL) { + self->_parser = json_parser_create(ct_int); + } + + return (PyObject *) self; +} + +static PyObject * +Parser_feed(json_ParserObject * self, PyObject * args) +{ + Py_ssize_t input_sz; + PyObject *input; + size_t rd; + char *input_str; + + if (self->_parser == NULL) { + return NULL; + } + + if (!PyArg_UnpackTuple(args, "input", 1, 1, &input)) { + return NULL; + } +#ifdef IS_PY3K + if ((input_str = PyUnicode_AsUTF8AndSize(input, &input_sz)) == NULL) { +#else + if (PyString_AsStringAndSize(input, &input_str, &input_sz) < 0) { +#endif + return NULL; + } + + rd = json_parser_feed(self->_parser, input_str, (size_t) input_sz); + +#ifdef IS_PY3K + return PyLong_FromSize_t(rd); +#else + return PyInt_FromSize_t(rd); +#endif +} + +static PyObject * +Parser_is_done(json_ParserObject * self) +{ + if (self->_parser == NULL) { + return NULL; + } + return PyBool_FromLong(json_parser_is_done(self->_parser)); +} + +static PyObject * +json_to_python(struct json *json) +{ + switch (json->type) { + case JSON_NULL: + Py_RETURN_NONE; + case JSON_FALSE: + Py_RETURN_FALSE; + case JSON_TRUE: + Py_RETURN_TRUE; + case JSON_OBJECT:{ + struct shash_node *node; + PyObject *dict = PyDict_New(); + + if (dict == NULL) { + return PyErr_NoMemory(); + } + SHASH_FOR_EACH(node, json->u.object) { + PyObject *key = PyUnicode_FromString(node->name); + PyObject *val = json_to_python(node->data); + + if (!(key && val) || PyDict_SetItem(dict, key, val)) { + Py_XDECREF(key); + Py_XDECREF(val); + Py_XDECREF(dict); + return NULL; + } + + Py_XDECREF(key); + Py_XDECREF(val); + } + return dict; + } + case JSON_ARRAY:{ + int i; + PyObject *arr = PyList_New(json->u.array.n); + + if (arr == NULL) { + return PyErr_NoMemory(); + } + for (i = 0; i < json->u.array.n; i++) { + PyObject *item = json_to_python(json->u.array.elems[i]); + + if (!item || PyList_SetItem(arr, i, item)) { + Py_XDECREF(arr); + return NULL; + } + } + return arr; + } + case JSON_REAL: + if (json->u.real != 0) { + return PyFloat_FromDouble(json->u.real); + } /* fall through to treat 0 as int */ + case JSON_INTEGER: +#ifdef IS_PY3K + return PyLong_FromLong((long) json->u.integer); +#else + return PyInt_FromLong((long) json->u.integer); +#endif + + case JSON_STRING: + return PyUnicode_FromString(json->u.string); + default: + return NULL; + } +} + +static PyObject * +Parser_finish(json_ParserObject * self) +{ + struct json *json; + PyObject *obj; + + if (self->_parser == NULL) { + return NULL; + } + + json = json_parser_finish(self->_parser); + self->_parser = NULL; + obj = json_to_python(json); + return obj; +} + +static PyMethodDef Parser_methods[] = { + {"feed", (PyCFunction) Parser_feed, METH_VARARGS, + "Feed data to the parser and return the index of the last object."}, + {"is_done", (PyCFunction) Parser_is_done, METH_NOARGS, + "Whether the parser has finished decoding an object."}, + {"finish", (PyCFunction) Parser_finish, METH_NOARGS, + "Finish parsing and return Python object parsed."}, + {NULL}, +}; + +static PyTypeObject json_ParserType = { + PyVarObject_HEAD_INIT(NULL, 0) + "ovs._json.Parser", /* tp_name */ + sizeof (json_ParserObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor) Parser_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ + "Parser objects", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + Parser_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + Parser_new, /* tp_new */ +}; + +#ifdef IS_PY3K +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "ovs._json", /* m_name */ + "OVS JSON Parser module", /* m_doc */ + 0, /* m_size */ + 0, /* m_methods */ + 0, /* m_slots */ + 0, /* m_traverse */ + 0, /* m_clear */ + 0, /* m_free */ +}; + +#define INITERROR return NULL +#else /* !IS_PY3K */ +#define INITERROR return +#endif + +PyMODINIT_FUNC +#ifdef IS_PY3K +PyInit__json(void) +#else +init_json(void) +#endif +{ + PyObject *m; + + if (PyType_Ready(&json_ParserType) < 0) { + INITERROR; + } +#ifdef IS_PY3K + m = PyModule_Create(&moduledef); +#else + m = Py_InitModule3("ovs._json", NULL, "OVS JSON Parser module"); +#endif + + Py_INCREF(&json_ParserType); + PyModule_AddObject(m, "Parser", (PyObject *) & json_ParserType); +#ifdef IS_PY3K + return m; +#endif +} diff --git a/python/ovs/json.py b/python/ovs/json.py index ff986ea..ea0400a 100644 --- a/python/ovs/json.py +++ b/python/ovs/json.py @@ -18,6 +18,11 @@ import sys import six from six.moves import range +try: + import ovs._json +except ImportError: + pass + __pychecker__ = 'no-stringiter' escapes = {ord('"'): u"\\\"", @@ -165,6 +170,12 @@ class Parser(object): # Maximum height of parsing stack. # MAX_HEIGHT = 1000 + def __new__(cls, *args, **kwargs): + try: + return ovs._json.Parser(*args, **kwargs) + except NameError: + return super(Parser, cls).__new__(cls) + def __init__(self, check_trailer=False): self.check_trailer = check_trailer diff --git a/python/setup.py b/python/setup.py index 49c8c4e..19c1f18 100644 --- a/python/setup.py +++ b/python/setup.py @@ -13,6 +13,10 @@ from __future__ import print_function import sys +from distutils.command.build_ext import build_ext +from distutils.errors import CCompilerError, DistutilsExecError, \ + DistutilsPlatformError + import setuptools VERSION = "unknown" @@ -25,8 +29,33 @@ except IOError: file=sys.stderr) sys.exit(-1) +ext_errors = (CCompilerError, DistutilsExecError, DistutilsPlatformError) +if sys.platform == 'win32': + ext_errors += (IOError, ValueError) + + +class BuildFailed(Exception): + pass + + +class try_build_ext(build_ext): + # This class allows C extension building to fail + # NOTE: build_ext is not a new-style class + + def run(self): + try: + build_ext.run(self) + except DistutilsPlatformError: + raise BuildFailed() -setuptools.setup( + def build_extension(self, ext): + try: + build_ext.build_extension(self, ext) + except ext_errors: + raise BuildFailed() + + +setup_args = dict( name='ovs', description='Open vSwitch library', version=VERSION, @@ -46,5 +75,23 @@ setuptools.setup( 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.4', - ] + ], + ext_modules=[setuptools.Extension("ovs._json", sources=["ovs/_json.c"], + libraries=['openvswitch'])], + cmdclass={'build_ext': try_build_ext}, ) + +try: + setuptools.setup(**setup_args) +except BuildFailed: + BUILD_EXT_WARNING = ("WARNING: The C extension could not be compiled, " + "speedups are not enabled.") + print("*" * 75) + print(BUILD_EXT_WARNING) + print("Failure information, if any, is above.") + print("Retrying the build without the C extension.") + print("*" * 75) + + del(setup_args['cmdclass']) + del(setup_args['ext_modules']) + setuptools.setup(**setup_args) -- 1.8.3.1 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev