URL: https://github.com/freeipa/freeipa/pull/459 Author: tiran Title: #459: Faster JSON encoder/decoder Action: opened
PR body: """ Improve performance of FreeIPA's JSON serializer and deserializer. * Don't indent and sort keys. Both options trigger a slow path in Python's json package. Without indention and sorting, encoding mostly happens in optimized C code. * Replace O(n) type checks with O(1) type lookup and eliminate the use of isinstance(). * Check each client capability only once for every conversion. * Use decoder's obj_hook feature to traverse the object tree once and to eliminate calls to isinstance(). Closes: https://fedorahosted.org/freeipa/ticket/6655 Signed-off-by: Christian Heimes <chei...@redhat.com> """ To pull the PR as Git branch: git remote add ghfreeipa https://github.com/freeipa/freeipa git fetch ghfreeipa pull/459/head:pr459 git checkout pr459
From d00d547d0024fe5712a33ccabcd5c03564b10223 Mon Sep 17 00:00:00 2001 From: Christian Heimes <chei...@redhat.com> Date: Mon, 13 Feb 2017 09:46:39 +0100 Subject: [PATCH] Faster JSON encoder/decoder Improve performance of FreeIPA's JSON serializer and deserializer. * Don't indent and sort keys. Both options trigger a slow path in Python's json package. Without indention and sorting, encoding mostly happens in optimized C code. * Replace O(n) type checks with O(1) type lookup and eliminate the use of isinstance(). * Check each client capability only once for every conversion. * Use decoder's obj_hook feature to traverse the object tree once and to eliminate calls to isinstance(). Closes: https://fedorahosted.org/freeipa/ticket/6655 Signed-off-by: Christian Heimes <chei...@redhat.com> --- ipalib/rpc.py | 209 +++++++++++++++++++++++++++++++------------------ ipaserver/rpcserver.py | 6 +- 2 files changed, 133 insertions(+), 82 deletions(-) diff --git a/ipalib/rpc.py b/ipalib/rpc.py index 7d9f6ec..25abc7f 100644 --- a/ipalib/rpc.py +++ b/ipalib/rpc.py @@ -274,67 +274,140 @@ def xml_dumps(params, version, methodname=None, methodresponse=False, ) -def json_encode_binary(val, version): - ''' - JSON cannot encode binary values. We encode binary values in Python str - objects and text in Python unicode objects. In order to allow a binary - object to be passed through JSON we base64 encode it thus converting it to - text which JSON can transport. To assure we recognize the value is a base64 - encoded representation of the original binary value and not confuse it with - other text we convert the binary value to a dict in this form: - - {'__base64__' : base64_encoding_of_binary_value} - - This modification of the original input value cannot be done "in place" as - one might first assume (e.g. replacing any binary items in a container - (e.g. list, tuple, dict) with the base64 dict because the container might be - an immutable object (i.e. a tuple). Therefore this function returns a copy - of any container objects it encounters with tuples replaced by lists. This - is O.K. because the JSON encoding will map both lists and tuples to JSON - arrays. - ''' - - if isinstance(val, dict): - new_dict = {} - for k, v in val.items(): - new_dict[k] = json_encode_binary(v, version) - return new_dict - elif isinstance(val, (list, tuple)): - new_list = [json_encode_binary(v, version) for v in val] - return new_list - elif isinstance(val, bytes): - encoded = base64.b64encode(val) - if not six.PY2: - encoded = encoded.decode('ascii') - return {'__base64__': encoded} - elif isinstance(val, Decimal): - return unicode(val) - elif isinstance(val, DN): - return str(val) - elif isinstance(val, datetime.datetime): - if capabilities.client_has_capability(version, 'datetime_values'): +class _JSONConverter(dict): + __slots__ = ('version', '_cap_datetime', '_cap_dnsname') + + _identity = object() + + def __init__(self, version, _identity=_identity): + super(_JSONConverter, self).__init__() + self.version = version + self._cap_datetime = None + self._cap_dnsname = None + self.update({ + unicode: _identity, + bool: _identity, + type(None): _identity, + float: _identity, + Decimal: unicode, + DN: str, + Principal: unicode, + DNSName: self._enc_dnsname, + datetime.datetime: self._enc_datetime, + bytes: self._enc_bytes, + list: self._enc_list, + tuple: self._enc_list, + dict: self._enc_dict, + }) + # int, long + for t in six.integer_types: + self[t] = _identity + + def __missing__(self, typ): + # walk MRO to find best match + for c in typ.__mro__: + if c in self: + self[typ] = self[c] + return self[c] + # use issubclass to check for registered ABCs + for c in self: + if issubclass(typ, c): + self[typ] = self[c] + return self[c] + raise TypeError(typ) + + def convert(self, obj, _identity=_identity): + # obj.__class__ is twice as fast as type(obj) + func = self[obj.__class__] + return obj if func is _identity else func(obj) + + def _enc_datetime(self, val): + cap = self._cap_datetime + if cap is None: + cap = capabilities.client_has_capability(self.version, + 'datetime_values') + self._cap_datetime = cap + if cap: return {'__datetime__': val.strftime(LDAP_GENERALIZED_TIME_FORMAT)} else: return val.strftime(LDAP_GENERALIZED_TIME_FORMAT) - elif isinstance(val, DNSName): - if capabilities.client_has_capability(version, 'dns_name_values'): + + def _enc_dnsname(self, val): + cap = self._cap_dnsname + if cap is None: + cap = capabilities.client_has_capability(self.version, + 'dns_name_values') + self._cap_dnsname = cap + if cap: return {'__dns_name__': unicode(val)} else: return unicode(val) - elif isinstance(val, Principal): - return unicode(val) + + def _enc_bytes(self, val): + encoded = base64.b64encode(val) + if not six.PY2: + encoded = encoded.decode('ascii') + return {'__base64__': encoded} + + def _enc_list(self, val, _identity=_identity): + result = [] + append = result.append + for v in val: + func = self[v.__class__] + append(v if func is _identity else func(v)) + return result + + def _enc_dict(self, val, _identity=_identity, _iteritems=six.iteritems): + result = {} + for k, v in _iteritems(val): + func = self[v.__class__] + result[k] = v if func is _identity else func(v) + return result + + +def json_encode_binary(val, version): + """ + JSON cannot encode binary values. We encode binary values in Python str + objects and text in Python unicode objects. In order to allow a binary + object to be passed through JSON we base64 encode it thus converting it to + text which JSON can transport. To assure we recognize the value is a base64 + encoded representation of the original binary value and not confuse it with + other text we convert the binary value to a dict in this form: + + {'__base64__' : base64_encoding_of_binary_value} + + This modification of the original input value cannot be done "in place" as + one might first assume (e.g. replacing any binary items in a container + (e.g. list, tuple, dict) with the base64 dict because the container might be + an immutable object (i.e. a tuple). Therefore this function returns a copy + of any container objects it encounters with tuples replaced by lists. This + is O.K. because the JSON encoding will map both lists and tuples to JSON + arrays. + """ + result = _JSONConverter(version).convert(val) + return json.dumps(result) + + +def _ipa_obj_hook(dct): + if '__base64__' in dct: + return base64.b64decode(dct['__base64__']) + elif '__datetime__' in dct: + return datetime.datetime.strptime(dct['__datetime__'], + LDAP_GENERALIZED_TIME_FORMAT) + elif '__dns_name__' in dct: + return DNSName(dct['__dns_name__']) else: - return val + return dct def json_decode_binary(val): - ''' + """ JSON cannot transport binary data. In order to transport binary data we convert binary data to a form like this: - {'__base64__' : base64_encoding_of_binary_value} + {'__base64__' : base64_encoding_of_binary_value} - see json_encode_binary() + see json_encode_binary() After JSON had decoded the JSON stream back into a Python object we must recursively scan the object looking for any dicts which might represent @@ -345,31 +418,11 @@ def json_decode_binary(val): don't modify objects in place because of side effects which may be dangerous. Thus we elect to spend a few more cycles and avoid the possibility of unintended side effects in favor of robustness. - ''' + """ + if isinstance(val, bytes): + val = val.decode('utf-8') - if isinstance(val, dict): - if '__base64__' in val: - return base64.b64decode(val['__base64__']) - elif '__datetime__' in val: - return datetime.datetime.strptime(val['__datetime__'], - LDAP_GENERALIZED_TIME_FORMAT) - elif '__dns_name__' in val: - return DNSName(val['__dns_name__']) - else: - return dict((k, json_decode_binary(v)) for k, v in val.items()) - elif isinstance(val, list): - return tuple(json_decode_binary(v) for v in val) - else: - if isinstance(val, bytes): - try: - return val.decode('utf-8') - except UnicodeDecodeError: - raise ConversionError( - name=val, - error='incorrect type' - ) - else: - return val + return json.loads(val, object_hook=_ipa_obj_hook) def decode_fault(e, encoding='UTF-8'): @@ -1091,27 +1144,27 @@ def __request(self, name, args): payload = json_encode_binary(payload, version) if self.__verbose >= 2: - root_logger.info('Request: %s', - json.dumps(payload, sort_keys=True, indent=4)) + root_logger.info( + 'Request: %s', + json.dumps(json.loads(payload), sort_keys=True, indent=4) + ) response = self.__transport.request( self.__host, self.__handler, - json.dumps(payload).encode('utf-8'), + payload.encode('utf-8'), verbose=self.__verbose >= 3, ) try: - response = json_decode_binary( - json.loads(response.decode('utf-8'))) + response = json_decode_binary(response) except ValueError as e: raise JSONError(error=str(e)) if self.__verbose >= 2: root_logger.info( 'Response: %s', - json.dumps(json_encode_binary(response, version), - sort_keys=True, indent=4) + json.dumps(response, sort_keys=True, indent=4) ) error = response.get('error') if error: diff --git a/ipaserver/rpcserver.py b/ipaserver/rpcserver.py index 45550fb..82ca03c 100644 --- a/ipaserver/rpcserver.py +++ b/ipaserver/rpcserver.py @@ -489,13 +489,12 @@ def marshal(self, result, error, _id=None, principal=unicode(principal), version=unicode(VERSION), ) - response = json_encode_binary(response, version) - dump = json.dumps(response, sort_keys=True, indent=4) + dump = json_encode_binary(response, version) return dump.encode('utf-8') def unmarshal(self, data): try: - d = json.loads(data) + d = json_decode_binary(data) except ValueError as e: raise JSONError(error=e) if not isinstance(d, dict): @@ -504,7 +503,6 @@ def unmarshal(self, data): raise JSONError(error=_('Request is missing "method"')) if 'params' not in d: raise JSONError(error=_('Request is missing "params"')) - d = json_decode_binary(d) method = d['method'] params = d['params'] _id = d.get('id')
-- Manage your subscription for the Freeipa-devel mailing list: https://www.redhat.com/mailman/listinfo/freeipa-devel Contribute to FreeIPA: http://www.freeipa.org/page/Contribute/Code