Hello! I'm currently trying to write a small XML-RPC server for HTML data processing. The processing is done by the HTML Tidy library, but the problem is that it has a massive memory leak. Since processing is a blocking operation, I run it in a thread, but after some time, and after processing huge HTML documents, the daemon eats all the memory. I'm wondering if it's possible to load utidylib in a thread, do the processing, and then kill the thread and release the memory? Or maybe there is something like deferToProcess? Thanks in advance!
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""XML-RPC daemon that cleans up HTML with uTidylib.

Exposes a single XML-RPC method, ``tidify``, on TCP port 1100.  Each
request is handed to the reactor's thread pool because the tidy call is
blocking.
"""
import codecs
import gc
import sys

# The epoll reactor must be installed before ``twisted.internet.reactor``
# is imported, otherwise the default reactor gets installed first.
from twisted.internet import epollreactor
epollreactor.install()

from twisted.internet import protocol, defer, threads, reactor
from twisted.web import xmlrpc, server
from twisted.python import log, threadpool

# uTidylib is imported under the name ``tidy``; the code below calls
# ``tidy.parseString``, so this is the name that has to be bound.
import tidy

# Python 2 only: make implicit str<->unicode conversions use UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')

log.startLogging(sys.stdout)

# NOTE: ``gc.set_debug(gc.DEBUG_LEAK)`` was removed on purpose.  DEBUG_LEAK
# includes DEBUG_SAVEALL, which appends every unreachable object to
# ``gc.garbage`` instead of freeing it, so the process retains memory
# forever.  ``gc.set_threshold(1)`` (a collection after nearly every
# allocation) was dropped together with it.
gc.enable()

# Options passed to tidy for every document; built once, not per request.
TIDY_OPTIONS = {
    'drop-proprietary-attributes': '1',
    'output-xhtml': '1',
    'wrap': '0',
    'bare': '0',
    'clean': '1',
    'doctype': 'omit',
    'show-body-only': '1',
    'word-2000': '0',
    'escape-cdata': '0',
    'hide-comments': '1',
    'force-output': '1',
    'alt-text': '',
    'show-errors': '0',
    'show-warnings': '0',
    'tidy-mark': '0',
    'char-encoding': 'utf8',
}


class TidyProtocol(xmlrpc.XMLRPC):
    """XML-RPC resource offering HTML clean-up through tidy."""

    def xmlrpc_tidify(self, data):
        """Run tidy on ``data['html']`` in a worker thread.

        Returns a Deferred that fires with whatever ``tidyParse``
        returned, after it has passed through ``returnToClient``.
        """
        deferred = threads.deferToThread(self.tidyParse, data)
        deferred.addCallback(self.returnToClient)
        return deferred

    def tidyParse(self, data):
        """Tidy ``data['html']`` and return the result as a string.

        Called in a worker thread.  Returns ``None`` when the ``html``
        entry is ``None``.
        NOTE(review): a ``None`` result can only be sent back if the
        XML-RPC resource allows None values -- confirm for the Twisted
        version in use.
        """
        html = data['html']
        if html is None:
            return None

        print("Tidy start")
        document = tidy.parseString(html.encode('utf-8'), **TIDY_OPTIONS)
        # Hand back plain text rather than the tidy document object: the
        # XML-RPC layer needs a marshallable value, and dropping the
        # document here avoids keeping it referenced by the Deferred.
        return str(document)

    def returnToClient(self, data):
        """Force a garbage collection, log, and pass the result through."""
        gc.collect()
        print("Tidy end, retunring result")
        return data


if __name__ == '__main__':
    r = TidyProtocol()
    reactor.listenTCP(1100, server.Site(r))
    reactor.run()

# _______________________________________________
# Twisted-Python mailing list
# Twisted-Python@twistedmatrix.com
# http://twistedmatrix.com/cgi-bin/mailman/listinfo/twisted-python