Hi all, I have created an example using libxml2, based on the code that appears at http://xmlsoft.org/python.html. My example processes enough HTML files to show that the memory consumption keeps rising until the process ends (I check it with the 'top' command).
# I don't know if I am forgetting something in the code, as I have not been
# able to find any example on the web.
#
# Thanks in advance,
# Cesar
#
# Note: I have also tried to put the cleanup functions inside the 'for' loop.
#
# ****************************************] The Code [****************************************
#!/usr/bin/python -u
"""Repeatedly push-parse every HTML file of a directory with libxml2.

Usage: <script> <dir html files>

Each *.htm / *.html file found directly in the given directory is read and
fed to a libxml2 HTML push parser NUM_ITERS times.  At the end the libxml2
memory debugger is queried and any outstanding allocation is reported.
"""

import os
import sys

# How many times the whole set of input files is parsed.
NUM_ITERS = 5


#------------------------------------------------------------------------------
class callback:
    """SAX-style handler passed to the push parser.

    Every event is ignored except startDocument, which prints a progress dot.
    """

    def startDocument(self):
        print(".")

    def endDocument(self):
        pass

    def startElement(self, tag, attrs):
        pass

    def endElement(self, tag):
        pass

    def characters(self, data):
        pass

    def warning(self, msg):
        pass

    def error(self, msg):
        pass

    def fatalError(self, msg):
        pass
#------------------------------------------------------------------------------


def isHtmlFile(fileName):
    """Return True when fileName ends exactly in '.htm' or '.html'.

    An exact suffix test is used so that names such as 'page.htmx' or
    'page.html5' are not mistaken for HTML files.
    """
    return fileName.endswith(".htm") or fileName.endswith(".html")


def findHtmlFiles(inputPath):
    """Return the names (not full paths) of the HTML files in inputPath."""
    return [name for name in os.listdir(inputPath) if isHtmlFile(name)]


def main(argv):
    """Run the parsing loop; return the process exit status.

    argv is the full argument vector (program name first, then the directory
    that holds the HTML files).  Returns 1 on a usage or input error and 0
    otherwise.
    """
    # Imported here so that merely importing this module has no side effects
    # and does not require libxml2.
    import libxml2

    # Memory debug specific: enabled before any parsing takes place.
    libxml2.debugMemory(1)

    programName = os.path.basename(argv[0])
    if len(argv) != 2:
        print("Use: %s <dir html files>" % programName)
        return 1

    inputPath = argv[1]
    # isdir (rather than exists) so that a regular file is rejected here
    # instead of making os.listdir raise further down.
    if not os.path.isdir(inputPath):
        print("Error: directory does not exist")
        return 1

    inputFileNames = findHtmlFiles(inputPath)
    if not inputFileNames:
        print("Error: no input files")
        return 1

    handler = callback()

    for i in range(NUM_ITERS):
        for inputFileName in inputFileNames:
            print(inputFileName)
            # os.path.join supplies the separator when the directory argument
            # has no trailing slash.
            inputFilePath = os.path.join(inputPath, inputFileName)
            f = open(inputFilePath)
            try:
                data = f.read()
            finally:
                f.close()
            ctxt = libxml2.htmlCreatePushParser(handler, "", 0, inputFileName)
            # The whole document goes in as one chunk; the final 1 marks it
            # as the last chunk.
            ctxt.htmlParseChunk(data, len(data), 1)
            # Drop the reference to the parser context before the next file.
            ctxt = None

    # Memory debug specific
    libxml2.cleanupParser()
    leaked = libxml2.debugMemory(1)
    if leaked == 0:
        print("OK")
    else:
        print("Memory leak %d bytes" % leaked)
        libxml2.dumpMemory()

    # Other cleanup functions
    #libxml2.cleanupCharEncodingHandlers()
    #libxml2.cleanupEncodingAliases()
    #libxml2.cleanupGlobals()
    #libxml2.cleanupInputCallbacks()
    #libxml2.cleanupOutputCallbacks()
    #libxml2.cleanupPredefinedEntities()
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))

# --
# http://mail.python.org/mailman/listinfo/python-list