Hi, I have a python script that supposed to go through a folder, pick the zipped files, unzip them and process the data inside. I'm not sure where i'm going wrong with this script because it all seems correct:
#! /usr/bin/env python import zipfile import os from elementtree import ElementTree as ET import MySQLdb import sys sys.stdout = open("log",'w') sys.stderr = open("log",'w') username = 'xxx' password = 'xxx' host = 'xxx' database = 'xxx' infolder = "/home/username/received/" outfolder = "/home/username/webapps/app1/public/processed/" class RecursiveFileIterator: def __init__ (self, *rootDirs): self.dirQueue = list (rootDirs) self.includeDirs = None self.fileQueue = [] def __getitem__ (self, index): while len (self.fileQueue) == 0: self.nextDir () result = self.fileQueue [0] del self.fileQueue [0] return result def nextDir (self): dir = self.dirQueue [0] # fails with IndexError, which is fine # for iterator interface del self.dirQueue [0] list = os.listdir (dir) join = os.path.join isdir = os.path.isdir for basename in list: fullPath = join (dir, basename) if isdir (fullPath): self.dirQueue.append (fullPath) if self.includeDirs: self.fileQueue.append (fullPath) else: self.fileQueue.append (fullPath) def unzip(folder): filelist = RecursiveFileIterator(folder) for one in filelist: xmlname = one.replace(".zip",".xml") pngname = one.replace(".zip",".png") mp3name = one.replace(".zip",".mp3") zfile = zipfile.ZipFile(one,'r') for filename in zfile.namelist(): data = zfile.read(filename) file = open(infolder+"/"+filename,'w') file.write(data) file.close() parse_xml(xmlname) os.rename(xmlname,outfolder+xmlname.strip(infolder+'/')) try: os.rename(pngname,outfolder+pngname.strip(infolder+'/')) except: print one+" has no picture file" try: os.rename(mp3name,outfolder+mp3name.strip(infolder+'/')) except: print one+" has no sound file" os.remove(one) def parse_xml(filename): topic =[] f = open(filename,'r') #print f content = f.read() f.close() #print content element = ET.XML(content) #Extract the elements from xml file for subelement in element: if subelement.tag=='datestamp': date = str(subelement.text) if subelement.tag=='properties': for each in subelement: if each.tag=='name': name = str(each.text) elif each.tag=='age': age = str(each.text) elif each.tag=='gender': gender = str(each.text) elif each.tag=='email': email = str(each.text) elif each.tag=='language': language = str(each.text) elif each.tag=='otherlanguage': otherlanguage = str(each.text) elif each.tag=='country': country = str(each.text) elif each.tag=='city': city = str(each.text) elif each.tag=='referral': referral = str(each.text) if subelement.tag=='recording': for sub_subelement in subelement: if sub_subelement.tag=='duration': duration = str(sub_subelement.text) if sub_subelement.tag=='file': sound = str(sub_subelement.text) if sub_subelement.tag=='image': picture = str(sub_subelement.text) if subelement.tag=='summary': summary = str(subelement.text) if subelement.tag=='categories': for sub_subelement in subelement: if sub_subelement.text == 'True': topic.append(str(sub_subelement.tag)) if sub_subelement.tag == 'othercategory': topic.append(str(sub_subelement.text)) db=MySQLdb.connect(host=host,user=username,passwd=password,db=database) c = db.cursor() topic = str(topic) c.execute("""INSERT INTO stories(name,age,gender,email,language,otherlanguage,country,city,referral,duration,audiofilename,picture,summary,topic) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)""", (name,age,gender,email,language,otherlanguage,country,city,referral,duration,sound,picture,summary,topic)) unzip(infolder) The error I keep getting is: Traceback (most recent call last): File "processor3.py", line 124, in ? unzip(infolder) File "processor3.py", line 53, in unzip zfile = zipfile.ZipFile(one,'r') File "/usr/lib/python2.4/zipfile.py", line 210, in __init__ self._GetContents() File "/usr/lib/python2.4/zipfile.py", line 230, in _GetContents self._RealGetContents() File "/usr/lib/python2.4/zipfile.py", line 240, in _RealGetContents endrec = _EndRecData(fp) File "/usr/lib/python2.4/zipfile.py", line 83, in _EndRecData fpin.seek(-22, 2) # Assume no archive comment. IOError: [Errno 22] Invalid argument I have confirmed that the folder contains the zipped files, and that the zipped files contain the xml file. No reason why it shouldn't work. The server is running Python 2.4. Please help me. -- http://mail.python.org/mailman/listinfo/python-list