Hi,
I have a python script that supposed to go through a folder, pick the
zipped files, unzip them and process the data inside. I'm not sure
where i'm going wrong with this script because it all seems correct:
#! /usr/bin/env python
import zipfile
import os
from elementtree import ElementTree as ET
import MySQLdb
import sys
sys.stdout = open("log",'w')
sys.stderr = open("log",'w')
username = 'xxx'
password = 'xxx'
host = 'xxx'
database = 'xxx'
infolder = "/home/username/received/"
outfolder = "/home/username/webapps/app1/public/processed/"
class RecursiveFileIterator:
def __init__ (self, *rootDirs):
self.dirQueue = list (rootDirs)
self.includeDirs = None
self.fileQueue = []
def __getitem__ (self, index):
while len (self.fileQueue) == 0:
self.nextDir ()
result = self.fileQueue [0]
del self.fileQueue [0]
return result
def nextDir (self):
dir = self.dirQueue [0] # fails with IndexError, which is
fine
# for iterator interface
del self.dirQueue [0]
list = os.listdir (dir)
join = os.path.join
isdir = os.path.isdir
for basename in list:
fullPath = join (dir, basename)
if isdir (fullPath):
self.dirQueue.append (fullPath)
if self.includeDirs:
self.fileQueue.append (fullPath)
else:
self.fileQueue.append (fullPath)
def unzip(folder):
filelist = RecursiveFileIterator(folder)
for one in filelist:
xmlname = one.replace(".zip",".xml")
pngname = one.replace(".zip",".png")
mp3name = one.replace(".zip",".mp3")
zfile = zipfile.ZipFile(one,'r')
for filename in zfile.namelist():
data = zfile.read(filename)
file = open(infolder+"/"+filename,'w')
file.write(data)
file.close()
parse_xml(xmlname)
os.rename(xmlname,outfolder+xmlname.strip(infolder+'/'))
try:
os.rename(pngname,outfolder+pngname.strip(infolder+'/'))
except:
print one+" has no picture file"
try:
os.rename(mp3name,outfolder+mp3name.strip(infolder+'/'))
except:
print one+" has no sound file"
os.remove(one)
def parse_xml(filename):
topic =[]
f = open(filename,'r')
#print f
content = f.read()
f.close()
#print content
element = ET.XML(content)
#Extract the elements from xml file
for subelement in element:
if subelement.tag=='datestamp':
date = str(subelement.text)
if subelement.tag=='properties':
for each in subelement:
if each.tag=='name':
name = str(each.text)
elif each.tag=='age':
age = str(each.text)
elif each.tag=='gender':
gender = str(each.text)
elif each.tag=='email':
email = str(each.text)
elif each.tag=='language':
language = str(each.text)
elif each.tag=='otherlanguage':
otherlanguage = str(each.text)
elif each.tag=='country':
country = str(each.text)
elif each.tag=='city':
city = str(each.text)
elif each.tag=='referral':
referral = str(each.text)
if subelement.tag=='recording':
for sub_subelement in subelement:
if sub_subelement.tag=='duration':
duration = str(sub_subelement.text)
if sub_subelement.tag=='file':
sound = str(sub_subelement.text)
if sub_subelement.tag=='image':
picture = str(sub_subelement.text)
if subelement.tag=='summary':
summary = str(subelement.text)
if subelement.tag=='categories':
for sub_subelement in subelement:
if sub_subelement.text == 'True':
topic.append(str(sub_subelement.tag))
if sub_subelement.tag == 'othercategory':
topic.append(str(sub_subelement.text))
db=MySQLdb.connect(host=host,user=username,passwd=password,db=database)
c = db.cursor()
topic = str(topic)
c.execute("""INSERT INTO
stories(name,age,gender,email,language,otherlanguage,country,city,referral,duration,audiofilename,picture,summary,topic)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)""",
(name,age,gender,email,language,otherlanguage,country,city,referral,duration,sound,picture,summary,topic))
unzip(infolder)
The error I keep getting is:
Traceback (most recent call last):
File "processor3.py", line 124, in ?
unzip(infolder)
File "processor3.py", line 53, in unzip
zfile = zipfile.ZipFile(one,'r')
File "/usr/lib/python2.4/zipfile.py", lin