On Apr 5, 8:00 pm, hlubenow <[EMAIL PROTECTED]> wrote:
> erikcw wrote:
> > Hi all,
> >
> > I'm trying to extract zip file (containing an xml file) from an email
> > so I can process it. But I'm running up against some brick walls.
> > I've been googling and reading all afternoon, and can't seem to figure
> > it out.
> >
> > Here is what I have so far.
> >
> > p = POP3("mail.server.com")
> > print p.getwelcome()
> > # authentication, etc.
> > print p.user("USER")
> > print p.pass_("PASS")
> > print "This mailbox has %d messages, totaling %d bytes." % p.stat()
> > msg_list = p.list()
> > print msg_list
> > if not msg_list[0].startswith('+OK'):
> >     # Handle error
> >     exit(1)
> >
> > for msg in msg_list[1]:
> >     msg_num, _ = msg.split()
> >     resp = p.retr(msg_num)
> >     if resp[0].startswith('+OK'):
> >         #print resp, '=======================\n'
> >         #extract message body and attachment.
> >         parsed_msg = email.message_from_string('\n'.join(resp[1]))
> >         payload= parsed_msg.get_payload(decode=True)
> >         print payload #doesn't seem to work
> >     else:
> >         pass# Deal with error retrieving message.
> >
> > How do I:
> > a) retrieve the body of the email into a string so I can do some
> > processing? (I can get at the header attributes without any trouble)
> > b) retrieve the zip file attachment, and unzip into a string for xml
> > processing?
> >
> > Thanks so much for your help!
> > Erik
>
> Hi,
>
> some weeks ago I wrote some code to extract attachments from emails.
> It's not that long, so maybe it could be of help for you:
>
> -------------------------------------------
>
> #!/usr/bin/env python
>
> import poplib
> import email
> import os
> import sys
> import string
>
> #
> # attsave.py
> # Check emails at PROVIDER for attachments and save them to SAVEDIR.
> #
>
> PROVIDER = "pop.YourMailProvider.de"
> USER = "YourUserName"
> PASSWORD = "YourPassword"
>
> SAVEDIR = "/home/YourUserDirectory"
>
> def saveAttachment(mstring):
>
>     filenames = []
>     attachedcontents = []
>
>     msg = email.message_from_string(mstring)
>
>     for part in msg.walk():
>
>         fn = part.get_filename()
>
>         if fn <> None:
>             filenames.append(fn)
>             attachedcontents.append(part.get_payload())
>
>     for i in range(len(filenames)):
>         fp = file(SAVEDIR + "/" + filenames[i], "wb")
>         fp.write(attachedcontents[i])
>         print 'Found and saved attachment "' + filenames[i] + '".'
>         fp.close()
>
> try:
>     client = poplib.POP3(PROVIDER)
> except:
>     print "Error: Provider not found."
>     sys.exit(1)
>
> client.user(USER)
> client.pass_(PASSWORD)
>
> anzahl_mails = len(client.list()[1])
>
> for i in range(anzahl_mails):
>     lines = client.retr(i + 1)[1]
>     mailstring = string.join(lines, "\n")
>     saveAttachment(mailstring)
>
> client.quit()
>
> -------------------------------------------
>
> See you
>
> H.
Thanks H!

I'm now able to get the name of the zip file, and the contents (is it still encoded?). I now need to be able to unzip the zip file into a string and get the body of the email into a string.

Here is my updated code:

p = POP3("mail.**********.com")
print p.getwelcome()
# authentication, etc.
print p.user("USER")
print p.pass_("PASS")
print "This mailbox has %d messages, totaling %d bytes." % p.stat()
msg_list = p.list()
print msg_list
if not msg_list[0].startswith('+OK'):
    # Handle error in listings
    exit(1)

for msg in msg_list[1]:
    msg_num, _ = msg.split()
    resp = p.retr(msg_num)
    if resp[0].startswith('+OK'):
        #print resp, '=======================\n'
        parsed_msg = email.message_from_string('\n'.join(resp[1]))
        for part in parsed_msg.walk():
            fn = part.get_filename()
            if fn <> None:
                fileObj = StringIO.StringIO()
                fileObj.write( part.get_payload() )
                #attachment = zlib.decompress(part.get_payload())
                #print zipfile.is_zipfile(fileObj)
                attachment = zipfile.ZipFile(fileObj)
                print fn, '\n', attachment
        payload= parsed_msg.get_payload(decode=True)
        print payload
    else:
        pass# Deal with error retrieving message.

I get this error:

Traceback (most recent call last):
  File "wa.py", line 208, in <module>
    attachment = zipfile.ZipFile(fileObj)
  File "/usr/lib/python2.5/zipfile.py", line 346, in __init__
    self._GetContents()
  File "/usr/lib/python2.5/zipfile.py", line 366, in _GetContents
    self._RealGetContents()
  File "/usr/lib/python2.5/zipfile.py", line 378, in _RealGetContents
    raise BadZipfile, "File is not a zip file"
zipfile.BadZipfile: File is not a zip file

Is the zip file still encoded? Or am I passing in the wrong arguments to the zipfile module?

Thanks for your help!
Erik
--
http://mail.python.org/mailman/listinfo/python-list