Ahmed wrote... > I am working on a project where I need to parse incoming emails > (Microsoft Outlook)
# I'm not sure if you are able to bypass Outlook (and have Python fetch the
# mail itself using poplib), but if you are, the following code might be
# useful. I use this to pry apart emails which might contain multiple MIME
# parts.

import logging
import poplib
import smtplib
# email.parser / email.utils replace the deprecated email.Parser and rfc822
# modules; both spellings of the replacements exist on Python 2.5+ and 3.x.
from email.parser import Parser
from email.utils import parseaddr

popserver = "pop.site.com"
popuser = "[EMAIL PROTECTED]"
poppassword = "secret"


def _totext(line):
    """Return *line* as text.

    poplib hands back ``bytes`` lines on Python 3 and ``str`` lines on
    Python 2; the rest of this script works on text only.
    """
    # On Python 2 ``bytes is str``, so this branch is never taken there.
    if isinstance(line, bytes) and not isinstance(line, str):
        # NOTE(review): UTF-8 with replacement is an assumption; raw mail may
        # carry other 8-bit charsets -- confirm against your mail source.
        return line.decode("utf-8", "replace")
    return line


def separate(msg):
    """Split a message into a header part and a body part.

    *msg* is either the whole message as one string or a sequence of lines.
    Returns ``(header_lines, body_lines)``, split at the first empty line
    (the empty line itself is dropped).  If there is no empty line, every
    line is treated as a header and the body is empty.
    """
    if isinstance(msg, str):
        msg = msg.split('\n')
    try:
        emptyline = msg.index('')
    except ValueError:
        # Headers only: no blank separator line present.
        return msg[:], []
    return msg[:emptyline], msg[emptyline + 1:]


def headerline(header, tag="From: "):
    """Return the value of the first header line that starts with *tag*.

    Matching ignores case, and surrounding whitespace is stripped from the
    value, so ``"From:"`` and ``"From: "`` give the same result.  Returns
    ``""`` when no line matches.
    """
    wanted = tag.lower()
    for h in header:
        if h.lower().startswith(wanted):
            # Slice exactly the tag off; strip() absorbs the optional space
            # after the colon instead of blindly skipping one character.
            return h[len(tag):].strip()
    return ""


def enummimeparts(msg, extract, level=1, verbose=False):
    """Recursively enumerate the contents of a MIME message.

    *msg* is the message (or a sub-part) as a string.  The first text/plain
    and the first text/html part found are stored in the *extract* dict under
    those keys; if anything else is found (attachments, further text parts)
    ``extract["others"]`` is set to True.  With *verbose* set, the part tree
    is printed, indented by *level* tabs.
    """
    m = Parser().parsestr(msg)
    if m.is_multipart():
        if verbose:
            print("%s %s" % ('\t' * level, 'multipart'))
        for part in m.get_payload():
            enummimeparts(part.as_string(), extract, level + 1, verbose)
        return

    t = m.get_content_type()
    if verbose:
        print("%s %s" % ('\t' * level, t))
    if t in ("text/plain", "text/html") and t not in extract:
        # NOTE: the body is taken from the serialized part, so any
        # Content-Transfer-Encoding (base64, quoted-printable) is left as is.
        headers, body = separate(m.as_string())
        extract[t] = '\n'.join(body)
    else:
        extract["others"] = True


def extracttext(msg):
    """Extract the first text/plain and text/html MIME parts from a message.

    Returns ``(plain, html, others)``: the two bodies (``None`` when absent)
    and a flag telling whether any other parts were present.
    """
    extract = {}
    enummimeparts(msg, extract)
    return (extract.get("text/plain"),
            extract.get("text/html"),
            extract.get("others", False))


def processmessage(msgnr):
    """Fetch message *msgnr* from the POP server, process it, then delete it.

    Uses the module-level ``pop`` connection.  The message is only marked
    for deletion after ``processtext`` returned without raising.
    """
    # get a message from the POP server, extract the parts
    _, lines, _ = pop.retr(msgnr)
    lines = [_totext(line) for line in lines]
    msg = '\n'.join(lines)
    headers, _ = separate(lines)
    _, fromaddress = parseaddr(headerline(headers, "From:"))
    subject = headerline(headers, "Subject:")
    logging.info("%s (%s)", subject, fromaddress)

    plain, html, _ = extracttext(msg)
    # prefer flat text; if not present in the message, fallback to HTML
    # content (if any)
    texttoprocess = plain or html or ""

    # now do something useful with the text
    # NOTE: processtext() is not defined in this snippet; supply your own
    # before running.
    processtext(texttoprocess)

    # delete message from pop server after processing
    pop.dele(msgnr)


if __name__ == "__main__":
    # connect to the pop server and process all messages
    # (guarded so that importing this module does not open a connection)
    logging.info("Checking pop server '%s', user '%s'", popserver, popuser)
    pop = poplib.POP3(popserver)
    try:
        pop.user(popuser)
        pop.pass_(poppassword)
        stat = pop.stat()
        for n in range(stat[0]):
            processmessage(n + 1)
    finally:
        # Always close the session, even if processing a message failed.
        pop.quit()

# --
# "The ability of the OSS process to collect and harness the collective IQ of
# thousands of individuals across the Internet is simply amazing."
#   - Vinod Vallopillil
# http://www.catb.org/~esr/halloween/halloween4.html
# --
# http://mail.python.org/mailman/listinfo/python-list