On Wednesday, 30 October 2019 12:40:06 UTC+5:30, eman banerjee wrote: > Hi > > I am working on a project where we make connections to webapp mail and > extract subject, sender,body etc from mails and save it in dataframe and > insert it n SQL DB. > > My next challenge is to remove any duplicate mails from mailbox. > Could you kindly help me. > It can be a new mail which is entering the mailbox is first checked , if its > a duplicate email,it will not be inserted to mail box > > > Thanks
code is below class EmailAnalysis: '''Method help to communicate with outlook''' def emailExchangeCredentials(self): try: cipher = Fernet(b'***************') account_details = b'*****************' user_details = b'****************' sec_details = b'*****************' account_details_decrypt = cipher.decrypt(account_details).decode('utf-8') user_details_decrypt = cipher.decrypt(user_details).decode('utf-8') sec_details_decrypt = cipher.decrypt(sec_details).decode('utf-8') credentials = Credentials(user_details_decrypt, sec_details_decrypt) account = Account(account_details_decrypt, credentials=credentials, autodiscover=True) return account except: account = 'Failure' return account def createDBConn(self): cipher = Fernet(b'*********************') ip = b'*******************' port = **** service_name = b'*************' attuid = b'**********************' password = b'********************' ip_decrypt = cipher.decrypt(ip).decode('utf-8') service_name_decrypt = cipher.decrypt(service_name).decode('utf-8') attuid_decrypt = cipher.decrypt(attuid).decode('utf-8') password_decrypt = cipher.decrypt(password).decode('utf-8') dsn_tns = ora.makedsn(ip_decrypt, port,service_name_decrypt) conn = ora.connect(attuid_decrypt,password_decrypt,dsn_tns) return conn def extractEmail(self, account, Email_Data, conn): self.Email_Data = Email_Data SUBJECT = [] SENDER = [] JOB_NAME = [] EMAIL_DATE = [] EMAIL_BODY = [] REMEDIATION_ID = [] PRIORITY = [] i = 0 to_folder = account.inbox / 'Read-Mail' cursor = conn.cursor() #print("Type.......................:",type(account.inbox.all())) for item in account.inbox.all().order_by('-datetime_received')[:20]: if len(item.sender.email_address.split('*****.com'))>1: if len(item.subject.split('GREEN'))<2: SUBJECT.append(item.subject) SENDER.append(item.sender.email_address) find_Job = item.sender.email_address.split('@') job_name = find_Job[0] JOB_NAME.append(str(job_name)) #print("Date time received: ",item.datetime_received.date()) EMAIL_DATE.append(item.datetime_received.date()) date = item.datetime_received.date() date_ = date.strftime('%Y-%m-%d') print("Date:",date_) soup = BeautifulSoup(item.body) email_body_unfor = soup.get_text() #print("Emain Body Ori:- ",email_body_unfor) email_body_unfor_arr = email_body_unfor.split('-->') if len(email_body_unfor_arr) > 1: email_body_unfor_foot = email_body_unfor_arr[1] #print("Emain Body Ori2:- ",email_body_unfor_foot) email_body_unfor_sp_arr = email_body_unfor_foot.split("This message is for the designated recipient only") if len(email_body_unfor_sp_arr) > 1: email_body_unfor_sp = email_body_unfor_sp_arr[0] #print("Emain Body Ori3:- ",email_body_unfor_sp) email_body_1 = email_body_unfor_sp.strip() email_body_2 = email_body_1.rstrip() email_body = email_body_2.lstrip() #print("Email Body: ",email_body) EMAIL_BODY.append(email_body) else: email_body_1 = email_body_unfor.strip() email_body_2 = email_body_1.rstrip() email_body = email_body_2.lstrip() EMAIL_BODY.append(email_body) i = i+1 i = 1000+i Issue_ID = i PROCESS_NAME = str(find_Job[0]) MESSAGE_1 = '' MESSAGE_2 = '' MESSAGE = email_body sql = "INSERT INTO ***DBname** (SENDER,SUBJECT,EMAIL_BODY,EMAIL_DATE) VALUES ("+"'"+str(item.sender.email_address)+"',"+"'"+str(item.subject)+"',"+"'"+str(email_body)+"',"+"'"+str(date_)+"')" print('SQL :- ',sql) result = cursor.execute(sql.encode('utf-8')) conn.commit() #print('to folder:-',to_folder) item.move(to_folder) else: item.move(to_folder) else: item.move(to_folder) cursor.close() conn.close() Email_Data = pd.DataFrame(list(zip(SUBJECT, SENDER,EMAIL_DATE, EMAIL_BODY)),columns =['SUBJECT', 'SENDER', 'EMAIL_DATE', 'EMAIL_BODY']) return Email_Data def emailAnalysisController(self,Task_Details): try: #print('inside controller') self.Task_Details = Task_Details acc = self.emailExchangeCredentials(self) if acc == 'Failure': acc = self.emailExchangeCredentials(self) Email_Data = pd.DataFrame(columns=['EMAIL_DATE','SENDER','EMAIL_BODY','SUBJECT']) Email_Data['EMAIL_DATE'] = pd.to_datetime(Email_Data['EMAIL_DATE']) conn = self.createDBConn(self) Email_Data = self.extractEmail(self, acc,Email_Data, conn) exection_status = 'Success' return exection_status except: exection_status = 'Failure' return exection_status -- https://mail.python.org/mailman/listinfo/python-list