Give me a solution for this problem. It is urgent. -- You received this message because you are subscribed to the Google Groups "tesseract-ocr" group. To unsubscribe from this group and stop receiving emails from it, send an email to tesseract-ocr+unsubscr...@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/tesseract-ocr/4666ae9f-d9cd-42f7-b85a-62172aa1298fn%40googlegroups.com.
import io
import json
import re
import sys

import cv2
import cv2 as cv
import matplotlib.image as Image
import numpy as np
import pytesseract as pt
from matplotlib import pyplot as plt
from PIL import Image, ImageEnhance
# Paths are raw strings so the Windows backslashes are never read as escapes.
TESSERACT_CMD = r'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe'
IMAGE_PATH = r'C:\Windows\DigitalLocker\p1.jpeg'
OUTPUT_JSON = 'info1.json'

# A cleaned OCR line that ENDS with one of these words is taken as the card
# header; the name / father's name / date of birth are the lines after it.
HEADER_PATTERN = (
    r'(INCOMETAXDEPARWENT|INCOME|TAX|GOW|GOVT|GOVERNMENT|OVERNMENT|VERNMENT'
    r'|DEPARTMENT|EPARTMENT|PARTMENT|ARTMENT|INDIA|NDIA)$'
)

# A line containing a word that ends with one of these is taken as the
# "Permanent Account Number" label; the PAN is the line after it.
PAN_LABEL_PATTERN = (
    r'(Pormanam|Number|umber|Account|ccount|count|Permanent|ermanent'
    r'|manent|wumm)$'
)

# Character substitutions applied, in order, to each OCR'd field.
# NOTE(review): the name maps "8"->"B" and "0"->"D" while the father's name
# maps "8"->"S" and "0"->"O"; kept exactly as written -- confirm the intent.
NAME_FIXES = (("8", "B"), ("0", "D"), ("6", "G"), ("1", "I"))
FATHER_NAME_FIXES = (
    ("8", "S"), ("0", "O"), ("6", "G"), ("1", "I"), ("\"", "A"),
)
DOB_FIXES = (
    ('l', '/'), ('L', '/'), ('I', '/'), ('i', '/'), ('|', '/'),
    ('\"', '/1'), (" ", ""),
)
PAN_FIXES = ((" ", ""), ("\"", ""), (";", ""), ("%", "L"))

# One non-letter character followed by one or more spaces.
# NOTE(review): possibly meant to be '[^a-zA-Z ]+'; kept as written.
NAME_JUNK_PATTERN = r'[^a-zA-Z] +'


def _replace_all(value, replacements):
    """Apply each (old, new) pair of *replacements* to *value*, in order."""
    for old, new in replacements:
        value = value.replace(old, new)
    return value


def _lines_after_match(textlist, wordstring):
    """Return the lines after the first line with a matching word.

    Returns None (rather than the input list) when no line matches, so the
    caller can tell "label not found" apart from "label found".
    """
    for lineno, wordline in enumerate(textlist):
        if any(re.search(wordstring, word) for word in wordline.split()):
            return textlist[lineno + 1:]
    return None


def findword(textlist, wordstring):
    """Return the lines that follow the first line matching *wordstring*.

    Each line is split on whitespace and every word is tested with
    ``re.search(wordstring, word)``.

    Args:
        textlist: list of text lines.
        wordstring: regular expression tested against each word.

    Returns:
        The slice of *textlist* after the first matching line, or *textlist*
        itself when no line matches.
    """
    remainder = _lines_after_match(textlist, wordstring)
    return textlist if remainder is None else remainder


def clean_lines(text):
    """Split *text* on newlines and return the stripped, non-empty lines."""
    stripped = (line.strip() for line in text.split('\n'))
    return [line for line in stripped if line]


def lines_after_header(lines):
    """Return the lines that follow the card header line.

    The header is the first line that ends with one of the words in
    HEADER_PATTERN. When no line matches, the first line is still skipped
    (the header index defaults to 0).
    """
    header_index = 0
    for index, line in enumerate(lines):
        if re.search(HEADER_PATTERN, line):
            header_index = index
            break
    return lines[header_index + 1:]


def extract_pan_details(text):
    """Build the PAN-card record from raw OCR *text*.

    Every field is extracted independently: a field whose source line is
    missing is left as None instead of aborting the remaining fields.

    Args:
        text: string returned by the OCR engine.

    Returns:
        dict with the keys 'Name', 'Father Name', 'Date of Birth', 'PAN'
        and 'ID Type' (always "PAN").
    """
    lines = clean_lines(text)
    body = lines_after_header(lines)

    name = None
    fname = None
    dob = None
    pan = None

    # Name: first line after the header.
    if len(body) > 0:
        name = _replace_all(body[0].strip(), NAME_FIXES)
        name = re.sub(NAME_JUNK_PATTERN, ' ', name)

    # Father's name: second line after the header.
    if len(body) > 1:
        fname = _replace_all(body[1].strip(), FATHER_NAME_FIXES)
        fname = re.sub(NAME_JUNK_PATTERN, ' ', fname)

    # Date of birth: first 10 characters of the third line after the header.
    if len(body) > 2:
        dob = _replace_all(body[2][:10].strip(), DOB_FIXES)

    # PAN: the line right after the label line. If the label is not found
    # the PAN stays None rather than falling back to the first OCR line.
    after_label = _lines_after_match(lines, PAN_LABEL_PATTERN)
    if after_label:
        pan = _replace_all(after_label[0].strip(), PAN_FIXES)

    data = {}
    data['Name'] = name
    data['Father Name'] = fname
    data['Date of Birth'] = dob
    data['PAN'] = pan
    data['ID Type'] = "PAN"
    return data


def read_card_text(path):
    """Load the image at *path*, smooth it and return the OCR'd text.

    Exits with an error message when OpenCV cannot read the image
    (``cv2.imread`` returns None in that case).
    """
    img = cv2.imread(path)
    if img is None:
        sys.exit('Could not read image: ' + path)

    # Gaussian blur -> median blur -> bilateral filter, then OCR the result.
    blur = cv2.GaussianBlur(img, (5, 5), 0)
    median = cv2.medianBlur(blur, 5)
    smoothed = cv2.bilateralFilter(median, 9, 75, 75)
    return pt.pytesseract.image_to_string(smoothed)


def save_details(data, filename=OUTPUT_JSON):
    """Write *data* to *filename* as indented, key-sorted UTF-8 JSON."""
    payload = json.dumps(data,
                         indent=4, sort_keys=True,
                         separators=(',', ': '), ensure_ascii=False)
    with io.open(filename, 'w', encoding='utf-8') as outfile:
        outfile.write(payload)


def print_saved_details(filename=OUTPUT_JSON):
    """Read the JSON record back from *filename* and print the PAN details."""
    with open(filename, encoding='utf-8') as infile:
        data_loaded = json.load(infile)

    if data_loaded['ID Type'] == 'PAN':
        print("\n---------- PAN Details ----------")
        print("\nPAN Number: ", data_loaded['PAN'])
        print("\nName: ", data_loaded['Name'])
        print("\nFather's Name: ", data_loaded['Father Name'])
        print("\nDate Of Birth: ", data_loaded['Date of Birth'])


def main():
    """OCR the PAN card image, print the record, save it and print it back."""
    pt.pytesseract.tesseract_cmd = TESSERACT_CMD
    text = read_card_text(IMAGE_PATH)
    data = extract_pan_details(text)
    print(data)
    save_details(data)
    print_saved_details()


if __name__ == "__main__":
    main()