Thanks a lot for your response and example. I will try it today. I don't have much experience with cffi, so I decided to write my Tesseract Python interface with just ctypes. The following is my code for the same. It seems to work. Loading the tesseract library takes about 160ms and recognizing the text (with psm 6) takes about 140ms. These are the results with tessdata_best. So, in order to get better performance, I decided to create 100 instances of my class and rotate them in an array. That way my recognizing time is under ~140ms. But I definitely want to try cffi, as it is a cleaner interface to understand and I somehow feel it might be better in performance.
import cv2 import ctypes import ctypes.util from datetime import datetime class TesseractError(Exception): pass class Tesseract(object): _lib = None _api = None class TessBaseAPI(ctypes._Pointer): _type_ = type('_TessBaseAPI', (ctypes.Structure,), {}) @classmethod def setup_lib(cls, lib_path=None): if cls._lib is not None: return if lib_path is None: lib_path = "/usr/local/lib/libtesseract.so.4" cls._lib = lib = ctypes.CDLL(lib_path) # source: # https://github.com/tesseract-ocr/tesseract/blob/95ea778745edd1cdf6ee22f9fe653b9e061d5708/src/api/capi.h lib.TessBaseAPICreate.restype = cls.TessBaseAPI lib.TessBaseAPIDelete.restype = None # void lib.TessBaseAPIDelete.argtypes = ( cls.TessBaseAPI,) # handle lib.TessBaseAPIInit3.argtypes = (cls.TessBaseAPI, ctypes.c_char_p, ctypes.c_char_p) lib.TessBaseAPISetImage.restype = None lib.TessBaseAPISetImage.argtypes = (cls.TessBaseAPI, ctypes.c_void_p, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int) lib.TessBaseAPISetVariable.argtypes = (cls.TessBaseAPI, ctypes.c_char_p, ctypes.c_char_p) lib.TessBaseAPIGetUTF8Text.restype = ctypes.c_char_p lib.TessBaseAPIGetUTF8Text.argtypes = ( cls.TessBaseAPI,) def __init__(self, language='eng_best', datapath=None, lib_path=None): if self._lib is None: self.setup_lib(lib_path) self._api = self._lib.TessBaseAPICreate() print "initializing tesseract!!!!" if self._lib.TessBaseAPIInit3(self._api, datapath, language): print "Tesseract initialization failed!!" 
raise TesseractError('initialization failed') def __del__(self): if not self._lib or not self._api: return if not getattr(self, 'closed', False): self._lib.TessBaseAPIDelete(self._api) self.closed = True def _check_setup(self): if not self._lib: raise TesseractError('lib not configured') if not self._api: raise TesseractError('api not created') def set_image(self, imagedata, width, height, bytes_per_pixel, bytes_per_line=None): self._check_setup() if bytes_per_line is None: bytes_per_line = width * bytes_per_pixel print "bytes per line={}".format(bytes_per_line) self._lib.TessBaseAPISetImage(self._api, imagedata, width, height, bytes_per_pixel, bytes_per_line) def set_variable(self, key, val): self._check_setup() self._lib.TessBaseAPISetVariable(self._api, key, val) def get_utf8_text(self): self._check_setup() return self._lib.TessBaseAPIGetUTF8Text(self._api) def get_text(self): self._check_setup() result = self._lib.TessBaseAPIGetUTF8Text(self._api) if result: return result.decode('utf-8') def convert_to_grayscale(image_data): return cv2.cvtColor(image_data, cv2.COLOR_BGR2GRAY) # a method to make it look similar to tesslib.py def tesseract_process_image2(tess, frame_piece): grayscaled = len(frame_piece.frame.shape) == 2 if not grayscaled: image_data = convert_to_grayscale(frame_piece.frame) height, width = frame_piece.frame.shape tess.set_variable("tesseract_char_whitelist", frame_piece.whitelist) tess.set_variable("tessedit_pageseg_mode", str(frame_piece.psm)) # tess.set_variable("user_words_suffix", "user-data") # tess.set_variable("user_pattern_suffix", "user-pattern") tess.set_variable("image_default_resolution", "70") tess.set_image(frame_piece.frame.ctypes, width, height, 1) text = tess.get_utf8_text() return text.strip() class FramePiece(object): def __init__(self, img, whitelist): self.frame = img self.whitelist = whitelist if whitelist else "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890" self.psm = 6 # overloaded method for view page def 
tesseract_process_image(tess, frame, whitelist=None): frame_piece = FramePiece(frame, whitelist) return tesseract_process_image2(tess, frame_piece) ############################### TESTING ###################### if __name__ == '__main__': img = cv2.imread('/data/framecache/testing.jpg') height, width, depth = img.shape print datetime.utcnow() tess = Tesseract() print "ocr image Start:{}".format(datetime.utcnow()) frame_piece = FramePiece(img) res = tesseract_process_image2(tess, frame_piece) print res On Tuesday, April 2, 2019 at 2:45:13 PM UTC-7, zdenop wrote: > > OK. I have more time to look at you code and I see there few problems: > > - Whitelist does not work at tesseract 4.0 (search for more details in > forum/issue tracker) > - Setting variable: there is no variable like " > image_default_resolution" in tesseract 4.x - you need to check return > value of command. Try something like this: > > # Example to set variable > var = "user_defined_dpi" > value = "250" > ret_val = tesseract.TessBaseAPISetVariable(api, var.encode(), > value.encode()) > print(f"SetVariable for {var}={value} {'was sucessfull' if ret_val else > 'failed'}.") > # Example to get int variable > dpi = ffi.new('int *') > ret_val = tesseract.TessBaseAPIGetIntVariable(api, var.encode(), dpi) > print((f"Can not get int value for '{var}'.", f"{var}: > {dpi[0]}.")[ret_val]) > > > - Setting data to tesseract: did you check if there in no error? Try > to get thresholded (binarized) image from tesseract to check that: > > thresholded_pix = tesseract.TessBaseAPIGetThresholdedImage(api) > if thresholded_pix == ffi.NULL: > print(thresholded_pix) > print("There is no image in tesseract api. 
Did you use SetImage > function?") > else: > filename = "thresholded_pix.png" > if leptonica.pixWrite(filename.encode(), thresholded_pix, > leptonica.IFF_PNG): > print("There was problem to save thresholded image to disk.") > else: > print(f"Thresholded image to disk as '{filename}'.") > > > - and then you will see that your usage of TessBaseAPISetImagedoes > not what you expect. Following code worked for my test images, but I do > not guarantee it will for for all types of image (RGBA, grays scale, > binary): > > img = cv2.imread("test.jpg", flags=cv2.IMREAD_COLOR) > tesseract.TessBaseAPISetImage(api, > ffi.cast("unsigned char*", img.ctypes.data), > img.shape[1], img.shape[0], img.shape[2], int(img.size/img.shape[0])) > > > I hope this will help you. Please share you result, especially if you make > this code more robust. > > > Zdenko > > > ut 2. 4. 2019 o 8:54 Zdenko Podobny <zde...@gmail.com <javascript:>> > napísal(a): > >> My code use PIL to open image files. >> >> You need to convert cv image structure to pix or send the data to >> tesseract[1] with >> void SetImage(const unsigned char* imagedata, int width, int height, int >> bytes_per_pixel, int bytes_per_line)[2] >> >> [1] >> https://stackoverflow.com/questions/8115368/converting-cvmat-for-tesseract >> >> [2] https://github.com/tesseract- >> >> ocr/tesseract/blob/3e7144e79654d9da7a180a3c51e843afb4a77491/src/api/baseapi.h#L333 >> >> If you have problem, post your code and image. >> >> Zdenko >> >> >> ut 2. 4. 2019 o 8:46 Guru Govindan <gurunatha...@gmail.com <javascript:>> >> napísal(a): >> >>> Hi Thanks a lot for the great post. I am trying to use cffi wrapper in >>> my project as the pytesseract was really slow. >>> I use opencv to read my image and I am trying to use your cffi setup and >>> initiate. But I the tesseract is not recognizing any text. 
>>> >>> import ctypes >>> import os >>> >>> import cffi >>> >>> import cv2 >>> >>> tessdata = "/home/vagrant/reelz_base/shared/tessdata/" >>> >>> ffi = cffi.FFI() >>> >>> ffi.cdef(""" >>> typedef signed char l_int8; >>> typedef unsigned char l_uint8; >>> typedef short l_int16; >>> typedef unsigned short l_uint16; >>> typedef int l_int32; >>> typedef unsigned int l_uint32; >>> typedef float l_float32; >>> typedef double l_float64; >>> typedef long long l_int64; >>> typedef unsigned long long l_uint64; >>> typedef int l_ok; /*!< return type 0 if OK, 1 on error */ >>> >>> >>> >>> typedef struct TessBaseAPI TessBaseAPI; >>> typedef struct ETEXT_DESC ETEXT_DESC; >>> typedef struct TessPageIterator TessPageIterator; >>> typedef struct TessResultIterator TessResultIterator; >>> typedef int BOOL; >>> >>> typedef enum TessOcrEngineMode { >>> OEM_TESSERACT_ONLY = 0, >>> OEM_LSTM_ONLY = 1, >>> OEM_TESSERACT_LSTM_COMBINED = 2, >>> OEM_DEFAULT = 3} TessOcrEngineMode; >>> >>> typedef enum TessPageSegMode { >>> PSM_OSD_ONLY = 0, >>> PSM_AUTO_OSD = 1, >>> PSM_AUTO_ONLY = 2, >>> PSM_AUTO = 3, >>> PSM_SINGLE_COLUMN = 4, >>> PSM_SINGLE_BLOCK_VERT_TEXT = 5, >>> PSM_SINGLE_BLOCK = 6, >>> PSM_SINGLE_LINE = 7, >>> PSM_SINGLE_WORD = 8, >>> PSM_CIRCLE_WORD = 9, >>> PSM_SINGLE_CHAR = 10, >>> PSM_SPARSE_TEXT = 11, >>> PSM_SPARSE_TEXT_OSD = 12, >>> PSM_COUNT = 13} TessPageSegMode; >>> >>> typedef enum TessPageIteratorLevel { >>> RIL_BLOCK = 0, >>> RIL_PARA = 1, >>> RIL_TEXTLINE = 2, >>> RIL_WORD = 3, >>> RIL_SYMBOL = 4} TessPageIteratorLevel; >>> >>> TessPageIterator* TessBaseAPIAnalyseLayout(TessBaseAPI* handle); >>> TessPageIterator* TessResultIteratorGetPageIterator(TessResultIterator* >>> handle); >>> >>> BOOL TessPageIteratorNext(TessPageIterator* handle, >>> TessPageIteratorLevel level); >>> BOOL TessPageIteratorBoundingBox(const TessPageIterator* handle, >>> TessPageIteratorLevel level, >>> int* left, int* top, int* right, int* >>> bottom); >>> >>> const char* TessVersion(); >>> >>> 
TessBaseAPI* TessBaseAPICreate(); >>> void TessBaseAPIDelete(TessBaseAPI* handle); >>> >>> size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI* handle, void **device); >>> >>> void TessBaseAPISetInputName( TessBaseAPI* handle, const char* name); >>> >>> const char* TessBaseAPIGetInputName(TessBaseAPI* handle); >>> >>> void TessBaseAPISetInputImage(TessBaseAPI* handle, struct Pix* pix); >>> >>> int TessBaseAPIGetSourceYResolution(TessBaseAPI* handle); >>> >>> >>> BOOL TessBaseAPISetVariable(TessBaseAPI* handle, const char* name, >>> const char* value); >>> BOOL TessBaseAPISetDebugVariable(TessBaseAPI* handle, const char* name, >>> const char* value); >>> >>> void TessBaseAPIPrintVariables( const TessBaseAPI* handle, FILE* >>> fp); >>> >>> BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI* handle, const >>> char* filename); >>> >>> int TessBaseAPIInit1(TessBaseAPI* handle, const char* datapath, const >>> char* language, TessOcrEngineMode oem, >>> char** configs, int >>> configs_size); >>> int TessBaseAPIInit2(TessBaseAPI* handle, const char* datapath, const >>> char* language, TessOcrEngineMode oem); >>> >>> int TessBaseAPIInit3(TessBaseAPI* handle, const char* datapath, const >>> char* language); >>> >>> int TessBaseAPIInit4(TessBaseAPI* handle, const char* datapath, const >>> char* language, TessOcrEngineMode mode, >>> char** configs, int configs_size, >>> char** vars_vec, char** vars_values, size_t vars_vec_size, >>> BOOL set_only_non_debug_params); >>> >>> void TessBaseAPISetImage(TessBaseAPI* handle, const unsigned char* >>> imagedata, int width, int height, >>> int bytes_per_pixel, int >>> bytes_per_line); >>> >>> int TessBaseAPIRecognize(TessBaseAPI* handle, ETEXT_DESC* monitor); >>> TessResultIterator* TessBaseAPIGetIterator(TessBaseAPI* handle); >>> BOOL TessResultIteratorNext(TessResultIterator* handle, >>> TessPageIteratorLevel level); >>> char* TessResultIteratorGetUTF8Text(const TessResultIterator* handle, >>> TessPageIteratorLevel level); >>> float 
TessResultIteratorConfidence(const TessResultIterator* handle, >>> TessPageIteratorLevel level); >>> char* TessBaseAPIGetUTF8Text(TessBaseAPI* handle); >>> const char* TessResultIteratorWordFontAttributes(const >>> TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic, >>> BOOL* >>> is_underlined, BOOL* is_monospace, BOOL* is_serif, >>> BOOL* >>> is_smallcaps, int* pointsize, int* font_id); >>> BOOL TessResultIteratorWordIsFromDictionary(const TessResultIterator* >>> handle); >>> BOOL TessResultIteratorWordIsNumeric(const TessResultIterator* handle); >>> BOOL TessResultIteratorSymbolIsSuperscript(const TessResultIterator* >>> handle); >>> BOOL TessResultIteratorSymbolIsSubscript(const TessResultIterator* >>> handle); >>> BOOL TessResultIteratorSymbolIsDropcap(const TessResultIterator* handle); >>> >>> void TessBaseAPIEnd(TessBaseAPI* handle); >>> """) >>> >>> tess_libname = "/usr/local/lib/libtesseract.so.4" >>> >>> if os.path.exists(tess_libname): >>> tesseract = ffi.dlopen(tess_libname) >>> >>> api = None >>> tesseract_version = ffi.string(tesseract.TessVersion()) >>> print('Tesseract-ocr version', tesseract_version.decode('utf-8')) >>> api = tesseract.TessBaseAPICreate() >>> >>> c_ubyte_p = ctypes.POINTER(ctypes.c_ubyte) >>> >>> lang="eng" >>> oem = tesseract.OEM_DEFAULT >>> tesseract.TessBaseAPIInit3(api, tessdata.encode(), lang.encode()) >>> img = cv2.imread("/data/framecache/testing.jpg") >>> >>> tesseract.TessBaseAPISetDebugVariable(api, "tesseract_char_whitelist", >>> "ABCDEFGHIJKLMNOPQRSTUVWXYZ") >>> tesseract.TessBaseAPISetDebugVariable(api, "tessedit_pageseg_mode", "8") >>> tesseract.TessBaseAPISetVariable(api, "image_default_resolution", "72") >>> >>> tesseract.TessBaseAPISetImage(api, ffi.cast("unsigned char*", >>> img.ctypes.data), img.shape[0], img.shape[1], 1, img.shape[1]) >>> >>> # tesseract.TessBaseAPIRecognize(api, ffi.NULL) >>> >>> utf8_text = >>> ffi.string(tesseract.TessBaseAPIGetUTF8Text(api)).decode('utf-8') >>> print utf8_text >>> >>> 
>>> >>> >>> On Sunday, March 24, 2019 at 10:10:36 AM UTC-7, zdenop wrote: >>>> >>>> Hi all, >>>> >>>> I publish my test / example how to use tesseract C-API in python3 via >>>> cffi[1]. >>>> >>>> I am aware of pytesseract module, which seems to be widely used. It is >>>> wrapping tesseract executable, so IMO it could have some limitation e.g. >>>> from point of performance (it using disk operation for input and output). >>>> >>>> It is in form of jupyter notebook[3] (github is able to show it, but >>>> not run ;-)) so you can interactively view what is happening. >>>> >>>> My aim is not to create new tesseract python wrapper (I do not have a >>>> time for it, and I am not able to create nice python code as pytesseract >>>> has :-) ) so it is not robust: I just did it on windows 64 bit, but IMO is >>>> should be possible with small modification to use in Linux and Mac. If >>>> needed I can add 32bit windows libs... >>>> >>>> Personally I would like have python tesseract and leptonica module >>>> using directly its API... I know that James Barlow already started to >>>> wrapping leptonica, but it is (not yet?) available as independent module >>>> (it is part of OCRmyPDF). >>>> >>>> Anyway I hope this will help somebody. >>>> >>>> [1] https://github.com/zdenop/SimpleTesseractPythonWrapper >>>> [2] https://pypi.org/project/pytesseract/ >>>> [3] >>>> https://github.com/zdenop/SimpleTesseractPythonWrapper/blob/master/SimpleTesseractPythonWrapper.ipynb >>>> >>>> [4] https://github.com/jbarlow83/OCRmyPDF/tree/master/src/ocrmypdf/lib >>>> >>>> >>>> Zdenko >>>> >>> -- >>> You received this message because you are subscribed to the Google >>> Groups "tesseract-ocr" group. >>> To unsubscribe from this group and stop receiving emails from it, send >>> an email to tesser...@googlegroups.com <javascript:>. >>> To post to this group, send email to tesser...@googlegroups.com >>> <javascript:>. >>> Visit this group at https://groups.google.com/group/tesseract-ocr. 
>>> To view this discussion on the web visit >>> https://groups.google.com/d/msgid/tesseract-ocr/d6253a61-d796-4cce-8412-f2cc61e83000%40googlegroups.com >>> >>> <https://groups.google.com/d/msgid/tesseract-ocr/d6253a61-d796-4cce-8412-f2cc61e83000%40googlegroups.com?utm_medium=email&utm_source=footer> >>> . >>> For more options, visit https://groups.google.com/d/optout. >>> >> -- You received this message because you are subscribed to the Google Groups "tesseract-ocr" group. To unsubscribe from this group and stop receiving emails from it, send an email to tesseract-ocr+unsubscr...@googlegroups.com. To post to this group, send email to tesseract-ocr@googlegroups.com. Visit this group at https://groups.google.com/group/tesseract-ocr. To view this discussion on the web visit https://groups.google.com/d/msgid/tesseract-ocr/a247e3c2-cc64-43ab-8f9e-9ef20544764a%40googlegroups.com. For more options, visit https://groups.google.com/d/optout.