Hi there, I recently migrated to Tesseract 4 and tried pytesseract, but it is too slow for the kind of application I am creating.
I wanted to write my own Python wrapper that uses tesseract.so.4 directly. I saw the example on the GitHub wiki page (https://github.com/tesseract-ocr/tesseract/wiki/APIExample), but I have issues calling TessBaseAPIInit4(). I have multiple variables that I want to set, and the call keeps crashing with a segmentation fault. I am not sure what I am doing wrong. I also tried the C++ version of it. Please let me know if this is a known problem. import os from ctypes import * lang = "eng" filename = "/data/framecache/testing.jpg" libname = "/usr/local/lib/libtesseract.so.4" TESSDATA_PREFIX = os.environ.get('TESSDATA_PREFIX') if not TESSDATA_PREFIX: TESSDATA_PREFIX = "/home/vagrant/reelz_base/shared/tessdata" print "TESSDATA_PREFIX={}".format(TESSDATA_PREFIX) tesseract = cdll.LoadLibrary(libname) tesseract.TessVersion.restype = c_char_p tesseract_version = tesseract.TessVersion() api = tesseract.TessBaseAPICreate() print api vars_vec = ["tesseract_char_whitelist", "tessedit_pageseg_mode", "image_default_resolution"] vars_values = ["ABCDEFGHIJKLMNOPQRSTUVWXYZ", "6", "70"] vec_arr_type = c_char_p * len(vars_vec) vars_vec_arr = vec_arr_type() for i, val in enumerate(vars_vec): vars_vec_arr[i] = val print "vars_vec_arr={}".format(vars_vec_arr) val_arr_type = c_char_p * len(vars_values) vars_val_arr = val_arr_type() for i, val in enumerate(vars_values): vars_val_arr[i] = val print "vars_val_arr={}".format(vars_val_arr) rc = tesseract.TessBaseAPIInit4(api, TESSDATA_PREFIX, lang , 2, None, 0, vars_vec_arr, vars_val_arr, 0, c_bool(False)) -- You received this message because you are subscribed to the Google Groups "tesseract-ocr" group. To unsubscribe from this group and stop receiving emails from it, send an email to tesseract-ocr+unsubscr...@googlegroups.com. To post to this group, send email to tesseract-ocr@googlegroups.com. Visit this group at https://groups.google.com/group/tesseract-ocr. 
To view this discussion on the web visit https://groups.google.com/d/msgid/tesseract-ocr/10d9034d-f516-405a-be23-fe23d25d8ea4%40googlegroups.com. For more options, visit https://groups.google.com/d/optout.