I'm new to Tesseract and am trying to train my own fonts on Tesseract 5.3.2. I need to know: if the training process is interrupted (for example, the electricity is cut off, or I close VS Code) and I then run the training command again, does it start from the beginning or resume from the point where it was interrupted?
I have already tested it, but every time it starts from the beginning. So I need to know whether there is any method to handle this problem, because training takes a lot of time and it should not be necessary to start from the beginning every time. Or is this normal? I use this script to create the text-to-image .tif files for multiple fonts in Tesseract 5.3.2: import os import random import pathlib import subprocess training_text_file = 'langdata/ben.training_text' font_list = ['FL Badhon Ansari Rh. Unicode', 'F Khairuddin Barbarusa Rah. Uni', 'F Mahfuj Art Unicode Italic', 'F Mahfuj Art Unicode', 'FL Niribili Plain Unicode', 'FL Niribili Plain Unicode Itali Italic' ] # Add more fonts as needed lines = [] with open(training_text_file, 'r') as input_file: for line in input_file.readlines(): lines.append(line.strip()) output_directory = 'tesstrain/data/ben-ground-truth' if not os.path.exists(output_directory): os.mkdir(output_directory) random.shuffle(lines) count = 100 lines = lines[:count] line_count = 0 for line in lines: for font in font_list: training_text_file_name = pathlib.Path(training_text_file).stem line_training_text = os.path.join( output_directory, f'{training_text_file_name}_{line_count}.gt.txt') with open(line_training_text, 'w') as output_file: output_file.writelines([line]) file_base_name = f'ben_{line_count}' subprocess.run([ 'text2image', f'--font={font}', f'--text={line_training_text}', f'--outputbase={output_directory}/{file_base_name}', '--max_pages=1', '--strip_unrenderable_words', '--leading=32', '--xsize=3600', '--ysize=350', '--char_spacing=1.0', '--exposure=0', '--unicharset_file=langdata/ben.unicharset' ]) line_count += 1 and this script is for training: import subprocess # List of font names font_names = ['ben'] for font in font_names: command = f"TESSDATA_PREFIX=../tesseract/tessdata make training MODEL_NAME={font} START_MODEL=ben TESSDATA=../tesseract/tessdata MAX_ITERATIONS=10000 LANG_TYPE=Indic" subprocess.run(command, shell=True) -- You received this message because you 
are subscribed to the Google Groups "tesseract-ocr" group. To unsubscribe from this group and stop receiving emails from it, send an email to tesseract-ocr+unsubscr...@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/tesseract-ocr/0e2880d9-64c0-4659-b497-902a5747caf4n%40googlegroups.com.