# import subprocess # # List of font names # font_names = ['ben']
# for font in font_names: # command = f"lstmtraining --continue_from data/ben/checkpoints/ben_19.535_298_300.checkpoint --traineddata data/ben/ben.traineddata --model_output data/ben/checkpoints/ben --train_listfile data/ben/list.train --eval_listfile data/ben/list.eval --max_iterations 1000" # subprocess.run(command, shell=True) I fixed the problem: this code works by continuing training from the selected checkpoint. On Thursday, 20 July, 2023 at 7:51:43 am UTC+6 Ali hussain wrote: > I'm new to Tesseract and trying to train my own fonts on Tesseract 5.3.2, > but I need to know: if the electricity is cut off, or I close VS Code, or > something like that interrupts the training process, and I then run the training > command again, does it start from the beginning or from the point where it was cut > off? > > I have already tested it, but every time it starts from the beginning. So I need > to know of any method to handle this problem, because training takes a > lot of time and it should not be necessary to start from the beginning every time. Or is that > normal? > > > I use this command to create text-to-image.tif files for multiple fonts in > Tesseract 5.3.2: > > import os > import random > import pathlib > import subprocess > > training_text_file = 'langdata/ben.training_text' > font_list = ['FL Badhon Ansari Rh. Unicode', > 'F Khairuddin Barbarusa Rah. 
Uni', > 'F Mahfuj Art Unicode Italic', > 'F Mahfuj Art Unicode', > 'FL Niribili Plain Unicode', > 'FL Niribili Plain Unicode Itali Italic' > ] # Add more fonts as needed > > lines = [] > > with open(training_text_file, 'r') as input_file: > for line in input_file.readlines(): > lines.append(line.strip()) > > output_directory = 'tesstrain/data/ben-ground-truth' > > if not os.path.exists(output_directory): > os.mkdir(output_directory) > > random.shuffle(lines) > > count = 100 > > lines = lines[:count] > > line_count = 0 > for line in lines: > for font in font_list: > training_text_file_name = pathlib.Path(training_text_file).stem > line_training_text = os.path.join( > output_directory, > f'{training_text_file_name}_{line_count}.gt.txt') > with open(line_training_text, 'w') as output_file: > output_file.writelines([line]) > > file_base_name = f'ben_{line_count}' > > subprocess.run([ > 'text2image', > f'--font={font}', > f'--text={line_training_text}', > f'--outputbase={output_directory}/{file_base_name}', > '--max_pages=1', > '--strip_unrenderable_words', > '--leading=32', > '--xsize=3600', > '--ysize=350', > '--char_spacing=1.0', > '--exposure=0', > '--unicharset_file=langdata/ben.unicharset' > ]) > > line_count += 1 > > > > and this command is for training : > > import subprocess > > # List of font names > font_names = ['ben'] > > for font in font_names: > command = f"TESSDATA_PREFIX=../tesseract/tessdata make training > MODEL_NAME={font} START_MODEL=ben TESSDATA=../tesseract/tessdata > MAX_ITERATIONS=10000 LANG_TYPE=Indic" > subprocess.run(command, shell=True) -- You received this message because you are subscribed to the Google Groups "tesseract-ocr" group. To unsubscribe from this group and stop receiving emails from it, send an email to tesseract-ocr+unsubscr...@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/tesseract-ocr/b13ba048-7eb8-41fc-82cc-fdc1ec2247e4n%40googlegroups.com.