Like I said, I didn't test it :) Apparently you need to seek to zero after the tesseract call, and not after Image.open, because tesseract is what actually reads the file. Fixed version (which I actually tested):
# -*- coding: utf-8 -*- import pytesseract from PIL import Image from pydal.validators import Validator, ValidationError class IMG_HAS_TEXT(Validator): def __init__(self, check_tokens=None, error_message="Image doesn't have the required text"): self.error_message = error_message self.check_tokens = check_tokens or [] def validate(self, value, record_id=None): try: image = Image.open(value.file) text = pytesseract.image_to_string(image).lower() value.file.seek(0) if not text or not all(token in text for token in self.check_tokens): raise ValidationError(self.translator(self.error_message)) return value except Exception as e: raise ValidationError(self.translator(self.error_message)) class IS_RECEIPT_OR_INVOICE(IMG_HAS_TEXT): def __init__(self, error_message="The uploaded file is not a receipt or invoice!"): self.error_message = error_message self.check_tokens = ("receipt", "invoice") A sábado, 18 de março de 2023 à(s) 07:20:04 UTC, mostwanted escreveu: > Hey Leonel, thanks for your reply but the above code is still saving > empty (0byte) images > > On Friday, March 17, 2023 at 5:22:07 PM UTC+2 Leonel Câmara wrote: > >> What's happening here is that your validator is reading the file and it's >> not "rewinding" it afterwards so when the next step gets it the file >> descriptor is at the end of the file hence the 0 bytes size. >> >> I would recommend rewriting it to something like this (the important part >> being the value.file.seek(0)) note that I haven't tested this code at all. 
>> >> import pytesseract >> from PIL import Image >> from pydal.validators import Validator, ValidationError >> >> class IMG_HAS_TEXT(Validator): >> >> def __init__(self, check_tokens=None, error_message="Image doesn't >> have the required text"): >> self.error_message = error_message >> self.check_tokens = check_tokens or [] >> >> def validate(self, value, record_id=None): >> try: >> image = Image.open(value.file) >> value.file.seek(0) >> text = pytesseract.image_to_string(image).lower() >> if not text or not all(token in text for token in >> self.check_tokens): >> raise ValidationError(self.translator(self.error_message)) >> return value >> except: >> raise ValidationError(self.translator(self.error_message)) >> >> class IS_RECEIPT_OR_INVOICE(IMG_HAS_TEXT): >> >> def __init__(self, error_message="The uploaded file is not a receipt >> or invoice!"): >> self.error_message = error_message >> self.check_tokens = ("receipt", "invoice") >> >> >> A terça-feira, 14 de março de 2023 à(s) 20:55:11 UTC, mostwanted escreveu: >> >>> I have a little problem with my validator code, its saving 0 byte images >>> and I just don't know why, the value variable is an image that actually >>> contains an image, I have tested it several times to see what it contains >>> and its an image but for some reason when it comes to saving it in the >>> database it saves an empty image, please assist me where I could be going >>> wrong. 
Regards >>> >>> from gluon import * >>> import pytesseract >>> from PIL import Image >>> >>> class IS_RECEIPT_OR_INVOICE(object): >>> def __init__(self, error_message="The uploaded file is not a receipt >>> or invoice!"): >>> self.error_message = error_message >>> >>> def __call__(self, value): >>> error = None >>> try: >>> image = Image.open(value.file) >>> text = pytesseract.image_to_string(image) >>> if "receipt" not in text.lower() and "invoice" not in >>> text.lower() and "RECEIPT" not in text and "INVOICE" not in text: >>> error = self.error_message >>> except: >>> error = self.error_message >>> return (value, error) >> >> -- Resources: - http://web2py.com - http://web2py.com/book (Documentation) - http://github.com/web2py/web2py (Source code) - https://code.google.com/p/web2py/issues/list (Report Issues) --- You received this message because you are subscribed to the Google Groups "web2py-users" group. To unsubscribe from this group and stop receiving emails from it, send an email to web2py+unsubscr...@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/web2py/eb4345c4-46c1-40b6-ae9c-86a033444b24n%40googlegroups.com.