On 10/24/20 8:40 AM, Thomas Huth wrote:
On 24/10/2020 08.35, Thomas Huth wrote:
On 21/10/2020 12.50, Philippe Mathieu-Daudé wrote:
We are going to reuse the tesseract OCR code.
Create a new tesseract_ocr() helper and use it.

Signed-off-by: Philippe Mathieu-Daudé <f4...@amsat.org>
---
  tests/acceptance/machine_m68k_nextcube.py | 21 +++++----------------
  tests/acceptance/tesseract_utils.py       | 18 ++++++++++++++++++
  2 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/tests/acceptance/machine_m68k_nextcube.py b/tests/acceptance/machine_m68k_nextcube.py
index 3c7400c43e4..09e2745cc52 100644
--- a/tests/acceptance/machine_m68k_nextcube.py
+++ b/tests/acceptance/machine_m68k_nextcube.py
@@ -7,13 +7,11 @@
import os
  import time
-import logging
from avocado_qemu import Test
  from avocado import skipUnless
-from avocado.utils import process
-from tesseract_utils import tesseract_available
+from tesseract_utils import tesseract_available, tesseract_ocr
PIL_AVAILABLE = True
  try:
@@ -61,12 +59,8 @@ def test_bootrom_framebuffer_size(self):
      def test_bootrom_framebuffer_ocr_with_tesseract_v3(self):
          screenshot_path = os.path.join(self.workdir, "dump.ppm")
          self.check_bootrom_framebuffer(screenshot_path)
-
-        console_logger = logging.getLogger('console')
-        text = process.run("tesseract %s stdout" % screenshot_path).stdout_text
-        for line in text.split('\n'):
-            if len(line):
-                console_logger.debug(line)
+        lines = tesseract_ocr(screenshot_path, tesseract_version=3)
+        text = '\n'.join(lines)
          self.assertIn('Backplane', text)
          self.assertIn('Ethernet address', text)
@@ -77,13 +71,8 @@ def test_bootrom_framebuffer_ocr_with_tesseract_v3(self):
      def test_bootrom_framebuffer_ocr_with_tesseract_v4(self):
          screenshot_path = os.path.join(self.workdir, "dump.ppm")
          self.check_bootrom_framebuffer(screenshot_path)
-
-        console_logger = logging.getLogger('console')
-        proc = process.run("tesseract --oem 1 %s stdout" % screenshot_path)
-        text = proc.stdout_text
-        for line in text.split('\n'):
-            if len(line):
-                console_logger.debug(line)
+        lines = tesseract_ocr(screenshot_path, tesseract_version=4)
+        text = '\n'.join(lines)
          self.assertIn('Testing the FPU, SCC', text)
          self.assertIn('System test failed. Error code', text)
          self.assertIn('Boot command', text)
diff --git a/tests/acceptance/tesseract_utils.py b/tests/acceptance/tesseract_utils.py
index acd6e8c2faa..72cd9ab7989 100644
--- a/tests/acceptance/tesseract_utils.py
+++ b/tests/acceptance/tesseract_utils.py
@@ -6,7 +6,9 @@
  # later. See the COPYING file in the top-level directory.
import re
+import logging
+from avocado.utils import process
  from avocado.utils.path import find_command, CmdNotFoundError
def tesseract_available(expected_version):
@@ -26,3 +28,19 @@ def tesseract_available(expected_version):
          return False
      # now this is guaranteed to be a digit
      return int(match.groups()[0]) == expected_version
+
+
+def tesseract_ocr(image_path, tesseract_args='', tesseract_version=3):
+    console_logger = logging.getLogger('tesseract')
+    console_logger.debug(image_path)
+    if tesseract_version == 4:
+        tesseract_args += ' --oem 1'
+    proc = process.run("tesseract {} {} stdout".format(tesseract_args,
+                                                       image_path))
+    lines = []
+    for line in proc.stdout_text.split('\n'):
+        sline = line.strip()
+        if len(sline):
+            console_logger.debug(sline)
+            lines += [sline]
+    return lines

Would it make sense to completely hide the tesseract version handling in
this new tesseract_utils.py file now, so that the tests themselves do not
have to worry about this anymore?

Yes, good idea.

(i.e. would it be possible to merge
test_bootrom_framebuffer_ocr_with_tesseract_v3 and
test_bootrom_framebuffer_ocr_with_tesseract_v4 into one single test that way?)

If I've got that right, there is also now a proper release 4 of Tesseract,
so maybe we can simply scratch the testing with version 3 now?

Good to know, I'll have a look. Thanks!


  Thomas


Reply via email to