Merge pull request #120349 from lukegb/debug-release-2009

nixos/test-driver: use a variety of different Tesseract settings for OCR
2021-04-23 21:04:02 +01:00 · 2021-04-23 21:04:02 +01:00 · 6e4f8b06f5
commit 6e4f8b06f5
parent c9c3e3793b 4de343cccf
2 changed files with 70 additions and 33 deletions
--- a/nixos/doc/manual/development/writing-nixos-tests.xml
+++ b/nixos/doc/manual/development/writing-nixos-tests.xml
@ -186,6 +186,25 @@ start_all()
     </para>
    </listitem>
   </varlistentry>
+   <varlistentry>
+    <term>
+     <methodname>get_screen_text_variants</methodname>
+    </term>
+    <listitem>
+     <para>
+      Return a list of different interpretations of what is currently visible
+      on the machine's screen using optical character recognition. The number
+      and order of the interpretations is not specified and is subject to
+      change, but if no exception is raised at least one will be returned.
+     </para>
+     <note>
+      <para>
+       This requires passing <option>enableOCR</option> to the test attribute
+       set.
+      </para>
+     </note>
+    </listitem>
+   </varlistentry>
   <varlistentry>
    <term>
     <methodname>get_screen_text</methodname>
@ -350,7 +369,8 @@ start_all()
     <para>
      Wait until the supplied regular expressions matches the textual contents
      of the screen by using optical character recognition (see
-      <methodname>get_screen_text</methodname>).
+     <methodname>get_screen_text</methodname> and
+     <methodname>get_screen_text_variants</methodname>).
     </para>
     <note>
      <para>
--- a/nixos/lib/test-driver/test-driver.py
+++ b/nixos/lib/test-driver/test-driver.py
@ -1,7 +1,7 @@
 #! /somewhere/python3
 from contextlib import contextmanager, _GeneratorContextManager
 from queue import Queue, Empty
-from typing import Tuple, Any, Callable, Dict, Iterator, Optional, List
+from typing import Tuple, Any, Callable, Dict, Iterator, Optional, List, Iterable
 from xml.sax.saxutils import XMLGenerator
 import queue
 import io
@ -205,6 +205,37 @@ class Logger:
        self.xml.endElement("nest")


+def _perform_ocr_on_screenshot(
+    screenshot_path: str, model_ids: Iterable[int]
+) -> List[str]:
+    if shutil.which("tesseract") is None:
+        raise Exception("OCR requested but enableOCR is false")
+
+    magick_args = (
+        "-filter Catrom -density 72 -resample 300 "
+        + "-contrast -normalize -despeckle -type grayscale "
+        + "-sharpen 1 -posterize 3 -negate -gamma 100 "
+        + "-blur 1x65535"
+    )
+
+    tess_args = f"-c debug_file=/dev/null --psm 11"
+
+    cmd = f"convert {magick_args} {screenshot_path} tiff:{screenshot_path}.tiff"
+    ret = subprocess.run(cmd, shell=True, capture_output=True)
+    if ret.returncode != 0:
+        raise Exception(f"TIFF conversion failed with exit code {ret.returncode}")
+
+    model_results = []
+    for model_id in model_ids:
+        cmd = f"tesseract {screenshot_path}.tiff - {tess_args} --oem {model_id}"
+        ret = subprocess.run(cmd, shell=True, capture_output=True)
+        if ret.returncode != 0:
+            raise Exception(f"OCR failed with exit code {ret.returncode}")
+        model_results.append(ret.stdout.decode("utf-8"))
+
+    return model_results
+
+
 class Machine:
    def __init__(self, args: Dict[str, Any]) -> None:
        if "name" in args:
@ -637,43 +668,29 @@ class Machine:
        """Debugging: Dump the contents of the TTY<n>"""
        self.execute("fold -w 80 /dev/vcs{} | systemd-cat".format(tty))

+    def _get_screen_text_variants(self, model_ids: Iterable[int]) -> List[str]:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            screenshot_path = os.path.join(tmpdir, "ppm")
+            self.send_monitor_command(f"screendump {screenshot_path}")
+            return _perform_ocr_on_screenshot(screenshot_path, model_ids)
+
+    def get_screen_text_variants(self) -> List[str]:
+        return self._get_screen_text_variants([0, 1, 2])
+
    def get_screen_text(self) -> str:
-        if shutil.which("tesseract") is None:
-            raise Exception("get_screen_text used but enableOCR is false")
-
-        magick_args = (
-            "-filter Catrom -density 72 -resample 300 "
-            + "-contrast -normalize -despeckle -type grayscale "
-            + "-sharpen 1 -posterize 3 -negate -gamma 100 "
-            + "-blur 1x65535"
-        )
-
-        tess_args = "-c debug_file=/dev/null --psm 11 --oem 2"
-
-        with self.nested("performing optical character recognition"):
-            with tempfile.NamedTemporaryFile() as tmpin:
-                self.send_monitor_command("screendump {}".format(tmpin.name))
-
-                cmd = "convert {} {} tiff:- | tesseract - - {}".format(
-                    magick_args, tmpin.name, tess_args
-                )
-                ret = subprocess.run(cmd, shell=True, capture_output=True)
-                if ret.returncode != 0:
-                    raise Exception(
-                        "OCR failed with exit code {}".format(ret.returncode)
-                    )
-
-                return ret.stdout.decode("utf-8")
+        return self._get_screen_text_variants([2])[0]

    def wait_for_text(self, regex: str) -> None:
        def screen_matches(last: bool) -> bool:
-            text = self.get_screen_text()
-            matches = re.search(regex, text) is not None
+            variants = self.get_screen_text_variants()
+            for text in variants:
+                if re.search(regex, text) is not None:
+                    return True

-            if last and not matches:
-                self.log("Last OCR attempt failed. Text was: {}".format(text))
+            if last:
+                self.log("Last OCR attempt failed. Text was: {}".format(variants))

-            return matches
+            return False

        with self.nested("waiting for {} to appear on screen".format(regex)):
            retry(screen_matches)