qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

tesseract_utils.py (1440B)


      1 # ...
      2 #
      3 # Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
      4 #
      5 # This work is licensed under the terms of the GNU GPL, version 2 or
      6 # later. See the COPYING file in the top-level directory.
      7 
import logging
import re
import shlex

from avocado.utils import process
from avocado.utils.path import find_command, CmdNotFoundError
     13 
     14 def tesseract_available(expected_version):
     15     try:
     16         find_command('tesseract')
     17     except CmdNotFoundError:
     18         return False
     19     res = process.run('tesseract --version')
     20     try:
     21         version = res.stdout_text.split()[1]
     22     except IndexError:
     23         version = res.stderr_text.split()[1]
     24     return int(version.split('.')[0]) == expected_version
     25 
     26     match = re.match(r'tesseract\s(\d)', res)
     27     if match is None:
     28         return False
     29     # now this is guaranteed to be a digit
     30     return int(match.groups()[0]) == expected_version
     31 
     32 
     33 def tesseract_ocr(image_path, tesseract_args='', tesseract_version=3):
     34     console_logger = logging.getLogger('tesseract')
     35     console_logger.debug(image_path)
     36     if tesseract_version == 4:
     37         tesseract_args += ' --oem 1'
     38     proc = process.run("tesseract {} {} stdout".format(tesseract_args,
     39                                                        image_path))
     40     lines = []
     41     for line in proc.stdout_text.split('\n'):
     42         sline = line.strip()
     43         if len(sline):
     44             console_logger.debug(sline)
     45             lines += [sline]
     46     return lines