2018-09-10 17:27:56 +02:00
|
|
|
Index: current/src/pyocr/cuneiform.py
|
|
|
|
===================================================================
|
|
|
|
--- current.orig/src/pyocr/cuneiform.py
|
|
|
|
+++ current/src/pyocr/cuneiform.py
|
2017-09-02 05:18:38 +02:00
|
|
|
@@ -27,13 +27,9 @@ from . import error
|
|
|
|
from . import util
|
|
|
|
|
|
|
|
|
|
|
|
-# CHANGE THIS IF CUNEIFORM IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY
|
|
|
|
-CUNEIFORM_CMD = 'cuneiform'
|
2018-09-10 17:27:56 +02:00
|
|
|
+CUNEIFORM_CMD = '@cuneiform@/bin/cuneiform'
|
2017-09-02 05:18:38 +02:00
|
|
|
|
|
|
|
-CUNEIFORM_DATA_POSSIBLE_PATHS = [
|
|
|
|
- "/usr/local/share/cuneiform",
|
|
|
|
- "/usr/share/cuneiform",
|
|
|
|
-]
|
2018-09-10 17:27:56 +02:00
|
|
|
+CUNEIFORM_DATA_POSSIBLE_PATHS = ['@cuneiform@/share/cuneiform']
|
2017-09-02 05:18:38 +02:00
|
|
|
|
|
|
|
LANGUAGES_LINE_PREFIX = "Supported languages: "
|
|
|
|
LANGUAGES_SPLIT_RE = re.compile("[^a-z]")
|
2018-09-10 17:27:56 +02:00
|
|
|
Index: current/src/pyocr/libtesseract/tesseract_raw.py
|
|
|
|
===================================================================
|
|
|
|
--- current.orig/src/pyocr/libtesseract/tesseract_raw.py
|
|
|
|
+++ current/src/pyocr/libtesseract/tesseract_raw.py
|
|
|
|
@@ -1,52 +1,13 @@
|
2017-09-02 05:18:38 +02:00
|
|
|
import ctypes
|
|
|
|
import logging
|
|
|
|
import os
|
|
|
|
-import sys
|
|
|
|
|
|
|
|
from ..error import TesseractError
|
|
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
-TESSDATA_PREFIX = os.getenv('TESSDATA_PREFIX', None)
|
|
|
|
-libnames = []
|
|
|
|
-
|
|
|
|
-if getattr(sys, 'frozen', False):
|
|
|
|
- # Pyinstaller integration
|
|
|
|
- libnames += [os.path.join(sys._MEIPASS, "libtesseract-4.dll")]
|
|
|
|
- libnames += [os.path.join(sys._MEIPASS, "libtesseract-3.dll")]
|
|
|
|
- tessdata = os.path.join(sys._MEIPASS, "data")
|
|
|
|
- if not os.path.exists(os.path.join(tessdata, "tessdata")):
|
|
|
|
- logger.warning(
|
|
|
|
- "Running from container, but no tessdata ({}) found !".format(
|
|
|
|
- tessdata
|
|
|
|
- )
|
|
|
|
- )
|
|
|
|
- else:
|
|
|
|
- TESSDATA_PREFIX = tessdata
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-if sys.platform[:3] == "win":
|
|
|
|
- libnames += [
|
|
|
|
- # Jflesch> Don't they have the equivalent of LD_LIBRARY_PATH on
|
|
|
|
- # Windows ?
|
|
|
|
- "../vs2010/DLL_Release/libtesseract302.dll",
|
2018-09-10 17:27:56 +02:00
|
|
|
- # prefer the most recent first
|
|
|
|
- "libtesseract305.dll",
|
|
|
|
- "libtesseract304.dll",
|
|
|
|
- "libtesseract303.dll",
|
2017-09-02 05:18:38 +02:00
|
|
|
- "libtesseract302.dll",
|
2018-09-10 17:27:56 +02:00
|
|
|
- "libtesseract400.dll", # Tesseract 4 is still in alpha stage
|
|
|
|
- "libtesseract.dll",
|
2017-09-02 05:18:38 +02:00
|
|
|
- "C:\\Program Files (x86)\\Tesseract-OCR\\libtesseract-4.dll",
|
|
|
|
- "C:\\Program Files (x86)\\Tesseract-OCR\\libtesseract-3.dll",
|
|
|
|
- ]
|
|
|
|
-else:
|
|
|
|
- libnames += [
|
|
|
|
- "libtesseract.so.4",
|
|
|
|
- "libtesseract.so.3",
|
|
|
|
- ]
|
|
|
|
-
|
2018-09-10 17:27:56 +02:00
|
|
|
+libnames = [ "@tesseract@/lib/libtesseract.so" ]
|
2017-09-02 05:18:38 +02:00
|
|
|
|
2018-09-10 17:27:56 +02:00
|
|
|
g_libtesseract = None
|
2017-09-02 05:18:38 +02:00
|
|
|
|
2018-09-10 17:27:56 +02:00
|
|
|
@@ -346,12 +307,11 @@ def init(lang=None):
|
2017-09-02 05:18:38 +02:00
|
|
|
try:
|
|
|
|
if lang:
|
|
|
|
lang = lang.encode("utf-8")
|
|
|
|
- prefix = None
|
|
|
|
- if TESSDATA_PREFIX:
|
|
|
|
- prefix = TESSDATA_PREFIX.encode("utf-8")
|
2018-09-10 17:27:56 +02:00
|
|
|
+ prefix = os.getenv('TESSDATA_PREFIX', '@tesseract@/share/tessdata')
|
2017-09-02 05:18:38 +02:00
|
|
|
+ os.environ['TESSDATA_PREFIX'] = prefix
|
|
|
|
g_libtesseract.TessBaseAPIInit3(
|
|
|
|
ctypes.c_void_p(handle),
|
|
|
|
- ctypes.c_char_p(prefix),
|
|
|
|
+ ctypes.c_char_p(prefix.encode('utf-8')),
|
|
|
|
ctypes.c_char_p(lang)
|
|
|
|
)
|
|
|
|
g_libtesseract.TessBaseAPISetVariable(
|
2018-09-10 17:27:56 +02:00
|
|
|
Index: current/src/pyocr/tesseract.py
|
|
|
|
===================================================================
|
|
|
|
--- current.orig/src/pyocr/tesseract.py
|
|
|
|
+++ current/src/pyocr/tesseract.py
|
|
|
|
@@ -31,8 +31,7 @@ from .builders import DigitBuilder # ba
|
2017-09-02 05:18:38 +02:00
|
|
|
from .error import TesseractError # backward compatibility
|
|
|
|
from .util import digits_only
|
|
|
|
|
|
|
|
-# CHANGE THIS IF TESSERACT IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY
|
|
|
|
-TESSERACT_CMD = 'tesseract.exe' if os.name == 'nt' else 'tesseract'
|
2018-09-10 17:27:56 +02:00
|
|
|
+TESSERACT_CMD = '@tesseract@/bin/tesseract'
|
2017-09-02 05:18:38 +02:00
|
|
|
|
|
|
|
TESSDATA_EXTENSION = ".traineddata"
|
|
|
|
|