nixpkgs/pkgs/tools/text/ocrmypdf/0001-Make-compatible-with-pdfminer.six-version-20200720.patch
Pascal Bach 29c1e061be ocrmypdf: 10.2.0 -> 10.3.0
Add patch to make it compatible with pdfminer 20200720
Submitted upstream as https://github.com/jbarlow83/OCRmyPDF/pull/596
2020-07-23 21:49:22 +02:00

52 lines
2 KiB
Diff

From 4315b58e0bffedd145cec61f96062292cd98278e Mon Sep 17 00:00:00 2001
From: Pascal Bach <pascal.bach@nextrem.ch>
Date: Thu, 23 Jul 2020 21:37:33 +0200
Subject: [PATCH] Make compatible with pdfminer.six version 20200720
---
setup.py | 2 +-
src/ocrmypdf/pdfinfo/layout.py | 8 ++++++--
2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/setup.py b/setup.py
index bd95ed9..d1f4ab1 100644
--- a/setup.py
+++ b/setup.py
@@ -83,7 +83,7 @@ setup(
'cffi >= 1.9.1', # must be a setup and install requirement
'coloredlogs >= 14.0', # strictly optional
'img2pdf >= 0.3.0, < 0.4', # pure Python, so track HEAD closely
- 'pdfminer.six >= 20191110, <= 20200517',
+ 'pdfminer.six >= 20191110, <= 20200720',
'pikepdf >= 1.14.0, < 2',
'Pillow >= 7.0.0',
'pluggy >= 0.13.0',
diff --git a/src/ocrmypdf/pdfinfo/layout.py b/src/ocrmypdf/pdfinfo/layout.py
index 98bd82e..8b41e14 100644
--- a/src/ocrmypdf/pdfinfo/layout.py
+++ b/src/ocrmypdf/pdfinfo/layout.py
@@ -26,7 +26,11 @@ import pdfminer.pdfdevice
import pdfminer.pdfinterp
from pdfminer.converter import PDFLayoutAnalyzer
from pdfminer.layout import LAParams, LTChar, LTPage, LTTextBox
-from pdfminer.pdfdocument import PDFTextExtractionNotAllowed
+try:
+ from pdfminer.pdfdocument import PDFTextExtractionNotAllowedError
+except ImportError:
+ # Fallback for pdfminer < 20200720
+ from pdfminer.pdfdocument import PDFTextExtractionNotAllowed as PDFTextExtractionNotAllowedError
from pdfminer.pdffont import PDFSimpleFont, PDFUnicodeNotDefined
from pdfminer.pdfpage import PDFPage
from pdfminer.utils import bbox2str, matrix2str
@@ -239,7 +243,7 @@ def get_page_analysis(infile, pageno, pscript5_mode):
with Path(infile).open('rb') as f:
page = PDFPage.get_pages(f, pagenos=[pageno], maxpages=0)
interp.process_page(next(page))
- except PDFTextExtractionNotAllowed:
+ except PDFTextExtractionNotAllowedError:
raise EncryptedPdfError()
finally:
if pscript5_mode:
--
2.27.0