From 08d506c4604db02dc2712fe25553d838acea5232 Mon Sep 17 00:00:00 2001 From: Antoine BOSSY Date: Sun, 17 Nov 2019 15:51:21 +0100 Subject: [PATCH] [bp] Use builtin helper to read data from pdf. --- modules/bp/pages/accountlist.py | 48 ++------------------------------- 1 file changed, 2 insertions(+), 46 deletions(-) diff --git a/modules/bp/pages/accountlist.py b/modules/bp/pages/accountlist.py index 26d88bcafa..2239d76275 100644 --- a/modules/bp/pages/accountlist.py +++ b/modules/bp/pages/accountlist.py @@ -19,7 +19,6 @@ from __future__ import unicode_literals -from io import BytesIO import re from decimal import Decimal @@ -36,6 +35,7 @@ ) from weboob.exceptions import BrowserUnavailable from weboob.tools.compat import urljoin, unicode +from weboob.tools.pdf import extract_text from .base import MyHTMLPage @@ -432,52 +432,8 @@ def parse(self, el): class AccountRIB(LoggedPage, RawPage): iban_regexp = r'[A-Z]{2}\d{12}[0-9A-Z]{11}\d{2}' - def __init__(self, *args, **kwargs): - super(AccountRIB, self).__init__(*args, **kwargs) - - self.parsed_text = b'' - - try: - try: - from pdfminer.pdfdocument import PDFDocument - from pdfminer.pdfpage import PDFPage - newapi = True - except ImportError: - from pdfminer.pdfparser import PDFDocument - newapi = False - from pdfminer.pdfparser import PDFParser, PDFSyntaxError - from pdfminer.converter import TextConverter - from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter - except ImportError: - self.logger.warning('Please install python-pdfminer to get IBANs') - else: - parser = PDFParser(BytesIO(self.doc)) - try: - if newapi: - doc = PDFDocument(parser) - else: - doc = PDFDocument() - parser.set_document(doc) - doc.set_parser(parser) - except PDFSyntaxError: - return - - rsrcmgr = PDFResourceManager() - out = BytesIO() - device = TextConverter(rsrcmgr, out) - interpreter = PDFPageInterpreter(rsrcmgr, device) - if newapi: - pages = PDFPage.create_pages(doc) - else: - doc.initialize() - pages = doc.get_pages() - for page in pages: - interpreter.process_page(page) - - self.parsed_text = out.getvalue() - def get_iban(self): - m = re.search(self.iban_regexp, self.parsed_text.decode('utf-8')) + m = re.search(self.iban_regexp, extract_text(self.data)) if m: return unicode(m.group(0)) return None -- GitLab