Commit 71839a76 authored by Antoine BOSSY

Use builtin helper to read data from pdf.

parent a86b4989
Pipeline #2718 failed with stages
......@@ -28,7 +28,7 @@ from weboob.capabilities.bank import Account, Loan, AccountOwnership
from weboob.capabilities.contact import Advisor
from weboob.capabilities.profile import Person
from weboob.browser.elements import ListElement, ItemElement, method, TableElement
from weboob.browser.pages import LoggedPage, RawPage, PartialHTMLPage, HTMLPage
from weboob.browser.pages import LoggedPage, RawPage, PartialHTMLPage, HTMLPage, PDFPage
from weboob.browser.filters.html import Link, TableCell, Attr
from weboob.browser.filters.standard import (
CleanText, CleanDecimal, Regexp, Env, Field, Currency,
......@@ -36,6 +36,7 @@ from weboob.browser.filters.standard import (
)
from weboob.exceptions import BrowserUnavailable
from weboob.tools.compat import urljoin, unicode
from weboob.tools.pdf import extract_text
from .base import MyHTMLPage
......@@ -421,55 +422,11 @@ class Advisor(LoggedPage, MyHTMLPage):
self.env['name'] = agency
class AccountRIB(LoggedPage, RawPage):
class AccountRIB(LoggedPage, PDFPage):
iban_regexp = r'[A-Z]{2}\d{12}[0-9A-Z]{11}\d{2}'
def __init__(self, *args, **kwargs):
super(AccountRIB, self).__init__(*args, **kwargs)
self.parsed_text = b''
try:
try:
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfpage import PDFPage
newapi = True
except ImportError:
from pdfminer.pdfparser import PDFDocument
newapi = False
from pdfminer.pdfparser import PDFParser, PDFSyntaxError
from pdfminer.converter import TextConverter
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
except ImportError:
self.logger.warning('Please install python-pdfminer to get IBANs')
else:
parser = PDFParser(BytesIO(self.doc))
try:
if newapi:
doc = PDFDocument(parser)
else:
doc = PDFDocument()
parser.set_document(doc)
doc.set_parser(parser)
except PDFSyntaxError:
return
rsrcmgr = PDFResourceManager()
out = BytesIO()
device = TextConverter(rsrcmgr, out)
interpreter = PDFPageInterpreter(rsrcmgr, device)
if newapi:
pages = PDFPage.create_pages(doc)
else:
doc.initialize()
pages = doc.get_pages()
for page in pages:
interpreter.process_page(page)
self.parsed_text = out.getvalue()
def get_iban(self):
m = re.search(self.iban_regexp, self.parsed_text.decode('utf-8'))
m = re.search(self.iban_regexp, extract_text(self.data))
if m:
return unicode(m.group(0))
return None
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment