From 886f203e76e554d7d805521f9ad06e73cdb446f0 Mon Sep 17 00:00:00 2001 From: Florian Duguet Date: Wed, 16 Jan 2019 16:21:38 +0100 Subject: [PATCH] [ing] optimize download_document ing website is stateful store current subscription to avoid doing new request to reach the good page each time we want to download a doc --- modules/ing/browser.py | 49 ++++++++++++++++++++++++++++---------- modules/ing/module.py | 12 +++------- modules/ing/pages/bills.py | 48 +++++++++++++++++++++++++++++++++---- 3 files changed, 82 insertions(+), 27 deletions(-) diff --git a/modules/ing/browser.py b/modules/ing/browser.py index 56b83fb5cd..0c1ca03230 100644 --- a/modules/ing/browser.py +++ b/modules/ing/browser.py @@ -120,6 +120,10 @@ def __init__(self, *args, **kwargs): self.multispace = None self.current_space = None + # ing website is stateful, so we need to store the current subscription when download document to be sure + # we download file for the right subscription + self.current_subscription = None + def do_login(self): assert self.password.isdigit() assert self.birthday.isdigit() @@ -535,25 +539,44 @@ def get_subscriptions(self): self.billpage.go() if self.loginpage.is_here(): self.do_login() - return self.billpage.go().iter_account() + subscriptions = list(self.billpage.go().iter_subscriptions()) else: - return self.page.iter_account() + subscriptions = list(self.page.iter_subscriptions()) + + self.cache['subscriptions'] = {} + for sub in subscriptions: + self.cache['subscriptions'][sub.id] = sub + + return subscriptions + + def _go_to_subscription(self, subscription): + # ing website is not stateless, make sure we are on the correct documents page before doing anything else + if self.current_subscription and self.current_subscription.id == subscription.id: + return - @need_login - def get_documents(self, subscription): self.billpage.go() - data = {"AJAXREQUEST": "_viewRoot", - "accountsel_form": "accountsel_form", - subscription._formid: subscription._formid, - "autoScroll": "", - "javax.faces.ViewState": subscription._javax, - "transfer_issuer_radio": subscription.id - } + data = { + "AJAXREQUEST": "_viewRoot", + "accountsel_form": "accountsel_form", + subscription._formid: subscription._formid, + "autoScroll": "", + "javax.faces.ViewState": subscription._javax, + "transfer_issuer_radio": subscription.id + } self.billpage.go(data=data) + self.current_subscription = subscription + + @need_login + def get_documents(self, subscription): + self._go_to_subscription(subscription) return self.page.iter_documents(subid=subscription.id) - def predownload(self, bill): - self.page.postpredown(bill._localid) + def download_document(self, bill): + subid = bill.id.split('-')[0] + # make sure we are on the right page to not download a document from another subscription + self._go_to_subscription(self.cache['subscriptions'][subid]) + self.page.go_to_year(bill._year) + return self.page.download_document(bill) ############# CapProfile ############# @start_with_main_site diff --git a/modules/ing/module.py b/modules/ing/module.py index 71a80cc3d7..c87fd76a77 100644 --- a/modules/ing/module.py +++ b/modules/ing/module.py @@ -27,10 +27,9 @@ SubscriptionNotFound, DocumentNotFound, DocumentTypes, ) from weboob.capabilities.profile import CapProfile -from weboob.capabilities.base import find_object, NotAvailable +from weboob.capabilities.base import find_object from weboob.tools.backend import Module, BackendConfig from weboob.tools.value import ValueBackendPassword, ValueDate -from weboob.browser.exceptions import ServerError from .browser import IngBrowser @@ -138,13 +137,8 @@ def iter_documents(self, subscription): def download_document(self, bill): if not isinstance(bill, Bill): bill = self.get_document(bill) - self.get_document(bill.id) - try: - self.browser.predownload(bill) - except ServerError: - return NotAvailable - assert(self.browser.response.headers['content-type'] in ["application/pdf", "application/download"]) - return self.browser.response.content + + return self.browser.download_document(bill).content def get_profile(self): return self.browser.get_profile() diff --git a/modules/ing/pages/bills.py b/modules/ing/pages/bills.py index 0051f442fa..1f3fa93cb8 100644 --- a/modules/ing/pages/bills.py +++ b/modules/ing/pages/bills.py @@ -18,7 +18,7 @@ # along with weboob. If not, see . from weboob.capabilities.bill import DocumentTypes, Bill, Subscription -from weboob.browser.pages import HTMLPage, LoggedPage, pagination +from weboob.browser.pages import HTMLPage, LoggedPage, pagination, Form from weboob.browser.filters.standard import Filter, CleanText, Format, Field, Env, Date from weboob.browser.filters.html import Attr from weboob.browser.elements import ListElement, ItemElement, method @@ -32,9 +32,18 @@ def filter(self, txt): return formid +class MyForm(Form): + def submit(self, **kwargs): + """ + Submit the form but keep current browser.page + """ + kwargs.setdefault('data_encoding', self.page.encoding) + return self.page.browser.open(self.request, **kwargs) + + class BillsPage(LoggedPage, HTMLPage): @method - class iter_account(ListElement): + class iter_subscriptions(ListElement): item_xpath = '//ul[@class="unstyled striped"]/li' class item(ItemElement): @@ -45,16 +54,38 @@ class item(ItemElement): obj_label = CleanText('label') obj__formid = FormId(Attr('input', 'onclick')) - def postpredown(self, _id): - _id = _id.split("'")[3] + def get_selected_year(self): + return int(CleanText('//form[@id="years_form"]//ul/li[@class="rich-list-item selected"]')(self.doc)) + + def go_to_year(self, year): + if year == self.get_selected_year(): + return + + ref = Attr('//form[@id="years_form"]//ul//a[text()="%s"]' % year, 'id')(self.doc) + + self.FORM_CLASS = Form + form = self.get_form(name="years_form") + form.pop('years_form:j_idcl') + form.pop('years_form:_link_hidden_') + form['AJAXREQUEST'] = 'years_form:year_region' + form[ref] = ref + + return form.submit() + + def download_document(self, bill): + # MyForm do open, and not location to keep html page as self.page, to reduce number of request on this html page + self.FORM_CLASS = MyForm + _id = bill._localid.split("'")[3] + form = self.get_form(name="downpdf_form") form['statements_form'] = 'statements_form' form['statements_form:j_idcl'] = _id - form.submit() + return form.submit() @pagination @method class iter_documents(ListElement): + flush_at_end = True item_xpath = '//ul[@id="statements_form:statementsel"]/li' def next_page(self): @@ -77,6 +108,10 @@ def next_page(self): form[ref] = ref return form.request + def flush(self): + for obj in reversed(self.objects.values()): + yield obj + class item(ItemElement): klass = Bill @@ -89,3 +124,6 @@ class item(ItemElement): obj_format = u"pdf" obj_type = DocumentTypes.STATEMENT obj__localid = Attr('a[2]', 'onclick') + + def obj__year(self): + return int(CleanText('a[1]')(self).split(' ')[1]) -- GitLab