From 0e817accdc32380ef7b28c25774292435e4c5cb0 Mon Sep 17 00:00:00 2001 From: Vincent Ardisson Date: Fri, 6 Mar 2020 12:06:43 +0100 Subject: [PATCH] [s2e] use "show all" button for documents + improvements There might be more documents than displayed on the first documents page. A button "Afficher tout" exists which shows everything. It might be missing if we clicked it already (warning: the site is stateful for the session). Also, the id from URL might not reflect the document type, it has been observed to be "EDI/..." for various document types, without "RDC" or anything meaningful. So, type using the label instead. --- modules/s2e/browser.py | 27 ++++++---------- modules/s2e/pages.py | 72 +++++++++++++++++++++++------------------- 2 files changed, 48 insertions(+), 51 deletions(-) diff --git a/modules/s2e/browser.py b/modules/s2e/browser.py index 22d29c0363..3737b6f2cd 100644 --- a/modules/s2e/browser.py +++ b/modules/s2e/browser.py @@ -33,7 +33,7 @@ EtoileGestionPage, EtoileGestionCharacteristicsPage, EtoileGestionDetailsPage, APIInvestmentDetailsPage, LyxorFundsPage, EsaliaDetailsPage, EsaliaPerformancePage, AmundiDetailsPage, AmundiPerformancePage, ProfilePage, - EServicePage, EServicePartialPage, + EServicePage, ) @@ -77,8 +77,11 @@ class S2eBrowser(LoginBrowser, StatesMixin): esalia_details = URL(r'https://www.societegeneralegestion.fr/psSGGestionEntr/productsheet/view', EsaliaDetailsPage) esalia_performance = URL(r'https://www.societegeneralegestion.fr/psSGGestionEntr/ezjscore/call(.*)_tab_2', EsaliaPerformancePage) - e_service_page = URL('/portal/salarie-(?P<slug>\w+)/mesdonnees/eservice\?scenario=ConsulterEService', EServicePage) - e_service_partial_page = URL('/portal/salarie-(?P<slug>\w+)/mesdonnees/eservice\?portal:componentId=a835f01c-278d-46c3-9910-06e43e7ccc5a&portal:type=resource', EServicePartialPage) + e_service_page = URL( + r'/portal/salarie-(?P<slug>\w+)/mesdonnees/eservice\?scenario=ConsulterEService', + r'/portal/salarie-(?P<slug>\w+)/mesdonnees/eservice', + 
EServicePage, + ) STATE_DURATION = 10 @@ -279,23 +282,11 @@ def get_profile(self): profile = self.page.get_profile() return profile + @need_login def iter_documents(self): self.e_service_page.stay_or_go(slug=self.SLUG) - viewstate = self.page.get_view_state() - - data = { - "pb68893:j_idt2:form": "pb68893:j_idt2:form", - "pb68893:j_idt2:form:onglets-value": "eService", - "javax.faces.ViewState": viewstate, - "org.richfaces.ajax.component": "pb68893:j_idt2:form:onglet4", - "pb68893:j_idt2:form:onglet4": "pb68893:j_idt2:form:onglet4", - "AJAX:EVENTS_COUNT": 1, - "javax.faces.partial.ajax": "true" - } - self.e_service_partial_page.go(data=data, slug=self.SLUG) - - documents = self.page.iter_documents() - return documents + self.page.show_more() + return self.page.iter_documents() class EsaliaBrowser(S2eBrowser): diff --git a/modules/s2e/pages.py b/modules/s2e/pages.py index 7cb1e495e8..ec105c1561 100644 --- a/modules/s2e/pages.py +++ b/modules/s2e/pages.py @@ -30,14 +30,15 @@ from weboob.browser.elements import ItemElement, TableElement, SkipItem, method from weboob.browser.filters.standard import ( CleanText, Date, Regexp, Eval, CleanDecimal, - Env, Field, MapIn, Upper, Format, Title, + Env, Field, MapIn, Upper, Format, Title, QueryValue, +) +from weboob.browser.filters.html import ( + Attr, TableCell, AbsoluteLink, XPath, ) -from weboob.browser.filters.html import Attr, TableCell from weboob.browser.filters.json import Dict from weboob.browser.exceptions import HTTPNotFound from weboob.capabilities.bank import Account, Investment, Pocket, Transaction from weboob.capabilities.profile import Person -from weboob.browser.filters.html import Link, XPath from weboob.capabilities.bill import Document, DocumentTypes from weboob.capabilities.base import NotAvailable, empty from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard @@ -885,25 +886,38 @@ def obj_performance_history(self): return perfs -class EServicePage(LoggedPage, HTMLPage): +DOCUMENT_TYPE_LABEL = { + 
'RDC': DocumentTypes.STATEMENT, # is this in label? + 'Relevé de situation': DocumentTypes.STATEMENT, + 'Relevé de compte': DocumentTypes.STATEMENT, + 'Bulletin': DocumentTypes.STATEMENT, + 'Sit Pat': DocumentTypes.REPORT, # is this in label? + 'Situation de patrimoine': DocumentTypes.REPORT, + 'Avis': DocumentTypes.REPORT, +} + - def get_view_state(self): - # Note: the whole website is built with JavaServer Faces (JSF) ; and multiple parts of the pages - # can be partially replaced (via ajax calls) instead of full page refresh. - # The first page does not have the data we need, but a placeholder div (identified by the GUID below) - # which, at the load of this page, is filled with other data than the one we need. - # Another request is needed to provides the data (and, in a real browser, is replacing the div content). - # (However it seems not possible to only fetch the data, without loading the other page first) - # the ViewState is an important parameter for JSF web sites, thus we extract it and provide it - # to the browser. - view_state = XPath('//div[@id="a835f01c-278d-46c3-9910-06e43e7ccc5a"]//input[@id="javax.faces.ViewState"]/@value')(self.doc) - return view_state +class EServicePage(LoggedPage, HTMLPage): + def show_more(self): + form = self.get_form(xpath='//div[@id="gestion"]//form') + try: + # erehsbc: tout afficher + # bnppere: afficher tous les e-documents + button_el = form.el.xpath( + './/input[matches(@value,"Tout afficher|Afficher tous")]' + )[0] + except IndexError: + self.logger.debug('no "display all" button, everything already is displayed?') + return + buttonid = button_el.attrib['id'] -class EServicePartialPage(LoggedPage, PartialHTMLPage): - # Note: this is in fact an HTML page enclosed in some XML tags. 
- # As the XML tags offer no value I found it simpler to inherit from - # PartialHTMLPage than from XMLPage + form['javax.faces.source'] = buttonid + form['javax.faces.partial.event'] = 'click' + form['javax.faces.partial.execute'] = '%s @component' % buttonid + form['org.richfaces.ajax.component'] = buttonid + self.logger.debug('showing all documents') + form.submit() @method class iter_documents(TableElement): @@ -918,19 +932,11 @@ class item(ItemElement): obj_date = Date(CleanText(XPath('.//td[1]')), dayfirst=True) obj_label = Format('%s %s', CleanText(XPath('.//td[2]')), CleanText(XPath('.//td[1]'))) - # Note: the id is constructed from the file name, which gives us some interesting information: - # - Document type - # - Document date - # Ex: RDCdirect_28112018 - obj_id = Link('.//a') & Regexp(pattern=r'titrePDF=(.*)', nth=0) & CleanText(symbols='/ ') - obj_url = Link('.//a') obj_format = 'pdf' + obj_url = AbsoluteLink('.//a') - def obj_type(self): - result = DocumentTypes.OTHER - doc_type = Regexp(Link('.//a'), r'titrePDF=(.*) / ', nth=0)(self) - if doc_type == 'RDC': - result = DocumentTypes.STATEMENT - if doc_type == 'Sit Pat': - result = DocumentTypes.REPORT - return result + # Note: the id is constructed from the file name, which gives us some interesting information: + # - Document date + # Ex: RDCdirect_28112018link + obj_id = CleanText(QueryValue(obj_url, 'titrePDF'), symbols='/ ') + obj_type = MapIn(Field('label'), DOCUMENT_TYPE_LABEL, default=DocumentTypes.OTHER) -- GitLab