From 7b3524cb76b8a3ae64cd6e38641c978d801874ce Mon Sep 17 00:00:00 2001 From: Ludovic LANGE Date: Tue, 8 Jan 2019 00:09:19 +0100 Subject: [PATCH] [s2e] adding CapDocument As detailed in #198, adding CapDocument to s2e module allows us to list available PDF documents (operation statements, yearly reports) and download them. All "employee savings banks" that are using s2e module can benefit from this new capability : in order to do this, it's necessary to change the module definition of all these modules and have them inherit from CapDocument (otherwise the capability coming from the parent module -s2e- is not detected by weboob-config update). --- modules/s2e/browser.py | 22 ++++++++++++++++++ modules/s2e/module.py | 48 +++++++++++++++++++++++++++++++++++++- modules/s2e/pages.py | 53 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 1 deletion(-) diff --git a/modules/s2e/browser.py b/modules/s2e/browser.py index bb905397ce..22d29c0363 100644 --- a/modules/s2e/browser.py +++ b/modules/s2e/browser.py @@ -33,6 +33,7 @@ EtoileGestionPage, EtoileGestionCharacteristicsPage, EtoileGestionDetailsPage, APIInvestmentDetailsPage, LyxorFundsPage, EsaliaDetailsPage, EsaliaPerformancePage, AmundiDetailsPage, AmundiPerformancePage, ProfilePage, + EServicePage, EServicePartialPage, ) @@ -76,6 +77,9 @@ class S2eBrowser(LoginBrowser, StatesMixin): esalia_details = URL(r'https://www.societegeneralegestion.fr/psSGGestionEntr/productsheet/view', EsaliaDetailsPage) esalia_performance = URL(r'https://www.societegeneralegestion.fr/psSGGestionEntr/ezjscore/call(.*)_tab_2', EsaliaPerformancePage) + e_service_page = URL('/portal/salarie-(?P<slug>\w+)/mesdonnees/eservice\?scenario=ConsulterEService', EServicePage) + e_service_partial_page = URL('/portal/salarie-(?P<slug>\w+)/mesdonnees/eservice\?portal:componentId=a835f01c-278d-46c3-9910-06e43e7ccc5a&portal:type=resource', EServicePartialPage) + STATE_DURATION = 10 def __init__(self, config=None, *args, **kwargs): @@ -275,6 
+279,24 @@ def get_profile(self): profile = self.page.get_profile() return profile + def iter_documents(self): + self.e_service_page.stay_or_go(slug=self.SLUG) + viewstate = self.page.get_view_state() + + data = { + "pb68893:j_idt2:form": "pb68893:j_idt2:form", + "pb68893:j_idt2:form:onglets-value": "eService", + "javax.faces.ViewState": viewstate, + "org.richfaces.ajax.component": "pb68893:j_idt2:form:onglet4", + "pb68893:j_idt2:form:onglet4": "pb68893:j_idt2:form:onglet4", + "AJAX:EVENTS_COUNT": 1, + "javax.faces.partial.ajax": "true" + } + self.e_service_partial_page.go(data=data, slug=self.SLUG) + + documents = self.page.iter_documents() + return documents + class EsaliaBrowser(S2eBrowser): BASEURL = 'https://salaries.esalia.com' diff --git a/modules/s2e/module.py b/modules/s2e/module.py index fc232d7694..26d9c67f8e 100644 --- a/modules/s2e/module.py +++ b/modules/s2e/module.py @@ -21,9 +21,15 @@ from weboob.tools.backend import Module from weboob.capabilities.bank import CapBankWealth from weboob.capabilities.profile import CapProfile +from weboob.capabilities.bank import Account +from weboob.capabilities.base import find_object, empty +from weboob.capabilities.bill import ( + CapDocument, Subscription, SubscriptionNotFound, + Document, DocumentNotFound, DocumentTypes, +) -class S2eModule(Module, CapBankWealth, CapProfile): +class S2eModule(Module, CapBankWealth, CapDocument, CapProfile): NAME = 's2e' DESCRIPTION = u'Épargne Salariale' MAINTAINER = u'Edouard Lambert' @@ -31,6 +37,8 @@ class S2eModule(Module, CapBankWealth, CapProfile): LICENSE = 'LGPLv3+' VERSION = '2.1' + accepted_document_types = (DocumentTypes.STATEMENT, DocumentTypes.REPORT) + def iter_accounts(self): return self.browser.iter_accounts() @@ -45,3 +53,41 @@ def iter_pocket(self, account): def get_profile(self): return self.browser.get_profile() + + # From weboob.capabilities.bill.CapDocument + def iter_subscription(self): + """Fake subscription - documents are attached to a subscription.""" 
+ sub = Subscription() + sub.id = 'statements' + sub.label = u'Relevés électroniques / e-statements' + yield sub + + # From weboob.capabilities.bill.CapDocument + def get_subscription(self, _id): + return find_object(self.iter_subscription(), id=_id, error=SubscriptionNotFound) + + # From weboob.capabilities.bill.CapDocument + def iter_documents(self, subscription): + return self.browser.iter_documents() + + # From weboob.capabilities.bill.CapDocument + def get_document(self, _id): + return find_object(self.iter_documents(None), id=_id, error=DocumentNotFound) + + # From weboob.capabilities.bill.CapDocument + def download_document(self, document): + if not isinstance(document, Document): + document = self.get_document(document) + if empty(document.url): + return + return self.browser.open(document.url).content + + # From weboob.capabilities.collection.CapCollection + def iter_resources(self, objs, split_path): + """Merging implementation from CapDocument and CapBank.""" + if Account in objs: + self._restrict_level(split_path) + return self.iter_accounts() + if Subscription in objs: + self._restrict_level(split_path) + return self.iter_subscription() diff --git a/modules/s2e/pages.py b/modules/s2e/pages.py index bbc344823a..7cb1e495e8 100644 --- a/modules/s2e/pages.py +++ b/modules/s2e/pages.py @@ -37,6 +37,8 @@ from weboob.browser.exceptions import HTTPNotFound from weboob.capabilities.bank import Account, Investment, Pocket, Transaction from weboob.capabilities.profile import Person +from weboob.browser.filters.html import Link, XPath +from weboob.capabilities.bill import Document, DocumentTypes from weboob.capabilities.base import NotAvailable, empty from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard from weboob.exceptions import BrowserUnavailable, ActionNeeded, BrowserQuestion, BrowserIncorrectPassword @@ -881,3 +883,54 @@ def obj_performance_history(self): value = item['value'] perfs[duration] = Eval(lambda x: x / 100, 
CleanDecimal.US(value))(self) return perfs + + +class EServicePage(LoggedPage, HTMLPage): + + def get_view_state(self): + # Note: the whole website is built with JavaServer Faces (JSF) ; and multiple parts of the pages + # can be partially replaced (via ajax calls) instead of full page refresh. + # The first page does not have the data we need, but a placeholder div (identified by the GUID below) + # which, at the load of this page, is filled with other data than the one we need. + # Another request is needed to provide the data (in a real browser, its response replaces the div content). + # (However it seems not possible to only fetch the data, without loading the other page first) + # the ViewState is an important parameter for JSF web sites, thus we extract it and provide it + # to the browser. + view_state = XPath('//div[@id="a835f01c-278d-46c3-9910-06e43e7ccc5a"]//input[@id="javax.faces.ViewState"]/@value')(self.doc) + return view_state + + +class EServicePartialPage(LoggedPage, PartialHTMLPage): + # Note: this is in fact an HTML page enclosed in some XML tags. + # As the XML tags offer no value I found it simpler to inherit from + # PartialHTMLPage than from XMLPage + + @method + class iter_documents(TableElement): + # Note: on this (partial) page, 'head' and 'items' are actually two different HTML tables. + # It seems to confuse TableCell filter, thus we fetch data using XPath filter. 
+ # (As head_xpath is mandatory we provide its value nevertheless) + item_xpath = '//div[contains(@id,"panelEReleves_body")]/div/table/tbody[contains(@id,"tb")]/tr[td]' + head_xpath = '//div[contains(@id,"panelEReleves_body")]/table//th' + + class item(ItemElement): + klass = Document + + obj_date = Date(CleanText(XPath('.//td[1]')), dayfirst=True) + obj_label = Format('%s %s', CleanText(XPath('.//td[2]')), CleanText(XPath('.//td[1]'))) + # Note: the id is constructed from the file name, which gives us some interesting information: + # - Document type + # - Document date + # Ex: RDCdirect_28112018 + obj_id = Link('.//a') & Regexp(pattern=r'titrePDF=(.*)', nth=0) & CleanText(symbols='/ ') + obj_url = Link('.//a') + obj_format = 'pdf' + + def obj_type(self): + result = DocumentTypes.OTHER + doc_type = Regexp(Link('.//a'), r'titrePDF=(.*) / ', nth=0)(self) + if doc_type == 'RDC': + result = DocumentTypes.STATEMENT + if doc_type == 'Sit Pat': + result = DocumentTypes.REPORT + return result -- GitLab