From bc53e4cf0c828a2de994c1ce1d1caab4cc527fd1 Mon Sep 17 00:00:00 2001 From: Quentin Defenouillere Date: Wed, 18 Sep 2019 10:48:12 +0200 Subject: [PATCH] [axabanque] Repair browser AxaAssurance and implement iter_history from JSON The former code was partly obsolete, there is now a JSON for transactions. Closes: 13397@zendesk, 13412@zendesk --- modules/axabanque/browser.py | 193 +++++++++++++++++++++--------- modules/axabanque/pages/bank.py | 3 + modules/axabanque/pages/wealth.py | 117 +++++++++--------- 3 files changed, 199 insertions(+), 114 deletions(-) diff --git a/modules/axabanque/browser.py b/modules/axabanque/browser.py index 0495d4ed66..247cb2d66e 100644 --- a/modules/axabanque/browser.py +++ b/modules/axabanque/browser.py @@ -41,7 +41,10 @@ AccountsPage as BankAccountsPage, CBTransactionsPage, TransactionsPage, UnavailablePage, IbanPage, LifeInsuranceIframe, BoursePage, BankProfilePage, ) -from .pages.wealth import AccountsPage as WealthAccountsPage, InvestmentPage, HistoryPage, ProfilePage +from .pages.wealth import ( + AccountsPage as WealthAccountsPage, AccountDetailsPage, + InvestmentPage, HistoryPage, HistoryInvestmentsPage, ProfilePage, +) from .pages.transfer import ( RecipientsPage, AddRecipientPage, ValidateTransferPage, RegisterTransferPage, ConfirmTransferPage, RecipientConfirmationPage, @@ -97,31 +100,42 @@ class AXABanque(AXABrowser, StatesMixin): STATE_DURATION = 5 # Bank - bank_accounts = URL('transactionnel/client/liste-comptes.html', - 'transactionnel/client/liste-(?P.*).html', - 'webapp/axabanque/jsp/visionpatrimoniale/liste_panorama_.*\.faces', + bank_accounts = URL(r'transactionnel/client/liste-comptes.html', + r'transactionnel/client/liste-(?P.*).html', + r'webapp/axabanque/jsp/visionpatrimoniale/liste_panorama_.*\.faces', r'/webapp/axabanque/page\?code=(?P\d+)', - 'webapp/axabanque/client/sso/connexion\?token=(?P.*)', BankAccountsPage) - iban_pdf = URL('http://www.axabanque.fr/webapp/axabanque/formulaire_AXA_Banque/.*\.pdf.*', IbanPage) - cbttransactions = URL('webapp/axabanque/jsp/detailCarteBleu.*.faces', CBTransactionsPage) - transactions = URL('webapp/axabanque/jsp/panorama.faces', - 'webapp/axabanque/jsp/visionpatrimoniale/panorama_.*\.faces', - 'webapp/axabanque/jsp/detail.*.faces', - 'webapp/axabanque/jsp/.*/detail.*.faces', TransactionsPage) - unavailable = URL('login_errors/indisponibilite.*', - '.*page-indisponible.html.*', - '.*erreur/erreurBanque.faces', - 'http://www.axabanque.fr/message/maintenance.htm', UnavailablePage) + r'webapp/axabanque/client/sso/connexion\?token=(?P.*)', BankAccountsPage) + iban_pdf = URL(r'http://www.axabanque.fr/webapp/axabanque/formulaire_AXA_Banque/.*\.pdf.*', IbanPage) + cbttransactions = URL(r'webapp/axabanque/jsp/detailCarteBleu.*.faces', CBTransactionsPage) + transactions = URL(r'webapp/axabanque/jsp/panorama.faces', + r'webapp/axabanque/jsp/visionpatrimoniale/panorama_.*\.faces', + r'webapp/axabanque/jsp/detail.*.faces', + r'webapp/axabanque/jsp/.*/detail.*.faces', TransactionsPage) + unavailable = URL(r'login_errors/indisponibilite.*', + r'.*page-indisponible.html.*', + r'.*erreur/erreurBanque.faces', + r'http://www.axabanque.fr/message/maintenance.htm', UnavailablePage) + # Wealth - wealth_accounts = URL('https://espaceclient.axa.fr/$', - 'https://espaceclient.axa.fr/accueil.html', - 'https://connexion.adis-assurances.com', WealthAccountsPage) + wealth_accounts = URL( + 'https://espaceclient.axa.fr/$', + 'https://espaceclient.axa.fr/accueil.html', + 'https://connexion.adis-assurances.com', + WealthAccountsPage + ) investment = URL('https://espaceclient.axa.fr/.*content/ecc-popin-cards/savings/(\w+)/repartition', InvestmentPage) - history = URL('https://espaceclient.axa.fr/.*accueil/savings/(\w+)/contract', - 'https://espaceclient.axa.fr/#', HistoryPage) - - lifeinsurance_iframe = URL('https://assurance-vie.axabanque.fr/Consultation/SituationContrat.aspx', - 'https://assurance-vie.axabanque.fr/Consultation/HistoriqueOperations.aspx', LifeInsuranceIframe) + history = URL(r'https://espaceclient.axa.fr/accueil/savings/savings/contract/_jcr_content.eccGetSavingsOperations.json', HistoryPage) + history_investments = URL(r'https://espaceclient.axa.fr/accueil/savings/savings/contract/_jcr_content.eccGetSavingOperationDetail.json', HistoryInvestmentsPage) + details = URL( + 'https://espaceclient.axa.fr/.*accueil/savings/(\w+)/contract', + 'https://espaceclient.axa.fr/#', + AccountDetailsPage + ) + lifeinsurance_iframe = URL( + 'https://assurance-vie.axabanque.fr/Consultation/SituationContrat.aspx', + 'https://assurance-vie.axabanque.fr/Consultation/HistoriqueOperations.aspx', + LifeInsuranceIframe + ) # netfinca bourse bourse = URL(r'/transactionnel/client/homepage_bourseCAT.html', @@ -239,13 +253,13 @@ def go_account_pages(self, account, action): target = self.page.get_form_action(args['_form_name']) self.location(target, data=args) - @need_login def go_wealth_pages(self, account): self.wealth_accounts.go() self.location(account.url) self.location(self.page.get_account_url(account.url)) def get_netfinca_account(self, account): + # Important: this part is controlled by modules/lcl/pages.py self.go_account_pages(account, None) self.page.open_market() self.page.open_market_next() @@ -269,10 +283,9 @@ def iter_investment(self, account): return self.page.iter_investment() if account.id not in self.cache['invs']: - # do we still need it ?... - if account._acctype == "bank" and account._hasinv: - self.go_account_pages(account, "investment") - elif account._acctype == "investment": + if account._acctype == 'bank' and account._hasinv: + self.go_account_pages(account, 'investment') + elif account._acctype == 'investment': self.go_wealth_pages(account) investment_url = self.page.get_investment_url() if investment_url is None: @@ -314,36 +327,63 @@ def iter_history(self, account): yield tr # Side investment's website - if account._acctype == "investment": + if account._acctype == 'investment': + ''' + Transactions are available 10 by 10 in a JSON. + To access it, we need the account 'pid' and to increment + 'skip' for each transaction page until the JSON is empty. + However, transactions are not always in the chronological order. + ''' self.go_wealth_pages(account) - pagination_url = self.page.get_pagination_url() - try: - self.location(pagination_url, params={'skip': 0}) - except ClientError as e: - assert e.response.status_code == 406 - self.logger.info('not doing pagination for account %r, site seems broken', account) - for tr in self.page.iter_history(no_pagination=True): - yield tr + pid = self.page.get_pid() + skip = 0 + if not pid: + self.logger.warning('No pid available for account %s, transactions cannot be retrieved.', account.id) return - self.skip = 0 - for tr in self.page.iter_history(pagination_url=pagination_url): + + transactions = [] + self.go_to_transactions(pid, skip) + # Pagination: + while self.page.has_operations(): + for tr in self.page.iter_history(): + transactions.append(tr) + skip += 10 + self.go_to_transactions(pid, skip) + + for tr in sorted_transactions(transactions): + # Get investments for each transaction + params = { + 'oid': tr._oid, + 'pid': pid + } + self.history_investments.go(params=params) + if self.page.has_investments(): + tr.investments = list(self.page.iter_transaction_investments()) yield tr - # Main website withouth investments - elif account._acctype == "bank" and not account._hasinv and account.type != Account.TYPE_CARD: - self.go_account_pages(account, "history") + + # Main website without investments + elif account._acctype == 'bank' and not account._hasinv and account.type != Account.TYPE_CARD: + self.go_account_pages(account, 'history') if self.page.more_history(): for tr in self.page.get_history(): yield tr # Get deferred card history - elif account._acctype == "bank" and account.type == Account.TYPE_CARD: + elif account._acctype == 'bank' and account.type == Account.TYPE_CARD: for tr in sorted_transactions(self.deferred_card_transactions(account)): if tr.date <= date.today(): yield tr + def go_to_transactions(self, pid, skip): + params = { + 'pid': pid, + 'skip': skip + } + self.history.go(params=params) + def deferred_card_transactions(self, account): summary_date = NotAvailable - self.go_account_pages(account, "history") + self.go_account_pages(account, 'history') if self.page.get_deferred_card_history(): for tr in self.page.get_history(): @@ -510,9 +550,14 @@ class AXAAssurance(AXABrowser): BASEURL = 'https://espaceclient.axa.fr' accounts = URL(r'/accueil.html', WealthAccountsPage) + history = URL(r'/accueil/savings/savings/contract/_jcr_content.eccGetSavingsOperations.json', HistoryPage) + history_investments = URL(r'/accueil/savings/savings/contract/_jcr_content.eccGetSavingOperationDetail.json', HistoryInvestmentsPage) + details = URL( + r'.*accueil/savings/(\w+)/contract', + r'/#', + AccountDetailsPage + ) investment = URL(r'/content/ecc-popin-cards/savings/[^/]+/repartition', InvestmentPage) - history = URL(r'.*accueil/savings/(\w+)/contract', - r'/#', HistoryPage) documents = URL(r'/content/espace-client/accueil/mes-documents/attestations-d-assurances.content-inner.din_CERTIFICATE.html', DocumentsPage) download = URL(r'/content/ecc-popin-cards/technical/detailed/document.downloadPdf.html', r'/content/ecc-popin-cards/technical/detailed/document/_jcr_content/', @@ -525,7 +570,7 @@ def __init__(self, *args, **kwargs): self.cache['invs'] = {} def go_wealth_pages(self, account): - self.location("/" + account.url) + self.location('/' + account.url) self.location(self.page.get_account_url(account.url)) @need_login @@ -540,13 +585,14 @@ def iter_investment(self, account): if account.id not in self.cache['invs']: self.go_wealth_pages(account) investment_url = self.page.get_investment_url() - if investment_url is None: - self.logger.warning('no investment link for account %s, returning empty', account) + if not investment_url: # fake data, don't cache it + self.logger.warning('No investment URL available for account %s, investments cannot be retrieved.', account.id) return [] + self.location(investment_url) - detailed_view = self.page.detailed_view() portfolio_page = self.page + detailed_view = self.page.detailed_view() if detailed_view: self.location(detailed_view) self.cache['invs'][account.id] = list(self.page.iter_investment(currency=account.currency)) @@ -564,16 +610,47 @@ def iter_investment(self, account): @need_login def iter_history(self, account): + ''' + Transactions are available 10 by 10 in a JSON. + To access it, we need the account 'pid' and to increment + 'skip' for each transaction page until the JSON is empty. + However, transactions are not always in the chronological order. + ''' self.go_wealth_pages(account) - pagination_url = self.page.get_pagination_url() - try: - self.location(pagination_url, params={'skip': 0}) - except ClientError as e: - assert e.response.status_code == 406 - self.logger.info('not doing pagination for account %r, site seems broken', account) - return self.page.iter_history(no_pagination=True) + pid = self.page.get_pid() + skip = 0 + if not pid: + self.logger.warning('No pid available for account %s, transactions cannot be retrieved.', account.id) + return - return self.page.iter_history() + transactions = [] + self.go_to_transactions(pid, skip) + # Pagination: + while self.page.has_operations(): + for tr in self.page.iter_history(): + transactions.append(tr) + skip += 10 + self.go_to_transactions(pid, skip) + + for tr in sorted_transactions(transactions): + # Get investments for each transaction + params = { + 'oid': tr._oid, + 'pid': pid + } + self.history_investments.go(params=params) + if self.page.has_investments(): + tr.investments = list(self.page.iter_transaction_investments()) + else: + tr.investments = [] + yield tr + + def go_to_transactions(self, pid, skip): + params = { + 'pid': pid, + 'skip': skip + } + self.history.go(params=params) def iter_coming(self, account): raise NotImplementedError() diff --git a/modules/axabanque/pages/bank.py b/modules/axabanque/pages/bank.py index 5de5df62d8..b2cdce7ad9 100644 --- a/modules/axabanque/pages/bank.py +++ b/modules/axabanque/pages/bank.py @@ -129,6 +129,7 @@ def get_list(self): account._args = args account.label = CleanText().filter(tds[0].xpath('./ancestor::table[has-class("tableaux-pret-personnel")]/caption')) account.id = account.label.split()[-1] + args['paramNumContrat'] + account.number = account.id loan_details = self.browser.open('/webapp/axabanque/jsp/panorama.faces', data=args).page # Need to go back on home page after open self.browser.bank_accounts.open() @@ -175,6 +176,7 @@ def get_list(self): iframe_url = re.search("src:(.*),", script).group()[6:-2] account_details_iframe = self.browser.open(iframe_url, data=args) account.id = CleanText('//span[contains(@id,"NumeroContrat")]/text()')(account_details_iframe.page.doc) + account.number = account.id account._url = iframe_url account.type = account.TYPE_LIFE_INSURANCE account.balance = MyDecimal('//span[contains(@id,"MontantEpargne")]/text()')(account_details_iframe.page.doc) @@ -211,6 +213,7 @@ def get_list(self): if 'Valorisation' in account.label or 'Liquidités' in account.label: account.id += args[next(k for k in args.keys() if '_idcl' in k)].split('Jsp')[-1] + account.number = account.id # get accounts balance try: diff --git a/modules/axabanque/pages/wealth.py b/modules/axabanque/pages/wealth.py index 1c4e9f77af..7dece8b68c 100644 --- a/modules/axabanque/pages/wealth.py +++ b/modules/axabanque/pages/wealth.py @@ -20,23 +20,32 @@ from __future__ import unicode_literals import re +from decimal import Decimal -from weboob.browser.pages import HTMLPage, LoggedPage, pagination -from weboob.browser.elements import ListElement, ItemElement, method, TableElement +from weboob.browser.pages import HTMLPage, JsonPage, LoggedPage +from weboob.browser.elements import ListElement, ItemElement, TableElement, DictElement, method from weboob.browser.filters.standard import ( - Async, AsyncLoad, CleanDecimal, CleanText, Currency, Date, Eval, Field, Lower, MapIn, QueryValue, Regexp, + CleanDecimal, CleanText, Currency, Date, + Eval, Field, Lower, MapIn, QueryValue, Regexp, ) +from weboob.browser.filters.json import Dict from weboob.browser.filters.html import Attr, Link, TableCell from weboob.capabilities.bank import Account, Investment from weboob.capabilities.profile import Person -from weboob.capabilities.base import NotAvailable, NotLoaded +from weboob.capabilities.base import NotAvailable, NotLoaded, empty from weboob.tools.capabilities.bank.transactions import FrenchTransaction +def float_to_decimal(f): + if empty(f): + return NotAvailable + return Decimal(str(f)) + + class AccountsPage(LoggedPage, HTMLPage): @method class iter_accounts(ListElement): - item_xpath = '//div[contains(@data-route, "/savings/")]' + item_xpath = '//div[contains(@data-module-open-link--link, "/savings/")]' class item(ItemElement): klass = Account @@ -50,24 +59,15 @@ class item(ItemElement): 'epargne retraite novial': Account.TYPE_LIFE_INSURANCE, } - condition = lambda self: Field('balance')(self) is not NotAvailable - obj_id = Regexp(CleanText('.//span[has-class("small-title")]'), r'([\d/]+)') + obj_number = obj_id obj_label = CleanText('.//h3[has-class("card-title")]') obj_balance = CleanDecimal.French('.//p[has-class("amount-card")]') obj_valuation_diff = CleanDecimal.French('.//p[@class="performance"]', default=NotAvailable) - - def obj_url(self): - url = Attr('.', 'data-route')(self) - # The Assurance Vie xpath recently changed so we must verify that all - # the accounts now have "/savings/" instead of "/assurances-vie/". - assert "/savings/" in url - return url - obj_currency = Currency('.//p[has-class("amount-card")]') obj__acctype = "investment" - obj_type = MapIn(Lower(Field('label')), TYPES, Account.TYPE_UNKNOWN) + obj_url = Attr('.', 'data-module-open-link--link') class InvestmentPage(LoggedPage, HTMLPage): @@ -145,7 +145,7 @@ def obj_original_currency(self): return cur if self.env['currency'] != cur else NotLoaded def detailed_view(self): - return Attr('//button[contains(text(), "Vision détaillée")]', 'data-url', default=None)(self.doc) + return Attr('//button[contains(text(), "Vision détaillée")]', 'data-module-open-link--link', default=None)(self.doc) def is_detail(self): return bool(self.doc.xpath('//th[contains(text(), "Valeur de la part")]')) @@ -158,63 +158,68 @@ class Transaction(FrenchTransaction): ] -class HistoryPage(LoggedPage, HTMLPage): - def build_doc(self, content): - # we got empty pages at end of pagination - if not content.strip(): - content = b"" - return super(HistoryPage, self).build_doc(content) - +class AccountDetailsPage(LoggedPage, HTMLPage): def get_account_url(self, url): - return Attr('//a[@href="%s"]' % url, 'data-target')(self.doc) + return Attr('//a[@href="%s"]' % url, 'data-url')(self.doc) def get_investment_url(self): - return Attr('//div[has-class("card-distribution")]', 'data-url', default=None)(self.doc) + return Attr('//div[contains(@data-analytics-label, "repartition_par_fond")]', 'data-url', default=None)(self.doc) + + def get_pid(self): + return Attr('//div[@data-module="operations-movements"]', 'data-module-operations-movements--pid')(self.doc) + - def get_pagination_url(self): - return Attr('//div[contains(@class, "default")][@data-module-card-list--current-page]', 'data-module-card-list--url')(self.doc) +class HistoryPage(LoggedPage, JsonPage): + def has_operations(self): + return Dict('response/operations')(self.doc) @method - class get_investments(ListElement): - item_xpath = '//div[contains(@class, "card-support")]' + class iter_history(DictElement): + item_xpath = 'response/operations' class item(ItemElement): - klass = Investment + klass = Transaction - obj_label = CleanText('./div[contains(@class, "label")]') - obj_valuation = CleanDecimal.French(Regexp(CleanText('./div[contains(@class, "amount")]/span', replace=[(' ,', ',')]), r'(.*)€')) - obj_portfolio_share = Eval(lambda x: x / 100, CleanDecimal(Regexp(CleanText('./div[contains(@class, "amount")]/span'), r'.*€ (.*) %'))) + def condition(self): + # Only return validated transactions + return Dict('status')(self) == 'DONE' - @pagination - @method - class iter_history(ListElement): - item_xpath = '//div[contains(@data-url, "savingsdetailledcard")]' + obj_raw = Transaction.Raw(Dict('label')) + obj_date = Date(Dict('date')) + obj_amount = Eval(float_to_decimal, Dict('net_amount/value')) + obj_gross_amount = Eval(float_to_decimal, Dict('gross_amount/value')) + obj_type = Transaction.TYPE_BANK - def next_page(self): - if not CleanText(self.item_xpath, default=None)(self): - return - elif self.env.get('no_pagination'): - return + # 'oid' is used to get the transaction's investments + obj__oid = Dict('id') - return re.sub(r'(?<=\bskip=)(\d+)', lambda m: str(int(m.group(1)) + 10), self.page.url) + +class HistoryInvestmentsPage(LoggedPage, JsonPage): + @method + class iter_transaction_investments(DictElement): + item_xpath = 'response/operationDetail/transaction_lines' class item(ItemElement): - klass = Transaction + klass = Investment - load_details = Attr('.', 'data-url') & AsyncLoad + def condition(self): + # Some lines don't even have a label, we skip them + return Dict('fund_label', default=None)(self) - obj_raw = Transaction.Raw('.//div[@class="operations-movements-item-label"]') - obj_date = Date(CleanText('.//div[@class="operations-movements-item-date"]'), dayfirst=True) + obj_label = Dict('fund_label') + obj_valuation = Eval(float_to_decimal, Dict('amount/value')) + obj_unitvalue = Eval(float_to_decimal, Dict('fund_unit_value/value', default=None)) + obj_quantity = Eval(float_to_decimal, Dict('fund_shares_count/value', default=None)) + obj_vdate = Date(Dict('fund_unit_value/date', default=None), default=NotAvailable) - # sometimes this div contains a second span with text like "Vos bonus +0.15 %", - # we must avoid it and only take the first span - obj_amount = CleanDecimal.French(CleanText('.//div[@class="operations-movements-item-amount"]/span[1]', replace=[(' ,', ',')])) + def obj_portfolio_share(self): + raw_value = Eval(float_to_decimal, Dict('percentage', default=None))(self) + if empty(raw_value): + return NotAvailable + return raw_value / 100 - def obj_investments(self): - investments = list(Async('details').loaded_page(self).get_investments()) - for inv in investments: - inv.vdate = Field('date')(self) - return investments + def has_investments(self): + return Dict('response/operationDetail/transaction_lines', default=None)(self.doc) class ProfilePage(LoggedPage, HTMLPage): -- GitLab