From 3e29f496b05333163fb2b89a826fd03fe63f33f5 Mon Sep 17 00:00:00 2001 From: Julien Montagnat Date: Wed, 1 Jul 2020 17:23:28 +0200 Subject: [PATCH] [carrefourbanque] New implementation of CardHistoryPageJSON The pagination of the current CardHistoryPage is not working anymore. To get more transactions, we need to use a JSON API. The POST method needs a date_recup value, a timestamp that we cannot determine ourselves and that we get from the HTML of the CardHistoryPage. Every time we call the JSON API, we get the transactions of the previous calls, 40 new transactions, and a new date_recup that we can use to make the next request. --- modules/carrefourbanque/browser.py | 37 ++++++++++++++++++++++++++---- modules/carrefourbanque/pages.py | 35 ++++++++++++++++++++++++---- 2 files changed, 63 insertions(+), 9 deletions(-) diff --git a/modules/carrefourbanque/browser.py b/modules/carrefourbanque/browser.py index a30b80ae24..d3979852a0 100644 --- a/modules/carrefourbanque/browser.py +++ b/modules/carrefourbanque/browser.py @@ -28,7 +28,7 @@ from .pages import ( LoginPage, MaintenancePage, HomePage, IncapsulaResourcePage, LoanHistoryPage, CardHistoryPage, SavingHistoryPage, - LifeInvestmentsPage, LifeHistoryPage + LifeInvestmentsPage, LifeHistoryPage, CardHistoryJsonPage, ) @@ -50,7 +50,9 @@ class CarrefourBanqueBrowser(LoginBrowser, StatesMixin): r'/espace-client/epargne-libre/historique-des-operations\?(.*)', SavingHistoryPage ) + card_history = URL(r'/espace-client/carte-credit/solde-dernieres-operations\?(.*)', CardHistoryPage) + card_history_json = URL(r'/espace-client/carte-credit/consultation_solde_ajax', CardHistoryJsonPage) life_history = URL(r'/espace-client/assurance-vie/historique-des-operations\?(.*)', LifeHistoryPage) life_investments = URL(r'/espace-client/assurance-vie/solde-dernieres-operations\?(.*)', LifeInvestmentsPage) @@ -153,7 +155,6 @@ def iter_investment(self, account): @need_login def iter_history(self, account): - self.home.stay_or_go() self.location(account.url) @@
-161,11 +162,39 @@ def iter_history(self, account): assert self.saving_history.is_here() elif account.type == Account.TYPE_CARD: assert self.card_history.is_here() + + previous_date = self.page.get_previous_date() + if previous_date: + total = 0 + loop_limit = 500 + for page in range(loop_limit): + self.card_history_json.go(data={'dateRecup': previous_date, 'index': 0}) + previous_date = self.page.get_previous_date() + + it = iter(self.page.iter_history()) + for _ in range(total): + # those transactions were returned on previous pages + next(it) + + for tr in it: + total += 1 + yield tr + + if not previous_date: + # last page + return + else: + self.logger.info( + "End of loop after %s iterations but still got a next page, it will miss some transactions", + loop_limit + ) + return + elif account.type == Account.TYPE_LOAN: assert self.loan_history.is_here() elif account.type == Account.TYPE_LIFE_INSURANCE: assert self.life_history.is_here() else: raise NotImplementedError() - - return self.page.iter_history(account) + for tr in self.page.iter_history(account): + yield tr diff --git a/modules/carrefourbanque/pages.py b/modules/carrefourbanque/pages.py index d802bee7f0..58b1f372d4 100644 --- a/modules/carrefourbanque/pages.py +++ b/modules/carrefourbanque/pages.py @@ -25,12 +25,13 @@ from PIL import Image from weboob.tools.json import json -from weboob.browser.pages import HTMLPage, LoggedPage, pagination -from weboob.browser.elements import ListElement, TableElement, ItemElement, method +from weboob.browser.pages import HTMLPage, LoggedPage, pagination, JsonPage +from weboob.browser.elements import ListElement, TableElement, ItemElement, method, DictElement from weboob.browser.filters.standard import ( - Regexp, Field, CleanText, CleanDecimal, Eval, Currency + Regexp, Field, CleanText, CleanDecimal, Eval, Currency, Date, ) from weboob.browser.filters.html import Link, TableCell, Attr, AttributeNotFound +from weboob.browser.filters.json import Dict from 
weboob.capabilities.bank import Account from weboob.capabilities.wealth import Investment from weboob.capabilities.base import NotAvailable @@ -221,7 +222,6 @@ class item(Transaction.TransactionElement): def obj_type(self): if len(self.el.xpath('./td')) <= 3: return Transaction.TYPE_BANK - col = TableCell('debittype', default=None) if col(self): debittype = CleanText(col)(self) @@ -333,4 +333,29 @@ class LoanHistoryPage(TransactionsPage): class CardHistoryPage(TransactionsPage): - pass + + def get_previous_date(self): + return Attr('//a[@id="op_precedente"]', 'date_recup', default=None)(self.doc) + +class CardHistoryJsonPage(LoggedPage, JsonPage): + + def get_previous_date(self): + return Dict('str_datePrecedente', default=None)(self.doc) + + @method + class iter_history(DictElement): + item_xpath = 'tab_historique' + + class item(ItemElement): + klass = Transaction + + obj_date = Date(CleanText(Dict('date')), dayfirst=True) + obj_raw = CleanText(Dict('label')) + obj_amount = CleanDecimal.French(Dict('amount')) + + def obj_type(self): + debittype = Dict('mode') + if debittype(self) == 'Différé': + return Transaction.TYPE_DEFERRED_CARD + else: + return Transaction.TYPE_CARD -- GitLab