From 460a5e557b463d5926262848ee982d6cc32c6a0f Mon Sep 17 00:00:00 2001 From: Guillaume Risbourg Date: Fri, 6 Sep 2019 17:06:24 +0200 Subject: [PATCH] [sogecartenet] Added transactions on multiple months By default the website returns only the transactions of the current month, but we can choose which month we want in the URL of the CSV document. So now we fetch all transactions going back in time until we get empty data for 3 months in a row (the website doesn't stop us when there is no more data in the past, so we have to manually stop asking for data). Closes: 13025@zendesk 8640@zendesk --- modules/sogecartenet/browser.py | 42 +++++++++++++++++++++++++++++---- modules/sogecartenet/pages.py | 4 ++++ 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/modules/sogecartenet/browser.py b/modules/sogecartenet/browser.py index 064058090f..363d3d3548 100644 --- a/modules/sogecartenet/browser.py +++ b/modules/sogecartenet/browser.py @@ -18,7 +18,11 @@ # along with this weboob module. If not, see <http://www.gnu.org/licenses/>. 
+from dateutil.parser import parse as parse_date +from dateutil.relativedelta import relativedelta + from weboob.browser import LoginBrowser, URL, need_login +from weboob.tools.compat import urlparse, parse_qs, urlencode, urlunparse from .pages import LoginPage, AccountsPage, TransactionsPage, PassModificationPage @@ -31,6 +35,9 @@ class SogecartesBrowser(LoginBrowser): accounts = URL('/internationalisation/gestionParcCartes', AccountsPage) transactions = URL('/internationalisation/csv/operationsParCarte.*', TransactionsPage) + EMPTY_MONTHS_LIMIT_TRANSACTIONS = 3 + MAX_MONTHS_TRANSACTIONS = 48 + def load_state(self, state): pass @@ -53,7 +60,34 @@ def iter_accounts(self): @need_login def get_history(self, account): if not account._url: - return ([]) - self.location(account._url) - assert self.transactions.is_here() - return self.page.get_history() + return + + url = account._url + months_without_data = 0 + total_months = 0 + # If it makes more than 3 months that we get empty data or if it makes more than 48 months + # that we are gathering transactions we stop asking for transactions (the 48 months limit is + # just to avoid infinite loops) + while months_without_data < self.EMPTY_MONTHS_LIMIT_TRANSACTIONS and total_months < self.MAX_MONTHS_TRANSACTIONS: + self.location(url) + assert self.transactions.is_here() + if self.page.has_data(): + months_without_data = 0 + for tr in self.page.get_history(): + yield tr + else: + months_without_data += 1 + + # We change the end of the url by the previous month + # URL is like this : https://www.sogecartenet.fr/csv/operationsParCarte?TOP=1&NOCARTE=XXXXXXXXX&NOCONTRAT=XXXXXXXX&DATEARR=2019-10-01 + # Format of the date in the URL is : YYYY-MM-DD + parts = urlparse(url) + qs = parse_qs(parts.query) + tr_date = parse_date(qs['DATEARR'][0], yearfirst=True) - relativedelta(months=1) + qs['DATEARR'] = tr_date.date() + url = urlunparse( + parts._replace( + query=urlencode(qs, doseq=True) + ) + ) + total_months += 1 diff --git 
a/modules/sogecartenet/pages.py b/modules/sogecartenet/pages.py index f4fc44ef1a..a11c960c15 100644 --- a/modules/sogecartenet/pages.py +++ b/modules/sogecartenet/pages.py @@ -95,6 +95,10 @@ class TransactionsPage(SogeLoggedPage, CsvPage): ENCODING = 'iso_8859_1' HEADER = 1 FMTPARAMS = {'delimiter':';'} + + def has_data(self): + return not Dict('processing date')(self.doc[0]) == u'No data found' + @method class get_history(DictElement): class item(ItemElement): -- GitLab