From a2fcc209f17a828e46578c3f87733c5bc8cae6da Mon Sep 17 00:00:00 2001
From: Romain Bignon
Date: Fri, 8 Feb 2013 14:04:25 +0100
Subject: [PATCH] support deferred cards

---
Review notes (this area is not part of the commit message):
 * backend.py, iter_history(): the CB-invoice filter was written as
       not (tr.raw.startswith('FACTURES CB') or len(account._card_links) == 0)
   which is always false for an account that has no card links, so such
   an account returned an empty history.  'FACTURES CB' rows are now
   dropped only when the account really has card links, which is what
   the comment above the test describes.
 * What the change does, as read from the hunks below: CB_IdPrestation
   rows of the accounts list are no longer listed as accounts; their link
   is appended to accounts[0]._card_links and their balance is added to
   accounts[0].coming.  HSBC.get_history() then walks the account page
   followed by every card page, following get_next_link() for pagination.
 * Line breaks and indentation were rebuilt to agree with the hunk
   headers; the licence URL and the 'text' regex group name, lost together
   with other angle-bracketed text, were put back in the context lines.

 modules/hsbc/backend.py        |  9 ++--
 modules/hsbc/browser.py        | 38 +++++++++++----
 modules/hsbc/pages/accounts.py | 87 +++++++++++++++++++++++++++++-----
 3 files changed, 109 insertions(+), 25 deletions(-)

diff --git a/modules/hsbc/backend.py b/modules/hsbc/backend.py
index 302d0180b2..0796064e35 100644
--- a/modules/hsbc/backend.py
+++ b/modules/hsbc/backend.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright(C) 2012 Romain Bignon
+# Copyright(C) 2012-2013 Romain Bignon
 #
 # This file is part of weboob.
 #
@@ -58,12 +58,13 @@ def get_account(self, _id):
 
     def iter_history(self, account):
         with self.browser:
-            for tr in self.browser.get_history(account._link_id):
-                if not tr._coming:
+            for tr in self.browser.get_history(account):
+                # If there are deferred cards, strip CB invoices.
+                if not tr._coming and not (tr.raw.startswith('FACTURES CB') and account._card_links):
                     yield tr
 
     def iter_coming(self, account):
         with self.browser:
-            for tr in self.browser.get_history(account._link_id):
+            for tr in self.browser.get_history(account):
                 if tr._coming:
                     yield tr
diff --git a/modules/hsbc/browser.py b/modules/hsbc/browser.py
index 7cd93f8be5..035ace1b72 100644
--- a/modules/hsbc/browser.py
+++ b/modules/hsbc/browser.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright(C) 2012 Romain Bignon
+# Copyright(C) 2012-2013 Romain Bignon
 #
 # This file is part of weboob.
 #
@@ -18,11 +18,13 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 
 
+from datetime import timedelta
 import urllib
 import re
 
+from weboob.tools.date import LinearDateGuesser
 from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword, BasePage, BrokenPageError
-from .pages.accounts import AccountsListPage, HistoryPage
+from .pages.accounts import AccountsListPage, CPTHistoryPage, CardHistoryPage
 
 
 __all__ = ['HSBC']
@@ -38,7 +40,8 @@ class HSBC(BaseBrowser):
     ENCODING = None # refer to the HTML encoding
     PAGES = {'https://client.hsbc.fr/session_absente.html': NotLoggedPage,
              'https://client.hsbc.fr/cgi-bin/emcgi\?.*debr=COMPTES_PAN': AccountsListPage,
-             'https://client.hsbc.fr/cgi-bin/emcgi\?.*CPT_IdPrestation=.*': HistoryPage
+             'https://client.hsbc.fr/cgi-bin/emcgi\?.*CPT_IdPrestation=.*': CPTHistoryPage,
+             'https://client.hsbc.fr/cgi-bin/emcgi\?.*CB_IdPrestation=.*': CardHistoryPage,
             }
 
     _session = None
@@ -91,9 +94,28 @@ def get_account(self, id):
 
         return None
 
-    def get_history(self, link):
-        if link is None:
-            return iter([])
+    def get_history(self, account):
+        if account._link_id is None:
+            return
 
-        self.location(link)
-        return self.page.get_operations()
+        for tr in self._get_history(account._link_id):
+            yield tr
+
+        for card in account._card_links:
+            for tr in self._get_history(card):
+                yield tr
+
+    def _get_history(self, link):
+        num_page = 0
+        guesser = LinearDateGuesser(date_max_bump=timedelta(45))
+        while link is not None:
+            self.location(link)
+
+            if self.page is None:
+                return
+
+            for tr in self.page.get_operations(num_page, guesser):
+                yield tr
+
+            link = self.page.get_next_link()
+            num_page += 1
diff --git a/modules/hsbc/pages/accounts.py b/modules/hsbc/pages/accounts.py
index 6fb6cc4cfc..d40eec53c5 100644
--- a/modules/hsbc/pages/accounts.py
+++ b/modules/hsbc/pages/accounts.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright(C) 2012 Romain Bignon
+# Copyright(C) 2012-2013 Romain Bignon
 #
 # This file is part of weboob.
 #
@@ -23,15 +23,15 @@
 
 from weboob.tools.browser import BasePage
 from weboob.capabilities.bank import Account
-from weboob.capabilities import NotAvailable
 from weboob.tools.capabilities.bank.transactions import FrenchTransaction
 
 
-__all__ = ['AccountsListPage']
+__all__ = ['AccountsListPage', 'CPTHistoryPage', 'CardHistoryPage']
 
 
 class AccountsListPage(BasePage):
     def get_list(self):
+        accounts = []
         for tr in self.document.getiterator('tr'):
             tds = tr.findall('td')
             if len(tds) != 3 or tds[0].find('a') is None or tds[0].find('a').attrib.get('class', '') != 'flecheM':
@@ -41,21 +41,33 @@ def get_list(self):
             account.id = tds[1].text.strip()
 
             a = tds[0].findall('a')[-1]
-            account.label = a.text.strip()
+            account.label = unicode(a.text.strip())
             account._link_id = a.attrib['href']
 
-            m = re.search('(\w+)_IdPrestation', account._link_id)
-            if not m or m.group(1) != 'CPT':
-                account._link_id = None
-            if m:
-                account.id += '.%s' % m.group(1)
-
             balance = u''.join([txt.strip() for txt in tds[2].itertext()])
             account.balance = Decimal(FrenchTransaction.clean_amount(balance))
+
+            # check account type
+            m = re.search('(\w+)_IdPrestation', account._link_id)
+            account_type = None
+            if m:
+                account_type = m.group(1)
+                if account_type != 'CPT':
+                    account.id += '.%s' % account_type
+
+            if account_type == 'CB':
+                accounts[0]._card_links.append(account._link_id)
+                if not accounts[0].coming:
+                    accounts[0].coming = Decimal('0.0')
+                accounts[0].coming += account.balance
+                continue
+
             account.currency = account.get_currency(tds[1].text)
-            account.coming = NotAvailable
+            account._card_links = []
+
+            accounts.append(account)
 
-            yield account
+        return iter(accounts)
 
 class Transaction(FrenchTransaction):
     PATTERNS = [(re.compile('^VIR(EMENT)? (?P<text>.*)'), FrenchTransaction.TYPE_TRANSFER),
@@ -70,7 +82,14 @@ class Transaction(FrenchTransaction):
                ]
 
 class HistoryPage(BasePage):
-    def get_operations(self):
+    def get_next_link(self):
+        return None
+
+    def get_operations(self, num_page, date_guesser):
+        raise NotImplementedError()
+
+class CPTHistoryPage(HistoryPage):
+    def get_operations(self, num_page, date_guesser):
         for script in self.document.getiterator('script'):
             if script.text is None or script.text.find('\nCL(0') < 0:
                 continue
@@ -81,3 +100,45 @@ def get_operations(self):
                 op.set_amount(m.group(5))
                 op._coming = (re.match('\d+/\d+/\d+', m.group(2)) is None)
                 yield op
+
+class CardHistoryPage(HistoryPage):
+    def get_next_link(self):
+        ok = False
+        for link in self.document.xpath('//form[@name="FORM_LIB_CARTE"]/a[@class="fleche"]'):
+            if link.attrib['href'].startswith('#'):
+                ok = True
+            elif ok:
+                # add CB_IdPrestation to handle the correct page on browser.
+                return link.attrib['href'] + '&CB_IdPrestation='
+
+    def parse_date(self, guesser, string, store=False):
+        day, month = map(int, string.split('/'))
+        return guesser.guess_date(day, month, store)
+
+    def get_operations(self, num_page, date_guesser):
+        debit_date = None
+        for tr in self.document.xpath('//div[@id="tabs-1"]/table//tr'):
+            cols = tr.findall('td')
+            if len(cols) == 1:
+                text = self.parser.tocleanstring(cols[0])
+                m = re.search('(\d+/\d+)', text)
+                if m:
+                    # if there are several months on the same page, the second
+                    # one's operations are already debited.
+                    if debit_date is not None:
+                        num_page += 1
+                    debit_date = self.parse_date(date_guesser, m.group(1), True)
+                continue
+
+            if len(cols) < 4:
+                continue
+
+            op = Transaction('')
+            op.parse(date=debit_date,
+                     raw=self.parser.tocleanstring(cols[1]))
+            op.rdate = self.parse_date(date_guesser, self.parser.tocleanstring(cols[0]))
+            op.type = op.TYPE_CARD
+            op._coming = (num_page == 0)
+            op.set_amount(self.parser.tocleanstring(cols[-1]),
+                          self.parser.tocleanstring(cols[-2]))
+            yield op
-- 
GitLab