From a2fcc209f17a828e46578c3f87733c5bc8cae6da Mon Sep 17 00:00:00 2001
From: Romain Bignon <romain@budget-insight.com>
Date: Fri, 8 Feb 2013 14:04:25 +0100
Subject: [PATCH] support deferred cards

---
 modules/hsbc/backend.py        |  9 ++--
 modules/hsbc/browser.py        | 38 +++++++++++----
 modules/hsbc/pages/accounts.py | 87 +++++++++++++++++++++++++++++-----
 3 files changed, 109 insertions(+), 25 deletions(-)

diff --git a/modules/hsbc/backend.py b/modules/hsbc/backend.py
index 302d0180b2..0796064e35 100644
--- a/modules/hsbc/backend.py
+++ b/modules/hsbc/backend.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright(C) 2012 Romain Bignon
+# Copyright(C) 2012-2013 Romain Bignon
 #
 # This file is part of weboob.
 #
@@ -58,12 +58,13 @@ def get_account(self, _id):
 
     def iter_history(self, account):
         with self.browser:
-            for tr in self.browser.get_history(account._link_id):
-                if not tr._coming:
+            for tr in self.browser.get_history(account):  # whole account, so its card links are walked too
+                # With deferred cards, drop the 'FACTURES CB' invoice lines; without cards, keep every debited operation.
+                if not tr._coming and not (tr.raw.startswith('FACTURES CB') and account._card_links):
                     yield tr
 
     def iter_coming(self, account):
         with self.browser:
-            for tr in self.browser.get_history(account._link_id):
+            for tr in self.browser.get_history(account):  # whole account, so its card links are walked too
                 if tr._coming:
                     yield tr
diff --git a/modules/hsbc/browser.py b/modules/hsbc/browser.py
index 7cd93f8be5..035ace1b72 100644
--- a/modules/hsbc/browser.py
+++ b/modules/hsbc/browser.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright(C) 2012  Romain Bignon
+# Copyright(C) 2012-2013  Romain Bignon
 #
 # This file is part of weboob.
 #
@@ -18,11 +18,13 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 
 
+from datetime import timedelta
 import urllib
 import re
 
+from weboob.tools.date import LinearDateGuesser
 from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword, BasePage, BrokenPageError
-from .pages.accounts import AccountsListPage, HistoryPage
+from .pages.accounts import AccountsListPage, CPTHistoryPage, CardHistoryPage
 
 
 __all__ = ['HSBC']
@@ -38,7 +40,8 @@ class HSBC(BaseBrowser):
     ENCODING = None # refer to the HTML encoding
     PAGES = {'https://client.hsbc.fr/session_absente.html':                 NotLoggedPage,
              'https://client.hsbc.fr/cgi-bin/emcgi\?.*debr=COMPTES_PAN':    AccountsListPage,
-             'https://client.hsbc.fr/cgi-bin/emcgi\?.*CPT_IdPrestation=.*': HistoryPage
+             'https://client.hsbc.fr/cgi-bin/emcgi\?.*CPT_IdPrestation=.*': CPTHistoryPage,
+             'https://client.hsbc.fr/cgi-bin/emcgi\?.*CB_IdPrestation=.*':  CardHistoryPage,
             }
 
     _session = None
@@ -91,9 +94,28 @@ def get_account(self, id):
 
         return None
 
-    def get_history(self, link):
-        if link is None:
-            return iter([])
+    def get_history(self, account):  # generator: the account's own operations, then those of each linked card
+        if account._link_id is None:  # no history link on this account: yield nothing
+            return
 
-        self.location(link)
-        return self.page.get_operations()
+        for tr in self._get_history(account._link_id):
+            yield tr
+
+        for card in account._card_links:  # card history URLs collected by AccountsListPage.get_list
+            for tr in self._get_history(card):
+                yield tr
+
+    def _get_history(self, link):  # generator: open 'link', then every next-page link, yielding each page's operations
+        num_page = 0  # 0 on the first page; CardHistoryPage flags operations as coming only while this is 0
+        guesser = LinearDateGuesser(date_max_bump=timedelta(45))  # one guesser shared by all pages of this history
+        while link is not None:
+            self.location(link)
+
+            if self.page is None:  # presumably the URL matched no entry in PAGES - stop quietly (TODO confirm)
+                return
+
+            for tr in self.page.get_operations(num_page, guesser):
+                yield tr
+
+            link = self.page.get_next_link()  # None ends the loop (the HistoryPage default)
+            num_page += 1
diff --git a/modules/hsbc/pages/accounts.py b/modules/hsbc/pages/accounts.py
index 6fb6cc4cfc..d40eec53c5 100644
--- a/modules/hsbc/pages/accounts.py
+++ b/modules/hsbc/pages/accounts.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright(C) 2012  Romain Bignon
+# Copyright(C) 2012-2013  Romain Bignon
 #
 # This file is part of weboob.
 #
@@ -23,15 +23,15 @@
 
 from weboob.tools.browser import BasePage
 from weboob.capabilities.bank import Account
-from weboob.capabilities import NotAvailable
 from weboob.tools.capabilities.bank.transactions import FrenchTransaction
 
 
-__all__ = ['AccountsListPage']
+__all__ = ['AccountsListPage', 'CPTHistoryPage', 'CardHistoryPage']
 
 
 class AccountsListPage(BasePage):
     def get_list(self):
+        accounts = []
         for tr in self.document.getiterator('tr'):
             tds = tr.findall('td')
             if len(tds) != 3 or tds[0].find('a') is None or tds[0].find('a').attrib.get('class', '') != 'flecheM':
@@ -41,21 +41,33 @@ def get_list(self):
             account.id = tds[1].text.strip()
 
             a = tds[0].findall('a')[-1]
-            account.label = a.text.strip()
+            account.label = unicode(a.text.strip())
             account._link_id = a.attrib['href']
 
-            m = re.search('(\w+)_IdPrestation', account._link_id)
-            if not m or m.group(1) != 'CPT':
-                account._link_id = None
-                if m:
-                    account.id += '.%s' % m.group(1)
-
             balance = u''.join([txt.strip() for txt in tds[2].itertext()])
             account.balance = Decimal(FrenchTransaction.clean_amount(balance))
+
+            # check account type
+            m = re.search('(\w+)_IdPrestation', account._link_id)
+            account_type = None
+            if m:
+                account_type = m.group(1)
+                if account_type != 'CPT':
+                    account.id += '.%s' % account_type
+
+            if account_type == 'CB':
+                accounts[0]._card_links.append(account._link_id)
+                if not accounts[0].coming:
+                    accounts[0].coming = Decimal('0.0')
+                accounts[0].coming += account.balance
+                continue
+
             account.currency = account.get_currency(tds[1].text)
-            account.coming = NotAvailable
+            account._card_links = []
+
+            accounts.append(account)
 
-            yield account
+        return iter(accounts)
 
 class Transaction(FrenchTransaction):
     PATTERNS = [(re.compile('^VIR(EMENT)? (?P<text>.*)'), FrenchTransaction.TYPE_TRANSFER),
@@ -70,7 +82,14 @@ class Transaction(FrenchTransaction):
                ]
 
 class HistoryPage(BasePage):
-    def get_operations(self):
+    def get_next_link(self):  # default: the history has no further page
+        return None
+
+    def get_operations(self, num_page, date_guesser):  # subclasses must override (CPTHistoryPage, CardHistoryPage)
+        raise NotImplementedError()
+
+class CPTHistoryPage(HistoryPage):
+    def get_operations(self, num_page, date_guesser):
         for script in self.document.getiterator('script'):
             if script.text is None or script.text.find('\nCL(0') < 0:
                 continue
@@ -81,3 +100,45 @@ def get_operations(self):
                 op.set_amount(m.group(5))
                 op._coming = (re.match('\d+/\d+/\d+', m.group(2)) is None)
                 yield op
+
+class CardHistoryPage(HistoryPage):  # history page of a card (URLs carrying CB_IdPrestation)
+    def get_next_link(self):  # URL of the link listed after the '#' one, or None (implicit) when there is none
+        ok = False  # True once a link whose href starts with '#' has been seen (presumably the current page)
+        for link in self.document.xpath('//form[@name="FORM_LIB_CARTE"]/a[@class="fleche"]'):
+            if link.attrib['href'].startswith('#'):
+                ok = True
+            elif ok:
+                # append CB_IdPrestation so the browser's PAGES pattern maps this URL to CardHistoryPage.
+                return link.attrib['href'] + '&CB_IdPrestation='
+
+    def parse_date(self, guesser, string, store=False):  # 'string' is "day/month"; the guesser presumably picks the year
+        day, month = map(int, string.split('/'))
+        return guesser.guess_date(day, month, store)
+
+    def get_operations(self, num_page, date_guesser):  # yield one Transaction per operation row of the "tabs-1" table
+        debit_date = None  # date parsed from the latest single-cell header row
+        for tr in self.document.xpath('//div[@id="tabs-1"]/table//tr'):
+            cols = tr.findall('td')
+            if len(cols) == 1:  # single-cell row: a header when it contains a digits/digits date
+                text = self.parser.tocleanstring(cols[0])
+                m = re.search('(\d+/\d+)', text)
+                if m:
+                    # if there are several months on the same page, the second
+                    # one's operations are already debited.
+                    if debit_date is not None:
+                        num_page += 1  # rows under later headers then get _coming = False
+                    debit_date = self.parse_date(date_guesser, m.group(1), True)
+                continue
+
+            if len(cols) < 4:  # too few cells to be an operation row
+                continue
+
+            op = Transaction('')
+            op.parse(date=debit_date,
+                     raw=self.parser.tocleanstring(cols[1]))
+            op.rdate = self.parse_date(date_guesser, self.parser.tocleanstring(cols[0]))  # first column, not the header date
+            op.type = op.TYPE_CARD
+            op._coming = (num_page == 0)  # only rows under the first header of the first page are still coming
+            op.set_amount(self.parser.tocleanstring(cols[-1]),
+                          self.parser.tocleanstring(cols[-2]))
+            yield op
-- 
GitLab