browser.py 22.1 KB
Newer Older
Romain Bignon's avatar
Romain Bignon committed
1 2
# -*- coding: utf-8 -*-

Romain Bignon's avatar
Romain Bignon committed
3
# Copyright(C) 2012-2013  Romain Bignon
Romain Bignon's avatar
Romain Bignon committed
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.

20
from __future__ import unicode_literals
Romain Bignon's avatar
Romain Bignon committed
21

22
import re
23
import ssl
24
from datetime import timedelta, date
25
from lxml.etree import XMLSyntaxError
26
from collections import OrderedDict
Romain Bignon's avatar
Romain Bignon committed
27

Romain Bignon's avatar
Romain Bignon committed
28
from weboob.tools.date import LinearDateGuesser
29
from weboob.capabilities.bank import Account, AccountNotFound
30
from weboob.tools.capabilities.bank.transactions import sorted_transactions, keep_only_card_transactions
31
from weboob.tools.compat import parse_qsl, urlparse
32
from weboob.exceptions import BrowserIncorrectPassword
33
from weboob.browser import LoginBrowser, URL, need_login
34
from weboob.browser.exceptions import HTTPNotFound
35
from weboob.capabilities.base import find_object
36

37
from .pages.account_pages import (
38 39
    AccountsPage, OwnersListPage, CBOperationPage, CPTOperationPage, LoginPage,
    AppGonePage, RibPage, UnavailablePage, OtherPage, FrameContainer, ProfilePage,
40
)
41 42
from .pages.life_insurances import (
    LifeInsurancesPage, LifeInsurancePortal, LifeInsuranceMain, LifeInsuranceUseless,
43
    LifeNotFound,
44 45
)
from .pages.investments import (
46
    LogonInvestmentPage, ProductViewHelper, RetrieveAccountsPage, RetrieveInvestmentsPage,
47
    RetrieveLiquidityPage, RetrieveUselessPage, ScpiInvestmentPage,
48 49
)
from .pages.landing_pages import JSMiddleFramePage, JSMiddleAuthPage, InvestmentFormPage
Romain Bignon's avatar
Romain Bignon committed
50 51 52 53 54


__all__ = ['HSBC']


Vincent Paredes's avatar
Vincent Paredes committed
55 56
class HSBC(LoginBrowser):
    """Browser for the HSBC France customer website.

    The URL table below covers the main banking site (``client.hsbc.fr`` /
    ``clients.hsbc.fr``), the life insurance site (``assurances.hsbc.fr``)
    and the investment gateway (``investissements.clients.hsbc.fr``).

    Many pages are declared with the very same ``/cgi-bin/emcgi`` pattern, so
    the declaration order is kept exactly as it was.
    NOTE(review): presumably the page classes tell themselves apart from the
    response content rather than from the URL -- confirm in the page modules.
    """

    BASEURL = 'https://client.hsbc.fr'

    # Set to True by get_history() when the site answers with a 404 and a new
    # login is needed; reset to False by do_login().
    app_gone = False

    scpi_investment_page = URL(r'https://www.hsbc.fr/1/[0-9]/.*', ScpiInvestmentPage)
    connection = URL(r'https://www.hsbc.fr/1/2/hsbc-france/particuliers/connexion', LoginPage)
    login = URL(r'https://www.hsbc.fr/1/*', LoginPage)
    cptPage = URL(
        r'/cgi-bin/emcgi.*\&Cpt=.*',
        r'/cgi-bin/emcgi.*\&Epa=.*',
        r'/cgi-bin/emcgi.*\&CPT_IdPrestation.*',
        r'/cgi-bin/emcgi.*\&Ass_IdPrestation.*',
        # FIXME are the previous patterns relevant in POST nav?
        r'/cgi-bin/emcgi',
        CPTOperationPage
    )
    cbPage = URL(
        r'/cgi-bin/emcgi.*[\&\?]Cb=.*',
        r'/cgi-bin/emcgi.*\&CB_IdPrestation.*',
        # FIXME are the previous patterns relevant in POST nav?
        r'/cgi-bin/emcgi',
        CBOperationPage
    )
    appGone = URL(
        r'/.*_absente.html',
        r'/pm_absent_inter.html',
        '/appli_absente_MBEL.html',
        '/pm_absent_inter_MBEL.html',
        AppGonePage
    )
    rib = URL(r'/cgi-bin/emcgi', RibPage)
    accounts = URL(r'/cgi-bin/emcgi', AccountsPage)
    owners_list = URL(r'/cgi-bin/emcgi', OwnersListPage)
    life_insurance_useless = URL(r'/cgi-bin/emcgi', LifeInsuranceUseless)
    profile = URL(r'/cgi-bin/emcgi', ProfilePage)
    unavailable = URL(r'/cgi-bin/emcgi', UnavailablePage)
    frame_page = URL(
        r'/cgi-bin/emcgi',
        r'https://clients.hsbc.fr/cgi-bin/emcgi',
        FrameContainer
    )

    # other site
    life_insurance_portal = URL(r'/cgi-bin/emcgi', LifeInsurancePortal)
    # Raw string: the pattern contains the regex escape ``\?``, which is an
    # invalid escape sequence in a regular string literal.
    life_insurance_main = URL(
        r'https://assurances.hsbc.fr/fr/accueil/b2c/accueil.html\?pointEntree=PARTIEGENERIQUEB2C',
        LifeInsuranceMain
    )
    life_insurances = URL('https://assurances.hsbc.fr/navigation', LifeInsurancesPage)
    life_not_found = URL(r'https://assurances.hsbc.fr/fr/404.html', LifeNotFound)

    # investment pages
    middle_frame_page = URL(r'/cgi-bin/emcgi', JSMiddleFramePage)
    middle_auth_page = URL(r'/cgi-bin/emcgi', JSMiddleAuthPage)
    investment_form_page = URL(
        r'https://www.hsbc.fr/1/[0-9]/authentication/sso-cwd\?customerFullName=.*',
        InvestmentFormPage
    )
    logon_investment_page = URL(
        r'https://investissements.clients.hsbc.fr/group-wd-gateway-war/gateway/LogonAuthentication',
        LogonInvestmentPage
    )
    retrieve_accounts_view = URL(
        r'https://investissements.clients.hsbc.fr/group-wd-gateway-war/gateway/wd/RetrieveProductView',
        RetrieveAccountsPage
    )
    retrieve_investments_page = URL(
        r'https://investissements.clients.hsbc.fr/group-wd-gateway-war/gateway/wd/RetrieveProductView',
        RetrieveInvestmentsPage
    )
    retrieve_liquidity_page = URL(
        r'https://investissements.clients.hsbc.fr/group-wd-gateway-war/gateway/wd/RetrieveProductView',
        RetrieveLiquidityPage
    )
    retrieve_useless_page = URL(
        r'https://investissements.clients.hsbc.fr/group-wd-gateway-war/gateway/wd/RetrieveProductView',
        RetrieveUselessPage
    )

    # catch-all
    other_page = URL(r'/cgi-bin/emcgi', OtherPage)
122

123
    def __init__(self, username, password, secret, *args, **kwargs):
        """Create the browser and its empty per-session caches.

        :param username: login identifier, forwarded to ``LoginBrowser``
        :param password: password, forwarded to ``LoginBrowser``
        :param secret: extra secret submitted together with the password by
            ``do_login()``
        """
        super(HSBC, self).__init__(username, password, *args, **kwargs)
        self.secret = secret

        # URLs of the owners, refreshed from the owners list page.
        self.owners = []
        # owner index -> {account id: account}
        self.accounts_list = OrderedDict()
        # account id -> account, without the duplicates shared between owners
        self.unique_accounts_list = {}
        # Filled by _go_to_wealth_accounts() with the 'accounts',
        # 'liquidities' and 'investments' lists.
        self.PEA_LISTING = {}
130

131 132 133
    def load_state(self, state):
        """Ignore ``state``: nothing is restored from a saved state."""
        return None

134 135 136 137 138 139 140 141
    def prepare_request(self, req):
        """Prepare ``req`` and pin its HTTPS connection to TLSv1.

        :param req: request handed over to the parent ``prepare_request``
        :returns: the prepared request built by the parent class
        """
        prepared = super(HSBC, self).prepare_request(req)

        # NOTE(review): TLSv1 is forced on the connection used for this URL;
        # confirm the site still requires such an old protocol version.
        https_adapter = self.session.adapters['https://']
        connection = https_adapter.get_connection(prepared.url)
        connection.ssl_version = ssl.PROTOCOL_TLSv1

        return prepared

Vincent Paredes's avatar
Vincent Paredes committed
142
    def do_login(self):
        """Log in on the HSBC website and land on the home frame.

        The session cookies are dropped first, then the username is
        submitted, the "no secure key" link is followed and the password and
        secret are sent.

        :raises BrowserIncorrectPassword: when the page offers no
            "no secure key" link, when no home frame URL can be found after
            authentication, or when the final page is not flagged as logged.
        """
        self.session.cookies.clear()

        self.app_gone = False
        self.connection.go()
        self.page.login(self.username)

        no_secure_key_link = self.page.get_no_secure_key()
        if not no_secure_key_link:
            raise BrowserIncorrectPassword()
        self.location(no_secure_key_link)

        self.page.login_w_secure(self.password, self.secret)
        # Up to three intermediate forms may still be served on the login
        # URL; submit each of them.
        for _ in range(3):
            if self.login.is_here():
                self.page.useless_form()

        # The website has two base URLs which only differ by the trailing
        # 's' of 'client(s)': follow the one we have been redirected to.
        new_base_url = 'https://clients.hsbc.fr/'
        if new_base_url in self.url:
            self.BASEURL = new_base_url

        # Must be bound even when we are not on the frame page, otherwise the
        # check below raises UnboundLocalError instead of reporting the
        # failed login.
        home_url = None
        if self.frame_page.is_here():
            home_url = self.page.get_frame()
            self.js_url = self.page.get_js_url()

        if not home_url or not self.page.logged:
            raise BrowserIncorrectPassword()

        self.location(home_url)
Romain Bignon's avatar
Romain Bignon committed
173

174 175 176 177 178 179 180 181 182 183
    def go_post(self, url, data=None):
        """Open ``url`` with its query string converted to POST data.

        Most HSBC account links are actually handled by JS code which
        converts a GET query string into POST data; not doing so often
        results in a logout by the site.

        :param url: target URL, with or without a query string
        :param data: optional mapping merged over the query string values
        """
        post_data = dict(parse_qsl(urlparse(url).query))
        if data:
            post_data.update(data)

        # Keep everything before the first '?'. ``partition`` leaves the URL
        # untouched when there is no query string, whereas slicing with
        # ``url.find('?')`` (-1 in that case) dropped its last character.
        base_url = url.partition('?')[0]
        self.location(base_url, data=post_data)

184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203
    def go_to_owner_accounts(self, owner):
        """Open the accounts page of the owner at index ``owner``.

        The owners URLs change all the time, so they are refreshed on every
        call. Asking for a person's accounts page while already being on it
        makes the website return an empty page with the message
        "Pas de TIERS": that is why we always go through the owners list
        first.

        :param owner: index of the owner in ``self.owners``
        """
        if not self.owners_list.is_here():
            self.go_post(self.js_url, data={'debr': 'OPTIONS_TIE'})

            if not self.owners_list.is_here():
                # Sometimes when we fetch info from a PEA account, the first
                # POST fails and we are stuck on some owner's AccountsPage.
                self.logger.warning('The owners list redirection failed, we must try again.')
                self.go_post(self.js_url, data={'debr': 'OPTIONS_TIE'})

        # Refresh the owners URLs in case they changed.
        self.owners = self.page.get_owners_urls()
        owner_url = self.owners[owner]
        self.go_post(owner_url)
Sylvie Ye's avatar
Sylvie Ye committed
204

205
    @need_login
206 207 208 209 210 211 212 213 214
    def iter_account_owners(self):
        """Yield every account of the connection, each one only once.

        Some connections have a "Compte de Tiers" section with several
        people each having their own accounts: the accounts of every person
        are fetched and each account remembers its owner in ``_owner``.
        The result is cached in ``self.unique_accounts_list``.
        """
        # Serve the cache when a previous call already filled it.
        if self.unique_accounts_list:
            for cached in self.unique_accounts_list.values():
                yield cached
            return

        self.go_post(self.js_url, data={'debr': 'OPTIONS_TIE'})
        if not self.owners_list.is_here():
            return

        self.owners = self.page.get_owners_urls()

        # self.accounts_list maps each owner index to a dictionary of this
        # owner's accounts.
        for owner in range(len(self.owners)):
            self.accounts_list[owner] = {}
            self.update_accounts_list(owner, True)
            owner_accounts = self.accounts_list[owner]

            # Every account must know which owner it belongs to.
            for acc in owner_accounts.values():
                acc._owner = owner

            # Go to the cards page through the first card account, if any.
            first_card = next(
                (acc for acc in owner_accounts.values() if acc.type == Account.TYPE_CARD),
                None
            )
            if first_card is not None:
                self.location(first_card.url)

            # Collect all the (card, parent) couples listed on the cards page.
            card_parent_pairs = []
            if self.cbPage.is_here():
                card_parent_pairs = self.page.get_all_parent_id()
                self.go_post(self.js_url, data={'debr': 'COMPTES_PAN'})

            # Attach each card to its parent and inherit a missing currency.
            for acc in owner_accounts.values():
                if acc.type != Account.TYPE_CARD:
                    continue
                for pair in card_parent_pairs:
                    if acc.id in pair[0].replace(' ', ''):
                        acc.parent = find_object(owner_accounts.values(), id=pair[1])
                    if acc.parent and not acc.currency:
                        acc.currency = acc.parent.currency

            # Get back to the owners list before moving to the next owner.
            self.go_post(self.js_url, data={'debr': 'OPTIONS_TIE'})

        # Merge the accounts of all owners without duplicating the common ones.
        for accounts_of_owner in self.accounts_list.values():
            for acc in accounts_of_owner.values():
                self.unique_accounts_list.setdefault(acc.id, acc)

        for acc in self.unique_accounts_list.values():
            yield acc
262

263 264 265 266
    @need_login
    def update_accounts_list(self, owner, iban=True):
        """Refresh the accounts stored for ``owner`` from the website.

        Known accounts only get their ``url`` refreshed, accounts seen for
        the first time are stored as they are.

        :param owner: index of the owner, key of ``self.accounts_list``
        :param iban: when true, also visit the RIB page so it can complete
            the accounts of this owner
        """
        # Make sure we are on this owner's accounts page.
        self.go_to_owner_accounts(owner)

        for fresh in self.page.iter_spaces_account():
            owner_accounts = self.accounts_list[owner]
            if fresh.id in owner_accounts:
                owner_accounts[fresh.id].url = fresh.url
            else:
                owner_accounts[fresh.id] = fresh

        if not iban:
            return

        self.location(self.js_url, params={'debr': 'COMPTES_RIB'})
        if self.rib.is_here():
            self.page.get_rib(self.accounts_list[owner])
Romain Bignon's avatar
Romain Bignon committed
277

278 279 280
    @need_login
    def _quit_li_space(self):
        """Leave the life insurance space and come back to the bank pages.

        Each step only runs when the browser is on the matching page; the
        checks are chained because a step may land on the page handled by
        the next one.
        """
        if self.life_insurances.is_here():
            self.page.disconnect()

            # Forget the cookies of the life insurance site.
            for cookie_name in ('ErisaSession', 'HBFR-INSURANCE-COOKIE-82'):
                self.session.cookies.pop(cookie_name, None)

        if self.life_not_found.is_here():
            # likely won't avoid having to login again anyway
            self.location(self.js_url)

        if self.frame_page.is_here():
            frame_url = self.page.get_frame()
            self.js_url = self.page.get_js_url()
            self.location(frame_url)

        if self.life_insurance_useless.is_here():
            self.go_post(self.js_url, data={'debr': 'COMPTES_PAN'})

300
    @need_login
301
    def _go_to_life_insurance(self, account):
        """Enter the life insurance space on the contract of ``account``.

        :param account: life insurance account; its ``url`` and ``id`` are used
        :returns: True once the contract page has been requested, False when
            the website kept us on a bank page or on the "not found" page
        """
        self._quit_li_space()
        self.go_post(account.url)

        # Any of these pages means we did not reach the life insurance space.
        failure_pages = (
            self.accounts,
            self.frame_page,
            self.life_insurance_useless,
            self.life_not_found,
        )
        if any(page.is_here() for page in failure_pages):
            self.logger.warning('cannot go to life insurance %r', account)
            return False

        form = {'url_suivant': 'SITUATIONCONTRATB2C', 'strNumAdh': ''}
        form.update(self.page.get_lf_attributes(account.id))
        self.life_insurances.go(data=form)
        return True
314

Romain Bignon's avatar
Romain Bignon committed
315
    @need_login
316 317
    def get_history(self, account, coming=False, retry_li=True):
        """Return the transactions of ``account``.

        :param account: account to read; its ``_owner`` and ``id`` are used
            to pick the refreshed account out of ``self.accounts_list``
        :param coming: for card accounts, when true only the transactions
            dated after today are kept, otherwise only those dated today or
            earlier. Always gives an empty list for life insurances and is
            not supported for the other account types.
        :param retry_li: retry once when the life insurance space raises
            ``AccountNotFound``
        :returns: a list of transactions, or a generator of transactions for
            the accounts that are neither cards nor life insurances
        :raises NotImplementedError: for loans, for accounts whose URL is a
            javascript link or contains ``&Crd=``, and when ``coming`` is
            requested on an account which is not a card
        """
        self._quit_li_space()
        self.update_accounts_list(account._owner, False)
        account = self.accounts_list[account._owner][account.id]

        if account.url is None:
            return []

        if account.url.startswith('javascript') or '&Crd=' in account.url or account.type == Account.TYPE_LOAN:
            raise NotImplementedError()

        if account.type in (Account.TYPE_LIFE_INSURANCE, Account.TYPE_CAPITALISATION):
            if coming is True:
                return []

            try:
                if not self._go_to_life_insurance(account):
                    self._quit_li_space()
                    return []
            except (XMLSyntaxError, HTTPNotFound):
                self._quit_li_space()
                return []
            except AccountNotFound:
                self.go_post(self.js_url)

                # often if we visit life insurance subsite multiple times too quickly, the site just returns an error
                # so we just retry (we might relogin...)
                # TODO find out how to avoid the error, or avoid relogin
                if retry_li:
                    self.logger.warning('life insurance seems unavailable for account %s', account.id)
                    return self.get_history(account, coming, False)

                self.logger.error('life insurance seems unavailable for account %s', account.id)
                return []

            self.life_insurances.go(data={'url_suivant': 'HISTORIQUECONTRATB2C', 'strMonnaie': 'EURO'})

            history = list(self.page.iter_history())

            self._quit_li_space()

            return history

        # do_login() resets self.app_gone, so the re-login done below is
        # remembered in a local flag: otherwise the links would never be
        # refreshed after it.
        relogged = False
        try:
            self.go_post(self.accounts_list[account._owner][account.id].url)
        # sometimes going to the hsbc life insurance space logs us out
        except HTTPNotFound:
            self.app_gone = True
            self.do_logout()
            self.do_login()
            relogged = True

        # If we relogin on hsbc, all links have changed
        if relogged or self.app_gone:
            self.app_gone = False
            self.update_accounts_list(account._owner, False)
            self.location(self.accounts_list[account._owner][account.id].url)

        if self.page is None:
            return []

        # for 'fusion' space
        if getattr(account, '_is_form', False):
            # go on accounts page to get account form
            self.go_to_owner_accounts(account._owner)
            self.go_post(self.js_url, data={'debr': 'COMPTES_PAN'})
            self.page.go_history_page(account)

        if self.cbPage.is_here():
            guesser = LinearDateGuesser(date_max_bump=timedelta(45))
            history = list(self.page.get_history(date_guesser=guesser))

            # On a card page, untyped transactions are deferred card ones.
            for tr in history:
                if tr.type == tr.TYPE_UNKNOWN:
                    tr.type = tr.TYPE_DEFERRED_CARD

            if account.parent:
                # Fetching the card summaries from the parent account using the card id in the transaction labels:
                def match_card(tr):
                    return (account.id in tr.label.replace(' ', ''))
                history.extend(keep_only_card_transactions(self.get_history(account.parent), match_card))

            # One single reference date for the whole filtering.
            today = date.today()
            if coming:
                history = [tr for tr in history if tr.date > today]
            else:
                history = [tr for tr in history if tr.date <= today]
            return sorted_transactions(history)
        elif not coming:
            return self._get_history()
        else:
            raise NotImplementedError()
Romain Bignon's avatar
Romain Bignon committed
404

Vincent Paredes's avatar
Vincent Paredes committed
405 406 407
    def _get_history(self):
        """Lazily yield the transactions listed on the current page."""
        for transaction in self.page.get_history():
            yield transaction
408

Sylvie Ye's avatar
Sylvie Ye committed
409
    def get_investments(self, account, retry_li=True):
        """Dispatch to the investments reader matching the account type.

        :param account: account to read; it must have a ``url``
        :param retry_li: forwarded to ``get_life_investments``
        :raises NotImplementedError: when the account has no URL or when its
            type is not handled
        """
        if not account.url:
            raise NotImplementedError()

        life_types = (Account.TYPE_LIFE_INSURANCE, Account.TYPE_CAPITALISATION)
        if account.type in life_types:
            return self.get_life_investments(account, retry_li=retry_li)

        if account.type == Account.TYPE_PEA:
            return self.get_pea_investments(account)

        if account.type == Account.TYPE_MARKET:
            # 'BOURSE_INV' need more security to get invest page
            if 'BOURSE_INV' in account.url:
                return self.get_pea_investments(account)
            return self.get_scpi_investments(account)

        raise NotImplementedError()

424
    def get_scpi_investments(self, account):
        """Return the SCPI investments of ``account``.

        :param account: market account; it must have a ``url``
        :raises NotImplementedError: when the account has no URL
        """
        if not account.url:
            raise NotImplementedError()

        # Clean the account URL: keep what stands between single quotes when
        # there are some, the whole URL otherwise.
        quoted = re.search(r"'(.*)'", account.url)
        account_url = quoted.group(1) if quoted else account.url

        # We need to be on the accounts page to reach the SCPI page.
        self.go_to_owner_accounts(account._owner)
        self.accounts.go()

        # SCPI page, then its details page, then the "more details" page
        # when there is one.
        self.location(account_url)
        self.page.go_scpi_detail_page()
        self.page.go_more_scpi_detail_page()

        return self.page.iter_scpi_investment()

445
    def get_pea_investments(self, account):
        """Return the liquidities and investments of a PEA or market account.

        The wealth space listing is fetched once and kept in
        ``self.PEA_LISTING``.

        :param account: account whose type is TYPE_PEA or TYPE_MARKET
        :returns: list of liquidities followed by investments; empty when the
            balance is null or when the wealth space cannot be reached
        """
        self.go_to_owner_accounts(account._owner)
        assert account.type in (Account.TYPE_PEA, Account.TYPE_MARKET)

        # With a null balance there is no link to the market page.
        if not account.balance:
            return []

        if not self.PEA_LISTING:
            # _go_to_wealth_accounts returns True if everything went well.
            if not self._go_to_wealth_accounts(account):
                self.logger.warning('Unable to connect to wealth accounts.')
                return []

        # Account number without the "EUR" part.
        account_number = re.search(r'\d{4,}', account.id).group(0)

        # Ids of the wealth accounts look like XXX<account number>.
        wealth_account = next(
            (acc for acc in self.PEA_LISTING.get('accounts', []) if account_number in acc.id),
            None
        )
        # The account should be found.
        assert wealth_account

        pea_invests = [
            liquidity for liquidity in self.PEA_LISTING.get('liquidities', [])
            if liquidity._invest_account_id == wealth_account.number
        ]
        pea_invests.extend(
            invest for invest in self.PEA_LISTING.get('investments', [])
            if invest._invest_account_id == wealth_account.id
        )
        return pea_invests
482 483

    def get_life_investments(self, account, retry_li=True):
        """Return the investments of a life insurance account as a list.

        :param account: life insurance account; its ``_owner`` and ``id`` are
            used to pick the refreshed account out of ``self.accounts_list``
        :param retry_li: retry once through ``get_investments`` when the life
            insurance space raises ``AccountNotFound``
        :returns: list of investments, empty when the space cannot be reached
        """
        self._quit_li_space()
        self.update_accounts_list(account._owner, False)
        account = self.accounts_list[account._owner][account.id]

        try:
            reached = self._go_to_life_insurance(account)
            if not reached:
                self._quit_li_space()
                return []
        except (XMLSyntaxError, HTTPNotFound):
            self._quit_li_space()
            return []
        except AccountNotFound:
            self.go_post(self.js_url)

            # Visiting the life insurance subsite several times too quickly
            # often makes the site return an error: retry once (we might
            # have to relogin...).
            if not retry_li:
                self.logger.error('life insurance seems unavailable for account %s', account.id)
                return []

            self.logger.warning('life insurance seems unavailable for account %s', account.id)
            return self.get_investments(account, False)

        investments = list(self.page.iter_investments())
        self._quit_li_space()
        return investments
511

512
    def _go_to_wealth_accounts(self, account):
        """Enter the wealth space and cache its listing in ``PEA_LISTING``.

        :param account: account whose ``_owner`` is used when the accounts
            have to be browsed again
        :returns: True when the accounts, liquidities and investments have
            been stored in ``self.PEA_LISTING``; False when the current page
            gives no patrimoine URL or when the investment form page cannot
            be reached even after a new login
        """
        if not hasattr(self.page, 'get_middle_frame_url'):
            # if we can catch the URL, we go directly, else we need to browse
            # the website
            self.update_accounts_list(account._owner, False)

        self.location(self.page.get_middle_frame_url())

        patrimoine_url = self.page.get_patrimoine_url()
        if not patrimoine_url:
            # Explicit failure instead of implicitly returning None.
            return False

        self.location(patrimoine_url)
        self.page.go_next()

        if self.login.is_here():
            self.logger.warning('Connection to the Logon page failed, we must try again.')
            self.do_login()
            self.update_accounts_list(account._owner, False)
            self.investment_form_page.go()
            # If relogging did not help accessing the wealth space,
            # there is nothing more we can do to get there.
            if not self.investment_form_page.is_here():
                return False

        self.page.go_to_logon()
        helper = ProductViewHelper(self)
        # we need to go there to initialize the session
        self.PEA_LISTING['accounts'] = list(helper.retrieve_accounts())
        self.PEA_LISTING['liquidities'] = list(helper.retrieve_liquidity())
        self.PEA_LISTING['investments'] = list(helper.retrieve_invests())
        self.connection.go()
        return True
542 543 544

    @need_login
    def get_profile(self):
        """Return the profile read from the 'PARAM' page of the main owner."""
        if not self.owners:
            # The owners URLs have not been fetched yet.
            self.go_post(self.js_url, data={'debr': 'OPTIONS_TIE'})
            if self.owners_list.is_here():
                self.owners = self.page.get_owners_urls()

        # The main owner of the connection is always the first of the list.
        main_owner = 0
        self.go_to_owner_accounts(main_owner)

        self.go_post(self.js_url, data={'debr': 'PARAM'})
        return self.page.get_profile()