From dad4eb6dcd36fcb7818361d190f94a9876bbc39d Mon Sep 17 00:00:00 2001 From: Maxime Pommier Date: Mon, 27 May 2019 09:24:11 +0200 Subject: [PATCH] =?UTF-8?q?[caissedepargne]=C2=A0Improve=20the=20way=20tha?= =?UTF-8?q?t=20we=20get=20card's=20account.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Card cases are really tricky on the new website. There are 2 kinds of pages where we can find card information - CardsPage: List some of the PSU cards - CardsComingPage: On the coming transaction page (for a specific checking account), we can find all cards related to this checking account. Information to reach this CC is in the home page We have to go through both kinds of pages for the following reasons: - If there is no coming yet, the card will not be found in the home page and we will not be able to reach the CardsComingPage. But we can find it on CardsPage - Some cards are only on the CardsComingPage and not the CardsPage - In CardsPage, there are cards (with "Business" in the label) without checking account on the website (neither history nor coming), so we skip them. - Some cards on the CardsPage have a checking account parent, but if we follow the link to reach it with CardsComingPage, we find another card that is not in CardsPage. 
Closes: 41709@sibi --- modules/caissedepargne/browser.py | 71 ++++++++++++++++++++++++------- modules/caissedepargne/pages.py | 37 +++++++++++++--- 2 files changed, 86 insertions(+), 22 deletions(-) diff --git a/modules/caissedepargne/browser.py b/modules/caissedepargne/browser.py index df9292b1d6..5ea13fe494 100644 --- a/modules/caissedepargne/browser.py +++ b/modules/caissedepargne/browser.py @@ -143,6 +143,7 @@ def __init__(self, nuser, *args, **kwargs): self.BASEURL = 'https://%s' % self.BASEURL self.is_cenet_website = False + self.new_website = True self.multi_type = False self.accounts = None self.loans = None @@ -436,29 +437,69 @@ def get_accounts_list(self): else: assert False, "new domain that hasn't been seen so far ?" + """ + Card cases are really tricky on the new website. + There are 2 kinds of pages where we can find card information + - CardsPage: List some of the PSU cards + - CardsComingPage: On the coming transaction page (for a specific checking account), + we can find all cards related to this checking account. Information to reach this + CC is in the home page + + We have to go through both kinds of pages for the following reasons: + - If there is no coming yet, the card will not be found in the home page and we will not + be able to reach the CardsComingPage. But we can find it on CardsPage + - Some cards are only on the CardsComingPage and not the CardsPage + - In CardsPage, there are cards (with "Business" in the label) without checking account on the + website (neither history nor coming), so we skip them. + - Some cards on the CardsPage have a checking account parent, but if we follow the link to + reach it with CardsComingPage, we find another card that is not in CardsPage. 
+ """ + if self.new_website: + for account in self.accounts: + # Add the card accounts that we find in the CardsComingPage of each checking account + if account._card_links: + self.home.go() + self.page.go_history(account._card_links) + for card in self.page.iter_cards(): + card.parent = account + card._coming_info = self.page.get_card_coming_info(card.number, card.parent._card_links.copy()) + self.accounts.append(card) + self.home.go() self.page.go_list() self.page.go_cards() - if self.cards.is_here() or self.cards_old.is_here(): - cards = list(self.page.iter_cards()) - for card in cards: + # We are on the new website. We already added some cards, but we can find more of them on the CardsPage + if self.cards.is_here(): + for card in self.page.iter_cards(): card.parent = find_object(self.accounts, number=card._parent_id) - assert card.parent, 'card account %r parent was not found' % card + assert card.parent, 'card account parent %s was not found' % card + + # If we already added this card, we don't have to add it a second time + if find_object(self.accounts, number=card.number): + continue - # If we are in the new site, we have to get each card coming transaction link. - if self.cards.is_here(): - for card in cards: - info = card.parent._card_links + info = card.parent._card_links - # If info is filled, that mean there are comings transaction - card._coming_info = None - if info: - self.page.go_list() - self.page.go_history(info) - card._coming_info = self.page.get_card_coming_info(card.number, info.copy()) + # If card.parent._card_links is not filled, it means this checking account + # has no coming transactions. 
+ card._coming_info = None + if info: + self.page.go_list() + self.page.go_history(info) + card._coming_info = self.page.get_card_coming_info(card.number, info.copy()) - self.accounts.extend(cards) + if not card._coming_info: + self.logger.warning('Skip card %s (not found on checking account)', card.number) + continue + self.accounts.append(card) + + # We are on the old website. We add all card that we can find on the CardsPage + elif self.cards_old.is_here(): + for card in self.page.iter_cards(): + card.parent = find_object(self.accounts, number=card._parent_id) + assert card.parent, 'card account parent %s was not found' % card.number + self.accounts.append(card) # Some accounts have no available balance or label and cause issues # in the backend so we must exclude them from the accounts list: diff --git a/modules/caissedepargne/pages.py b/modules/caissedepargne/pages.py index db94715eb4..30996e23d3 100644 --- a/modules/caissedepargne/pages.py +++ b/modules/caissedepargne/pages.py @@ -291,6 +291,7 @@ def _add_account(self, accounts, link, label, account_type, balance, number=None return account = Account() + account._card_links = None account.id = info['id'] if is_rib_valid(info['id']): account.iban = rib2iban(info['id']) @@ -446,6 +447,7 @@ def get_loan_list(self): tds = tr.findall('td') account = Account() + account._card_links = None account.id = CleanText('./a')(tds[2]).split('-')[0].strip() account.label = CleanText('./a')(tds[2]).split('-')[-1].strip() account.type = Account.TYPE_LOAN @@ -864,6 +866,7 @@ class item(ItemElement): obj__parent_id = CleanText(TableCell('parent')) obj_balance = 0 obj_currency = Currency(TableCell('coming')) + obj__card_links = None def obj_coming(self): if CleanText(TableCell('coming'))(self) == '-': @@ -892,10 +895,25 @@ class CardsComingPage(IndexPage): def is_here(self): return CleanText('//h2[text()="Encours de carte à débit différé"]')(self.doc) - def get_card_coming_info(self, number, info): + @method + class 
iter_cards(ListElement): + item_xpath = '//table[contains(@class, "compte") and position() = 1]//tr[contains(@id, "MM_HISTORIQUE_CB") and position() < last()]' + + class item(ItemElement): + klass = Account + obj_id = Regexp(CleanText(Field('label'), replace=[('*', 'X')]), r'(\d{6}\X{6}\d{4})') + obj_type = Account.TYPE_CARD + obj_label = CleanText('./td[1]') + obj_balance = Decimal(0) + obj_coming = CleanDecimal.French('./td[2]') + obj_currency = Currency('./td[2]') + obj_number = Regexp(CleanText(Field('label')), r'(\d{6}\*{6}\d{4})') + obj__card_links = None + + def get_card_coming_info(self, number, info): # If the xpath match, that mean there are only one card - # We have enought information in `info` to get its coming transaction + # We have enough information in `info` to get its coming transaction if CleanText('//tr[@id="MM_HISTORIQUE_CB_rptMois0_ctl01_trItem"]')(self.doc): return info @@ -907,12 +925,16 @@ def get_card_coming_info(self, number, info): if Regexp(CleanText(xpath), r'(\d{6}\*{6}\d{4})')(self.doc) == number: return info - # For all card except the first one for the same check account, we have to get info through their href info - link = CleanText(Attr('//a[contains(text(),"%s")]' % number, 'href'))(self.doc) - infos = re.match(r'.*(DETAIL_OP_M0&[^\"]+).*', link) - info['link'] = infos.group(1) + # Some cards redirect to a checking account where we cannot find them. Since we have no details or history, + # we return None and skip them in the browser. 
+ if CleanText('//a[contains(text(),"%s")]' % number)(self.doc): + # For all cards except the first one for the same check account, we have to get info through their href info + link = CleanText(Link('//a[contains(text(),"%s")]' % number))(self.doc) + infos = re.match(r'.*(DETAIL_OP_M0&[^\"]+).*', link) + info['link'] = infos.group(1) - return info + return info + return None class CardsOldWebsitePage(IndexPage): @@ -945,6 +967,7 @@ class item(ItemElement): obj_balance = 0 obj_coming = CleanDecimal.French(TableCell('coming')) obj_currency = Currency(TableCell('coming')) + obj__card_links = None def obj__parent_id(self): return self.page.get_account() -- GitLab