From 57fd3dd67fcd57c44ec5c34783910b7f5c9e63a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Sobucki?= <168-ssobucki@users.noreply.gitlab.budget-insight.com> Date: Tue, 19 Jan 2021 11:05:09 +0100 Subject: [PATCH] [boursorama] Replaced Async logic by fill_account Using Async isn't considered best practice; this commit replaces the Async logic with fill_account --- modules/boursorama/browser.py | 30 ++++++- modules/boursorama/pages.py | 150 +++++++++++++++------------------- 2 files changed, 96 insertions(+), 84 deletions(-) diff --git a/modules/boursorama/browser.py b/modules/boursorama/browser.py index 5e05ba23e6..94d6f6b9eb 100644 --- a/modules/boursorama/browser.py +++ b/modules/boursorama/browser.py @@ -360,6 +360,8 @@ def go_cards_number(self, link): def get_accounts_list(self): self.status.go() + self.accounts_list = None # necessary to loop again after being logged out + exc = None for _ in range(3): if self.accounts_list is not None: @@ -371,7 +373,7 @@ def get_accounts_list(self): has_account = False self.pro_accounts.go() if self.pro_accounts.is_here(): - self.accounts_list.extend(self.page.iter_accounts()) + self.accounts_list.extend(self.get_filled_accounts()) has_account = True else: # We dont want to let has_account=False if we landed on an unknown page @@ -387,7 +389,7 @@ def get_accounts_list(self): continue else: if self.accounts.is_here(): - self.accounts_list.extend(self.page.iter_accounts()) + self.accounts_list.extend(self.get_filled_accounts()) has_account = True else: # We dont want to let has_account=False if we landed on an unknown page @@ -445,6 +447,30 @@ def get_accounts_list(self): self.ownership_guesser() return self.accounts_list + def get_filled_accounts(self): + accounts_list = [] + for account in self.page.iter_accounts(): + try: + self.location(account.url) + except requests.exceptions.HTTPError as e: + # We do not yield life insurance accounts with a 404 error. 
Since we have verified, that + # it is a website scoped problem and not a bad request from our part. + if ( + e.response.status_code == 404 + and account.type == Account.TYPE_LIFE_INSURANCE + ): + self.logger.warning( + '404 ! Broken link for life insurance account (%s). Account will be skipped', + account.label + ) + continue + raise + + self.page.fill_account(obj=account) + if account.id: + accounts_list.append(account) + return accounts_list + def get_account(self, account_id=None, account_iban=None): acc_list = self.get_accounts_list() account = strict_find_object(acc_list, id=account_id) diff --git a/modules/boursorama/pages.py b/modules/boursorama/pages.py index 04abbc17e1..19702b8f88 100644 --- a/modules/boursorama/pages.py +++ b/modules/boursorama/pages.py @@ -28,8 +28,6 @@ import hashlib from functools import wraps -from requests.exceptions import HTTPError - from weboob.browser.pages import ( HTMLPage, LoggedPage, pagination, NextPage, FormNotFound, PartialHTMLPage, LoginPage, CsvPage, RawPage, JsonPage, @@ -37,7 +35,7 @@ from weboob.browser.elements import ListElement, ItemElement, method, TableElement, SkipItem, DictElement from weboob.browser.filters.standard import ( CleanText, CleanDecimal, Field, Format, - Regexp, Date, AsyncLoad, Async, Eval, Env, + Regexp, Date, Eval, Env, Currency as CleanCurrency, Map, Coalesce, MapIn, Lower, Base, ) @@ -327,8 +325,6 @@ class iter_accounts(ListElement): class item(ItemElement): klass = Account - load_details = Field('url') & AsyncLoad - def condition(self): # Ignore externally aggregated accounts and insurances: return ( @@ -342,59 +338,6 @@ def condition(self): obj_label = CleanText('.//a[has-class("account--name")] | .//div[has-class("account--name")]') obj_currency = FrenchTransaction.Currency('.//a[has-class("account--balance")]') - # Handle 404 error when using the Async filter - # Using the filter with a broken url link will raise a 404. 
- # The account's ID is parsed using the Async filter, so if we get a 404 we will skip the item - def obj_id(self): - account_type = Field('type')(self) - if account_type == Account.TYPE_CARD: - # When card is opposed it still appears on accounts page with a dead link and so, no id. Skip it. - if Attr('.//a[has-class("account--name")]', 'href')(self) == '#': - raise SkipItem() - return self.obj__idparts()[1] - - try: - # sometimes it's
sometimes it's

- account_id = Async( - 'details', - Regexp( - CleanText('//*[has-class("account-number")]', transliterate=True), - r'Reference du compte : (\d+)', - default=NotAvailable - ) - )(self) - except HTTPError as e: - # We only raise SkipItem for life insurance accounts with a 404 error. Since we have verified, that - # it is a website scoped problem and not a bad request from our part. - if ( - e.response.status_code == 404 - and account_type == Account.TYPE_LIFE_INSURANCE - ): - self.logger.warning( - '404 ! Broken link for life insurance account (%s). Account will be skipped', - Field('label')(self) - ) - raise SkipItem() - raise - if not account_id: - raise SkipItem() - return account_id - - obj_number = obj_id - - # assignments 'obj_x = ...' are evaluated before methods. Since 'obj_id' is a method and we want to catch - # Async induced 404 error in it, we must change 'obj_valuation_diff' to a method. Otherwise, 404 error will - # raise from 'obj_valuation_diff = Async(...)' and we won't get the desired behaviour. 
- def obj_valuation_diff(self): - return ( - Async('details') - & CleanDecimal( - '''//li[h4[text()="Total des +/- values"]]/h3 | - //li[span[text()="Total des +/- values latentes"]]/span[has-class("overview__value")]''', - replace_dots=True, default=NotAvailable - ) - )(self) - obj__holder = None obj__amount = CleanDecimal.French('.//a[has-class("account--balance")]') @@ -402,12 +345,6 @@ def obj_valuation_diff(self): def obj_balance(self): if Field('type')(self) != Account.TYPE_CARD: balance = Field('_amount')(self) - if Field('type')(self) in [Account.TYPE_PEA, Account.TYPE_LIFE_INSURANCE, Account.TYPE_MARKET]: - page = Async('details').loaded_page(self) - if isinstance(page, MarketPage): - updated_balance = page.get_balance(Field('type')(self)) - if updated_balance is not None: - return updated_balance return balance return Decimal('0') @@ -415,12 +352,6 @@ def obj_coming(self): # report deferred expenses in the coming attribute if Field('type')(self) == Account.TYPE_CARD: return Field('_amount')(self) - return Async( - 'details', - CleanDecimal( - u'//li[h4[text()="Mouvements à venir"]]/h3', replace_dots=True, default=NotAvailable - ) - )(self) def obj_type(self): # card url is /compte/cav/xxx/carte/yyy so reverse to match "carte" before "cav" @@ -439,12 +370,16 @@ def obj_type(self): if v: return v - page = Async('details').loaded_page(self) - if isinstance(page, LoanPage): - return Account.TYPE_LOAN - return Account.TYPE_UNKNOWN + def obj_id(self): + account_type = Field('type')(self) + if account_type == Account.TYPE_CARD: + # When card is opposed it still appears on accounts page with a dead link and so, no id. Skip it. + if Attr('.//a[has-class("account--name")]', 'href')(self) == '#': + raise SkipItem() + return self.obj__idparts()[1] + def obj_ownership(self): ownership = Coalesce( MapIn( @@ -477,21 +412,22 @@ def obj__webid(self): if parts: return parts[0] - # We do not yield other banks accounts for the moment. 
- def validate(self, obj): - return ( - not Async('details', CleanText(u'//h4[contains(text(), "Établissement bancaire")]'))(self) - and not Async('details', CleanText(u'//h4/div[contains(text(), "Établissement bancaire")]'))(self) - ) - class LoanPage(LoggedPage, HTMLPage): - LOAN_TYPES = { "PRÊT PERSONNEL": Account.TYPE_CONSUMER_CREDIT, "CLIC": Account.TYPE_CONSUMER_CREDIT, } + @method + class fill_account(ItemElement): + obj_id = Regexp( + CleanText('//*[has-class("account-number")]', transliterate=True), + r'Reference du compte : (\d+)', default=NotAvailable + ) + + obj_type = Account.TYPE_LOAN + @method class get_loan(ItemElement): @@ -629,6 +565,27 @@ class HistoryPage(LoggedPage, HTMLPage): """ transaction_klass = Transaction + @method + class fill_account(ItemElement): + def obj_id(self): + if self.obj.type == Account.TYPE_CARD: + return self.obj.id + + return Regexp( + CleanText('//*[has-class("account-number")]', transliterate=True), + r'Reference du compte : (\d+)', default=NotAvailable + )(self) + + obj_number = obj_id + + def obj_coming(self): + if self.obj.type == Account.TYPE_CARD: + return self.obj.coming + return CleanDecimal.French( + '//li[h4[text()="Mouvements à venir"]]/h3', + default=NotAvailable + )(self) + @otp_pagination @method class iter_history(ListElement): @@ -903,6 +860,35 @@ def inner(page, *args, **kwargs): class MarketPage(LoggedPage, HTMLPage): + @method + class fill_account(ItemElement): + obj_id = obj_number = Regexp( + CleanText('//*[has-class("account-number")]', transliterate=True), + r'Reference du compte : (\d+)', default=NotAvailable + ) + + obj_valuation_diff = ( + Coalesce( + CleanDecimal.French( + '//li[h4[text()="Total des +/- values"]]/h3', + default=NotAvailable + ), + CleanDecimal.French( + '//li[span[text()="Total des +/- values latentes"]]/span[has-class("overview__value")]', + default=NotAvailable + ), + default=NotAvailable + ) + ) + + def obj_balance(self): + # balance parsed on the dashboard might not be the 
most up to date value + # for market accounts + updated_balance = self.page.get_balance(self.obj.type) + if updated_balance is not None: + return updated_balance + return self.obj.balance + def get_balance(self, account_type): if account_type == Account.TYPE_LIFE_INSURANCE: txt = "Solde au" -- GitLab