From fec07df1c49694fc759d7325120d8e6bd2bba0fe Mon Sep 17 00:00:00 2001 From: Antoine BOSSY Date: Fri, 20 Dec 2019 07:41:25 +0100 Subject: [PATCH] [hsbc] Filter out duplicate transactions & ensure the debit date is always after the rdate. Previous xpath of get_history led to duplicate coming transactions. DateGuesser produced some invalid dates (e.g. 05/01/2019 instead of 05/01/2020) which led to some coming transactions not appearing. --- modules/hsbc/pages/account_pages.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/modules/hsbc/pages/account_pages.py b/modules/hsbc/pages/account_pages.py index 5a6de6d554..033e4088fb 100644 --- a/modules/hsbc/pages/account_pages.py +++ b/modules/hsbc/pages/account_pages.py @@ -397,7 +397,7 @@ def history_tabs_urls(self): @method class get_history(Pagination, Transaction.TransactionsElement): head_xpath = '//table/thead/tr/th' - item_xpath = '//table/tbody/tr[count(td) > 3][count(.//td//a[contains(text(), "Opérations débitées le")])=0]' + item_xpath = '//table/tbody/tr[not(has-class("rupture"))]' # items to fetch are contained in /tr with at least 4 /td # but avoid /tr that are categories such as 'Opérations débitées le ...' @@ -409,7 +409,12 @@ class item(Transaction.TransactionElement): def obj_date(self): # debit date is guessed in text such as 'Opérations débitées le 05/07' - return DateGuesser(Regexp(CleanText(self.xpath('./preceding-sibling::tr[.//a[contains(text(), "Opérations débitées le")]][1]')), r'(\d{2}/\d{2})'), Env("date_guesser"))(self) + guessed_date = DateGuesser(Regexp(CleanText(self.xpath('./preceding-sibling::tr[.//a[contains(text(), "Opérations débitées le")]][1]')), r'(\d{2}/\d{2})'), Env("date_guesser"))(self) + # Handle the case where the guessed debit date would be before the rdate (happens when + # the debit date is in january whereas the rdate is in december). 
+ if guessed_date < Field('rdate')(self): + return guessed_date.replace(year=guessed_date.year + 1) + return guessed_date def get_parent_id(self): # The parent id is in the details of the card -- GitLab