From a2db86970dd2afaf34724d6a6418e4ca9fd17893 Mon Sep 17 00:00:00 2001 From: Maxime Gasselin Date: Tue, 12 Feb 2019 15:57:46 +0100 Subject: [PATCH] [bp] remove triplicate transaction in bp pro The next_page navigation is totally illogical. The next page button can lead back to the first transactions. Moreover, no logical pattern in the history URL navigation can be exploited. Nevertheless, we can skip these pages by comparing the first transaction of each page. Closes: 8257@zendesk 8318@zendesk 8292@zendesk 9235@zendesk 9589@zendesk --- modules/bp/browser.py | 1 + modules/bp/pages/pro.py | 22 +++++++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/modules/bp/browser.py b/modules/bp/browser.py index 98ddee24b1..39a192a4fb 100644 --- a/modules/bp/browser.py +++ b/modules/bp/browser.py @@ -592,6 +592,7 @@ def get_history(self, account): self.location(v.path, params=args) + self.first_transactions = [] for tr in self.page.iter_history(): transactions.append(tr) transactions.sort(key=lambda tr: tr.rdate, reverse=True) diff --git a/modules/bp/pages/pro.py b/modules/bp/pages/pro.py index 50b3e5ba41..f80e4a9b68 100644 --- a/modules/bp/pages/pro.py +++ b/modules/bp/pages/pro.py @@ -84,7 +84,27 @@ class ProAccountHistory(LoggedPage, MyHTMLPage): @method class iter_history(ListElement): item_xpath = u'//div[@id="tabReleve"]//tbody/tr' - next_page = Link('//div[@class="pagination"]//li[@class="pagin-on-right"]/a') + + def next_page(self): + # The next page on the website can return pages already visited without logical mechanism + # Nevertheless we can skip these pages with the comparison of the first transaction of the page + + next_page_xpath = '//div[@class="pagination"]//li[@class="pagin-on-right"]/a' + tr_xpath = '//tbody/tr[1]' + self.page.browser.first_transactions.append(CleanText(tr_xpath)(self.el)) + next_page_link = Link(next_page_xpath)(self.el) + next_page = self.page.browser.location(next_page_link) + first_transaction = 
CleanText(tr_xpath)(next_page.page.doc) + count = 0 # avoid an infinite loop + + while first_transaction in self.page.browser.first_transactions and count < 30: + next_page = self.page.browser.location(next_page_link) + next_page_link = Link(next_page_xpath)(next_page.page.doc) + first_transaction = CleanText(tr_xpath)(next_page.page.doc) + count += 1 + + if count < 30: + return next_page.page class item(ItemElement): klass = Transaction -- GitLab