Commit 7fa6e4e4 authored by Quentin Defenouillere's avatar Quentin Defenouillere Committed by Romain Bignon

[spirica] Only retrieve investments for the first 20 transactions & handle logouts

I factored the retrieval of transaction investments out into a separate
method so that investments are only fetched for the first 20 transactions;
otherwise the Spirica website is too slow. The rest of iter_history fetches
transactions from the other pages without their investments.
I also added a method that checks whether we are still logged in and logs in again if not.
The current "transaction_page" page is easier to handle if we make it an
instance attribute, so it can be stored and modified by all three methods.
parent 31d7046d
......@@ -21,6 +21,7 @@ from __future__ import unicode_literals
from weboob.browser import LoginBrowser, URL, need_login
from weboob.exceptions import BrowserIncorrectPassword
from weboob.browser.exceptions import ClientError
from .pages import LoginPage, AccountsPage, DetailsPage, MaintenancePage
......@@ -38,6 +39,7 @@ class SpiricaBrowser(LoginBrowser):
self.BASEURL = website
self.cache = {}
self.cache['invs'] = {}
self.transaction_page = None
def do_login(self):
self.login.go().login(self.username, self.password)
......@@ -65,25 +67,72 @@ class SpiricaBrowser(LoginBrowser):
self.cache['invs'][account.id] = invs
return self.cache['invs'][account.id]
def check_if_logged_in(self, url):
    """Log in again and reopen the history tab if the session was dropped.

    If the browser is currently on the login page, a warning is logged,
    ``do_login()`` is called, ``url`` is reloaded, its history tab is
    reopened and the resulting page is stored in ``self.transaction_page``.
    If the browser is not on the login page, nothing happens.

    :param url: URL to reload once logged back in (callers in this file
                pass the account URL).
    """
    if not self.login.is_here():
        # Still logged in: keep the current page and transaction_page.
        return

    self.logger.warning('We were logged out during iter_history, proceed to re-login.')
    self.do_login()
    self.location(url)
    self.page.go_historytab()
    # Store new transaction_page after login:
    self.transaction_page = self.page
@need_login
def get_transactions_with_investments(self, max_count, url):
    """Collect the transactions of the current history page.

    Every transaction yielded by ``self.page.iter_history()`` is returned.
    For the first ``max_count`` of them, the investments form is submitted
    through ``self.transaction_page`` and, if the details page is the
    current page afterwards, ``transaction.investments`` is set to the
    investments that have both a label and a valuation.  Transactions past
    ``max_count`` are returned without this method touching their
    ``investments`` attribute.

    :param max_count: number of leading transactions whose investments
                      are fetched.
    :param url: URL handed to ``check_if_logged_in()`` so the history tab
                can be reopened after a re-login.
    :return: list of all transactions of the page, in iteration order.
    """
    transactions = []
    for index, transaction in enumerate(self.page.iter_history()):
        self.check_if_logged_in(url)
        if index < max_count:
            try:
                self.transaction_page.go_investments_form(transaction._index)
            except ClientError as e:
                self.logger.warning(e)
                # The request may have failed because we were logged out;
                # check_if_logged_in() only acts when the login page is shown.
                self.check_if_logged_in(url)
            if self.details.is_here():
                # Only keep investments that have at least a label and a valuation:
                transaction.investments = [
                    inv for inv in self.page.iter_transactions_investments()
                    if inv.label and inv.valuation
                ]
        transactions.append(transaction)
    return transactions
@need_login
def iter_history(self, account):
# Yield the transactions of `account`, starting from the history tab of
# its details page (account.url).
# NOTE(review): the statements below appear to interleave lines of the
# previous implementation with the new one (the indentation and diff
# markers are not visible here), so the notes marked NOTE(review) are
# assumptions to confirm against the real file.
self.location(account.url)
self.page.go_historytab()
# NOTE(review): this local is only read by the go_investments_form() call
# near the end of this block, while the next line stores the same page on
# the instance; presumably one of the two assignments is obsolete.
transaction_page = self.page
self.transaction_page = self.page
# Determining the number of transaction pages:
total_pages = int(self.page.count_transactions()) // 100
# Scraping transactions for each page:
# NOTE(review): this loop over every page looks superseded by the two
# sections that follow (page 0 first, then pages 1..total_pages) - TODO confirm.
for page_number in range(total_pages + 1):
self.page.go_historyall(page_number)
# Scraping transactions with their investments for the 20 first transactions.
# Sometimes go_historyall fails so we go back to the accounts page and retry.
if self.transaction_page.go_historyall(page_number=0):
for tr in self.get_transactions_with_investments(20, account.url):
yield tr
else:
self.logger.warning('The first go_historyall() failed, go back to account details and retry.')
# Reload the account page and its history tab, then retry page 0 once.
self.location(account.url)
self.page.go_historytab()
self.transaction_page = self.page
if self.transaction_page.go_historyall(page_number=0):
for tr in self.get_transactions_with_investments(20, account.url):
yield tr
# Scraping other transaction pages without their investments:
for page_number in range(1, total_pages + 1):
# Log in again first if the session was dropped in the meantime.
self.check_if_logged_in(account.url)
if not self.transaction_page.go_historyall(page_number):
self.logger.warning('The first go_historyall() failed, go back to account details and retry.')
self.location(account.url)
self.page.go_historytab()
self.transaction_page = self.page
if not self.transaction_page.go_historyall(page_number):
self.logger.warning('The go_historyall() failed twice, these transactions will be skipped.')
continue
for transaction in self.page.iter_history():
# NOTE(review): the investments fetch below uses the local
# `transaction_page` and contradicts the "without their investments"
# comment above; it is presumably left over from the previous
# implementation - TODO confirm.
transaction_page.go_investments_form(transaction._index)
transaction.investments = []
for inv in self.page.iter_transactions_investments():
# Only keep investments that have at least a label and a valuation:
if inv.label and inv.valuation:
transaction.investments.append(inv)
yield transaction
def fill_from_list(self, invs, objects_list):
......
......@@ -227,8 +227,11 @@ class DetailsPage(LoggedPage, HTMLPage):
def go_historyall(self, page_number):
form = self.get_form(xpath='//form[contains(@id, "ongletHistoOperations:ongletHistoriqueOperations")]')
# The form value varies (for example j_idt913 or j_idt62081) so we need to scrape it dynamically:
form_value = Attr('//div[@id="ongletHistoOperations:ongletHistoriqueOperations:newoperations"]/div[1]', 'id')(self.doc)
# The form value varies (for example j_idt913 or j_idt62081) so we need to scrape it dynamically.
# However, sometimes the form does not contain the 'id' attribute, in which case we must reload the page.
form_value = Attr('//div[@id="ongletHistoOperations:ongletHistoriqueOperations:newoperations"]/div[1]', 'id', default=None)(self.doc)
if not form_value:
return False
form['javax.faces.partial.ajax'] = 'true'
form['javax.faces.partial.execute'] = form_value
form['javax.faces.partial.render'] = form_value
......@@ -239,6 +242,7 @@ class DetailsPage(LoggedPage, HTMLPage):
form[form_value + '_rows'] = '100'
form[form_value + '_first'] = page_number * 100
form.submit()
return True
def go_investments_form(self, index):
form = self.get_form(xpath='//form[contains(@id, "ongletHistoOperations:ongletHistoriqueOperations")]')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment