From 9a2f0d55aa2fa353cc5f6a96d5b4c850aabf631a Mon Sep 17 00:00:00 2001 From: Oleg Plakhotniuk Date: Sun, 22 Jun 2014 22:31:35 +0200 Subject: [PATCH] Retrieve all transactions from the history. Merchant and regular account support. All transactions add up to balance. Signed-off-by: Oleg Plakhotniuk closes #1406 --- modules/paypal/backend.py | 2 +- modules/paypal/browser.py | 56 ++++++++++++++++++++---- modules/paypal/pages.py | 91 +++++++++++++++++++++++++-------------- modules/paypal/test.py | 29 +++++++++++++ 4 files changed, 136 insertions(+), 42 deletions(-) create mode 100644 modules/paypal/test.py diff --git a/modules/paypal/backend.py b/modules/paypal/backend.py index 1ea09925bc..ba5f83ed4f 100644 --- a/modules/paypal/backend.py +++ b/modules/paypal/backend.py @@ -56,5 +56,5 @@ def get_account(self, _id): def iter_history(self, account): with self.browser: - for history in self.browser.get_history(account): + for history in self.browser.get_download_history(account): yield history diff --git a/modules/paypal/browser.py b/modules/paypal/browser.py index 149ea5eba1..c2f3d89e19 100644 --- a/modules/paypal/browser.py +++ b/modules/paypal/browser.py @@ -20,6 +20,7 @@ from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword from .pages import LoginPage, AccountPage, DownloadHistoryPage, SubmitPage, HistoryParser, UselessPage, HistoryPage +import datetime __all__ = ['Paypal'] @@ -43,6 +44,8 @@ class Paypal(BaseBrowser): DEFAULT_TIMEOUT = 30 # CSV export is slow + BEGINNING = datetime.date(1998,6,1) # The day PayPal was founded + def home(self): self.location('https://' + self.DOMAIN + '/en/cgi-bin/webscr?cmd=_login-run') @@ -75,28 +78,65 @@ def get_account(self, _id): return self.page.get_account(_id) def get_history(self, account): - self.history() + self.history(start=self.BEGINNING, end=datetime.date.today()) parse = True while parse: for trans in self.page.iter_transactions(account): yield trans parse = self.page.next() - def history(self): + def 
history(self, start, end): self.location('/en/cgi-bin/webscr?cmd=_history&nav=0.3.0') - self.page.filter() + self.page.filter(start, end) assert self.is_on_page(HistoryPage) - def download_history(self): + def get_download_history(self, account): + for csv in self.download_history(): + for trans in self.page.iter_transactions(account): + yield trans + + def period_has_trans(self, start, end): + """ + Checks if there're any transactions in a given period. + """ + self.history(start, end) + return next(self.page.parse(), False) or self.page.next() + + def bisect_oldest_date(self, start, end, steps=5): + """ + Finds an approximate beginning of transactions history in a + given number of iterations. + """ + if not steps: + return start + middle = start + (end-start)/2 + if self.period_has_trans(start, middle): + return self.bisect_oldest_date(start, middle, steps-1) + else: + return self.bisect_oldest_date(middle, end, steps-1) + + def download_history(self, step=90): """ Download CSV history. However, it is not normalized, and sometimes the download is refused and sent later by mail. """ - self.location('/en/cgi-bin/webscr?cmd=_history-download&nav=0.3.1') - assert self.is_on_page(DownloadHistoryPage) - self.page.download() - return self.page.document + # PayPal limitations as of 2014-06-16 + assert step <= 365*2 + + # To minimize the number of CSV requests, let's first find an + # approximate starting point of transaction history. 
+ end = datetime.date.today() + beginning = self.bisect_oldest_date(self.BEGINNING, end) + + while end > beginning: + start = end - datetime.timedelta(step) + self.location('/en/cgi-bin/webscr?cmd=_history-download&nav=0.3.1') + assert self.is_on_page(DownloadHistoryPage) + self.page.download(start, end) + assert self.is_on_page(SubmitPage) + yield self.page.document + end = start - datetime.timedelta(1) def transfer(self, from_id, to_id, amount, reason=None): raise NotImplementedError() diff --git a/modules/paypal/pages.py b/modules/paypal/pages.py index ffd005e03d..1effc5ded7 100644 --- a/modules/paypal/pages.py +++ b/modules/paypal/pages.py @@ -125,18 +125,16 @@ def get_accounts(self): class DownloadHistoryPage(BasePage): - def download(self, days=90): - today = datetime.date.today() - start = today - datetime.timedelta(days) + def download(self, start, end): self.browser.select_form(name='form1') - self.browser['to_c'] = str(today.year) - self.browser['to_a'] = str(today.month) - self.browser['to_b'] = str(today.day) + self.browser['to_c'] = str(end.year) + self.browser['to_a'] = str(end.month) + self.browser['to_b'] = str(end.day) self.browser['from_c'] = str(start.year) self.browser['from_a'] = str(start.month) self.browser['from_b'] = str(start.day) - self.browser['custom_file_type'] = ['comma_balaffecting'] + self.browser['custom_file_type'] = ['comma_allactivity'] self.browser['latest_completed_file_type'] = [''] self.browser.submit() @@ -147,43 +145,71 @@ class SubmitPage(BasePage): Any result of form submission """ def iter_transactions(self, account): - DATE = 0 - TIME = 1 - NAME = 3 - TYPE = 4 - CURRENCY = 6 - GROSS = 7 - FEE = 8 - NET = 9 - FROM = 10 - TO = 11 - TRANS_ID = 12 - ITEM = 15 - SITE = 24 csv = self.document + + if len(csv.header) == 43: + # Merchant multi-currency account + DATE = 0 + TIME = 1 + NAME = 3 + TYPE = 4 + CURRENCY = 6 + GROSS = 7 + FEE = 8 + NET = 9 + FROM = 10 + TO = 11 + TRANS_ID = 12 + ITEM = 15 + SITE = 24 + elif 
len(csv.header) == 11: + # Regular multi-currency account + DATE = 0 + TIME = 1 + NAME = 3 + TYPE = 4 + CURRENCY = 6 + GROSS = -1 + FEE = -1 + NET = 7 + FROM = -1 + TO = -1 + TRANS_ID = -1 + ITEM = -1 + SITE = -1 + else: + raise ValueError('CSV fields count of %i is not supported' % len(csv.header)) + for row in csv.rows: # we filter accounts by currency if account.get_currency(row[CURRENCY]) != account.currency: continue - trans = Transaction(row[TRANS_ID]) + # analog to dict.get() + get = lambda i, v=None: row[i] if 0 <= i < len(row) else v + + trans = Transaction(get(TRANS_ID, u'')) # silly American locale if re.search(r'\d\.\d\d$', row[NET]): - date = datetime.datetime.strptime(row[DATE] + ' ' + row[TIME], "%m/%d/%Y %I:%M:%S %p") + date = datetime.datetime.strptime(row[DATE] + ' ' + row[TIME], "%m/%d/%Y %H:%M:%S") else: date = datetime.datetime.strptime(row[DATE] + ' ' + row[TIME], "%d/%m/%Y %H:%M:%S") trans.date = date trans.rdate = date line = row[NAME] - if row[ITEM]: + if get(ITEM): line += u' ' + row[ITEM] - if row[SITE]: + if get(SITE): line += u"(" + row[SITE] + u")" trans.raw = line trans.label = row[NAME] + if row[TYPE].startswith(u'Update to eCheck') or \ + row[TYPE].startswith(u'Order'): + continue + if row[TYPE].endswith(u'Credit Card') or row[TYPE].endswith(u'carte bancaire'): trans.type = Transaction.TYPE_CARD elif row[TYPE].endswith(u'Payment Sent') or row[TYPE].startswith(u'Paiement'): @@ -195,11 +221,11 @@ def iter_transactions(self, account): # Net is what happens after the fee (0 for most users), so what is the most "real" trans.amount = clean_amount(row[NET]) - trans._gross = clean_amount(row[GROSS]) - trans._fees = clean_amount(row[FEE]) + trans._gross = clean_amount(get(GROSS, row[NET])) + trans._fees = clean_amount(get(FEE, u'0.00')) - trans._to = row[TO] or None - trans._from = row[FROM] or None + trans._to = get(TO) + trans._from = get(FROM) yield trans @@ -250,14 +276,12 @@ def guess_format(self): time_format = "%H:%M:%S" return 
date_format, time_format, months - def filter(self): + def filter(self, start, end): date_format = self.guess_format()[0] - today = datetime.date.today() - start = datetime.date(1998,6,1) # The day PayPal was founded self.browser.select_form(name='history') self.browser['dateoption'] = ['dateselect'] self.browser['from_date'] = start.strftime(date_format) - self.browser['to_date'] = today.strftime(date_format) + self.browser['to_date'] = end.strftime(date_format) self.browser.submit(name='show') self.browser.select_form(name='history') self.browser.submit(name='filter_2') @@ -301,7 +325,8 @@ def parse(self): info = to_unicode(row.xpath('.//td[@class="paymentTypeInfo"]')[0].text_content().strip()) trans.raw = info + u' ' + trans.label - if u'Authorization' in info or u'Autorisation' in info: + if u'Authorization' in info or u'Autorisation' in info or \ + u'Order' in info: continue if u'Credit Card' in trans.label or u'Carte bancaire' in trans.label: diff --git a/modules/paypal/test.py b/modules/paypal/test.py new file mode 100644 index 0000000000..2a6355f539 --- /dev/null +++ b/modules/paypal/test.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Oleg Plakhotniuk +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+ +from weboob.tools.test import BackendTest + + +class PaypalTest(BackendTest): + BACKEND = 'paypal' + + def test_balance(self): + for account in self.backend.iter_accounts(): + balance = sum(t.amount for t in self.backend.iter_history(account)) + self.assertEqual(balance, account.balance) -- GitLab