From 4b5831210cfa2dcbb2f7b432a5739abcaade0dee Mon Sep 17 00:00:00 2001
From: Florian Duguet
Date: Wed, 30 Oct 2019 11:46:40 +0100
Subject: [PATCH] [trainline] browser2 and python3

---
Review notes (text between the "---" line above and the first "diff --git"
line is not part of the commit message):

 * browser.py, iter_documents: a 429 answer is now really retried. The
   previous version slept and then fell through to parse self.page, which
   had not been refreshed by the failed request.
 * browser.py, iter_documents: when no bill was ever returned, min_date is
   still None and "min_date -= relativedelta(months=3)" raised TypeError;
   the loop now stops instead.
 * browser.py, iter_documents: "while i < 10" counter replaced by a bounded
   for loop; "doc.id not in docs.keys()" replaced by "doc.id not in docs".
 * pages.py, SigninPage.get_token: a missing token now yields None instead
   of an empty dict.
 * NOTE(review): the line structure of this patch was rebuilt from a
   whitespace-collapsed copy. Blank context lines are a best-effort guess
   and the blob ids on the "index" lines predate the amendments above;
   regenerate with git format-patch before applying.

 modules/trainline/browser.py | 120 ++++++++++++++++++-----------------
 modules/trainline/module.py  |  21 ++++---
 modules/trainline/pages.py   |  81 ++++++++++++++++++++++++
 3 files changed, 155 insertions(+), 67 deletions(-)
 create mode 100644 modules/trainline/pages.py

diff --git a/modules/trainline/browser.py b/modules/trainline/browser.py
index 09a2a8cb06..4ff24ab9c5 100644
--- a/modules/trainline/browser.py
+++ b/modules/trainline/browser.py
@@ -17,79 +17,81 @@
 # You should have received a copy of the GNU Lesser General Public License
 # along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
 
+from time import sleep
 
-from datetime import datetime
 from dateutil.relativedelta import relativedelta
 
 
-from weboob.browser.browsers import APIBrowser
+from weboob.browser import URL
+from weboob.browser.browsers import LoginBrowser, need_login
 from weboob.exceptions import BrowserIncorrectPassword
-from weboob.browser.filters.standard import CleanDecimal, Date
 from weboob.browser.exceptions import ClientError
-from weboob.capabilities.bill import DocumentTypes, Bill, Subscription
 
+from .pages import SigninPage, UserPage, DocumentsPage
 
 
-class TrainlineBrowser(APIBrowser):
-    BASEURL = 'https://www.trainline.fr/api/v5/'
-    def __init__(self, email, password, *args, **kwargs):
-        super(TrainlineBrowser, self).__init__(*args, **kwargs)
+class TrainlineBrowser(LoginBrowser):
+    BASEURL = 'https://www.trainline.fr'
+    signin = URL(r'/api/v5/account/signin', SigninPage)
+    user_page = URL(r'/api/v5/user', UserPage)
+    documents_page = URL(r'/api/v5/pnrs', DocumentsPage)
+
+    def __init__(self, login, password, *args, **kwargs):
+        super(TrainlineBrowser, self).__init__(login, password, *args, **kwargs)
         self.session.headers['X-Requested-With'] = 'XMLHttpRequest'
 
+    def do_login(self):
         try:
-            me = self.request('account/signin', data={'email': email, 'password': password})
-        except ClientError:
-            raise BrowserIncorrectPassword
+            self.signin.go(data={'email': self.username, 'password': self.password})
+        except ClientError as error:
+            json_response = error.response.json()
+            error_list = json_response.get('errors', {}).get('email', [])
+            error_message = error_list[0] if error_list else None
+            raise BrowserIncorrectPassword(error_message)
 
-        self.session.headers['Authorization'] = 'Token token="%s"' % me['meta']['token']
+        self.session.headers['Authorization'] = 'Token token="%s"' % self.page.get_token()
 
+    @need_login
     def get_subscription_list(self):
-        me = self.request('user')['user']
-        sub = Subscription()
-        sub.subscriber = '%s %s' % (me['first_name'], me['last_name'])
-        sub.id = me['id']
-        sub.label = me['email']
-        yield sub
+        yield self.user_page.go().get_subscription()
 
+    @need_login
     def iter_documents(self, subscription):
-        docs, docs_len, check, month_back, date = list(), -1, 0, 6, None
-        # First request is known
-        bills = self.request('pnrs')
-        while check < month_back:
-            # If not first
-            if docs_len > -1 and date:
-                if check > 0:
-                    # If nothing, we try 4 weeks back
-                    date = (datetime.strptime(date, '%Y-%m-%d') - relativedelta(weeks=4)).strftime('%Y-%m-%d')
-                else:
-                    # Add 8 weeks to last date to be sure to get all
-                    date = (datetime.combine(date, datetime.min.time()) + relativedelta(weeks=8)).strftime('%Y-%m-%d')
-                bills = self.request('pnrs?date=%s' % date)
-
-            docs_len = len(docs)
-            for proof, pnr, trip in zip(bills['proofs'], bills['pnrs'], bills['trips']):
-                # Check if not already in docs list
-                for doc in docs:
-                    if vars(doc)['id'].split('_', 1)[1] == pnr['id']:
-                        break
-                else:
-                    b = Bill()
-                    b.id = '%s_%s' % (subscription.id, pnr['id'])
-                    b._url = proof['url']
-                    b.date = Date().filter(proof['created_at'])
-                    b.format = u"pdf"
-                    b.label = u'Trajet du %s' % Date().filter(trip['departure_date'])
-                    b.type = DocumentTypes.BILL
-                    b.vat = CleanDecimal().filter('0')
-                    if pnr['cents']:
-                        b.price = CleanDecimal().filter(format(pnr['cents']/float(100), '.2f'))
-                    b.currency = pnr['currency']
-                    docs.append(b)
-
-            check += 1
-            # If a new bill is found, we reset check
-            if docs_len < len(docs):
-                date = b.date
-                check = 0
-
-        return iter(docs)
+        min_date = None
+        docs = {}
+
+        # bounded number of requests (429 retries included): never loop forever
+        for _ in range(10):
+            params = {'date': min_date.strftime('%Y-%m-01')} if min_date else None
+            # date params has a very silly behavior
+            # * day seems to be useless, (but we have to put it anyway)
+            # * server return last 3 months from date (including month we give)
+            # ex: date = 2019-09-01 => return bills from 2019-07-01 to 2019-09-30
+            # * this date range behavior seems to not apply for old bills,
+            # it can happen we get bill for 2017 even if we put date=2019-06-01
+            # it is possible maybe because it's the last ones and server doesn't want to
+            new_doc = False
+            try:
+                self.documents_page.go(params=params)
+            except ClientError as error:
+                # CAUTION: if we perform too many requests we can get a 429 response status code
+                if error.response.status_code != 429:
+                    raise
+                # wait 2 seconds and retry the same request: self.page was not
+                # refreshed by the failed request, so it must not be parsed again
+                sleep(2)
+                continue
+            for doc in self.page.iter_documents(subid=subscription.id):
+                if doc.id not in docs:
+                    new_doc = True
+                    docs[doc.id] = doc
+
+                if min_date is None or min_date > doc.date:
+                    min_date = doc.date
+            if not new_doc:
+                if min_date is None:
+                    # no bill at all, there is no date to go back from
+                    break
+                min_date -= relativedelta(months=3)
+
+        return sorted(docs.values(), key=lambda doc: doc.date, reverse=True)
diff --git a/modules/trainline/module.py b/modules/trainline/module.py
index 675798afb1..67fc357a85 100644
--- a/modules/trainline/module.py
+++ b/modules/trainline/module.py
@@ -18,10 +18,13 @@
 # along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
 
 
-from weboob.capabilities.bill import DocumentTypes, CapDocument, Subscription, Document, SubscriptionNotFound, DocumentNotFound
+from weboob.capabilities.bill import (
+    DocumentTypes, CapDocument, Subscription, Document, SubscriptionNotFound,
+    DocumentNotFound,
+)
 from weboob.capabilities.base import find_object, NotAvailable
 from weboob.tools.backend import Module, BackendConfig
-from weboob.tools.value import ValueBackendPassword, Value
+from weboob.tools.value import ValueBackendPassword
 
 from .browser import TrainlineBrowser
 
@@ -31,13 +34,15 @@
 
 class TrainlineModule(Module, CapDocument):
     NAME = 'trainline'
-    DESCRIPTION = u'trainline website'
-    MAINTAINER = u'Edouard Lambert'
+    DESCRIPTION = 'trainline'
+    MAINTAINER = 'Edouard Lambert'
     EMAIL = 'elambert@budget-insight.com'
     LICENSE = 'LGPLv3+'
     VERSION = '1.6'
-    CONFIG = BackendConfig(Value('login', label='Adresse email'),
-                           ValueBackendPassword('password', label='Mot de passe'))
+    CONFIG = BackendConfig(
+        ValueBackendPassword('login', label='Adresse email'),
+        ValueBackendPassword('password', label='Mot de passe')
+    )
 
     BROWSER = TrainlineBrowser
 
@@ -66,7 +71,7 @@ def iter_documents(self, subscription):
     def download_document(self, document):
         if not isinstance(document, Document):
             document = self.get_document(document)
-        if document._url is NotAvailable:
+        if document.url is NotAvailable:
             return
 
-        return self.browser.open(document._url, headers={'Authorization': ''}).content
+        return self.browser.open(document.url, headers={'Authorization': ''}).content
diff --git a/modules/trainline/pages.py b/modules/trainline/pages.py
new file mode 100644
index 0000000000..796463c9da
--- /dev/null
+++ b/modules/trainline/pages.py
@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012-2019 Budget Insight
+
+from __future__ import unicode_literals
+
+from weboob.browser.pages import LoggedPage, JsonPage
+from weboob.browser.elements import DictElement, ItemElement, method
+from weboob.browser.filters.standard import Date, CleanDecimal, Format, Env, Currency, Eval
+from weboob.browser.filters.json import Dict
+from weboob.capabilities.bill import Subscription, Bill
+
+
+class SigninPage(JsonPage):
+    @property
+    def logged(self):
+        return bool(self.get_token())
+
+    def get_token(self):
+        return self.doc.get('meta', {}).get('token')
+
+
+class UserPage(LoggedPage, JsonPage):
+    def get_subscription(self):
+        user = self.doc['user']
+        sub = Subscription()
+        sub.subscriber = '%s %s' % (user['first_name'], user['last_name'])
+        sub.id = user['id']
+        sub.label = user['email']
+
+        return sub
+
+
+class DocumentsPage(LoggedPage, JsonPage):
+    def build_doc(self, text):
+        """
+        this json contains several important lists
+        - pnrs
+        - proofs
+        - folders
+        - trips
+
+        each bill has data inside these lists
+        this function rebuilds doc to put data within same list we call 'bills'
+        """
+        doc = super(DocumentsPage, self).build_doc(text)
+
+        pnrs_dict = {pnr['id']: pnr for pnr in doc['pnrs']}
+        proofs_dict = {proof['pnr_id']: proof for proof in doc['proofs']}
+        folders_dict = {folder['pnr_id']: folder for folder in doc['folders']}
+        trips_dict = {trip['folder_id']: trip for trip in doc['trips']}
+
+        bills = []
+        for key, pnr in pnrs_dict.items():
+            proof = proofs_dict[key]
+            folder = folders_dict[key]
+            trip = trips_dict[folder['id']]
+
+            bills.append({
+                'pnr': pnr,
+                'proof': proof,
+                'folder': folder,
+                'trip': trip,
+            })
+
+        return {'bills': bills}
+
+    @method
+    class iter_documents(DictElement):
+        item_xpath = 'bills'
+
+        class item(ItemElement):
+            klass = Bill
+
+            obj_id = Format('%s_%s', Env('subid'), Dict('pnr/id'))
+            obj_url = Dict('proof/url')
+            obj_date = Date(Dict('proof/created_at'))
+            obj_format = 'pdf'
+            obj_label = Format('Trajet du %s', Date(Dict('trip/departure_date')))
+            obj_price = Eval(lambda x: x / 100, CleanDecimal(Dict('pnr/cents')))
+            obj_currency = Currency(Dict('pnr/currency'))
-- 
GitLab