From bac984c8704d87d8bb664b28a042f4b94831a24d Mon Sep 17 00:00:00 2001 From: Christophe Francois Date: Mon, 3 Feb 2020 18:24:10 +0100 Subject: [PATCH] [amazon] Add pagination and fix some values for documents We only fetched 10 orders per year, but the order list of each year is paginated. Some orders have no price because they are purchased using "audio credits". In this case the XPath for the date is also different. --- modules/amazon/pages.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/modules/amazon/pages.py b/modules/amazon/pages.py index 0dc9e5e86f..7c1b5607a8 100644 --- a/modules/amazon/pages.py +++ b/modules/amazon/pages.py @@ -19,12 +19,13 @@ from __future__ import unicode_literals -from weboob.browser.pages import HTMLPage, LoggedPage, FormNotFound, PartialHTMLPage +from weboob.browser.pages import HTMLPage, LoggedPage, FormNotFound, PartialHTMLPage, pagination from weboob.browser.elements import ItemElement, ListElement, method from weboob.browser.filters.html import Link from weboob.browser.filters.standard import ( CleanText, CleanDecimal, Env, Regexp, Format, - Field, Currency, RegexpError, Date, Async, AsyncLoad + Field, Currency, RegexpError, Date, Async, AsyncLoad, + Coalesce, ) from weboob.capabilities.bill import DocumentTypes, Bill, Subscription from weboob.capabilities.base import NotAvailable @@ -130,10 +131,14 @@ def obj_label(self): class DocumentsPage(LoggedPage, HTMLPage): + @pagination @method class iter_documents(ListElement): item_xpath = '//div[contains(@class, "order") and contains(@class, "a-box-group")]' + def next_page(self): + return Link('//ul[@class="a-pagination"]/li[@class="a-last"]/a')(self) + class item(ItemElement): klass = Bill load_details = Field('_pre_url') & AsyncLoad @@ -146,20 +151,27 @@ class item(ItemElement): obj_type = DocumentTypes.BILL def obj_date(self): - date = Date(CleanText('.//div[has-class("a-span4") and not(has-class("recipient"))]/div[2]'),
parse_func=parse_french_date, dayfirst=True, default=NotAvailable)(self) - if date is NotAvailable: - return Date(CleanText('.//div[has-class("a-span3") and not(has-class("recipient"))]/div[2]'), - parse_func=parse_french_date, dayfirst=True)(self) - return date + # The date xpath changes depending on the kind of order + return Coalesce( + Date(CleanText('.//div[has-class("a-span4") and not(has-class("recipient"))]/div[2]'), parse_func=parse_french_date, dayfirst=True, default=NotAvailable), + Date(CleanText('.//div[has-class("a-span3") and not(has-class("recipient"))]/div[2]'), parse_func=parse_french_date, dayfirst=True, default=NotAvailable), + Date(CleanText('.//div[has-class("a-span2") and not(has-class("recipient"))]/div[2]'), parse_func=parse_french_date, dayfirst=True, default=NotAvailable), + )(self) def obj_price(self): + # Some orders, audiobooks for example, are paid using "audio credits", they have no price or currency currency = Env('currency')(self) - return CleanDecimal('.//div[has-class("a-col-left")]//span[has-class("value") and contains(., "%s")]' % currency, replace_dots=currency == u'EUR')(self) + return CleanDecimal( + './/div[has-class("a-col-left")]//span[has-class("value") and contains(., "%s")]' % currency, + replace_dots=currency == 'EUR', default=NotAvailable + )(self) def obj_currency(self): currency = Env('currency')(self) - return Currency('.//div[has-class("a-col-left")]//span[has-class("value") and contains(., "%s")]' % currency)(self) + return Currency( + './/div[has-class("a-col-left")]//span[has-class("value") and contains(., "%s")]' % currency, + default=NotAvailable + )(self) def obj_url(self): async_page = Async('details').loaded_page(self) -- GitLab