Newer
Older
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.shop import Order, Payment, Item
from weboob.browser.pages import HTMLPage, pagination, NextPage
from weboob.capabilities.base import empty, NotAvailable
from datetime import datetime
from decimal import Decimal
import re
# French month names, listed by hand so that date parsing does not depend on a French locale
FRENCH_MONTHS = [u'janvier', u'février', u'mars', u'avril', u'mai', u'juin', u'juillet', u'août', u'septembre', u'octobre', u'novembre', u'décembre']
class AmazonPage(HTMLPage):
    """Base class shared by the Amazon pages defined in this module."""

    @property
    def logged(self):
        """True when the document contains the French sign-out text."""
        sign_out_nodes = self.doc.xpath(
            u'//*[contains(text(),"Déconnectez-vous")]')
        return True if sign_out_nodes else False
class HomePage(AmazonPage):
    """Amazon home page."""

    def to_login(self):
        """Open the first "Identifiez-vous" link and return the new page.

        The browser is first brought to (or kept on) its ``home`` URL.
        Raises IndexError when the document holds no such link.
        """
        self.browser.home.stay_or_go()
        login_hrefs = self.doc.xpath(
            '(//a[contains(., "Identifiez-vous")]//@href)[1]')
        self.browser.location(login_hrefs[0])
        return self.browser.page
class LoginPage(AmazonPage):
    """Amazon sign-in page."""

    def login(self, email, password, captcha=None):
        """Fill in the ``signIn`` form and submit it.

        :param email: value for the ``email`` field
        :param password: value for the ``password`` field
        :param captcha: optional captcha answer, sent in the ``guess`` field
        """
        form = self.get_form(name='signIn')
        form['email'] = email
        form['password'] = password
        if captcha is not None:
            form['guess'] = captcha
        # The form is local to this method: without submitting it here the
        # credentials would never be sent.
        form.submit()

    def has_captcha(self):
        """Return the ``src`` of the captcha image, or None when there is none."""
        # Evaluate the XPath once instead of once for the test and once for
        # the value.
        sources = self.doc.xpath('//img[@id="auth-captcha-image"]/@src')
        return sources[0] if sources else None
class HistoryPage(AmazonPage):
    """Order-history page: lists order numbers and a per-year filter."""

    def iter_years(self):
        """Yield, for each ``year-*`` filter value, the page ``to_year`` returns."""
        year_values = self.opt_years()
        for value in year_values:
            yield self.to_year(value)

    @pagination
    def iter_orders(self):
        """Yield ``browser.to_order(id)`` for every order number on the page.

        Raises NextPage with the "Suivante" link when one is present, after
        all orders of the current page have been yielded.
        """
        order_ids = self.doc.xpath(
            u'//span[contains(text(),"N° de commande")]/../span[2]/text()')
        for raw_id in order_ids:
            yield self.browser.to_order(raw_id.strip())
        next_links = self.doc.xpath(
            u'//ul[@class="a-pagination"]'
            u'//a[contains(text(),"Suivante")]/@href')
        if next_links:
            raise NextPage(next_links[0])

    def to_year(self, year):
        """Submit the time-period form with ``year`` and return the new page."""
        chooser = self.get_form(
            '//form[contains(@class,"time-period-chooser")]')
        chooser['orderFilter'] = [year]
        chooser.submit()
        return self.browser.page

    def opt_years(self):
        """Return the ``orderFilter`` option values that start with ``year-``."""
        option_values = self.doc.xpath(
            '//select[@name="orderFilter"]/option/@value')
        selected = []
        for value in option_values:
            if value.startswith('year-'):
                selected.append(value)
        return selected
class OrderPage(AmazonPage):
def shouldSkip(self):
    """Tell whether the page shows one of the statuses that must be skipped.

    Only fully shipped and delivered orders are reported, because they have
    finalized payment amounts. Payment for not yet shipped orders may change,
    and is not always available.

    :return: True when any element's text contains one of the listed statuses
    """
    # TODO: check whether other French statuses should be listed here.
    skipped_statuses = (
        u'En préparation pour expédition',
        u'En cours de préparation',
        u'Commande annulée',
    )
    # any() stops at the first status found instead of collecting every
    # matching node for every status.
    return any(
        self.doc.xpath(u'//*[contains(text(),$text)]', text=status)
        for status in skipped_statuses)
def decimal_amount(self, amount):
# Looks for "EUR " followed by a run of digits and commas in ``amount``; the
# run is captured as group 1. No DOTALL flag is given, so '.' does not cross
# newlines.
# NOTE(review): the statements guarded by ``if m:`` are not visible in this
# excerpt, so the return value cannot be described from here -- confirm
# against the full file.
m = re.match(u'.*EUR ([,0-9]+).*', amount)
if m:
def month_to_int(self, text):
    """Return ``text`` with each French month name replaced by its number.

    Numbers are 1-based positions in FRENCH_MONTHS ("janvier" -> "1").
    """
    converted = text
    for number, name in enumerate(FRENCH_MONTHS, 1):
        converted = converted.replace(name, str(number))
    return converted
class OrderNewPage(OrderPage):
is_here = u'//*[contains(text(),"Commandé le")]'
def order(self):
    """Build the Order described by this page.

    :return: the populated Order, or None when ``shouldSkip()`` is true
    """
    if self.shouldSkip():
        return None
    order = Order(id=self.order_number())
    order.date = self.order_date()
    order.tax = self.tax()
    order.discount = self.discount()
    order.shipping = self.shipping()
    order.total = self.grand_total()
    # Hand the populated object back; otherwise the work above is discarded.
    return order
def bill(self):
    """Return ``{'url': ..., 'format': ...}`` for the printable bill.

    The invoice link ("pdf") is preferred; otherwise the order-summary link
    ("html") is used. Raises IndexError when neither link is present.
    """
    invoice_links = self.doc.xpath(
        u'//a[contains(text(), "Imprimer une facture")]')
    summary_links = self.doc.xpath(
        u'//a[contains(text(), "Imprimer un récapitulatif de commande")]')
    if invoice_links:
        return {'url': invoice_links[0].attrib['href'], 'format': u'pdf'}
    return {'url': summary_links[0].attrib['href'], 'format': u'html'}
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
re.match(u'.*Commandé le ([0-9]+ [0-9]+ [0-9]+) .*',
self.month_to_int(self.date_num())).group(1),
'%d %m %Y')
def order_number(self):
    """Return the order number found in ``date_num()``, or None if absent."""
    found = re.match(u'.*N° de commande : +([^ ]+) .*', self.date_num())
    return found.group(1) if found else None
def payments(self):
    """Yield the Payment objects of this order.

    A "GIFT CARD" payment comes first when ``gift()`` is truthy. Then come
    the entries of ``transactions()``; when there are none, a single payment
    for the grand total is yielded using the first method of ``paymethods()``.
    """
    if self.gift():
        gift_payment = Payment()
        gift_payment.date = self.order_date()
        gift_payment.method = u'GIFT CARD'
        gift_payment.amount = -self.gift()
        yield gift_payment

    recorded = list(self.transactions())
    if recorded:
        for entry in recorded:
            yield entry
        return

    for method in self.paymethods():
        fallback = Payment()
        fallback.date = self.order_date()
        fallback.method = method
        fallback.amount = self.grand_total()
        yield fallback
        # Only the first payment method is used.
        return
def paymethods(self):
    """Yield "<image alt> <digits>" for each payment-method heading."""
    headings = self.doc.xpath(
        u'//h5[contains(text(),"Méthode de paiement")]')
    for heading in headings:
        card_name = heading.xpath('../div/img/@alt')[0]
        card_text = heading.xpath('../div/span/text()')[0]
        digit_match = re.match(r'[^0-9]*([0-9]+)[^0-9]*', card_text)
        yield u'%s %s' % (card_name, digit_match.group(1))
def grand_total(self):
    """Return ``decimal_amount`` of the text next to "Montant total TTC"."""
    total_texts = self.doc.xpath(
        '//span[contains(text(),"Montant total TTC")]/..'
        '/following-sibling::div[1]/span/text()')
    first_total = total_texts[0].strip()
    return self.decimal_amount(first_total)
def date_num(self):
    """Return the order date/number header text as one line.

    The text nodes of the ``order-date-invoice-item`` spans are joined with
    single spaces and newlines are removed.
    """
    fragments = self.doc.xpath(
        '//span[@class="order-date-invoice-item"]/text()')
    joined = u' '.join(fragments)
    return joined.replace('\n', '')
def tax(self):
    """Return ``amount()`` for the " TVA" label."""
    vat_label = u' TVA'
    return self.amount(vat_label)
def shipping(self):
    """Return ``amount()`` for the "Livraison :" label."""
    shipping_label = u'Livraison :'
    return self.amount(shipping_label)
def discount(self):
    """Return ``amount()`` over all the discount labels."""
    discount_labels = (
        u'Bon de réduction',
        u'Subscribe & Save',
        u'Your Coupon Savings',
        u'Lightning Deal',
    )
    return self.amount(*discount_labels)
def gift(self):
    """Return ``amount()`` for the "Gift Card Amount" label."""
    gift_label = u'Gift Card Amount'
    return self.amount(gift_label)
def amount(self, *names):
# For every label in ``names``, takes the text selected by the XPath below,
# parses it with ``decimal_amount`` (using 0.0 when that returns a falsy
# value) and sums the results into a Decimal.
# NOTE(review): the ``except TypeError`` clause has no visible ``try:`` line
# in this excerpt; presumably one precedes the ``return`` -- confirm against
# the full file.
return Decimal(sum(
self.decimal_amount(amount.strip()) or 0.0
for n in names for amount in self.doc.xpath(
'(//span[contains(text(),$name)]/../..//span)[2]/text()', name=n)))
except TypeError:
# A TypeError is reported as NotAvailable.
return NotAvailable
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
def transactions(self):
    """Yield a Payment for each shipment row of the "Transactions" section.

    Rows whose text does not contain "Expédition" are ignored. Each kept row
    is expected to read "<day> <month> <year> - <method>: EUR <amount>"; a
    row that does not match raises AttributeError.
    """
    rows = self.doc.xpath('//span[contains(text(),"Transactions")]'
                          '/../../div/div')
    for row in rows:
        text = row.text_content().strip().replace('\n', ' ')
        if u'Expédition' not in text:
            continue
        # The method must start with an ASCII letter. The former class
        # [A-z] also accepted the punctuation between 'Z' and 'a'
        # ([, \, ], ^, _ and `).
        date, method, amount = re.match(
            '.* ' '([0-9]+ [^ ]+ [0-9]+)'
            '[ -]+' '([A-Za-z][^:]+)'
            ': +' '(EUR [^ ]+)', text).groups()
        pmt = Payment()
        # Month names are turned into numbers first so that strptime does
        # not need a French locale.
        pmt.date = datetime.strptime(self.month_to_int(date), '%d %m %Y')
        pmt.method = method.replace(u'finissant par ', u'').upper()
        pmt.amount = self.decimal_amount(amount)
        yield pmt
def items(self):
# Builds Item objects from the elements found under the "a-box shipment"
# divs; an Item is yielded only when both its label and price are truthy.
for item in self.doc.xpath('//div[contains(@class,"a-box shipment")]'
'/div/div/div/div/div/div'):
# href of the first "/gp/product" link, or u'' when there is none.
url = (item.xpath(u'*//a[contains(@href,"/gp/product")]/@href') +
[u''])[0]
# Concatenated text of the product link(s).
label = u''.join(item.xpath(
'*//a[contains(@href,"/gp/product")]/text()')).strip()
# Concatenation of the stripped text nodes that start with "EUR".
price = u''.join(x.strip() for x in item.xpath(
'*//span[contains(text(),"EUR")]/text()')
if x.strip().startswith('EUR'))
price = self.decimal_amount(price)
# A label of the form "<count> de <name>" carries a quantity: the price is
# multiplied by the count and the label reduced to the name.
multi = re.match(u'([0-9]+) de (.*)', label)
if multi:
amount, label = multi.groups()
price *= Decimal(amount)
if url:
# NOTE(review): the continuation of the next statement is not visible in
# this excerpt (the line ends with a backslash) -- confirm against the full
# file what is appended to BASEURL.
url = unicode(self.browser.BASEURL) + \
if label and price:
itm = Item()
itm.label = label
itm.url = url
itm.price = price
yield itm
class OrderOldPage(OrderPage):
is_here = u'//*[contains(text(),"Amazon.fr numéro de commande")]'
def order(self):
    """Build the Order described by this page.

    Amounts for which ``empty()`` is true are stored as Decimal zero.

    :return: the populated Order, or None when ``shouldSkip()`` is true
    """
    if self.shouldSkip():
        return None

    def or_zero(value):
        # Each accessor is evaluated once and its result reused, instead of
        # being called once for the test and once more for the value.
        return Decimal(0) if empty(value) else Decimal(value)

    order = Order(id=self.order_number())
    order.date = self.order_date()
    order.tax = or_zero(self.tax())
    order.discount = or_zero(self.discount())
    order.shipping = or_zero(self.shipping())
    order.total = or_zero(self.grand_total())
    # Hand the populated object back; otherwise the work above is discarded.
    return order
def bill(self):
    """Return ``{'url': ..., 'format': u'html'}`` for the print-invoice link.

    Raises IndexError when no "print-invoice" image link is present.
    """
    invoice_anchors = self.doc.xpath(
        u'//img[contains(@src, "print-invoice")]/parent::a')
    first_anchor = invoice_anchors[0]
    return {'url': first_anchor.attrib['href'], 'format': u'html'}
def order_date(self):
# Reads the text of the first <b> element containing "Commande numérique"
# and extracts the month word that sits between two numbers.
date_str = self.doc.xpath(u'//b[contains(text(),"Commande numérique")]')[0].text
month_str = re.match(u'.*Commande numérique : [0-9]+ ([^ ]+) [0-9]+.*', date_str).group(1)
# The month word is replaced by its 1-based position in FRENCH_MONTHS so the
# date becomes "<day> <month number> <year>".
# NOTE(review): the beginning of the expression below is not visible in this
# excerpt; the trailing '%d %m %Y' suggests a strptime call -- confirm
# against the full file.
re.match(u'.*Commande numérique : ([0-9]+ [0-9]+ [0-9]+).*',
date_str.replace(month_str, str(FRENCH_MONTHS.index(month_str) + 1))).group(1),
'%d %m %Y')
def order_number(self):
# Returns ``num_com``, built from the text that follows the
# "Amazon.fr numéro de commande" <b> element and then stripped.
# NOTE(review): the line that assigns ``num_com`` and opens this expression
# is not visible in this excerpt -- confirm against the full file.
u'//b[contains(text(),"Amazon.fr numéro de commande")]/../text()')
).strip()
return num_com
def tax(self):
    """Return ``sum_amounts()`` for the "TVA:" label."""
    vat_label = u'TVA:'
    return self.sum_amounts(vat_label)
def discount(self):
# Sums the per-shipment amounts of the discount labels via ``sum_amounts``.
# NOTE(review): the argument list below is cut off in this excerpt (no
# closing parenthesis) -- confirm the remaining labels against the full file.
return self.sum_amounts(u'Subscribe & Save:', u'Bon de réduction:',
return self.sum_amounts(u'Shipping & Handling:', u'Free shipping:',
u'Free Shipping:')
def payments(self):
    """Yield the Payment objects of this order.

    A "GIFT CARD" payment is yielded for every shipment with a truthy gift
    amount. Then come the entries of ``transactions()``; when there are
    none, a single payment for the grand total is yielded using the first
    method of ``paymethods()``.
    """
    for shmt in self.shipments():
        gift = self.gift(shmt)
        if gift:
            pmt = Payment()
            pmt.date = self.order_date()
            # Label gift-card payments the same way
            # OrderNewPage.payments does; the method was left unset here.
            pmt.method = u'GIFT CARD'
            pmt.amount = -gift
            yield pmt
    transactions = list(self.transactions())
    if transactions:
        for t in transactions:
            yield t
        return
    for method in self.paymethods():
        pmt = Payment()
        pmt.date = self.order_date()
        pmt.method = method
        pmt.amount = self.grand_total()
        yield pmt
        # Only the first payment method is used.
        break
def shipments(self):
    """Yield every <b> element whose text contains a shipment cue.

    The cues are tried in order: "Shipment #", then
    "Subscribe and Save Shipment".
    """
    # The bare line-number literals that sat between the loop header and the
    # ``yield`` were extraction residue and have been removed.
    for cue in (u'Shipment #', u'Subscribe and Save Shipment'):
        for shmt in self.doc.xpath('//b[contains(text(),$cue)]', cue=cue):
            yield shmt
def items(self):
# Yields Item objects built from the rows of the "Articles commandés" table
# of each shipment.
# NOTE(review): indentation is lost in this excerpt, so it is unclear whether
# the price/Item statements run once per row or once per matching <div> --
# confirm against the full file.
for shmt in self.shipments():
# Climbs eight levels up from the shipment marker to reach the enclosing
# container, then locates the items heading inside it.
root = shmt.xpath(u'../../../../../../../..'
u'//b[text()="Articles commandés"]')[0]
# The first <tr> is skipped.
for item in root.xpath('../../../tr')[1:]:
count = url = label = None
for div in item.xpath('*//div'):
# Looks for "<count> of:<label>" in the div's own text.
m = re.match(u'^\s*(\d+)\s*of:(.*)$', div.text,
re.MULTILINE + re.DOTALL)
if not m:
continue
count = Decimal(m.group(1).strip())
label = unicode(m.group(2).strip())
if label:
url = u''
else:
# No label in the text itself: take label and URL from the product link.
a = div.xpath('*//a[contains(@href,"/gp/product")]')[0]
url = unicode(a.attrib['href'])
label = unicode(a.text.strip())
# The price text is read from the last <div> of the row and multiplied by
# the count.
price1 = item.xpath('*//div')[-1].text.strip()
price = count * self.decimal_amount(price1)
itm = Item()
itm.label = label
itm.url = url
itm.price = price
yield itm
def sum_amounts(self, *names):
    """Return the sum of ``amount(shipment, name)`` over shipments and names."""
    total = 0
    for shipment in self.shipments():
        for label in names:
            total = total + self.amount(shipment, label)
    return total
def amount(self, shmt, name):
    """Return the amount labelled ``name`` in the totals table of ``shmt``.

    The label is searched first as plain cell text, then as bold cell text;
    the value is read from the last cell of the matching row. Decimal(0) is
    returned when the label is not found.
    """
    totals_tables = shmt.xpath(u'../../../../../../../..'
                               u'//td[text()="Sous-total articles: "]/../..')
    for table in totals_tables:
        plain_cells = table.xpath(u'tr/td[text()=$name]', name=name)
        if plain_cells:
            value_cell = plain_cells[0].xpath('../td')[-1]
            return self.decimal_amount(value_cell.text.strip())
        bold_cells = table.xpath(u'tr/td/b[text()=$name]', name=name)
        if bold_cells:
            value_cell = bold_cells[0].xpath('../../td/b')[-1]
            return self.decimal_amount(value_cell.text.strip())
    return Decimal(0)
def gift(self, shmt):
    """Return ``amount()`` of the "Gift Card Amount:" row for ``shmt``."""
    gift_label = u'Gift Card Amount:'
    return self.amount(shmt, gift_label)
def paymethods(self):
# Yields one upper-cased "<name> <last digits>" string per payment method
# listed in the parent element of the "Payment Method: " <b> tag; yields
# nothing when that tag is absent.
root = self.doc.xpath('//b[text()="Payment Method: "]/..')
if len(root) == 0:
return
root = root[0]
text = root.text_content().strip()
# The text is consumed from the front: each pass tries the patterns in order
# and cuts the matched prefix off.
while text:
for pattern in [
# Everything up to and including the "Payment Method:" heading.
u'^.*Payment Method:',
# One method: its name, then its last digits on the next line.
u'^([^\n]+)\n +\| Last digits: +([0-9]+)\n',
# The trailing billing-address section.
u'^Billing address.*$']:
match = re.match(pattern, text, re.DOTALL+re.MULTILINE)
if match:
text = text[match.end():].strip()
# Only the method pattern has groups, so only it produces output.
if match.groups():
yield u' '.join(match.groups()).upper()
break
# NOTE(review): presumably this ``else`` belongs to the ``for`` loop, ending
# the ``while`` when no pattern matches -- indentation is lost in this
# excerpt, confirm against the full file.
else:
break
def transactions(self):
    """Yield a Payment for each row of the "Credit Card transactions" table.

    The first cell holds "<label>" and "<date>" separated by a non-breaking
    space; the second cell holds the amount.
    """
    rows = self.doc.xpath(
        u'//div[contains(b,"Credit Card transactions")]'
        u'/following-sibling::table[1]/tr')
    for row in rows:
        first_cell = row.xpath('td[1]/text()')[0].strip()
        label, raw_date = first_cell.split(u'\xa0')
        raw_amount = row.xpath('td[2]/text()')[0].strip()
        when = datetime.strptime(raw_date, '%B %d, %Y:')
        # Drop the "ending in " wording and the label's last character.
        how = label.replace(u'ending in ', u'')[:-1].upper()
        how_much = self.decimal_amount(raw_amount)
        payment = Payment()
        payment.date = when
        payment.method = how
        payment.amount = how_much
        yield payment
def grand_total(self):
    """Return ``decimal_amount`` of the bold "Total pour cette commande" text."""
    total_nodes = self.doc.xpath(
        u'//td[contains(b,"Total pour cette commande")]/b')
    total_text = total_nodes[0].text
    return self.decimal_amount(total_text)