From 576ed2b47892f7fb2aa7fc12876383a450fa9fea Mon Sep 17 00:00:00 2001
From: Romain Bignon
Date: Sun, 6 Jan 2019 14:05:28 +0100
Subject: [PATCH] chronopost: port to browser2

---
Reviewer notes (this section is not part of the commit message):
  * get_tracking_info(): the Referer header was formatted with the builtin
    `id` instead of the `_id` argument; it now uses `_id`.
  * The named group of the `track` URL pattern and the URL of the licence
    notice had lost their angle-bracketed parts; they are restored as
    `(?P<id>\w+)` (the name is fixed by `track.go(id=...)`) and
    `<http://www.gnu.org/licenses/>`.
  * Indentation inside the hunks was rebuilt from a whitespace-collapsed
    copy; verify it against the target tree before applying.

 modules/chronopost/browser.py | 24 ++++--------
 modules/chronopost/module.py  |  5 +--
 modules/chronopost/pages.py   | 70 +++++++++++++++++------------------
 3 files changed, 42 insertions(+), 57 deletions(-)

diff --git a/modules/chronopost/browser.py b/modules/chronopost/browser.py
index a87a174a1c..6bf929b0a2 100644
--- a/modules/chronopost/browser.py
+++ b/modules/chronopost/browser.py
@@ -17,29 +17,19 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 
-from weboob.deprecated.browser import Browser
+from weboob.browser import PagesBrowser, URL
 
-from .pages import IndexPage, TrackPage
+from .pages import TrackPage
 
 
 __all__ = ['ChronopostBrowser']
 
 
-class ChronopostBrowser(Browser):
-    PROTOCOL = 'http'
-    DOMAIN = 'www.chronopost.fr'
-    ENCODING = None
+class ChronopostBrowser(PagesBrowser):
+    BASEURL = 'https://www.chronopost.fr'
 
-    PAGES = {
-        'http://www.chronopost.fr/transport-express/livraison-colis': IndexPage,
-        'http://www.chronopost.fr/transport-express/livraison-colis/.*accueil/suivi.*': TrackPage,
-    }
+    track = URL(r'/tracking-no-drupal/suivi-colis\?listeNumerosLT=(?P<id>\w+)&langue=fr', TrackPage)
 
     def get_tracking_info(self, _id):
-        self.home()
-
-        assert self.is_on_page(IndexPage)
-        self.page.track_package(_id)
-
-        assert self.is_on_page(TrackPage)
-        return self.page.get_info(_id)
+        self.track.go(id=_id, headers={'Referer': 'https://www.chronopost.fr/fr/chrono_suivi_search?listeNumerosLT=%s' % _id})
+        return self.page.get_parcel()
diff --git a/modules/chronopost/module.py b/modules/chronopost/module.py
index d7d0894035..0dc79de557 100644
--- a/modules/chronopost/module.py
+++ b/modules/chronopost/module.py
@@ -29,7 +29,7 @@
 
 class ChronopostModule(Module, CapParcel):
     NAME = 'chronopost'
-    DESCRIPTION = u'Chronopost website'
+    DESCRIPTION = u'Chronopost'
     MAINTAINER = u'Romain Bignon'
     EMAIL = 'romain@weboob.org'
     VERSION = '1.4'
@@ -37,5 +37,4 @@ class ChronopostModule(Module, CapParcel):
     BROWSER = ChronopostBrowser
 
     def get_parcel_tracking(self, id):
-        with self.browser:
-            return self.browser.get_tracking_info(id)
+        return self.browser.get_tracking_info(id)
diff --git a/modules/chronopost/pages.py b/modules/chronopost/pages.py
index d03ee4ac79..684fe98f1a 100644
--- a/modules/chronopost/pages.py
+++ b/modules/chronopost/pages.py
@@ -18,51 +18,47 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 
 
-import re
-from dateutil.parser import parse as parse_date
-
 from weboob.capabilities.parcel import Parcel, Event
 from weboob.capabilities import NotAvailable
-from weboob.deprecated.browser import Page
+from weboob.browser.pages import JsonPage, HTMLPage
+from weboob.browser.elements import ItemElement, ListElement, method
+from weboob.browser.filters.standard import Env, CleanText, DateTime
+from weboob.tools.date import parse_french_date
 
+
+class TrackPage(JsonPage):
+    def build_doc(self, text):
+        doc = super(TrackPage, self).build_doc(text)
 
-class IndexPage(Page):
-    def track_package(self, _id):
-        self.browser.select_form(predicate=lambda form: form.attrs.get('id', '') == 'suivreEnvoi')
-        self.browser['chronoNumbers'] = _id.encode('utf-8')
-        self.browser.submit()
+        content = ''.join([doc['top'], doc['tab']])
+        html_page = HTMLPage(self.browser, self.response)
+        return html_page.build_doc(content.encode(self.encoding))
 
+    @method
+    class get_parcel(ItemElement):
+        klass = Parcel
 
-class TrackPage(Page):
-    def get_info(self, id):
-        if len(self.document.xpath('//libelle[@nom="MSG_AUCUN_EVT"]')) > 0:
-            return None
+        obj_id = Env('id')
+        obj_info = CleanText('//div[has-class("ch-block-subtitle-content")]//div[has-class("ch-colis-information")]/text()')
+        obj_arrival = CleanText('//div[has-class("ch-block-subtitle-content")]//div[has-class("ch-colis-information")]/text()[3]',
+                                replace=[(u'\xe0', '')], default=NotAvailable) \
+            & DateTime(dayfirst=True, parse_func=parse_french_date, default=NotAvailable)
 
-        p = Parcel(id)
-        p.arrival = NotAvailable
-        p.history = []
+        def obj_status(self):
+            el = self.el.xpath('//div[has-class("ch-suivi-colis-light-info") and has-class("active")]')[0]
+            if 'last' in el.attrib['class']:
+                return Parcel.STATUS_ARRIVED
+            if 'first' in el.attrib['class']:
+                return Parcel.STATUS_PLANNED
 
-        for i, tr in enumerate(self.document.xpath('//table[@class="tabListeEnvois"]//tr')):
-            tds = tr.findall('td')
-            if len(tds) < 3:
-                continue
+            return Parcel.STATUS_IN_TRANSIT
 
-            ev = Event(i)
-            ev.location = unicode(tds[1].text) if tds[1].text else None
-            ev.activity = unicode(tds[1].find('br').tail)
-            if tds[-1].text is not None:
-                ev.activity += ', ' + self.parser.tocleanstring(tds[-1])
-            date = re.sub('[a-z]+', '', self.parser.tocleanstring(tds[0])).strip()
-            date = re.sub('(\d+)/(\d+)/(\d+)', r'\3-\2-\1', date)
-            ev.date = parse_date(date)
-            p.history.append(ev)
+        class obj_history(ListElement):
+            item_xpath = '//table[has-class("ch-block-suivi-tab")]//tr[has-class("toggleElmt")]'
 
-        p.info = ' '.join([t.strip() for t in self.document.xpath('//div[@class="numeroColi2"]')[0].itertext()][1:])
-        if u'Livraison effectuée' in p.history[0].activity:
-            p.status = p.STATUS_ARRIVED
-        elif u"en cours d'acheminement" in p.history[0].activity or \
-                u"en cours de livraison" in p.history[0].activity or \
-                u"Envoi entré dans le pays de destination" in p.history[0].activity:
-            p.status = p.STATUS_IN_TRANSIT
+            class item(ItemElement):
+                klass = Event
 
-        return p
+                obj_date = CleanText('.//td[1]') & DateTime(dayfirst=True, parse_func=parse_french_date)
+                obj_location = CleanText('.//td[2]/text()[following-sibling::br]')
+                obj_activity = CleanText('.//td[2]/text()[preceding-sibling::br]')
-- 
GitLab