From 26d632a461ea44ee0ec5cc6ce052d10e54c9a9c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Revol?=
Date: Sat, 1 Oct 2016 04:42:02 +0200
Subject: [PATCH] [linuxjobs] first working implementation

As published in GLMF.
---
Reviewer notes (this area is ignored when the patch is applied):
  * browser.py: swaps the template Page1/Page2 URLs for advert_page and
    search_page on https://www.linuxjobs.fr, and replaces get_stuff() with
    get_job_advert() and search_job(). search_job() returns an empty list
    when no pattern is given and URL-quotes the UTF-8 encoded pattern.
  * module.py: get_job_advert() and search_job() now delegate to the
    browser instead of raising NotImplementedError.
  * pages.py: replaces the Page1/Page2 stubs with AdvertPage (one
    BaseJobAdvert filled from the page title) and SearchPage (one
    BaseJobAdvert per matching result link).

 modules/linuxjobs/browser.py | 27 +++++++++++++++++----------
 modules/linuxjobs/module.py  |  5 +++--
 modules/linuxjobs/pages.py   | 29 +++++++++++++++++++++++------
 3 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/modules/linuxjobs/browser.py b/modules/linuxjobs/browser.py
index b578f7ce6b..3d0181474d 100644
--- a/modules/linuxjobs/browser.py
+++ b/modules/linuxjobs/browser.py
@@ -20,20 +20,27 @@
 
 from weboob.browser import PagesBrowser, URL
 
-from .pages import Page1, Page2
+from .pages import SearchPage, AdvertPage
+
+import urllib
 
 
 class LinuxJobsBrowser(PagesBrowser):
-    BASEURL = 'http://www.linuxjobs.com'
+    BASEURL = 'https://www.linuxjobs.fr'
+
+    advert_page = URL('/jobs/(?P<id>.+)', AdvertPage)
+    search_page = URL('/search/(?P<job>)', SearchPage)
 
-    page1 = URL('/page1\?id=(?P<id>.+)', Page1)
-    page2 = URL('/page2', Page2)
+    def get_job_advert(self, _id, advert):
+        self.advert_page.go(id=_id)
 
-    def get_stuff(self, _id):
-        self.page1.go(id=_id)
+        assert self.advert_page.is_here()
+        return self.page.get_job_advert(obj=advert)
 
-        assert self.page1.is_here()
-        self.page.do_stuff(_id)
+    def search_job(self, pattern=None):
+        if pattern is None:
+            return []
+        self.search_page.go(job=urllib.quote_plus(pattern.encode('utf-8')))
 
-        assert self.page2.is_here()
-        return self.page.do_more_stuff()
+        assert self.search_page.is_here()
+        return self.page.iter_job_adverts()
diff --git a/modules/linuxjobs/module.py b/modules/linuxjobs/module.py
index 0984cdf508..d1e8b3c870 100644
--- a/modules/linuxjobs/module.py
+++ b/modules/linuxjobs/module.py
@@ -55,7 +55,7 @@ def get_job_advert(self, _id, advert=None):
         :type advert: BaseJobAdvert
         :rtype: :class:`BaseJobAdvert` or None if not found.
         """
-        raise NotImplementedError()
+        return self.browser.get_job_advert(_id, advert)
 
     def search_job(self, pattern=None):
         """
@@ -65,4 +65,5 @@ def search_job(self, pattern=None):
         :type pattern: str
         :rtype: iter[:class:`BaseJobAdvert`]
         """
-        raise NotImplementedError()
+        for job_advert in self.browser.search_job(pattern):
+            yield job_advert
diff --git a/modules/linuxjobs/pages.py b/modules/linuxjobs/pages.py
index 9afe015a06..e6451f10ce 100644
--- a/modules/linuxjobs/pages.py
+++ b/modules/linuxjobs/pages.py
@@ -18,14 +18,31 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 
 
+from weboob.capabilities.job import BaseJobAdvert
 from weboob.browser.pages import HTMLPage
+from weboob.browser.elements import ItemElement, ListElement, method
+from weboob.browser.filters.standard import Regexp, CleanText, Date, Env, BrowserURL
+from weboob.browser.filters.html import Link, CleanHTML
 
+class AdvertPage(HTMLPage):
+    @method
+    class get_job_advert(ItemElement):
+        klass = BaseJobAdvert
 
-class Page1(HTMLPage):
-    def do_stuff(self, _id):
-        raise NotImplementedError()
+        obj_id = Env('id')
+        obj_url = BrowserURL('advert_page', id=Env('id'))
+        obj_title = CleanText('//title')
+        obj_job_name = CleanText('//title')
 
 
-class Page2(HTMLPage):
-    def do_more_stuff(self):
-        raise NotImplementedError()
+class SearchPage(HTMLPage):
+    @method
+    class iter_job_adverts(ListElement):
+        item_xpath = '//a[@class="list-group-item "]'
+
+        class item(ItemElement):
+            klass = BaseJobAdvert
+
+            obj_id = Regexp(Link('.'), '.*fr/jobs/(\d+)/.*')
+            obj_title = CleanText('h4/span[@class="job-title"]')
+            obj_society_name = CleanText('h4/span[@class="job-company"]')
-- 
GitLab