Commit 26d632a4 authored by François Revol

[linuxjobs] first working implementation

As published in GLMF.
parent b6a4c0d4
......@@ -20,20 +20,27 @@
from weboob.browser import PagesBrowser, URL
from .pages import Page1, Page2
from .pages import SearchPage, AdvertPage
import urllib
class LinuxJobsBrowser(PagesBrowser):
BASEURL = 'http://www.linuxjobs.com'
BASEURL = 'https://www.linuxjobs.fr'
advert_page = URL('/jobs/(?P<id>.+)', AdvertPage)
search_page = URL('/search/(?P<job>)', SearchPage)
page1 = URL('/page1\?id=(?P<id>.+)', Page1)
page2 = URL('/page2', Page2)
def get_job_advert(self, _id, advert):
self.advert_page.go(id=_id)
def get_stuff(self, _id):
self.page1.go(id=_id)
assert self.advert_page.is_here()
return self.page.get_job_advert(obj=advert)
assert self.page1.is_here()
self.page.do_stuff(_id)
def search_job(self, pattern=None):
if pattern is None:
return []
self.search_page.go(job=urllib.quote_plus(pattern.encode('utf-8')))
assert self.page2.is_here()
return self.page.do_more_stuff()
assert self.search_page.is_here()
return self.page.iter_job_adverts()
......@@ -55,7 +55,7 @@ class LinuxJobsModule(Module, CapJob):
:type advert: BaseJobAdvert
:rtype: :class:`BaseJobAdvert` or None if not found.
"""
raise NotImplementedError()
return self.browser.get_job_advert(_id, advert)
def search_job(self, pattern=None):
"""
......@@ -65,4 +65,5 @@ class LinuxJobsModule(Module, CapJob):
:type pattern: str
:rtype: iter[:class:`BaseJobAdvert`]
"""
raise NotImplementedError()
for job_advert in self.browser.search_job(pattern):
yield job_advert
......@@ -18,14 +18,31 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.job import BaseJobAdvert
from weboob.browser.pages import HTMLPage
from weboob.browser.elements import ItemElement, ListElement, method
from weboob.browser.filters.standard import Regexp, CleanText, Date, Env, BrowserURL
from weboob.browser.filters.html import Link, CleanHTML
class AdvertPage(HTMLPage):
@method
class get_job_advert(ItemElement):
klass = BaseJobAdvert
class Page1(HTMLPage):
def do_stuff(self, _id):
raise NotImplementedError()
obj_id = Env('id')
obj_url = BrowserURL('advert_page', id=Env('id'))
obj_title = CleanText('//title')
obj_job_name = CleanText('//title')
class Page2(HTMLPage):
def do_more_stuff(self):
raise NotImplementedError()
class SearchPage(HTMLPage):
    """Search results page; exposes the listed adverts as BaseJobAdvert items."""

    @method
    class iter_job_adverts(ListElement):
        # One item is built per <a> element whose class attribute equals this
        # exact string. The trailing space is part of the compared value and
        # is kept as-is (presumably mirrors the site's markup -- verify
        # against the live page before "cleaning" it up).
        item_xpath = '//a[@class="list-group-item "]'

        class item(ItemElement):
            klass = BaseJobAdvert

            # The advert id is the run of digits following "fr/jobs/" in the
            # link target of the current <a> element. A raw string is used so
            # that "\d" reaches the regex engine untouched instead of relying
            # on Python passing an unknown string escape through.
            obj_id = Regexp(Link('.'), r'.*fr/jobs/(\d+)/.*')
            # Title and company come from the <span> children of the <h4>
            # inside the link element.
            obj_title = CleanText('h4/span[@class="job-title"]')
            obj_society_name = CleanText('h4/span[@class="job-company"]')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment