From 6cd0515ffe54c4a974455c136dab44bb50ada604 Mon Sep 17 00:00:00 2001 From: Bezleputh Date: Wed, 24 Sep 2014 22:38:50 +0200 Subject: [PATCH] [leboncoin] add new housing module leboncoin --- modules/leboncoin/__init__.py | 24 ++++++ modules/leboncoin/backend.py | 86 +++++++++++++++++++++ modules/leboncoin/browser.py | 55 +++++++++++++ modules/leboncoin/pages.py | 141 ++++++++++++++++++++++++++++++++++ modules/leboncoin/test.py | 39 ++++++++++ 5 files changed, 345 insertions(+) create mode 100644 modules/leboncoin/__init__.py create mode 100644 modules/leboncoin/backend.py create mode 100644 modules/leboncoin/browser.py create mode 100644 modules/leboncoin/pages.py create mode 100644 modules/leboncoin/test.py diff --git a/modules/leboncoin/__init__.py b/modules/leboncoin/__init__.py new file mode 100644 index 0000000000..1746e9bfa6 --- /dev/null +++ b/modules/leboncoin/__init__.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Bezleputh +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. + + +from .backend import LeboncoinBackend + + +__all__ = ['LeboncoinBackend'] diff --git a/modules/leboncoin/backend.py b/modules/leboncoin/backend.py new file mode 100644 index 0000000000..ff43efc322 --- /dev/null +++ b/modules/leboncoin/backend.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Bezleputh +# +# This file is part of weboob. 
+# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. + + +from weboob.tools.backend import BaseBackend +from weboob.capabilities.housing import CapHousing, Query, Housing, HousingPhoto + +from .browser import LeboncoinBrowser + + +__all__ = ['LeboncoinBackend'] + + +class LeboncoinBackend(BaseBackend, CapHousing): + NAME = 'leboncoin' + DESCRIPTION = u'search house on leboncoin website' + MAINTAINER = u'Bezleputh' + EMAIL = 'carton_ben@yahoo.fr' + LICENSE = 'AGPLv3+' + VERSION = '1.0' + + BROWSER = LeboncoinBrowser + + RET = {Query.HOUSE_TYPES.HOUSE: '1', + Query.HOUSE_TYPES.APART: '2', + Query.HOUSE_TYPES.LAND: '3', + Query.HOUSE_TYPES.PARKING: '4', + Query.HOUSE_TYPES.OTHER: '5'} + + def get_housing(self, _id): + return self.browser.get_housing(_id) + + def fill_housing(self, housing, fields): + return self.browser.get_housing(housing.id) + + def fill_photo(self, photo, fields): + if 'data' in fields and photo.url and not photo.data: + photo.data = self.browser.readurl(photo.url) + return photo + + def search_city(self, pattern): + return self.browser.get_cities(pattern) + + def search_housings(self, query): + cities = [] + for c in query.cities: + cities.append('%s %s' % (c.id, c.name)) + + if len(cities) == 0: + return list() + + ret = [] + for g in query.house_types: + ret.append(self.RET.get(g)) + + if len(ret) == 0: + return list() + + _type = query.TYPE_RENT if query.type is None else query.type + 
+ nb_rooms = '' if not query.nb_rooms else query.nb_rooms + area_min = '' if not query.area_min else query.area_min + area_max = '' if not query.area_max else query.area_max + cost_min = '' if not query.cost_min else query.cost_min + cost_max = '' if not query.cost_max else query.cost_max + + return self.browser.search_housings(_type, ','.join(cities), nb_rooms, + area_min, area_max, + cost_min, cost_max, '&ret='.join(ret)) + + OBJECTS = {Housing: fill_housing, HousingPhoto: fill_photo} diff --git a/modules/leboncoin/browser.py b/modules/leboncoin/browser.py new file mode 100644 index 0000000000..121635517a --- /dev/null +++ b/modules/leboncoin/browser.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Bezleputh +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. 
+ + +from weboob.tools.browser2 import PagesBrowser, URL + +from .pages import CityListPage, HousingListPage, HousingPage + + +class LeboncoinBrowser(PagesBrowser): + BASEURL = 'http://www.leboncoin.fr' + city = URL('ajax/location_list.html\?city=(?P<city>.*)&zipcode=(?P<zip>.*)', CityListPage) + search = URL('ventes_immobilieres/offres/ile_de_france/occasions/\?ps=(?P<ps>.*)&pe=(?P<pe>.*)&ros=(?P<ros>.*)&location=(?P<location>.*)&sqs=(?P<sqs>.*)&sqe=(?P<sqe>.*)&ret=(?P<ret>.*)', + 'ventes_immobilieres/offres/ile_de_france/occasions/\?.*', + HousingListPage) + housing = URL('ventes_immobilieres/(?P<_id>.*).htm', HousingPage) + + def get_cities(self, pattern): + city = '' + zip_code = '' + if pattern.isdigit(): + zip_code = pattern + else: + city = pattern + + return self.city.go(city=city, zip=zip_code).get_cities() + + def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max, ret): + # print type achat ou location + return self.search.go(location=cities, + ros=nb_rooms, + sqs=area_min, + sqe=area_max, + ps=cost_min, + pe=cost_max, + ret=ret).get_housing_list() + + def get_housing(self, _id): + return self.housing.go(_id=_id).get_housing() diff --git a/modules/leboncoin/pages.py b/modules/leboncoin/pages.py new file mode 100644 index 0000000000..0b7c76056b --- /dev/null +++ b/modules/leboncoin/pages.py @@ -0,0 +1,141 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Bezleputh +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. + +from decimal import Decimal +from weboob.tools.browser2.page import HTMLPage, method, pagination +from weboob.tools.browser2.elements import ItemElement, ListElement +from weboob.tools.browser2.filters import CleanText, Link, Regexp, CleanDecimal, Env, DateTime, Attr +from weboob.capabilities.housing import City, Housing, HousingPhoto +from datetime import date +from weboob.tools.date import DATE_TRANSLATE_FR, LinearDateGuesser + + +class CityListPage(HTMLPage): + @method + class get_cities(ListElement): + item_xpath = '//li' + + class item(ItemElement): + klass = City + + obj_id = CleanText('./span[@class="zipcode"]') + obj_name = CleanText('./span[@class="city"]') + + +class HousingListPage(HTMLPage): + @pagination + @method + class get_housing_list(ListElement): + item_xpath = '//div[@class="list-lbc"]/a' + + def next_page(self): + return Link('//li[@class="page"]/a')(self) + + class item(ItemElement): + klass = Housing + + obj_id = Regexp(Link('.'), 'http://www.leboncoin.fr/ventes_immobilieres/(.*).htm') + obj_title = CleanText('./div[@class="lbc"]/div/div[@class="title"]') + obj_cost = CleanDecimal('./div[@class="lbc"]/div/div[@class="price"]', + replace_dots=(',', '.'), + default=Decimal(0)) + obj_currency = Regexp(CleanText('./div[@class="lbc"]/div/div[@class="price"]'), + '.*([%s%s%s])' % (u'€', u'$', u'£'), default=u'€') + obj_text = CleanText('./div[@class="lbc"]/div[@class="detail"]') + + def obj_date(self): + _date = CleanText('./div[@class="lbc"]/div[@class="date"]', + replace=[('Aujourd\'hui', str(date.today().day))])(self) + for fr, en in DATE_TRANSLATE_FR: + _date = fr.sub(en, _date) + + self.env['tmp'] = _date + return DateTime(Env('tmp'), LinearDateGuesser())(self) + + def obj_photos(self): + photos = [] + url = Attr('./div[@class="lbc"]/div[@class="image"]/div/img', 'src', default=None)(self) + if url: + 
photos.append(HousingPhoto(url)) + return photos + + +class HousingPage(HTMLPage): + @method + class get_housing(ItemElement): + klass = Housing + + def parse(self, el): + details = dict() + for tr in el.xpath('//div[@class="floatLeft"]/table/tr'): + if 'Ville' in CleanText('./th')(tr): + self.env['location'] = CleanText('./td')(tr) + else: + details['%s' % CleanText('./th', replace=[(':', '')])(tr)] = CleanText('./td')(tr) + + for tr in el.xpath('//div[@class="lbcParams criterias"]/table/tr'): + if 'Surface' in CleanText('./th')(tr): + self.env['area'] = CleanDecimal(Regexp(CleanText('./td'), '(.*)m.*'), + replace_dots=(',', '.'))(tr) + else: + key = '%s' % CleanText('./th', replace=[(':', '')])(tr) + if 'GES' in key or 'Classe' in key: + details[key] = CleanText('./td/noscript/a')(tr) + else: + details[key] = CleanText('./td')(tr) + + self.env['details'] = details + + obj_id = Env('_id') + obj_title = CleanText('//h2[@id="ad_subject"]') + obj_cost = CleanDecimal('//span[@class="price"]', replace_dots=(',', '.'), default=Decimal(0)) + + obj_currency = Regexp(CleanText('//span[@class="price"]'), + '.*([%s%s%s])' % (u'€', u'$', u'£')) + obj_text = CleanText('//div[@class="content"]') + obj_location = Env('location') + obj_details = Env('details') + obj_area = Env('area') + + def obj_date(self): + sender = CleanText('//div[@class="upload_by"]/a')(self) + _date = CleanText('//div[@class="upload_by"]', + replace=[('- Mise en ligne le ', ''), + (sender, ''), + (u'à', ''), + (u'.', '')])(self) + + for fr, en in DATE_TRANSLATE_FR: + _date = fr.sub(en, _date) + + self.env['tmp'] = _date + return DateTime(Env('tmp'), LinearDateGuesser())(self) + + def obj_photos(self): + photos = [] + for img in self.el.xpath('//div[@id="thumbs_carousel"]/a/span'): + url = CleanText(Regexp(Attr('.', 'style', + default=''), + "background-image: url\('(.*)'\);", + default=''), + replace=[('thumbs', 'images')], + default='')(img) + if url: + photos.append(HousingPhoto(url)) + return photos 
diff --git a/modules/leboncoin/test.py b/modules/leboncoin/test.py new file mode 100644 index 0000000000..a96cba2562 --- /dev/null +++ b/modules/leboncoin/test.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Bezleputh +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. + +import itertools +from weboob.tools.test import BackendTest +from weboob.capabilities.housing import Query + + +class LeboncoinTest(BackendTest): + BACKEND = 'leboncoin' + + def test_leboncoin(self): + query = Query() + query.cities = [] + for city in self.backend.search_city('lille'): + city.backend = self.backend.name + query.cities.append(city) + + results = list(itertools.islice(self.backend.search_housings(query), 0, 20)) + self.assertTrue(len(results) > 0) + + obj = self.backend.fillobj(results[0]) + self.assertTrue(obj.area is not None, 'Area for "%s"' % (obj.id)) -- GitLab