Commit 6cd0515f authored by Bezleputh's avatar Bezleputh

[leboncoin] add new housing module leboncoin

parent 7304f1de
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .backend import LeboncoinBackend
__all__ = ['LeboncoinBackend']
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.backend import BaseBackend
from weboob.capabilities.housing import CapHousing, Query, Housing, HousingPhoto
from .browser import LeboncoinBrowser
__all__ = ['LeboncoinBackend']
class LeboncoinBackend(BaseBackend, CapHousing):
    """Housing backend for the leboncoin website.

    Every request is delegated to the browser class set in ``BROWSER``;
    this class only adapts the housing capability calls to the browser API.
    """
    NAME = 'leboncoin'
    DESCRIPTION = u'search house on leboncoin website'
    MAINTAINER = u'Bezleputh'
    EMAIL = 'carton_ben@yahoo.fr'
    LICENSE = 'AGPLv3+'
    VERSION = '1.0'

    BROWSER = LeboncoinBrowser

    # Value sent in the ``ret`` URL parameter for each supported house type.
    RET = {Query.HOUSE_TYPES.HOUSE: '1',
           Query.HOUSE_TYPES.APART: '2',
           Query.HOUSE_TYPES.LAND: '3',
           Query.HOUSE_TYPES.PARKING: '4',
           Query.HOUSE_TYPES.OTHER: '5'}

    def get_housing(self, _id):
        """Return the housing whose identifier is ``_id``."""
        return self.browser.get_housing(_id)

    def fill_housing(self, housing, fields):
        """Return a freshly fetched housing for ``housing.id``.

        ``fields`` is not consulted: the whole advert is fetched again.
        """
        return self.browser.get_housing(housing.id)

    def fill_photo(self, photo, fields):
        """Download the photo content when 'data' is requested and missing.

        Returns the same ``photo`` object.
        """
        if 'data' in fields and photo.url and not photo.data:
            photo.data = self.browser.readurl(photo.url)
        return photo

    def search_city(self, pattern):
        """Return the cities matching ``pattern`` (a name or a zip code)."""
        return self.browser.get_cities(pattern)

    def search_housings(self, query):
        """Search housings matching ``query``.

        Returns an empty list when the query names no city, or when none of
        its house types has an entry in ``RET``. Otherwise returns whatever
        the browser's ``search_housings`` returns.
        """
        cities = ['%s %s' % (c.id, c.name) for c in query.cities]
        if not cities:
            return list()

        # Skip house types without a mapping: a None entry would make the
        # '&ret='.join() below raise TypeError.
        ret = [self.RET[g] for g in query.house_types if g in self.RET]
        if not ret:
            return list()

        _type = query.TYPE_RENT if query.type is None else query.type

        # Unset (falsy) criteria are sent as empty URL parameters.
        nb_rooms = query.nb_rooms or ''
        area_min = query.area_min or ''
        area_max = query.area_max or ''
        cost_min = query.cost_min or ''
        cost_max = query.cost_max or ''

        # Joining on '&ret=' makes the single ``ret`` URL placeholder expand
        # into one ``ret=`` parameter per selected house type.
        return self.browser.search_housings(_type, ','.join(cities), nb_rooms,
                                            area_min, area_max,
                                            cost_min, cost_max, '&ret='.join(ret))

    OBJECTS = {Housing: fill_housing, HousingPhoto: fill_photo}
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser2 import PagesBrowser, URL
from .pages import CityListPage, HousingListPage, HousingPage
class LeboncoinBrowser(PagesBrowser):
    """Browser for the leboncoin website.

    Declares the three URL patterns used by the module (city lookup,
    housing search, housing detail) and exposes one method per pattern.
    """
    BASEURL = 'http://www.leboncoin.fr'

    # Raw strings are used for patterns containing a backslash so that `\?`
    # is not an invalid string escape; the resulting values are unchanged.
    city = URL(r'ajax/location_list.html\?city=(?P<city>.*)&zipcode=(?P<zip>.*)', CityListPage)
    search = URL(r'ventes_immobilieres/offres/ile_de_france/occasions/\?ps=(?P<ps>.*)&pe=(?P<pe>.*)&ros=(?P<ros>.*)&location=(?P<location>.*)&sqs=(?P<sqs>.*)&sqe=(?P<sqe>.*)&ret=(?P<ret>.*)',
                 r'ventes_immobilieres/offres/ile_de_france/occasions/\?.*',
                 HousingListPage)
    housing = URL('ventes_immobilieres/(?P<_id>.*).htm', HousingPage)

    def get_cities(self, pattern):
        """Look up cities by name or by zip code.

        An all-digit ``pattern`` is sent as the zip code, anything else as
        the city name; the other parameter is sent empty.
        """
        if pattern.isdigit():
            city, zip_code = '', pattern
        else:
            city, zip_code = pattern, ''

        return self.city.go(city=city, zip=zip_code).get_cities()

    def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max, ret):
        """Run a housing search and return the result of ``get_housing_list()``.

        NOTE(review): ``type`` (purchase or rental) is accepted but not used
        in the request; the search URL path is fixed to
        'ventes_immobilieres/offres/ile_de_france'.
        """
        return self.search.go(location=cities,
                              ros=nb_rooms,
                              sqs=area_min,
                              sqe=area_max,
                              ps=cost_min,
                              pe=cost_max,
                              ret=ret).get_housing_list()

    def get_housing(self, _id):
        """Open the detail page of advert ``_id`` and return its housing."""
        return self.housing.go(_id=_id).get_housing()
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from decimal import Decimal
from weboob.tools.browser2.page import HTMLPage, method, pagination
from weboob.tools.browser2.elements import ItemElement, ListElement
from weboob.tools.browser2.filters import CleanText, Link, Regexp, CleanDecimal, Env, DateTime, Attr
from weboob.capabilities.housing import City, Housing, HousingPhoto
from datetime import date
from weboob.tools.date import DATE_TRANSLATE_FR, LinearDateGuesser
class CityListPage(HTMLPage):
    # Page returned by the city lookup request; yields one City per <li>.
    @method
    class get_cities(ListElement):
        # Every <li> element of the document is treated as one city entry.
        item_xpath = '//li'

        class item(ItemElement):
            klass = City

            # The city id is the text of the entry's "zipcode" span.
            obj_id = CleanText('./span[@class="zipcode"]')
            # The city name is the text of the entry's "city" span.
            obj_name = CleanText('./span[@class="city"]')
class HousingListPage(HTMLPage):
    """Search result page: one Housing per advert link, with pagination."""

    @pagination
    @method
    class get_housing_list(ListElement):
        # Each advert is an <a> inside the "list-lbc" container.
        item_xpath = '//div[@class="list-lbc"]/a'

        def next_page(self):
            """Return the link found under the "page" list item."""
            next_link = Link('//li[@class="page"]/a')
            return next_link(self)

        class item(ItemElement):
            klass = Housing

            # The id is the part of the advert URL between the section
            # prefix and the ".htm" suffix.
            obj_id = Regexp(Link('.'), 'http://www.leboncoin.fr/ventes_immobilieres/(.*).htm')
            obj_title = CleanText('./div[@class="lbc"]/div/div[@class="title"]')
            # Falls back to 0 when no price can be read.
            obj_cost = CleanDecimal('./div[@class="lbc"]/div/div[@class="price"]',
                                    replace_dots=(',', '.'),
                                    default=Decimal(0))
            # Currency symbol captured from the price text; euro by default.
            obj_currency = Regexp(CleanText('./div[@class="lbc"]/div/div[@class="price"]'),
                                  '.*([%s%s%s])' % (u'€', u'$', u'£'), default=u'€')
            obj_text = CleanText('./div[@class="lbc"]/div[@class="detail"]')

            def obj_date(self):
                """Parse the advert date shown in the result list.

                "Aujourd'hui" is replaced by today's day of month, then the
                French date words are substituted using DATE_TRANSLATE_FR
                before the text is handed to DateTime.
                """
                day_of_month = str(date.today().day)
                text_filter = CleanText('./div[@class="lbc"]/div[@class="date"]',
                                        replace=[('Aujourd\'hui', day_of_month)])
                raw_date = text_filter(self)

                for fr, en in DATE_TRANSLATE_FR:
                    raw_date = fr.sub(en, raw_date)

                # DateTime reads its input through Env, so stash the text there.
                self.env['tmp'] = raw_date
                return DateTime(Env('tmp'), LinearDateGuesser())(self)

            def obj_photos(self):
                """Return a list holding the thumbnail photo, or an empty list."""
                src = Attr('./div[@class="lbc"]/div[@class="image"]/div/img',
                           'src', default=None)(self)
                return [HousingPhoto(src)] if src else []
class HousingPage(HTMLPage):
    """Detail page of a single advert; builds one Housing object."""

    @method
    class get_housing(ItemElement):
        klass = Housing

        def parse(self, el):
            """Collect table-based data into ``self.env`` before the obj_* run.

            Sets ``env['details']`` always, ``env['location']`` when a
            'Ville' row exists and ``env['area']`` when a 'Surface' row
            exists.

            NOTE(review): when those rows are absent the corresponding env
            keys are never set; presumably Env('location') / Env('area')
            then fail -- confirm against the Env filter.
            """
            details = dict()

            # First table: the 'Ville' row is the location, every other
            # row goes into details under its header text without ':'.
            for tr in el.xpath('//div[@class="floatLeft"]/table/tr'):
                if 'Ville' in CleanText('./th')(tr):
                    self.env['location'] = CleanText('./td')(tr)
                else:
                    details['%s' % CleanText('./th', replace=[(':', '')])(tr)] = CleanText('./td')(tr)

            # Second table: the 'Surface' row is the area (number before
            # the 'm'); other rows are details.
            for tr in el.xpath('//div[@class="lbcParams criterias"]/table/tr'):
                if 'Surface' in CleanText('./th')(tr):
                    self.env['area'] = CleanDecimal(Regexp(CleanText('./td'), '(.*)m.*'),
                                                    replace_dots=(',', '.'))(tr)
                else:
                    key = '%s' % CleanText('./th', replace=[(':', '')])(tr)
                    if 'GES' in key or 'Classe' in key:
                        # For these rows the value is read from the link
                        # inside the <noscript> element.
                        details[key] = CleanText('./td/noscript/a')(tr)
                    else:
                        details[key] = CleanText('./td')(tr)

            self.env['details'] = details

        # '_id' is read from the element environment (presumably filled
        # from the URL's named group -- confirm).
        obj_id = Env('_id')
        obj_title = CleanText('//h2[@id="ad_subject"]')
        obj_cost = CleanDecimal('//span[@class="price"]', replace_dots=(',', '.'), default=Decimal(0))
        # Default added so an advert without a price does not fail here,
        # matching obj_cost's default and the list page's currency default.
        obj_currency = Regexp(CleanText('//span[@class="price"]'),
                              '.*([%s%s%s])' % (u'€', u'$', u'£'), default=u'€')
        obj_text = CleanText('//div[@class="content"]')
        obj_location = Env('location')
        obj_details = Env('details')
        obj_area = Env('area')

        def obj_date(self):
            """Parse the publication date from the "upload_by" block.

            The sender name, the fixed French prefix, 'à' and dots are
            stripped, then French date words are substituted using
            DATE_TRANSLATE_FR before the text is handed to DateTime.
            """
            sender = CleanText('//div[@class="upload_by"]/a')(self)
            _date = CleanText('//div[@class="upload_by"]',
                              replace=[('- Mise en ligne le ', ''),
                                       (sender, ''),
                                       (u'à', ''),
                                       (u'.', '')])(self)

            for fr, en in DATE_TRANSLATE_FR:
                _date = fr.sub(en, _date)

            # DateTime reads its input through Env, so stash the text there.
            self.env['tmp'] = _date
            return DateTime(Env('tmp'), LinearDateGuesser())(self)

        def obj_photos(self):
            """Return one HousingPhoto per carousel thumbnail that has a URL.

            The URL is taken from each span's inline background-image style,
            with 'thumbs' replaced by 'images'.
            """
            photos = []
            for img in self.el.xpath('//div[@id="thumbs_carousel"]/a/span'):
                # Raw string: the pattern contains regex escapes for '(' and ')'.
                url = CleanText(Regexp(Attr('.', 'style',
                                            default=''),
                                       r"background-image: url\('(.*)'\);",
                                       default=''),
                                replace=[('thumbs', 'images')],
                                default='')(img)
                if url:
                    photos.append(HousingPhoto(url))
            return photos
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import itertools
from weboob.tools.test import BackendTest
from weboob.capabilities.housing import Query
class LeboncoinTest(BackendTest):
    """Live test: search housings in cities matching 'lille' and fill one."""
    BACKEND = 'leboncoin'

    def test_leboncoin(self):
        backend = self.backend

        query = Query()
        query.cities = []
        for found_city in backend.search_city('lille'):
            found_city.backend = backend.name
            query.cities.append(found_city)

        # Only the first 20 results are consumed.
        first_results = itertools.islice(backend.search_housings(query), 0, 20)
        results = list(first_results)
        self.assertTrue(len(results) > 0)

        filled = backend.fillobj(results[0])
        self.assertTrue(filled.area is not None, 'Area for "%s"' % (filled.id))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment