pax_global_header 0000666 0000000 0000000 00000000064 14575653726 0014536 g ustar 00root root 0000000 0000000 52 comment=5f3d558793b537a74480241ac6981479f5938cd3
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-explorimmo/ 0000775 0000000 0000000 00000000000 14575653726 0024235 5 ustar 00root root 0000000 0000000 woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-explorimmo/modules/ 0000775 0000000 0000000 00000000000 14575653726 0025705 5 ustar 00root root 0000000 0000000 woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-explorimmo/modules/explorimmo/ 0000775 0000000 0000000 00000000000 14575653726 0030100 5 ustar 00root root 0000000 0000000 __init__.py 0000664 0000000 0000000 00000001507 14575653726 0032135 0 ustar 00root root 0000000 0000000 woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-explorimmo/modules/explorimmo # -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
from .module import ExplorimmoModule
__all__ = ['ExplorimmoModule']
browser.py 0000664 0000000 0000000 00000006734 14575653726 0032070 0 ustar 00root root 0000000 0000000 woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-explorimmo/modules/explorimmo # -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
from urllib.parse import urlencode
from woob.browser import PagesBrowser, URL
from woob.capabilities.housing import (TypeNotSupported, POSTS_TYPES,
HOUSE_TYPES)
from .pages import CitiesPage, SearchPage, HousingPage, HousingPage2, PhonePage
class ExplorimmoBrowser(PagesBrowser):
    """Browser for the housing adverts served from ``immobilier.lefigaro.fr``.

    Each ``URL`` attribute binds one or more path patterns to the page class
    that parses the response.  The named groups of those patterns carry the
    same names as the keyword arguments given to ``go()``/``open()`` in the
    methods below (``city``, ``query``, ``_id``, ``js_datas``).
    """

    BASEURL = 'https://immobilier.lefigaro.fr'

    # Raw strings so that ``\?`` reaches the regex engine as an escaped "?"
    # instead of relying on Python keeping an unknown string escape.
    cities = URL(r'/rest/locations\?q=(?P<city>.*)', CitiesPage)
    search = URL(r'/annonces/resultat/annonces.html\?(?P<query>.*)', SearchPage)
    housing_html = URL(r'/annonces/annonce-(?P<_id>.*).html', HousingPage)
    phone = URL(r'/rest/classifieds/(?P<_id>.*)/phone', PhonePage)
    housing = URL(r'/rest/classifieds/(?P<_id>.*)',
                  r'/rest/classifieds/\?(?P<js_datas>.*)', HousingPage2)

    # Supported post types -> value of the "transaction" query parameter.
    TYPES = {POSTS_TYPES.RENT: 'location',
             POSTS_TYPES.SALE: 'vente',
             POSTS_TYPES.FURNISHED_RENT: 'location',
             POSTS_TYPES.VIAGER: 'vente'}

    # House types -> value of the repeated "type" query parameter.
    RET = {HOUSE_TYPES.HOUSE: 'Maison',
           HOUSE_TYPES.APART: 'Appartement',
           HOUSE_TYPES.LAND: 'Terrain',
           HOUSE_TYPES.PARKING: 'Parking',
           HOUSE_TYPES.OTHER: 'Divers'}

    def get_cities(self, pattern):
        """Return the cities found by the ``cities`` endpoint for *pattern*."""
        return self.cities.open(city=pattern).get_cities()

    def search_housings(self, type, cities, nb_rooms, area_min, area_max,
                        cost_min, cost_max, house_types, advert_types):
        """Run a search and return the housings parsed from the result page.

        :param type: post type; must be a key of ``TYPES``
        :param cities: city identifiers, joined with "," in the query
        :param nb_rooms: minimum number of rooms (sent as ``roomMin``)
        :param area_min: minimum area (``areaMin``)
        :param area_max: maximum area (``areaMax``)
        :param cost_min: minimum price (``priceMin``)
        :param cost_max: maximum price (``priceMax``)
        :param house_types: house types; entries missing from ``RET`` are
            ignored, and the whole list is ignored for viager searches
        :param advert_types: forwarded to ``SearchPage.iter_housings``
        :raises TypeNotSupported: when *type* is not a key of ``TYPES``
        """
        if type not in self.TYPES:
            raise TypeNotSupported()

        if type == POSTS_TYPES.VIAGER:
            # Viager searches use a single dedicated "type" value.
            ret = ['Viager']
        else:
            ret = [self.RET[house_type]
                   for house_type in house_types
                   if house_type in self.RET]

        data = {'location': ','.join(cities).encode('iso 8859-1'),
                'furnished': type == POSTS_TYPES.FURNISHED_RENT,
                'areaMin': area_min or '',
                'areaMax': area_max or '',
                'priceMin': cost_min or '',
                'priceMax': cost_max or '',
                'transaction': self.TYPES[type],
                'recherche': '',
                'mode': '',
                'proximity': '0',
                'roomMin': nb_rooms or '',
                'page': '1'}

        # "type" is appended by hand because it is repeated once per value;
        # with no value the query still ends with an empty "&type=".
        query = u'%s&type=%s' % (urlencode(data), '&type='.join(ret))

        return self.search.go(query=query).iter_housings(
            query_type=type,
            advert_types=advert_types
        )

    def get_housing(self, _id, housing=None):
        """Load advert *_id* from the ``housing`` URL.

        :param housing: optional object passed to the page as ``obj``
        """
        return self.housing.go(_id=_id).get_housing(obj=housing)

    def get_phone(self, _id):
        """Return the phone number read from the ``phone`` URL for *_id*."""
        return self.phone.go(_id=_id).get_phone()

    def get_total_page(self, js_datas):
        """Open the ``housing`` URL with *js_datas* and return its page total."""
        return self.housing.open(js_datas=js_datas).get_total_page()
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-explorimmo/modules/explorimmo/module.py0000664 0000000 0000000 00000005121 14575653726 0031736 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
from woob.tools.backend import Module
from woob.capabilities.housing import CapHousing, Housing, HousingPhoto
from .browser import ExplorimmoBrowser
__all__ = ['ExplorimmoModule']
class ExplorimmoModule(Module, CapHousing):
    """woob module exposing the explorimmo website through ``CapHousing``.

    All network work is delegated to ``ExplorimmoBrowser``.
    """

    NAME = 'explorimmo'
    DESCRIPTION = u'explorimmo website'
    MAINTAINER = u'Bezleputh'
    EMAIL = 'carton_ben@yahoo.fr'
    LICENSE = 'AGPLv3+'
    VERSION = '3.6'

    BROWSER = ExplorimmoBrowser

    def get_housing(self, housing):
        """Fetch a housing given either a ``Housing`` object or a bare id."""
        if isinstance(housing, Housing):
            # Reuse the given object so the browser fills it in.
            return self.browser.get_housing(housing.id, housing)
        return self.browser.get_housing(housing, None)

    def search_city(self, pattern):
        """Return the cities the browser finds for *pattern*."""
        return self.browser.get_cities(pattern)

    def search_housings(self, query):
        """Search housings for *query*, using only this backend's cities.

        Returns an empty list when none of the query's cities has this
        backend's name.
        """
        own_cities = [
            '%s' % city.id
            for city in query.cities
            if city.backend == self.name
        ]
        if not own_cities:
            return []

        return self.browser.search_housings(
            query.type, own_cities, query.nb_rooms,
            query.area_min, query.area_max,
            query.cost_min, query.cost_max,
            query.house_types,
            query.advert_types,
        )

    def fill_housing(self, housing, fields):
        """Fill the requested *fields* of *housing* and return it.

        The phone number comes from its own request; ``'phone'`` is then
        removed from *fields* (in place), and any remaining field triggers a
        reload of the whole advert.
        """
        wants_phone = 'phone' in fields
        if wants_phone:
            housing.phone = self.browser.get_phone(housing.id)
            fields.remove('phone')

        if fields:
            self.browser.get_housing(housing.id, housing)

        return housing

    def fill_photo(self, photo, fields):
        """Download the photo content when ``'data'`` is requested and unset."""
        if 'data' not in fields or not photo.url or photo.data:
            return photo
        photo.data = self.browser.open(photo.url).content
        return photo

    # Object type -> method used to fill it.
    OBJECTS = {
        Housing: fill_housing,
        HousingPhoto: fill_photo,
    }
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-explorimmo/modules/explorimmo/pages.py 0000664 0000000 0000000 00000041374 14575653726 0031562 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
import json
import math
import re
from decimal import Decimal
from datetime import datetime
from urllib.parse import unquote
from woob.browser.filters.json import Dict
from woob.browser.elements import ItemElement, ListElement, DictElement, method
from woob.browser.pages import JsonPage, HTMLPage, pagination
from woob.browser.filters.standard import (CleanText, CleanDecimal, Currency,
Regexp, Env, BrowserURL, Filter,
Format)
from woob.browser.filters.html import Attr, CleanHTML, XPath
from woob.capabilities.base import NotAvailable, NotLoaded, Currency as BaseCurrency
from woob.capabilities.housing import (Housing, HousingPhoto, City,
UTILITIES, ENERGY_CLASS, POSTS_TYPES,
ADVERT_TYPES, HOUSE_TYPES)
from woob.tools.capabilities.housing.housing import PricePerMeterFilter
class CitiesPage(JsonPage):
    """JSON page listing the locations that match a city search."""

    ENCODING = 'UTF-8'

    def build_doc(self, content):
        """Parse *content*; fall back to an empty locations list when falsy."""
        doc = super().build_doc(content)
        # Keep the shape expected by get_cities' item_xpath ("0/locations").
        return doc if doc else [{"locations": []}]

    @method
    class get_cities(DictElement):
        """Yield one ``City`` per entry of the first element's ``locations``."""

        item_xpath = '0/locations'

        class item(ItemElement):
            klass = City

            # The label is used both as identifier and as display name.
            obj_id = Dict('label')
            obj_name = Dict('label')
class SearchPage(HTMLPage):
    """HTML page of search results; builds one ``Housing`` per result block."""

    @pagination
    @method
    class iter_housings(ListElement):
        # One element per <div> whose id starts with "bloc-vue-".
        item_xpath = '//div[starts-with(@id, "bloc-vue-")]'

        def next_page(self):
            """Return the URL of the next result page, or ``None``.

            The page size and current page number are read from the query
            string stored in ``data-rest-search-request`` and the result count
            from ``data-result-count``.  ``None`` is returned on the last
            page, and also when any of those values is missing or is not a
            usable number.
            """
            js_datas = CleanText(
                '//div[@id="js-data"]/@data-rest-search-request'
            )(self).split('?')[-1].split('&')

            try:
                results_per_page = next(
                    x for x in js_datas if 'resultsPerPage' in x
                ).split('=')[-1]
                current_page_number = next(
                    x for x in js_datas if 'currentPageNumber' in x
                ).split('=')[-1]
                result_count = CleanText(
                    '(//div[@id="js-data"]/@data-result-count)[1]'
                )(self)
                total_page_number = math.ceil(
                    int(result_count) / int(results_per_page)
                )
                next_page = int(current_page_number) + 1
            except (StopIteration, ValueError, ZeroDivisionError):
                # Missing parameter, empty/non-numeric counter or a page size
                # of zero: treat it as "no further page" rather than failing.
                return None

            if next_page <= total_page_number:
                # The raw string is substituted so the replacement matches
                # the URL text exactly.
                return self.page.url.replace(
                    'page=%s' % current_page_number,
                    'page=%d' % next_page
                )
            return None

        class item(ItemElement):
            klass = Housing

            price_selector = './/span[@class="price-label"]|./div/div[@class="item-price-pdf"]'

            def is_agency(self):
                """Return False only when the agency label marks a private advert."""
                agency = CleanText('.//span[has-class("item-agency-name")]')(self.el)
                return 'annonce de particulier' not in agency.lower()

            def condition(self):
                """Tell whether this result block should produce a housing.

                With exactly one requested advert type, the block is kept only
                when it matches that type; otherwise it is kept when it
                carries a ``data-classified-id`` attribute.
                """
                if len(self.env['advert_types']) == 1:
                    is_agency = self.is_agency()
                    if self.env['advert_types'][0] == ADVERT_TYPES.PERSONAL:
                        return not is_agency
                    elif self.env['advert_types'][0] == ADVERT_TYPES.PROFESSIONAL:
                        return is_agency
                return Attr('.', 'data-classified-id', default=False)(self)

            obj_id = Attr('.', 'data-classified-id')
            obj_type = Env('query_type')
            obj_title = CleanText('./div/h2[@class="item-type"]')

            def obj_advert_type(self):
                """Map ``is_agency()`` to PROFESSIONAL or PERSONAL."""
                if self.is_agency():
                    return ADVERT_TYPES.PROFESSIONAL
                else:
                    return ADVERT_TYPES.PERSONAL

            def obj_house_type(self):
                """Derive the house type from the first word of the title."""
                words = self.obj_title(self).split()
                # An empty title has no first word; it falls through to OTHER.
                kind = words[0].lower() if words else ''
                if kind in ("appartement", "studio", "chambre"):
                    return HOUSE_TYPES.APART
                elif kind in ("maison", "villa"):
                    return HOUSE_TYPES.HOUSE
                elif kind == "parking":
                    return HOUSE_TYPES.PARKING
                elif kind == "terrain":
                    return HOUSE_TYPES.LAND
                else:
                    return HOUSE_TYPES.OTHER

            def obj_location(self):
                """Return "locality (postal code)" from the embedded JSON script.

                ``NotLoaded`` is returned when the script cannot be parsed or
                does not hold the expected address entries.
                """
                script = CleanText('./script')(self)
                try:
                    # Should be standard JSON+LD data
                    script = json.loads(script)
                except ValueError:
                    try:
                        # But explorimmo can't write JSON correctly and there
                        # is a trailing "}"
                        script = json.loads(script.strip().rstrip('}'))
                    except ValueError:
                        script = None

                if not script:
                    return NotLoaded

                try:
                    return '%s (%s)' % (
                        script['address']['addressLocality'],
                        script['address']['postalCode']
                    )
                except (KeyError, TypeError):
                    # TypeError: the payload or its "address" is not a mapping.
                    return NotLoaded

            def obj_cost(self):
                """Return the price, taking the lower bound of a "de X à Y" range."""
                cost = CleanDecimal(Regexp(CleanText(self.price_selector, default=''),
                                           r'de (.*) à .*',
                                           default=0))(self)
                if cost == 0:
                    # Not a range: parse the price text as a whole.
                    return CleanDecimal(self.price_selector, default=NotAvailable)(self)
                else:
                    return cost

            obj_currency = Currency(price_selector)

            def obj_utilities(self):
                """Return INCLUDED when the price text contains "CC", else UNKNOWN."""
                utilities = CleanText(
                    './div/div/span[@class="price-label"]|'
                    './div/div[@class="item-price-pdf"]|'
                    './div/div/span[@class="item-price"]'
                )(self)
                if "CC" in utilities:
                    return UTILITIES.INCLUDED
                else:
                    return UTILITIES.UNKNOWN

            obj_text = CleanText('./div/p[@itemprop="description"]')
            # Area is the number directly preceding " m2" in the title.
            obj_area = CleanDecimal(
                Regexp(
                    obj_title,
                    r'(.*?)([\d,\.]*) m2(.*?)',
                    '\\2',
                    default=None
                ),
                replace_dots=True,
                default=NotLoaded
            )
            obj_url = Format(
                "https://immobilier.lefigaro.fr/annonces/annonce-%s.html",
                CleanText('./@data-classified-id')
            )
            obj_price_per_meter = PricePerMeterFilter()

            def obj_phone(self):
                """Return the phone shown in the block, or NotLoaded if truncated."""
                phone = CleanText('./div/div/ul/li[has-class("js-clickphone")]',
                                  replace=[('Téléphoner : ', '')],
                                  default=NotLoaded)(self)
                if '...' in phone:
                    # The number is cut off with "..." in the listing.
                    return NotLoaded
                return phone

            def obj_details(self):
                """Return ``{"fees": ...}`` from the fees label, or NotLoaded."""
                charges = CleanText('.//span[@class="price-fees"]',
                                    default=None)(self)
                if not charges:
                    return NotLoaded

                parts = charges.split(":")
                # Normally "label: value"; without a colon keep the whole text
                # instead of raising IndexError.
                fees = parts[1] if len(parts) > 1 else parts[0]
                return {
                    "fees": fees.strip()
                }

            def obj_photos(self):
                """Return the block's default image as a single-photo list."""
                url = CleanText('./div[has-class("default-img")]/img/@data-src')(self)
                if url:
                    url = unquote(url)
                    if "http://" in url[3:]:
                        # A second "http://" is embedded in the URL: keep that
                        # inner URL, up to the last "?" if there is one.
                        rindex = url.rfind("?")
                        if rindex == -1:
                            rindex = None
                        url = url[url.find("http://", 3):rindex]
                    return [HousingPhoto(url)]
                else:
                    return NotLoaded
class TypeDecimal(Filter):
    """Filter converting the selected value to :class:`decimal.Decimal`."""

    def filter(self, el):
        as_decimal = Decimal(el)
        return as_decimal
class FromTimestamp(Filter):
    """Filter turning a timestamp in milliseconds into a ``datetime``."""

    def filter(self, el):
        # datetime.fromtimestamp() expects seconds.
        seconds = el / 1000.0
        return datetime.fromtimestamp(seconds)
class PhonePage(JsonPage):
    """JSON page holding an advert's phone number."""

    def get_phone(self):
        """Return the ``phoneNumber`` entry of the document (None if absent)."""
        phone_number = self.doc.get('phoneNumber')
        return phone_number
class HousingPage2(JsonPage):
    """JSON description of a single advert (REST ``classifieds`` endpoint)."""

    @method
    class get_housing(ItemElement):
        klass = Housing

        def is_agency(self):
            """Tell whether the advert is published by an agency.

            ``agency/isParticulier`` is accepted both as a JSON boolean and as
            the string ``'false'``.  When the entry is missing the advert is
            not considered an agency one.
            """
            is_personal = Dict('agency/isParticulier', default=None)(self)
            return str(is_personal).lower() == 'false'

        obj_id = Env('_id')

        def obj_type(self):
            """Map the transaction (and furnished/viager hints) to a post type."""
            transaction = Dict('characteristics/transaction')(self)
            if transaction == 'location':
                if Dict('characteristics/isFurnished')(self):
                    return POSTS_TYPES.FURNISHED_RENT
                else:
                    return POSTS_TYPES.RENT
            elif transaction == 'vente':
                estate_type = Dict('characteristics/estateType')(self).lower()
                if 'viager' in estate_type:
                    return POSTS_TYPES.VIAGER
                else:
                    return POSTS_TYPES.SALE
            else:
                return NotAvailable

        def obj_advert_type(self):
            """Return PROFESSIONAL for agency adverts, PERSONAL otherwise."""
            # is_agency must be *called*: the bound method itself is always
            # truthy, which made every advert PROFESSIONAL.
            if self.is_agency():
                return ADVERT_TYPES.PROFESSIONAL
            else:
                return ADVERT_TYPES.PERSONAL

        def obj_house_type(self):
            """Derive the house type from keywords found in ``estateType``."""
            estate_type = Dict('characteristics/estateType')(self).lower()
            if 'appartement' in estate_type:
                return HOUSE_TYPES.APART
            elif 'maison' in estate_type:
                return HOUSE_TYPES.HOUSE
            elif 'parking' in estate_type:
                return HOUSE_TYPES.PARKING
            elif 'terrain' in estate_type:
                return HOUSE_TYPES.LAND
            else:
                return HOUSE_TYPES.OTHER

        obj_title = Dict('characteristics/titleWithTransaction')
        obj_location = Format('%s %s %s', Dict('location/address'),
                              Dict('location/cityLabel'),
                              Dict('location/postalCode'))

        def obj_cost(self):
            """Return ``price``, or ``priceMin`` when the price is 0."""
            cost = TypeDecimal(Dict('characteristics/price'))(self)
            if cost == 0:
                cost = TypeDecimal(Dict('characteristics/priceMin'))(self)
            return cost

        obj_currency = BaseCurrency.get_currency('€')

        def obj_utilities(self):
            """Return INCLUDED when ``areFeesIncluded`` is truthy, else EXCLUDED."""
            are_fees_included = Dict('characteristics/areFeesIncluded',
                                     default=None)(self)
            if are_fees_included:
                return UTILITIES.INCLUDED
            else:
                return UTILITIES.EXCLUDED

        obj_text = CleanHTML(Dict('characteristics/description'))
        obj_url = BrowserURL('housing_html', _id=Env('_id'))

        def obj_area(self):
            """Return ``area``, or ``areaMin`` when the area is 0."""
            area = TypeDecimal(Dict('characteristics/area'))(self)
            if area == 0:
                area = TypeDecimal(Dict('characteristics/areaMin'))(self)
            return area

        obj_date = FromTimestamp(Dict('characteristics/date'))
        obj_bedrooms = TypeDecimal(Dict('characteristics/bedroomCount'))

        def obj_rooms(self):
            """Return the first ``roomCount`` entry, or NotAvailable."""
            # TODO: Why is roomCount a list?
            rooms = Dict('characteristics/roomCount', default=[])(self)
            if rooms:
                return TypeDecimal(rooms[0])(self)
            return NotAvailable

        obj_price_per_meter = PricePerMeterFilter()

        def obj_photos(self):
            """Return one ``HousingPhoto`` per image that has an ``xl`` URL."""
            photos = []
            for img in Dict('characteristics/images')(self):
                url = img.get('xl')
                if not url:
                    # No "xl" rendition: nothing to search or to store.
                    continue
                # When the URL goes through thbr.figarocms.net and embeds
                # another "http://" URL, keep the embedded one.
                m = re.search(r'http://thbr\.figarocms\.net.*(http://.*)', url)
                photos.append(HousingPhoto(m.group(1) if m else url))
            return photos

        def obj_DPE(self):
            """Return the ``ENERGY_CLASS`` member named by the energy category."""
            DPE = Dict(
                'characteristics/energyConsumptionCategory',
                default=""
            )(self)
            return getattr(ENERGY_CLASS, DPE, NotAvailable)

        def obj_GES(self):
            """Return the ``ENERGY_CLASS`` member named by the emission category."""
            GES = Dict(
                'characteristics/greenhouseGasEmissionCategory',
                default=""
            )(self)
            return getattr(ENERGY_CLASS, GES, NotAvailable)

        def obj_details(self):
            """Collect the optional characteristics into a plain dict.

            Missing entries are stored as ``NotAvailable``; ``rooms`` is only
            set when ``roomCount`` is non-empty, and ``agency`` joins the
            non-empty corporate name and address parts with ", ".
            """
            details = {}
            details['fees'] = Dict(
                'characteristics/fees', default=NotAvailable
            )(self)
            details['agencyFees'] = Dict(
                'characteristics/agencyFees', default=NotAvailable
            )(self)
            details['guarantee'] = Dict(
                'characteristics/guarantee', default=NotAvailable
            )(self)
            details['bathrooms'] = Dict(
                'characteristics/bathroomCount', default=NotAvailable
            )(self)
            details['creationDate'] = FromTimestamp(
                Dict(
                    'characteristics/creationDate', default=NotAvailable
                ),
                default=NotAvailable
            )(self)
            details['availabilityDate'] = Dict(
                'characteristics/estateAvailabilityDate', default=NotAvailable
            )(self)
            details['exposure'] = Dict(
                'characteristics/exposure', default=NotAvailable
            )(self)
            details['heatingType'] = Dict(
                'characteristics/heatingType', default=NotAvailable
            )(self)
            details['floor'] = Dict(
                'characteristics/floor', default=NotAvailable
            )(self)
            details['bedrooms'] = Dict(
                'characteristics/bedroomCount', default=NotAvailable
            )(self)
            details['isFurnished'] = Dict(
                'characteristics/isFurnished', default=NotAvailable
            )(self)
            rooms = Dict('characteristics/roomCount', default=[])(self)
            if len(rooms):
                details['rooms'] = rooms[0]
            details['available'] = Dict(
                'characteristics/isAvailable', default=NotAvailable
            )(self)
            # Fall back to an empty mapping: the .get() calls below need a
            # dict even when the advert has no "agency" entry.
            agency = Dict('agency', default=None)(self) or {}
            details['agency'] = ', '.join([
                x for x in [
                    agency.get('corporateName', ''),
                    agency.get('corporateAddress', ''),
                    agency.get('corporatePostalCode', ''),
                    agency.get('corporateCity', '')
                ] if x
            ])
            return details

    def get_total_page(self):
        """Return ``pagination.total`` from the document, or 0 without pagination."""
        if 'pagination' in self.doc:
            return self.doc.get('pagination').get('total')
        return 0
class HousingPage(HTMLPage):
    """HTML page of a single advert."""

    @method
    class get_housing(ItemElement):
        klass = Housing

        obj_id = Env('_id')
        obj_title = CleanText('//h1[@itemprop="name"]')
        obj_location = CleanText('//span[@class="informations-localisation"]')
        obj_cost = CleanDecimal('//span[@itemprop="price"]')
        obj_currency = Currency('//span[@itemprop="price"]')
        obj_text = CleanHTML('//div[@itemprop="description"]')
        obj_url = BrowserURL('housing', _id=Env('_id'))
        # Area is the run of digits directly preceding " m2" in the title.
        obj_area = CleanDecimal(
            Regexp(
                CleanText('//h1[@itemprop="name"]'),
                r'(.*?)(\d*) m2(.*?)',
                '\\2'
            ),
            default=NotAvailable
        )
        obj_price_per_meter = PricePerMeterFilter()

        def obj_photos(self):
            """Return one ``HousingPhoto`` per thumbnail image of the page."""
            thumbnails = XPath('//a[@class="thumbnail-link"]/img[@itemprop="image"]')(self)
            # The captured group is the URL embedded after the
            # thbr.figarocms.net prefix of each thumbnail's src.
            return [
                HousingPhoto(
                    Regexp(CleanText('./@src'), r'http://thbr\.figarocms\.net.*(http://.*)')(thumbnail)
                )
                for thumbnail in thumbnails
            ]

        def obj_details(self):
            """Return the feature list, plus the energy block, as a dict."""
            details = {}

            for feature in XPath('//div[@class="features clearfix"]/ul/li')(self):
                name = CleanText('./span[@class="name"]')(feature)
                content = CleanText('./span[@class="value"]')(feature)
                # Keep only pairs where both texts are non-empty.
                if name and content:
                    details[name] = content

            # Energy block: its title is the key, its consumption the value.
            dpe_title = CleanText('//div[@class="title-dpe clearfix"]')(self)
            dpe_value = CleanText('//div[@class="energy-consumption"]')(self)
            if dpe_title and dpe_value:
                details[dpe_title] = dpe_value

            return details
requirements.txt 0000664 0000000 0000000 00000000014 14575653726 0033300 0 ustar 00root root 0000000 0000000 woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-explorimmo/modules/explorimmo woob ~= 3.2
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-explorimmo/modules/explorimmo/test.py 0000664 0000000 0000000 00000006567 14575653726 0031447 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
from woob.capabilities.housing import Query, ADVERT_TYPES, POSTS_TYPES
from woob.tools.capabilities.housing.housing_test import HousingTest
from woob.tools.test import BackendTest
class ExplorimmoTest(BackendTest, HousingTest):
    """Backend tests for the explorimmo module, one per kind of search."""

    MODULE = 'explorimmo'

    FIELDS_ALL_HOUSINGS_LIST = [
        "id", "type", "advert_type", "house_type", "title", "location",
        "utilities", "text", "area", "url"
    ]
    FIELDS_ANY_HOUSINGS_LIST = [
        "photos", "cost", "currency"
    ]
    FIELDS_ALL_SINGLE_HOUSING = [
        "id", "url", "type", "advert_type", "house_type", "title", "area",
        "cost", "currency", "utilities", "date", "location", "text", "rooms",
        "details"
    ]
    FIELDS_ANY_SINGLE_HOUSING = [
        "bedrooms",
        "photos",
        "DPE",
        "GES",
        "phone"
    ]

    def _cities_for(self, pattern):
        """Return the cities found for *pattern*, tagged with this backend."""
        found = []
        for city in self.backend.search_city(pattern):
            city.backend = self.backend.name
            found.append(city)
        return found

    def test_explorimmo_rent(self):
        """Rentals in Paris: at least 20 m2, at most 1500."""
        query = Query()
        query.area_min = 20
        query.cost_max = 1500
        query.type = POSTS_TYPES.RENT
        query.cities = self._cities_for('paris')
        self.check_against_query(query)

    def test_explorimmo_sale(self):
        """Sales in Paris of at least 20 m2."""
        query = Query()
        query.area_min = 20
        query.type = POSTS_TYPES.SALE
        query.cities = self._cities_for('paris')
        self.check_against_query(query)

    def test_explorimmo_furnished_rent(self):
        """Furnished rentals in Paris: at least 20 m2, at most 1500."""
        query = Query()
        query.area_min = 20
        query.cost_max = 1500
        query.type = POSTS_TYPES.FURNISHED_RENT
        query.cities = self._cities_for('paris')
        self.check_against_query(query)

    def test_explorimmo_viager(self):
        """Viager adverts for the cities matching '85'."""
        query = Query()
        query.type = POSTS_TYPES.VIAGER
        query.cities = self._cities_for('85')
        self.check_against_query(query)

    def test_explorimmo_personal(self):
        """A search restricted to personal adverts is expected to be empty."""
        query = Query()
        query.area_min = 20
        query.cost_max = 900
        query.type = POSTS_TYPES.RENT
        query.advert_types = [ADVERT_TYPES.PERSONAL]
        query.cities = self._cities_for('paris')

        results = list(self.backend.search_housings(query))
        self.assertEqual(len(results), 0)