Newer
Older
# -*- coding: utf-8 -*-
# Copyright(C) 2017 Phyks (Lucas Verney)
#
# This file is part of a weboob module.
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
import datetime
from weboob.browser.pages import JsonPage, HTMLPage, pagination
from weboob.browser.filters.standard import (
CleanDecimal, CleanText, Currency, Date, Env, Format, Regexp, RegexpError
from weboob.browser.filters.html import AbsoluteLink, Attr, Link, XPathNotFound
from weboob.browser.elements import ItemElement, ListElement, method
from weboob.capabilities.base import NotAvailable, NotLoaded
from weboob.capabilities.housing import (
City, Housing, HousingPhoto,
UTILITIES, ENERGY_CLASS, POSTS_TYPES, ADVERT_TYPES
from weboob.tools.capabilities.housing.housing import PricePerMeterFilter
from .constants import AVAILABLE_TYPES, QUERY_TYPES, QUERY_HOUSE_TYPES
class CitiesPage(JsonPage):
def iter_cities(self):
cities_list = self.doc
if isinstance(self.doc, dict):
cities_list = self.doc.values()
for city in cities_list:
city_obj = City()
city_obj.id = city
city_obj.name = city
yield city_obj
class HousingPage(HTMLPage):
@method
class get_housing(ItemElement):
klass = Housing
obj_id = Format(
'%s:%s',
Env('type'),
Attr('//div[boolean(@data-property-reference)]', 'data-property-reference')
)
obj_advert_type = ADVERT_TYPES.PROFESSIONAL
def obj_type(self):
type = Env('type')(self)
if type == 'location':
if 'appartement-meuble' in self.page.url:
return POSTS_TYPES.FURNISHED_RENT
else:
return POSTS_TYPES.RENT
elif type == 'achat':
return POSTS_TYPES.SALE
else:
return NotAvailable
def obj_url(self):
return self.page.url
def obj_house_type(self):
url = self.obj_url()
for house_type, types in QUERY_HOUSE_TYPES.items():
for type in types:
if ('/%s/' % type) in url:
return house_type
return NotAvailable
obj_title = CleanText('//h1[has-class("OfferTop-title")]')
obj_area = CleanDecimal(
Regexp(
CleanText(
'//div[has-class("MiniData")]//p[has-class("MiniData-item")][1]'
),
r'(\d*\.*\d*) .*',
default=NotAvailable
),
default=NotAvailable
'//span[has-class("OfferTop-price")]',
default=NotAvailable
)
obj_price_per_meter = PricePerMeterFilter()
'//span[has-class("OfferTop-price")]'
)
obj_location = Format(
'%s - %s',
CleanText('//p[@data-behat="adresseBien"]'),
CleanText('//p[has-class("OfferTop-loc")]')
)
obj_text = CleanText('//div[has-class("OfferDetails-content")]/p[1]')
obj_phone = Regexp(
Link(
'//a[has-class("OfferContact-btn--tel")]'
),
r'tel:(.*)'
)
def obj_photos(self):
photos = []
for photo in self.xpath('//div[has-class("OfferSlider")]//img'):
photo_url = Attr('.', 'src')(photo)
photo_url = photo_url.replace('640/480', '800/600')
photos.append(HousingPhoto(photo_url))
return photos
obj_date = datetime.date.today()
def obj_utilities(self):
price = CleanText(
'//p[has-class("OfferTop-price")]'
)(self)
if "charges comprises" in price.lower():
return UTILITIES.INCLUDED
else:
return UTILITIES.EXCLUDED
obj_rooms = CleanDecimal(
Phyks (Lucas Verney)
committed
'//div[has-class("MiniData")]//p[has-class("MiniData-item")][2]',
default=NotAvailable
)
obj_bedrooms = CleanDecimal(
'//div[has-class("MiniData")]//p[has-class("MiniData-item")][3]',
default=NotAvailable
)
try:
electric_consumption = CleanDecimal(Regexp(
Attr('//div[has-class("OfferDetails-content")]//img', 'src'),
r'https://dpe.foncia.net\/(\d+)\/.*'
except (RegexpError, XPathNotFound):
electric_consumption = None
DPE = ""
if electric_consumption is not None:
if electric_consumption <= 50:
DPE = "A"
DPE = "F"
else:
DPE = "G"
return getattr(ENERGY_CLASS, DPE, NotAvailable)
return NotAvailable
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
details = {}
dispo = Date(
Regexp(
CleanText('//p[has-class("OfferTop-dispo")]'),
r'.* (\d\d\/\d\d\/\d\d\d\d)',
default=datetime.date.today().isoformat()
)
)(self)
if dispo is not None:
details["dispo"] = dispo
priceMentions = CleanText(
'//p[has-class("OfferTop-mentions")]',
default=None
)(self)
if priceMentions is not None:
details["priceMentions"] = priceMentions
agency = CleanText(
'//p[has-class("OfferContact-address")]',
default=None
)(self)
if agency is not None:
details["agency"] = agency
for item in self.xpath('//div[has-class("OfferDetails-columnize")]/div'):
category = CleanText(
'./h3[has-class("OfferDetails-title--2")]',
default=None
)(item)
if not category:
continue
for detail_item in item.xpath('.//ul[has-class("List--data")]/li'):
detail_title = CleanText('.//span[has-class("List-data")]')(detail_item)
detail_value = CleanText('.//*[has-class("List-value")]')(detail_item)
details[category][detail_title] = detail_value
for detail_item in item.xpath('.//ul[has-class("List--bullet")]/li'):
detail_title = CleanText('.')(detail_item)
details[category][detail_title] = True
try:
electric_consumption = CleanDecimal(Regexp(
Attr('//div[has-class("OfferDetails-content")]//img', 'src'),
r'https://dpe.foncia.net\/(\d+)\/.*'
details["electric_consumption"] = (
'{} kWhEP/m².an'.format(electric_consumption)
)
except (RegexpError, XPathNotFound):
pass
return details
class SearchPage(HTMLPage):
def do_search(self, query, cities):
form = self.get_form('//form[@name="searchForm"]')
form['searchForm[type]'] = QUERY_TYPES.get(query.type, None)
form['searchForm[localisation]'] = cities
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
form['searchForm[type_bien][]'] = []
for house_type in query.house_types:
try:
form['searchForm[type_bien][]'].extend(
QUERY_HOUSE_TYPES[house_type]
)
except KeyError:
pass
form['searchForm[type_bien][]'] = [
x for x in form['searchForm[type_bien][]']
if x in AVAILABLE_TYPES.get(query.type, [])
]
if query.area_min:
form['searchForm[surface_min]'] = query.area_min
if query.area_max:
form['searchForm[surface_max]'] = query.area_max
if query.cost_min:
form['searchForm[prix_min]'] = query.cost_min
if query.cost_max:
form['searchForm[prix_max]'] = query.cost_max
if query.nb_rooms:
form['searchForm[pieces]'] = [i for i in range(1, query.nb_rooms + 1)]
form.submit()
def find_housing(self, query_type, housing):
form = self.get_form('//form[@name="searchForm"]')
form['searchForm[type]'] = query_type
form['searchForm[reference]'] = housing
form.submit()
class SearchResultsPage(HTMLPage):
@pagination
@method
class iter_housings(ListElement):
item_xpath = '//article[has-class("TeaserOffer")]'
next_page = Link('//div[has-class("Pagination--more")]/a[contains(text(), "Suivant")]')
class item(ItemElement):
klass = Housing
obj_id = Format(
'%s:%s',
Env('type'),
Attr('.//span[boolean(@data-reference)]', 'data-reference')
)
obj_url = AbsoluteLink('.//h3[has-class("TeaserOffer-title")]/a')
obj_type = Env('query_type')
obj_advert_type = ADVERT_TYPES.PROFESSIONAL
def obj_house_type(self):
for house_type, types in QUERY_HOUSE_TYPES.items():
for type in types:
if ('/%s/' % type) in url:
return house_type
obj_url = AbsoluteLink('.//h3[has-class("TeaserOffer-title")]/a')
obj_title = CleanText('.//h3[has-class("TeaserOffer-title")]')
obj_area = CleanDecimal(
Regexp(
CleanText(
'.//div[has-class("MiniData")]//p[@data-behat="surfaceDesBiens"]'
),
r'(\d*\.*\d*) .*',
default=NotAvailable
),
default=NotAvailable
'.//strong[has-class("TeaserOffer-price-num")]'
)
obj_price_per_meter = PricePerMeterFilter()
obj_currency = Currency(
'.//strong[has-class("TeaserOffer-price-num")]'
)
obj_location = CleanText('.//p[has-class("TeaserOffer-loc")]')
obj_text = CleanText('.//p[has-class("TeaserOffer-description")]')
def obj_photos(self):
url = CleanText(Attr('.//a[has-class("TeaserOffer-ill")]/img', 'src'))(self)
# If the used photo is a default no photo, the src is on the same domain.
if url[0] == '/':
return []
else:
return [HousingPhoto(url)]
obj_date = datetime.date.today()
def obj_utilities(self):
price = CleanText(
'.//strong[has-class("TeaserOffer-price-num")]'
)(self)
if "charges comprises" in price.lower():
return UTILITIES.INCLUDED
else:
return UTILITIES.EXCLUDED
obj_rooms = CleanDecimal(
'.//div[has-class("MiniData")]//p[@data-behat="nbPiecesDesBiens"]',
)
obj_bedrooms = CleanDecimal(
'.//div[has-class("MiniData")]//p[@data-behat="nbChambresDesBiens"]',
)
def obj_details(self):
return {
"dispo": Date(
Attr('.//span[boolean(@data-dispo)]', 'data-dispo',
default=datetime.date.today().isoformat())
)(self),
"priceMentions": CleanText('.//span[has-class("TeaserOffer-price-mentions")]')(self)
}