pax_global_header 0000666 0000000 0000000 00000000064 14575653726 0014536 g ustar 00root root 0000000 0000000 52 comment=5f3d558793b537a74480241ac6981479f5938cd3
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-seloger/ 0000775 0000000 0000000 00000000000 14575653726 0023502 5 ustar 00root root 0000000 0000000 woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-seloger/modules/ 0000775 0000000 0000000 00000000000 14575653726 0025152 5 ustar 00root root 0000000 0000000 woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-seloger/modules/seloger/ 0000775 0000000 0000000 00000000000 14575653726 0026612 5 ustar 00root root 0000000 0000000 woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-seloger/modules/seloger/__init__.py 0000664 0000000 0000000 00000000077 14575653726 0030727 0 ustar 00root root 0000000 0000000 from .module import SeLogerModule
__all__ = ['SeLogerModule']
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-seloger/modules/seloger/browser.py 0000664 0000000 0000000 00000006200 14575653726 0030645 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
from woob.browser import PagesBrowser, URL
from woob.browser.profiles import Firefox
from woob.capabilities.housing import TypeNotSupported, POSTS_TYPES
from .constants import TYPES, RET, BASE_URL
from .pages import SearchResultsPage, HousingPage, CitiesPage, ErrorPage
__all__ = ['SeLogerBrowser']
class SeLogerBrowser(PagesBrowser):
    """Browser for seloger.com: city autocompletion, listing search and ad details."""

    BASEURL = BASE_URL
    PROFILE = Firefox()

    # The named groups below must match the keyword arguments given to
    # ``open()``/``go()`` in this class (``pattern``, ``query``, ``page_number``,
    # ``_id``); a bare ``(?P.*)`` is not a valid regular expression group.
    cities = URL(
        r'https://autocomplete.svc.groupe-seloger.com/auto/complete/0/Ville/6\?text=(?P<pattern>.*)',
        CitiesPage)
    search = URL(r'/list.html\?(?P<query>.*)&LISTING-LISTpg=(?P<page_number>\d+)', SearchResultsPage)
    housing = URL(r'/(?P<_id>.+)/detail.htm',
                  r'/annonces/.+',
                  HousingPage)
    captcha = URL(r'http://validate.perfdrive.com', ErrorPage)

    def search_geo(self, pattern):
        """Return the cities proposed by the autocompletion service for *pattern*."""
        return self.cities.open(pattern=pattern).iter_cities()

    def search_housings(self, _type, cities, nb_rooms, area_min, area_max,
                        cost_min, cost_max, house_types, advert_types):
        """Build the search query string, open the first result page and iterate its housings.

        :param _type: a ``POSTS_TYPES`` value; must be a key of ``TYPES``
        :param cities: iterable of city identifiers, sent as ``inseeCodes``
        :param nb_rooms: requested number of rooms (values above 5 are sent as 5),
            or a falsy value for no room filter
        :param area_min: minimum surface; a falsy value is sent as ``0``
        :param area_max: maximum surface; a falsy value is sent as ``Nan``
        :param cost_min: minimum price; a falsy value is sent as ``0``
        :param cost_max: maximum price; a falsy value is sent as ``Nan``
        :param house_types: iterable of ``HOUSE_TYPES``; entries absent from ``RET`` are dropped
        :param advert_types: forwarded untouched to ``iter_housings``
        :raises TypeNotSupported: when *_type* is not a key of ``TYPES``
        :return: whatever ``SearchResultsPage.iter_housings`` yields
        """
        if _type not in TYPES:
            raise TypeNotSupported()

        price = '{}/{}'.format(cost_min or '0', cost_max or 'Nan')
        surface = '{}/{}'.format(area_min or '0', area_max or 'Nan')
        rooms = '&rooms={}'.format(min(nb_rooms, 5)) if nb_rooms else ''

        if _type != POSTS_TYPES.VIAGER:
            # Every search except viager carries the extra ``natures`` filter.
            _type = '{}'.format(TYPES.get(_type))
            viager = "&natures=1,2,4"
        else:
            _type = TYPES.get(_type)
            viager = ""

        places = ','.join('{}'.format(c) for c in cities)
        places = '[{{"inseeCodes": [{}]}}]'.format(places)

        ret = ','.join(RET.get(t) for t in house_types if t in RET)

        query = "projects={}{}&places={}&types={}&price={}&surface={}{}&enterprise=0&qsVersion=1.0"\
            .format(_type,
                    viager,
                    places,
                    ret,
                    price,
                    surface,
                    rooms)

        page = self.search.go(query=query, page_number=1)
        return page.iter_housings(query_type=_type,
                                  advert_types=advert_types,
                                  ret=ret)

    def get_housing(self, _id, obj=None):
        """Open the detail page of ad *_id* and fill *obj* (or a new object when it is None)."""
        return self.housing.go(_id=_id).get_housing(obj=obj)
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-seloger/modules/seloger/constants.py 0000664 0000000 0000000 00000000610 14575653726 0031175 0 ustar 00root root 0000000 0000000 from woob.capabilities.housing import POSTS_TYPES, HOUSE_TYPES
# Numeric identifier SeLoger expects in the ``projects=`` query parameter for
# each woob post type. RENT and FURNISHED_RENT share the same identifier, and
# RENT is listed first so that reverse lookups by value resolve to RENT.
TYPES = {
    POSTS_TYPES.RENT: 1,
    POSTS_TYPES.SALE: 2,
    POSTS_TYPES.FURNISHED_RENT: 1,
    POSTS_TYPES.VIAGER: 5,
}

# Code sent in the ``types=`` query parameter for each woob house type; the
# same codes are compared against the estate/property type ids found in pages.
RET = {
    HOUSE_TYPES.HOUSE: '2',
    HOUSE_TYPES.APART: '1',
    HOUSE_TYPES.LAND: '4',
    HOUSE_TYPES.PARKING: '3',
    HOUSE_TYPES.OTHER: '10',
}

# Root of the website, used as the browser base URL.
BASE_URL = 'https://www.seloger.com'
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-seloger/modules/seloger/favicon.png 0000664 0000000 0000000 00000003337 14575653726 0030753 0 ustar 00root root 0000000 0000000 PNG
IHDR @ @ iq bKGD 5gDW pHYs tIME Ƚ
iTXtComment Created with GIMPd.e CIDATxyPWǿ;#9RDBGKD/ԪcjVxN;N[;j;:Gvъ-@%N!Ad#.{nv}oCտb x! pE!qgfC2aq1Z`p>x`=09u:T[˦8yt5N>Qg eV&L
J I͝`>|T/ve& >). R"A+"[* ?`)e9_A 0F_aA.g>)ј-vjix8j*#!TIZ "wD1ta}pܸ
l4zXOE50G$qnnm}X͞9o߶.}c7{AWո
x
n J<8Gzڂ%9^f :,nzfDF
<+A䌩f>u: jXD_7,8N H3!T+Ղr2- Gpr6B>QԩqYN/ gm<rsX2ᅣX3h+}APw\*NA߸0/]Yd!d!EPl|l W}j0=ͿHN8Zw>m N4nǧ@48e9WA|/PɃ|\P>
p%5Z x=n
G}8$sړP/Hg7x^UVϫA,Ʉ0,\㷇AW߁ِHkS{">[7tSPϤCE$W QUGU
L]
٤H4Y%"_Bvqd@U"SɦMXQ(ݰJӻ `4YTx.Wjz)q71~!z0_gkk!Ct }"t :-괠j{mvz%FhYթu^y0[BYHռ˫"5S_9 ^5y7(ZOz@:%Sx:
j3 m5vR ;7Ot`ju`/M{sU@XzőD\˴aԧ!
{GH@ۖ|01y_"6x(P*B!ap^#PnkIy}Yyx?Ecs`7Q'P^՛~mj bUHc4gr8y+ਬaJX/rOf;yN
M-p=6jN>RإoD`H2@4< X(I3`-Rlj+)&C]`q ` ?88P IENDB` woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-seloger/modules/seloger/module.py 0000664 0000000 0000000 00000004650 14575653726 0030456 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
from woob.capabilities.housing import CapHousing, Housing, HousingPhoto
from woob.tools.backend import Module
from .browser import SeLogerBrowser
__all__ = ['SeLogerModule']
class SeLogerModule(Module, CapHousing):
    """woob backend exposing seloger.com through the housing capability."""

    NAME = 'seloger'
    MAINTAINER = u'Romain Bignon'
    EMAIL = 'romain@weboob.org'
    VERSION = '3.6'
    DESCRIPTION = 'French housing website'
    LICENSE = 'AGPLv3+'
    BROWSER = SeLogerBrowser

    def search_housings(self, query):
        """Run *query* on the site, restricted to the cities owned by this backend.

        Returns an empty list when none of the query's cities belongs to this backend.
        """
        own_city_ids = [city.id for city in query.cities if city.backend == self.name]
        if not own_city_ids:
            return []

        return self.browser.search_housings(query.type, own_city_ids, query.nb_rooms,
                                            query.area_min, query.area_max,
                                            query.cost_min, query.cost_max,
                                            query.house_types,
                                            query.advert_types)

    def get_housing(self, housing):
        """Fetch one ad; *housing* is either a ``Housing`` object to fill or a bare id."""
        if isinstance(housing, Housing):
            return self.browser.get_housing(housing.id, housing)
        # A bare identifier was given: let the browser build a fresh object.
        return self.browser.get_housing(housing, None)

    def search_city(self, pattern):
        """Return the cities matching *pattern*."""
        return self.browser.search_geo(pattern)

    def fill_photo(self, photo, fields):
        """Download the photo content when 'data' is requested and not yet present."""
        needs_download = 'data' in fields and photo.url and not photo.data
        if needs_download:
            photo.data = self.browser.open(photo.url).content
        return photo

    def fill_housing(self, housing, fields):
        """Reload *housing* from its detail page when at least one field is requested."""
        if len(fields) == 0:
            return housing
        return self.browser.get_housing(housing.id, housing)

    OBJECTS = {HousingPhoto: fill_photo, Housing: fill_housing}
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-seloger/modules/seloger/pages.py 0000664 0000000 0000000 00000024613 14575653726 0030271 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
import codecs
from woob.browser.elements import DictElement, ItemElement, method
from woob.browser.filters.json import Dict
from woob.browser.filters.standard import BrowserURL, CleanDecimal, CleanText, Currency, Env, Field, Regexp
from woob.browser.pages import HTMLPage, JsonPage, pagination
from woob.capabilities.address import PostalAddress
from woob.capabilities.base import NotAvailable, NotLoaded
from woob.capabilities.housing import ADVERT_TYPES, ENERGY_CLASS, POSTS_TYPES, UTILITIES, City, Housing, HousingPhoto
from woob.exceptions import ActionNeeded
from woob.tools.capabilities.housing.housing import PricePerMeterFilter
from woob.tools.json import json
from .constants import RET, TYPES, BASE_URL
class ErrorPage(HTMLPage):
    """Page whose mere loading means a captcha must be solved by the user."""

    def on_load(self):
        """Always raise ``ActionNeeded`` asking the user to solve the captcha."""
        message = "Please resolve the captcha"
        raise ActionNeeded(message)
class CitiesPage(JsonPage):
    """JSON answer of the city autocompletion endpoint."""

    @method
    class iter_cities(DictElement):
        """Build one ``City`` object per entry of the JSON document."""

        # presumably skips entries whose id was already yielded — confirm
        # against woob's DictElement/ListElement documentation
        ignore_duplicate = True

        class item(ItemElement):
            klass = City

            # The city id is read from ``Params/ci`` and its label from ``Display``.
            obj_id = Dict('Params/ci')
            obj_name = Dict('Display')
class SearchResultsPage(HTMLPage):
    """Result list page; the listing data is a JSON string embedded in a script tag."""

    def __init__(self, *args, **kwargs):
        """Parse the HTML, then replace ``self.doc`` with the decoded embedded JSON."""
        HTMLPage.__init__(self, *args, **kwargs)

        # The payload is the string literal handed to JSON.parse() in the page script.
        initial_data_re = r"window\[\"initialData\"\] = JSON.parse\(\"({.*})\"\);window\[\"tags\"\]"
        escaped = Regexp(CleanText('//script'), initial_data_re)(self.doc)

        # Undo the JavaScript string escaping, then round-trip through UTF-8
        # (with 'surrogatepass' on the encoding side) before parsing.
        unescaped = codecs.unicode_escape_decode(escaped)[0]
        payload = unescaped.encode('utf-8', 'surrogatepass').decode('utf-8')

        self.doc = json.loads(payload)

    @pagination
    @method
    class iter_housings(DictElement):
        """Yield one ``Housing`` per card of the result list, following pagination."""

        ignore_duplicate = True
        item_xpath = 'cards/list'

        def next_page(self):
            """Return the URL of the next page while results remain, else None."""
            current_page = Dict('navigation/pagination/page')(self)
            total_count = Dict('navigation/counts/count')(self)
            page_size = Dict('navigation/pagination/resultsPerPage')(self)

            has_more = int(total_count) / int(page_size) > int(current_page)
            if has_more:
                return BrowserURL('search', query=Env('query'), page_number=int(current_page) + 1)(self)

        class item(ItemElement):
            klass = Housing

            def condition(self):
                """Keep cards having a URL and an id, hosted on the site or found by a rent search."""
                url = Dict('classifiedURL', default=False)(self)
                if not url:
                    return url

                ad_id = Dict('id', default=False)(self)
                if not ad_id:
                    return ad_id

                if url.startswith(BASE_URL):
                    return True
                return int(Env('query_type')(self)) == TYPES[POSTS_TYPES.RENT]

            obj_id = Dict('id')

            def obj_type(self):
                """Map the searched SeLoger type id back to a ``POSTS_TYPES`` value."""
                wanted = int(Env('query_type')(self))
                for post_type, seloger_id in TYPES.items():
                    if seloger_id == wanted:
                        break
                else:
                    return NotAvailable

                if post_type == POSTS_TYPES.FURNISHED_RENT:
                    # SeLoger does not let us discriminate between furnished and not furnished.
                    return POSTS_TYPES.RENT
                return post_type

            def obj_house_type(self):
                """Map the card's ``estateTypeId`` to a ``HOUSE_TYPES`` value, or ``NotLoaded``."""
                estate_type_id = CleanText(Dict('estateTypeId'))(self)
                for house_type, code in RET.items():
                    if code == estate_type_id:
                        return house_type
                return NotLoaded

            def obj_title(self):
                """Compose the title from the estate type, the tags and the location."""
                estate_type = Dict('estateType')(self)
                tags = " / ".join(Dict('tags')(self))
                location = Field('location')(self)
                return "{} - {} - {}".format(estate_type, tags, location)

            def obj_advert_type(self):
                """PROFESSIONAL when the contact has an agency id, PERSONAL otherwise."""
                if Dict('contact/agencyId', default=False)(self):
                    return ADVERT_TYPES.PROFESSIONAL
                return ADVERT_TYPES.PERSONAL

            obj_utilities = UTILITIES.UNKNOWN

            def obj_photos(self):
                """Wrap every entry of ``photos`` in a ``HousingPhoto``."""
                return [HousingPhoto(photo) for photo in Dict('photos')(self)]

            def obj_location(self):
                """Join district, city and bracketed zip code; missing parts become empty."""
                district = Dict('districtLabel')(self) or ''
                city = Dict('cityLabel')(self) or ''
                zip_code = Dict('zipCode')(self)
                zip_part = f'({zip_code})' if zip_code else ''
                return u'%s %s %s' % (district, city, zip_part)

            obj_url = Dict('classifiedURL')
            obj_text = Dict('description')
            obj_cost = CleanDecimal(Dict('pricing/price', default=''), default=NotLoaded)
            obj_currency = Currency(Dict('pricing/price', default=''), default=NotLoaded)
            obj_area = CleanDecimal(Dict('surface'))

            def obj_price_per_meter(self):
                """Use the price per square meter from the card, else compute it with the filter."""
                from_card = CleanDecimal(Dict('pricing/squareMeterPrice'), default='')(self)
                if from_card:
                    return from_card
                return PricePerMeterFilter()(self)
class HousingPage(HTMLPage):
    """Ad detail page; its data is the JSON held by the ``__NEXT_DATA__`` script tag."""

    def __init__(self, *args, **kwargs):
        """Parse the HTML, then replace ``self.doc`` with the embedded JSON document."""
        HTMLPage.__init__(self, *args, **kwargs)
        self.doc = json.loads(CleanText('//script[@id="__NEXT_DATA__"]')(self.doc))

    @method
    class get_housing(ItemElement):
        """Fill a ``Housing`` object from the listing detail JSON."""

        klass = Housing

        obj_id = Dict('props/pageProps/listingData/listing/listingDetail/id')

        def obj_house_type(self):
            """Map ``propertyTypeId`` to a ``HOUSE_TYPES`` value, or ``NotLoaded`` if unknown."""
            naturebien = CleanText(Dict('props/pageProps/listingData/listing/listingDetail/propertyTypeId'))(self)
            return next((k for k, v in RET.items() if v == naturebien), NotLoaded)

        def obj_type(self):
            """Map ``transactionTypeId`` to a ``POSTS_TYPES`` value.

            Returns ``NotAvailable`` for an unknown id, like the result-list
            page does, instead of letting ``StopIteration`` escape.
            """
            id_type = Dict('props/pageProps/listingData/listing/listingDetail/transactionTypeId')(self)
            post_type = next((k for k, v in TYPES.items() if v == id_type), NotAvailable)
            if post_type == POSTS_TYPES.FURNISHED_RENT:
                # SeLoger does not let us discriminate between furnished and not furnished.
                return POSTS_TYPES.RENT
            return post_type

        def obj_advert_type(self):
            """PROFESSIONAL when the listing carries an agency id, PERSONAL otherwise."""
            if Dict('props/pageProps/listingData/agency/id', default=None)(self) is not None:
                return ADVERT_TYPES.PROFESSIONAL
            return ADVERT_TYPES.PERSONAL

        def obj_photos(self):
            """Build one ``HousingPhoto`` per media photo, from its ``defaultUrl``."""
            photos = Dict('props/pageProps/listingData/listing/listingDetail/media/photos')(self)
            return [HousingPhoto(photo['defaultUrl']) for photo in photos]

        obj_title = Dict('props/pageProps/listingData/listing/listingDetail/title')

        def obj_location(self):
            """Return "<district> <city> (<postal code>)", stripped of outer whitespace."""
            address = Dict('props/pageProps/listingData/listing/listingDetail/address')(self)
            return f'{address["district"] or ""} {address["city"]} ({address["postalCode"]})'.strip()

        def obj_address(self):
            """Build a ``PostalAddress`` from the listing's address entry."""
            address = Dict('props/pageProps/listingData/listing/listingDetail/address')(self)
            p = PostalAddress()
            p.street = address['street'] or ""
            p.postal_code = address['postalCode']
            p.city = address['city']
            p.full_address = Field('location')(self)
            return p

        obj_text = Dict('props/pageProps/listingData/listing/listingDetail/descriptive')
        obj_cost = Dict('props/pageProps/listingData/listing/listingDetail/listingPrice/price')
        obj_currency = Dict('props/pageProps/listingData/listing/listingDetail/listingPrice/priceUnit')
        obj_price_per_meter = PricePerMeterFilter()
        obj_area = Dict('props/pageProps/listingData/listing/listingDetail/surface')
        obj_url = Dict('props/pageProps/listingData/listing/url/value')

        def obj_phone(self):
            """Return the agency phone number, or ``NotAvailable`` when there is no agency."""
            if Dict('props/pageProps/listingData/agency/id', default=None)(self) is not None:
                # The filter has to be applied to the element: returning the bare
                # ``Dict`` instance would store a filter object instead of the number.
                return Dict('props/pageProps/listingData/agency/phoneNumber')(self)
            return NotAvailable

        def obj_utilities(self):
            """INCLUDED when the price information mentions "cc", UNKNOWN otherwise."""
            # NOTE(review): obj_cost reads ``.../listingPrice/price`` as the cost
            # itself while this path descends below it — confirm against a live payload.
            mention = \
                Dict(
                    'props/pageProps/listingData/listing/listingDetail/listingPrice/price/priceInformation',
                    default="")(self)
            if mention and "cc" in mention:
                return UTILITIES.INCLUDED
            return UTILITIES.UNKNOWN

        obj_bedrooms = CleanDecimal(Dict('props/pageProps/listingData/listing/listingDetail/bedroomCount'),
                                    default=NotAvailable)
        obj_rooms = CleanDecimal(Dict('props/pageProps/listingData/listing/listingDetail/roomCount'),
                                 default=NotAvailable)

        def obj_DPE(self):
            """Return the ``ENERGY_CLASS`` member named by ``electricityRating``, else ``NotAvailable``."""
            dpe = \
                Dict(
                    "props/pageProps/listingData/listing/listingDetail/energyPerformanceCertificate/electricityRating",
                    default=None)(self)
            if dpe is not None:
                return getattr(ENERGY_CLASS, dpe, NotAvailable)
            return NotAvailable

        def obj_GES(self):
            """Return the ``ENERGY_CLASS`` member named by ``gasRating``, else ``NotAvailable``."""
            ges = Dict("props/pageProps/listingData/listing/listingDetail/energyPerformanceCertificate/gasRating",
                       default=None)(self)
            if ges is not None:
                return getattr(ENERGY_CLASS, ges, NotAvailable)
            return NotAvailable

        def obj_details(self):
            """Collect ``{feature name: feature title}`` over every feature category."""
            details = {}
            categories = Dict('props/pageProps/listingData/listing/listingDetail/featureCategories')(self)
            for category in categories.values():
                # Only dict-shaped categories that carry a 'features' list are considered.
                if isinstance(category, dict) and 'features' in category:
                    for feature in category['features']:
                        details[feature['name']] = feature['title']
            return details
requirements.txt 0000664 0000000 0000000 00000000014 14575653726 0032012 0 ustar 00root root 0000000 0000000 woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-seloger/modules/seloger woob ~= 3.2
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-seloger/modules/seloger/test.py 0000664 0000000 0000000 00000006735 14575653726 0030156 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
from woob.capabilities.housing import Query, POSTS_TYPES, ADVERT_TYPES
from woob.tools.capabilities.housing.housing_test import HousingTest
from woob.tools.test import BackendTest
class SeLogerTest(BackendTest, HousingTest):
    """Backend tests running housing queries against the seloger module."""

    MODULE = 'seloger'

    FIELDS_ALL_HOUSINGS_LIST = [
        "id", "type", "advert_type", "house_type", "url", "title", "area",
        "utilities", "date", "location", "text"
    ]
    FIELDS_ANY_HOUSINGS_LIST = [
        "cost",  # Some posts don't have cost in seloger
        "currency",  # Same
        "photos",
    ]
    FIELDS_ALL_SINGLE_HOUSING = [
        "id", "url", "type", "advert_type", "house_type", "title", "area",
        "utilities", "date", "location", "text", "phone", "details"
    ]
    FIELDS_ANY_SINGLE_HOUSING = [
        "cost",  # Some posts don't have cost in seloger
        "currency",  # Same
        "photos",
        "rooms",
        "bedrooms",
        "DPE",
        "GES"
    ]
    DO_NOT_DISTINGUISH_FURNISHED_RENT = True

    def _attach_cities(self, query, pattern):
        """Set ``query.cities`` to the cities found for *pattern*, tagged with this backend."""
        query.cities = []
        for city in self.backend.search_city(pattern):
            city.backend = self.backend.name
            query.cities.append(city)
        return query

    def test_seloger_rent(self):
        """Rent search in 'paris', at least 20 m2, at most 1500."""
        query = Query()
        query.area_min = 20
        query.cost_max = 1500
        query.type = POSTS_TYPES.RENT
        self.check_against_query(self._attach_cities(query, 'paris'))

    def test_seloger_sale(self):
        """Sale search in 'lille', at least 10 m2."""
        query = Query()
        query.area_min = 10
        query.type = POSTS_TYPES.SALE
        self.check_against_query(self._attach_cities(query, 'lille'))

    def test_seloger_furnished_rent(self):
        """Furnished rent search in 'paris', at least 20 m2, at most 1500."""
        query = Query()
        query.area_min = 20
        query.cost_max = 1500
        query.type = POSTS_TYPES.FURNISHED_RENT
        self.check_against_query(self._attach_cities(query, 'paris'))

    def test_seloger_viager(self):
        """Viager search over the cities matching '85'."""
        query = Query()
        query.type = POSTS_TYPES.VIAGER
        self.check_against_query(self._attach_cities(query, '85'))

    def test_seloger_rent_personal(self):
        """Rent search in 'paris' filtered on advert type."""
        query = Query()
        query.area_min = 20
        query.cost_max = 1500
        query.type = POSTS_TYPES.RENT
        # NOTE(review): despite the test name, the filter is PROFESSIONAL — confirm intent.
        query.advert_types = [ADVERT_TYPES.PROFESSIONAL]
        self.check_against_query(self._attach_cities(query, 'paris'))