Skip to content
Commits on Source (2)
......@@ -18,10 +18,9 @@
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.housing import TypeNotSupported, POSTS_TYPES
from weboob.tools.compat import urlencode
from weboob.browser import PagesBrowser, URL
from .pages import SearchResultsPage, HousingPage, CitiesPage
from .pages import SearchResultsPage, HousingPage, CitiesPage, ErrorPage, HousingJsonPage
from weboob.browser.profiles import Android
from .constants import TYPES, RET
......@@ -30,50 +29,57 @@
class SeLogerBrowser(PagesBrowser):
    """Browser for seloger.com.

    Declares the city autocomplete endpoint, the HTML listing and advert
    pages, the JSON advert-detail endpoint and the captcha page, and exposes
    the search / fetch helpers built on top of them.
    """

    BASEURL = 'https://www.seloger.com'
    PROFILE = Android()

    cities = URL(r'https://autocomplete.svc.groupe-seloger.com/auto/complete/0/Ville/6\?text=(?P<pattern>.*)',
                 CitiesPage)
    search = URL(r'/list.html\?(?P<query>.*)&LISTING-LISTpg=(?P<page_number>\d+)', SearchResultsPage)
    housing = URL(r'/(?P<_id>.+)/detail.htm',
                  r'/annonces/.+',
                  HousingPage)
    housing_detail = URL(r'detail,json,caracteristique_bien.json\?idannonce=(?P<_id>\d+)', HousingJsonPage)
    # Landing on this URL means the site wants a captcha to be solved.
    captcha = URL(r'http://validate.perfdrive.com', ErrorPage)

    def search_geo(self, pattern):
        """Open the autocomplete endpoint for ``pattern`` and return its cities iterator."""
        return self.cities.open(pattern=pattern).iter_cities()

    def search_housings(self, _type, cities, nb_rooms, area_min, area_max,
                        cost_min, cost_max, house_types, advert_types):
        """Build the listing query string and iterate over the matching adverts.

        :param _type: post type; must be a key of ``TYPES``
        :param cities: iterable of city identifiers, each rendered as ``{ci:<id>}``
        :param nb_rooms: requested number of rooms (falsy to leave unconstrained)
        :param area_min: minimum surface (falsy for no bound)
        :param area_max: maximum surface (falsy for no bound)
        :param cost_min: minimum price (falsy for no bound)
        :param cost_max: maximum price (falsy for no bound)
        :param house_types: house types; only those present in ``RET`` are sent
        :param advert_types: forwarded untouched to ``iter_housings``
        :raises TypeNotSupported: if ``_type`` is not a key of ``TYPES``
        :return: whatever ``SearchResultsPage.iter_housings`` returns
        """
        # Reject unsupported types before doing any other work.
        if _type not in TYPES:
            raise TypeNotSupported()

        # NOTE(review): the "no bound" placeholder is spelled both 'NaN' and
        # 'Nan' below; kept byte-identical because the spelling expected by
        # the site cannot be confirmed from this file.
        price = '{}/{}'.format(cost_min or 'NaN', cost_max or 'Nan')
        surface = '{}/{}'.format(area_min or 'Nan', area_max or 'Nan')

        rooms = ''
        if nb_rooms:
            # The query never asks for more than 5 rooms.
            rooms = '&rooms={}'.format(min(nb_rooms, 5))

        if _type != POSTS_TYPES.VIAGER:
            _type = '{}'.format(TYPES.get(_type))
            viager = "&natures=1,2,4"
        else:
            _type = TYPES.get(_type)
            viager = ""

        places = '|'.join(['{{ci:{}}}'.format(c) for c in cities])
        places = '[{}]'.format(places)

        ret = ','.join([RET.get(t) for t in house_types if t in RET])

        query = "projects={}{}&places={}&types={}&price={}&surface={}{}&enterprise=0&qsVersion=1.0"\
            .format(_type,
                    viager,
                    places,
                    ret,
                    price,
                    surface,
                    rooms)

        return self.search.go(query=query, page_number=1).iter_housings(
            query_type=_type, advert_types=advert_types, ret=ret
        )

    def get_housing(self, _id, obj=None):
        """Load the advert page for ``_id`` and return the housing parsed from it (filling ``obj`` if given)."""
        return self.housing.go(_id=_id).get_housing(obj=obj)

    def get_housing_detail(self, obj):
        """Load the JSON detail endpoint for ``obj.id`` and return the housing parsed from it into ``obj``."""
        return self.housing_detail.go(_id=obj.id).get_housing(obj=obj)
......@@ -66,6 +66,15 @@ def fill_photo(self, photo, fields):
return photo
def fill_housing(self, housing, fields):
    """Fill the requested ``fields`` of ``housing`` using the browser.

    The energy ratings ('DPE' / 'GES') are fetched through
    ``browser.get_housing_detail``; any other requested field triggers
    ``browser.get_housing``.

    :param housing: object to fill; its ``id`` is used to fetch the advert
    :param fields: names of the fields to fill (left unmodified)
    :return: the filled housing object
    """
    energy_fields = ('DPE', 'GES')

    if any(name in fields for name in energy_fields):
        housing = self.browser.get_housing_detail(housing)

    # Work on a filtered copy: removing both names from ``fields`` in place
    # raised ValueError when only one of them had been requested.
    remaining = [name for name in fields if name not in energy_fields]
    if remaining:
        housing = self.browser.get_housing(housing.id, housing)

    return housing
OBJECTS = {HousingPhoto: fill_photo, Housing: fill_housing}
......@@ -18,19 +18,27 @@
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.browser.pages import XMLPage, JsonPage, pagination
from weboob.browser.elements import ItemElement, ListElement, DictElement, method
from weboob.browser.pages import JsonPage, pagination, HTMLPage
from weboob.browser.elements import ItemElement, DictElement, method
from weboob.browser.filters.json import Dict
from weboob.browser.filters.html import XPath
from weboob.browser.filters.standard import (CleanText, CleanDecimal, Currency,
DateTime, Env, Format, Regexp)
Env, Regexp, Field, BrowserURL)
from weboob.capabilities.base import NotAvailable, NotLoaded
from weboob.capabilities.housing import (Housing, HousingPhoto, City,
UTILITIES, ENERGY_CLASS, POSTS_TYPES,
ADVERT_TYPES)
from weboob.capabilities.address import PostalAddress
from weboob.tools.capabilities.housing.housing import PricePerMeterFilter
from weboob.tools.json import json
from weboob.exceptions import ActionNeeded
from .constants import TYPES, RET
import codecs
class ErrorPage(HTMLPage):
    """HTML page whose loading always aborts with ``ActionNeeded``."""

    def on_load(self):
        """Raise ``ActionNeeded`` asking the user to resolve the captcha."""
        raise ActionNeeded("Please resolve the captcha")
class CitiesPage(JsonPage):
......@@ -45,156 +53,210 @@ class item(ItemElement):
obj_name = Dict('Display')
class SeLogerItem(ItemElement):
    """Housing item read from XML-style child nodes (``idAnnonce``, ``prix``, ``surface``, ...)."""

    klass = Housing

    obj_id = CleanText('idAnnonce')

    def obj_type(self):
        """Map ``idTypeTransaction`` back to its ``TYPES`` key, reporting furnished rents as plain rents."""
        transaction_id = int(CleanText('idTypeTransaction')(self))
        post_type = next(key for key, value in TYPES.items() if value == transaction_id)
        # SeLoger does not let us discriminate between furnished and not
        # furnished.
        return POSTS_TYPES.RENT if post_type == POSTS_TYPES.FURNISHED_RENT else post_type

    def obj_house_type(self):
        """Map ``idTypeBien`` back to its ``RET`` key, or ``NotLoaded`` when no entry matches."""
        estate_id = CleanText('idTypeBien')(self)
        return next((key for key, value in RET.items() if value == estate_id), NotLoaded)

    obj_title = Format(
        "%s %s%s - %s",
        CleanText('titre'),
        CleanText('surface'),
        CleanText('surfaceUnite'),
        CleanText('ville'),
    )
    obj_date = DateTime(CleanText('dtFraicheur'))
    obj_cost = CleanDecimal('prix', default=NotLoaded)
    obj_currency = Currency(Regexp(CleanText('prixUnite'), r'(\W).*', r'\1'))
    obj_area = CleanDecimal('surface', default=NotLoaded)
    obj_price_per_meter = PricePerMeterFilter()
    obj_text = CleanText('descriptif')
    obj_rooms = CleanDecimal('nbPiece|nbPieces', default=NotLoaded)
    obj_bedrooms = CleanDecimal('nbChambre|nbChambres', default=NotLoaded)

    def obj_location(self):
        """Format the location as ``"<address or district> <city> (<zip code>)"``."""
        street = CleanText('adresse', default="")(self)
        district = CleanText('quartier', default=None)(self)
        # Fall back on the district only when there is no address at all.
        if district is not None and not street:
            street = district
        city = CleanText('ville')(self)
        zip_code = CleanText('cp')(self)
        return u'%s %s (%s)' % (street, city, zip_code)

    obj_station = CleanText('proximite', default=NotLoaded)
    obj_url = CleanText('permaLien')
class SearchResultsPage(HTMLPage):
    """Listing page whose results come from the JSON blob embedded in a ``<script>`` tag."""

    def __init__(self, *args, **kwargs):
        """Parse the HTML, then replace ``self.doc`` with the decoded ``initialData`` JSON."""
        HTMLPage.__init__(self, *args, **kwargs)
        json_content = Regexp(CleanText('//script'),
                              r"window\[\"initialData\"\] = JSON.parse\(\"({.*})\"\);window\[\"tags\"\]")(self.doc)
        # The blob is a JS string literal: undo its backslash escapes, then
        # round-trip through UTF-8 with 'surrogatepass' before parsing it.
        json_content = codecs.unicode_escape_decode(json_content)[0]
        json_content = json_content.encode('utf-8', 'surrogatepass').decode('utf-8')
        self.doc = json.loads(json_content)

    @pagination
    @method
    class iter_housings(DictElement):
        """Iterate over the entries of ``cards/list`` and follow the pagination."""

        item_xpath = 'cards/list'

        def next_page(self):
            """Return the URL of the next result page, or ``None`` on the last one."""
            page_nb = int(Dict('navigation/pagination/page')(self))
            max_results = int(Dict('navigation/pagination/maxResults')(self))
            results_per_page = int(Dict('navigation/pagination/resultsPerPage')(self))

            # Multiplying instead of dividing gives the same answer whatever
            # the interpreter's "/" semantics and cannot divide by zero.
            if results_per_page > 0 and page_nb * results_per_page < max_results:
                return BrowserURL('search', query=Env('query'), page_number=page_nb + 1)(self)

        # TODO handle bellesdemeures

        class item(ItemElement):
            """One listing card converted into a ``Housing``."""

            klass = Housing

            def condition(self):
                """Skip 'advertising' / 'localExpert' cards and cards without an id."""
                return Dict('cardType')(self) not in ['advertising', 'localExpert'] and Dict('id', default=False)(self)

            obj_id = Dict('id')

            def validate(self, obj):
                """Filter on advert type only when exactly one type was requested."""
                advert_types = self.env['advert_types']
                # The former ``advert_types > 1`` compared a list with an int,
                # which raises TypeError on Python 3.
                return len(advert_types) != 1 or advert_types[0] == obj.advert_type

            def obj_type(self):
                """Map the ``query_type`` env value back to its ``TYPES`` key."""
                type_id = int(Env('query_type')(self))
                post_type = next(k for k, v in TYPES.items() if v == type_id)
                if post_type == POSTS_TYPES.FURNISHED_RENT:
                    # SeLoger does not let us discriminate between furnished and not furnished.
                    return POSTS_TYPES.RENT
                return post_type

            def obj_title(self):
                """Build the title from the estate type, the tags and the location."""
                return "{} - {} - {}".format(Dict('estateType')(self),
                                             " / ".join(Dict('tags')(self)),
                                             Field('location')(self))

            def obj_advert_type(self):
                """Cards carrying a ``contact/agencyId`` are professional, the others personal."""
                if Dict('contact/agencyId', default=False)(self):
                    return ADVERT_TYPES.PROFESSIONAL
                return ADVERT_TYPES.PERSONAL

            obj_utilities = UTILITIES.EXCLUDED

            def obj_photos(self):
                """Wrap every entry of ``photos`` in a ``HousingPhoto``."""
                return [HousingPhoto(photo) for photo in Dict('photos')(self)]

            def obj_location(self):
                """Format the location as ``"<district> <city> (<zip code>)"``, with '' for missing parts."""
                quartier = Dict('districtLabel')(self) or ''
                ville = Dict('cityLabel')(self) or ''
                cp = Dict('zipCode')(self) or ''
                return u'%s %s (%s)' % (quartier, ville, cp)

            obj_url = Dict('classifiedURL')
            obj_text = Dict('description')
            obj_cost = CleanDecimal(Dict('pricing/price', default=NotLoaded), default=NotLoaded)
            obj_currency = Currency(Dict('pricing/price', default=NotLoaded), default=NotLoaded)
            # NOTE(review): the default is the PricePerMeterFilter class itself,
            # not an instance -- confirm this is what the filter machinery expects.
            obj_price_per_meter = CleanDecimal(Dict('pricing/squareMeterPrice'), default=PricePerMeterFilter)
class HousingPage(HTMLPage):
    """Single advert page; most fields are read from the hidden inputs of the ``central`` form."""

    @method
    class get_housing(ItemElement):
        """Build a ``Housing`` from the advert HTML and its embedded ``ava_data`` script."""

        klass = Housing

        def parse(self, el):
            """Extract the first ``products`` entry of ``ava_data`` into ``self.house_json_datas``."""
            json_content = Regexp(CleanText('//script'), "var ava_data = ({.+?});")(self)
            # ava_data is a JS object literal, not JSON: quote its bare keys
            # and drop the inline comment before handing it to the parser.
            json_content = json_content.replace("logged", "\"logged\"")
            json_content = json_content.replace("lengthcarrousel", "\"lengthcarrousel\"")
            json_content = json_content.replace("products", "\"products\"")
            json_content = json_content.replace("// // ANNONCES_SIMILAIRE / RECO", "")
            self.house_json_datas = json.loads(json_content)['products'][0]

        def _district(self):
            """Return the ``nomQuartier`` value found in the page scripts."""
            return Regexp(CleanText('//script'),
                          r"'nomQuartier', { value: \"([\w -]+)\", ")(self)

        obj_id = CleanText('//form[@name="central"]/input[@name="idannonce"]/@value')

        def obj_house_type(self):
            """Map the ``naturebien`` input back to its ``RET`` key, or ``NotLoaded`` if none matches."""
            naturebien = CleanText('//form[@name="central"]/input[@name="naturebien"]/@value')(self)
            return next((k for k, v in RET.items() if v == naturebien), NotLoaded)

        def obj_type(self):
            """Map the ``idtt`` input back to its ``TYPES`` key."""
            type_id = int(CleanText('//form[@name="central"]/input[@name="idtt"]/@value')(self))
            post_type = next(k for k, v in TYPES.items() if v == type_id)
            if post_type == POSTS_TYPES.FURNISHED_RENT:
                # SeLoger does not let us discriminate between furnished and not furnished.
                return POSTS_TYPES.RENT
            return post_type

        def obj_advert_type(self):
            """Professional when any agency-related input is filled, personal otherwise."""
            # NOTE(review): "nomagance" is kept as found; confirm it matches
            # the input name actually used by the site.
            is_agency = (
                CleanText('//form[@name="central"]/input[@name="nomagance"]/@value')(self) or
                CleanText('//form[@name="central"]/input[@name="urlagence"]/@value')(self) or
                CleanText('//form[@name="central"]/input[@name="adresseagence"]/@value')(self)
            )
            if is_agency:
                return ADVERT_TYPES.PROFESSIONAL
            return ADVERT_TYPES.PERSONAL

        def obj_photos(self):
            """Collect carousel photos from ``img/@src`` and from the JSON in ``@data-lazy``."""
            photos = []

            for photo in XPath('//div[@class="carrousel_slide"]/img/@src')(self):
                photos.append(HousingPhoto("https:{}".format(photo)))

            for photo in XPath('//div[@class="carrousel_slide"]/@data-lazy')(self):
                p = json.loads(photo)
                photos.append(HousingPhoto("https:{}".format(p['url'])))

            return photos

        obj_title = CleanText('//title[1]')

        def obj_location(self):
            """Format the location as ``"<district> <city> (<zip code>)"``."""
            quartier = self._district()
            ville = CleanText('//form[@name="central"]/input[@name="ville"]/@value')(self) or ''
            cp = CleanText('//form[@name="central"]/input[@name="codepostal"]/@value')(self) or ''
            return u'%s %s (%s)' % (quartier, ville, cp)

        def obj_address(self):
            """Build a ``PostalAddress`` from the district, zip code, city and formatted location."""
            p = PostalAddress()
            p.street = self._district()
            p.postal_code = CleanText('//form[@name="central"]/input[@name="codepostal"]/@value')(self)
            p.city = CleanText('//form[@name="central"]/input[@name="ville"]/@value')(self)
            p.full_address = Field('location')(self)
            return p

        obj_text = CleanText('//form[@name="central"]/input[@name="description"]/@value')
        obj_cost = CleanDecimal(CleanText('//a[@id="price"]'), default=NotLoaded)
        obj_currency = Currency(CleanText('//a[@id="price"]'), default=NotLoaded)
        obj_price_per_meter = PricePerMeterFilter()
        obj_area = CleanDecimal('//form[@name="central"]/input[@name="surface"]/@value', replace_dots=True)
        obj_url = CleanText('//form[@name="central"]/input[@name="urlannonce"]/@value')
        obj_phone = CleanText('//div[@class="data-action"]/a[@data-phone]/@data-phone')

        def obj_utilities(self):
            """Report INCLUDED when the price mention says charges are included, UNKNOWN otherwise."""
            mention = CleanText('//span[@class="detail_indice_prix"]', default="")(self)
            if "(CC) Loyer mensuel charges comprises" in mention:
                return UTILITIES.INCLUDED
            return UTILITIES.UNKNOWN

        def obj_bedrooms(self):
            """Read ``nb_chambres`` from the parsed ``ava_data`` product."""
            return CleanDecimal(Dict('nb_chambres', default=NotLoaded))(self.house_json_datas)

        def obj_rooms(self):
            """Read ``nb_pieces`` from the parsed ``ava_data`` product."""
            return CleanDecimal(Dict('nb_pieces', default=NotLoaded))(self.house_json_datas)
class HousingJsonPage(JsonPage):
    """JSON detail document of an advert: energy ratings and characteristics."""

    @method
    class get_housing(ItemElement):
        """Fill the DPE, GES and details fields of a ``Housing`` from the JSON document."""

        klass = Housing

        def _energy_class(self, key):
            """Return the ``ENERGY_CLASS`` member for the rating stored under ``key``.

            ``NotAvailable`` is returned when the rating is missing or is not
            a mapping, when its ``status`` is greater than 0, or when its
            ``lettre`` is empty or names no ``ENERGY_CLASS`` member.
            """
            rating = Dict(key, default="")(self)
            # The "" default (or any non-mapping value) cannot be indexed by
            # 'status' / 'lettre'; treat it as "no rating".
            if not isinstance(rating, dict):
                return NotAvailable
            if (rating.get('status') or 0) > 0:
                return NotAvailable
            letter = rating.get('lettre')
            if not letter:
                return NotAvailable
            return getattr(ENERGY_CLASS, letter, NotAvailable)

        def obj_DPE(self):
            """Energy rating read from the ``energie`` entry."""
            return self._energy_class("energie")

        def obj_GES(self):
            """Energy rating read from the ``ges`` entry."""
            return self._energy_class("ges")

        def obj_details(self):
            """Merge the non-empty ``categories`` criteria and every ``infos_acquereur`` entry into one dict."""
            details = {}

            for category in Dict('categories')(self):
                if category['criteria']:
                    details[category['name']] = ' / '.join(
                        [criterion['value'] for criterion in category['criteria']]
                    )

            for group in Dict('infos_acquereur')(self).values():
                for key, value in group.items():
                    details[key] = value

            return details
......@@ -33,9 +33,6 @@ class SeLogerTest(BackendTest, HousingTest):
"cost", # Some posts don't have cost in seloger
"currency", # Same
"photos",
"station",
"rooms",
"bedrooms"
]
FIELDS_ALL_SINGLE_HOUSING = [
"id", "url", "type", "advert_type", "house_type", "title", "area",
......
......@@ -56,8 +56,6 @@ then
echo "file://$SRC/modules" > "$WEBOOB_WORKDIR/sources.list"
fi
./bin/weboob-config update
cat > use-weboob-local.sh << EOF
VDIR="$VDIR"
. "$VDIR/bin/activate"
......@@ -75,3 +73,4 @@ then
echo "You can add file://$SRC/modules into $VDIR/workdir/sources.list to use local modules instead of downloading modules."
fi
./bin/weboob-config update
......@@ -74,7 +74,7 @@ class ENERGY_CLASS(Enum):
class POSTS_TYPES(Enum):
RENT=u'RENT'
RENT = u'RENT'
SALE = u'SALE'
SHARING = u'SHARING'
FURNISHED_RENT = u'FURNISHED_RENT'
......