pax_global_header 0000666 0000000 0000000 00000000064 13514067645 0014525 g ustar 00root root 0000000 0000000 52 comment=6d52c0c92f2617476fe0b131c1eebba68676b2fb
woob-6d52c0c92f2617476fe0b131c1eebba68676b2fb-modules-pap/ 0000775 0000000 0000000 00000000000 13514067645 0021773 5 ustar 00root root 0000000 0000000 woob-6d52c0c92f2617476fe0b131c1eebba68676b2fb-modules-pap/modules/ 0000775 0000000 0000000 00000000000 13514067645 0023443 5 ustar 00root root 0000000 0000000 woob-6d52c0c92f2617476fe0b131c1eebba68676b2fb-modules-pap/modules/pap/ 0000775 0000000 0000000 00000000000 13514067645 0024223 5 ustar 00root root 0000000 0000000 woob-6d52c0c92f2617476fe0b131c1eebba68676b2fb-modules-pap/modules/pap/__init__.py 0000664 0000000 0000000 00000000067 13514067645 0026337 0 ustar 00root root 0000000 0000000 from .module import PapModule
__all__ = ['PapModule']
woob-6d52c0c92f2617476fe0b131c1eebba68676b2fb-modules-pap/modules/pap/browser.py 0000664 0000000 0000000 00000005403 13514067645 0026262 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.browser import PagesBrowser, URL
from weboob.capabilities.housing import TypeNotSupported, POSTS_TYPES
from weboob.tools.compat import urlencode
from .pages import HousingPage, CitiesPage
from .constants import TYPES, RET
__all__ = ['PapBrowser']
class PapBrowser(PagesBrowser):
    """Browser for pap.fr: city lookup, housing search and advert retrieval."""

    BASEURL = 'https://www.pap.fr'

    housing = URL('/annonces/(?P<_id>.*)', HousingPage)
    search_page = URL('/annonce/.*', HousingPage)
    # The named group has to be called ``pattern``: search_geo() opens this
    # URL with ``pattern=...``. A raw string keeps the ``\?`` escape explicit.
    cities = URL(r'/json/ac-geo\?q=(?P<pattern>.*)', CitiesPage)

    def search_geo(self, pattern):
        """Return the cities found by the autocomplete endpoint for *pattern*."""
        return self.cities.open(pattern=pattern).iter_cities()

    def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max, house_types):
        """Submit a search form and return the housings found.

        :param type: a POSTS_TYPES value; must be a key of TYPES
        :param cities: iterable of city id strings, joined with commas
        :param nb_rooms: exact number of rooms, or a falsy value for no filter
        :param area_min: minimum area, or a falsy value for no filter
        :param area_max: maximum area, or a falsy value for no filter
        :param cost_min: minimum price, or a falsy value for no filter
        :param cost_max: maximum price, or a falsy value for no filter
        :param house_types: iterable of HOUSE_TYPES values; those missing
                            from RET are ignored
        :raises TypeNotSupported: if *type* is not a key of TYPES
        """
        if type not in TYPES:
            raise TypeNotSupported()

        self.session.headers.update({'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8'})

        data = {
            'geo_objets_ids': ','.join(cities),
            'surface[min]': area_min or '',
            'surface[max]': area_max or '',
            'prix[min]': cost_min or '',
            'prix[max]': cost_max or '',
            # Membership was checked above, so a direct lookup is safe.
            'produit': TYPES[type],
            'recherche': 1,
            'nb_resultats_par_page': 40,
        }

        if nb_rooms:
            # The same value is used for both bounds: exact room count.
            data['nb_pieces[min]'] = nb_rooms
            data['nb_pieces[max]'] = nb_rooms

        if type == POSTS_TYPES.FURNISHED_RENT:
            data['tags[]'] = 'meuble'

        if type == POSTS_TYPES.VIAGER:
            ret = ['viager']
        else:
            ret = [RET[house_type] for house_type in house_types if house_type in RET]

        # 'typesbien[]' may appear several times, so it is appended by hand
        # after the urlencoded part ('%5B%5D' is the encoded '[]').
        # NOTE(review): when ``ret`` is empty a single empty 'typesbien[]='
        # is still sent, as before -- confirm the site treats it as no filter.
        _data = '%s%s%s' % (urlencode(data), '&typesbien%5B%5D=', '&typesbien%5B%5D='.join(ret))
        return self.search_page.go(data=_data).iter_housings(
            query_type=type
        )

    def get_housing(self, _id, housing=None):
        """Open the advert page for *_id* and fill *housing* (or a new object)."""
        return self.housing.go(_id=_id).get_housing(obj=housing)
woob-6d52c0c92f2617476fe0b131c1eebba68676b2fb-modules-pap/modules/pap/constants.py 0000664 0000000 0000000 00000000650 13514067645 0026612 0 ustar 00root root 0000000 0000000 from weboob.capabilities.housing import POSTS_TYPES, HOUSE_TYPES
# Value sent as the 'produit' form field for each supported post type.
# Furnished rentals and viager share the value of their parent category.
TYPES = {
    POSTS_TYPES.RENT: 'location',
    POSTS_TYPES.FURNISHED_RENT: 'location',
    POSTS_TYPES.SALE: 'vente',
    POSTS_TYPES.VIAGER: 'vente',
}

# Value sent as a 'typesbien[]' form field for each supported house type.
RET = {
    HOUSE_TYPES.HOUSE: 'maison',
    HOUSE_TYPES.APART: 'appartement',
    HOUSE_TYPES.LAND: 'terrain',
    HOUSE_TYPES.PARKING: 'garage-parking',
    HOUSE_TYPES.OTHER: 'divers',
}
woob-6d52c0c92f2617476fe0b131c1eebba68676b2fb-modules-pap/modules/pap/favicon.png 0000664 0000000 0000000 00000001511 13514067645 0026354 0 ustar 00root root 0000000 0000000 PNG
IHDR @ @ iq sRGB bKGD pHYs tIME. tEXtComment Created with GIMPW IDATxOEܮx)#,!KqYR.
QAE]T;CmUmta6?.h ]1`-z{Μ6»xg!
+*#YW=8
,^S؊
bz8WM`v=;bM`^MxϷ=6my|Ot3Kgp%:9[8uڛ߲t'[?RPG#C٢/^CܣYùYc6;K(izZŖv۰OϽ#lC(a,册8}zT,V%(VU3~
"Ycq=<N,=ØwEﻖ~6KELK
}/=װeulEG,?eNAAAAA IENDB` woob-6d52c0c92f2617476fe0b131c1eebba68676b2fb-modules-pap/modules/pap/module.py 0000664 0000000 0000000 00000004765 13514067645 0026076 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.housing import (CapHousing, Housing, HousingPhoto,
ADVERT_TYPES)
from weboob.tools.backend import Module
from .browser import PapBrowser
__all__ = ['PapModule']
class PapModule(Module, CapHousing):
    NAME = 'pap'
    MAINTAINER = u'Romain Bignon'
    EMAIL = 'romain@weboob.org'
    VERSION = '1.6'
    DESCRIPTION = 'French housing website'
    LICENSE = 'AGPLv3+'

    BROWSER = PapBrowser

    def search_housings(self, query):
        """Run *query* through the browser, or return an empty list early.

        Nothing is searched when the query asks only for professional
        adverts, or when none of its cities belongs to this backend.
        """
        only_professional = (
            len(query.advert_types) == 1 and
            query.advert_types[0] == ADVERT_TYPES.PROFESSIONAL
        )
        if only_professional:
            # Pap is personal only
            return []

        city_ids = ['%s' % city.id for city in query.cities if city.backend == self.name]
        if not city_ids:
            return []

        return self.browser.search_housings(
            query.type, city_ids, query.nb_rooms,
            query.area_min, query.area_max,
            query.cost_min, query.cost_max,
            query.house_types
        )

    def get_housing(self, housing):
        """Fetch an advert given either a Housing object or a bare id."""
        if isinstance(housing, Housing):
            return self.browser.get_housing(housing.id, housing)
        # A bare id: there is no existing object to fill.
        return self.browser.get_housing(housing, None)

    def search_city(self, pattern):
        """Return the cities matching *pattern*."""
        return self.browser.search_geo(pattern)

    def fill_photo(self, photo, fields):
        """Download the photo content when 'data' is requested and missing."""
        if 'data' not in fields or not photo.url or photo.data:
            return photo
        photo.data = self.browser.open(photo.url).content
        return photo

    def fill_housing(self, housing, fields):
        """Reload the whole advert; *fields* is not used to narrow the work."""
        return self.browser.get_housing(housing.id, housing)

    OBJECTS = {HousingPhoto: fill_photo, Housing: fill_housing}
woob-6d52c0c92f2617476fe0b131c1eebba68676b2fb-modules-pap/modules/pap/pages.py 0000664 0000000 0000000 00000024404 13514067645 0025700 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
from decimal import Decimal
from weboob.tools.date import parse_french_date
from weboob.browser.pages import HTMLPage, JsonPage, pagination
from weboob.browser.elements import ItemElement, ListElement, DictElement, method
from weboob.browser.filters.standard import (CleanText, CleanDecimal, Regexp,
Env, BrowserURL, Format, Currency)
from weboob.browser.filters.html import Attr, Link, XPath, CleanHTML
from weboob.browser.filters.json import Dict
from weboob.capabilities.base import NotAvailable, NotLoaded
from weboob.capabilities.housing import (Housing, City, HousingPhoto,
UTILITIES, ENERGY_CLASS, POSTS_TYPES,
ADVERT_TYPES, HOUSE_TYPES)
from weboob.tools.capabilities.housing.housing import PricePerMeterFilter
class CitiesPage(JsonPage):
    # JSON page behind the browser's city autocomplete URL.
    @method
    class iter_cities(DictElement):
        # Yields one City per entry of the JSON document.
        class item(ItemElement):
            klass = City
            # Both attributes are read from same-named keys of the entry.
            obj_id = Dict('id')
            obj_name = Dict('name')
def _house_type_from_slug(slug):
    """Map a URL fragment to a HOUSE_TYPES value by keyword, first match wins."""
    keywords = (
        ('parking', HOUSE_TYPES.PARKING),
        ('appartement', HOUSE_TYPES.APART),
        ('terrain', HOUSE_TYPES.LAND),
        ('maison', HOUSE_TYPES.HOUSE),
    )
    for keyword, house_type in keywords:
        if keyword in slug:
            return house_type
    return HOUSE_TYPES.OTHER


class HousingPage(HTMLPage):
    # HTML page used both for search result lists (iter_housings) and for a
    # single advert (get_housing).

    @pagination
    @method
    class iter_housings(ListElement):
        item_xpath = '//div[has-class("search-list-item")]'

        def next_page(self):
            """Return the link of the pagination 'next' entry."""
            return Link('//ul[@class="pagination"]/li[@class="next"]/a')(self)

        class item(ItemElement):
            klass = Housing

            def condition(self):
                """Keep only real adverts; drop furnished ones for plain rent queries."""
                title = self.obj_title(self)
                isNotFurnishedOk = True
                if self.env['query_type'] == POSTS_TYPES.RENT:
                    isNotFurnishedOk = 'meublé' not in title.lower()
                return (
                    Regexp(Link('./div/a[has-class("item-title")]'), '/annonces/(.*)', default=None)(self) and
                    isNotFurnishedOk
                )

            def parse(self, el):
                """Read the tag list into the 'rooms', 'bedrooms' and 'area' env keys."""
                rooms_bedrooms_area = el.xpath(
                    './div/a[has-class("item-title")]/ul[has-class("item-tags")]/li'
                )
                self.env['rooms'] = NotLoaded
                self.env['bedrooms'] = NotLoaded
                self.env['area'] = NotLoaded

                for item in rooms_bedrooms_area:
                    name = CleanText('.')(item)
                    if 'chambre' in name.lower():
                        name = 'bedrooms'
                        value = CleanDecimal('.')(item)
                    elif 'pièce' in name.lower():
                        name = 'rooms'
                        value = CleanDecimal('.')(item)
                    else:
                        # Any other tag is taken to be the area.
                        name = 'area'
                        value = CleanDecimal(
                            Regexp(
                                CleanText(
                                    '.'
                                ),
                                r'(\d*\.*\d*) .*'
                            )
                        )(item)
                    self.env[name] = value

            # Every selector on the title link uses has-class(), like
            # condition() and obj_id, so they all match the same element even
            # when the link carries extra CSS classes.
            obj_id = Regexp(Link('./div/a[has-class("item-title")]'), '/annonces/(.*)')
            obj_type = Env('query_type')
            obj_advert_type = ADVERT_TYPES.PERSONAL

            def obj_house_type(self):
                """Guess the house type from the first word of the link's last path segment."""
                item_link = Link('./div/a[has-class("item-title")]')(self)
                house_type = item_link.split('/')[-1].split('-')[0]
                return _house_type_from_slug(house_type)

            obj_title = CleanText('./div/a[has-class("item-title")]')
            obj_area = Env('area')
            obj_cost = CleanDecimal(CleanText('./div/a[has-class("item-title")]/span[@class="item-price"]'),
                                    replace_dots=True, default=Decimal(0))
            obj_currency = Currency(
                './div/a[has-class("item-title")]/span[@class="item-price"]'
            )
            obj_utilities = UTILITIES.UNKNOWN
            obj_station = CleanText('./div/p[@class="item-transports"]', default=NotLoaded)

            def obj_location(self):
                """Return the description text up to its first period."""
                return CleanText('./div/p[@class="item-description"]')(self).split(".")[0]

            obj_text = CleanText('./div/p[@class="item-description"]', replace=[(' Lire la suite', '')])
            obj_rooms = Env('rooms')
            obj_bedrooms = Env('bedrooms')
            obj_price_per_meter = PricePerMeterFilter()
            obj_url = Format(
                u'http://www.pap.fr%s',
                Link('./div/a[has-class("item-title")]')
            )

            def obj_photos(self):
                """Collect thumbnails, skipping the 'no photo' and video placeholders."""
                photos = []
                for img in XPath('./a/img/@src')(self):
                    if img.endswith(("visuel-nophoto.png", 'miniature-video.png')):
                        continue
                    photos.append(HousingPhoto(u'%s' % img))
                return photos

    @method
    class get_housing(ItemElement):
        klass = Housing

        def parse(self, el):
            """Read the tag list into the 'rooms', 'bedrooms' and 'area' env keys."""
            rooms_bedrooms_area = el.xpath(
                './/ul[has-class("item-tags")]/li'
            )
            self.env['rooms'] = NotAvailable
            self.env['bedrooms'] = NotAvailable
            self.env['area'] = NotAvailable

            for item in rooms_bedrooms_area:
                name = CleanText('.')(item)
                if 'chambre' in name.lower():
                    name = 'bedrooms'
                    value = CleanDecimal('./strong')(item)
                elif 'pièce' in name.lower():
                    name = 'rooms'
                    value = CleanDecimal('./strong')(item)
                elif ' m²' in name and 'le m²' not in name:
                    name = 'area'
                    value = CleanDecimal(
                        Regexp(
                            CleanText(
                                '.'
                            ),
                            r'(\d*\.*\d*) .*'
                        )
                    )(item)
                else:
                    # Unrecognised tag: skip it. Without this, the previous
                    # item's value (or an unbound name on the first item)
                    # would be stored under the tag's raw text.
                    continue
                self.env[name] = value

        obj_id = Env('_id')

        def obj_type(self):
            """Derive the post type from the first breadcrumb link and the title."""
            prev_link = Link('//ol[has-class("breadcrumb")]/li[1]/a')(self)
            if 'location' in prev_link:
                title = self.obj_title(self)
                if 'meublé' in title.lower():
                    return POSTS_TYPES.FURNISHED_RENT
                else:
                    return POSTS_TYPES.RENT
            elif 'vente' in prev_link:
                return POSTS_TYPES.SALE
            elif 'viager' in prev_link:
                return POSTS_TYPES.VIAGER
            else:
                return NotAvailable

        obj_advert_type = ADVERT_TYPES.PERSONAL

        def obj_house_type(self):
            """Guess the house type from the last dash-separated part of the breadcrumb link."""
            prev_link = Link('//ol[has-class("breadcrumb")]/li[1]/a')(self)
            house_type = prev_link.split('-')[-1]
            return _house_type_from_slug(house_type)

        obj_title = CleanText(
            '//h1[@class="item-title"]'
        )
        obj_cost = CleanDecimal(
            '//h1[@class="item-title"]/span[@class="item-price"]',
            replace_dots=True
        )
        obj_currency = Currency(
            '//h1[@class="item-title"]/span[@class="item-price"]'
        )
        obj_utilities = UTILITIES.UNKNOWN
        obj_area = Env('area')

        def obj_date(self):
            """Parse the text after the last '/' of the date paragraph as a French date."""
            date = CleanText(
                '//p[@class="item-date"]'
            )(self).split("/")[-1].strip()
            return parse_french_date(date)

        obj_rooms = Env('rooms')
        obj_bedrooms = Env('bedrooms')
        obj_price_per_meter = PricePerMeterFilter()
        obj_location = CleanText('//div[has-class("item-description")]/h2')
        obj_text = CleanText(CleanHTML('//div[has-class("item-description")]/div/p'))

        def obj_station(self):
            """Join the transport labels with ', '."""
            return ", ".join([
                station.text
                for station in XPath(
                    '//ul[has-class("item-transports")]//span[has-class("label")]'
                )(self)
            ])

        def obj_phone(self):
            """Return the first owner phone block with ' ' replaced by ', '."""
            phone = CleanText('(//div[has-class("contact-proprietaire-box")]//strong[@class="tel-wrapper"])[1]')(self)
            phone = phone.replace(' ', ', ')
            return phone

        obj_url = BrowserURL('housing', _id=Env('_id'))

        def obj_DPE(self):
            """Return the energy class named by an 'energy-rank-X' CSS class.

            Falls back to NotAvailable when the element is missing, carries
            no 'energy-rank-X' class, or X is not an ENERGY_CLASS attribute.
            """
            classes = Attr(
                '//div[has-class("energy-box")]//div[has-class("energy-rank")]',
                'class',
                default=""
            )(self)
            # next() with a default avoids the IndexError that indexing an
            # empty list raised when no 'energy-rank-X' class was present.
            rank = next(
                (x.replace("energy-rank-", "").upper()
                 for x in classes.split() if x.startswith("energy-rank-")),
                ""
            )
            return getattr(ENERGY_CLASS, rank, NotAvailable)

        def obj_photos(self):
            """Collect carousel thumbnails, skipping the video placeholder."""
            photos = []
            for img in XPath('//div[@class="owl-thumbs"]/a/img/@src')(self):
                if not img.endswith('miniature-video.png'):
                    photos.append(HousingPhoto(u'%s' % img))
            return photos
woob-6d52c0c92f2617476fe0b131c1eebba68676b2fb-modules-pap/modules/pap/test.py 0000664 0000000 0000000 00000007223 13514067645 0025560 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.housing import Query, POSTS_TYPES, ADVERT_TYPES
from weboob.tools.test import BackendTest
from weboob.tools.capabilities.housing.housing_test import HousingTest
class PapTest(BackendTest, HousingTest):
    MODULE = 'pap'

    FIELDS_ALL_HOUSINGS_LIST = [
        "id", "type", "advert_type", "house_type", "url", "title", "area",
        "cost", "currency", "utilities", "location", "text"
    ]
    FIELDS_ANY_HOUSINGS_LIST = [
        "photos",
        "station",
    ]
    FIELDS_ALL_SINGLE_HOUSING = [
        "id", "url", "type", "advert_type", "house_type", "title", "area",
        "cost", "currency", "utilities", "date", "location", "text",
        "phone"
    ]
    FIELDS_ANY_SINGLE_HOUSING = [
        "photos",
        "rooms",
        "bedrooms",
        "station"
    ]

    def _cities_matching(self, pattern):
        """Return the backend's cities for *pattern*, tagged with this backend's name."""
        found = []
        for city in self.backend.search_city(pattern):
            city.backend = self.backend.name
            found.append(city)
        return found

    def test_pap_rent(self):
        query = Query()
        query.area_min = 20
        query.cost_max = 1500
        query.type = POSTS_TYPES.RENT
        query.cities = self._cities_matching('paris')
        self.check_against_query(query)

    def test_pap_sale(self):
        query = Query()
        query.area_min = 20
        query.type = POSTS_TYPES.SALE
        query.cities = self._cities_matching('paris')
        self.check_against_query(query)

    def test_pap_furnished_rent(self):
        query = Query()
        query.area_min = 20
        query.cost_max = 1500
        query.type = POSTS_TYPES.FURNISHED_RENT
        query.cities = self._cities_matching('paris')
        self.check_against_query(query)

    def test_pap_viager(self):
        query = Query()
        query.type = POSTS_TYPES.VIAGER
        query.cities = self._cities_matching('85')
        # Viager adverts never carry a room count, so 'rooms' is left out of
        # the optional field lists checked for this query.
        self.FIELDS_ANY_HOUSINGS_LIST = [
            "photos",
            "station",
            "bedrooms"
        ]
        self.FIELDS_ANY_SINGLE_HOUSING = [
            "photos",
            "bedrooms",
            "station"
        ]
        self.check_against_query(query)

    def test_pap_professional(self):
        query = Query()
        query.area_min = 20
        query.cost_max = 900
        query.type = POSTS_TYPES.RENT
        query.advert_types = [ADVERT_TYPES.PROFESSIONAL]
        query.cities = self._cities_matching('paris')
        # The site only hosts personal adverts, so nothing must come back.
        results = list(self.backend.search_housings(query))
        self.assertEqual(len(results), 0)