pax_global_header 0000666 0000000 0000000 00000000064 14575653726 0014536 g ustar 00root root 0000000 0000000 52 comment=5f3d558793b537a74480241ac6981479f5938cd3
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-pap/ 0000775 0000000 0000000 00000000000 14575653726 0022622 5 ustar 00root root 0000000 0000000 woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-pap/modules/ 0000775 0000000 0000000 00000000000 14575653726 0024272 5 ustar 00root root 0000000 0000000 woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-pap/modules/pap/ 0000775 0000000 0000000 00000000000 14575653726 0025052 5 ustar 00root root 0000000 0000000 woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-pap/modules/pap/__init__.py 0000664 0000000 0000000 00000000067 14575653726 0027166 0 ustar 00root root 0000000 0000000 from .module import PapModule
__all__ = ['PapModule']
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-pap/modules/pap/browser.py 0000664 0000000 0000000 00000006566 14575653726 0027124 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
from urllib.parse import urlencode
from woob.browser import URL, PagesBrowser
from woob.browser.cloudscraper import CloudScraperSession, CloudScraperMixin
from woob.capabilities.housing import TypeNotSupported, POSTS_TYPES
from .constants import TYPES, RET
from .pages import HousingPage, CitiesPage
__all__ = ['PapBrowser']
class PapBrowser(CloudScraperMixin, PagesBrowser):
    """Browser for www.pap.fr housing adverts.

    Requests go through a ``CloudScraperSession`` (see ``_create_session``).
    """

    BASEURL = 'https://www.pap.fr'

    housing = URL('/annonces/(?P<_id>.*)', HousingPage)
    search_page = URL('/recherche', HousingPage)
    search_result_page = URL('/annonce/.*', HousingPage)
    # The named group has to be called ``pattern`` because search_geo()
    # fills it with ``self.cities.go(pattern=...)``.
    cities = URL(r'/json/ac-geo\?q=(?P<pattern>.*)', CitiesPage)

    def _create_session(self):
        """Build the CloudScraper session used for every request."""
        return CloudScraperSession(
            server_hostname='www.pap.fr',
            delay=10,
            browser={'custom': 'ScraperBot/1.0'}
        )

    def search_geo(self, pattern):
        """Query the geo autocomplete endpoint and iterate over its cities.

        :param pattern: text substituted into the ``q=`` query parameter
        :return: whatever ``CitiesPage.iter_cities()`` yields
        """
        headers = {'Host': 'www.pap.fr'}
        return self.cities.go(pattern=pattern, headers=headers).iter_cities()

    def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max, house_types):
        """Submit the search form and iterate over the matching housings.

        :param type: a POSTS_TYPES value; must be a key of ``TYPES``
        :param cities: iterable of city id strings, joined with commas
        :param nb_rooms: exact room count (sent as both min and max), or falsy
        :param area_min: minimum surface, or falsy to leave empty
        :param area_max: maximum surface, or falsy to leave empty
        :param cost_min: minimum price, or falsy to leave empty
        :param cost_max: maximum price, or falsy to leave empty
        :param house_types: iterable of HOUSE_TYPES; values absent from
            ``RET`` are ignored
        :raises TypeNotSupported: when ``type`` is not a key of ``TYPES``
        :return: result of ``iter_housings(query_type=type)`` on the result page
        """
        if type not in TYPES:
            raise TypeNotSupported()

        self.session.headers.update({'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8'})

        data = {
            'geo_objets_ids': ','.join(cities),
            'surface[min]': area_min or '',
            'surface[max]': area_max or '',
            'prix[min]': cost_min or '',
            'prix[max]': cost_max or '',
            # Membership in TYPES was checked above, so indexing is safe.
            'produit': TYPES[type],
            'nb_resultats_par_page': 40,
            'action': 'submit',
            'nb_chambres[min]': '',
            'surface_terrain[min]': '',
            'surface_terrain[max]': '',
            'transport_objets_ids': '',
            'reference_courte': '',
        }

        if nb_rooms:
            data['nb_pieces[min]'] = nb_rooms
            data['nb_pieces[max]'] = nb_rooms

        if type == POSTS_TYPES.FURNISHED_RENT:
            data['tags[]'] = 'meuble'

        # ``typesbien[]`` may be repeated, which a dict cannot express, so the
        # pre-encoded pairs are appended to the urlencoded form by hand.
        if type == POSTS_TYPES.VIAGER:
            ret = ['typesbien%5B%5D=viager']
        else:
            ret = [
                f"typesbien%5B%5D={RET[house_type]}"
                for house_type in house_types
                if house_type in RET
            ]

        # Joining a single list avoids a dangling '&' when ``ret`` is empty.
        _data = '&'.join([urlencode(data), *ret])
        self.search_page.go(data=_data)
        # The code expects to land on a page matching search_result_page.
        assert self.search_result_page.is_here()
        return self.page.iter_housings(query_type=type)

    def get_housing(self, _id, housing=None):
        """Open the advert page for ``_id`` and fill a Housing from it.

        :param _id: advert identifier substituted into the ``housing`` URL
        :param housing: optional existing object passed as ``obj`` to be filled
        """
        return self.housing.go(_id=_id).get_housing(obj=housing)
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-pap/modules/pap/constants.py 0000664 0000000 0000000 00000000646 14575653726 0027446 0 ustar 00root root 0000000 0000000 from woob.capabilities.housing import POSTS_TYPES, HOUSE_TYPES
# Post types handled by this module, mapped to the site's 'produit' form value
# (see PapBrowser.search_housings). Furnished rent and viager reuse the
# 'location' / 'vente' values.
TYPES = {
    POSTS_TYPES.RENT: 'location',
    POSTS_TYPES.FURNISHED_RENT: 'location',
    POSTS_TYPES.SALE: 'vente',
    POSTS_TYPES.VIAGER: 'vente',
}

# House types mapped to the value sent in the 'typesbien[]' form field.
RET = {
    HOUSE_TYPES.HOUSE: 'maison',
    HOUSE_TYPES.APART: 'appartement',
    HOUSE_TYPES.LAND: 'terrain',
    HOUSE_TYPES.PARKING: 'garage-parking',
    HOUSE_TYPES.OTHER: 'divers',
}
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-pap/modules/pap/favicon.png 0000664 0000000 0000000 00000001511 14575653726 0027203 0 ustar 00root root 0000000 0000000 PNG
IHDR @ @ iq sRGB bKGD pHYs tIME. tEXtComment Created with GIMPW IDATxOEܮx)#,!KqYR.
QAE]T;CmUmta6?.h ]1`-z{Μ6»xg!
+*#YW=8
,^S؊
bz8WM`v=;bM`^MxϷ=6my|Ot3Kgp%:9[8uڛ߲t'[?RPG#C٢/^CܣYùYc6;K(izZŖv۰OϽ#lC(a,册8}zT,V%(VU3~
"Ycq=<N,=ØwEﻖ~6KELK
}/=װeulEG,?eNAAAAA IENDB` woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-pap/modules/pap/module.py 0000664 0000000 0000000 00000004747 14575653726 0026725 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
from woob.capabilities.housing import (CapHousing, Housing, HousingPhoto,
ADVERT_TYPES)
from woob.tools.backend import Module
from .browser import PapBrowser
__all__ = ['PapModule']
class PapModule(Module, CapHousing):
    """woob backend exposing pap.fr through the CapHousing capability."""

    NAME = 'pap'
    MAINTAINER = u'Romain Bignon'
    EMAIL = 'romain@weboob.org'
    VERSION = '3.6'
    DESCRIPTION = 'French housing website'
    LICENSE = 'AGPLv3+'

    BROWSER = PapBrowser

    def search_housings(self, query):
        """Run ``query`` against the site.

        Returns an empty list when the query asks only for professional
        adverts, or when none of its cities belong to this backend.
        """
        wanted_adverts = query.advert_types
        if len(wanted_adverts) == 1 and wanted_adverts[0] == ADVERT_TYPES.PROFESSIONAL:
            # Pap is personal only
            return []

        own_city_ids = [
            str(city.id) for city in query.cities if city.backend == self.name
        ]
        if not own_city_ids:
            return []

        return self.browser.search_housings(
            query.type,
            own_city_ids,
            query.nb_rooms,
            query.area_min,
            query.area_max,
            query.cost_min,
            query.cost_max,
            query.house_types,
        )

    def get_housing(self, housing):
        """Fetch one advert, given either a Housing object or a bare id."""
        if isinstance(housing, Housing):
            return self.browser.get_housing(housing.id, housing)
        # A bare identifier was given: there is no existing object to fill.
        return self.browser.get_housing(housing, None)

    def search_city(self, pattern):
        """Look up cities matching ``pattern`` through the browser."""
        return self.browser.search_geo(pattern)

    def fill_photo(self, photo, fields):
        """Download the photo content when 'data' is requested and missing."""
        if 'data' not in fields or not photo.url or photo.data:
            return photo
        photo.data = self.browser.open(photo.url).content
        return photo

    def fill_housing(self, housing, fields):
        """Reload the advert page into ``housing``; ``fields`` is not used."""
        return self.browser.get_housing(housing.id, housing)

    OBJECTS = {HousingPhoto: fill_photo, Housing: fill_housing}
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-pap/modules/pap/pages.py 0000664 0000000 0000000 00000024050 14575653726 0026524 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
from decimal import Decimal
from woob.tools.date import parse_french_date
from woob.browser.pages import HTMLPage, JsonPage, pagination
from woob.browser.elements import ItemElement, ListElement, DictElement, method
from woob.browser.filters.standard import (CleanText, CleanDecimal, Regexp,
Env, BrowserURL, Format, Currency)
from woob.browser.filters.html import Attr, Link, XPath, CleanHTML
from woob.browser.filters.json import Dict
from woob.capabilities.base import NotAvailable, NotLoaded
from woob.capabilities.housing import (Housing, City, HousingPhoto,
UTILITIES, ENERGY_CLASS, POSTS_TYPES,
ADVERT_TYPES, HOUSE_TYPES)
from woob.tools.capabilities.housing.housing import PricePerMeterFilter
class CitiesPage(JsonPage):
    """JSON page returned by the geo autocomplete endpoint."""

    @method
    class iter_cities(DictElement):
        """Iterate over the entries of the JSON document as City objects."""

        class item(ItemElement):
            """Build one City from the 'id' and 'name' keys of an entry."""

            klass = City

            obj_id = Dict('id')
            obj_name = Dict('name')
def _house_type_from_slug(slug):
    """Return the HOUSE_TYPES member whose keyword appears in ``slug``.

    Keywords are tried in a fixed order (parking, appartement, terrain,
    maison); HOUSE_TYPES.OTHER is returned when none is found.
    """
    keyword_to_type = (
        ('parking', HOUSE_TYPES.PARKING),
        ('appartement', HOUSE_TYPES.APART),
        ('terrain', HOUSE_TYPES.LAND),
        ('maison', HOUSE_TYPES.HOUSE),
    )
    for keyword, house_type in keyword_to_type:
        if keyword in slug:
            return house_type
    return HOUSE_TYPES.OTHER


class HousingPage(HTMLPage):
    """HTML page used both for search result lists and for single adverts."""

    @pagination
    @method
    class iter_housings(ListElement):
        """Iterate over the adverts of a result page, following 'next' links."""

        ignore_duplicate = True
        item_xpath = '//div[@id="pages-list"]/div/div/div[@class="search-list-item-alt"]'

        def next_page(self):
            return Link('//ul[@class="pagination"]/li[@class="next"]/a')(self)

        class item(ItemElement):
            """One advert of the result list."""

            klass = Housing

            def condition(self):
                """Keep the item only if it has an id and fits the query type.

                For a plain RENT query, adverts whose title mentions 'meublé'
                are rejected.
                """
                title = self.obj_title(self)
                is_not_furnished_ok = True
                if self.env['query_type'] == POSTS_TYPES.RENT:
                    is_not_furnished_ok = 'meublé' not in title.lower()

                housing_id = self.obj_id(self)
                if housing_id is None:
                    return False
                return housing_id and is_not_furnished_ok

            def parse(self, el):
                """Read rooms / bedrooms / area from the item tags into env."""
                tags = el.xpath(
                    './div/a[@class="item-title"]/ul[has-class("item-tags")]/li'
                )
                self.env['rooms'] = NotLoaded
                self.env['bedrooms'] = NotLoaded
                self.env['area'] = NotLoaded

                for item in tags:
                    label = CleanText('.')(item).lower()
                    if 'chambre' in label:
                        self.env['bedrooms'] = CleanDecimal('.')(item)
                    elif 'pièce' in label:
                        self.env['rooms'] = CleanDecimal('.')(item)
                    else:
                        # Any other tag is treated as the surface.
                        self.env['area'] = CleanDecimal(
                            Regexp(CleanText('.'), r'(\d*\.*\d*) .*')
                        )(item)

            obj_id = Regexp(
                Link('./div/a[@class="item-title"]'), '/annonces/(.*)',
                default=None
            )
            obj_type = Env('query_type')
            obj_advert_type = ADVERT_TYPES.PERSONAL

            def obj_house_type(self):
                # The house type is the first '-'-separated word of the last
                # path segment of the advert link.
                item_link = Link('./div/a[@class="item-title"]')(self)
                slug = item_link.split('/')[-1].split('-')[0]
                return _house_type_from_slug(slug)

            obj_title = CleanText('./div/a[@class="item-title"]')
            obj_area = Env('area')
            obj_cost = CleanDecimal(
                CleanText('./div/a[@class="item-title"]/span[@class="item-price"]'),
                replace_dots=True, default=Decimal(0)
            )
            obj_currency = Currency(
                './div/a[@class="item-title"]/span[@class="item-price"]'
            )
            obj_utilities = UTILITIES.UNKNOWN
            obj_station = CleanText('./div/p[@class="item-transports"]', default=NotLoaded)

            def obj_location(self):
                # Text before the first '.' of the description.
                return CleanText('./div/p[@class="item-description"]')(self).split(".")[0]

            obj_text = CleanText(
                './div/p[@class="item-description"]',
                replace=[(' Lire la suite', '')]
            )
            obj_rooms = Env('rooms')
            obj_bedrooms = Env('bedrooms')
            obj_price_per_meter = PricePerMeterFilter()

            # NOTE(review): built with http:// whereas PapBrowser.BASEURL is
            # https:// — confirm whether this is intended.
            obj_url = Format(
                u'http://www.pap.fr%s',
                Link('./div/a[@class="item-title"]')
            )

            def obj_photos(self):
                # Placeholder images (no photo / video thumbnail) are skipped.
                placeholders = ("visuel-nophoto.png", 'miniature-video.png')
                return [
                    HousingPhoto(u'%s' % img)
                    for img in XPath('./div/div/img/@src')(self)
                    if not img.endswith(placeholders)
                ]

    @method
    class get_housing(ItemElement):
        """Fill a Housing from a single advert page."""

        klass = Housing

        def parse(self, el):
            """Read rooms / bedrooms / area from the item tags into env.

            Only recognised tags are stored. Unrecognised tags are skipped
            instead of writing a stale (or not yet assigned) value under the
            raw tag text.
            """
            tags = el.xpath('.//ul[has-class("item-tags")]/li')
            self.env['rooms'] = NotAvailable
            self.env['bedrooms'] = NotAvailable
            self.env['area'] = NotAvailable

            for item in tags:
                label = CleanText('.')(item)
                lowered = label.lower()
                if 'chambre' in lowered:
                    self.env['bedrooms'] = CleanDecimal('./strong')(item)
                elif 'pièce' in lowered:
                    self.env['rooms'] = CleanDecimal('./strong')(item)
                elif ' m²' in label and 'le m²' not in label:
                    # Only a bare "<digits> m²" tag is accepted as the surface.
                    if Regexp(CleanText('.'), r'^\d+ m²$', default=False)(item):
                        self.env['area'] = CleanDecimal('.')(item)

        obj_id = Env('_id')

        def obj_type(self):
            # The post type is derived from the first breadcrumb link.
            prev_link = Link('//ol[has-class("breadcrumb")]/li[1]/a')(self)
            if 'location' in prev_link:
                title = self.obj_title(self)
                if 'meublé' in title.lower():
                    return POSTS_TYPES.FURNISHED_RENT
                return POSTS_TYPES.RENT
            if 'vente' in prev_link:
                return POSTS_TYPES.SALE
            if 'viager' in prev_link:
                return POSTS_TYPES.VIAGER
            return NotAvailable

        obj_advert_type = ADVERT_TYPES.PERSONAL

        def obj_house_type(self):
            # The house type is the last '-'-separated word of the first
            # breadcrumb link.
            prev_link = Link('//ol[has-class("breadcrumb")]/li[1]/a')(self)
            return _house_type_from_slug(prev_link.split('-')[-1])

        obj_title = CleanText(
            '//h1[@class="item-title"]'
        )
        obj_cost = CleanDecimal(
            '//h1[@class="item-title"]/span[@class="item-price"]',
            replace_dots=True
        )
        obj_currency = Currency(
            '//h1[@class="item-title"]/span[@class="item-price"]'
        )
        obj_utilities = UTILITIES.UNKNOWN
        obj_area = Env('area')

        def obj_date(self):
            # Keep only what follows the last '/' of the date paragraph.
            date = CleanText(
                '//p[@class="item-date"]'
            )(self).split("/")[-1].strip()
            return parse_french_date(date)

        obj_rooms = Env('rooms')
        obj_bedrooms = Env('bedrooms')
        obj_price_per_meter = PricePerMeterFilter()
        obj_location = CleanText('//div[has-class("item-description")]/h2')
        obj_text = CleanText(CleanHTML('//div[has-class("item-description")]/div/p'))

        def obj_station(self):
            return ", ".join([
                station.text
                for station in XPath(
                    '//ul[has-class("item-transports")]//span[has-class("label")]'
                )(self)
            ])

        obj_phone = CleanText('//div[@id="panel_contact_tel"]/a', default=NotAvailable)
        obj_url = BrowserURL('housing', _id=Env('_id'))

        def obj_DPE(self):
            """Return the ENERGY_CLASS named by the 'energy-rank-X' CSS class.

            NotAvailable is returned when the element is missing, carries no
            'energy-rank-X' class, or X is not an ENERGY_CLASS attribute.
            """
            css_classes = Attr(
                '//div[has-class("energy-box")]//div[has-class("energy-rank")]',
                'class',
                default=""
            )(self)
            # next() with a default avoids the IndexError that indexing [0]
            # raised when no class carried the 'energy-rank-' prefix.
            rank = next(
                (
                    css_class.replace("energy-rank-", "").upper()
                    for css_class in css_classes.split()
                    if css_class.startswith("energy-rank-")
                ),
                ""
            )
            if not rank:
                return NotAvailable
            return getattr(ENERGY_CLASS, rank, NotAvailable)

        def obj_photos(self):
            # Video thumbnails are not real photos and are skipped.
            return [
                HousingPhoto(u'%s' % img)
                for img in XPath('//div[@class="owl-carousel"]/div/a/img/@src')(self)
                if not img.endswith('miniature-video.png')
            ]
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-pap/modules/pap/requirements.txt 0000664 0000000 0000000 00000000031 14575653726 0030330 0 ustar 00root root 0000000 0000000 woob ~= 3.5
cloudscraper
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-pap/modules/pap/test.py 0000664 0000000 0000000 00000007365 14575653726 0026416 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
from woob.capabilities.housing import Query, POSTS_TYPES, ADVERT_TYPES
from woob.tools.test import BackendTest
from woob.tools.capabilities.housing.housing_test import HousingTest
class PapTest(BackendTest, HousingTest):
    """Live tests of the pap module, all searching the city pattern 'paris'."""

    MODULE = 'pap'

    FIELDS_ALL_HOUSINGS_LIST = [
        "id", "type", "advert_type", "house_type", "url", "title", "area",
        "cost", "currency", "utilities", "location", "text"
    ]
    FIELDS_ANY_HOUSINGS_LIST = [
        "photos",
        "station",
    ]
    FIELDS_ALL_SINGLE_HOUSING = [
        "id", "url", "type", "advert_type", "house_type", "title", "area",
        "cost", "currency", "utilities", "date", "location", "text",
        "phone"
    ]
    FIELDS_ANY_SINGLE_HOUSING = [
        "photos",
        "rooms",
        "bedrooms",
        "station"
    ]

    def _paris_query(self, post_type, **criteria):
        """Build a Query of ``post_type`` over the cities matching 'paris'.

        Extra keyword arguments are set as attributes on the query.
        """
        query = Query()
        for field, value in criteria.items():
            setattr(query, field, value)
        query.type = post_type
        query.cities = []
        for city in self.backend.search_city('paris'):
            city.backend = self.backend.name
            query.cities.append(city)
        return query

    def test_pap_rent(self):
        self.DO_NOT_DISTINGUISH_FURNISHED_RENT = True
        rent_query = self._paris_query(
            POSTS_TYPES.RENT, area_min=20, cost_max=1500
        )
        self.check_against_query(rent_query)

    def test_pap_sale(self):
        sale_query = self._paris_query(POSTS_TYPES.SALE, area_min=20)
        self.check_against_query(sale_query)

    def test_pap_furnished_rent(self):
        self.DO_NOT_DISTINGUISH_FURNISHED_RENT = False
        furnished_query = self._paris_query(
            POSTS_TYPES.FURNISHED_RENT, area_min=20, cost_max=1500
        )
        self.check_against_query(furnished_query)

    def test_pap_viager(self):
        viager_query = self._paris_query(POSTS_TYPES.VIAGER)
        # Remove rooms from the tested fields as viager never have them
        self.FIELDS_ANY_HOUSINGS_LIST = [
            "photos",
            "station",
            "bedrooms"
        ]
        self.FIELDS_ANY_SINGLE_HOUSING = [
            "photos",
            "bedrooms",
            "station"
        ]
        self.check_against_query(viager_query)

    def test_pap_professional(self):
        professional_query = self._paris_query(
            POSTS_TYPES.RENT,
            area_min=20,
            cost_max=900,
            advert_types=[ADVERT_TYPES.PROFESSIONAL],
        )
        found = list(self.backend.search_housings(professional_query))
        self.assertEqual(len(found), 0)