The new woob repository is here: https://gitlab.com/woob/woob. This GitLab instance will be removed soon.

Commit 48fda439, authored by Guntra, committed by Vincent A

[lesterrains] handle not found fields & cleanup

parent 5ce1b593
......@@ -28,19 +28,14 @@
class LesterrainsBrowser(PagesBrowser):
BASEURL = 'http://www.les-terrains.com'
TYPES = {
POSTS_TYPES.SALE: 'vente'
}
RET = {
HOUSE_TYPES.LAND: 'Terrain seul'
}
cities = URL('/api/get-search.php\?q=(?P<city>.*)', CitiesPage)
search = URL('/index.php\?mode_aff=liste&ongletAccueil=Terrains&(?P<query>.*)&distance=0', SearchPage)
housing = URL(
'/index.php\?page=terrains&mode_aff=un_terrain&idter=(?P<_id>\d+).*',
'/index.php\?page=terrains&mode_aff=maisonterrain&idter=(?P<_id>\d+).*',
......
......@@ -37,17 +37,11 @@
class LesterrainsModule(Module, CapHousing):
NAME = 'lesterrains'
DESCRIPTION = 'Les-Terrains.com'
MAINTAINER = 'Guntra'
EMAIL = 'guntra@example.com'
LICENSE = 'LGPLv3+'
VERSION = '1.6'
BROWSER = LesterrainsBrowser
def search_city(self, pattern):
......
......@@ -26,7 +26,7 @@
from weboob.browser.filters.html import Attr, AbsoluteLink
from weboob.browser.elements import ItemElement, ListElement, DictElement, method
from weboob.browser.pages import JsonPage, HTMLPage, pagination
from weboob.capabilities.base import Currency
from weboob.capabilities.base import Currency, NotAvailable
from weboob.capabilities.housing import (
Housing, HousingPhoto, City,
POSTS_TYPES, HOUSE_TYPES, ADVERT_TYPES, UTILITIES
......@@ -34,7 +34,6 @@
class CitiesPage(JsonPage):
ENCODING = 'UTF-8'
def build_doc(self, content):
......@@ -46,15 +45,11 @@ def build_doc(self, content):
@method
class get_cities(DictElement):
item_xpath = 'cities'
class item(ItemElement):
klass = City
obj_id = Dict('id') & CleanText() & Lower()
obj_name= Dict('value') & CleanText()
......@@ -63,15 +58,11 @@ class SearchPage(HTMLPage):
@pagination
@method
class iter_housings(ListElement):
item_xpath = '//article[has-class("itemListe")]'
next_page = AbsoluteLink('./div[@class="pagination-foot-bloc"]/a[@class="pageActive"][2]')
class item(ItemElement):
klass = Housing
obj_id = QueryValue(
Attr(
'.//div[has-class("presentationItem")]/h2/a',
......@@ -79,22 +70,24 @@ class item(ItemElement):
),
'idter'
)
obj_url = AbsoluteLink('.//h2/a')
obj_url = AbsoluteLink(
'.//h2/a',
default=NotAvailable
)
obj_type = POSTS_TYPES.SALE
obj_advert_type = ADVERT_TYPES.PROFESSIONAL
obj_house_type = HOUSE_TYPES.LAND
obj_title = CleanText('.//div[@class="presentationItem"]/h2/a')
obj_title = CleanText(
'.//div[@class="presentationItem"]/h2/a',
default=NotAvailable
)
def obj_area(self):
min_area = CleanDecimal(
Regexp(
CleanText('.//div[@class="presentationItem"]/h3'),
'surface de (\d+) m²'
'surface de (\d+) m²',
default=0
)
)(self)
max_area = CleanDecimal(
......@@ -104,7 +97,7 @@ def obj_area(self):
default=0
)
)(self)
if (max_area > min_area):
if max_area > min_area:
return max_area
else:
return min_area
......@@ -112,23 +105,29 @@ def obj_area(self):
obj_cost = CleanDecimal(
CleanText(
'.//div[@class="presentationItem"]/h3/span[1]',
replace=[(".", ""),(" €","")]
replace=[(".", ""),(" €","")],
default=NotAvailable
)
)
obj_currency = Currency.get_currency(u'€')
obj_currency = Currency.get_currency('€')
obj_date = Date(
CleanText(
'.//div[@class="presentationItem"]//span[@class="majItem"]',
replace=[("Mise à jour : ", "")])
replace=[("Mise à jour : ", "")]),
default=NotAvailable
)
obj_location = CleanText(
'.//div[@class="presentationItem"]/h2/a/span',
default=NotAvailable
)
obj_text = CleanText(
'.//div[@class="presentationItem"]/p',
default=NotAvailable
)
obj_phone = CleanText(
'.//div[@class="divBoutonContact"]/div[@class="phone-numbers-bloc"]/p[1]/strong',
default=NotAvailable
)
obj_location = CleanText('.//div[@class="presentationItem"]/h2/a/span')
obj_text = CleanText('.//div[@class="presentationItem"]/p')
obj_phone = CleanText('.//div[@class="divBoutonContact"]/div[@class="phone-numbers-bloc"]/p[1]/strong')
def _photos_generator(self):
for photo in self.xpath('.//div[has-class("photoItemListe")]/img/@data-src'):
......@@ -139,13 +138,12 @@ def obj_photos(self):
obj_utilities = UTILITIES.UNKNOWN
class HousingPage(HTMLPage):
@method
class get_housing(ItemElement):
klass = Housing
obj_id = Attr(
'//article//a[has-class("add-to-selection")]',
'data-id'
......@@ -155,12 +153,12 @@ def obj_url(self):
return self.page.url
obj_type = POSTS_TYPES.SALE
obj_advert_type = ADVERT_TYPES.PROFESSIONAL
obj_house_type = HOUSE_TYPES.LAND
obj_title = CleanText('//article[@id="annonceTerrain"]/header/h1')
obj_title = CleanText(
'//article[@id="annonceTerrain"]/header/h1',
default=NotAvailable
)
def obj_area(self):
max_area = 0
......@@ -168,7 +166,8 @@ def obj_area(self):
area = CleanDecimal(
CleanText(
'./td[2]',
replace=[("m²","")]
replace=[("m²","")],
default=0
)
)(land)
if area > max_area:
......@@ -181,7 +180,8 @@ def obj_cost(self):
cost = CleanDecimal(
CleanText(
'./td[3]',
replace=[(".","")]
replace=[(".","")],
default=0
)
)(land)
if min_cost == 0:
......@@ -190,17 +190,26 @@ def obj_cost(self):
min_cost = cost
return min_cost
obj_currency = Currency.get_currency(u'€')
obj_currency = Currency.get_currency('€')
obj_date = Date(
CleanText('//section[@id="photos-details"]/div[@class="right-bloc"]/div/div[3]/div[2]/strong')
CleanText(
'//section[@id="photos-details"]/div[@class="right-bloc"]/div/div[3]/div[2]/strong',
default=NotAvailable
),
default=NotAvailable
)
obj_location = CleanText(
'//article[@id="annonceTerrain"]/header/h1/strong',
default=NotAvailable
)
obj_text = CleanText(
'//div[@id="informationsTerrain"]/p[2]',
default=NotAvailable
)
obj_phone = CleanText(
'//div[@id="infos-annonceur"]/div/div/div[@class="phone-numbers-bloc"]/p/strong',
default=NotAvailable
)
obj_location = CleanText('//article[@id="annonceTerrain"]/header/h1/strong')
obj_text = CleanText('//div[@id="informationsTerrain"]/p[2]')
obj_phone = CleanText('//div[@id="infos-annonceur"]/div/div/div[@class="phone-numbers-bloc"]/p/strong')
def obj_photos(self):
photos = []
......
......@@ -26,24 +26,20 @@
class LesterrainsTest(BackendTest, HousingTest):
MODULE = 'lesterrains'
# Fields to be checked for values across all items in housings list
FIELDS_ALL_HOUSINGS_LIST = [
"id", "url", "type", "advert_type", "house_type"
]
# Fields to be checked for at least one item in housings list
FIELDS_ANY_HOUSINGS_LIST = [
"photos"
]
# Fields to be checked for values across all items when querying
# individually
FIELDS_ALL_SINGLE_HOUSING = [
"id", "url", "type", "advert_type", "house_type", "title", "area",
"cost", "currency", "date", "location", "text", "phone"
]
# Fields to be checked for values at least once for all items when querying
# individually
FIELDS_ANY_SINGLE_HOUSING = [
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment