From 48fda439151d9dfd35dbb664d2f9bd1ec2984639 Mon Sep 17 00:00:00 2001 From: Guntra Date: Fri, 19 Jul 2019 16:05:33 +0200 Subject: [PATCH] [lesterrains] handle not found fields & cleanup --- modules/lesterrains/browser.py | 5 -- modules/lesterrains/module.py | 6 -- modules/lesterrains/pages.py | 103 ++++++++++++++++++--------------- modules/lesterrains/test.py | 4 -- 4 files changed, 56 insertions(+), 62 deletions(-) diff --git a/modules/lesterrains/browser.py b/modules/lesterrains/browser.py index 9904bf37b2..6cf75cacbb 100644 --- a/modules/lesterrains/browser.py +++ b/modules/lesterrains/browser.py @@ -28,19 +28,14 @@ class LesterrainsBrowser(PagesBrowser): BASEURL = 'http://www.les-terrains.com' - TYPES = { POSTS_TYPES.SALE: 'vente' } - RET = { HOUSE_TYPES.LAND: 'Terrain seul' } - cities = URL('/api/get-search.php\?q=(?P.*)', CitiesPage) - search = URL('/index.php\?mode_aff=liste&ongletAccueil=Terrains&(?P.*)&distance=0', SearchPage) - housing = URL( '/index.php\?page=terrains&mode_aff=un_terrain&idter=(?P<_id>\d+).*', '/index.php\?page=terrains&mode_aff=maisonterrain&idter=(?P<_id>\d+).*', diff --git a/modules/lesterrains/module.py b/modules/lesterrains/module.py index 9c4995e28a..2af318b278 100644 --- a/modules/lesterrains/module.py +++ b/modules/lesterrains/module.py @@ -37,17 +37,11 @@ class LesterrainsModule(Module, CapHousing): NAME = 'lesterrains' - DESCRIPTION = 'Les-Terrains.com' - MAINTAINER = 'Guntra' - EMAIL = 'guntra@example.com' - LICENSE = 'LGPLv3+' - VERSION = '1.6' - BROWSER = LesterrainsBrowser def search_city(self, pattern): diff --git a/modules/lesterrains/pages.py b/modules/lesterrains/pages.py index c9904024dc..4f4ace0a6d 100644 --- a/modules/lesterrains/pages.py +++ b/modules/lesterrains/pages.py @@ -26,7 +26,7 @@ from weboob.browser.filters.html import Attr, AbsoluteLink from weboob.browser.elements import ItemElement, ListElement, DictElement, method from weboob.browser.pages import JsonPage, HTMLPage, pagination -from weboob.capabilities.base import Currency +from weboob.capabilities.base import Currency, NotAvailable from weboob.capabilities.housing import ( Housing, HousingPhoto, City, POSTS_TYPES, HOUSE_TYPES, ADVERT_TYPES, UTILITIES @@ -34,7 +34,6 @@ class CitiesPage(JsonPage): - ENCODING = 'UTF-8' def build_doc(self, content): @@ -46,15 +45,11 @@ def build_doc(self, content): @method class get_cities(DictElement): - item_xpath = 'cities' class item(ItemElement): - klass = City - obj_id = Dict('id') & CleanText() & Lower() - obj_name= Dict('value') & CleanText() @@ -63,15 +58,11 @@ class SearchPage(HTMLPage): @pagination @method class iter_housings(ListElement): - item_xpath = '//article[has-class("itemListe")]' - next_page = AbsoluteLink('./div[@class="pagination-foot-bloc"]/a[@class="pageActive"][2]') class item(ItemElement): - klass = Housing - obj_id = QueryValue( Attr( './/div[has-class("presentationItem")]/h2/a', @@ -79,22 +70,24 @@ class item(ItemElement): ), 'idter' ) - - obj_url = AbsoluteLink('.//h2/a') - + obj_url = AbsoluteLink( + './/h2/a', + default=NotAvailable + ) obj_type = POSTS_TYPES.SALE - obj_advert_type = ADVERT_TYPES.PROFESSIONAL - obj_house_type = HOUSE_TYPES.LAND - - obj_title = CleanText('.//div[@class="presentationItem"]/h2/a') + obj_title = CleanText( + './/div[@class="presentationItem"]/h2/a', + default=NotAvailable + ) def obj_area(self): min_area = CleanDecimal( Regexp( CleanText('.//div[@class="presentationItem"]/h3'), - 'surface de (\d+) m²' + 'surface de (\d+) m²', + default=0 ) )(self) max_area = CleanDecimal( @@ -104,7 +97,7 @@ def obj_area(self): default=0 ) )(self) - if (max_area > min_area): + if max_area > min_area: return max_area else: return min_area @@ -112,23 +105,29 @@ def obj_area(self): obj_cost = CleanDecimal( CleanText( './/div[@class="presentationItem"]/h3/span[1]', - replace=[(".", ""),(" €","")] + replace=[(".", ""),(" €","")], + default=NotAvailable ) ) - - obj_currency = Currency.get_currency(u'€') - + obj_currency = Currency.get_currency('€') obj_date = Date( CleanText( './/div[@class="presentationItem"]//span[@class="majItem"]', - replace=[("Mise à jour : ", "")]) + replace=[("Mise à jour : ", "")]), + default=NotAvailable + ) + obj_location = CleanText( + './/div[@class="presentationItem"]/h2/a/span', + default=NotAvailable + ) + obj_text = CleanText( + './/div[@class="presentationItem"]/p', + default=NotAvailable + ) + obj_phone = CleanText( + './/div[@class="divBoutonContact"]/div[@class="phone-numbers-bloc"]/p[1]/strong', + default=NotAvailable ) - - obj_location = CleanText('.//div[@class="presentationItem"]/h2/a/span') - - obj_text = CleanText('.//div[@class="presentationItem"]/p') - - obj_phone = CleanText('.//div[@class="divBoutonContact"]/div[@class="phone-numbers-bloc"]/p[1]/strong') def _photos_generator(self): for photo in self.xpath('.//div[has-class("photoItemListe")]/img/@data-src'): @@ -139,13 +138,12 @@ def obj_photos(self): obj_utilities = UTILITIES.UNKNOWN + class HousingPage(HTMLPage): @method class get_housing(ItemElement): - klass = Housing - obj_id = Attr( '//article//a[has-class("add-to-selection")]', 'data-id' @@ -155,12 +153,12 @@ def obj_url(self): return self.page.url obj_type = POSTS_TYPES.SALE - obj_advert_type = ADVERT_TYPES.PROFESSIONAL - obj_house_type = HOUSE_TYPES.LAND - - obj_title = CleanText('//article[@id="annonceTerrain"]/header/h1') + obj_title = CleanText( + '//article[@id="annonceTerrain"]/header/h1', + default=NotAvailable + ) def obj_area(self): max_area = 0 @@ -168,7 +166,8 @@ def obj_area(self): area = CleanDecimal( CleanText( './td[2]', - replace=[("m²","")] + replace=[("m²","")], + default=0 ) )(land) if area > max_area: @@ -181,7 +180,8 @@ def obj_cost(self): cost = CleanDecimal( CleanText( './td[3]', - replace=[(".","")] + replace=[(".","")], + default=0 ) )(land) if min_cost == 0: @@ -190,17 +190,26 @@ def obj_cost(self): min_cost = cost return min_cost - obj_currency = Currency.get_currency(u'€') - + obj_currency = Currency.get_currency('€') obj_date = Date( - CleanText('//section[@id="photos-details"]/div[@class="right-bloc"]/div/div[3]/div[2]/strong') + CleanText( + '//section[@id="photos-details"]/div[@class="right-bloc"]/div/div[3]/div[2]/strong', + default=NotAvailable + ), + default=NotAvailable + ) + obj_location = CleanText( + '//article[@id="annonceTerrain"]/header/h1/strong', + default=NotAvailable + ) + obj_text = CleanText( + '//div[@id="informationsTerrain"]/p[2]', + default=NotAvailable + ) + obj_phone = CleanText( + '//div[@id="infos-annonceur"]/div/div/div[@class="phone-numbers-bloc"]/p/strong', + default=NotAvailable ) - - obj_location = CleanText('//article[@id="annonceTerrain"]/header/h1/strong') - - obj_text = CleanText('//div[@id="informationsTerrain"]/p[2]') - - obj_phone = CleanText('//div[@id="infos-annonceur"]/div/div/div[@class="phone-numbers-bloc"]/p/strong') def obj_photos(self): photos = [] diff --git a/modules/lesterrains/test.py b/modules/lesterrains/test.py index 36edba83be..8554df0f9d 100644 --- a/modules/lesterrains/test.py +++ b/modules/lesterrains/test.py @@ -26,24 +26,20 @@ class LesterrainsTest(BackendTest, HousingTest): MODULE = 'lesterrains' - # Fields to be checked for values across all items in housings list FIELDS_ALL_HOUSINGS_LIST = [ "id", "url", "type", "advert_type", "house_type" ] - # Fields to be checked for at least one item in housings list FIELDS_ANY_HOUSINGS_LIST = [ "photos" ] - # Fields to be checked for values across all items when querying # individually FIELDS_ALL_SINGLE_HOUSING = [ "id", "url", "type", "advert_type", "house_type", "title", "area", "cost", "currency", "date", "location", "text", "phone" ] - # Fields to be checked for values at least once for all items when querying # individually FIELDS_ANY_SINGLE_HOUSING = [ -- GitLab