From 82fc1196e827b84ee048c3f2b934138e9ed88f1e Mon Sep 17 00:00:00 2001 From: Vincent A Date: Sun, 3 Mar 2019 16:56:08 +0100 Subject: [PATCH] [pagesjaunes] take only phone number, ignore fax number --- modules/pagesjaunes/pages.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/pagesjaunes/pages.py b/modules/pagesjaunes/pages.py index db6b79833e..1e129e7541 100644 --- a/modules/pagesjaunes/pages.py +++ b/modules/pagesjaunes/pages.py @@ -41,7 +41,10 @@ class item(ItemElement): obj_name = CleanText('.//a[has-class("denomination-links")]') obj_address = CleanText('.//a[has-class("adresse")]') - obj_phone = Regexp(CleanText('.//strong[@class="num"]', replace=[(' ', '')]), r'^0(\d{9})$', r'+33\1') + obj_phone = Regexp( + CleanText( + './/div[has-class("tel-zone")][span[contains(text(),"Tél")]]//strong[@class="num"]', + replace=[(' ', '')]), r'^0(\d{9})$', r'+33\1') obj_url = AbsoluteLink('.//a[has-class("denomination-links")]') obj_opening = HasElement('.//span[text()="Horaires"]', NotLoaded, NotAvailable) -- GitLab