# -*- coding: utf-8 -*-

# Copyright(C) 2017      Phyks (Lucas Verney)
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.

from __future__ import unicode_literals

import datetime

from weboob.browser.pages import JsonPage, HTMLPage, pagination
25
from weboob.browser.filters.standard import (
26
    CleanDecimal, CleanText, Currency, Date, Env, Format, Regexp, RegexpError
27
)
28
from weboob.browser.filters.html import AbsoluteLink, Attr, Link, XPathNotFound
29
from weboob.browser.elements import ItemElement, ListElement, method
30
from weboob.capabilities.base import NotAvailable, NotLoaded
31
from weboob.capabilities.housing import (
Phyks (Lucas Verney)'s avatar
Phyks (Lucas Verney) committed
32 33
    City, Housing, HousingPhoto,
    UTILITIES, ENERGY_CLASS, POSTS_TYPES, ADVERT_TYPES
34
)
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
from weboob.tools.capabilities.housing.housing import PricePerMeterFilter

from .constants import AVAILABLE_TYPES, QUERY_TYPES, QUERY_HOUSE_TYPES


class CitiesPage(JsonPage):
    """JSON page from which ``City`` objects are built."""

    def iter_cities(self):
        """
        Yield one ``City`` per entry of the JSON document.

        When the document is a dict, its values are iterated; otherwise the
        document itself is iterated. Each entry is used verbatim as both the
        id and the name of the resulting city.
        """
        entries = self.doc.values() if isinstance(self.doc, dict) else self.doc

        for entry in entries:
            found = City()
            found.id = entry
            found.name = entry
            yield found


class HousingPage(HTMLPage):
    """HTML page describing a single housing advert."""

    @method
    class get_housing(ItemElement):
        """Fill a ``Housing`` object from the advert page."""
        klass = Housing

        # Upper bound (inclusive) of the consumption value for each energy
        # class letter; anything above the last bound is rated "G".
        _DPE_THRESHOLDS = (
            (50, "A"),
            (90, "B"),
            (150, "C"),
            (230, "D"),
            (330, "E"),
            (450, "F"),
        )

        obj_id = Format(
            '%s:%s',
            Env('type'),
            Attr('//div[boolean(@data-property-reference)]', 'data-property-reference')
        )
        obj_advert_type = ADVERT_TYPES.PROFESSIONAL

        def obj_type(self):
            """Map the ``type`` environment value to a ``POSTS_TYPES`` member."""
            query_type = Env('type')(self)
            if query_type == 'location':
                # Furnished rentals are only distinguishable through the URL.
                if 'appartement-meuble' in self.page.url:
                    return POSTS_TYPES.FURNISHED_RENT
                return POSTS_TYPES.RENT
            if query_type == 'achat':
                return POSTS_TYPES.SALE
            return NotAvailable

        def obj_url(self):
            """Return the URL of the current page."""
            return self.page.url

        def obj_house_type(self):
            """
            Return the first key of ``QUERY_HOUSE_TYPES`` for which one of the
            associated values appears as a ``/value/`` segment in the URL, or
            ``NotAvailable`` when none matches.
            """
            url = self.obj_url()
            for house_type, slugs in QUERY_HOUSE_TYPES.items():
                for slug in slugs:
                    if ('/%s/' % slug) in url:
                        return house_type
            return NotAvailable

        obj_title = CleanText('//h1[has-class("OfferTop-title")]')
        obj_area = CleanDecimal(
            Regexp(
                CleanText(
                    '//div[has-class("MiniData")]//p[has-class("MiniData-item")][1]'
                ),
                r'(\d*\.*\d*) .*'
            )
        )
        obj_cost = CleanDecimal(
            '//p[has-class("OfferTop-price")]'
        )
        obj_price_per_meter = PricePerMeterFilter()
        obj_currency = Currency(
            '//p[has-class("OfferTop-price")]'
        )
        obj_location = Format(
            '%s - %s',
            CleanText('//p[@data-behat="adresseBien"]'),
            CleanText('//p[has-class("OfferTop-loc")]')
        )
        obj_text = CleanText('//div[has-class("OfferDetails-content")]/p[1]')
        obj_phone = Regexp(
            Link(
                '//a[has-class("OfferContact-btn--tel")]'
            ),
            r'tel:(.*)'
        )

        def obj_photos(self):
            """Return the slider images, with ``640/480`` in each URL replaced by ``800/600``."""
            photos = []
            for photo in self.xpath('//div[has-class("OfferSlider")]//img'):
                photo_url = Attr('.', 'src')(photo)
                photo_url = photo_url.replace('640/480', '800/600')
                photos.append(HousingPhoto(photo_url))
            return photos

        def obj_date(self):
            """
            Return today's date.

            Computed on every call rather than stored as a class attribute,
            which would freeze the value at import time.
            """
            return datetime.date.today()

        def obj_utilities(self):
            """Return ``UTILITIES.INCLUDED`` if the price text mentions "charges comprises", else ``UTILITIES.EXCLUDED``."""
            price = CleanText(
                '//p[has-class("OfferTop-price")]'
            )(self)
            if "charges comprises" in price.lower():
                return UTILITIES.INCLUDED
            return UTILITIES.EXCLUDED

        obj_rooms = CleanDecimal(
            '//div[has-class("MiniData")]//p[has-class("MiniData-item")][2]',
            default=NotAvailable
        )
        obj_bedrooms = CleanDecimal(
            '//div[has-class("MiniData")]//p[has-class("MiniData-item")][3]',
            default=NotAvailable
        )

        def _electric_consumption(self):
            """
            Extract the numeric value embedded in the DPE image URL.

            Lets ``RegexpError`` / ``XPathNotFound`` propagate when the image
            is missing or its URL does not match; callers handle them.
            """
            return CleanDecimal(Regexp(
                Attr('//div[has-class("OfferDetails-content")]//img', 'src'),
                r'https://dpe.foncia.net\/(\d+)\/.*'
            ))(self)

        def obj_DPE(self):
            """
            Return the ``ENERGY_CLASS`` member matching the consumption value,
            or ``NotAvailable`` when the value cannot be extracted.
            """
            try:
                electric_consumption = self._electric_consumption()
            except (RegexpError, XPathNotFound):
                return NotAvailable

            if electric_consumption is None:
                return NotAvailable

            letter = "G"
            for upper_bound, candidate in self._DPE_THRESHOLDS:
                if electric_consumption <= upper_bound:
                    letter = candidate
                    break
            return getattr(ENERGY_CLASS, letter, NotAvailable)

        def obj_details(self):
            """
            Collect miscellaneous details into a dict: availability date,
            price mentions, agency address, one sub-dict per details category
            and, when extractable, the electric consumption.
            """
            details = {}

            # NOTE(review): the captured text looks like dd/mm/yyyy while the
            # fallback is an ISO date; confirm the Date filter resolves the
            # day/month order as intended for both.
            dispo = Date(
                Regexp(
                    CleanText('//p[has-class("OfferTop-dispo")]'),
                    r'.* (\d\d\/\d\d\/\d\d\d\d)',
                    default=datetime.date.today().isoformat()
                )
            )(self)
            if dispo is not None:
                details["dispo"] = dispo

            priceMentions = CleanText(
                '//p[has-class("OfferTop-mentions")]',
                default=None
            )(self)
            if priceMentions is not None:
                details["priceMentions"] = priceMentions

            agency = CleanText(
                '//p[has-class("OfferContact-address")]',
                default=None
            )(self)
            if agency is not None:
                details["agency"] = agency

            for item in self.xpath('//div[has-class("OfferDetails-columnize")]/div'):
                category = CleanText(
                    './h3[has-class("OfferDetails-title--2")]',
                    default=None
                )(item)
                if not category:
                    # Blocks without a title cannot be keyed; skip them.
                    continue

                details[category] = {}

                # Key/value entries.
                for detail_item in item.xpath('.//ul[has-class("List--data")]/li'):
                    detail_title = CleanText('.//span[has-class("List-data")]')(detail_item)
                    detail_value = CleanText('.//*[has-class("List-value")]')(detail_item)
                    details[category][detail_title] = detail_value

                # Bullet entries carry no value; record them as flags.
                for detail_item in item.xpath('.//ul[has-class("List--bullet")]/li'):
                    detail_title = CleanText('.')(detail_item)
                    details[category][detail_title] = True

            try:
                details["electric_consumption"] = (
                    '{} kWhEP/m².an'.format(self._electric_consumption())
                )
            except (RegexpError, XPathNotFound):
                # Best effort: the DPE image is optional.
                pass

            return details


class SearchPage(HTMLPage):
    """HTML page holding the ``searchForm`` form."""

    def do_search(self, query, cities):
        """
        Fill the search form from ``query`` and ``cities`` and submit it.

        Only values of ``QUERY_HOUSE_TYPES`` that are also listed in
        ``AVAILABLE_TYPES`` for the query type are sent; optional criteria
        are set only when truthy.
        """
        form = self.get_form('//form[@name="searchForm"]')

        form['searchForm[type]'] = QUERY_TYPES.get(query.type, None)
        form['searchForm[localisation]'] = cities

        # Gather every site-side type for the requested house types, ignoring
        # house types that have no mapping.
        requested_types = []
        for house_type in query.house_types:
            try:
                requested_types.extend(QUERY_HOUSE_TYPES[house_type])
            except KeyError:
                continue

        allowed_types = AVAILABLE_TYPES.get(query.type, [])
        form['searchForm[type_bien][]'] = [
            candidate for candidate in requested_types
            if candidate in allowed_types
        ]

        optional_criteria = (
            ('searchForm[surface_min]', query.area_min),
            ('searchForm[surface_max]', query.area_max),
            ('searchForm[prix_min]', query.cost_min),
            ('searchForm[prix_max]', query.cost_max),
        )
        for field_name, field_value in optional_criteria:
            if field_value:
                form[field_name] = field_value

        if query.nb_rooms:
            form['searchForm[pieces]'] = list(range(1, query.nb_rooms + 1))

        form.submit()

    def find_housing(self, query_type, housing):
        """Submit the search form with only the type and the reference set."""
        lookup_form = self.get_form('//form[@name="searchForm"]')
        lookup_form['searchForm[type]'] = query_type
        lookup_form['searchForm[reference]'] = housing
        lookup_form.submit()


class SearchResultsPage(HTMLPage):
    """HTML page listing the results of a search."""

    @pagination
    @method
    class iter_housings(ListElement):
        """Iterate over the ``TeaserOffer`` articles of the page."""
        item_xpath = '//article[has-class("TeaserOffer")]'

        next_page = Link('//div[has-class("Pagination--more")]/a[contains(text(), "Suivant")]')

        class item(ItemElement):
            """Fill a ``Housing`` object from a single result teaser."""
            klass = Housing

            obj_id = Format(
                '%s:%s',
                Env('type'),
                Attr('.//span[boolean(@data-reference)]', 'data-reference')
            )
            obj_type = Env('query_type')
            obj_advert_type = ADVERT_TYPES.PROFESSIONAL
            # Defined once only; a second identical definition was redundant.
            obj_url = AbsoluteLink('.//h3[has-class("TeaserOffer-title")]/a')

            def obj_house_type(self):
                """
                Return the first key of ``QUERY_HOUSE_TYPES`` for which one of
                the associated values appears as a ``/value/`` segment in the
                advert URL, or ``NotLoaded`` when none matches.
                """
                # obj_url is a filter here, so it is applied to the item.
                url = self.obj_url(self)
                for house_type, slugs in QUERY_HOUSE_TYPES.items():
                    for slug in slugs:
                        if ('/%s/' % slug) in url:
                            return house_type
                return NotLoaded

            obj_title = CleanText('.//h3[has-class("TeaserOffer-title")]')
            obj_area = CleanDecimal(
                Regexp(
                    CleanText(
                        './/div[has-class("MiniData")]//p[@data-behat="surfaceDesBiens"]'
                    ),
                    r'(\d*\.*\d*) .*'
                )
            )
            obj_cost = CleanDecimal(
                './/strong[has-class("TeaserOffer-price-num")]'
            )
            obj_price_per_meter = PricePerMeterFilter()
            obj_currency = Currency(
                './/strong[has-class("TeaserOffer-price-num")]'
            )
            obj_location = CleanText('.//p[has-class("TeaserOffer-loc")]')
            obj_text = CleanText('.//p[has-class("TeaserOffer-description")]')

            def obj_photos(self):
                """
                Return the teaser photo as a one-element list, or an empty
                list when the image source is empty or site-relative.
                """
                url = CleanText(Attr('.//a[has-class("TeaserOffer-ill")]/img', 'src'))(self)
                # If the used photo is a default no photo, the src is on the same domain.
                # An empty src is treated the same way instead of raising IndexError.
                if not url or url.startswith('/'):
                    return []
                return [HousingPhoto(url)]

            def obj_date(self):
                """
                Return today's date.

                Computed on every call rather than stored as a class
                attribute, which would freeze the value at import time.
                """
                return datetime.date.today()

            def obj_utilities(self):
                """Return ``UTILITIES.INCLUDED`` if the price text mentions "charges comprises", else ``UTILITIES.EXCLUDED``."""
                price = CleanText(
                    './/strong[has-class("TeaserOffer-price-num")]'
                )(self)
                if "charges comprises" in price.lower():
                    return UTILITIES.INCLUDED
                return UTILITIES.EXCLUDED

            obj_rooms = CleanDecimal(
                './/div[has-class("MiniData")]//p[@data-behat="nbPiecesDesBiens"]',
                default=NotLoaded
            )
            obj_bedrooms = CleanDecimal(
                './/div[has-class("MiniData")]//p[@data-behat="nbChambresDesBiens"]',
                default=NotLoaded
            )

            def obj_details(self):
                """Return the availability date and the price mentions of the teaser."""
                return {
                    "dispo": Date(
                        Attr('.//span[boolean(@data-dispo)]', 'data-dispo',
                             default=datetime.date.today().isoformat())
                    )(self),
                    "priceMentions": CleanText('.//span[has-class("TeaserOffer-price-mentions")]')(self)
                }