pages.py 8.07 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
# -*- coding: utf-8 -*-

# Copyright(C) 2012-2020  Budget Insight
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.

from __future__ import unicode_literals

22
import hashlib
23
import re
24

25
from weboob.browser.pages import HTMLPage, LoggedPage, pagination, JsonPage, RawPage
26
from weboob.browser.filters.standard import (
27
    CleanText, Env, Field, Regexp, Format, Date, Coalesce,
28
)
29
from weboob.browser.filters.json import Dict
30 31
from weboob.browser.elements import ListElement, ItemElement, method
from weboob.browser.filters.html import Attr
32
from weboob.capabilities.address import PostalAddress
33
from weboob.capabilities.bill import DocumentTypes, Document, Subscription
34
from weboob.capabilities.profile import Person
35
from weboob.capabilities.base import NotAvailable
36
from weboob.tools.date import parse_french_date
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57


class LoginAccessPage(HTMLPage):
    """HTML page holding the ``formulairePrincipal`` login form."""

    def login(self, login, password):
        """Fill the login form with the given credentials and submit it.

        The form's ``url`` is overwritten with the URL built from the
        browser's ``login_ael`` URL object before submission.

        :param login: value stored in the ``spi`` form field
        :param password: value stored in the ``pwd`` form field
        """
        login_form = self.get_form(id='formulairePrincipal')
        login_form['spi'] = login
        login_form['pwd'] = password
        # Submit to the ``login_ael`` endpoint instead of the form's current url.
        login_form.url = self.browser.login_ael.build()
        login_form.submit()


class LoginAELPage(HTMLPage):
    """Page inspected to determine the outcome of a login attempt."""

    def is_login_successful(self):
        """Tell whether the login attempt succeeded.

        Two checks are made on the document: the ``<title>`` must be
        ``lmdp``, and the first field of the ``parent.postMessage('...')``
        call found in a ``<script>`` must be ``ok``.

        :return: ``None`` when both checks pass, ``'wrong login'`` when the
                 title check fails, ``'wrong password'`` when the state
                 check fails
        """
        if CleanText('//head/title')(self.doc) != 'lmdp':
            return 'wrong login'

        # Capture everything between the opening quote and the first comma.
        state = Regexp(CleanText('//script'), r"parent.postMessage\('(.*?),.*\)")(self.doc)
        if state != 'ok':
            return 'wrong password'

        # Explicit "no error" result; callers get None exactly as before.
        return None

    def get_redirect_url(self):
        """Return the text following ``ok,`` in the ``postMessage('ok,...',`` call.

        The value is read from a ``<script>`` located directly under ``<body>``.
        """
        return Regexp(CleanText('//body/script'), r"postMessage\('ok,(.*)',")(self.doc)
60

61

62 63 64 65
class NoDocumentPage(LoggedPage, RawPage):
    """Logged raw page that defines no parsing logic of its own."""


66 67 68 69 70 71 72 73 74 75 76 77 78 79
class ThirdPartyDocPage(LoggedPage, JsonPage):
    """Logged JSON page from which a single ``Document`` is built."""

    @method
    class get_third_party_doc(ItemElement):
        """Build a ``Document`` describing the JSON payload of this page."""

        klass = Document

        # The id combines the ``spiDec1`` and ``dateNaisDec1`` JSON entries.
        obj_id = Format(
            '%s_%s',
            Dict('spiDec1'),
            Dict('dateNaisDec1')
        )
        obj_format = 'json'
        obj_label = 'Déclaration par un tiers'
        obj_type = DocumentTypes.OTHER

        def obj_url(self):
            # The document is the page itself, so its URL is the page URL.
            return self.page.url


80 81 82 83 84 85 86 87 88 89 90 91
class ProfilePage(LoggedPage, HTMLPage):
    """Logged HTML page exposing the user's identity, contact details and document links."""

    def get_documents_link(self):
        """Return the href of the first link whose title contains "déclarations".

        :raises IndexError: if the page contains no such link
        """
        return self.doc.xpath('//a[contains(@title, "déclarations")]/@href')[0]

    def get_bills_link(self):
        """Return the href of the first link whose title contains "résumé".

        :raises IndexError: if the page contains no such link
        """
        return self.doc.xpath('//a[contains(@title, "résumé")]/@href')[0]

    @method
    class get_subscriptions(ListElement):
        """Yield ``Subscription`` objects built from the profile page."""

        class item(ItemElement):
            klass = Subscription

            # Subscriber is "<first name> <last name>" as displayed on the page.
            obj_subscriber = Format('%s %s', CleanText('//span[@id="prenom"]'), CleanText('//span[@id="nom"]'))
            # The id is the run of digits following "N° fiscal : ".
            obj_id = Regexp(CleanText('//span[contains(text(), "N° fiscal")]'), r'N° fiscal : (\d+)')
            # The label reuses the id.
            obj_label = Field('id')

    @method
    class get_profile(ItemElement):
        """Build a ``Person`` from the identity and contact fields of the page."""

        klass = Person

        obj_name = Format('%s %s', Field('firstname'), Field('lastname'))
        obj_firstname = CleanText('//span[@id="prenom"]')
        obj_lastname = CleanText('//span[@id="nom"]')
        # Contact fields are read from the <div> following the one holding their caption.
        obj_email = CleanText('//div[span[contains(text(), "Adresse électronique")]]/following-sibling::div/span')
        obj_mobile = CleanText('//div[span[text()="Téléphone portable"]]/following-sibling::div/span', default=NotAvailable)
        obj_phone = CleanText('//div[span[text()="Téléphone fixe"]]/following-sibling::div/span', default=NotAvailable)
        obj_birth_date = Date(CleanText('//span[@id="datenaissance"]'), parse_func=parse_french_date)

        class obj_postal_address(ItemElement):
            """Build the ``PostalAddress`` from the ``adressepostale`` span."""

            klass = PostalAddress

            # Every field comes from the env filled by parse(); a key that
            # parse() did not set falls back to NotAvailable.
            obj_full_address = Env('full_address', default=NotAvailable)
            obj_street = Env('street', default=NotAvailable)
            obj_postal_code = Env('postal_code', default=NotAvailable)
            obj_city = Env('city', default=NotAvailable)

            def parse(self, obj):
                """Split the displayed address into street, postal code and city.

                The address is expected to look like
                ``<street> <5-digit postal code> <city>``. When it does, only
                the three split values are stored in the env; when it does
                not, only the raw text is stored as ``full_address``.
                """
                full_address = CleanText('//span[@id="adressepostale"]')(self)
                m = re.search(r'([\w ]+) (\d{5}) ([\w ]+)', full_address)
                if not m:
                    self.env['full_address'] = full_address
                else:
                    street, postal_code, city = m.groups()
                    self.env['street'] = street
                    self.env['postal_code'] = postal_code
                    self.env['city'] = city
126 127 128


class DocumentsPage(LoggedPage, HTMLPage):
    """Logged HTML page listing the documents of the selected year."""

    @pagination
    @method
    class iter_documents(ListElement):
        """Iterate over the documents of the selected year, then follow the year list."""

        item_xpath = '//ul[has-class("documents")]/li'

        def next_page(self):
            """Return the URL of the year listed right after the selected one.

            :return: the documents URL built with ``n=<year>``, or ``None``
                     when there is no following year entry or when following
                     it would not go back in time
            """
            previous_year = CleanText(
                '//li[has-class("blocAnnee") and has-class("selected")]/following-sibling::li[1]/a',
                children=False
            )(self.page.doc)

            # only if previous_year is not None and different from current year,
            # else we return to page with current year and fall into infinite loop
            if not previous_year:
                return None

            previous_year = int(Regexp(None, r'(\d{4})').filter(previous_year))

            current_year = int(Regexp(CleanText(
                '//li[has-class("blocAnnee") and has-class("selected")]/a',
                children=False
            ), r'(\d{4})')(self.page.doc))

            if previous_year >= current_year:
                # if previous year is 'something 2078' website return page of current year
                # previous_year has to be nothing but digit
                # don't return anything to not fall into infinite loop, but something bad has happened
                self.logger.error(
                    "pagination loop, previous_year: %s pagination is unexpectedly superior or equal to current_year: %s",
                    previous_year, current_year
                )
                return None

            return self.page.browser.documents.build(params={'n': previous_year})

        class item(ItemElement):
            """Build one ``Document`` from a ``<li>`` of the documents list."""

            klass = Document

            obj__idEnsua = Attr('.//form/input[@name="idEnsua"]', 'value')  # can be 64 or 128 char length

            def obj_id(self):
                """Return ``<subid>_<sha1 of _idEnsua>``.

                ``subid`` is read from the env and is not set in this class,
                so it is presumably supplied by the caller of
                ``iter_documents`` (to be confirmed against the browser code).
                """
                # hash _idEnsua to get a fixed-size id (a SHA-1 hex digest is 40 characters)
                digest = hashlib.sha1(Field('_idEnsua')(self).encode('utf-8')).hexdigest()
                return '%s_%s' % (Env('subid')(self), digest)

            # NOTE(review): no ``dayfirst`` is given here; confirm how dates
            # captured after "le " in the label are interpreted.
            obj_date = Date(Env('date'))
            obj_label = Env('label')
            obj_type = DocumentTypes.INCOME_TAX
            obj_format = 'pdf'
            obj_url = Format('/enp/ensu/Affichage_Document_PDF?idEnsua=%s', Field('_idEnsua'))

            def parse(self, el):
                """Fill ``label`` and ``date`` in the env from the document text.

                The date is the token found between "le " and the next comma
                in the label. When there is none, a date is made up from a
                year: the first standalone 4-digit number of the label, or
                failing that the year of the currently selected page.
                """
                label_ct = CleanText('./div[has-class("texte")][has-class("visible-xs")]')
                date = Regexp(label_ct, r'le ([\w\/]+?),', default=NotAvailable)(self)
                self.env['label'] = label_ct(self)

                if not date:
                    # exclude n° to not take n° 2555123456 as year 2555
                    # or if there is absolutely no date written in html for this document
                    # when label is "Mise en demeure de payer" for example
                    # take just the year in current page
                    year = Coalesce(
                        Regexp(label_ct, r'\b(\d{4})\b', default=NotAvailable),
                        # Read the selected year the same way next_page() does
                        # (direct text only, digits only) so that any extra
                        # text in the link cannot end up in the date string.
                        Regexp(
                            CleanText(
                                '//li[has-class("blocAnnee") and has-class("selected")]/a',
                                children=False
                            ),
                            r'(\d{4})',
                            default=NotAvailable
                        )
                    )(self)

                    if 'sur les revenus de' in self.env['label']:
                        # this kind of document always appears in july (but we don't know the day)
                        date = '%s-07-01' % year
                    else:
                        date = '%s-01-01' % year
                self.env['date'] = date