The new woob repository is here: https://gitlab.com/woob/woob. This gitlab will be removed soon.

The new woob repository is here: https://gitlab.com/woob/woob. This gitlab will be removed soon.

Commit 7c469239 authored by Romain Bignon

fixes for backported modules

parent 2e1c73b9
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import lxml.html as html
from six.moves.html_parser import HTMLParser
from weboob.tools.compat import basestring, unicode, urljoin
from weboob.tools.html import html2text
from weboob.browser.filters.base import _NO_DEFAULT, Filter, FilterError, _Selector, debug, ItemNotFound
from weboob.browser.filters.standard import (
TableCell, ColumnNotFound, # TODO move class here when modules are migrated
CleanText,
)
__all__ = ['CSS', 'XPath', 'XPathNotFound', 'AttributeNotFound',
'Attr', 'Link', 'AbsoluteLink',
'CleanHTML', 'FormValue', 'HasElement',
'TableCell', 'ColumnNotFound',
'ReplaceEntities',
]
from weboob.browser.filters.html import XPathNotFound as _XPathNotFound
class XPathNotFound(_XPathNotFound):
    """Compat alias of the upstream ``XPathNotFound`` error.

    Adds nothing to its parent. In this module it is handed to
    ``default_or_raise`` by ``Attr.filter`` and ``FormValue.filter`` when
    the selector matched no element.
    """
from weboob.browser.filters.html import AttributeNotFound as _AttributeNotFound
class AttributeNotFound(_AttributeNotFound):
    """Compat alias of the upstream ``AttributeNotFound`` error.

    Adds nothing to its parent. In this module it is handed to
    ``default_or_raise`` when a matched element lacks the requested
    attribute.
    """
from weboob.browser.filters.html import CSS as _CSS
class CSS(_CSS):
    """Filter selecting HTML elements through a CSS selector.

    Example::

        obj_foo = CleanText(CSS('div.main'))

    extracts the text of every ``<div>`` carrying the CSS class "main".
    """

    def select(self, selector, item):
        """Run ``item.cssselect(selector)`` and return its result unchanged.

        When the result is a list, every ``HtmlElement`` in it is passed to
        ``self.highlight_el`` before returning.

        :param selector: CSS selector string
        :param item: object exposing ``cssselect``
        """
        matches = item.cssselect(selector)
        if not isinstance(matches, list):
            return matches

        for node in matches:
            if isinstance(node, html.HtmlElement):
                self.highlight_el(node, item)
        return matches
from weboob.browser.filters.html import XPath as _XPath
class XPath(_XPath):
    """Filter selecting HTML elements through an XPath selector.

    Compat subclass: it adds no behavior of its own to the upstream
    ``XPath`` filter.
    """
from weboob.browser.filters.html import Attr as _Attr
class Attr(_Attr):
    """Return the value of one attribute of the first matched element.

    Example::

        obj_foo = Attr('//img[@id="thumbnail"]', 'src')

    yields the "src" attribute of the ``<img>`` whose "id" is "thumbnail".
    """

    def __init__(self, selector, attr, default=_NO_DEFAULT):
        """
        :param selector: selector targeting the element
        :param attr: name of the attribute to read
        :param default: forwarded to the parent constructor
        """
        super(Attr, self).__init__(selector, attr, default=default)
        self.attr = attr

    @debug()
    def filter(self, el):
        """Format ``el[0].attrib[self.attr]`` as a text string.

        :param el: sequence of matched elements; only the first is used
        :raises: :class:`XPathNotFound` (through ``default_or_raise``) when
                 ``el`` is empty
        :raises: :class:`AttributeNotFound` (through ``default_or_raise``)
                 when the element has no such attribute
        """
        try:
            return u'%s' % el[0].attrib[self.attr]
        except IndexError:
            missing_element = XPathNotFound('Unable to find element %s' % self.selector)
            return self.default_or_raise(missing_element)
        except KeyError:
            missing_attribute = AttributeNotFound(
                'Element %s does not have attribute %s' % (el[0], self.attr)
            )
            return self.default_or_raise(missing_attribute)
from weboob.browser.filters.html import Link as _Link
class Link(_Link):
    """
    Get the link URI of an element.

    Compat wrapper: only the constructor is redefined here, everything else
    comes from the upstream ``Link`` filter.

    NOTE(review): the earlier docstring stated that `IndexError` is raised
    when the ``<a>`` tag is not found; the error handling lives in the
    parent class, so confirm the actual exception type there.
    """

    def __init__(self, selector=None, default=_NO_DEFAULT):
        """
        :param selector: selector targeting the element (may be omitted)
        :param default: forwarded to the parent constructor
        """
        super(Link, self).__init__(selector, default=default)
from weboob.browser.filters.html import AbsoluteLink as _AbsoluteLink
class AbsoluteLink(_AbsoluteLink):
    """Get the absolute link URI of an element."""

    def __call__(self, item):
        """Resolve the parent's result against ``item.page.url``.

        A falsy result from the parent call is returned untouched; anything
        else is joined onto the page URL with ``urljoin``.
        """
        link = super(AbsoluteLink, self).__call__(item)
        if not link:
            return link
        return urljoin(item.page.url, link)
from weboob.browser.filters.html import CleanHTML as _CleanHTML
class CleanHTML(_CleanHTML):
    """Turn HTML into text (Markdown) by means of html2text.

    .. seealso:: `html2text site <https://pypi.python.org/pypi/html2text>`_
    """

    def __init__(self, selector=None, options=None, default=_NO_DEFAULT):
        """
        :param selector: forwarded to the parent constructor
        :param options: keyword options suitable for html2text
        :type options: dict
        :param default: forwarded to the parent constructor
        """
        super(CleanHTML, self).__init__(selector=selector, default=default)
        self.options = options

    @debug()
    def filter(self, txt):
        """Clean one value, or clean each item of a tuple/list and join
        the results with single spaces.
        """
        if not isinstance(txt, (tuple, list)):
            return self.clean(txt, self.options)

        cleaned = [self.clean(fragment, self.options) for fragment in txt]
        return u' '.join(cleaned)

    @classmethod
    def clean(cls, txt, options=None):
        """Convert ``txt`` with ``html2text``.

        Values that are not strings are first serialized with
        ``lxml.html.tostring``. A falsy ``options`` means no extra keyword
        arguments are passed to ``html2text``.
        """
        markup = txt if isinstance(txt, basestring) else html.tostring(txt, encoding=unicode)
        return html2text(markup, **(options or {}))
from weboob.browser.filters.html import UnrecognizedElement as _UnrecognizedElement
class UnrecognizedElement(_UnrecognizedElement):
    """Compat alias of the upstream ``UnrecognizedElement`` error.

    Adds nothing to its parent. ``FormValue.filter`` raises it for form
    elements it does not know how to read.
    """
from weboob.browser.filters.html import FormValue as _FormValue
class FormValue(_FormValue):
    """
    Extract a Python value from a form element.

    Checkboxes and radio buttons return booleans, while the rest
    return text. For ``<select>`` tags, returns the user-visible text.
    """

    @debug()
    def filter(self, el):
        """
        Read the value of the first matched form element.

        :param el: sequence of matched elements; only the first one is used
        :raises: :class:`XPathNotFound` (through ``default_or_raise``) if
                 ``el`` is empty
        :raises: :class:`AttributeNotFound` (through ``default_or_raise``)
                 if a text-like ``<input>`` has no ``value`` attribute
        :raises: :class:`UnrecognizedElement` if the tag or the input type
                 is not handled here
        """
        try:
            el = el[0]
        except IndexError:
            return self.default_or_raise(XPathNotFound('Unable to find element %s' % self.selector))

        if el.tag == 'input':
            # checkboxes or radios: the value is whether the box is ticked
            if el.attrib.get('type') in ('radio', 'checkbox'):
                return 'checked' in el.attrib
            # regular text input (a missing type attribute counts as text)
            elif el.attrib.get('type', '') in ('', 'text', 'email', 'search', 'tel', 'url'):
                try:
                    return unicode(el.attrib['value'])
                except KeyError:
                    return self.default_or_raise(AttributeNotFound('Element %s does not have attribute value' % el))
            # TODO handle html5 number, datetime, etc.
            else:
                # The message previously said "is recognized", the opposite
                # of the condition being reported.
                raise UnrecognizedElement('Element %s is not recognized' % el)
        elif el.tag == 'textarea':
            return unicode(el.text)
        elif el.tag == 'select':
            options = el.xpath('.//option[@selected]')
            # default is the first one
            if len(options) == 0:
                options = el.xpath('.//option[1]')
            return u'\n'.join([unicode(o.text) for o in options])
        else:
            raise UnrecognizedElement('Element %s is not recognized' % el)
from weboob.browser.filters.html import HasElement as _HasElement
class HasElement(_HasElement):
    """
    Yield `yesvalue` when the `selector` finds elements and `novalue`
    when it does not.
    """

    def __init__(self, selector, yesvalue=True, novalue=False):
        """
        :param selector: selector whose result is tested
        :param yesvalue: value returned when the selection is truthy
        :param novalue: passed to the parent constructor as ``default``
        """
        super(HasElement, self).__init__(selector, default=novalue)
        self.yesvalue = yesvalue

    @debug()
    def filter(self, value):
        """Return ``self.yesvalue`` for a truthy ``value``; otherwise defer
        to ``default_or_raise``.
        """
        if not value:
            return self.default_or_raise(FilterError('No default value'))
        return self.yesvalue
class ReplaceEntities(CleanText):
    """
    Filter to replace HTML entities like "&eacute;" or "&#x42;" with their unicode counterpart.
    """

    def filter(self, data):
        """
        Clean ``data`` with the parent ``CleanText`` filter, then unescape
        the HTML entities left in the result.

        :param data: value handed to ``CleanText.filter``
        :return: the cleaned text with entities replaced
        """
        # ``HTMLParser.unescape`` was removed in Python 3.9, so prefer the
        # standard ``html.unescape`` function. The import is local because
        # the module-level name ``html`` is bound to ``lxml.html``.
        try:
            from html import unescape
        except ImportError:
            # Python 2 has no ``html.unescape``: use the legacy method.
            unescape = HTMLParser().unescape

        txt = super(ReplaceEntities, self).filter(data)
        return unescape(txt)
......@@ -24,7 +24,7 @@ import re
from weboob.browser.pages import HTMLPage, PDFPage, LoggedPage
from weboob.browser.elements import TableElement, ListElement, ItemElement, method
from weboob.browser.filters.standard import CleanText, CleanDecimal, Regexp, Field, Date, Eval
from weboob.browser.filters.html import Attr, TableCell, ReplaceEntities
from .compat.weboob_browser_filters_html import Attr, TableCell, ReplaceEntities
from weboob.capabilities.bank import Account, Investment, Loan, NotAvailable
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
from weboob.tools.capabilities.bank.iban import is_iban_valid
......
import weboob.capabilities.bank as OLD

# A star-import cannot be used because OLD.__all__ does not list every name,
# so each attribute reported by dir(OLD) is copied into this module's globals.
# NOTE(review): dir() on a module also reports dunder names such as __name__
# and __doc__, so those are overwritten here as well -- confirm this is
# intended.
for attr in dir(OLD):
    globals()[attr] = getattr(OLD, attr)

# Re-export the very same list object as the old module; this assignment does
# not add any name defined in this file to it.
__all__ = OLD.__all__
class AccountOwnerType(object):
    """Constants describing the usage of an account."""

    #: private personal account
    PRIVATE = u'PRIV'

    #: professional account
    ORGANIZATION = u'ORGA'
......@@ -26,7 +26,7 @@ import json
from weboob.browser.pages import HTMLPage, JsonPage, LoggedPage
from weboob.exceptions import BrowserUnavailable
from weboob.capabilities import NotAvailable
from weboob.capabilities.bank import (
from .compat.weboob_capabilities_bank import (
Account, AccountOwnerType,
)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment