Romain Bignon · Romain Bignon · Romain Bignon · 7c469239 · 7c469239 · 7c469239
--- a/modules/barclays/compat/__init__.py
+++ b/modules/barclays/compat/__init__.py
--- a/modules/barclays/compat/weboob_browser_filters_html.py
+++ b/modules/barclays/compat/weboob_browser_filters_html.py
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+import lxml.html as html
+from six.moves.html_parser import HTMLParser
+
+from weboob.tools.compat import basestring, unicode, urljoin
+from weboob.tools.html import html2text
+
+from weboob.browser.filters.base import _NO_DEFAULT, Filter, FilterError, _Selector, debug, ItemNotFound
+from weboob.browser.filters.standard import (
+    TableCell, ColumnNotFound, # TODO move class here when modules are migrated
+    CleanText,
+)
+
+__all__ = ['CSS', 'XPath', 'XPathNotFound', 'AttributeNotFound',
+           'Attr', 'Link', 'AbsoluteLink',
+           'CleanHTML', 'FormValue', 'HasElement',
+           'TableCell', 'ColumnNotFound',
+           'ReplaceEntities',
+          ]
+
+
+from weboob.browser.filters.html import XPathNotFound as _XPathNotFound
+class XPathNotFound(_XPathNotFound):
+    """Compat subclass of weboob's ``XPathNotFound``; adds nothing of its own.
+
+    Used by the filters of this module when a selector matches no element.
+    """
+    pass
+
+
+from weboob.browser.filters.html import AttributeNotFound as _AttributeNotFound
+class AttributeNotFound(_AttributeNotFound):
+    """Compat subclass of weboob's ``AttributeNotFound``; adds nothing of its own.
+
+    Used by the filters of this module when the matched element lacks the
+    requested attribute.
+    """
+    pass
+
+
+from weboob.browser.filters.html import CSS as _CSS
+class CSS(_CSS):
+    """Filter selecting HTML elements through a CSS selector.
+
+    Usage example::
+
+        obj_foo = CleanText(CSS('div.main'))
+
+    which extracts the text of every ``<div>`` carrying the CSS class "main".
+    """
+    def select(self, selector, item):
+        """Run `selector` on `item` and return what ``cssselect`` gives back.
+
+        When the result is a list, each ``HtmlElement`` it contains is handed
+        to ``highlight_el`` before the result is returned unchanged.
+        """
+        matches = item.cssselect(selector)
+        if not isinstance(matches, list):
+            return matches
+
+        for node in matches:
+            if isinstance(node, html.HtmlElement):
+                self.highlight_el(node, item)
+        return matches
+
+
+from weboob.browser.filters.html import XPath as _XPath
+class XPath(_XPath):
+    """Select HTML elements with an XPath selector.
+
+    Compat subclass: all behaviour comes from the parent class.
+    """
+    pass
+
+
+from weboob.browser.filters.html import Attr as _Attr
+class Attr(_Attr):
+    """Return the value of an HTML attribute as text.
+
+    The value is read from attribute `attr` of the first element matched by
+    `selector`.
+
+    Usage example::
+
+        obj_foo = Attr('//img[@id="thumbnail"]', 'src')
+
+    which yields the "src" attribute of the ``<img>`` whose "id" is "thumbnail".
+    """
+
+    def __init__(self, selector, attr, default=_NO_DEFAULT):
+        """
+        :param selector: selector targeting the element
+        :param attr: name of the attribute to take
+        :param default: value forwarded to the parent filter as its default
+        """
+
+        # Keep this call exactly as is: the backport tool rewrites this line.
+        super(Attr, self).__init__(selector, attr, default=default)
+        self.attr = attr
+
+    @debug()
+    def filter(self, el):
+        """
+        :param el: sequence of matched elements; only the first one is read
+        :raises: :class:`XPathNotFound` if no element is found
+        :raises: :class:`AttributeNotFound` if the element doesn't have the requested attribute
+        """
+
+        try:
+            value = el[0].attrib[self.attr]
+        except IndexError:
+            # nothing was matched by the selector
+            missing = XPathNotFound('Unable to find element %s' % self.selector)
+            return self.default_or_raise(missing)
+        except KeyError:
+            # an element was matched but it does not carry the attribute
+            missing = AttributeNotFound('Element %s does not have attribute %s' % (el[0], self.attr))
+            return self.default_or_raise(missing)
+        return u'%s' % value
+
+
+from weboob.browser.filters.html import Link as _Link
+class Link(_Link):
+    """
+    Get the link uri of an element.
+
+    If the ``<a>`` tag is not found, an exception `IndexError` is raised.
+    """
+
+    def __init__(self, selector=None, default=_NO_DEFAULT):
+        """
+        :param selector: selector targeting the element, forwarded to the parent class
+        :param default: default value, forwarded to the parent class
+        """
+        # NOTE: the exact form of this call is produced by the "Custom fixups"
+        # step of tools/stable_backport.py; keep it in sync with that tool.
+        super(Link, self).__init__(selector, default=default)
+
+
+from weboob.browser.filters.html import AbsoluteLink as _AbsoluteLink
+class AbsoluteLink(_AbsoluteLink):
+    """Return the link URI of an element, made absolute against the page URL.
+    """
+    def __call__(self, item):
+        """Resolve the parent filter's result against ``item.page.url``.
+
+        A falsy result from the parent filter is returned untouched.
+        """
+        link = super(AbsoluteLink, self).__call__(item)
+        if not link:
+            return link
+        return urljoin(item.page.url, link)
+
+
+from weboob.browser.filters.html import CleanHTML as _CleanHTML
+class CleanHTML(_CleanHTML):
+    """Turn HTML into text (Markdown) by means of html2text.
+
+    .. seealso:: `html2text site <https://pypi.python.org/pypi/html2text>`_
+    """
+
+    def __init__(self, selector=None, options=None, default=_NO_DEFAULT):
+        """
+        :param selector: selector forwarded to the parent filter
+        :param options: options suitable for html2text
+        :type options: dict
+        :param default: default value forwarded to the parent filter
+        """
+
+        super(CleanHTML, self).__init__(selector=selector, default=default)
+        self.options = options
+
+    @debug()
+    def filter(self, txt):
+        """Clean `txt`; a tuple or list is cleaned item by item and joined with spaces."""
+        if not isinstance(txt, (tuple, list)):
+            return self.clean(txt, self.options)
+
+        pieces = [self.clean(part, self.options) for part in txt]
+        return u' '.join(pieces)
+
+    @classmethod
+    def clean(cls, txt, options=None):
+        """Convert one string or element to text.
+
+        :param txt: a string, or an object serialized first with ``html.tostring``
+        :param options: keyword arguments passed to ``html2text`` (``None`` means none)
+        """
+        markup = txt if isinstance(txt, basestring) else html.tostring(txt, encoding=unicode)
+        kwargs = options or {}
+        return html2text(markup, **kwargs)
+
+
+from weboob.browser.filters.html import UnrecognizedElement as _UnrecognizedElement
+class UnrecognizedElement(_UnrecognizedElement):
+    """Compat subclass of weboob's ``UnrecognizedElement``; adds nothing of its own.
+
+    Raised by :class:`FormValue` for elements it does not know how to read.
+    """
+    pass
+
+
+from weboob.browser.filters.html import FormValue as _FormValue
+class FormValue(_FormValue):
+    """
+    Extract a Python value from a form element.
+
+    Checkboxes and radio return booleans, while the rest
+    return text. For ``<select>`` tags, returns the user-visible text.
+    """
+
+    @debug()
+    def filter(self, el):
+        """
+        :param el: sequence of matched elements; only the first one is read
+        :raises: :class:`XPathNotFound` if no element is found
+        :raises: :class:`AttributeNotFound` if a text input has no ``value`` attribute
+        :raises: :class:`UnrecognizedElement` if the element is not a supported form field
+        """
+        try:
+            el = el[0]
+        except IndexError:
+            return self.default_or_raise(XPathNotFound('Unable to find element %s' % self.selector))
+        if el.tag == 'input':
+            # checkboxes or radios
+            if el.attrib.get('type') in ('radio', 'checkbox'):
+                return 'checked' in el.attrib
+            # regular text input
+            elif el.attrib.get('type', '') in ('', 'text', 'email', 'search', 'tel', 'url'):
+                try:
+                    return unicode(el.attrib['value'])
+                except KeyError:
+                    return self.default_or_raise(AttributeNotFound('Element %s does not have attribute value' % el))
+            # TODO handle html5 number, datetime, etc.
+            else:
+                # message fixed: it used to claim the element "is recognized"
+                raise UnrecognizedElement('Element %s is not recognized' % el)
+        elif el.tag == 'textarea':
+            # NOTE(review): an empty textarea presumably has ``el.text`` set to
+            # None, which would be stringified here -- confirm this is intended.
+            return unicode(el.text)
+        elif el.tag == 'select':
+            options = el.xpath('.//option[@selected]')
+            # default is the first one
+            if len(options) == 0:
+                options = el.xpath('.//option[1]')
+            # several selected options are joined, one per line
+            return u'\n'.join([unicode(o.text) for o in options])
+        else:
+            # message fixed: it used to claim the element "is recognized"
+            raise UnrecognizedElement('Element %s is not recognized' % el)
+
+
+from weboob.browser.filters.html import HasElement as _HasElement
+class HasElement(_HasElement):
+    """
+    Yield `yesvalue` when `selector` matches something, `novalue` otherwise.
+    """
+    def __init__(self, selector, yesvalue=True, novalue=False):
+        """
+        :param selector: selector whose result is tested
+        :param yesvalue: value returned when the selection is truthy
+        :param novalue: value handed to the parent filter as its default
+        """
+        super(HasElement, self).__init__(selector, default=novalue)
+        self.yesvalue = yesvalue
+
+    @debug()
+    def filter(self, value):
+        """Map a truthy `value` to ``yesvalue``; otherwise defer to ``default_or_raise``."""
+        if not value:
+            return self.default_or_raise(FilterError('No default value'))
+        return self.yesvalue
+
+
+class ReplaceEntities(CleanText):
+    """
+    Filter to replace HTML entities like "&eacute;" or "&#x42;" with their unicode counterpart.
+    """
+    def filter(self, data):
+        """Clean `data` with the parent ``CleanText`` filter, then decode HTML entities.
+
+        :param data: value passed unchanged to ``CleanText.filter``
+        :return: the cleaned text with entities replaced
+        """
+        txt = super(ReplaceEntities, self).filter(data)
+        try:
+            # HTMLParser.unescape() was removed in Python 3.9; html.unescape()
+            # is the supported replacement. Imported locally because the
+            # module-level name ``html`` is bound to lxml.html.
+            from html import unescape
+        except ImportError:
+            # Python 2 has no usable ``html.unescape``: keep the parser method.
+            unescape = HTMLParser().unescape
+        return unescape(txt)
+
--- a/modules/barclays/pages.py
+++ b/modules/barclays/pages.py
@@ -24,7 +24,7 @@
 from weboob.browser.pages import HTMLPage, PDFPage, LoggedPage
 from weboob.browser.elements import TableElement, ListElement, ItemElement, method
 from weboob.browser.filters.standard import CleanText, CleanDecimal, Regexp, Field, Date, Eval
-from weboob.browser.filters.html import Attr, TableCell, ReplaceEntities
+from .compat.weboob_browser_filters_html import Attr, TableCell, ReplaceEntities
 from weboob.capabilities.bank import Account, Investment, Loan, NotAvailable
 from weboob.tools.capabilities.bank.transactions import FrenchTransaction
 from weboob.tools.capabilities.bank.iban import is_iban_valid

--- a/modules/cragr/api/compat/__init__.py
+++ b/modules/cragr/api/compat/__init__.py
--- a/modules/cragr/api/compat/weboob_capabilities_bank.py
+++ b/modules/cragr/api/compat/weboob_capabilities_bank.py
+import weboob.capabilities.bank as OLD
+
+# can't import *, __all__ is incomplete...
+# Instead, copy every name listed by dir(OLD) into this module's namespace.
+# NOTE(review): dir(OLD) also lists dunder names (``__name__``, ``__doc__``, ...),
+# so those are overwritten here too -- confirm this is intended.
+for attr in dir(OLD):
+    globals()[attr] = getattr(OLD, attr)
+
+
+# Same list object as the original module's; names defined below in this file
+# are not added to it.
+__all__ = OLD.__all__
+
+
+class AccountOwnerType(object):
+    """
+    Specifies the usage of the account.
+
+    Plain holder of string constants; it is defined in this compat module
+    (presumably because the stable ``weboob.capabilities.bank`` does not
+    provide it -- TODO confirm).
+    """
+    PRIVATE = u'PRIV'
+    """private personal account"""
+    ORGANIZATION = u'ORGA'
+    """professional account"""
--- a/modules/cragr/api/pages.py
+++ b/modules/cragr/api/pages.py
@@ -26,7 +26,7 @@
 from weboob.browser.pages import HTMLPage, JsonPage, LoggedPage
 from weboob.exceptions import BrowserUnavailable
 from weboob.capabilities import NotAvailable
-from weboob.capabilities.bank import (
+from .compat.weboob_capabilities_bank import (
    Account, AccountOwnerType,
 )


--- a/modules/genericnewspaper/compat/__init__.py
+++ b/modules/genericnewspaper/compat/__init__.py
--- a/modules/hsbc/compat/__init__.py
+++ b/modules/hsbc/compat/__init__.py
--- a/tools/stable_backport.py
+++ b/tools/stable_backport.py
@@ -6,7 +6,7 @@
 import sys
 import re
 from contextlib import contextmanager
-from os import system, path, makedirs, getenv
+from os import system, path, makedirs, getenv, mknod, unlink
 from subprocess import check_output, STDOUT, CalledProcessError
 from collections import defaultdict
 import shutil
@@ -43,6 +43,7 @@ def create_compat_dir(name):


 MANUAL_PORTS = [
+    'weboob.capabilities.bank',
 ]

 MANUAL_PORT_DIR = path.join(path.dirname(__file__), 'stable_backport_data')
@@ -180,7 +181,9 @@ def main(self):
            system('git add -u')

        with log('Lookup modules errors'):
+            mknod('modules/__init__.py')
            r = check_output("pylint modules -f parseable -E -d all -e no-name-in-module,import-error; exit 0", shell=True, stderr=STDOUT).decode('utf-8')
+            unlink('modules/__init__.py')

        dirnames = defaultdict(list)
        for line in r.split('\n'):
@@ -204,6 +207,10 @@ def main(self):
                    error.fixup()
                system('git add %s' % compat_dirname)

+        with log('Custom fixups'):
+            replace_all('super(Attr, self).__init__(selector, default=default)', 'super(Attr, self).__init__(selector, attr, default=default)')
+            replace_all("super(Link, self).__init__(selector, 'href', default=default)", "super(Link, self).__init__(selector, default=default)")
+
        system('git add -u')



--- a/tools/stable_backport_data/weboob_capabilities_bank.py
+++ b/tools/stable_backport_data/weboob_capabilities_bank.py
+import weboob.capabilities.bank as OLD
+
+# can't import *, __all__ is incomplete...
+# Instead, copy every name listed by dir(OLD) into this module's namespace.
+# NOTE(review): dir(OLD) also lists dunder names (``__name__``, ``__doc__``, ...),
+# so those are overwritten here too -- confirm this is intended.
+for attr in dir(OLD):
+    globals()[attr] = getattr(OLD, attr)
+
+
+# Same list object as the original module's; names defined below in this file
+# are not added to it.
+__all__ = OLD.__all__
+
+
+class AccountOwnerType(object):
+    """
+    Specifies the usage of the account.
+
+    Plain holder of string constants; this file is the manual port listed in
+    ``MANUAL_PORTS`` for ``weboob.capabilities.bank`` (presumably because the
+    stable module does not provide this class -- TODO confirm).
+    """
+    PRIVATE = u'PRIV'
+    """private personal account"""
+    ORGANIZATION = u'ORGA'
+    """professional account"""