Commit 09f96451 authored by Vincent A's avatar Vincent A Committed by Florent Fourcot

weboob.browser.filters: introduce filters.base, move or alias filters

weboob.browser.filters.base will contain the internal classes like
Filter and core like FilterError.

TableCell is imported in filters.html to let modules migrate, then it
can be fully moved.
parent 1aaa1c62
# -*- coding: utf-8 -*-
# Copyright(C) 2017 weboob project
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from functools import wraps
import lxml.html
from weboob.exceptions import ParseError
from weboob.tools.compat import unicode, basestring
from weboob.tools.log import getLogger, DEBUG_FILTERS
__all__ = ['FilterError', 'Filter',]
class NoDefault(object):
def __repr__(self):
return 'NO_DEFAULT'
_NO_DEFAULT = NoDefault()
class FilterError(ParseError):
pass
class _Filter(object):
_creation_counter = 0
def __init__(self, default=_NO_DEFAULT):
self._key = None
self._obj = None
self.default = default
self._creation_counter = _Filter._creation_counter
_Filter._creation_counter += 1
def __or__(self, o):
self.default = o
return self
def __and__(self, o):
if isinstance(o, type) and issubclass(o, _Filter):
o = o()
o.selector = self
return o
def default_or_raise(self, exception):
if self.default is not _NO_DEFAULT:
return self.default
else:
raise exception
def __str__(self):
return self.__class__.__name__
def highlight_el(self, el, item=None):
obj = self._obj or item
try:
if not hasattr(obj, 'saved_attrib'):
return
if not obj.page.browser.highlight_el:
return
except AttributeError:
return
if el not in obj.saved_attrib:
obj.saved_attrib[el] = dict(el.attrib)
el.attrib['style'] = 'color: white !important; background: red !important;'
if self._key:
el.attrib['title'] = 'weboob field: %s' % self._key
def debug(*args):
"""
A decorator function to provide some debug information
in Filters.
It prints by default the name of the Filter and the input value.
"""
def wraper(function):
@wraps(function)
def print_debug(self, value):
logger = getLogger('b2filters')
result = ''
outputvalue = value
if isinstance(value, list):
from lxml import etree
outputvalue = ''
first = True
for element in value:
if first:
first = False
else:
outputvalue += ', '
if isinstance(element, etree.ElementBase):
outputvalue += "%s" % etree.tostring(element, encoding=unicode)
else:
outputvalue += "%r" % element
if self._obj is not None:
result += "%s" % self._obj._random_id
if self._key is not None:
result += ".%s" % self._key
name = str(self)
result += " %s(%r" % (name, outputvalue)
for arg in self.__dict__:
if arg.startswith('_') or arg == u"selector":
continue
if arg == u'default' and getattr(self, arg) == _NO_DEFAULT:
continue
result += ", %s=%r" % (arg, getattr(self, arg))
result += u')'
logger.log(DEBUG_FILTERS, result)
res = function(self, value)
return res
return print_debug
return wraper
class Filter(_Filter):
"""
Class used to filter on a HTML element given as call parameter to return
matching elements.
Filters can be chained, so the parameter supplied to constructor can be
either a xpath selector string, or an other filter called before.
>>> from lxml.html import etree
>>> f = CleanDecimal(CleanText('//p'), replace_dots=True)
>>> f(etree.fromstring('<html><body><p>blah: <span>229,90</span></p></body></html>'))
Decimal('229.90')
"""
def __init__(self, selector=None, default=_NO_DEFAULT):
super(Filter, self).__init__(default=default)
self.selector = selector
def select(self, selector, item):
if isinstance(selector, basestring):
ret = item.xpath(selector)
elif isinstance(selector, _Filter):
selector._key = self._key
selector._obj = self._obj
ret = selector(item)
elif callable(selector):
ret = selector(item)
else:
ret = selector
if isinstance(ret, lxml.html.HtmlElement):
self.highlight_el(ret, item)
elif isinstance(ret, list):
for el in ret:
if isinstance(el, lxml.html.HtmlElement):
self.highlight_el(el, item)
return ret
def __call__(self, item):
return self.filter(self.select(self.selector, item))
@debug()
def filter(self, value):
"""
This method have to be overrided by children classes.
"""
raise NotImplementedError()
class _Selector(Filter):
def filter(self, elements):
if elements is not None:
return elements
else:
return self.default_or_raise(ParseError('Element %r not found' % self.selector))
......@@ -24,10 +24,14 @@ from weboob.exceptions import ParseError
from weboob.tools.compat import basestring, unicode, urljoin
from weboob.tools.html import html2text
from .standard import _NO_DEFAULT, Filter, FilterError, _Selector, debug
from .base import _NO_DEFAULT, Filter, FilterError, _Selector, debug
from .standard import TableCell # TODO move class here when modules are migrated
__all__ = ['CSS', 'XPath', 'XPathNotFound', 'AttributeNotFound',
'Attr', 'Link', 'CleanHTML', 'FormValue', 'HasElement']
'Attr', 'Link', 'CleanHTML', 'FormValue', 'HasElement',
'TableCell',
]
class XPathNotFound(FilterError):
......
......@@ -17,9 +17,10 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.exceptions import ParseError
from weboob.tools.compat import basestring, with_metaclass
from .standard import _Filter, _NO_DEFAULT, Filter, ParseError, debug
from .base import _Filter, _NO_DEFAULT, Filter, debug
__all__ = ['Dict']
......
......@@ -20,7 +20,6 @@
from __future__ import absolute_import
import datetime
from functools import wraps
import re
import unicodedata
from decimal import Decimal, InvalidOperation
......@@ -28,7 +27,6 @@ from itertools import islice
from collections import Iterator
from dateutil.parser import parse as parse_date
import lxml.html
from weboob.capabilities.base import empty
from weboob.capabilities.base import Currency as BaseCurrency
......@@ -36,14 +34,8 @@ from weboob.tools.compat import basestring, unicode, long
from weboob.exceptions import ParseError
from weboob.browser.url import URL
from weboob.tools.compat import parse_qs, urlparse
from weboob.tools.log import getLogger, DEBUG_FILTERS
class NoDefault(object):
def __repr__(self):
return 'NO_DEFAULT'
_NO_DEFAULT = NoDefault()
from .base import _NO_DEFAULT, FilterError, _Filter, Filter, debug
__all__ = ['FilterError', 'ColumnNotFound', 'RegexpError', 'ItemNotFound',
......@@ -54,10 +46,6 @@ __all__ = ['FilterError', 'ColumnNotFound', 'RegexpError', 'ItemNotFound',
'Eval', 'BrowserURL', 'Async', 'AsyncLoad']
class FilterError(ParseError):
pass
class ColumnNotFound(FilterError):
pass
......@@ -70,156 +58,6 @@ class ItemNotFound(FilterError):
pass
class _Filter(object):
_creation_counter = 0
def __init__(self, default=_NO_DEFAULT):
self._key = None
self._obj = None
self.default = default
self._creation_counter = _Filter._creation_counter
_Filter._creation_counter += 1
def __or__(self, o):
self.default = o
return self
def __and__(self, o):
if isinstance(o, type) and issubclass(o, _Filter):
o = o()
o.selector = self
return o
def default_or_raise(self, exception):
if self.default is not _NO_DEFAULT:
return self.default
else:
raise exception
def __str__(self):
return self.__class__.__name__
def highlight_el(self, el, item=None):
obj = self._obj or item
try:
if not hasattr(obj, 'saved_attrib'):
return
if not obj.page.browser.highlight_el:
return
except AttributeError:
return
if el not in obj.saved_attrib:
obj.saved_attrib[el] = dict(el.attrib)
el.attrib['style'] = 'color: white !important; background: red !important;'
if self._key:
el.attrib['title'] = 'weboob field: %s' % self._key
def debug(*args):
"""
A decorator function to provide some debug information
in Filters.
It prints by default the name of the Filter and the input value.
"""
def wraper(function):
@wraps(function)
def print_debug(self, value):
logger = getLogger('b2filters')
result = ''
outputvalue = value
if isinstance(value, list):
from lxml import etree
outputvalue = ''
first = True
for element in value:
if first:
first = False
else:
outputvalue += ', '
if isinstance(element, etree.ElementBase):
outputvalue += "%s" % etree.tostring(element, encoding=unicode)
else:
outputvalue += "%r" % element
if self._obj is not None:
result += "%s" % self._obj._random_id
if self._key is not None:
result += ".%s" % self._key
name = str(self)
result += " %s(%r" % (name, outputvalue)
for arg in self.__dict__:
if arg.startswith('_') or arg == u"selector":
continue
if arg == u'default' and getattr(self, arg) == _NO_DEFAULT:
continue
result += ", %s=%r" % (arg, getattr(self, arg))
result += u')'
logger.log(DEBUG_FILTERS, result)
res = function(self, value)
return res
return print_debug
return wraper
class Filter(_Filter):
"""
Class used to filter on a HTML element given as call parameter to return
matching elements.
Filters can be chained, so the parameter supplied to constructor can be
either a xpath selector string, or an other filter called before.
>>> from lxml.html import etree
>>> f = CleanDecimal(CleanText('//p'), replace_dots=True)
>>> f(etree.fromstring('<html><body><p>blah: <span>229,90</span></p></body></html>'))
Decimal('229.90')
"""
def __init__(self, selector=None, default=_NO_DEFAULT):
super(Filter, self).__init__(default=default)
self.selector = selector
def select(self, selector, item):
if isinstance(selector, basestring):
ret = item.xpath(selector)
elif isinstance(selector, _Filter):
selector._key = self._key
selector._obj = self._obj
ret = selector(item)
elif callable(selector):
ret = selector(item)
else:
ret = selector
if isinstance(ret, lxml.html.HtmlElement):
self.highlight_el(ret, item)
elif isinstance(ret, list):
for el in ret:
if isinstance(el, lxml.html.HtmlElement):
self.highlight_el(el, item)
return ret
def __call__(self, item):
return self.filter(self.select(self.selector, item))
@debug()
def filter(self, value):
"""
This method have to be overrided by children classes.
"""
raise NotImplementedError()
class _Selector(Filter):
def filter(self, elements):
if elements is not None:
return elements
else:
return self.default_or_raise(ParseError('Element %r not found' % self.selector))
class AsyncLoad(Filter):
def __call__(self, item):
link = self.select(self.selector, item)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment