Skip to content
Commits on Source (12)
......@@ -17,6 +17,7 @@ tests = weboob.tools.capabilities.bank.iban,
weboob.browser.pages,
weboob.browser.filters.standard,
weboob.browser.tests.form,
weboob.browser.tests.filters,
weboob.browser.tests.url
[isort]
......
......@@ -600,9 +600,17 @@ class Regexp(Filter):
def __init__(self, selector=None, pattern=None, template=None, nth=0, flags=0, default=_NO_DEFAULT):
super(Regexp, self).__init__(selector, default=default)
assert pattern is not None
if pattern is None:
raise FilterError('Missing pattern parameter')
self.pattern = pattern
self._regex = re.compile(pattern, flags)
# 8192 = regex.VERSION0 / 256 = regex.VERSION1
if '(?V0)' in pattern or '(?V1)' in pattern or flags & 8192 or flags & 256:
import regex
self._regex = regex.compile(pattern, flags)
else:
self._regex = re.compile(pattern, flags)
self._regex = self._regex
self.template = template
self.nth = nth
......@@ -695,10 +703,10 @@ def filter(self, txt):
class DateTime(Filter):
"""Parse date and time."""
def __init__(self, selector=None, default=_NO_DEFAULT, dayfirst=False, translations=None,
parse_func=parse_date, fuzzy=False):
def __init__(self, selector=None, default=_NO_DEFAULT, translations=None,
parse_func=parse_date, strict=True, **kwargs):
"""
:param dayfirst: if True, the day is be the first element in the string to parse
:param dayfirst: if True, the day is the first element in the string to parse
:type dayfirst: bool
:param parse_func: the function to use for parsing the datetime
:param translations: string replacements from site locale to English
......@@ -706,10 +714,13 @@ def __init__(self, selector=None, default=_NO_DEFAULT, dayfirst=False, translati
"""
super(DateTime, self).__init__(selector, default=default)
self.dayfirst = dayfirst
self.kwargs = kwargs
self.translations = translations
self.parse_func = parse_func
self.fuzzy = fuzzy
self.strict = strict
_default_date_1 = datetime.datetime(2100, 10, 10, 1, 1, 1)
_default_date_2 = datetime.datetime(2120, 12, 12, 2, 2, 2)
@debug()
def filter(self, txt):
......@@ -719,7 +730,14 @@ def filter(self, txt):
if self.translations:
for search, repl in self.translations:
txt = search.sub(repl, txt)
return self.parse_func(txt, dayfirst=self.dayfirst, fuzzy=self.fuzzy)
if self.strict:
parse1 = self.parse_func(txt, default=self._default_date_1, **self.kwargs)
parse2 = self.parse_func(txt, default=self._default_date_2, **self.kwargs)
if parse1 != parse2:
raise FilterError('Date is not complete')
return parse1
else:
return self.parse_func(txt, **self.kwargs)
except (ValueError, TypeError) as e:
return self.default_or_raise(FormatError('Unable to parse %r: %s' % (txt, e)))
......@@ -751,10 +769,11 @@ def filter(self, txt):
class Date(DateTime):
"""Parse date."""
def __init__(self, selector=None, default=_NO_DEFAULT, dayfirst=False, translations=None,
parse_func=parse_date, fuzzy=False):
super(Date, self).__init__(selector, default=default, dayfirst=dayfirst, translations=translations,
parse_func=parse_func, fuzzy=fuzzy)
def __init__(self, selector=None, default=_NO_DEFAULT, translations=None, parse_func=parse_date, strict=True, **kwargs):
super(Date, self).__init__(selector, default=default, translations=translations, parse_func=parse_func, strict=strict, **kwargs)
_default_date_1 = datetime.datetime(2100, 10, 10, 1, 1, 1)
_default_date_2 = datetime.datetime(2120, 12, 12, 1, 1, 1)
@debug()
def filter(self, txt):
......@@ -889,8 +908,21 @@ def filter(self, values):
class Join(Filter):
def __init__(self, pattern, selector=None, textCleaner=CleanText, newline=False, addBefore='', addAfter=''):
super(Join, self).__init__(selector)
"""
Join multiple results from a selector.
>>> Join(' - ', '//div/p') # doctest: +SKIP
>>> Join(pattern=', ').filter([u"Oui", u"bonjour", ""]) == u"Oui, bonjour"
True
>>> Join(pattern='-').filter([u"Au", u"revoir", ""]) == u"Au-revoir"
True
>>> Join(pattern='-').filter([]) == u""
True
>>> Join(pattern='-', default=u'empty').filter([]) == u'empty'
True
"""
def __init__(self, pattern, selector=None, textCleaner=CleanText, newline=False, addBefore='', addAfter='', default=_NO_DEFAULT):
super(Join, self).__init__(selector, default=default)
self.pattern = pattern
self.textCleaner = textCleaner
self.newline = newline
......@@ -913,9 +945,39 @@ def filter(self, el):
if self.addAfter:
result = '%s%s' % (result, self.addAfter)
if not result and self.default is not _NO_DEFAULT:
return self.default
return result
class MultiJoin(MultiFilter):
    """
    Join multiple filters.

    The values handed to :meth:`filter` are stripped of falsy entries,
    converted to text and glued together with ``pattern`` (``', '`` when no
    ``pattern`` keyword is given). If nothing is left to join and a default
    was configured, that default is returned instead of an empty string.

    >>> MultiJoin(Field('field1'), Field('field2')) # doctest: +SKIP
    >>> MultiJoin(pattern=u', ').filter([u"Oui", u"bonjour", ""]) == u"Oui, bonjour"
    True
    >>> MultiJoin(pattern=u'-').filter([u"Au", u"revoir", ""]) == u"Au-revoir"
    True
    >>> MultiJoin(pattern=u'-').filter([]) == u""
    True
    >>> MultiJoin(pattern=u'-', default=u'empty').filter([]) == u'empty'
    True
    >>> MultiJoin(pattern=u'-').filter([1, 2, 3]) == u'1-2-3'
    True
    """

    def __init__(self, *args, **kwargs):
        # ``pattern`` is consumed here so that it is not forwarded to the
        # parent constructor together with the remaining keywords.
        separator = kwargs.pop('pattern', ', ')
        self.pattern = separator
        super(MultiJoin, self).__init__(*args, **kwargs)

    @debug()
    def filter(self, values):
        # Keep truthy values only, as text.
        parts = []
        for value in values:
            if value:
                parts.append(unicode(value))

        # The default only replaces a result that would otherwise be empty.
        if parts or self.default is _NO_DEFAULT:
            return self.pattern.join(parts)
        return self.default
class Eval(MultiFilter):
"""
Evaluate a function with given 'deferred' arguments.
......@@ -1048,3 +1110,34 @@ def test_CleanDecimal_strict():
assert_raises(NumberFormatError, CleanDecimal.SI().filter, 'foo 123,456,789')
assert_raises(NumberFormatError, CleanDecimal.SI().filter, 'foo 12 3456 bar')
assert_raises(NumberFormatError, CleanDecimal.SI().filter, 'foo 123-456 bar')
def test_DateTime():
    """Exercise strict and lenient parsing of the Date and DateTime filters."""
    now = datetime.datetime.now()

    # Strict mode refuses strings that do not spell out a full date.
    for incomplete in ('2019', '1788-7', 'June 1st'):
        assert_raises(FilterError, Date(strict=True).filter, incomplete)

    # A complete date gives the same result in both modes.
    for strict in (True, False):
        assert Date(strict=strict).filter('1788-7-15') == datetime.date(1788, 7, 15)

    # Lenient mode: the expected values take the missing parts from today.
    lenient_dates = (
        ('1945-7', datetime.date(1945, 7, now.day)),
        ('June 1st', datetime.date(now.year, 6, 1)),
    )
    for text, expected in lenient_dates:
        assert Date(strict=False).filter(text) == expected

    lenient_datetimes = (
        ('1788-7', datetime.datetime(1788, 7, now.day)),
        ('1788', datetime.datetime(1788, now.month, now.day)),
        ('5-1', datetime.datetime(now.year, 5, 1)),
    )
    for text, expected in lenient_datetimes:
        assert DateTime(strict=False).filter(text) == expected

    # Extra keyword arguments such as ``yearfirst`` are accepted by Date.
    keyword_cases = (
        ({'yearfirst': True}, '88-7-15', datetime.date(1988, 7, 15)),
        ({'yearfirst': False}, '20-7-15', datetime.date(2015, 7, 20)),
        ({'yearfirst': True}, '1789-7-15', datetime.date(1789, 7, 15)),
        ({'yearfirst': True, 'strict': False}, '7-15', datetime.date(now.year, 7, 15)),
    )
    for options, text, expected in keyword_cases:
        assert Date(**options).filter(text) == expected
def test_regex():
    """Check Regexp with and without the ``(?V0)``/``(?V1)`` inline markers."""
    try:
        # With (?V1) the set-difference class skips the vowels.
        version1 = Regexp(pattern=r'([[a-z]--[aeiou]]+)(?V1)')
        assert version1.filter(u'abcde') == u'bcd'

        # With (?V0) the same pattern is expected not to match.
        version0 = Regexp(pattern=r'([[a-z]--[aeiou]]+)(?V0)', default=False)
        assert not version0.filter(u'abcde')
    except ImportError:
        # An ImportError raised by the checks above is tolerated on purpose.
        pass

    # Without any version marker the pattern is expected not to match either.
    unmarked = Regexp(pattern=r'([[a-z]--[aeiou]]+)', default=False)
    assert not unmarked.filter(u'abcde')
......@@ -18,6 +18,7 @@
from unittest import TestCase
from lxml.html import fromstring
from weboob.browser.filters.html import Link
from weboob.browser.filters.standard import RawText
......@@ -48,3 +49,10 @@ def test_first_node_is_text_recursive(self):
def test_first_node_is_element_recursive(self):
    # The paragraph starts with an element (<span>) followed by a text tail;
    # with children=True both pieces are expected in the extracted text.
    markup = '<html><body><p><span>229,90</span> EUR</p></body></html>'
    tree = fromstring(markup)
    extract = RawText('//p', default="foo", children=True)
    self.assertEqual("229,90 EUR", extract(tree))
class LinkTest(TestCase):
    """Tests for the ``Link`` HTML filter."""

    def test_link(self):
        # Link applied to an anchor is expected to give back its href value.
        anchor = fromstring('<a href="https://www.google.com/">Google</a>')
        href = Link('//a')(anchor)
        self.assertEqual('https://www.google.com/', href)
......@@ -27,10 +27,11 @@
import os
import subprocess
import hashlib
from datetime import datetime
from contextlib import closing
from compileall import compile_dir
from contextlib import closing, contextmanager
from datetime import datetime
from io import BytesIO, StringIO
from tempfile import NamedTemporaryFile
from weboob.exceptions import BrowserHTTPError, BrowserHTTPNotFound, ModuleInstallError
from .modules import LoadedModule
......@@ -43,11 +44,18 @@
from configparser import RawConfigParser, DEFAULTSECT
@contextmanager
def open_for_config(filename):
    """
    Context manager used to (re)write a configuration file safely.

    The data is written to a temporary file created in the same directory as
    ``filename``; only once the ``with`` body has finished without error is
    the temporary file closed and renamed onto ``filename``. Keeping the
    temporary file in the destination directory keeps the rename on a single
    filesystem.

    On Python 2 the file is opened in binary mode, otherwise in text mode
    with UTF-8 encoding.

    :param filename: path of the configuration file to write
    :return: yields the writable temporary file object
    :raises: whatever the ``with`` body raises; in that case ``filename`` is
             left untouched and the temporary file is removed
    """
    target_dir = os.path.dirname(filename)
    if sys.version_info.major == 2:
        # Python 2's NamedTemporaryFile has no ``encoding`` parameter.
        f = NamedTemporaryFile(mode='wb', dir=target_dir, delete=False)
    else:
        f = NamedTemporaryFile(mode='w', encoding='utf-8',
                               dir=target_dir, delete=False)

    try:
        # Leaving the inner ``with`` flushes and closes the file, so all the
        # data is on disk before the rename below.
        with f:
            yield f
    except BaseException:
        # Do not leave a partial temporary file next to the real config.
        try:
            os.unlink(f.name)
        except OSError:
            pass
        raise

    os.rename(f.name, filename)
class ModuleInfo(object):
......