Route HTML entity unescaping through a new weboob.tools.compat.html_unescape() helper.

ReplaceEntities.filter() no longer instantiates six.moves.html_parser.HTMLParser
itself. The helper calls the standard-library html.unescape() when
sys.version_info > (3, 4) and falls back to HTMLParser().unescape() otherwise.
Both imports are done at function scope inside the helper.

diff --git a/weboob/browser/filters/html.py b/weboob/browser/filters/html.py
index a4faf4a3cce4cdc0103d7b135968d11c763f7f18..4cee737ca71d07de15c782b28bfcc3020588938d 100644
--- a/weboob/browser/filters/html.py
+++ b/weboob/browser/filters/html.py
@@ -16,13 +16,13 @@
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with weboob. If not, see .
+
 import datetime
 from decimal import Decimal
 
 import lxml.html as html
-from six.moves.html_parser import HTMLParser
 
-from weboob.tools.compat import basestring, unicode, urljoin
+from weboob.tools.compat import basestring, unicode, urljoin, html_unescape
 from weboob.tools.html import html2text
 
 from .base import (
@@ -244,9 +244,8 @@ class ReplaceEntities(CleanText):
     Filter to replace HTML entities like "é" or "B" with their unicode counterpart.
     """
     def filter(self, data):
-        h = HTMLParser()
         txt = super(ReplaceEntities, self).filter(data)
-        return h.unescape(txt)
+        return html_unescape(txt)
 
 
 class TableCell(_Filter):
diff --git a/weboob/tools/compat.py b/weboob/tools/compat.py
index 75078b3e2cce196c8873ed7bbcb72c9766afbc3a..f6582bc32914a020c5c1e9e2b0d86969c1e892ab 100644
--- a/weboob/tools/compat.py
+++ b/weboob/tools/compat.py
@@ -170,3 +170,15 @@ def fullmatch(pattern, string_to_parse, flags=0):
 else:
     def fullmatch(pattern, string_to_parse, flags=0):
         return re.match(r'%s$' % pattern, string_to_parse, flags)
+
+
+if sys.version_info > (3, 4):
+    def html_unescape(s):
+        import html
+
+        return html.unescape(s)
+else:
+    def html_unescape(s):
+        from six.moves.html_parser import HTMLParser
+
+        return HTMLParser().unescape(s)