From 41106404e9e478ccaf6729ad12e95d7db6679b6f Mon Sep 17 00:00:00 2001
From: Vincent A
Date: Sun, 10 Jan 2021 15:02:59 +0100
Subject: [PATCH] weboob.tools.compat: define html_unescape (deprecation update)

Since Python 3.4, HTMLParser.unescape is deprecated and replaced with
html.unescape.
---
 weboob/browser/filters/html.py |  7 +++----
 weboob/tools/compat.py         | 12 ++++++++++++
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/weboob/browser/filters/html.py b/weboob/browser/filters/html.py
index a4faf4a3cc..4cee737ca7 100644
--- a/weboob/browser/filters/html.py
+++ b/weboob/browser/filters/html.py
@@ -16,13 +16,13 @@
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
 import datetime
 from decimal import Decimal
 
 import lxml.html as html
-from six.moves.html_parser import HTMLParser
 
-from weboob.tools.compat import basestring, unicode, urljoin
+from weboob.tools.compat import basestring, unicode, urljoin, html_unescape
 from weboob.tools.html import html2text
 
 from .base import (
@@ -244,9 +244,8 @@ class ReplaceEntities(CleanText):
     Filter to replace HTML entities like "&eacute;" or "&#x42;" with their unicode counterpart.
     """
     def filter(self, data):
-        h = HTMLParser()
         txt = super(ReplaceEntities, self).filter(data)
-        return h.unescape(txt)
+        return html_unescape(txt)
 
 
 class TableCell(_Filter):
diff --git a/weboob/tools/compat.py b/weboob/tools/compat.py
index 75078b3e2c..f6582bc329 100644
--- a/weboob/tools/compat.py
+++ b/weboob/tools/compat.py
@@ -170,3 +170,15 @@ def fullmatch(pattern, string_to_parse, flags=0):
 else:
     def fullmatch(pattern, string_to_parse, flags=0):
         return re.match(r'%s$' % pattern, string_to_parse, flags)
+
+
+if sys.version_info > (3, 4):
+    def html_unescape(s):
+        import html
+
+        return html.unescape(s)
+else:
+    def html_unescape(s):
+        from six.moves.html_parser import HTMLParser
+
+        return HTMLParser().unescape(s)
-- 
GitLab