Commit 41106404, authored by Vincent A: define html_unescape (deprecation update)

Since Python 3.4, HTMLParser.unescape is deprecated and replaced with html.unescape.
parent d5116352
......@@ -16,13 +16,13 @@
# You should have received a copy of the GNU Lesser General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import datetime
from decimal import Decimal
import lxml.html as html
from six.moves.html_parser import HTMLParser
from import basestring, unicode, urljoin
from import basestring, unicode, urljoin, html_unescape
from import html2text
from .base import (
......@@ -244,9 +244,8 @@ class ReplaceEntities(CleanText):
Filter to replace HTML entities like "&eacute;" or "&#x42;" with their unicode counterpart.
def filter(self, data):
    """Return the parent filter's text with HTML entities decoded.

    The text is first produced by the parent class's ``filter`` and then
    passed through ``html_unescape``, so entities such as "&eacute;" or
    "&#x42;" come back as their unicode counterpart.

    :param data: value handed to the parent filter
    :return: the filtered text with HTML entities replaced
    """
    txt = super(ReplaceEntities, self).filter(data)
    # HTMLParser.unescape is deprecated since Python 3.4; rely on the
    # html_unescape compatibility helper instead of instantiating a parser.
    return html_unescape(txt)
class TableCell(_Filter):
......@@ -170,3 +170,15 @@ if sys.version >= '3.4':
def fullmatch(pattern, string_to_parse, flags=0):
    """Match ``pattern`` against the whole of ``string_to_parse``.

    :param pattern: regular expression source string
    :param string_to_parse: text that must match from start to end
    :param flags: ``re`` flags forwarded to the matcher
    :return: a match object if the entire string matches, else ``None``
    """
    native = getattr(re, 'fullmatch', None)
    if native is not None:
        # Available from Python 3.4 on: use the real implementation.
        return native(pattern, string_to_parse, flags)
    # Fallback: group the pattern so the end anchor applies to every
    # alternative (a bare "a|b$" would accept "ax"), and use \Z rather
    # than $ so a trailing newline is not silently tolerated.
    return re.match(r'(?:%s)\Z' % pattern, string_to_parse, flags)
# Pick the HTML-entity decoder once, at import time. The two definitions
# must be mutually exclusive: without the ``else`` the legacy version would
# always override the modern one.
if sys.version_info >= (3, 4):
    def html_unescape(s):
        """Return ``s`` with HTML entities replaced by their characters.

        Uses ``html.unescape``, the replacement for the deprecated
        ``HTMLParser.unescape``.
        """
        import html
        return html.unescape(s)
else:
    def html_unescape(s):
        """Return ``s`` with HTML entities replaced by their characters.

        Legacy path for interpreters older than 3.4, where
        ``html.unescape`` is not used and ``HTMLParser.unescape`` is.
        """
        from six.moves.html_parser import HTMLParser
        return HTMLParser().unescape(s)
