Commit 41106404 authored by Vincent A

weboob.tools.compat: define html_unescape (deprecation update)

Since Python 3.4, HTMLParser.unescape is deprecated and replaced with
html.unescape.
parent d5116352
......@@ -16,13 +16,13 @@
#
# You should have received a copy of the GNU Lesser General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import datetime
from decimal import Decimal
import lxml.html as html
from six.moves.html_parser import HTMLParser
from weboob.tools.compat import basestring, unicode, urljoin
from weboob.tools.compat import basestring, unicode, urljoin, html_unescape
from weboob.tools.html import html2text
from .base import (
......@@ -244,9 +244,8 @@ class ReplaceEntities(CleanText):
Filter to replace HTML entities like "&eacute;" or "&#x42;" with their unicode counterpart.
"""
def filter(self, data):
    """Clean `data` with the parent filter, then decode its HTML entities.

    The text returned by the parent class's ``filter`` is passed through
    ``html_unescape`` and the result is returned.
    """
    txt = super(ReplaceEntities, self).filter(data)
    # Use the compat helper instead of instantiating HTMLParser and calling
    # its deprecated unescape() method.
    return html_unescape(txt)
class TableCell(_Filter):
......
......@@ -170,3 +170,15 @@ if sys.version >= '3.4':
else:
def fullmatch(pattern, string_to_parse, flags=0):
return re.match(r'%s$' % pattern, string_to_parse, flags)
if sys.version_info >= (3, 4):
    def html_unescape(s):
        """Return `s` with HTML character references replaced by characters.

        Delegates to the standard library's ``html.unescape``.
        """
        # Local import keeps the name `html` out of this module's namespace.
        import html
        return html.unescape(s)
else:
    def html_unescape(s):
        """Return `s` with HTML character references replaced by characters.

        Fallback for interpreters older than 3.4: delegates to
        ``HTMLParser().unescape``.
        """
        from six.moves.html_parser import HTMLParser
        return HTMLParser().unescape(s)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment