Commit b2c32431 authored by Vincent A

modules: use html_unescape (HTMLParser.unescape was deprecated)

parent 41106404
Pipeline #3533 passed with stages
in 15 minutes and 30 seconds
......@@ -18,15 +18,12 @@
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
import re
try:
from html.parser import HTMLParser
except ImportError:
from HTMLParser import HTMLParser
from weboob.browser.pages import HTMLPage, LoggedPage
from weboob.browser.elements import method, ListElement, ItemElement, SkipItem
from weboob.capabilities.collection import Collection
from weboob.browser.filters.standard import CleanText
from weboob.tools.compat import html_unescape
class PageLogin(HTMLPage):
......@@ -96,10 +93,6 @@ class PageChapter(LoggedPage, HTMLPage):
return '-'.join(self.obj_split_path())
def unescape(s):
return HTMLParser().unescape(s)
class PageSection(LoggedPage, HTMLPage):
video_url = re.compile(r'[^\s;]+/HD\.mp4', re.I)
video_thumb = re.compile(r'reposter=&#34;(.*?)&#34;')
......@@ -124,7 +117,7 @@ class PageSection(LoggedPage, HTMLPage):
except IndexError:
thumb = None
try:
title = unescape(unescape(list(self.video_title.finditer(beforetext))[-1].group(1)))
title = html_unescape(html_unescape(list(self.video_title.finditer(beforetext))[-1].group(1)))
except IndexError:
title = u'%s - %s' % (match.group('id'), n)
......
......@@ -20,17 +20,13 @@
from __future__ import unicode_literals
import re
try:
from HTMLParser import HTMLParser
except ImportError:
from html.parser import HTMLParser
from weboob.browser import PagesBrowser, URL
from weboob.browser.profiles import Wget
from weboob.exceptions import BrowserHTTPNotFound
from weboob.capabilities.base import NotAvailable, NotLoaded
from weboob.capabilities.cinema import Movie, Person
from weboob.tools.compat import unicode
from weboob.tools.compat import unicode, html_unescape
from .pages import PersonPage, MovieCrewPage, BiographyPage, ReleasePage
......@@ -51,7 +47,6 @@ class ImdbBrowser(PagesBrowser):
def iter_movies(self, pattern):
res = self.open('http://www.imdb.com/xml/find?json=1&nr=1&tt=on', params={'q': pattern})
jres = res.json()
htmlparser = HTMLParser()
for cat in ['title_popular', 'title_exact', 'title_approx']:
if cat in jres:
for m in jres[cat]:
......@@ -61,11 +56,11 @@ class ImdbBrowser(PagesBrowser):
0].strip(', '), tdesc.split('>')[1].split('<')[0])
else:
short_description = tdesc.strip(', ')
movie = Movie(m['id'], htmlparser.unescape(m['title']))
movie = Movie(m['id'], html_unescape(m['title']))
movie.other_titles = NotLoaded
movie.release_date = NotLoaded
movie.duration = NotLoaded
movie.short_description = htmlparser.unescape(short_description)
movie.short_description = html_unescape(short_description)
movie.pitch = NotLoaded
movie.country = NotLoaded
movie.note = NotLoaded
......@@ -77,11 +72,10 @@ class ImdbBrowser(PagesBrowser):
def iter_persons(self, pattern):
res = self.open('http://www.imdb.com/xml/find?json=1&nr=1&nm=on', params={'q': pattern})
jres = res.json()
htmlparser = HTMLParser()
for cat in ['name_popular', 'name_exact', 'name_approx']:
if cat in jres:
for p in jres[cat]:
person = Person(p['id'], htmlparser.unescape(unicode(p['name'])))
person = Person(p['id'], html_unescape(unicode(p['name'])))
person.real_name = NotLoaded
person.birth_place = NotLoaded
person.birth_date = NotLoaded
......@@ -89,7 +83,7 @@ class ImdbBrowser(PagesBrowser):
person.gender = NotLoaded
person.nationality = NotLoaded
person.short_biography = NotLoaded
person.short_description = htmlparser.unescape(p['description'])
person.short_description = html_unescape(p['description'])
person.roles = NotLoaded
person.thumbnail_url = NotLoaded
yield person
......@@ -100,7 +94,6 @@ class ImdbBrowser(PagesBrowser):
jres = res.json()
else:
return None
htmlparser = HTMLParser()
title = NotAvailable
duration = NotAvailable
......@@ -116,7 +109,7 @@ class ImdbBrowser(PagesBrowser):
if 'Title' not in jres:
return
title = htmlparser.unescape(unicode(jres['Title'].strip()))
title = html_unescape(unicode(jres['Title'].strip()))
if 'Poster' in jres:
thumbnail_url = unicode(jres['Poster'])
if 'Director' in jres:
......
......@@ -20,15 +20,11 @@
from collections import OrderedDict
from datetime import datetime
try:
from HTMLParser import HTMLParser
except ImportError:
from html.parser import HTMLParser
from weboob.capabilities.contact import CapContact, ContactPhoto, Contact, ProfileNode
from weboob.capabilities.dating import CapDating
from weboob.capabilities.messages import CapMessages, CapMessagesPost, Message, Thread
from weboob.tools.backend import Module, BackendConfig
from weboob.tools.compat import html_unescape
from weboob.tools.misc import to_unicode
from weboob.tools.value import Value, ValueBackendPassword, ValueBool
......@@ -187,7 +183,7 @@ class OkCModule(Module, CapMessages, CapContact, CapMessagesPost, CapDating):
sender=sender.name,
receivers=[receiver.name],
date=date,
content=to_unicode(HTMLParser().unescape(message['body'])),
content=to_unicode(html_unescape(message['body'])),
children=[],
parent=parent,
signature=sender.get_text(),
......
......@@ -20,10 +20,6 @@
from __future__ import unicode_literals
import re
try:
from html.parser import HTMLParser
except ImportError:
import HTMLParser
from weboob.browser.pages import HTMLPage, LoggedPage, JsonPage, pagination
from weboob.capabilities.bill import Subscription
......@@ -39,7 +35,7 @@ from weboob.browser.filters.json import Dict
from weboob.capabilities.base import NotAvailable
from weboob.capabilities.bill import DocumentTypes, Bill
from weboob.tools.date import parse_french_date
from weboob.tools.compat import urlencode, urlparse, parse_qsl
from weboob.tools.compat import urlencode, urlparse, parse_qsl, html_unescape
class BillsApiProPage(LoggedPage, JsonPage):
......@@ -146,14 +142,14 @@ class BillsPage(LoggedPage, HTMLPage):
def obj_url(self):
if Field('_url_base')(self):
# URL won't work if HTML is not unescape
return HTMLParser().unescape(str(Field('_url_base')(self)))
return html_unescape(str(Field('_url_base')(self)))
return Link(TableCell(Field('_cell')(self))(self)[0].xpath('./a'), default=NotAvailable)(self)
obj__label_base = Regexp(CleanText('.//ul[@class="liste"]/script', default=None), '.*</span>(.*?)</a.*', default=None)
def obj_label(self):
if Field('_label_base')(self):
return HTMLParser().unescape(str(Field('_label_base')(self)))
return html_unescape(str(Field('_label_base')(self)))
else:
return CleanText(TableCell(Field('_cell')(self))(self)[0].xpath('.//span[@class="ec_visually_hidden"]'))(self)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment