Commit b2c32431 authored by Vincent A

modules: use html_unescape (HTMLParser.unescape was deprecated)

parent 41106404
Pipeline #3533 passed with stages in 15 minutes and 30 seconds
......@@ -18,15 +18,12 @@
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
import re
from html.parser import HTMLParser
except ImportError:
from HTMLParser import HTMLParser
from weboob.browser.pages import HTMLPage, LoggedPage
from weboob.browser.elements import method, ListElement, ItemElement, SkipItem
from weboob.capabilities.collection import Collection
from weboob.browser.filters.standard import CleanText
from import html_unescape
class PageLogin(HTMLPage):
......@@ -96,10 +93,6 @@ class PageChapter(LoggedPage, HTMLPage):
return '-'.join(self.obj_split_path())
def unescape(s):
return HTMLParser().unescape(s)
class PageSection(LoggedPage, HTMLPage):
video_url = re.compile(r'[^\s;]+/HD\.mp4', re.I)
video_thumb = re.compile(r'reposter=&#34;(.*?)&#34;')
......@@ -124,7 +117,7 @@ class PageSection(LoggedPage, HTMLPage):
except IndexError:
thumb = None
title = unescape(unescape(list(self.video_title.finditer(beforetext))[-1].group(1)))
title = html_unescape(html_unescape(list(self.video_title.finditer(beforetext))[-1].group(1)))
except IndexError:
title = u'%s - %s' % ('id'), n)
......@@ -20,17 +20,13 @@
from __future__ import unicode_literals
import re
from HTMLParser import HTMLParser
except ImportError:
from html.parser import HTMLParser
from weboob.browser import PagesBrowser, URL
from weboob.browser.profiles import Wget
from weboob.exceptions import BrowserHTTPNotFound
from weboob.capabilities.base import NotAvailable, NotLoaded
from weboob.capabilities.cinema import Movie, Person
from import unicode
from import unicode, html_unescape
from .pages import PersonPage, MovieCrewPage, BiographyPage, ReleasePage
......@@ -51,7 +47,6 @@ class ImdbBrowser(PagesBrowser):
def iter_movies(self, pattern):
res ='', params={'q': pattern})
jres = res.json()
htmlparser = HTMLParser()
for cat in ['title_popular', 'title_exact', 'title_approx']:
if cat in jres:
for m in jres[cat]:
......@@ -61,11 +56,11 @@ class ImdbBrowser(PagesBrowser):
0].strip(', '), tdesc.split('>')[1].split('<')[0])
short_description = tdesc.strip(', ')
movie = Movie(m['id'], htmlparser.unescape(m['title']))
movie = Movie(m['id'], html_unescape(m['title']))
movie.other_titles = NotLoaded
movie.release_date = NotLoaded
movie.duration = NotLoaded
movie.short_description = htmlparser.unescape(short_description)
movie.short_description = html_unescape(short_description)
movie.pitch = NotLoaded = NotLoaded
movie.note = NotLoaded
......@@ -77,11 +72,10 @@ class ImdbBrowser(PagesBrowser):
def iter_persons(self, pattern):
res ='', params={'q': pattern})
jres = res.json()
htmlparser = HTMLParser()
for cat in ['name_popular', 'name_exact', 'name_approx']:
if cat in jres:
for p in jres[cat]:
person = Person(p['id'], htmlparser.unescape(unicode(p['name'])))
person = Person(p['id'], html_unescape(unicode(p['name'])))
person.real_name = NotLoaded
person.birth_place = NotLoaded
person.birth_date = NotLoaded
......@@ -89,7 +83,7 @@ class ImdbBrowser(PagesBrowser):
person.gender = NotLoaded
person.nationality = NotLoaded
person.short_biography = NotLoaded
person.short_description = htmlparser.unescape(p['description'])
person.short_description = html_unescape(p['description'])
person.roles = NotLoaded
person.thumbnail_url = NotLoaded
yield person
......@@ -100,7 +94,6 @@ class ImdbBrowser(PagesBrowser):
jres = res.json()
return None
htmlparser = HTMLParser()
title = NotAvailable
duration = NotAvailable
......@@ -116,7 +109,7 @@ class ImdbBrowser(PagesBrowser):
if 'Title' not in jres:
title = htmlparser.unescape(unicode(jres['Title'].strip()))
title = html_unescape(unicode(jres['Title'].strip()))
if 'Poster' in jres:
thumbnail_url = unicode(jres['Poster'])
if 'Director' in jres:
......@@ -20,15 +20,11 @@
from collections import OrderedDict
from datetime import datetime
from HTMLParser import HTMLParser
except ImportError:
from html.parser import HTMLParser
from import CapContact, ContactPhoto, Contact, ProfileNode
from import CapDating
from weboob.capabilities.messages import CapMessages, CapMessagesPost, Message, Thread
from import Module, BackendConfig
from import html_unescape
from import to_unicode
from import Value, ValueBackendPassword, ValueBool
......@@ -187,7 +183,7 @@ class OkCModule(Module, CapMessages, CapContact, CapMessagesPost, CapDating):,
......@@ -20,10 +20,6 @@
from __future__ import unicode_literals
import re
from html.parser import HTMLParser
except ImportError:
import HTMLParser
from weboob.browser.pages import HTMLPage, LoggedPage, JsonPage, pagination
from weboob.capabilities.bill import Subscription
......@@ -39,7 +35,7 @@ from weboob.browser.filters.json import Dict
from weboob.capabilities.base import NotAvailable
from weboob.capabilities.bill import DocumentTypes, Bill
from import parse_french_date
from import urlencode, urlparse, parse_qsl
from import urlencode, urlparse, parse_qsl, html_unescape
class BillsApiProPage(LoggedPage, JsonPage):
......@@ -146,14 +142,14 @@ class BillsPage(LoggedPage, HTMLPage):
def obj_url(self):
if Field('_url_base')(self):
# URL won't work if the HTML is not unescaped
return HTMLParser().unescape(str(Field('_url_base')(self)))
return html_unescape(str(Field('_url_base')(self)))
return Link(TableCell(Field('_cell')(self))(self)[0].xpath('./a'), default=NotAvailable)(self)
obj__label_base = Regexp(CleanText('.//ul[@class="liste"]/script', default=None), '.*</span>(.*?)</a.*', default=None)
def obj_label(self):
if Field('_label_base')(self):
return HTMLParser().unescape(str(Field('_label_base')(self)))
return html_unescape(str(Field('_label_base')(self)))
return CleanText(TableCell(Field('_cell')(self))(self)[0].xpath('.//span[@class="ec_visually_hidden"]'))(self)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment