Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
weboob
Project overview
Project overview
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
180
Issues
180
List
Boards
Labels
Milestones
Merge Requests
53
Merge Requests
53
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
weboob
weboob
Commits
b2c32431
Commit
b2c32431
authored
Jan 10, 2021
by
Vincent A
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
modules: use html_unescape (HTMLParser.unescape was deprecated)
parent
41106404
Pipeline
#3533
passed with stages
in 15 minutes and 30 seconds
Changes
4
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
13 additions
and
35 deletions
+13
-35
modules/funmooc/pages.py
modules/funmooc/pages.py
+2
-9
modules/imdb/browser.py
modules/imdb/browser.py
+6
-13
modules/okc/module.py
modules/okc/module.py
+2
-6
modules/orange/pages/bills.py
modules/orange/pages/bills.py
+3
-7
No files found.
modules/funmooc/pages.py
View file @
b2c32431
...
...
@@ -18,15 +18,12 @@
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
import
re
try
:
from
html.parser
import
HTMLParser
except
ImportError
:
from
HTMLParser
import
HTMLParser
from
weboob.browser.pages
import
HTMLPage
,
LoggedPage
from
weboob.browser.elements
import
method
,
ListElement
,
ItemElement
,
SkipItem
from
weboob.capabilities.collection
import
Collection
from
weboob.browser.filters.standard
import
CleanText
from
weboob.tools.compat
import
html_unescape
class
PageLogin
(
HTMLPage
):
...
...
@@ -96,10 +93,6 @@ class PageChapter(LoggedPage, HTMLPage):
return
'-'
.
join
(
self
.
obj_split_path
())
def
unescape
(
s
):
return
HTMLParser
()
.
unescape
(
s
)
class
PageSection
(
LoggedPage
,
HTMLPage
):
video_url
=
re
.
compile
(
r'[^\s;]+/HD\.mp4'
,
re
.
I
)
video_thumb
=
re
.
compile
(
r'reposter="(.*?)"'
)
...
...
@@ -124,7 +117,7 @@ class PageSection(LoggedPage, HTMLPage):
except
IndexError
:
thumb
=
None
try
:
title
=
unescape
(
unescape
(
list
(
self
.
video_title
.
finditer
(
beforetext
))[
-
1
]
.
group
(
1
)))
title
=
html_unescape
(
html_unescape
(
list
(
self
.
video_title
.
finditer
(
beforetext
))[
-
1
]
.
group
(
1
)))
except
IndexError
:
title
=
u'%s - %s'
%
(
match
.
group
(
'id'
),
n
)
...
...
modules/imdb/browser.py
View file @
b2c32431
...
...
@@ -20,17 +20,13 @@
from
__future__
import
unicode_literals
import
re
try
:
from
HTMLParser
import
HTMLParser
except
ImportError
:
from
html.parser
import
HTMLParser
from
weboob.browser
import
PagesBrowser
,
URL
from
weboob.browser.profiles
import
Wget
from
weboob.exceptions
import
BrowserHTTPNotFound
from
weboob.capabilities.base
import
NotAvailable
,
NotLoaded
from
weboob.capabilities.cinema
import
Movie
,
Person
from
weboob.tools.compat
import
unicode
from
weboob.tools.compat
import
unicode
,
html_unescape
from
.pages
import
PersonPage
,
MovieCrewPage
,
BiographyPage
,
ReleasePage
...
...
@@ -51,7 +47,6 @@ class ImdbBrowser(PagesBrowser):
def
iter_movies
(
self
,
pattern
):
res
=
self
.
open
(
'http://www.imdb.com/xml/find?json=1&nr=1&tt=on'
,
params
=
{
'q'
:
pattern
})
jres
=
res
.
json
()
htmlparser
=
HTMLParser
()
for
cat
in
[
'title_popular'
,
'title_exact'
,
'title_approx'
]:
if
cat
in
jres
:
for
m
in
jres
[
cat
]:
...
...
@@ -61,11 +56,11 @@ class ImdbBrowser(PagesBrowser):
0
]
.
strip
(
', '
),
tdesc
.
split
(
'>'
)[
1
]
.
split
(
'<'
)[
0
])
else
:
short_description
=
tdesc
.
strip
(
', '
)
movie
=
Movie
(
m
[
'id'
],
htmlparser
.
unescape
(
m
[
'title'
]))
movie
=
Movie
(
m
[
'id'
],
html_unescape
(
m
[
'title'
]))
movie
.
other_titles
=
NotLoaded
movie
.
release_date
=
NotLoaded
movie
.
duration
=
NotLoaded
movie
.
short_description
=
htmlparser
.
unescape
(
short_description
)
movie
.
short_description
=
html_unescape
(
short_description
)
movie
.
pitch
=
NotLoaded
movie
.
country
=
NotLoaded
movie
.
note
=
NotLoaded
...
...
@@ -77,11 +72,10 @@ class ImdbBrowser(PagesBrowser):
def
iter_persons
(
self
,
pattern
):
res
=
self
.
open
(
'http://www.imdb.com/xml/find?json=1&nr=1&nm=on'
,
params
=
{
'q'
:
pattern
})
jres
=
res
.
json
()
htmlparser
=
HTMLParser
()
for
cat
in
[
'name_popular'
,
'name_exact'
,
'name_approx'
]:
if
cat
in
jres
:
for
p
in
jres
[
cat
]:
person
=
Person
(
p
[
'id'
],
htmlparser
.
unescape
(
unicode
(
p
[
'name'
])))
person
=
Person
(
p
[
'id'
],
html_unescape
(
unicode
(
p
[
'name'
])))
person
.
real_name
=
NotLoaded
person
.
birth_place
=
NotLoaded
person
.
birth_date
=
NotLoaded
...
...
@@ -89,7 +83,7 @@ class ImdbBrowser(PagesBrowser):
person
.
gender
=
NotLoaded
person
.
nationality
=
NotLoaded
person
.
short_biography
=
NotLoaded
person
.
short_description
=
htmlparser
.
unescape
(
p
[
'description'
])
person
.
short_description
=
html_unescape
(
p
[
'description'
])
person
.
roles
=
NotLoaded
person
.
thumbnail_url
=
NotLoaded
yield
person
...
...
@@ -100,7 +94,6 @@ class ImdbBrowser(PagesBrowser):
jres
=
res
.
json
()
else
:
return
None
htmlparser
=
HTMLParser
()
title
=
NotAvailable
duration
=
NotAvailable
...
...
@@ -116,7 +109,7 @@ class ImdbBrowser(PagesBrowser):
if
'Title'
not
in
jres
:
return
title
=
htmlparser
.
unescape
(
unicode
(
jres
[
'Title'
]
.
strip
()))
title
=
html_unescape
(
unicode
(
jres
[
'Title'
]
.
strip
()))
if
'Poster'
in
jres
:
thumbnail_url
=
unicode
(
jres
[
'Poster'
])
if
'Director'
in
jres
:
...
...
modules/okc/module.py
View file @
b2c32431
...
...
@@ -20,15 +20,11 @@
from
collections
import
OrderedDict
from
datetime
import
datetime
try
:
from
HTMLParser
import
HTMLParser
except
ImportError
:
from
html.parser
import
HTMLParser
from
weboob.capabilities.contact
import
CapContact
,
ContactPhoto
,
Contact
,
ProfileNode
from
weboob.capabilities.dating
import
CapDating
from
weboob.capabilities.messages
import
CapMessages
,
CapMessagesPost
,
Message
,
Thread
from
weboob.tools.backend
import
Module
,
BackendConfig
from
weboob.tools.compat
import
html_unescape
from
weboob.tools.misc
import
to_unicode
from
weboob.tools.value
import
Value
,
ValueBackendPassword
,
ValueBool
...
...
@@ -187,7 +183,7 @@ class OkCModule(Module, CapMessages, CapContact, CapMessagesPost, CapDating):
sender
=
sender
.
name
,
receivers
=
[
receiver
.
name
],
date
=
date
,
content
=
to_unicode
(
HTMLParser
()
.
unescape
(
message
[
'body'
])),
content
=
to_unicode
(
html_unescape
(
message
[
'body'
])),
children
=
[],
parent
=
parent
,
signature
=
sender
.
get_text
(),
...
...
modules/orange/pages/bills.py
View file @
b2c32431
...
...
@@ -20,10 +20,6 @@
from
__future__
import
unicode_literals
import
re
try
:
from
html.parser
import
HTMLParser
except
ImportError
:
import
HTMLParser
from
weboob.browser.pages
import
HTMLPage
,
LoggedPage
,
JsonPage
,
pagination
from
weboob.capabilities.bill
import
Subscription
...
...
@@ -39,7 +35,7 @@ from weboob.browser.filters.json import Dict
from
weboob.capabilities.base
import
NotAvailable
from
weboob.capabilities.bill
import
DocumentTypes
,
Bill
from
weboob.tools.date
import
parse_french_date
from
weboob.tools.compat
import
urlencode
,
urlparse
,
parse_qsl
from
weboob.tools.compat
import
urlencode
,
urlparse
,
parse_qsl
,
html_unescape
class
BillsApiProPage
(
LoggedPage
,
JsonPage
):
...
...
@@ -146,14 +142,14 @@ class BillsPage(LoggedPage, HTMLPage):
def
obj_url
(
self
):
if
Field
(
'_url_base'
)(
self
):
# URL won't work if HTML is not unescaped
return
HTMLParser
()
.
unescape
(
str
(
Field
(
'_url_base'
)(
self
)))
return
html_unescape
(
str
(
Field
(
'_url_base'
)(
self
)))
return
Link
(
TableCell
(
Field
(
'_cell'
)(
self
))(
self
)[
0
]
.
xpath
(
'./a'
),
default
=
NotAvailable
)(
self
)
obj__label_base
=
Regexp
(
CleanText
(
'.//ul[@class="liste"]/script'
,
default
=
None
),
'.*</span>(.*?)</a.*'
,
default
=
None
)
def
obj_label
(
self
):
if
Field
(
'_label_base'
)(
self
):
return
HTMLParser
()
.
unescape
(
str
(
Field
(
'_label_base'
)(
self
)))
return
html_unescape
(
str
(
Field
(
'_label_base'
)(
self
)))
else
:
return
CleanText
(
TableCell
(
Field
(
'_cell'
)(
self
))(
self
)[
0
]
.
xpath
(
'.//span[@class="ec_visually_hidden"]'
))(
self
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment