diff --git a/modules/podnapisi/browser.py b/modules/podnapisi/browser.py
index c4a5455c2a2264b9d815c5a0e746c637a684baba..c41a6c1a02556c6406f9bd1fa34aaaeadada516d 100644
--- a/modules/podnapisi/browser.py
+++ b/modules/podnapisi/browser.py
@@ -18,34 +18,26 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-from weboob.deprecated.browser import Browser, BrowserHTTPNotFound
-
-from .pages import SearchPage, SubtitlePage, LANGUAGE_NUMBERS
+from weboob.browser import PagesBrowser, URL
+from .pages import SearchPage, SubtitlePage
__all__ = ['PodnapisiBrowser']
-class PodnapisiBrowser(Browser):
- DOMAIN = 'www.podnapisi.net'
- PROTOCOL = 'http'
- ENCODING = 'utf-8'
- USER_AGENT = Browser.USER_AGENTS['wget']
- PAGES = {
- 'http://www.podnapisi.net/fr/ppodnapisi/search\?sJ=[0-9]*&sK=.*&sS=downloads&sO=desc': SearchPage,
- 'http://www.podnapisi.net/fr/ppodnapisi/podnapis/i/[0-9]*': SubtitlePage
- }
+class PodnapisiBrowser(PagesBrowser):
+ BASEURL = 'https://www.podnapisi.net'
+ search = URL('/subtitles/search/advanced\?keywords=(?P<keywords>.*)&language=(?P<language>.*)',
+ '/en/subtitles/search/advanced\?keywords=(?P<keywords>.*)&language=(?P<language>.*)',
+ SearchPage)
+ file = URL('/subtitles/(?P<id>-*\w*)/download')
+ subtitle = URL('/subtitles/(?P<id>.*)', SubtitlePage)
def iter_subtitles(self, language, pattern):
- nlang = LANGUAGE_NUMBERS[language]
- self.location('http://www.podnapisi.net/fr/ppodnapisi/search?sJ=%s&sK=%s&sS=downloads&sO=desc' % (nlang, pattern.encode('utf-8')))
- assert self.is_on_page(SearchPage)
- return self.page.iter_subtitles(unicode(language))
+ return self.search.go(language=language, keywords=pattern).iter_subtitles()
+
+ def get_file(self, id):
+ return self.file.go(id=id).content
def get_subtitle(self, id):
- try:
- self.location('http://www.podnapisi.net/fr/ppodnapisi/podnapis/i/%s' % id)
- except BrowserHTTPNotFound:
- return
- if self.is_on_page(SubtitlePage):
- return self.page.get_subtitle(id)
+ return self.subtitle.go(id=id).get_subtitle()
diff --git a/modules/podnapisi/module.py b/modules/podnapisi/module.py
index db7dd22fc1f377e106a2bb24a665fa8cc7452de7..835ddd05a808af68bf3a50c19782322a6047a07e 100644
--- a/modules/podnapisi/module.py
+++ b/modules/podnapisi/module.py
@@ -17,7 +17,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-from weboob.capabilities.subtitle import CapSubtitle, LanguageNotSupported, Subtitle
+from weboob.capabilities.subtitle import CapSubtitle, LanguageNotSupported
from weboob.applications.suboob.suboob import LANGUAGE_CONV
from weboob.tools.backend import Module
from weboob.tools.compat import quote_plus
@@ -37,29 +37,13 @@ class PodnapisiModule(Module, CapSubtitle):
LICENSE = 'AGPLv3+'
BROWSER = PodnapisiBrowser
- def get_subtitle(self, id):
- return self.browser.get_subtitle(id)
-
def get_subtitle_file(self, id):
- subtitle = self.browser.get_subtitle(id)
- if not subtitle:
- return None
+ return self.browser.get_file(id)
- return self.browser.openurl(subtitle.url.encode('utf-8')).read()
+ def get_subtitle(self, id):
+ return self.browser.get_subtitle(id)
def iter_subtitles(self, language, pattern):
- if language not in LANGUAGE_CONV.keys():
+ if language not in list(LANGUAGE_CONV.keys()):
raise LanguageNotSupported()
return self.browser.iter_subtitles(language, quote_plus(pattern.encode('utf-8')))
-
- def fill_subtitle(self, subtitle, fields):
- if 'description' in fields or 'url' in fields:
- sub = self.get_subtitle(subtitle.id)
- subtitle.description = sub.description
- subtitle.url = sub.url
-
- return subtitle
-
- OBJECTS = {
- Subtitle: fill_subtitle,
- }
diff --git a/modules/podnapisi/pages.py b/modules/podnapisi/pages.py
index aef0d5e06ec3258d1c48ae4a12d86e06c2404687..e2c31cc2a8f56950eaa40f5d3b65185d5bea4eb6 100644
--- a/modules/podnapisi/pages.py
+++ b/modules/podnapisi/pages.py
@@ -16,121 +16,54 @@
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+from __future__ import unicode_literals
-
+from weboob.browser.elements import TableElement, ItemElement, method
+from weboob.browser.pages import HTMLPage, pagination
+from weboob.browser.filters.html import TableCell, AbsoluteLink, Attr
+from weboob.browser.filters.standard import CleanText, Field, Type, Regexp
from weboob.capabilities.subtitle import Subtitle
-from weboob.capabilities.base import NotAvailable, NotLoaded
-from weboob.deprecated.browser import Page
-
+from weboob.tools.compat import urljoin
-LANGUAGE_NUMBERS = {
- 'sq': '29',
- 'de': '5',
- 'en': '2',
- 'ar': '12',
- 'bn': '59',
- 'be': '50',
- 'bg': '33',
- 'ca': '53',
- 'zh': '17',
- 'ko': '4',
- 'hr': '38',
- 'da': '24',
- 'es': '28',
- 'et': '20',
- 'fi': '31',
- 'fr': '8',
- 'gr': '16',
- 'hi': '42',
- 'nl': '23',
- 'hu': '15',
- 'iw': '22',
- 'id': '54',
- 'ga': '49',
- 'is': '6',
- 'it': '9',
- 'ja': '11',
- 'lv': '21',
- 'lt': '19',
- 'mk': '35',
- 'ms': '55',
- 'no': '3',
- 'pl': '26',
- 'pt': '32',
- 'ro': '13',
- 'ru': '27',
- 'sr': '36',
- 'sk': '37',
- 'sl': '1',
- 'sv': '25',
- 'cz': '7',
- 'th': '44',
- 'tr': '30',
- 'uk': '46',
- 'vi': '51'
-}
-
-class SearchPage(Page):
+class SearchPage(HTMLPage):
""" Page which contains results as a list of movies
"""
+ @pagination
+ @method
+ class iter_subtitles(TableElement):
+ head_xpath = '//div[has-class("table-responsive")]/table/thead/tr/th'
+ item_xpath = '//tr[has-class("subtitle-entry")]'
- def iter_subtitles(self, language):
- linksresults = self.parser.select(self.document.getroot(), 'a.subtitle_page_link')
- for link in linksresults:
- id = unicode(link.attrib.get('href', '').split('-p')[-1])
- name = unicode(link.text_content())
- tr = link.getparent().getparent().getparent()
- cdtd = self.parser.select(tr, 'td')[4]
- nb_cd = int(cdtd.text)
- description = NotLoaded
- subtitle = Subtitle(id, name)
- subtitle.nb_cd = nb_cd
- subtitle.language = language
- subtitle.description = description
- yield subtitle
+ col_cd = u'# CDs'
+ col_language = u'Language'
+ next_page = AbsoluteLink('//ul[has-class("pagination")]/li[has-class("next")]/a', default=None)
-class SubtitlePage(Page):
- """ Page which contains a single subtitle for a movie
- """
+ class item(ItemElement):
+ klass = Subtitle
- def get_subtitle(self, id):
- language = NotAvailable
- url = NotAvailable
- nb_cd = NotAvailable
- links_info = self.parser.select(self.document.getroot(), 'fieldset.information a')
- for link in links_info:
- href = link.attrib.get('href', '')
- if '/fr/ppodnapisi/kategorija/jezik/' in href:
- nlang = href.split('/')[-1]
- for lang, langnum in LANGUAGE_NUMBERS.items():
- if str(langnum) == str(nlang):
- language = unicode(lang)
- break
+ obj_name = CleanText('.//td/a[@alt="Subtitles\' page"]')
+ obj_nb_cd = Type(CleanText(TableCell('cd')), type=int)
+ obj_language = CleanText(TableCell('language'))
+ obj_url = AbsoluteLink('.//td/div[has-class("pull-left")]/a[@alt="Download subtitles."]')
+ obj_id = Regexp(Field('url'), r'/(-*\w*)/download$', r'\1')
- desc = u''
- infos = self.parser.select(self.document.getroot(), 'fieldset.information')
- for info in infos:
- for p in self.parser.select(info, 'p'):
- desc += '%s\n' % (u' '.join(p.text_content().strip().split()))
- spans = self.parser.select(info, 'span')
- for span in spans:
- if span.text is not None and 'CD' in span.text:
- nb_cd = int(self.parser.select(span.getparent(), 'span')[1].text)
- title = unicode(self.parser.select(self.document.getroot(), 'head title', 1).text)
- name = title.split(' - ')[0]
+class SubtitlePage(HTMLPage):
+ @method
+ class get_subtitle(ItemElement):
+ klass = Subtitle
- dllinks = self.parser.select(self.document.getroot(), 'div.footer > a.download')
- for link in dllinks:
- href = link.attrib.get('href', '')
- if id in href:
- url = u'http://www.podnapisi.net%s' % href
+ obj_id = CleanText('//div[has-class("col-md-3")]/table[has-class("table-condensed")]/tr[1]/td')
+ obj_language = Regexp(
+ CleanText(
+ Attr('//div[has-class("col-md-3")]/table[has-class("table-condensed")]/tr/td/a/span', 'class')
+ ),
+ r'-(\w*)$', r'\1'
+ )
+ obj_name = CleanText('//div[has-class("clearfix")]/table[has-class("table-condensed")]/tr[1]/td/a')
- subtitle = Subtitle(id, name)
- subtitle.url = url
- subtitle.language = language
- subtitle.nb_cd = nb_cd
- subtitle.description = desc
- return subtitle
+ def obj_url(self):
+ return urljoin(self.page.browser.BASEURL,
+ CleanText(Attr('//form[has-class("download-form")]', 'action'))(self))
diff --git a/modules/podnapisi/test.py b/modules/podnapisi/test.py
index 1d42d949892991437f1174c5572d327018bad843..c284567e8e303fc5335caec6723933ee43800263 100644
--- a/modules/podnapisi/test.py
+++ b/modules/podnapisi/test.py
@@ -28,14 +28,13 @@ class PodnapisiTest(BackendTest):
def test_subtitle(self):
lsub = []
subtitles = self.backend.iter_subtitles('fr', 'spiderman')
- for i in range(5):
- subtitle = subtitles.next()
+ for subtitle in subtitles:
lsub.append(subtitle)
assert (len(lsub) > 0)
# get the file of a random sub
if len(lsub):
subtitle = choice(lsub)
- self.backend.get_subtitle_file(subtitle.id)
+ assert(not self.backend.get_subtitle_file(subtitle.id).startswith(b'<'))
ss = self.backend.get_subtitle(subtitle.id)
- assert ss.url.startswith('http')
+ assert ss.url.startswith('https')
diff --git a/tools/py3-compatible.modules b/tools/py3-compatible.modules
index 8e7fd84981d0127fbbf60a78d141ca5b8301ac42..53d17b28eec59de1d4640db51ddf5c7712d9403a 100644
--- a/tools/py3-compatible.modules
+++ b/tools/py3-compatible.modules
@@ -94,6 +94,7 @@ parolesmania
pastebin
pastealacon
pixtoilelibre
+podnapisi
popolemploi
pornhub
ratp