diff --git a/modules/podnapisi/browser.py b/modules/podnapisi/browser.py index c4a5455c2a2264b9d815c5a0e746c637a684baba..c41a6c1a02556c6406f9bd1fa34aaaeadada516d 100644 --- a/modules/podnapisi/browser.py +++ b/modules/podnapisi/browser.py @@ -18,34 +18,26 @@ # along with weboob. If not, see <http://www.gnu.org/licenses/>. -from weboob.deprecated.browser import Browser, BrowserHTTPNotFound - -from .pages import SearchPage, SubtitlePage, LANGUAGE_NUMBERS +from weboob.browser import PagesBrowser, URL +from .pages import SearchPage, SubtitlePage __all__ = ['PodnapisiBrowser'] -class PodnapisiBrowser(Browser): - DOMAIN = 'www.podnapisi.net' - PROTOCOL = 'http' - ENCODING = 'utf-8' - USER_AGENT = Browser.USER_AGENTS['wget'] - PAGES = { - 'http://www.podnapisi.net/fr/ppodnapisi/search\?sJ=[0-9]*&sK=.*&sS=downloads&sO=desc': SearchPage, - 'http://www.podnapisi.net/fr/ppodnapisi/podnapis/i/[0-9]*': SubtitlePage - } +class PodnapisiBrowser(PagesBrowser): + BASEURL = 'https://www.podnapisi.net' + search = URL('/subtitles/search/advanced\?keywords=(?P<keywords>.*)&language=(?P<language>.*)', + '/en/subtitles/search/advanced\?keywords=(?P<keywords>.*)&language=(?P<language>.*)', + SearchPage) + file = URL('/subtitles/(?P<id>-*\w*)/download') + subtitle = URL('/subtitles/(?P<id>.*)', SubtitlePage) def iter_subtitles(self, language, pattern): - nlang = LANGUAGE_NUMBERS[language] - self.location('http://www.podnapisi.net/fr/ppodnapisi/search?sJ=%s&sK=%s&sS=downloads&sO=desc' % (nlang, pattern.encode('utf-8'))) - assert self.is_on_page(SearchPage) - return self.page.iter_subtitles(unicode(language)) + return self.search.go(language=language, keywords=pattern).iter_subtitles() + + def get_file(self, id): + return self.file.go(id=id).content def get_subtitle(self, id): - try: - self.location('http://www.podnapisi.net/fr/ppodnapisi/podnapis/i/%s' % id) - except BrowserHTTPNotFound: - return - if self.is_on_page(SubtitlePage): - return self.page.get_subtitle(id) + return self.subtitle.go(id=id).get_subtitle() diff --git a/modules/podnapisi/module.py 
b/modules/podnapisi/module.py index db7dd22fc1f377e106a2bb24a665fa8cc7452de7..835ddd05a808af68bf3a50c19782322a6047a07e 100644 --- a/modules/podnapisi/module.py +++ b/modules/podnapisi/module.py @@ -17,7 +17,7 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see <http://www.gnu.org/licenses/>. -from weboob.capabilities.subtitle import CapSubtitle, LanguageNotSupported, Subtitle +from weboob.capabilities.subtitle import CapSubtitle, LanguageNotSupported from weboob.applications.suboob.suboob import LANGUAGE_CONV from weboob.tools.backend import Module from weboob.tools.compat import quote_plus @@ -37,29 +37,13 @@ class PodnapisiModule(Module, CapSubtitle): LICENSE = 'AGPLv3+' BROWSER = PodnapisiBrowser - def get_subtitle(self, id): - return self.browser.get_subtitle(id) - def get_subtitle_file(self, id): - subtitle = self.browser.get_subtitle(id) - if not subtitle: - return None + return self.browser.get_file(id) - return self.browser.openurl(subtitle.url.encode('utf-8')).read() + def get_subtitle(self, id): + return self.browser.get_subtitle(id) def iter_subtitles(self, language, pattern): - if language not in LANGUAGE_CONV.keys(): + if language not in list(LANGUAGE_CONV.keys()): raise LanguageNotSupported() return self.browser.iter_subtitles(language, quote_plus(pattern.encode('utf-8'))) - - def fill_subtitle(self, subtitle, fields): - if 'description' in fields or 'url' in fields: - sub = self.get_subtitle(subtitle.id) - subtitle.description = sub.description - subtitle.url = sub.url - - return subtitle - - OBJECTS = { - Subtitle: fill_subtitle, - } diff --git a/modules/podnapisi/pages.py b/modules/podnapisi/pages.py index aef0d5e06ec3258d1c48ae4a12d86e06c2404687..e2c31cc2a8f56950eaa40f5d3b65185d5bea4eb6 100644 --- a/modules/podnapisi/pages.py +++ b/modules/podnapisi/pages.py @@ -16,121 +16,54 @@ # # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see <http://www.gnu.org/licenses/>. 
+from __future__ import unicode_literals - +from weboob.browser.elements import TableElement, ItemElement, method +from weboob.browser.pages import HTMLPage, pagination +from weboob.browser.filters.html import TableCell, AbsoluteLink, Attr +from weboob.browser.filters.standard import CleanText, Field, Type, Regexp from weboob.capabilities.subtitle import Subtitle -from weboob.capabilities.base import NotAvailable, NotLoaded -from weboob.deprecated.browser import Page - +from weboob.tools.compat import urljoin -LANGUAGE_NUMBERS = { - 'sq': '29', - 'de': '5', - 'en': '2', - 'ar': '12', - 'bn': '59', - 'be': '50', - 'bg': '33', - 'ca': '53', - 'zh': '17', - 'ko': '4', - 'hr': '38', - 'da': '24', - 'es': '28', - 'et': '20', - 'fi': '31', - 'fr': '8', - 'gr': '16', - 'hi': '42', - 'nl': '23', - 'hu': '15', - 'iw': '22', - 'id': '54', - 'ga': '49', - 'is': '6', - 'it': '9', - 'ja': '11', - 'lv': '21', - 'lt': '19', - 'mk': '35', - 'ms': '55', - 'no': '3', - 'pl': '26', - 'pt': '32', - 'ro': '13', - 'ru': '27', - 'sr': '36', - 'sk': '37', - 'sl': '1', - 'sv': '25', - 'cz': '7', - 'th': '44', - 'tr': '30', - 'uk': '46', - 'vi': '51' -} - -class SearchPage(Page): +class SearchPage(HTMLPage): """ Page which contains results as a list of movies """ + @pagination + @method + class iter_subtitles(TableElement): + head_xpath = '//div[has-class("table-responsive")]/table/thead/tr/th' + item_xpath = '//tr[has-class("subtitle-entry")]' - def iter_subtitles(self, language): - linksresults = self.parser.select(self.document.getroot(), 'a.subtitle_page_link') - for link in linksresults: - id = unicode(link.attrib.get('href', '').split('-p')[-1]) - name = unicode(link.text_content()) - tr = link.getparent().getparent().getparent() - cdtd = self.parser.select(tr, 'td')[4] - nb_cd = int(cdtd.text) - description = NotLoaded - subtitle = Subtitle(id, name) - subtitle.nb_cd = nb_cd - subtitle.language = language - subtitle.description = description - yield subtitle + col_cd = u'# CDs' + 
col_language = u'Language' + next_page = AbsoluteLink('//ul[has-class("pagination")]/li[has-class("next")]/a', default=None) -class SubtitlePage(Page): - """ Page which contains a single subtitle for a movie - """ + class item(ItemElement): + klass = Subtitle - def get_subtitle(self, id): - language = NotAvailable - url = NotAvailable - nb_cd = NotAvailable - links_info = self.parser.select(self.document.getroot(), 'fieldset.information a') - for link in links_info: - href = link.attrib.get('href', '') - if '/fr/ppodnapisi/kategorija/jezik/' in href: - nlang = href.split('/')[-1] - for lang, langnum in LANGUAGE_NUMBERS.items(): - if str(langnum) == str(nlang): - language = unicode(lang) - break + obj_name = CleanText('.//td/a[@alt="Subtitles\' page"]') + obj_nb_cd = Type(CleanText(TableCell('cd')), type=int) + obj_language = CleanText(TableCell('language')) + obj_url = AbsoluteLink('.//td/div[has-class("pull-left")]/a[@alt="Download subtitles."]') + obj_id = Regexp(Field('url'), r'/(-*\w*)/download$', r'\1') - desc = u'' - infos = self.parser.select(self.document.getroot(), 'fieldset.information') - for info in infos: - for p in self.parser.select(info, 'p'): - desc += '%s\n' % (u' '.join(p.text_content().strip().split())) - spans = self.parser.select(info, 'span') - for span in spans: - if span.text is not None and 'CD' in span.text: - nb_cd = int(self.parser.select(span.getparent(), 'span')[1].text) - title = unicode(self.parser.select(self.document.getroot(), 'head title', 1).text) - name = title.split(' - ')[0] +class SubtitlePage(HTMLPage): + @method + class get_subtitle(ItemElement): + klass = Subtitle - dllinks = self.parser.select(self.document.getroot(), 'div.footer > a.download') - for link in dllinks: - href = link.attrib.get('href', '') - if id in href: - url = u'http://www.podnapisi.net%s' % href + obj_id = CleanText('//div[has-class("col-md-3")]/table[has-class("table-condensed")]/tr[1]/td') + obj_language = Regexp( + CleanText( + 
Attr('//div[has-class("col-md-3")]/table[has-class("table-condensed")]/tr/td/a/span', 'class') + ), + r'-(\w*)$', r'\1' + ) + obj_name = CleanText('//div[has-class("clearfix")]/table[has-class("table-condensed")]/tr[1]/td/a') - subtitle = Subtitle(id, name) - subtitle.url = url - subtitle.language = language - subtitle.nb_cd = nb_cd - subtitle.description = desc - return subtitle + def obj_url(self): + return urljoin(self.page.browser.BASEURL, + CleanText(Attr('//form[has-class("download-form")]', 'action'))(self)) diff --git a/modules/podnapisi/test.py b/modules/podnapisi/test.py index 1d42d949892991437f1174c5572d327018bad843..c284567e8e303fc5335caec6723933ee43800263 100644 --- a/modules/podnapisi/test.py +++ b/modules/podnapisi/test.py @@ -28,14 +28,13 @@ class PodnapisiTest(BackendTest): def test_subtitle(self): lsub = [] subtitles = self.backend.iter_subtitles('fr', 'spiderman') - for i in range(5): - subtitle = subtitles.next() + for subtitle in subtitles: lsub.append(subtitle) assert (len(lsub) > 0) # get the file of a random sub if len(lsub): subtitle = choice(lsub) - self.backend.get_subtitle_file(subtitle.id) + assert(not self.backend.get_subtitle_file(subtitle.id).startswith(b'<')) ss = self.backend.get_subtitle(subtitle.id) - assert ss.url.startswith('http') + assert ss.url.startswith('https') diff --git a/tools/py3-compatible.modules b/tools/py3-compatible.modules index 8e7fd84981d0127fbbf60a78d141ca5b8301ac42..53d17b28eec59de1d4640db51ddf5c7712d9403a 100644 --- a/tools/py3-compatible.modules +++ b/tools/py3-compatible.modules @@ -94,6 +94,7 @@ parolesmania pastebin pastealacon pixtoilelibre +podnapisi popolemploi pornhub ratp