From b05b789cf4546508b5f93665f73eacfee2b32561 Mon Sep 17 00:00:00 2001 From: Julien Veyssier Date: Fri, 12 Apr 2013 14:10:38 +0200 Subject: [PATCH] new subtitle backend: podnapisi --- modules/podnapisi/__init__.py | 22 ++++++++ modules/podnapisi/backend.py | 68 +++++++++++++++++++++++++ modules/podnapisi/browser.py | 52 +++++++++++++++++++ modules/podnapisi/pages.py | 96 +++++++++++++++++++++++++++++++++++ modules/podnapisi/test.py | 41 +++++++++++++++ 5 files changed, 279 insertions(+) create mode 100644 modules/podnapisi/__init__.py create mode 100644 modules/podnapisi/backend.py create mode 100644 modules/podnapisi/browser.py create mode 100644 modules/podnapisi/pages.py create mode 100644 modules/podnapisi/test.py diff --git a/modules/podnapisi/__init__.py b/modules/podnapisi/__init__.py new file mode 100644 index 0000000000..313fff6306 --- /dev/null +++ b/modules/podnapisi/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2013 Julien Veyssier +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. 
# ---- modules/podnapisi/__init__.py ----

from .backend import PodnapisiBackend

__all__ = ['PodnapisiBackend']


# ---- modules/podnapisi/backend.py ----

from weboob.capabilities.subtitle import ICapSubtitle, LanguageNotSupported, Subtitle
from weboob.applications.suboob.suboob import LANGUAGE_CONV
from weboob.tools.backend import BaseBackend

from .browser import PodnapisiBrowser

from urllib import quote_plus

__all__ = ['PodnapisiBackend']


class PodnapisiBackend(BaseBackend, ICapSubtitle):
    """Subtitle backend for the Podnapisi website.

    Every lookup is delegated to the ``PodnapisiBrowser`` instance exposed
    as ``self.browser``.
    """
    NAME = 'podnapisi'
    MAINTAINER = u'Julien Veyssier'
    EMAIL = 'julien.veyssier@aiur.fr'
    VERSION = '0.g'
    DESCRIPTION = 'Podnapisi movies and tv series subtitle website'
    LICENSE = 'AGPLv3+'
    BROWSER = PodnapisiBrowser

    def create_default_browser(self):
        """Return a browser built by ``create_browser()`` with no arguments."""
        return self.create_browser()

    def get_subtitle(self, id):
        """Return the subtitle the browser finds for *id*.

        The browser returns ``None`` when the page does not exist.
        """
        return self.browser.get_subtitle(id)

    def get_subtitle_file(self, id):
        """Download the subtitle file for *id*.

        Returns the raw content read from the subtitle's url, or ``None``
        when the browser finds no subtitle for *id*.
        """
        subtitle = self.browser.get_subtitle(id)
        if not subtitle:
            return None

        return self.browser.openurl(subtitle.url.encode('utf-8')).read()

    def iter_subtitles(self, language, pattern):
        """Search subtitles matching *pattern* in *language*.

        Raises ``LanguageNotSupported`` when *language* is not a key of
        ``LANGUAGE_CONV``.  The pattern is UTF-8 encoded and URL-quoted
        before being handed to the browser.
        """
        if language not in LANGUAGE_CONV:
            raise LanguageNotSupported()
        return self.browser.iter_subtitles(language, quote_plus(pattern.encode('utf-8')))

    def fill_subtitle(self, subtitle, fields):
        """Complete *subtitle* with its description and url when requested.

        The subtitle is returned unchanged when neither field is asked for,
        or when the lookup yields nothing.
        """
        if 'description' in fields or 'url' in fields:
            sub = self.get_subtitle(subtitle.id)
            # get_subtitle() returns None for a missing page; leave the
            # object as it is instead of failing on attribute access.
            if sub is not None:
                subtitle.description = sub.description
                subtitle.url = sub.url

        return subtitle

    OBJECTS = {
        Subtitle: fill_subtitle,
    }
# ---- modules/podnapisi/browser.py ----

from weboob.capabilities.subtitle import LanguageNotSupported
from weboob.tools.browser import BaseBrowser, BrowserHTTPNotFound
from weboob.applications.suboob.suboob import LANGUAGE_CONV

from .pages import SearchPage, SubtitlePage, LANGUAGE_NUMBERS


__all__ = ['PodnapisiBrowser']


class PodnapisiBrowser(BaseBrowser):
    """Browser for www.podnapisi.net search and subtitle pages."""
    DOMAIN = 'www.podnapisi.net'
    PROTOCOL = 'http'
    ENCODING = 'utf-8'
    USER_AGENT = BaseBrowser.USER_AGENTS['wget']
    # Raw strings: the keys are regular expressions and contain ``\?``.
    PAGES = {
        r'http://www.podnapisi.net/fr/ppodnapisi/search\?sJ=[0-9]*&sK=.*&sS=downloads&sO=desc': SearchPage,
        r'http://www.podnapisi.net/fr/ppodnapisi/podnapis/i/[0-9]*': SubtitlePage
    }

    def iter_subtitles(self, language, pattern):
        """Open the search page for *pattern* and iterate over its results.

        *language* must be a key of ``LANGUAGE_NUMBERS``; otherwise
        ``LanguageNotSupported`` is raised.  *pattern* is inserted into
        the query string as given.
        """
        # Only the languages listed in LANGUAGE_NUMBERS have a site code;
        # report anything else explicitly rather than with a KeyError.
        if language not in LANGUAGE_NUMBERS:
            raise LanguageNotSupported()
        nlang = LANGUAGE_NUMBERS[language]
        self.location('http://www.podnapisi.net/fr/ppodnapisi/search?sJ=%s&sK=%s&sS=downloads&sO=desc' % (nlang, pattern.encode('utf-8')))
        assert self.is_on_page(SearchPage)
        return self.page.iter_subtitles(unicode(language))

    def get_subtitle(self, id):
        """Open the page of subtitle *id* and return the parsed subtitle.

        Returns ``None`` when the site answers with a not-found error or
        when the browser does not end up on a ``SubtitlePage``.
        """
        try:
            self.location('http://www.podnapisi.net/fr/ppodnapisi/podnapis/i/%s' % id)
        except BrowserHTTPNotFound:
            return
        if self.is_on_page(SubtitlePage):
            return self.page.get_subtitle(id)
# ---- modules/podnapisi/pages.py ----

from weboob.capabilities.subtitle import Subtitle
from weboob.capabilities.base import NotAvailable, NotLoaded
from weboob.tools.browser import BasePage
from weboob.applications.suboob.suboob import LANGUAGE_CONV


__all__ = ['SubtitlePage', 'SearchPage']


# Language code -> numeric identifier used in the site's URLs.
LANGUAGE_NUMBERS = {
    'fr': 8,
    'en': 2,
}


class SearchPage(BasePage):
    """ Page which contains results as a list of movies
    """
    def iter_subtitles(self, language):
        """Yield one ``Subtitle`` per ``a.subtitle_page_link`` element.

        Each result carries its id, name, number of CDs and the given
        *language*; the description is left as ``NotLoaded``.
        """
        root = self.document.getroot()
        for link in self.parser.select(root, 'a.subtitle_page_link'):
            id = unicode(link.attrib.get('href', '').split('es-p')[-1])
            name = unicode(link.text_content())
            # The table row is three levels above the link.
            tr = link.getparent().getparent().getparent()
            try:
                nb_cd = int(self.parser.select(tr, 'td')[4].text)
            except (IndexError, TypeError, ValueError):
                # A row with a missing or non-numeric CD cell must not
                # abort the whole result listing.
                nb_cd = NotAvailable

            subtitle = Subtitle(id, name)
            subtitle.nb_cd = nb_cd
            subtitle.language = language
            subtitle.description = NotLoaded
            yield subtitle


class SubtitlePage(BasePage):
    """ Page which contains a single subtitle for a movie
    """
    def get_subtitle(self, id):
        """Build the ``Subtitle`` described by this page.

        Language, url and number of CDs stay ``NotAvailable`` when the
        page does not provide them.
        """
        root = self.document.getroot()
        language = NotAvailable
        url = NotAvailable
        nb_cd = NotAvailable

        # The language is identified by the trailing number of the
        # language-category link.
        for link in self.parser.select(root, 'fieldset.information a'):
            href = link.attrib.get('href', '')
            if '/fr/ppodnapisi/kategorija/jezik/' in href:
                nlang = href.split('/')[-1]
                for lang, langnum in LANGUAGE_NUMBERS.items():
                    if str(langnum) == str(nlang):
                        language = unicode(lang)
                        break

        # One description line per paragraph, inner whitespace collapsed.
        lines = []
        for info in self.parser.select(root, 'fieldset.information'):
            for p in self.parser.select(info, 'p'):
                lines.append(u'%s\n' % (u' '.join(p.text_content().strip().split())))
            for span in self.parser.select(info, 'span'):
                if span.text is not None and 'CD' in span.text:
                    try:
                        nb_cd = int(self.parser.select(span.getparent(), 'span')[1].text)
                    except (IndexError, TypeError, ValueError):
                        # Keep the previous value when the sibling span
                        # is missing or not a number.
                        pass
        desc = u''.join(lines)

        title = unicode(self.parser.select(root, 'head title', 1).text)
        name = title.split(' - ')[0]

        for link in self.parser.select(root, 'div.footer > a.download'):
            href = link.attrib.get('href', '')
            if id in href:
                url = u'http://www.podnapisi.net%s' % href

        subtitle = Subtitle(id, name)
        subtitle.url = url
        subtitle.language = language
        subtitle.nb_cd = nb_cd
        subtitle.description = desc
        return subtitle


# ---- modules/podnapisi/test.py ----

from weboob.tools.test import BackendTest

from itertools import islice
from random import choice


class PodnapisiTest(BackendTest):
    """Live test of the podnapisi backend."""
    BACKEND = 'podnapisi'

    def test_subtitle(self):
        """Search, then fetch one random result and its file."""
        # Take at most five results; islice stops cleanly when the
        # search returns fewer.
        lsub = list(islice(self.backend.iter_subtitles('fr', 'spiderman'), 5))
        assert len(lsub) > 0

        # get the file of a random sub
        subtitle = choice(lsub)
        self.backend.get_subtitle_file(subtitle.id)
        ss = self.backend.get_subtitle(subtitle.id)
        assert ss.url.startswith('http')