Commit d00bc07c authored by Romain Bignon

remove browser1 modules

parent 4126dad0
# -*- coding: utf-8 -*-
# Copyright(C) 2013 franek
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .module import ArretSurImagesModule
__all__ = ['ArretSurImagesModule']
# -*- coding: utf-8 -*-
# Copyright(C) 2013 franek
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.deprecated.browser import Browser, BrowserIncorrectPassword
from weboob.deprecated.browser.decorators import id2url
from .pages import VideoPage, IndexPage, LoginPage, LoginRedirectPage
from .video import ArretSurImagesVideo
__all__ = ['ArretSurImagesBrowser']
class ArretSurImagesBrowser(Browser):
    """Browser for the arretsurimages.net website.

    Maps the site's URLs to page classes and offers helpers to log in,
    list or search the programmes index and fetch a single video.
    """

    PROTOCOL = 'http'
    DOMAIN = 'www.arretsurimages.net'
    ENCODING = None
    # URL regular expressions (built from PROTOCOL and DOMAIN) mapped to the
    # page class that handles them.
    PAGES = {
        r'%s://%s/contenu.php\?id=.+' % (PROTOCOL, DOMAIN): VideoPage,
        r'%s://%s/emissions.php' % (PROTOCOL, DOMAIN): IndexPage,
        r'%s://%s/forum/login.php' % (PROTOCOL, DOMAIN): LoginPage,
        r'%s://%s/forum/index.php' % (PROTOCOL, DOMAIN): LoginRedirectPage,
    }

    def home(self):
        """Go to the front page of the website."""
        self.location('http://www.arretsurimages.net')

    def search_videos(self, pattern):
        """Open the programmes index and iterate over the videos matching *pattern*.

        :param pattern: text handed to ``IndexPage.iter_videos`` as filter
        :return: the iterator produced by the index page
        """
        index_url = self.buildurl('/emissions.php')
        self.location(index_url)
        assert self.is_on_page(IndexPage)
        return self.page.iter_videos(pattern)

    # NOTE(review): the id2url decorator is defined elsewhere; presumably it
    # turns a bare video ID into a full URL before this method runs.
    @id2url(ArretSurImagesVideo.id2url)
    def get_video(self, url, video=None):
        """Log in, open *url* and return the video parsed from that page.

        :param url: address of the video page
        :param video: optional existing video object to fill in
        """
        self.login()
        self.location(url)
        return self.page.get_video(video)

    def is_logged(self):
        """Tell whether the current page is anything but the login page."""
        on_login_page = self.is_on_page(LoginPage)
        return not on_login_page

    def login(self):
        """Submit the credentials through the forum login form.

        :raises BrowserIncorrectPassword: if the browser is still on the
            login page after submitting the form
        """
        if not self.is_on_page(LoginPage):
            self.location('http://www.arretsurimages.net/forum/login.php', no_login=True)

        self.page.login(self.username, self.password)

        if self.is_logged():
            return
        raise BrowserIncorrectPassword()

    def latest_videos(self):
        """Open the programmes index and iterate over every video it lists."""
        index_url = self.buildurl('/emissions.php')
        self.location(index_url)
        assert self.is_on_page(IndexPage)
        return self.page.iter_videos()
# -*- coding: utf-8 -*-
# Copyright(C) 2013 franek
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.video import CapVideo, BaseVideo
from weboob.capabilities.collection import CapCollection, CollectionNotFound
from weboob.tools.backend import Module, BackendConfig
from weboob.tools.value import ValueBackendPassword
from .browser import ArretSurImagesBrowser
from .video import ArretSurImagesVideo
__all__ = ['ArretSurImagesModule']
class ArretSurImagesModule(Module, CapVideo, CapCollection):
    """Backend exposing arretsurimages.net videos and a ``latest`` collection."""

    NAME = 'arretsurimages'
    DESCRIPTION = u'arretsurimages website'
    MAINTAINER = u'franek'
    EMAIL = 'franek@chicour.net'
    VERSION = '1.4'
    CONFIG = BackendConfig(ValueBackendPassword('login', label='email', masked=False),
                           ValueBackendPassword('password', label='Password'))
    BROWSER = ArretSurImagesBrowser

    def create_default_browser(self):
        """Build the browser from the configured login and password."""
        login = self.config['login'].get()
        password = self.config['password'].get()
        return self.create_browser(login, password, get_home=False)

    def search_videos(self, pattern, sortby=CapVideo.SEARCH_RELEVANCE, nsfw=False):
        """Delegate the search to the browser; *sortby* and *nsfw* are not used."""
        with self.browser:
            return self.browser.search_videos(pattern)
        # raise UserError('Search does not work on ASI website, use ls latest command')

    def get_video(self, _id):
        """Return the video designated by *_id*.

        An ``http://`` URL that does not start with
        ``http://www.arretsurimages.net`` yields ``None``.
        """
        is_http_url = _id.startswith('http://')
        if is_http_url and not _id.startswith('http://www.arretsurimages.net'):
            return None

        with self.browser:
            return self.browser.get_video(_id)

    def fill_video(self, video, fields):
        """Complete *video* with the requested *fields* and return it."""
        only_thumbnail = fields == ['thumbnail']
        if not only_thumbnail:
            # More than just the thumbnail was requested: reload every field
            # from the video page.
            page_url = ArretSurImagesVideo.id2url(video.id)
            with self.browser:
                video = self.browser.get_video(page_url, video)

        if 'thumbnail' in fields and video.thumbnail:
            with self.browser:
                video.thumbnail.data = self.browser.readurl(video.thumbnail.url)

        return video

    def iter_resources(self, objs, split_path):
        """Yield the ``latest`` collection at the root, or its videos inside it."""
        if BaseVideo not in objs:
            return

        collection = self.get_collection(objs, split_path)
        if collection.path_level == 0:
            yield self.get_collection(objs, [u'latest'])
        if collection.split_path == [u'latest']:
            for video in self.browser.latest_videos():
                yield video

    def validate_collection(self, objs, collection):
        """Accept the root and the ``latest`` video collection only.

        :raises CollectionNotFound: for any other path
        """
        if collection.path_level != 0:
            is_latest = BaseVideo in objs and collection.split_path == [u'latest']
            if not is_latest:
                raise CollectionNotFound(collection.split_path)
            collection.title = u'Latest ArretSurImages videos'

    # Object class -> method used to fill its missing fields.
    OBJECTS = {ArretSurImagesVideo: fill_video}
# -*- coding: utf-8 -*-
# Copyright(C) 2013 franek
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import re
from weboob.capabilities.base import UserError
from weboob.capabilities.image import Thumbnail
from weboob.deprecated.browser import Page, BrokenPageError
from weboob.capabilities import NotAvailable
from .video import ArretSurImagesVideo
class IndexPage(Page):
    """Programmes index page, listing one block per video."""

    def iter_videos(self, pattern=None):
        """Yield an ArretSurImagesVideo for each block of the index.

        :param pattern: optional text; when given, only the entries whose
            title contains it (case-insensitively) are yielded
        """
        # Upper-case the pattern once instead of once per entry.
        needle = pattern.upper() if pattern else None

        for div in self.document.getroot().cssselect("div[class=bloc-contenu-8]"):
            raw_title = self.parser.select(div, 'h1', 1).text_content()
            # The former .replace(' ', ' ') swapped a space for a space and
            # so cleaned nothing; collapse every run of whitespace instead.
            title = ' '.join(raw_title.split())
            if needle and needle not in title.upper():
                continue

            # The video ID is the value of the "id" query parameter; it is
            # left empty when the link does not have the expected form.
            m = re.match(r'/contenu.php\?id=(.*)', div.find('a').attrib['href'])
            _id = m.group(1) if m else ''

            video = ArretSurImagesVideo(_id)
            video.title = unicode(title)
            video.rating = None
            video.rating_max = None

            thumb = self.parser.select(div, 'img', 1)
            url = u'http://www.arretsurimages.net' + thumb.attrib['src']
            video.thumbnail = Thumbnail(url)
            video.thumbnail.url = video.thumbnail.id

            yield video
class ForbiddenVideo(UserError):
    """Error raised by VideoPage.on_loaded when no logged-in user is detected."""
class VideoPage(Page):
    """Page of a single programme, from which the video is extracted."""

    def is_logged(self):
        """Return True when the page contains the ``#user-info`` element."""
        try:
            self.parser.select(self.document.getroot(), '#user-info', 1)
        except BrokenPageError:
            return False
        return True

    def on_loaded(self):
        """Refuse the page when it was served without a logged-in user.

        :raises ForbiddenVideo: if ``is_logged()`` is false
        """
        # NOTE(review): presumably called by the browser once the page has
        # been loaded -- confirm against the Page base class.
        if not self.is_logged():
            raise ForbiddenVideo('This video or group may contain content that is inappropriate for some users')

    def get_video(self, video=None):
        """Fill *video* (created from the page ID if not given) and return it."""
        video = video or ArretSurImagesVideo(self.get_id())
        video.title = unicode(self.get_title())
        video.url = unicode(self.get_url())
        video.set_empty_fields(NotAvailable)
        return video

    def get_firstUrl(self):
        """Return the target of the ``a.bouton-telecharger`` link."""
        button = self.parser.select(self.document.getroot(), 'a.bouton-telecharger', 1)
        return button.attrib['href']

    def get_title(self):
        """Return the text of the first ``h1`` inside ``div#titrage-contenu``."""
        headings = self.document.getroot().cssselect('div[id=titrage-contenu] h1')
        return headings[0].text

    def get_id(self):
        """Return the ID found at the end of the download link.

        Logs a warning and returns 0 when the link does not have the
        expected form.
        """
        m = re.match(r'http://videos.arretsurimages.net/telecharger/(.*)', self.get_firstUrl())
        if m is None:
            self.logger.warning('Unable to parse ID')
            return 0
        return m.group(1)

    def get_url(self):
        """Open the download page and return the target of its second link.

        Returns None when that page has fewer than two links.
        """
        download_page = self.browser.openurl(self.get_firstUrl())
        doc = self.browser.get_document(download_page)
        links = doc.xpath('//a')
        # we take the second link of the page
        if len(links) < 2:
            return None
        return links[1].attrib['href']
class LoginPage(Page):
    """Forum login page."""

    def login(self, username, password):
        """Fill in the first form of the page with the credentials and submit it."""
        browser = self.browser

        # Python mechanize is broken, fixing it.
        response = browser.response()
        patched_html = response.get_data().replace("<br/>", "<br />")
        response.set_data(patched_html)
        browser.set_response(response)

        browser.select_form(nr=0)
        # 'redir' is assigned below, so read-only controls must be unlocked.
        browser.form.set_all_readonly(False)
        for field, value in (('redir', '/forum/index.php'),
                             ('username', username),
                             ('password', password)):
            browser[field] = value
        browser.submit()
class LoginRedirectPage(Page):
    """Forum index page, the ``redir`` target set by LoginPage.login; nothing is parsed."""
# -*- coding: utf-8 -*-
# Copyright(C) 2013 franek
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.video import BaseVideo
from weboob.tools.test import BackendTest, SkipTest
class ArretSurImagesTest(BackendTest):
    """Live test of the arretsurimages backend."""

    MODULE = 'arretsurimages'

    def test_latest_arretsurimages(self):
        """List the latest videos, then resolve the URL of the first one.

        The URL part is skipped when the browser username is the string
        ``u'None'``.
        """
        videos = list(self.backend.iter_resources([BaseVideo], [u'latest']))
        assert len(videos)

        if self.backend.browser.username == u'None':
            raise SkipTest("User credentials not defined")

        first = videos[0]
        self.backend.fillobj(first, ('url',))
        self.assertTrue(first.url, 'URL for video "%s" not found' % (first.id))
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Christophe Benz
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.video import BaseVideo
class ArretSurImagesVideo(BaseVideo):
    """Video object for arretsurimages.net."""

    @classmethod
    def id2url(cls, _id):
        """Return the address of the content page for the video *_id*."""
        page_template = 'http://www.arretsurimages.net/contenu.php?id=%s'
        return page_template % _id
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Julien Veyssier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .module import AttilasubModule
__all__ = ['AttilasubModule']
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Julien Veyssier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.deprecated.browser import Browser, BrowserHTTPNotFound
from .pages import SubtitlesPage, SearchPage
__all__ = ['AttilasubBrowser']
class AttilasubBrowser(Browser):
    """Browser for the subtitles site hosted on davidbillemont3.free.fr.

    Searches go through the freefind.com search engine.
    """

    DOMAIN = 'davidbillemont3.free.fr'
    PROTOCOL = 'http'
    ENCODING = 'windows-1252'
    USER_AGENT = Browser.USER_AGENTS['wget']
    # URL regular expressions mapped to the page class that handles them.
    PAGES = {
        'http://search.freefind.com/find.html.*': SearchPage,
        'http://davidbillemont3.free.fr/.*.htm': SubtitlesPage,
    }

    def iter_subtitles(self, language, pattern):
        """Run a freefind search for *pattern* and iterate over the subtitles found."""
        search_url = ('http://search.freefind.com/find.html?id=81131980&_charset_=&bcd=%%F7&scs=1&pageid=r&query=%s&mode=Find%%20pages%%20matching%%20ALL%%20words'
                      % pattern.encode('utf-8'))
        self.location(search_url)
        assert self.is_on_page(SearchPage)
        return self.page.iter_subtitles(language, pattern)

    def get_subtitle(self, id):
        """Return the subtitle designated by *id*, or None if it cannot be reached.

        The part of *id* before the first ``|`` is the page to open on the
        site. None is returned when that page does not exist or is not a
        subtitles page.
        """
        url_end, _, _ = id.partition('|')
        try:
            self.location('http://davidbillemont3.free.fr/%s' % url_end)
        except BrowserHTTPNotFound:
            return None

        if not self.is_on_page(SubtitlesPage):
            return None
        return self.page.get_subtitle(id)
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Julien Veyssier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.subtitle import CapSubtitle, LanguageNotSupported
from weboob.tools.backend import Module
from weboob.tools.compat import quote_plus
from .browser import AttilasubBrowser
__all__ = ['AttilasubModule']
class AttilasubModule(Module, CapSubtitle):
    """Backend giving access to the French subtitles of "Attila's Website 2.0"."""

    NAME = 'attilasub'
    MAINTAINER = u'Julien Veyssier'
    EMAIL = 'julien.veyssier@aiur.fr'
    VERSION = '1.4'
    DESCRIPTION = '"Attila\'s Website 2.0" French subtitles'
    LICENSE = 'AGPLv3+'
    LANGUAGE_LIST = ['fr']
    BROWSER = AttilasubBrowser

    def get_subtitle(self, id):
        """Return the subtitle designated by *id*, as provided by the browser."""
        return self.browser.get_subtitle(id)

    def get_subtitle_file(self, id):
        """Download the file of the subtitle *id*; return None if it is not found."""
        subtitle = self.browser.get_subtitle(id)
        if subtitle:
            file_url = subtitle.url.encode('utf-8')
            return self.browser.openurl(file_url).read()
        return None

    def iter_subtitles(self, language, pattern):
        """Search the subtitles matching *pattern*.

        :raises LanguageNotSupported: if *language* is not in LANGUAGE_LIST
        """
        if language not in self.LANGUAGE_LIST:
            raise LanguageNotSupported()

        # The pattern is URL-quoted here, before being handed to the browser.
        quoted_pattern = quote_plus(pattern.encode('utf-8'))
        return self.browser.iter_subtitles(language, quoted_pattern)
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Julien Veyssier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.subtitle import Subtitle
from weboob.capabilities.base import NotAvailable
from weboob.deprecated.browser import Page
class SearchPage(Page):
    """Result page of the freefind search engine."""

    def iter_subtitles(self, language, pattern):
        """Visit each search result and yield the subtitles found on it."""
        root = self.document.getroot()
        # for each result in freefind, explore the subtitle list page to iter subtitles
        for result in self.parser.select(root, 'div.search-results font.search-results'):
            anchor = self.parser.select(result, 'a', 1)
            self.browser.location(anchor.attrib.get('href', ''))
            assert self.browser.is_on_page(SubtitlesPage)

            # subtitles page does the job
            subtitles_page = self.browser.page
            for subtitle in subtitles_page.iter_subtitles(language, pattern):
                yield subtitle
class SubtitlesPage(Page):
    """Page of the site listing subtitles in a table, one per row.

    Subtitle IDs have the form ``<page file name>|<href of the file>``.
    """

    def _build_subtitle(self, subtitle_id, original_words, traduced_words, nb_cd, href):
        """Create the Subtitle object shared by get_subtitle and iter_subtitles."""
        # Joining the split words is what trashes special spacing chars.
        original_title = " ".join(original_words)
        traduced_title = " ".join(traduced_words)
        name = unicode('%s (%s)' % (original_title, traduced_title))
        url = unicode('http://davidbillemont3.free.fr/%s' % href)

        subtitle = Subtitle(subtitle_id, name)
        subtitle.url = url
        subtitle.ext = url.split('.')[-1]
        subtitle.language = unicode('fr')
        subtitle.nb_cd = nb_cd
        subtitle.description = NotAvailable
        return subtitle

    def _row_titles(self, cols):
        """Return the (traduced, original) lower-cased title word lists of a row."""
        traduced_title = self.parser.select(cols[0], 'font', 1).text.lower()
        original_title = self.parser.select(cols[1], 'font', 1).text.lower()
        return traduced_title.split(), original_title.split()

    def get_subtitle(self, id):
        """Return the subtitle whose file link is the second part of *id*.

        :param id: subtitle ID, ``<page file name>|<href>``
        """
        href = id.split('|')[1]
        # we have to find the 'tr' which contains the link to this address
        line = self.parser.select(self.document.getroot(), 'a[href="%s"]' % href, 1)
        # The table row is five levels above the link.
        for _ in range(5):
            line = line.getparent()
        cols = self.parser.select(line, 'td')

        traduced_words, original_words = self._row_titles(cols)
        nb_cd = self.parser.select(cols[2], 'font', 1).text.strip()
        nb_cd = int(nb_cd.split()[0])

        return self._build_subtitle(id, original_words, traduced_words, nb_cd, href)

    def iter_subtitles(self, language, pattern):
        """Yield the subtitles of the table whose titles match *pattern*.

        A row matches when the pattern is a single word present in one of
        its two titles, or when the pattern shares at least two words with
        one of them.

        :param language: not used by this method
        :param pattern: searched text, with ``+`` standing for spaces
        """
        pattern = pattern.strip().replace('+', ' ').lower()
        pattern_words = pattern.split()
        # Both values are the same for every row: compute them once.
        single_word = len(pattern_words) == 1
        pattern_set = set(pattern_words)

        root = self.document.getroot()
        tab = self.parser.select(root, 'table[bordercolor="#B8C0B2"]')
        if len(tab) == 0:
            tab = self.parser.select(root, 'table[bordercolordark="#B8C0B2"]')
            if len(tab) == 0:
                return
        # some results of freefind point on useless pages
        if tab[0].attrib.get('width', '') != '100%':
            return

        # Read the page name before the first yield: the caller may move the
        # browser elsewhere while it consumes this generator.
        page_name = self.browser.geturl().split('/')[-1]

        # iter() replaces getiterator(), which lxml marks as deprecated.
        for line in tab[0].iter('tr'):
            cols = self.parser.select(line, 'td')
            traduced_words, original_words = self._row_titles(cols)

            # if the pattern is one word and in the title OR if the
            # intersection between pattern and the title is at least 2 words
            matches = ((single_word and (pattern in traduced_words or
                                         pattern in original_words)) or
                       len(pattern_set & set(traduced_words)) > 1 or
                       len(pattern_set & set(original_words)) > 1)
            if not matches:
                continue

            nb_cd = self.parser.select(cols[2], 'font', 1).text.strip()
            nb_cd = int(nb_cd.strip(' CD'))
            href = self.parser.select(cols[3], 'a', 1).attrib.get('href', '')
            subtitle_id = unicode('%s|%s' % (page_name, href))

            yield self._build_subtitle(subtitle_id, original_words, traduced_words, nb_cd, href)
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Julien Veyssier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest
import urllib
from random import choice
class AttilasubTest(BackendTest):
    """Live test of the attilasub backend."""

    MODULE = 'attilasub'

    def test_subtitle(self):
        """Search subtitles, check their URLs and download one of them."""
        found = list(self.backend.iter_subtitles('fr', 'spiderman'))
        assert (len(found) > 0)

        for sub in found:
            # Only the part of the URL before the query string is checked.
            path, _query = urllib.splitquery(sub.url)
            assert path.endswith('.rar')

        # get the file of a random sub
        if found:
            picked = choice(found)
            self.backend.get_subtitle_file(picked.id)