Commit 67a393f5 authored by Bezleputh

[francetelevisions] handle ls command + fix test

parent 67097528
......@@ -22,7 +22,7 @@ from __future__ import unicode_literals
from weboob.browser import PagesBrowser, URL
from weboob.exceptions import BrowserHTTPNotFound
from .pages import SearchPage, VideoWebPage, VideoJsonPage
from .pages import SearchPage, VideoWebPage, VideoJsonPage, HomePage
__all__ = ['PluzzBrowser']
......@@ -34,6 +34,7 @@ class PluzzBrowser(PagesBrowser):
search_page = URL(r'/recherche/', SearchPage)
video = URL(r'/.+/(?P<number>\d+)-[^/]+.html$', VideoWebPage)
video_json = URL(r'\?idDiffusion=(?P<number>.+)$', VideoJsonPage)
home = URL(r'/(?P<cat>.*)', HomePage)
def search_videos(self, s):
self.location(, params={'q': s})
......@@ -55,3 +56,19 @@ class PluzzBrowser(PagesBrowser): = id
return video
def get_categories(self):
return self.home.go(cat="").iter_categories()
def iter_subcategories(self, cat):
for cat in self.home.go(cat="/".join(cat)).iter_subcategories(cat=cat):
yield cat = r"//li[@class='card card-li ']|//li[@class='card card-small ']"
for vid in
yield vid
def iter_videos(self, cat): = self.home.go(cat="") = r'//h1[contains(text(), "%s")]/following-sibling::ul/li' % cat
......@@ -17,7 +17,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.base import find_object
from import CapVideo, BaseVideo
from weboob.capabilities.collection import CapCollection, CollectionNotFound
from import Module
......@@ -54,16 +54,18 @@ class PluzzModule(Module, CapVideo, CapCollection):
if BaseVideo in objs:
collection = self.get_collection(objs, split_path)
if collection.path_level == 0:
yield self.get_collection(objs, [u'latest'])
if collection.split_path == [u'latest']:
for video in self.browser.latest_videos():
for category in self.browser.get_categories():
yield category
elif collection.path_level == 1 and collection.split_path[0].startswith('vid_'):
cat = find_object(self.browser.get_categories(), id=collection.split_path[0], error=None)
for video in self.browser.iter_videos(cat.title):
yield video
for cat in self.browser.iter_subcategories(collection.split_path):
yield cat
def validate_collection(self, objs, collection):
if collection.path_level == 0:
if BaseVideo in objs and collection.split_path == [u'latest']:
collection.title = u'Latest France Télévisions videos'
if collection.path_level <= 2:
raise CollectionNotFound(collection.split_path)
......@@ -20,22 +20,25 @@
from __future__ import unicode_literals
import re
from datetime import datetime
import hashlib
from datetime import datetime, timedelta
from weboob.capabilities.base import NotAvailable
from weboob.capabilities.file import LICENSES
from weboob.capabilities.image import Thumbnail
from import BaseVideo
from weboob.capabilities.collection import Collection
from weboob.browser.pages import HTMLPage, JsonPage
from weboob.browser.elements import ItemElement, ListElement, method
from weboob.browser.filters.standard import CleanText, Regexp, Format, DateTime, Duration, Date, Eval, Env
from weboob.browser.filters.html import Attr, AbsoluteLink
from weboob.browser.filters.standard import CleanText, Regexp, Format, DateTime, Duration, Date, Eval, Env, Field
from weboob.browser.filters.html import Attr, AbsoluteLink, CleanHTML
from weboob.browser.filters.json import Dict
def parse_duration(text):
    """Convert a duration expressed in whole minutes into a ``timedelta``.

    :param text: number of minutes, as a string (or any value ``int()`` accepts)
    :return: the equivalent duration
    :rtype: datetime.timedelta
    :raises ValueError: if *text* cannot be parsed as an integer
    """
    # Return a timedelta rather than a bare number of seconds; a leftover
    # integer return placed before this one would make it unreachable.
    return timedelta(minutes=int(text))
class SearchPage(HTMLPage):
......@@ -57,16 +60,17 @@ class SearchPage(HTMLPage):
self.env['title'] = '%s - %s' % (basetitle, sub)
obj_id = AbsoluteLink('.//a')
#~ obj__number = Attr('./div[@class="card-content"]//a', 'data-video-content')
# obj__number = Attr('./div[@class="card-content"]//a', 'data-video-content')
obj_title = Env('title')
obj_thumbnail = Eval(Thumbnail, Format('https:%s', Attr('./a//img', 'data-src')))
obj_date = Date(Regexp(Env('infos'), r'\| (\d+\.\d+\.\d+) \|', default=NotAvailable), dayfirst=True, default=NotAvailable)
obj_date = Date(Regexp(Env('infos'), r'\| (\d+\.\d+\.\d+) \|',
dayfirst=True, default=NotAvailable)
obj_duration = Eval(parse_duration, Regexp(Env('infos'), r'(\d+) min'))
class VideoWebPage(HTMLPage):
def get_number(self):
return Attr('//div[@id="player"]', 'data-main-video')(self.doc)
......@@ -107,3 +111,56 @@ class VideoJsonPage(JsonPage):
def validate(self, obj):
return obj.url
class HomePage(HTMLPage):
class iter_categories(ListElement):
item_xpath = '//h1'
class item(ItemElement):
klass = Collection
def obj_id(self):
id = Regexp(CleanText('./a/@href'), '//*)/', default=None)(self)
if not id:
id = CleanText('.')(self)
id = id.encode('ascii', 'ignore')
id = hashlib.md5(id).hexdigest()
id = u'vid_%s' % id
return id
obj_title = CleanText('.')
def obj_split_path(self):
return [Field('id')(self)]
class iter_subcategories(ListElement):
item_xpath = '//h2[has-class("title-wall")]'
class item(ItemElement):
klass = Collection
obj_id = Regexp(CleanText('./a/@href'), '//*/(.*)/', default=None)
obj_title = CleanText('.')
def obj_split_path(self):
cat = Env('cat')(self)
return cat
class iter_videos(ListElement):
def parse(self, el):
self.item_xpath =
class item(ItemElement):
klass = BaseVideo
obj_id = Format('https:%s', CleanText('./a/@href'))
obj_title = CleanText(CleanHTML('./a/div[@class="card-content"]|./div[has-class("card-content")]'))
def condition(self):
return Field('title')(self)
......@@ -26,19 +26,20 @@ class PluzzTest(BackendTest):
def test_search(self):
# If the test fails, it might be good news!
l = list(self.backend.search_videos("journal"))
l = list(self.backend.search_videos("20h"))
self.assertTrue(len(l) > 0)
v = l[0]
v = self.backend.fillobj(v, ('url',)) or v
self.assertTrue(v.url, 'URL for video "%s" not found: %s' % (, v.url))
def test_video_from_url(self):
v = self.backend.get_video('')
self.assertTrue(v.url, 'URL for video "%s" not found: %s' % (, v.url))
def test_latest(self):
l = list(self.backend.iter_resources([BaseVideo], [u'latest']))
assert len(l)
v = l[0]
self.backend.fillobj(v, ('url',))
self.assertTrue(v.url, 'URL for video "%s" not found' % (
def test_categories(self):
cat = list(self.backend.iter_resources([BaseVideo], []))
self.assertTrue(len(cat) > 0)
for c in cat:
if c.split_path[-1].startswith('vid_'):
videos = list(self.backend.iter_resources([BaseVideo], c.split_path))
self.assertTrue(len(videos) > 0)
v = videos[0]
v = self.backend.fillobj(v, ('url',)) or v
self.assertTrue(v.url, 'URL for video "%s" not found: %s' % (, v.url))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment