Commit 67a393f5 authored by Bezleputh

[francetelevisions] handle ls command + fix test

parent 67097528
......@@ -22,7 +22,7 @@ from __future__ import unicode_literals
from weboob.browser import PagesBrowser, URL
from weboob.exceptions import BrowserHTTPNotFound
from .pages import SearchPage, VideoWebPage, VideoJsonPage
from .pages import SearchPage, VideoWebPage, VideoJsonPage, HomePage
__all__ = ['PluzzBrowser']
......@@ -34,6 +34,7 @@ class PluzzBrowser(PagesBrowser):
search_page = URL(r'/recherche/', SearchPage)
video = URL(r'/.+/(?P<number>\d+)-[^/]+.html$', VideoWebPage)
video_json = URL(r'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/\?idDiffusion=(?P<number>.+)$', VideoJsonPage)
home = URL(r'/(?P<cat>.*)', HomePage)
def search_videos(self, s):
self.location(self.search_page.build(), params={'q': s})
......@@ -55,3 +56,19 @@ class PluzzBrowser(PagesBrowser):
video.id = id
return video
def get_categories(self):
    """Open the home URL with an empty ``cat`` and return its categories.

    :return: whatever the loaded page's ``iter_categories()`` yields.
    """
    home_page = self.home.go(cat="")
    return home_page.iter_categories()
def iter_subcategories(self, cat):
    """Yield the sub-collections of a category path, then its videos.

    :param cat: list of path components; they are joined with ``/`` to
                build the URL and also passed on to the page's
                ``iter_subcategories``.
    """
    listing = self.home.go(cat="/".join(cat))
    for subcategory in listing.iter_subcategories(cat=cat):
        yield subcategory

    # The page's iter_videos() takes its item XPath from page.item_xpath,
    # so it has to be set on the current page before iterating.
    self.page.item_xpath = r"//li[@class='card card-li ']|//li[@class='card card-small ']"
    for video in self.page.iter_videos():
        yield video
def iter_videos(self, cat):
    """Return the videos listed on the home page under a given heading.

    :param cat: text interpolated into the XPath; items are the ``li``
                children of the ``ul`` siblings following the ``h1``
                whose text contains it.
    :return: the result of the page's ``iter_videos()``.
    """
    self.page = self.home.go(cat="")
    heading_items = r'//h1[contains(text(), "%s")]/following-sibling::ul/li' % cat
    self.page.item_xpath = heading_items
    return self.page.iter_videos()
......@@ -17,7 +17,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.base import find_object
from weboob.capabilities.video import CapVideo, BaseVideo
from weboob.capabilities.collection import CapCollection, CollectionNotFound
from weboob.tools.backend import Module
......@@ -54,16 +54,18 @@ class PluzzModule(Module, CapVideo, CapCollection):
if BaseVideo in objs:
collection = self.get_collection(objs, split_path)
if collection.path_level == 0:
yield self.get_collection(objs, [u'latest'])
if collection.split_path == [u'latest']:
for video in self.browser.latest_videos():
for category in self.browser.get_categories():
yield category
elif collection.path_level == 1 and collection.split_path[0].startswith('vid_'):
cat = find_object(self.browser.get_categories(), id=collection.split_path[0], error=None)
for video in self.browser.iter_videos(cat.title):
yield video
else:
for cat in self.browser.iter_subcategories(collection.split_path):
yield cat
def validate_collection(self, objs, collection):
if collection.path_level == 0:
return
if BaseVideo in objs and collection.split_path == [u'latest']:
collection.title = u'Latest France Télévisions videos'
if collection.path_level <= 2:
return
raise CollectionNotFound(collection.split_path)
......
......@@ -20,22 +20,25 @@
from __future__ import unicode_literals
import re
from datetime import datetime
import hashlib
from datetime import datetime, timedelta
from weboob.capabilities.base import NotAvailable
from weboob.capabilities.file import LICENSES
from weboob.capabilities.image import Thumbnail
from weboob.capabilities.video import BaseVideo
from weboob.capabilities.collection import Collection
from weboob.browser.pages import HTMLPage, JsonPage
from weboob.browser.elements import ItemElement, ListElement, method
from weboob.browser.filters.standard import CleanText, Regexp, Format, DateTime, Duration, Date, Eval, Env
from weboob.browser.filters.html import Attr, AbsoluteLink
from weboob.browser.filters.standard import CleanText, Regexp, Format, DateTime, Duration, Date, Eval, Env, Field
from weboob.browser.filters.html import Attr, AbsoluteLink, CleanHTML
from weboob.browser.filters.json import Dict
def parse_duration(text):
    """Convert a number of minutes, given as text, into a ``timedelta``.

    :param text: string holding an integer count of minutes (anything
                 ``int()`` accepts).
    :return: ``datetime.timedelta`` of that many minutes.
    :raises ValueError: if ``text`` is not a valid integer literal.
    """
    # The bare-integer return that used to precede this one made the
    # timedelta return unreachable; only the timedelta form is kept.
    return timedelta(minutes=int(text))
class SearchPage(HTMLPage):
......@@ -57,16 +60,17 @@ class SearchPage(HTMLPage):
self.env['title'] = '%s - %s' % (basetitle, sub)
obj_id = AbsoluteLink('.//a')
#~ obj__number = Attr('./div[@class="card-content"]//a', 'data-video-content')
# obj__number = Attr('./div[@class="card-content"]//a', 'data-video-content')
obj_title = Env('title')
obj_thumbnail = Eval(Thumbnail, Format('https:%s', Attr('./a//img', 'data-src')))
obj_date = Date(Regexp(Env('infos'), r'\| (\d+\.\d+\.\d+) \|', default=NotAvailable), dayfirst=True, default=NotAvailable)
obj_date = Date(Regexp(Env('infos'), r'\| (\d+\.\d+\.\d+) \|',
default=NotAvailable),
dayfirst=True, default=NotAvailable)
obj_duration = Eval(parse_duration, Regexp(Env('infos'), r'(\d+) min'))
class VideoWebPage(HTMLPage):
def get_number(self):
return Attr('//div[@id="player"]', 'data-main-video')(self.doc)
......@@ -107,3 +111,56 @@ class VideoJsonPage(JsonPage):
def validate(self, obj):
return obj.url
class HomePage(HTMLPage):
    """HTML page exposing categories, sub-categories and video listings."""

    @method
    class iter_categories(ListElement):
        """One Collection per ``h1`` heading of the page."""

        item_xpath = '//h1'

        class item(ItemElement):
            klass = Collection

            def obj_id(self):
                """Return the path taken from the heading link, or a hashed id.

                When the heading has no matching link, the id is ``vid_``
                followed by the MD5 hex digest of the heading text
                (non-ASCII characters are dropped before hashing).
                """
                category_id = Regexp(CleanText('./a/@href'), '//www.france.tv/(.*)/', default=None)(self)
                if not category_id:
                    title = CleanText('.')(self)
                    digest = hashlib.md5(title.encode('ascii', 'ignore')).hexdigest()
                    category_id = u'vid_%s' % digest
                return category_id

            obj_title = CleanText('.')

            def obj_split_path(self):
                """Top-level categories live directly under the root."""
                return [Field('id')(self)]

    @method
    class iter_subcategories(ListElement):
        """One Collection per ``h2`` heading carrying the ``title-wall`` class."""

        item_xpath = '//h2[has-class("title-wall")]'

        class item(ItemElement):
            klass = Collection

            obj_id = Regexp(CleanText('./a/@href'), '//www.france.tv/.*/(.*)/', default=None)
            obj_title = CleanText('.')

            def obj_split_path(self):
                """Return the parent path (env ``cat``) extended with this id."""
                # Build a new list instead of appending in place: the env
                # value may be the very list the caller passed as ``cat``,
                # and mutating it would leak this item's id into it.
                parent_path = Env('cat')(self)
                return list(parent_path) + [Field('id')(self)]

    @method
    class iter_videos(ListElement):
        """Videos found under ``page.item_xpath``.

        The XPath is not fixed on the class: it is copied from the page
        in ``parse``, so the caller is expected to set ``item_xpath`` on
        the page before iterating.
        """

        def parse(self, el):
            self.item_xpath = self.page.item_xpath

        class item(ItemElement):
            klass = BaseVideo

            obj_id = Format('https:%s', CleanText('./a/@href'))
            obj_title = CleanText(CleanHTML('./a/div[@class="card-content"]|./div[has-class("card-content")]'))

            def condition(self):
                # Only keep entries for which a title could be extracted.
                return Field('title')(self)
......@@ -26,19 +26,20 @@ class PluzzTest(BackendTest):
def test_search(self):
# If the test fails, it might be good news!
l = list(self.backend.search_videos("journal"))
l = list(self.backend.search_videos("20h"))
self.assertTrue(len(l) > 0)
v = l[0]
v = self.backend.fillobj(v, ('url',)) or v
self.assertTrue(v.url, 'URL for video "%s" not found: %s' % (v.id, v.url))
def test_video_from_url(self):
v = self.backend.get_video('http://pluzz.francetv.fr/videos/plus_belle_la_vie.html')
self.assertTrue(v.url, 'URL for video "%s" not found: %s' % (v.id, v.url))
def test_latest(self):
l = list(self.backend.iter_resources([BaseVideo], [u'latest']))
assert len(l)
v = l[0]
self.backend.fillobj(v, ('url',))
self.assertTrue(v.url, 'URL for video "%s" not found' % (v.id))
def test_categories(self):
    """Check the root listing and the first ``vid_`` category it contains.

    The root must list at least one resource. For the first resource
    whose last path component starts with ``vid_``, its listing must be
    non-empty and its first video must end up with a URL; the test stops
    after that one category.
    """
    categories = list(self.backend.iter_resources([BaseVideo], []))
    self.assertTrue(len(categories) > 0)

    for category in categories:
        if not category.split_path[-1].startswith('vid_'):
            continue

        videos = list(self.backend.iter_resources([BaseVideo], category.split_path))
        self.assertTrue(len(videos) > 0)

        first = videos[0]
        first = self.backend.fillobj(first, ('url',)) or first
        self.assertTrue(first.url, 'URL for video "%s" not found: %s' % (first.id, first.url))
        return
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment