Newer
Older
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Romain Bignon
#
# This file is part of a weboob module.
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.browser.elements import ItemElement, ListElement, method
from weboob.browser.pages import HTMLPage, pagination
from weboob.browser.filters.standard import CleanText, Regexp, Env, Duration, DateTime
from weboob.browser.filters.html import Link
from weboob.capabilities.base import NotAvailable
from weboob.capabilities.video import BaseVideo
from weboob.capabilities.image import Thumbnail
from weboob.exceptions import ParseError
from weboob.tools.json import json
from datetime import timedelta
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
def determine_ext(url, default_ext='unknown_video'):
if url is None:
return default_ext
guess = url.partition('?')[0].rpartition('.')[2]
if re.match(r'^[A-Za-z0-9]+$', guess):
return guess
elif guess.rstrip('/') in ('mp4', 'm3u8'):
return guess.rstrip('/')
else:
return default_ext
class IndexPage(HTMLPage):
@pagination
@method
class iter_videos(ListElement):
item_xpath = '//div[@data-video-id]'
next_page = Link('//a[@title="suivant"]')
class item(ItemElement):
klass = BaseVideo
def validate(self, obj):
return obj.id
obj_id = CleanText('./div/@data-playable')
obj_title = CleanText('./div[@class="media-block"]/h3')
obj_author = CleanText('./div[@class="media-block"]/div/span/a')
obj_duration = Duration(CleanText('./div/a/div[has-class("badge--duration")]'), default=NotAvailable)
def obj_thumbnail(self):
url = CleanText('./div/a/img/@data-src')(self)
thumbnail = Thumbnail(url)
thumbnail.url = url
return thumbnail
class VideoPage(HTMLPage):
@method
class get_video(ItemElement):
klass = BaseVideo
obj_id = Env('_id')
obj_title = CleanText('//title')
obj_author = CleanText('//meta[@name="author"]/@content')
obj_description = CleanText('//meta[@name="description"]/@content')
def obj_duration(self):
seconds = int(CleanText('//meta[@property="video:duration"]/@content', default=0)(self))
return timedelta(seconds=seconds)
def obj_thumbnail(self):
url = CleanText('//meta[@property="og:image"]/@content')(self)
thumbnail = Thumbnail(url)
thumbnail.url = url
return thumbnail
obj_date = DateTime(CleanText('//meta[@property="video:release_date"]/@content'))
def obj__formats(self):
player = Regexp(CleanText('//script'), '.*var config = ({"context".*}}});\s*buildPlayer\(config\);.*', default=None)(self)
if player:
info = json.loads(player)
if info.get('error') is not None:
raise ParseError(info['error']['title'])
metadata = info.get('metadata')
formats = {}
for quality, media_list in metadata['qualities'].items():
for media in media_list:
media_url = media.get('url')
if not media_url:
continue
type_ = media.get('type')
if type_ == 'application/vnd.lumberjack.manifest':
continue
ext = determine_ext(media_url)
if ext in formats:
if quality in formats.get(ext):
formats[ext][quality] = media_url
else:
formats[ext] = {quality: media_url}
else:
formats[ext] = {quality: media_url}