Commit cf45e256 authored by Vincent A, committed by Romain Bignon

[francetelevisions] site changed

Some features, like emission search, were not done though.
parent e2799435
......@@ -17,67 +17,41 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
from weboob.browser import PagesBrowser, URL
from .pages import IndexPage, VideoPage, Programs, VideoListPage, LatestPage, FrancetvinfoPage
from weboob.exceptions import BrowserHTTPNotFound
from .pages import SearchPage, VideoWebPage, VideoJsonPage
__all__ = ['PluzzBrowser']
class PluzzBrowser(PagesBrowser):
ENCODING = 'utf-8'
francetvinfo = URL(r'<url>.*)', FrancetvinfoPage)
latest = URL(r'', LatestPage)
programs_page = URL(r'', Programs)
index_page = URL(r'recherche\?recherche=(?P<pattern>.*)', IndexPage)
video_page = URL(r'\?idDiffusion=(?P<id>.*)&catalogue=Pluzz',
videos_list_page = URL(r'(?P<program>videos/.*)', VideoListPage)
def get_video_id_from_francetvinfo(self, url):
    """Open ``url`` through the ``francetvinfo`` URL pattern and extract a video id.

    :param url: value substituted for the ``url`` group of the
                ``francetvinfo`` pattern
    :return: whatever the resulting page's
             ``get_video_id_from_francetvinfo()`` returns
    """
    page = self.francetvinfo.go(url=url)
    return page.get_video_id_from_francetvinfo()
def get_video_from_url(self, url):
video = self.videos_list_page.go(program=url).get_last_video()
if video:
return self.get_video(, video)
def search_videos(self, pattern):
if not self.PROGRAMS:
self.PROGRAMS = list(self.get_program_list())
search_page = URL(r'/recherche/', SearchPage)
video = URL(r'/.+/(?P<number>\d+)-[^/]+.html$', VideoWebPage)
video_json = URL(r'\?idDiffusion=(?P<number>.+)$', VideoJsonPage)
videos = []
for program in self.PROGRAMS:
if pattern.upper() in program._title.upper():
video = self.videos_list_page.go(
if video:
videos += list(
def search_videos(self, s):
self.location(, params={'q': s})
return videos if len(videos) > 0 else self.index_page.go(pattern=pattern).iter_videos()
def get_video(self, id):
number =
def get_program_list(self):
    """Fetch the programs page and return every program it yields.

    :return: the items produced by the page's ``iter_programs()``,
             materialized so they can be iterated more than once
    :rtype: list
    """
    programs = self.programs_page.go().iter_programs()
    return list(programs)
def get_video(self, url, video=None):
video =
except BrowserHTTPNotFound:
self.logger.warning('video info not found, probably needs payment')
video =
if not video:
self.logger.debug('video info not found, maybe not available?')
return = id
for item in self.read_url(video.url):
video.url = u'%s' % item
return video
def read_url(self, url):
r =, stream=True)
buf = r.iter_lines()
return buf
def latest_videos(self):
    """Fetch the ``latest`` page and return its videos.

    :return: the result of the page's ``iter_videos()``, returned as-is
             (not materialized into a list)
    """
    page = self.latest.go()
    return page.iter_videos()
......@@ -24,7 +24,6 @@ from import Module
from .browser import PluzzBrowser
import re
__all__ = ['PluzzModule']
......@@ -39,28 +38,16 @@ class PluzzModule(Module, CapVideo, CapCollection):
BROWSER = PluzzBrowser
def get_video(self, _id):
m = re.match('*)', _id)
if m:
return self.browser.get_video_from_url(
m2 = re.match('*)', _id)
if m2:
_id = self.browser.get_video_id_from_francetvinfo(
if not _id:
return self.browser.get_video(_id)
def search_videos(self, pattern, sortby=CapVideo.SEARCH_RELEVANCE, nsfw=False):
    """Search videos matching ``pattern`` by delegating to the browser.

    :param pattern: search text, forwarded unchanged to
                    ``self.browser.search_videos``
    :param sortby: accepted for the signature but not forwarded to the browser
    :param nsfw: accepted for the signature but not forwarded to the browser
    :return: whatever ``self.browser.search_videos(pattern)`` returns
    """
    return self.browser.search_videos(pattern)
def fill_video(self, video, fields):
if fields != ['thumbnail']:
# if we don't want only the thumbnail, we probably want also every fields
video = self.browser.get_video(, video)
if 'url' in fields:
video = self.browser.get_video(
if 'thumbnail' in fields and video.thumbnail: =
return video
def iter_resources(self, objs, split_path):
......@@ -17,143 +17,81 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
from weboob.capabilities.file import LICENSES
from weboob.capabilities.image import Thumbnail
from import BaseVideo
from weboob.capabilities.base import BaseObject
from datetime import timedelta
from datetime import datetime
from weboob.browser.pages import HTMLPage, JsonPage
from weboob.browser.elements import ItemElement, ListElement, DictElement, method
from weboob.browser.filters.standard import Filter, CleanText, Regexp, Format, DateTime, Env, Duration
from weboob.browser.filters.html import Link, Attr
from weboob.browser.elements import ItemElement, ListElement, method
from weboob.browser.filters.standard import CleanText, Regexp, Format, DateTime, Duration, Date, Eval
from weboob.browser.filters.html import Attr, AbsoluteLink
from weboob.browser.filters.json import Dict
class DurationPluzz(Filter):
def filter(self, el):
duration = Regexp(CleanText('.'), r'.+\|(.+)')(el[0])
if duration[-1:] == "'":
t = [0, int(duration[:-1])]
t = map(int, duration.split(':'))
return timedelta(hours=t[0], minutes=t[1])
class FrancetvinfoPage(HTMLPage):
def get_video_id_from_francetvinfo(self):
return Regexp(CleanText('//a[@id="catchup"]/@href'),
def parse_duration(text):
    """Convert a duration given in whole minutes into seconds.

    :param text: number of minutes, as a string or anything ``int()``
                 accepts (e.g. the digits matched in front of "min")
    :return: the equivalent number of seconds
    :rtype: int
    :raises ValueError: if ``text`` cannot be parsed as an integer
    """
    return int(text) * 60
class VideoListPage(HTMLPage):
class get_last_video(ItemElement):
klass = BaseVideo
obj_id = CleanText('//div[@id="diffusion-info"]/@data-diffusion')
obj_title = CleanText('//div[@id="diffusion-info"]/h1/div[@id="diffusion-titre"]')
obj_date = DateTime(Regexp(CleanText('//div[@id="diffusion-info"]/h1|//div[@id="diffusion-info"]/div/div/*[1]',
replace=[(u'à', u''), (u' ', u' ')]),
class SearchPage(HTMLPage):
class iter_videos(ListElement):
item_xpath = '//div[@id="player-memeProgramme"]/a'
item_xpath = '//section[h1[ends-with(text(), "vidéos")]]/ul/li'
class item(ItemElement):
klass = BaseVideo
def condition(self):
return CleanText('div[@class="autre-emission-c3"]')(self) == "En replay"
obj_id = AbsoluteLink('.//a')
#~ obj__number = Attr('./div[@class="card-content"]//a', 'data-video-content')
obj_id = Regexp(Link('.'), '^/videos/.+,(.+).html$')
obj_title = CleanText('//meta[@name="programme_titre"]/@content')
obj_date = DateTime(Regexp(CleanText('./div[@class="autre-emission-c2"]|./div[@class="autre-emission-c4"]',
replace=[(u'à', u''), (u' ', u' ')]),
obj_title = Format('%s - %s', CleanText('.//h3/a'), CleanText('.//h3/following-sibling::p[1]'))
obj_thumbnail = Eval(Thumbnail, Format('https:%s', Attr('./a//img', 'data-src')))
_infos = CleanText('.//h3/following-sibling::p[2]')
obj_date = Date(Regexp(_infos, r'\| (\d+\.\d+\.\d+) \|'), dayfirst=True)
obj_duration = Eval(parse_duration, Regexp(_infos, r' \| (\d+) min'))
class IndexPage(HTMLPage):
class iter_videos(ListElement):
item_xpath = '//div[@class="panel-resultat panel-separateur"]'
ignore_duplicate = True
class item(ItemElement):
klass = BaseVideo
class VideoWebPage(HTMLPage):
def get_number(self):
    """Return the ``data-main-video`` attribute of the ``<div id="player">`` element.

    :return: the attribute value read from ``self.doc``
    """
    # Build the filter first, then apply it to the parsed document.
    find_number = Attr('//div[@id="player"]', 'data-main-video')
    return find_number(self.doc)
obj_title = Format('%s du %s',
obj_id = Regexp(Link('div/div[@class="resultat-titre-diff"]/a'),
obj_date = DateTime(Regexp(CleanText('div/div[@class="resultat-soustitre-diff"]',
replace=[(u'à', u''), (u' ', u' ')]),
obj_duration = DurationPluzz('div/div[3]')
def obj_thumbnail(self):
url = Attr('a/img[@class="resultat-vignette"]', 'data-src')(self)
thumbnail = Thumbnail(url)
thumbnail.url =
return thumbnail
class VideoPage(JsonPage):
class get_video(ItemElement):
klass = BaseVideo
obj_title = CleanText('//article[@id="description"]//h1')
obj_description = CleanText('//article[@id="description"]//section/following-sibling::div')
def validate(self, obj):
    """Return ``obj.url`` for the object that was just built.

    The result is truthy only when a URL is set.
    NOTE(review): presumably the element framework uses this as a
    keep/discard check -- confirm against the ItemElement contract.

    :param obj: the object being validated
    :return: ``obj.url`` unchanged
    """
    return obj.url
obj_date = DateTime(Regexp(
CleanText('//article[@id="description"]//span[contains(text(),"diffusé le")]'),
r'(\d{2})\.(\d{2})\.(\d{2}) à (\d{2})h(\d{2})', r'20\3/\2/\1 \4:\5'))
obj_duration = Eval(parse_duration, Regexp(CleanText('//div[span[text()="|"]]'), r'| (\d+)min'))
def parse(self, el):
for video in el['videos']:
if video['format'] != 'm3u8-download':
obj_thumbnail = Eval(Thumbnail, Format('https:%s', Attr('//div[@id="playerPlaceholder"]//img', 'data-src')))
obj__number = Attr('//div[@id="player"]', 'data-main-video')
self.env['url'] = video['url']
obj_id = Env('id')
class VideoJsonPage(JsonPage):
class get_video(ItemElement):
klass = BaseVideo
obj_title = Format(u'%s - %s', Dict['titre'], Dict['sous_titre'])
obj_url = Env('url')
obj_date = Dict['diffusion']['date_debut'] & DateTime
obj_date = Eval(datetime.fromtimestamp, Dict('diffusion/timestamp'))
obj_duration = Dict['duree'] & Duration
obj_description = Dict['synopsis']
obj_ext = u'm3u8'
def obj_thumbnail(self):
url = Format('', Dict['image'])(self)
thumbnail = Thumbnail(url)
thumbnail.url =
return thumbnail
class Programs(JsonPage):
class iter_programs(DictElement):
item_xpath = 'reponse/programme'
class item(ItemElement):
klass = BaseObject
obj_id = CleanText(Dict('url'))
obj__title = CleanText(Dict('titre_programme'))
obj__uuid = Dict['id']
def obj_url(self):
return next((v['url_secure'] for v in['videos'] if v['format'] == 'm3u8-download'), None)
class LatestPage(JsonPage):
class iter_videos(DictElement):
item_xpath = 'reponse/emissions'
obj_thumbnail = Eval(Thumbnail, Dict['image_secure'])
class Item(ItemElement):
klass = BaseVideo
obj_id = Dict('id_diffusion')
obj_title = Dict('titre_programme')
obj_date = DateTime(Dict('date_diffusion'))
def validate(self, obj):
    """Return ``obj.url`` for the object that was just built.

    The result is truthy only when a URL is set.
    NOTE(review): presumably the element framework uses this as a
    keep/discard check -- confirm against the ItemElement contract.

    :param obj: the object being validated
    :return: ``obj.url`` unchanged
    """
    return obj.url
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment