Commit d24ca46e authored by François Revol, committed by Romain Bignon

Add a video module for gdcvault.com

For now it only fetches the speaker video, but each page can have
both a speaker and slides video feed.
TODO: search
Signed-off-by: François Revol <revol@free.fr>
Signed-off-by: Romain Bignon <romain@symlink.me>
parent bb9a62b5
from .backend import GDCVaultBackend
__all__ = ['GDCVaultBackend']
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
# Copyright(C) 2012 François Revol
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from __future__ import with_statement
from weboob.capabilities.video import ICapVideo, BaseVideo
from weboob.tools.backend import BaseBackend
from weboob.capabilities.collection import ICapCollection, CollectionNotFound
from .browser import GDCVaultBrowser
from .video import GDCVaultVideo
__all__ = ['GDCVaultBackend']
class GDCVaultBackend(BaseBackend, ICapVideo, ICapCollection):
    """Backend for gdcvault.com videos.

    Provides video retrieval/filling through GDCVaultBrowser and exposes a
    single ``latest`` collection of videos.
    """

    NAME = 'gdcvault'
    MAINTAINER = u'François Revol'
    EMAIL = 'revol@free.fr'
    VERSION = '0.d'
    DESCRIPTION = 'Game Developers Conferences Vault video streaming website'
    LICENSE = 'AGPLv3+'
    BROWSER = GDCVaultBrowser

    # Sort criteria names; only referenced by the disabled search below.
    SORTBY = ['relevance', 'rating', 'views', 'time']

    def get_video(self, _id):
        """Return the video the browser builds for *_id*."""
        with self.browser:
            return self.browser.get_video(_id)

    # TODO: search is not implemented yet; kept as a sketch.
    # def search_videos(self, pattern, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
    #     with self.browser:
    #         return self.browser.search_videos(pattern, self.SORTBY[sortby])

    def fill_video(self, video, fields):
        """Complete *video* with the requested *fields* and return it.

        Unless only the thumbnail is requested, the whole video is
        re-fetched from its page. The thumbnail data is downloaded when
        'thumbnail' is among *fields* and the video has a thumbnail.
        """
        thumbnail_only = (fields == ['thumbnail'])
        if not thumbnail_only:
            # anything besides the thumbnail alone: reload every field
            with self.browser:
                page_url = GDCVaultVideo.id2url(video.id)
                video = self.browser.get_video(page_url, video)

        if 'thumbnail' in fields and video.thumbnail:
            with self.browser:
                video.thumbnail.data = self.browser.readurl(video.thumbnail.url)

        return video

    def iter_resources(self, objs, split_path):
        """Yield the resources found at *split_path* for video objects.

        At the root level the ``latest`` collection is yielded; inside
        ``latest`` the videos returned by the browser are yielded.
        """
        if BaseVideo not in objs:
            return

        collection = self.get_collection(objs, split_path)
        if collection.path_level == 0:
            yield self.get_collection(objs, [u'latest'])
        if collection.split_path == [u'latest']:
            # NOTE(review): latest_videos() appears to be commented out in
            # GDCVaultBrowser -- confirm it exists before relying on this.
            for latest in self.browser.latest_videos():
                yield latest

    def validate_collection(self, objs, collection):
        """Accept the root and the ``latest`` video collection only.

        Sets the title of the ``latest`` collection; raises
        CollectionNotFound for any other path.
        """
        if collection.path_level == 0:
            return

        is_latest = BaseVideo in objs and collection.split_path == [u'latest']
        if not is_latest:
            raise CollectionNotFound(collection.split_path)
        collection.title = u'Latest GDCVault videos'

    # Maps object classes to the method used to fill them.
    OBJECTS = {GDCVaultVideo: fill_video}
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
# Copyright(C) 2012 François Revol
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BaseBrowser
from weboob.tools.browser.decorators import id2url
#from .pages.index import IndexPage
from .pages import VideoPage
from .video import GDCVaultVideo
__all__ = ['GDCVaultBrowser']
class GDCVaultBrowser(BaseBrowser):
    """Browser for gdcvault.com; only the video play page is mapped."""

    DOMAIN = 'gdcvault.com'
    ENCODING = None
    # URL pattern -> page class; the named group 'id' captures the video id.
    PAGES = {
        r'http://[w\.]*gdcvault.com/play/(?P<id>[\d]+)/?.*': VideoPage,
    }

    @id2url(GDCVaultVideo.id2url)
    def get_video(self, url, video=None):
        """Open *url* and return the video built by the resulting page.

        *video*, when given, is handed to the page's get_video().
        """
        self.location(url)
        current_page = self.page
        return current_page.get_video(video)

    # TODO: search and latest listing are not implemented yet (they need
    # an IndexPage); kept as a sketch.
    # def search_videos(self, pattern, sortby):
    #     return None
    #     self.location(self.buildurl('http://gdcvault.com/en/search%s' % sortby, query=pattern.encode('utf-8')))
    #     assert self.is_on_page(IndexPage)
    #     return self.page.iter_videos()

    # def latest_videos(self):
    #     self.home()
    #     assert self.is_on_page(IndexPage)
    #     return self.page.iter_videos()
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
# Copyright(C) 2012 François Revol
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.mech import ClientForm
ControlNotFoundError = ClientForm.ControlNotFoundError
from weboob.tools.browser import BasePage
import re
import datetime
from dateutil.parser import parse as parse_dt
from weboob.capabilities.base import NotAvailable
from weboob.tools.browser import BrokenPageError
from .video import GDCVaultVideo
#import lxml.etree
__all__ = ['VideoPage']
class VideoPage(BasePage):
    """Page object for a gdcvault.com video play page."""

    def get_video(self, video=None):
        """Fill *video* from this page and from the player's XML config.

        The title is read from the HTML ``<title>``. The stream host, the
        speaker and slides stream paths, the date and the duration are read
        from the XML config file referenced by the player iframe URL.

        :param video: video object to complete; when None, a new
                      GDCVaultVideo is created from ``self.group_dict['id']``.
        :returns: the filled video, or None when the iframe lookup gives
                  None or the iframe ``src`` does not match the expected
                  player URL shape.
        :raises BrokenPageError: when the ``akamaihost`` element has no text.
        """
        if video is None:
            video = GDCVaultVideo(self.group_dict['id'])

        # the config file has it too, but in CDATA
        obj = self.parser.select(self.document.getroot(), 'title')
        if len(obj) > 0:
            title = obj[0].text.strip()
            # strip the site prefix when present
            m = re.match(r'GDC Vault\s+-\s+(.*)', title)
            if m:
                title = m.group(1)
            video.title = unicode(title)

        # get the config file for the rest
        obj = self.parser.select(self.document.getroot(), 'iframe', 1)
        if obj is None:
            return None
        iframe_url = obj.attrib['src']
        m = re.match(r'(http:.*)player.html\?.*xmlURL=([^&]+)\&token=([^&]+)', iframe_url)
        if not m:
            return None

        # base URL of the player + relative path of the XML config
        config_url = m.group(1) + m.group(2)
        config = self.browser.get_document(self.browser.openurl(config_url))
        config_root = config.getroot()

        obj = self.parser.select(config_root, 'akamaihost', 1)
        host = obj.text
        if host is None:
            raise BrokenPageError('Missing tag in xml config file')

        # each page can carry two feeds; only the speaker one is used below
        videos = {}
        obj = self.parser.select(config_root, 'speakervideo', 1)
        videos['speaker'] = 'rtmp://' + host + '/' + obj.text
        obj = self.parser.select(config_root, 'slidevideo', 1)
        videos['slides'] = 'rtmp://' + host + '/' + obj.text

        obj = self.parser.select(config_root, 'date', 1)
        video.date = parse_dt(obj.text)

        obj = self.parser.select(config_root, 'duration', 1)
        m = re.match(r'(\d\d):(\d\d):(\d\d)', obj.text)
        if m:
            video.duration = datetime.timedelta(hours=int(m.group(1)),
                                                minutes=int(m.group(2)),
                                                seconds=int(m.group(3)))

        # The result is not used yet, but the lookup is kept as in the
        # original (presumably select(..., 1) validates that the element
        # exists -- confirm).
        # TODO: speaker name is stored as CDATA; use it for video.author
        obj = self.parser.select(config_root, 'speaker', 1)

        video.url = unicode(videos['speaker'])

        video.set_empty_fields(NotAvailable)
        return video

    def set_details(self, v):
        """Set ``v.date`` and ``v.duration`` from markup of this page.

        The date is parsed from the ``content`` attribute of
        ``meta[name=available]`` (``DD-MM-YYYY HH:MM``); the duration is the
        difference between the begin and end times found in
        ``span.ep_subtitle span.ep_date`` (``HH:MM / HH:MM - DD-MM-YYYY``).

        NOTE(review): the only call to this method is commented out in
        get_video(), and the selectors look inherited from another module --
        confirm they apply to gdcvault.com before enabling it.

        :raises BrokenPageError: when either value does not match its
                                 expected format.
        """
        obj = self.parser.select(self.document.getroot(), 'meta[name=available]', 1)
        if obj is not None:
            value = obj.attrib['content']
            m = re.match(r'(\d\d)-(\d\d)-(\d\d\d\d)\s*(\d\d):(\d\d)', value)
            if not m:
                raise BrokenPageError('Unable to parse datetime: %r' % value)
            day, month, year, hour, minute = [int(g) for g in m.groups()]
            v.date = datetime.datetime(year=year,
                                       month=month,
                                       day=day,
                                       hour=hour,
                                       minute=minute)

        obj = self.parser.select(self.document.getroot(), 'span.ep_subtitle', 1)
        if obj is not None:
            span = self.parser.select(obj, 'span.ep_date', 1)
            value = span.text
            m = re.match(r'(\d\d):(\d\d)\s*\/\s*(\d\d):(\d\d)\s*-\s*(\d\d)-(\d\d)-(\d\d\d\d)', value)
            if not m:
                raise BrokenPageError('Unable to parse datetime: %r' % value)
            bhour, bminute, ehour, eminute, day, month, year = [int(g) for g in m.groups()]
            start = datetime.datetime(year=year,
                                      month=month,
                                      day=day,
                                      hour=bhour,
                                      minute=bminute)
            end = datetime.datetime(year=year,
                                    month=month,
                                    day=day,
                                    hour=ehour,
                                    minute=eminute)
            v.duration = end - start
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
# Copyright(C) 2012 François Revol
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest
#from weboob.capabilities.video import BaseVideo
class GDCVaultTest(BackendTest):
    """Test case bound to the 'gdcvault' backend; no test is enabled yet."""

    BACKEND = 'gdcvault'

    # TODO: enable once search is implemented in the backend.
    # def test_search(self):
    #     l = list(self.backend.search_videos('linux'))
    #     self.assertTrue(len(l) > 0)
    #     v = l[0]
    #     self.backend.fillobj(v, ('url',))
    #     self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
    #     self.backend.browser.openurl(v.url)

    # TODO: enable once the 'latest' listing is implemented in the browser.
    # def test_latest(self):
    #     l = list(self.backend.iter_resources([BaseVideo], [u'latest']))
    #     self.assertTrue(len(l) > 0)
    #     v = l[0]
    #     self.backend.fillobj(v, ('url',))
    #     self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Roger Philibert
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.video import BaseVideo
import re
__all__ = ['GDCVaultVideo']
class GDCVaultVideo(BaseVideo):
    """Video object for gdcvault.com; the extension is set to 'flv'."""

    def __init__(self, *args, **kwargs):
        BaseVideo.__init__(self, *args, **kwargs)
        self.ext = u'flv'

    @classmethod
    def id2url(cls, _id):
        """Return the play-page URL for *_id*.

        The id namespace is enlarged to differentiate videos from the same
        page: an id may be a bare number or carry a ``#speaker`` or
        ``#slides`` suffix. The suffix is already part of *_id*, so it is
        not appended a second time (the previous code produced URLs such
        as ``.../play/123#speaker#speaker``).

        :param _id: video id, e.g. ``'123'``, ``'123#speaker'``,
                    ``'123#slides'``.
        :returns: unicode URL of the play page.
        """
        return u'http://www.gdcvault.com/play/%s' % _id
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment