Commit 0dacfa11, authored by Laurent Bachelier

Add support for fetching past emissions URLs

Works with France Inter and France Culture
parent 53915bf6
......@@ -22,4 +22,3 @@ from .backend import RadioFranceBackend
__all__ = ['RadioFranceBackend']
......@@ -19,18 +19,19 @@
from weboob.capabilities.base import NotLoaded
from weboob.capabilities.video import ICapVideo
from weboob.capabilities.radio import ICapRadio, Radio, Stream, Emission
from weboob.capabilities.collection import ICapCollection, CollectionNotFound, Collection
from weboob.tools.backend import BaseBackend
from .browser import RadioFranceBrowser
from .browser import RadioFranceBrowser, RadioFranceVideo
__all__ = ['RadioFranceBackend']
class RadioFranceBackend(BaseBackend, ICapRadio, ICapCollection):
class RadioFranceBackend(BaseBackend, ICapRadio, ICapCollection, ICapVideo):
NAME = 'radiofrance'
MAINTAINER = 'Laurent Bachelier'
EMAIL = 'laurent@bachelier.name'
......@@ -170,4 +171,23 @@ class RadioFranceBackend(BaseBackend, ICapRadio, ICapCollection):
radio.current.artist = artist
return radio
OBJECTS = {Radio: fill_radio}
# Stub that avoids a warning; TODO: implement search (example search URLs below)
# http://www.franceculture.fr/recherche/key%3DYOURSEARCH%2526type%3Demission
# http://www.franceinter.fr/recherche/key%3DYOURSEARCH%2526tri%3Dpertinence%2526theme%3Ddefault%2526type%3Demission
def iter_search_results(self, *args, **kwargs):
    """
    Placeholder search implementation.

    Accepts any positional and keyword arguments, ignores them, and
    always returns a new empty list.
    """
    no_results = list()
    return no_results
def get_video(self, _id):
    """
    Look up a replay from an identifier or URL.

    The lookup is delegated to the browser's get_video(), which is called
    while the browser is held as a context manager. Whatever the browser
    returns is handed back unchanged.
    """
    browser = self.browser
    with browser:
        found = browser.get_video(_id)
    return found
def fill_video(self, video, fields):
    """
    Fill in the requested fields of a video object.

    Only the 'url' field is handled: when it is requested, the URL is
    obtained from the browser's get_url() using the video's id and stored
    on the object. The same video object is returned in every case.
    """
    if 'url' not in fields:
        return video
    with self.browser:
        video.url = self.browser.get_url(video.id)
    return video
# Maps each object class to the method of this backend that fills it in
# (presumably consulted by fillobj() -- confirm against BaseBackend).
OBJECTS = {Radio: fill_radio,
RadioFranceVideo: fill_video}
......@@ -18,9 +18,13 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BaseBrowser, BasePage, BrokenPageError
from weboob.capabilities.video import BaseVideo
from weboob.tools.browser.decorators import id2url
from StringIO import StringIO
from time import time
import re
import urlparse
try:
import json
......@@ -28,7 +32,42 @@ except ImportError:
import simplejson as json
__all__ = ['RadioFranceBrowser']
__all__ = ['RadioFranceBrowser', 'RadioFranceVideo']
class RadioFranceVideo(BaseVideo):
    """
    Replay of a past broadcast.

    Identifiers have the form '<radio_id>-<replay_id>', for example
    'franceinter-263735'.
    """
    # Radio site names accepted by the replay and player page URL patterns.
    RADIOS = ('franceinter', 'franceculture')

    @classmethod
    def id2url(cls, _id):
        """
        Build the player page URL for an identifier.

        :param _id: identifier of the form '<radio_id>-<replay_id>'
        :returns: the 'reecouter' player URL on the radio's website
        :raises ValueError: if the identifier contains no '-' separator
        """
        # maxsplit=1 yields at most two parts, matching the two-name
        # unpacking below; the previous maxsplit of 2 could produce three
        # parts and fail on any identifier containing a second dash.
        radio_id, replay_id = _id.split('-', 1)
        return 'http://www.%s.fr/player/reecouter?play=%s' % \
            (radio_id, replay_id)
class PlayerPage(BasePage):
    """Replay player page, from which the audio file URL is extracted."""
    # Matches the player URL of any supported radio; captures the radio
    # name and the numeric replay id.
    URL = r'^http://www\.(?P<radio_id>%s)\.fr/player/reecouter\?play=(?P<replay_id>\d+)$' \
        % '|'.join(RadioFranceVideo.RADIOS)
    # Not referenced by get_url() below.
    MP3_REGEXP = re.compile(r'sites%2Fdefault.+.(?:MP3|mp3)')

    def get_url(self):
        """
        Return the absolute URL of the audio file for this replay.

        The path is read from the 'urlAOD' parameter found in the 'src'
        attribute of the embedded player element and prefixed with the
        radio's website address.
        """
        station = self.groups[0]
        root = self.document.getroot()
        embed = self.parser.select(root, '#rfPlayer embed', 1)
        # NOTE(review): parse_qs receives the whole 'src' value, not only
        # its query string -- confirm that 'urlAOD' is never the first
        # parameter, otherwise its key would carry the path prefix.
        params = urlparse.parse_qs(embed.attrib['src'])
        relative_path = params['urlAOD'][0]
        return 'http://www.%s.fr/%s' % (station, relative_path)
class ReplayPage(BasePage):
    """Broadcast ("emission") page that embeds a replay."""
    # Matches the emission page URL of any supported radio; captures the
    # radio name.
    URL = r'^http://www\.(?P<radio_id>%s)\.fr/emission-.+$' \
        % '|'.join(RadioFranceVideo.RADIOS)

    def get_id(self):
        """
        Find the replay referenced by this page.

        :returns: a (radio_id, player_id) tuple built from the first
                  'div.node-rf_diffusion' element whose id attribute looks
                  like 'node-<digits>', or None when no element qualifies
        """
        station = self.groups[0]
        root = self.document.getroot()
        for div in self.parser.select(root, 'div.node-rf_diffusion'):
            found = re.match(r'^node-(\d+)$', div.attrib.get('id', ''))
            if not found:
                continue
            return (station, found.group(1))
        # No matching element on the page.
        return None
class DataPage(BasePage):
......@@ -54,6 +93,7 @@ class RssPage(BasePage):
class RssAntennaPage(BasePage):
ENCODING = 'ISO-8859-1'
def get_track(self):
# This information is not always available
try:
......@@ -71,7 +111,9 @@ class RadioFranceBrowser(BaseBrowser):
ENCODING = 'UTF-8'
PAGES = {r'/playerjs/direct/donneesassociees/html\?guid=$': DataPage,
r'http://players.tv-radio.com/radiofrance/metadatas/([a-z]+)RSS.html': RssPage,
r'http://players.tv-radio.com/radiofrance/metadatas/([a-z]+)RSS_a_lantenne.html': RssAntennaPage}
r'http://players.tv-radio.com/radiofrance/metadatas/([a-z]+)RSS_a_lantenne.html': RssAntennaPage,
PlayerPage.URL: PlayerPage,
ReplayPage.URL: ReplayPage}
def get_current_playerjs(self, _id):
self.location('http://www.%s.fr/playerjs/direct/donneesassociees/html?guid=' % _id)
......@@ -103,3 +145,23 @@ class RadioFranceBrowser(BaseBrowser):
result = self.page.get_track()
self.ENCODING = RadioFranceBrowser.ENCODING
return result
@id2url(RadioFranceVideo.id2url)
def get_video(self, url):
    """
    Resolve a player or emission page URL into a RadioFranceVideo.

    The id2url decorator is given RadioFranceVideo.id2url (presumably so
    that bare identifiers are turned into player URLs before this method
    runs -- confirm against the decorator).

    :param url: player page URL or emission page URL
    :returns: a RadioFranceVideo whose id is '<radio_id>-<replay_id>', or
              None when the URL matches neither page pattern or the
              emission page exposes no replay id
    """
    radio_id = replay_id = None
    match = re.match(PlayerPage.URL, url)
    if match:
        # Player URLs already carry both parts of the id; no request needed.
        radio_id, replay_id = match.groups()
    elif re.match(ReplayPage.URL, url):
        self.location(url)
        assert self.is_on_page(ReplayPage)
        # get_id() returns nothing when the page has no matching node;
        # unpacking that directly would raise TypeError, so check first.
        ids = self.page.get_id()
        if ids:
            radio_id, replay_id = ids

    if radio_id and replay_id:
        return RadioFranceVideo('%s-%s' % (radio_id, replay_id))
    return None
@id2url(RadioFranceVideo.id2url)
def get_url(self, url):
    """
    Open a player page and return the URL it extracts.

    :param url: player page URL
    :returns: the value of the page's get_url()
    :raises AssertionError: if the loaded page is not a PlayerPage
    """
    self.location(url)
    on_player_page = self.is_on_page(PlayerPage)
    assert on_player_page
    stream_url = self.page.get_url()
    return stream_url
......@@ -27,3 +27,20 @@ class RadioFranceTest(BackendTest):
def test_get_radios(self):
    """The root of the resource tree must list at least one entry."""
    resources = list(self.backend.iter_resources([]))
    self.assertTrue(len(resources) > 0)
def test_get_video(self):
    """Replays can be fetched by emission URL, player URL or bare id."""
    # this should be available up to 24/10/2014 15h00
    expected_id = 'franceinter-263735'
    references = (
        'http://www.franceinter.fr/emission-vivre-avec-les-betes-y-arthus-bertrand-felins-g-tsai-s-envoler-conte-boreal-reha-hutin-30-m',
        'http://www.franceinter.fr/player/reecouter?play=263735',
        expected_id,
    )
    # All three forms must resolve to the same id and to an MP3 URL.
    for reference in references:
        video = self.backend.get_video(reference)
        assert video.id == expected_id
        self.backend.fillobj(video, ['url'])
        assert video.url.lower().endswith('.mp3')

    # france culture (no expiration known)
    culture_url = 'http://www.franceculture.fr/emission-la-dispute-expositions-paul-strand-youssef-nabil-et-dorothee-smith-2012-02-01'
    video = self.backend.get_video(culture_url)
    assert video.id
    self.backend.fillobj(video, ['url'])
    assert video.url.lower().endswith('.mp3')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment