From d3a630b1042e359b5ffc54b1925cd3572ee3ad13 Mon Sep 17 00:00:00 2001 From: Johann Broudin <45597667+broujo@users.noreply.github.com> Date: Sun, 14 Jun 2020 17:14:45 +0200 Subject: [PATCH] [virgin radio] Fix + Move to dynamically generated radios Radios are now dynamically parsed from the website using the JavaScript file used by the player. Adds new radios (perrier, scene, tonic). Hopefully future radios will be added automatically. Changed the way the current title is parsed. The information is not part of the media stream's icy header anymore. The website's API endpoints are used instead. --- modules/virginradio/module.py | 122 +++++++++++++++++----------------- 1 file changed, 60 insertions(+), 62 deletions(-) diff --git a/modules/virginradio/module.py b/modules/virginradio/module.py index 918d24f6f7..77e903b0de 100644 --- a/modules/virginradio/module.py +++ b/modules/virginradio/module.py @@ -24,7 +24,8 @@ from weboob.capabilities.collection import CapCollection from weboob.tools.backend import Module from weboob.browser import Browser -from weboob.tools.misc import to_unicode +import re +from lxml import html __all__ = ['VirginRadioModule'] @@ -39,86 +40,83 @@ class VirginRadioModule(Module, CapRadio, CapCollection): LICENSE = 'AGPLv3+' BROWSER = Browser - _RADIOS = { - 'officiel': ( - u'Virgin Radio', - u'Virgin Radio', - u'http://mp3lg3.scdn.arkena.com/10490/virginradio.mp3', - 64), - 'new': ( - u'Virgin Radio New', - u'Virgin Radio New', - u'http://mp3lg3.tdf-cdn.com/9145/lag_103228.mp3', - 64), - 'classics': ( - u'Virgin Radio Classics', - u'Virgin Radio Classics', - u'http://mp3lg3.tdf-cdn.com/9146/lag_103325.mp3', - 64), - 'electroshock': ( - u'Virgin Radio Electroshock', - u'Virgin Radio Electroshock', - u'http://mp3lg3.tdf-cdn.com/9148/lag_103401.mp3', - 64), - 'hits': ( - u'Virgin Radio Hits', - u'Virgin Radio Hits', - u'http://mp3lg3.tdf-cdn.com/9150/lag_103440.mp3', - 64), - 'rock': ( - u'Virgin Radio Rock', - u'Virgin Radio Rock', - 
u'http://mp3lg3.scdn.arkena.com/9151/lag_103523.mp3', - 64) - } - - def get_stream_info(self, radio, url): - stream = BaseAudioStream(0) - current = StreamInfo(0) + _RADIOS_URL = 'https://www.virginradio.fr/desktop/js/all.min.js' + _RADIOS_RE = ( + r'{id:(?P<id>\d+),' + r'id_radio:(?P<id_radio>\d+),' + r'type:"[^"]*",' + r'name:"(?P<name>[^"]*)",' + r'hls_source:"(?P<hls_source>[^"]*)",' + r'source:"(?P<source>[^"]*)"' + ) - r = self.browser.open(url, stream=True, headers={'Icy-Metadata':'1'}) + _PROGRAM_URL = 'https://www.virginradio.fr/calendar/api/current.json/argv/calendar_type/emission/origine_flags/virginradio/get_current_foreign_type/TRUE' + _INFO_URL = 'https://www.virginradio.fr/radio/api/get_current_event/?id_radio=%s' - stream.bitrate = int(r.headers['icy-br'].split(',')[0]) + _WEBRADIOS_URL = 'https://www.virginradio.fr/webradios/' + _XPATH_RADIO_NAME = '//ul/li[@class="brick"]/div/div[@data-id="%s"]/ancestor::li/div/h3/text()' - r.raw.read(int(r.headers['icy-metaint'])) - size = ord(r.raw.read(1)) - content = r.raw.read(size*16) - r.close() - - for s in content.split("\x00")[0].split(";"): - a = s.split("=") - if a[0] == "StreamTitle": - stream.title = to_unicode(a[1].split("'")[1]) - res = stream.title.split(" - ") - current.who = to_unicode(res[0]) - if(len(res) == 1): - current.what = "" - else: - current.what = to_unicode(res[1]) - - stream.format=u'mp3' - stream.url = url - return [stream], current + _RADIOS = {} def get_radio(self, radio): + self.get_radios() if not isinstance(radio, Radio): radio = Radio(radio) if radio.id not in self._RADIOS: return None - title, description, url, bitrate = self._RADIOS[radio.id] + radio.title = self._RADIOS[radio.id]['title'] + radio.description = self._RADIOS[radio.id]['title'] + + if radio.id == 'live': + r = self.browser.open(self._PROGRAM_URL) + info = r.json()['root_tab']['events'][0] + radio.description = "%s - %s" % (info['title'], info['tab_foreign_type']['resum']) - radio.title = title - radio.description = description + stream_hls = 
BaseAudioStream(0) + stream_hls.url = self._RADIOS[radio.id]['hls_source'] + stream_hls.bitrate = 128 + stream_hls.format=u'aac' - radio.streams, radio.current = self.get_stream_info(radio.id, url) + stream = BaseAudioStream(0) + stream.url = self._RADIOS[radio.id]['source'] + stream.bitrate = 128 + stream.format=u'mp3' + + current = StreamInfo(0) + current.who = '' + current.what = '' + + r = self.browser.open(self._INFO_URL % (self._RADIOS[radio.id]['radio_id'])) + info = r.json()['root_tab']['event'] + if len(info) > 0: + current.who = info[0]['artist'] + current.what = info[0]['title'] + radio.streams = [stream_hls, stream] + radio.current = current return radio + def get_radios(self): + webradios = self.browser.open(self._WEBRADIOS_URL) + tree = html.fromstring(webradios.content) + + if not self._RADIOS: + r = self.browser.open(self._RADIOS_URL) + for m in re.finditer(self._RADIOS_RE, r.text): + self._RADIOS[m.group('name')] = { + 'radio_id': m.group('id_radio'), + 'name': m.group('name'), + 'hls_source': m.group('hls_source'), + 'source': m.group('source'), + 'title': tree.xpath(self._XPATH_RADIO_NAME % (m.group('id_radio')))[0] } + def iter_resources(self, objs, split_path): if Radio in objs: self._restrict_level(split_path) + self.get_radios() + for id in self._RADIOS: yield self.get_radio(id) -- GitLab