From d3a630b1042e359b5ffc54b1925cd3572ee3ad13 Mon Sep 17 00:00:00 2001
From: Johann Broudin <45597667+broujo@users.noreply.github.com>
Date: Sun, 14 Jun 2020 17:14:45 +0200
Subject: [PATCH] [virgin radio] Fix + Move to dynamically generated radios

Radios are now dynamically parsed from the website, using the JavaScript
file used by the player.

Adds new radios (perrier, scene, tonic). Hopefully future radios will be
added automatically.

Changed the way the current title is parsed. The information is no
longer part of the media stream's ICY headers; the website's API
endpoints are used instead.
---
 modules/virginradio/module.py | 122 +++++++++++++++++-----------------
 1 file changed, 60 insertions(+), 62 deletions(-)
diff --git a/modules/virginradio/module.py b/modules/virginradio/module.py
index 918d24f6f7..77e903b0de 100644
--- a/modules/virginradio/module.py
+++ b/modules/virginradio/module.py
@@ -24,7 +24,8 @@
 from weboob.capabilities.collection import CapCollection
 from weboob.tools.backend import Module
 from weboob.browser import Browser
-from weboob.tools.misc import to_unicode
+import re
+from lxml import html
 
 
 __all__ = ['VirginRadioModule']
@@ -39,86 +40,83 @@ class VirginRadioModule(Module, CapRadio, CapCollection):
     LICENSE = 'AGPLv3+'
     BROWSER = Browser
 
-    _RADIOS = {
-            'officiel': (
-                u'Virgin Radio',
-                u'Virgin Radio',
-                u'http://mp3lg3.scdn.arkena.com/10490/virginradio.mp3',
-                64),
-            'new': (
-                u'Virgin Radio New',
-                u'Virgin Radio New',
-                u'http://mp3lg3.tdf-cdn.com/9145/lag_103228.mp3',
-                64),
-            'classics': (
-                u'Virgin Radio Classics',
-                u'Virgin Radio Classics',
-                u'http://mp3lg3.tdf-cdn.com/9146/lag_103325.mp3',
-                64),
-            'electroshock': (
-                u'Virgin Radio Electroshock',
-                u'Virgin Radio Electroshock',
-                u'http://mp3lg3.tdf-cdn.com/9148/lag_103401.mp3',
-                64),
-            'hits': (
-                u'Virgin Radio Hits',
-                u'Virgin Radio Hits',
-                u'http://mp3lg3.tdf-cdn.com/9150/lag_103440.mp3',
-                64),
-            'rock': (
-                u'Virgin Radio Rock',
-                u'Virgin Radio Rock',
-                u'http://mp3lg3.scdn.arkena.com/9151/lag_103523.mp3',
-                64)
-            }
-
-    def get_stream_info(self, radio, url):
-        stream = BaseAudioStream(0)
-        current = StreamInfo(0)
+    _RADIOS_URL = 'https://www.virginradio.fr/desktop/js/all.min.js'
+    _RADIOS_RE = (
+            r'{id:(?P<id>\d+),'
+            r'id_radio:(?P<id_radio>\d+),'
+            r'type:"[^"]*",'
+            r'name:"(?P<name>[^"]*)",'
+            r'hls_source:"(?P<hls_source>[^"]*)",'
+            r'source:"(?P<source>[^"]*)"'
+            )
 
-        r = self.browser.open(url, stream=True, headers={'Icy-Metadata':'1'})
+    _PROGRAM_URL = 'https://www.virginradio.fr/calendar/api/current.json/argv/calendar_type/emission/origine_flags/virginradio/get_current_foreign_type/TRUE'
+    _INFO_URL = 'https://www.virginradio.fr/radio/api/get_current_event/?id_radio=%s'
 
-        stream.bitrate = int(r.headers['icy-br'].split(',')[0])
+    _WEBRADIOS_URL = 'https://www.virginradio.fr/webradios/'
+    _XPATH_RADIO_NAME = '//ul/li[@class="brick"]/div/div[@data-id="%s"]/ancestor::li/div/h3/text()'
 
-        r.raw.read(int(r.headers['icy-metaint']))
-        size = ord(r.raw.read(1))
-        content = r.raw.read(size*16)
-        r.close()
-
-        for s in content.split("\x00")[0].split(";"):
-            a = s.split("=")
-            if a[0] == "StreamTitle":
-                stream.title = to_unicode(a[1].split("'")[1])
-                res = stream.title.split(" - ")
-                current.who = to_unicode(res[0])
-                if(len(res) == 1):
-                    current.what = ""
-                else:
-                    current.what = to_unicode(res[1])
-
-        stream.format=u'mp3'
-        stream.url = url
-        return [stream], current
+    _RADIOS = {}
 
     def get_radio(self, radio):
+        self.get_radios()
         if not isinstance(radio, Radio):
             radio = Radio(radio)
 
         if radio.id not in self._RADIOS:
             return None
 
-        title, description, url, bitrate = self._RADIOS[radio.id]
+        radio.title = self._RADIOS[radio.id]['title']
+        radio.description = self._RADIOS[radio.id]['title']
+
+        if radio.id == 'live':
+            r = self.browser.open(self._PROGRAM_URL)
+            info = r.json()['root_tab']['events'][0]
+            radio.description = "%s - %s" % (info['title'], info['tab_foreign_type']['resum'])
 
-        radio.title = title
-        radio.description = description
+        stream_hls = BaseAudioStream(0)
+        stream_hls.url = self._RADIOS[radio.id]['hls_source']
+        stream_hls.bitrate = 128
+        stream_hls.format=u'aac'
 
-        radio.streams, radio.current = self.get_stream_info(radio.id, url)
+        stream = BaseAudioStream(0)
+        stream.url = self._RADIOS[radio.id]['source']
+        stream.bitrate = 128
+        stream.format=u'mp3'
+
+        current = StreamInfo(0)
+        current.who = ''
+        current.what = ''
+
+        r = self.browser.open(self._INFO_URL % (self._RADIOS[radio.id]['radio_id']))
+        info = r.json()['root_tab']['event']
+        if len(info) > 0:
+            current.who = info[0]['artist']
+            current.what = info[0]['title']
+        radio.streams = [stream_hls, stream]
+        radio.current = current
         return radio
 
+    def get_radios(self):
+        """Fill the _RADIOS cache from the website on first use only."""
+        if self._RADIOS:
+            return  # cached: do not fetch the webradios page again
+        tree = html.fromstring(self.browser.open(self._WEBRADIOS_URL).content)
+        r = self.browser.open(self._RADIOS_URL)
+        for m in re.finditer(self._RADIOS_RE, r.text):
+            self._RADIOS[m.group('name')] = {
+                    'radio_id': m.group('id_radio'),
+                    'name': m.group('name'),
+                    'hls_source': m.group('hls_source'),
+                    'source': m.group('source'),
+                    'title': tree.xpath(self._XPATH_RADIO_NAME % (m.group('id_radio')))[0] }
+
     def iter_resources(self, objs, split_path):
         if Radio in objs:
             self._restrict_level(split_path)
 
+            self.get_radios()
+
             for id in self._RADIOS:
                 yield self.get_radio(id)
 
-- 
GitLab