From 3c9c2c749091071e089a05aad81856ac1330ffa5 Mon Sep 17 00:00:00 2001 From: Laurent Bachelier Date: Fri, 3 Feb 2012 13:26:41 +0100 Subject: [PATCH] Split backend.py into backend.py and browser.py It was becoming too large. --- modules/radiofrance/backend.py | 59 +----------------------- modules/radiofrance/browser.py | 82 ++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 58 deletions(-) create mode 100644 modules/radiofrance/browser.py diff --git a/modules/radiofrance/backend.py b/modules/radiofrance/backend.py index 118f9efb81..d9e1315507 100644 --- a/modules/radiofrance/backend.py +++ b/modules/radiofrance/backend.py @@ -22,71 +22,14 @@ from weboob.capabilities.radio import ICapRadio, Radio, Stream, Emission from weboob.capabilities.collection import ICapCollection, CollectionNotFound, Collection from weboob.tools.backend import BaseBackend -from weboob.tools.browser import BaseBrowser, BasePage -from StringIO import StringIO -from time import time -try: - import json -except ImportError: - import simplejson as json +from .browser import RadioFranceBrowser __all__ = ['RadioFranceBackend'] -class DataPage(BasePage): - def get_title(self): - for metas in self.parser.select(self.document.getroot(), 'div.metas'): - title = unicode(metas.text_content()).strip() - if len(title): - return title - - -class RssPage(BasePage): - def get_title(self): - titles = [] - for heading in self.parser.select(self.document.getroot(), 'h1, h2, h3, h4'): - # Remove newlines/multiple spaces - words = heading.text_content() - if words: - for word in unicode(words).split(): - titles.append(word) - if len(titles): - return ' '.join(titles) - - -class RadioFranceBrowser(BaseBrowser): - DOMAIN = None - ENCODING = 'UTF-8' - PAGES = {r'/playerjs/direct/donneesassociees/html\?guid=$': DataPage, - r'http://players.tv-radio.com/radiofrance/metadatas/([a-z]+)RSS.html': RssPage} - - def get_current_playerjs(self, id): - 
self.location('http://www.%s.fr/playerjs/direct/donneesassociees/html?guid=' % id) - assert self.is_on_page(DataPage) - - return self.page.get_title() - - def get_current_rss(self, id): - self.location('http://players.tv-radio.com/radiofrance/metadatas/%sRSS.html' % id) - assert self.is_on_page(RssPage) - - return self.page.get_title() - - def get_current_direct(self, id): - json_data = self.openurl('http://www.%s.fr/sites/default/files/direct.json?_=%s' % (id, int(time()))) - data = json.load(json_data) - - document = self.parser.parse(StringIO(data.get('html'))) - artist = document.findtext('//span[@class="artiste"]') - title = document.findtext('//span[@class="titre"]') - artist = unicode(artist) if artist else None - title = unicode(title) if title else None - return (artist, title) - - class RadioFranceBackend(BaseBackend, ICapRadio, ICapCollection): NAME = 'radiofrance' MAINTAINER = 'Laurent Bachelier' diff --git a/modules/radiofrance/browser.py b/modules/radiofrance/browser.py new file mode 100644 index 0000000000..4386371edd --- /dev/null +++ b/modules/radiofrance/browser.py @@ -0,0 +1,82 @@ +# * -*- coding: utf-8 -*- + +# Copyright(C) 2011-2012 Johann Broudin, Laurent Bachelier +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. 
+ +from weboob.tools.browser import BaseBrowser, BasePage + +from StringIO import StringIO +from time import time + +try: + import json +except ImportError: + import simplejson as json + + +__all__ = ['RadioFranceBrowser'] + + +class DataPage(BasePage): + def get_title(self): + for metas in self.parser.select(self.document.getroot(), 'div.metas'): + title = unicode(metas.text_content()).strip() + if len(title): + return title + + +class RssPage(BasePage): + def get_title(self): + titles = [] + for heading in self.parser.select(self.document.getroot(), 'h1, h2, h3, h4'): + # Remove newlines/multiple spaces + words = heading.text_content() + if words: + for word in unicode(words).split(): + titles.append(word) + if len(titles): + return ' '.join(titles) + + +class RadioFranceBrowser(BaseBrowser): + DOMAIN = None + ENCODING = 'UTF-8' + PAGES = {r'/playerjs/direct/donneesassociees/html\?guid=$': DataPage, + r'http://players.tv-radio.com/radiofrance/metadatas/([a-z]+)RSS.html': RssPage} + + def get_current_playerjs(self, _id): + self.location('http://www.%s.fr/playerjs/direct/donneesassociees/html?guid=' % _id) + assert self.is_on_page(DataPage) + + return self.page.get_title() + + def get_current_rss(self, _id): + self.location('http://players.tv-radio.com/radiofrance/metadatas/%sRSS.html' % _id) + assert self.is_on_page(RssPage) + + return self.page.get_title() + + def get_current_direct(self, _id): + json_data = self.openurl('http://www.%s.fr/sites/default/files/direct.json?_=%s' % (_id, int(time()))) + data = json.load(json_data) + + document = self.parser.parse(StringIO(data.get('html'))) + artist = document.findtext('//span[@class="artiste"]') + title = document.findtext('//span[@class="titre"]') + artist = unicode(artist) if artist else None + title = unicode(title) if title else None + return (artist, title) -- GitLab