diff --git a/modules/seeklyrics/__init__.py b/modules/seeklyrics/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3ca63d6321b0ebda27a51d81ebaa3658b7bccdc1 --- /dev/null +++ b/modules/seeklyrics/__init__.py @@ -0,0 +1,3 @@ +from .backend import SeeklyricsBackend + +__all__ = ['SeeklyricsBackend'] diff --git a/modules/seeklyrics/backend.py b/modules/seeklyrics/backend.py new file mode 100644 index 0000000000000000000000000000000000000000..44d783c03dbf8e5cc07bfa401cfa14a7dbfdae69 --- /dev/null +++ b/modules/seeklyrics/backend.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2013 Julien Veyssier +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. 
class SeeklyricsBackend(BaseBackend, ICapLyrics):
    """Backend exposing the SeekLyrics website through the lyrics capability.

    Every operation is forwarded to the browser class named by ``BROWSER``.
    """

    # Backend metadata.
    NAME = 'seeklyrics'
    DESCRIPTION = 'SeekLyrics lyrics website'
    VERSION = '0.f'
    LICENSE = 'AGPLv3+'
    MAINTAINER = u'Julien Veyssier'
    EMAIL = 'julien.veyssier@aiur.fr'

    # Browser class used by this backend.
    BROWSER = SeeklyricsBrowser

    def create_default_browser(self):
        """Return the browser produced by ``create_browser()`` without arguments."""
        browser = self.create_browser()
        return browser

    def get_lyrics(self, id):
        """Return the result of the browser's ``get_lyrics`` for ``id``."""
        lyrics = self.browser.get_lyrics(id)
        return lyrics

    def iter_lyrics(self, pattern):
        """Return the browser's search results for ``pattern``.

        The pattern is encoded to UTF-8 and URL-quoted with ``quote_plus``
        before it is handed to the browser.
        """
        encoded = pattern.encode('utf-8')
        query = quote_plus(encoded)
        return self.browser.iter_lyrics(query)
class SeeklyricsBrowser(BaseBrowser):
    """Browser for www.seeklyrics.com.

    ``PAGES`` maps search URLs to ``ResultsPage`` and lyrics URLs to
    ``SonglyricsPage``.
    """

    PROTOCOL = 'http'
    DOMAIN = 'www.seeklyrics.com'
    ENCODING = 'iso-8859-1'
    USER_AGENT = BaseBrowser.USER_AGENTS['wget']
    PAGES = {
        'http://www.seeklyrics.com/search.php.*': ResultsPage,
        'http://www.seeklyrics.com/lyrics/.*': SonglyricsPage,
    }

    def iter_lyrics(self, pattern):
        """Open the search page for ``pattern`` and return its results.

        The pattern is UTF-8 encoded and inserted into the query string
        as-is; no URL quoting is performed here.
        """
        query = pattern.encode('utf-8')
        search_url = 'http://www.seeklyrics.com/search.php?q=%s&t=1' % query
        self.location(search_url)
        assert self.is_on_page(ResultsPage)
        results_page = self.page
        return results_page.iter_lyrics()

    def get_lyrics(self, id):
        """Open the lyrics page for ``id`` and return what it extracts."""
        lyrics_url = 'http://www.seeklyrics.com/lyrics/%s.html' % id
        self.location(lyrics_url)
        assert self.is_on_page(SonglyricsPage)
        lyrics_page = self.page
        return lyrics_page.get_lyrics(id)
class ResultsPage(BasePage):
    """Search results page: one table row per matching song."""

    def iter_lyrics(self):
        """Yield a ``SongLyrics`` for every result row of the page.

        Each yielded object carries the song id, title and artist; its
        ``content`` is left as ``NotLoaded``. The artist falls back to
        ``NotAvailable`` when the artist link has no direct text.
        """
        rows = self.parser.select(self.document.getroot(),
                                  'table[title~=Results] tr')
        # The first row of the results table is not a song entry
        # (presumably the header row), so it is skipped.
        for tr in list(rows)[1:]:
            ftitle = self.parser.select(tr, 'a > font > font', 1)
            # The song link is the <a> two levels above the inner <font>.
            link = ftitle.getparent().getparent()
            title = link.text_content()
            href = link.attrib.get('href', '')
            # '/lyrics/<id>.html' -> '<id>'
            song_id = href.replace('/lyrics/', '').replace('.html', '')
            # The artist is read from the last link of the row.
            aartist = self.parser.select(tr, 'a')[-1]
            artist = aartist.text or NotAvailable
            songlyrics = SongLyrics(song_id, title)
            songlyrics.artist = artist
            songlyrics.content = NotLoaded
            yield songlyrics


class SonglyricsPage(BasePage):
    """Page showing the lyrics of a single song."""

    def get_lyrics(self, id):
        """Build the ``SongLyrics`` described by this page.

        ``id`` is stored unchanged on the returned object. Title and artist
        are read from a heading of the form '<title> Lyrics by <artist>';
        both stay ``NotAvailable`` when the heading is missing, empty or
        does not contain that separator.
        """
        artist = NotAvailable
        title = NotAvailable
        headings = self.parser.select(self.document.getroot(),
                                      'table.text td > b > h2')
        if len(headings) > 0:
            # .text is None when the heading has no leading text node.
            parts = (headings[0].text or '').split(' Lyrics by ')
            # Only trust the split when the separator was actually found;
            # indexing parts[1] unconditionally raised IndexError before.
            if len(parts) > 1:
                title = parts[0]
                artist = parts[1]
        content = self.parser.select(self.document.getroot(),
                                     'div#songlyrics', 1).text_content().strip()
        songlyrics = SongLyrics(id, title)
        songlyrics.artist = artist
        songlyrics.content = content
        return songlyrics
class SeeklyricsTest(BackendTest):
    """Test of the seeklyrics backend: search, then fetch each result."""

    BACKEND = 'seeklyrics'

    def test_search(self):
        """Search for 'Complainte' and check every result and its full lyrics."""
        found = list(self.backend.iter_lyrics('Complainte'))
        for summary in found:
            # Search results must be identified but not yet loaded.
            for field in ('id', 'title', 'artist'):
                assert getattr(summary, field)
            assert summary.content is NotLoaded

            # Fetching by id must return a fully populated object.
            detailed = self.backend.get_lyrics(summary.id)
            for field in ('id', 'title', 'artist'):
                assert getattr(detailed, field)
            assert detailed.content is not NotLoaded
# Start the Booblyrics application only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    Booblyrics.run()
""" - songlyrics = self.get_object(id, 'get_lyrics') - if not lyrics: + # TODO restore get_object line and handle fillobj + #songlyrics = self.get_object(id, 'get_lyrics') + songlyrics = None + _id, backend = self.parse_id(id) + for _backend, result in self.do('get_lyrics', _id, backends=backend): + if result: + backend = _backend + songlyrics = result + + if not songlyrics: print >>sys.stderr, 'Song lyrics not found: %s' % id return 3 self.start_format() - self.format(torrent) + self.format(songlyrics) self.flush() def do_search(self, pattern):