# -*- coding: utf-8 -*-

# Copyright(C) 2016 Julien Veyssier
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.


from weboob.capabilities.lyrics import SongLyrics
from weboob.capabilities.base import NotLoaded, NotAvailable

from weboob.browser.elements import ItemElement, ListElement, method
from weboob.browser.pages import HTMLPage
from weboob.browser.filters.standard import Regexp, CleanText
from weboob.browser.filters.html import CleanHTML


class SearchPage(HTMLPage):
    # Page whose "songs_list" table rows are turned into SongLyrics entries.
    @method
    class iter_lyrics(ListElement):
        # Only table rows holding exactly two cells are treated as items.
        item_xpath = '//table[has-class("songs_list")]//tr[count(td) = 2]'

        class item(ItemElement):
            klass = SongLyrics

            def obj_id(self):
                """Build the entry id from the link found in the second cell.

                The href has its '.html' suffix removed and is split on '/';
                the components at indices 2, 3 and 4 are joined with '|'.
                This is the same 'lid|aid|sid' layout that
                LyricsPage.get_lyrics derives from the page URL.

                Raises IndexError when the href has fewer than five
                '/'-separated components.
                """
                href = CleanText('./td[2]/a/@href', default=NotAvailable)(self)
                spl = href.replace('.html', '').split('/')
                lid = spl[2]
                aid = spl[3]
                sid = spl[4]
                return '%s|%s|%s' % (lid, aid, sid)

            # The second cell's text is expected to end with " lyrics";
            # the regexp keeps only what precedes that suffix.
            obj_title = Regexp(CleanText('./td[2]', default=NotAvailable), '(.*) lyrics$')
            obj_artist = CleanText('./td[1]/a', default=NotAvailable)
            # Lyrics text is not present in this listing; left unloaded here.
            obj_content = NotLoaded


class LyricsPage(HTMLPage):
    # Page for a single song, parsed into one SongLyrics object.
    @method
    class get_lyrics(ItemElement):
        klass = SongLyrics

        def obj_id(self):
            """Build the 'lid|aid|sid' id from the URL of the current page.

            The scheme prefix and the '.html' suffix are removed, the rest is
            split on '/', and the components at indices 2, 3 and 4 (index 0
            being the host) are joined with '|'.

            Raises IndexError when the URL has fewer than five '/'-separated
            components once the scheme is removed.
            """
            # Strip whatever scheme is present, not just 'http://': with an
            # 'https://' URL left intact, the split would yield 'https:', ''
            # and the host first, shifting every index and giving a wrong id.
            url = self.page.url.split('://', 1)[-1]
            spl = url.replace('.html', '').split('/')
            lid = spl[2]
            aid = spl[3]
            sid = spl[4]
            return '%s|%s|%s' % (lid, aid, sid)

        # newlines=False is passed so CleanText does not collapse the line
        # breaks of the lyrics (presumably; confirm against CleanText docs).
        obj_content = CleanText(CleanHTML('//p[@id="lyrics_text"]', default=NotAvailable), newlines=False)
        obj_artist = CleanText('//a[has-class("artist_name")]', default=NotAvailable)
        # The heading is expected to end with " lyrics"; keep what precedes it.
        obj_title = Regexp(CleanText('//h1[has-class("song_name")]', default=NotAvailable), '(.*) lyrics$')