browser.py 2.48 KB
Newer Older
1 2
# -*- coding: utf-8 -*-

# Copyright(C) 2016 Julien Veyssier
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
19 20


21 22 23 24
from weboob.browser.exceptions import BrowserHTTPNotFound
from weboob.browser import PagesBrowser
from weboob.browser.url import URL
from weboob.browser.profiles import Firefox
25 26 27

from .pages import SongResultsPage, SonglyricsPage, ArtistResultsPage, ArtistSongsPage, HomePage

28 29
import itertools

30 31 32 33

__all__ = ['ParolesmusiqueBrowser']


34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
class ParolesmusiqueBrowser(PagesBrowser):
    """Browser for the paroles-musique.com lyrics website.

    Declares the URL patterns of the site's pages and offers two entry
    points: ``iter_lyrics`` to run a search and ``get_lyrics`` to fetch the
    lyrics of one song.
    """

    PROFILE = Firefox()
    TIMEOUT = 30

    # Maximum number of artists followed after an artist search; every
    # followed artist costs one more page load.
    MAX_ARTISTS = 3

    BASEURL = 'http://www.paroles-musique.com/'

    # URL patterns and the page class associated with each of them.
    home = URL('$', HomePage)
    songResults = URL('lyrics-paroles-0-.*,0.php', SongResultsPage)
    artistResults = URL('lyrics-paroles-.*-0,0.php', ArtistResultsPage)
    songLyrics = URL('paroles-(?P<songid>.*,p[0-9]*)', SonglyricsPage)
    artistSongs = URL('paroles-(?P<artistid>.*,a[0-9]*)', ArtistSongsPage)

    def iter_lyrics(self, criteria, pattern):
        """Run a search from the home page and return the matching lyrics.

        :param criteria: ``'song'`` or ``'artist'``; it is forwarded to the
            home page's ``search_lyrics`` together with ``pattern``.
        :param pattern: the text to search for.
        :return: for ``'song'``, the result of the results page's
            ``iter_lyrics()``; for ``'artist'``, an iterator chaining the
            ``iter_lyrics()`` of the first ``MAX_ARTISTS`` artist pages;
            for any other criteria, an empty iterator.
        :raises AssertionError: when the browser is not on the page expected
            at a given step.
        """
        self.home.stay_or_go()
        assert self.home.is_here()
        self.page.search_lyrics(criteria, pattern)

        if criteria == 'song':
            assert self.songResults.is_here()
            return self.page.iter_lyrics()

        if criteria == 'artist':
            assert self.artistResults.is_here()
            artist_ids = self.page.get_artist_ids()
            # Only the first MAX_ARTISTS artists are followed to avoid too
            # many page loadings. The pages are requested right here, one
            # after the other; only the consumption of their results is
            # deferred to the caller.
            per_artist = [
                self.artistSongs.go(artistid=aid).iter_lyrics()
                for aid in artist_ids[:self.MAX_ARTISTS]
            ]
            return itertools.chain.from_iterable(per_artist)

        # Unsupported criteria: hand back something iterable rather than an
        # implicit None, so callers looping over the result do not crash.
        return iter(())

    def get_lyrics(self, id):
        """Fetch the lyrics of a single song.

        :param id: song identifier, substituted for ``songid`` in the
            ``songLyrics`` URL pattern.
        :return: the result of the lyrics page's ``get_lyrics()``, or
            ``None`` when ``BrowserHTTPNotFound`` is raised.
        """
        try:
            self.songLyrics.go(songid=id)
            return self.page.get_lyrics()
        except BrowserHTTPNotFound:
            return None
75