# -*- coding: utf-8 -*-

# Copyright(C) 2015      Vincent A
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
19

20
from __future__ import unicode_literals
21 22

from weboob.browser import LoginBrowser, URL, need_login
23 24
from weboob.browser.exceptions import HTTPNotFound
from weboob.exceptions import BrowserIncorrectPassword
25
from weboob.capabilities.image import Thumbnail
26 27 28 29

from .pages import PageLogin, PageDashboard, PageChapter, PageSection
from .video import MoocVideo

30 31
import re

32 33

class FunmoocBrowser(LoginBrowser):
    """Browser for the FUN MOOC site (``https://www.fun-mooc.fr``).

    Exposes the site as a hierarchy of courses, chapters, sections and
    videos. Course identifiers handed to the public methods use ``-`` as
    a separator; they are converted to the ``/``-separated form expected
    by the site URLs before any request is made.
    """

    BASEURL = 'https://www.fun-mooc.fr'

    login = URL('/login', '/login_ajax', PageLogin)
    dashboard = URL('/dashboard', PageDashboard)
    course = URL('/courses/(?P<course>[^/]+/[^/]+/[^/]+)/courseware/?$',
                 '/courses/(?P<course>[^/]+/[^/]+/[^/]+)/info/?$',
                 PageChapter)
    chapter = URL('/courses/(?P<course>[^/]+/[^/]+/[^/]+)/courseware'
                  '/(?P<chapter>[0-9a-f]+)/$', PageChapter)
    section = URL('/courses/(?P<course>[^/]+/[^/]+/[^/]+)/courseware/'
                  '(?P<chapter>[0-9a-f]+)/(?P<section>[0-9a-f]+)/$', PageSection)

    file = URL(r'https://.*\.cloudfront\.net/videos/(?P<id>[^/]+)/'
               r'(?P<quality>\w+)\.mp4')

    def __init__(self, username, password, quality='hd', *args, **kwargs):
        """Create the browser.

        :param username: account login, forwarded to ``LoginBrowser``
        :param password: account password, forwarded to ``LoginBrowser``
        :param quality: preferred video quality; only stored on the
                        instance here (no method of this class reads it)
        """
        super(FunmoocBrowser, self).__init__(username, password, *args, **kwargs)
        self.quality = quality

    def _goto_or_fail(self, url, **kwargs):
        """Navigate with ``url.stay_or_go(**kwargs)`` and return its result.

        The navigation used to live inside ``assert`` statements, which
        are removed entirely when Python runs with ``-O``: the request
        was then never sent and ``self.page`` stayed on the previous
        page. The call is now unconditional; a falsy result still raises
        ``AssertionError`` so existing callers see the same exception.

        :raises AssertionError: if ``stay_or_go`` returns a falsy value
        """
        reached = url.stay_or_go(**kwargs)
        if not reached:
            raise AssertionError('navigation failed (params: %r)' % (kwargs,))
        return reached

    def do_login(self):
        """Submit the credentials and verify the session is logged in.

        :raises BrowserIncorrectPassword: if the dashboard page does not
                                          report a logged-in state
        """
        self.login.stay_or_go()
        # The CSRF token is taken from the session cookies and passed to
        # the login page together with the credentials.
        csrf = self.session.cookies.get('csrftoken')
        self.page.login(self.username, self.password, csrf)

        self.dashboard.stay_or_go()
        if not self.page.logged:
            raise BrowserIncorrectPassword()

    def get_video(self, url):
        """Build a ``MoocVideo`` for ``url``.

        The URL is used as the video id and as the download URL; the
        default title is the URL with ``:``, ``/`` and ``"`` replaced
        by ``-``.
        """
        v = MoocVideo(url)
        v.url = url
        v.ext = 'mp4'
        v.title = re.sub(r'[:/"]', '-', url)
        return v

    @need_login
    def iter_videos(self, course, chapter, section):
        """Yield the videos of one section.

        :param course: course id using ``-`` as separator
        :param chapter: chapter id
        :param section: section id
        :raises AssertionError: if the section page cannot be reached
        """
        course = course.replace('-', '/')
        self._goto_or_fail(self.section, course=course, chapter=chapter,
                           section=section)

        for info in self.page.iter_videos():
            video = self.get_video(info['url'])
            # Optional metadata overrides the defaults set by get_video().
            if info.get('thumbnail'):
                video.thumbnail = Thumbnail(info['thumbnail'])
            if info.get('title'):
                video.title = info['title']
            yield video

    @need_login
    def iter_sections(self, courseid, chapter):
        """Yield the section collections belonging to one chapter.

        Only collections whose first two path components equal
        ``[courseid, chapter]`` are yielded.

        :raises AssertionError: if the chapter page cannot be reached
        """
        course = courseid.replace('-', '/')
        self._goto_or_fail(self.chapter, course=course, chapter=chapter)

        wanted_prefix = [courseid, chapter]
        for coll in self.page.iter_sections():
            if coll.split_path[:2] == wanted_prefix:
                yield coll

    @need_login
    def iter_chapters(self, courseid):
        """Return the chapters listed on the course page.

        :raises AssertionError: if the course page cannot be reached
        """
        course = courseid.replace('-', '/')
        self._goto_or_fail(self.course, course=course)
        return self.page.iter_chapters()

    @need_login
    def iter_courses(self):
        """Return the courses listed on the dashboard page.

        :raises AssertionError: if the dashboard cannot be reached
        """
        self._goto_or_fail(self.dashboard)
        return self.page.iter_courses()

    @need_login
    def check_collection(self, path):
        """Tell whether ``path`` designates an existing collection.

        ``path`` holds up to three components: course, chapter, section.
        An empty path is always valid and a path longer than three
        components never is. Otherwise the URL matching the deepest
        component is opened, and the path is valid unless that request
        raises ``HTTPNotFound``.
        """
        if not path:
            return True
        if len(path) > 3:
            return False

        levels = ('course', 'chapter', 'section')
        params = dict(zip(levels, path))
        # Course ids use '-' in paths but '/' in site URLs.
        params['course'] = params['course'].replace('-', '/')

        # The URL attribute to open is named after the deepest level given.
        deepest = levels[len(path) - 1]
        try:
            getattr(self, deepest).open(**params)
        except HTTPNotFound:
            return False

        return True