# -*- coding: utf-8 -*-

# Copyright(C) 2011 Romain Bignon
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.

import re
from urllib.parse import quote_plus

from woob.browser import PagesBrowser, URL

from .pages import IndexPage, VideoPage


__all__ = ['DailymotionBrowser']


class DailymotionBrowser(PagesBrowser):
    """Browser for dailymotion.com video, listing and search pages.

    The preferred container ``format`` and ``resolution`` given at
    construction time drive which stream URL :meth:`get_video` selects.
    """

    BASEURL = 'http://www.dailymotion.com'

    video_page = URL(r'http://[w\.]*dailymotion\.com/video/(?P<_id>.*)', VideoPage)
    latest_page = URL(r'/1', IndexPage)
    # The first pattern must expose a ``search`` group: search_videos()
    # builds its URL through ``index_page.go(search=...)``.
    index_page = URL(r'http://[w\.]*dailymotion\.com/(?P<search>.*)',
                     r'http://[w\.]*dailymotion\.com/1',
                     r'http://[w\.]*dailymotion\.com/[a-z\-]{2,5}/1',
                     r'http://[w\.]*dailymotion\.com/[a-z\-]{2,5}/(\w+/)?search/.*',
                     IndexPage)

    def __init__(self, resolution, format, *args, **kwargs):
        """Store the stream preferences and initialise the base browser.

        :param resolution: preferred resolution key, looked up in the
            per-format stream mapping
        :param format: preferred format key (e.g. ``'m3u8'``), looked up in
            ``video._formats``
        """
        self.resolution = resolution
        self.format = format
        super().__init__(*args, **kwargs)

    def get_video(self, _id, video=None):
        """Load the page of video ``_id`` and pick a stream URL for it.

        ``video.ext`` and ``video.url`` are only set when the preferred
        format is present in ``video._formats``; otherwise the object is
        returned as the page built it.

        :param _id: identifier substituted into the video page URL
        :param video: optional existing object, forwarded to the page as ``obj``
        :return: the video object returned by the page
        """
        video = self.video_page.go(_id=_id).get_video(obj=video)

        if video._formats and self.format in video._formats:
            streams = video._formats[self.format]
            video.ext = self.format
            if self.format == 'm3u8':
                video.url = self.retrieve_m3u8_url(streams)
            elif self.resolution in streams:
                video.url = streams[self.resolution]
            elif streams:
                # Preferred resolution unavailable: fall back to the last
                # listed one.  dict views are not indexable on Python 3, so
                # materialise them first.
                video.url = list(streams.values())[-1]

        return video

    def retrieve_m3u8_url(self, urls):
        """Return the stream URL for the preferred resolution.

        If ``urls`` has an entry for the preferred resolution it is returned
        directly.  Otherwise each playlist in ``urls`` is read line by line,
        looking for a ``#...,NAME="<resolution>"`` line; the line that follows
        it (cut at the first ``#``) is returned.

        :param urls: mapping of resolution to playlist URL
        :return: the selected URL; the last line read when nothing matched;
            ``None`` when no line could be read at all
        """
        if self.resolution in urls:
            return urls.get(self.resolution)

        # Compile once, and escape the resolution so it is matched literally.
        wanted = re.compile('^#.*,NAME="%s"' % re.escape(str(self.resolution)))

        return_next = False
        line = None
        for url in urls.values():
            for raw in self.read_url(url):
                # The response may yield bytes lines; the pattern and the
                # split below need text.
                line = raw.decode('utf-8', 'replace') if isinstance(raw, bytes) else raw
                if return_next:
                    return line.split('#')[0]
                if wanted.match(line):
                    return_next = True

        if line is None:
            # Nothing was read (no playlists, or only empty ones).
            return None
        return line.split('#')[0]

    def read_url(self, url):
        """Open ``url`` in streaming mode and return an iterator over its lines."""
        response = self.open(url, stream=True)
        return response.iter_lines()

    def search_videos(self, pattern, sortby):
        """Iterate over the videos matching ``pattern``.

        :param pattern: search terms; any ``/`` is removed before quoting
        :param sortby: optional path segment inserted before ``search`` in
            the URL, or ``None`` to omit it
        :return: the result of ``iter_videos()`` on the loaded index page
        """
        # quote_plus() encodes text as UTF-8 itself; no manual encode needed.
        quoted = quote_plus(pattern.replace('/', ''))
        if sortby is None:
            url = 'en/search/%s/1' % quoted
        else:
            url = 'en/%s/search/%s/1' % (sortby, quoted)
        return self.index_page.go(search=url).iter_videos()

    def latest_videos(self):
        """Iterate over the videos listed on the ``/1`` index page."""
        return self.latest_page.go().iter_videos()