diff --git a/modules/piratebay/backend.py b/modules/piratebay/backend.py
index 8aaa00376b81da5bd8aba819400fbecdc3ea2371..cd988e0223613ef14498cac8f838a35864b54e8a 100644
--- a/modules/piratebay/backend.py
+++ b/modules/piratebay/backend.py
@@ -49,4 +49,4 @@ def get_torrent_file(self, id):
         return self.browser.openurl(torrent.url.encode('utf-8')).read()
 
     def iter_torrents(self, pattern):
-        return self.browser.iter_torrents(pattern.replace(' ','+'))
+        return self.browser.iter_torrents(pattern.replace(' ', '+'))
diff --git a/modules/piratebay/browser.py b/modules/piratebay/browser.py
index 3f41e277a6b54fa868d69da34697c1e23c21968f..f398be80b2327dd0de9cce7cd52fa126f90f0109 100644
--- a/modules/piratebay/browser.py
+++ b/modules/piratebay/browser.py
@@ -30,26 +30,26 @@ class PiratebayBrowser(BaseBrowser):
-    DOMAIN = 'thepiratebay.org'
+    DOMAIN = 'thepiratebay.se'
     PROTOCOL = 'https'
     ENCODING = 'utf-8'
     USER_AGENT = BaseBrowser.USER_AGENTS['wget']
-    PAGES = {'https://thepiratebay.org' : IndexPage,
-             'https://thepiratebay.org/search/.*/0/7/0' : TorrentsPage,
-             'https://thepiratebay.org/torrent/.*' : TorrentPage
+    PAGES = {'https://thepiratebay.se': IndexPage,
+             'https://thepiratebay.se/search/.*/0/7/0': TorrentsPage,
+             'https://thepiratebay.se/torrent/.*': TorrentPage
             }
 
     def home(self):
-        return self.location('https://thepiratebay.org')
+        return self.location('https://thepiratebay.se')
 
     def iter_torrents(self, pattern):
-        self.location('https://thepiratebay.org/search/%s/0/7/0' % urllib.quote_plus(pattern.encode('utf-8')))
+        self.location('https://thepiratebay.se/search/%s/0/7/0' % urllib.quote_plus(pattern.encode('utf-8')))
 
         assert self.is_on_page(TorrentsPage)
         return self.page.iter_torrents()
 
     def get_torrent(self, id):
-        self.location('https://thepiratebay.org/torrent/%s/' % id)
+        self.location('https://thepiratebay.se/torrent/%s/' % id)
 
         assert self.is_on_page(TorrentPage)
         return self.page.get_torrent(id)
diff --git a/modules/piratebay/pages/index.py b/modules/piratebay/pages/index.py
index 22ec8ed808d8e045d375bb9949ffbc12eb9c9074..fa18d0a7df0f4e53eea88c1dbf2511d00a330c87 100644
--- a/modules/piratebay/pages/index.py
+++ b/modules/piratebay/pages/index.py
@@ -27,4 +27,3 @@ class IndexPage(BasePage):
 
     def is_logged(self):
         return 'id' in self.document.find('body').attrib
-
diff --git a/modules/piratebay/pages/torrents.py b/modules/piratebay/pages/torrents.py
index 60bdc26bcd377d4274a16f2d00ada3783992bdf5..637b8b7486d489b9cedccfa63413dbfe9d7ee3a7 100644
--- a/modules/piratebay/pages/torrents.py
+++ b/modules/piratebay/pages/torrents.py
@@ -18,7 +18,6 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 
-
 from weboob.tools.browser import BasePage
 from weboob.capabilities.torrent import Torrent
 
 
@@ -30,17 +29,16 @@ class TorrentsPage(BasePage):
     def unit(self, n, u):
         m = {'B': 1,
              'KB': 1024,
-             'MB': 1024*1024,
-             'GB': 1024*1024*1024,
-             'TB': 1024*1024*1024*1024,
+             'MB': 1024 * 1024,
+             'GB': 1024 * 1024 * 1024,
+             'TB': 1024 * 1024 * 1024 * 1024,
              }
-        #return float(n.replace(',', '')) * m.get(u, 1)
-        return float(n*m[u])
+        return float(n * m[u])
 
     def iter_torrents(self):
         table = self.parser.select(self.document.getroot(), 'table#searchResult', 1)
         for tr in table.getiterator('tr'):
-            if tr.get('class','') != "header":
+            if tr.get('class', '') != "header":
                 td = tr.getchildren()[1]
                 div = td.getchildren()[0]
                 link = div.find('a').attrib['href']
@@ -51,7 +49,7 @@ def iter_torrents(self):
                 url = a.attrib['href']
 
                 size = td.find('font').text.split(',')[1].strip()
-                u = size.split(' ')[1].split(u'\xa0')[1].replace('i','')
+                u = size.split(' ')[1].split(u'\xa0')[1].replace('i', '')
                 size = size.split(' ')[1].split(u'\xa0')[0]
 
                 seed = tr.getchildren()[2].text
@@ -60,42 +58,47 @@ def iter_torrents(self):
                 torrent = Torrent(idt,
                                   title,
                                   url=url,
-                                  size=self.unit(float(size),u),
+                                  size=self.unit(float(size), u),
                                   seeders=int(seed),
                                   leechers=int(leech))
                 yield torrent
 
+
 class TorrentPage(BasePage):
     def get_torrent(self, id):
         for div in self.document.getiterator('div'):
-            if div.attrib.get('id','') == 'title':
+            if div.attrib.get('id', '') == 'title':
                 title = div.text.strip()
-            elif div.attrib.get('class','') == 'download':
-                url = div.getchildren()[0].attrib.get('href','')
-            elif div.attrib.get('id','') == 'details':
+            elif div.attrib.get('class', '') == 'download':
+                # the last link is now the one with http
+                url = self.parser.select(div, 'a')[-1].attrib.get('href', '')
+                # https fails on the download server, so strip it
+                if url.startswith('https://'):
+                    url = url.replace('https://', 'http://', 1)
+            elif div.attrib.get('id', '') == 'details':
                 size = float(div.getchildren()[0].getchildren()[5].text.split('(')[1].split('Bytes')[0])
                 if len(div.getchildren()) > 1 \
-                and div.getchildren()[1].attrib.get('class','') == 'col2' :
+                and div.getchildren()[1].attrib.get('class', '') == 'col2':
                     child_to_explore = div.getchildren()[1]
                 else:
                     child_to_explore = div.getchildren()[0]
                 prev_child_txt = "none"
-                seed="-1"
-                leech="-1"
+                seed = "-1"
+                leech = "-1"
                 for ch in child_to_explore.getchildren():
                     if prev_child_txt == "Seeders:":
                         seed = ch.text
                     if prev_child_txt == "Leechers:":
                         leech = ch.text
                     prev_child_txt = ch.text
-            elif div.attrib.get('class','') == 'nfo':
+            elif div.attrib.get('class', '') == 'nfo':
                 description = div.getchildren()[0].text
 
         torrent = Torrent(id, title)
-        torrent.url = url
+        torrent.url = url or None
        torrent.size = size
         torrent.seeders = int(seed)
         torrent.leechers = int(leech)
-        torrent.description = description
+        torrent.description = description.strip()
         torrent.files = ['NYI']
         return torrent
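Note (not part of the patch): the search listing reports sizes like "700 MiB"; iter_torrents strips the binary "i" marker and TorrentsPage.unit converts the number to bytes using powers of 1024. A minimal standalone sketch of that conversion, with a made-up sample value:

    def unit(n, u):
        # powers of 1024, mirroring TorrentsPage.unit in the diff above
        m = {'B': 1,
             'KB': 1024,
             'MB': 1024 * 1024,
             'GB': 1024 * 1024 * 1024,
             'TB': 1024 * 1024 * 1024 * 1024,
             }
        # unknown units raise KeyError, as m[u] does in the module
        return float(n * m[u])

    print unit(700.0, 'MiB'.replace('i', ''))  # 734003200.0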
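Note (not part of the patch): get_torrent now takes the last link inside the "download" div and downgrades https to http, since the download server refuses https. A hedged sketch of just that selection and rewrite, using made-up hrefs:

    hrefs = ['magnet:?xt=urn:btih:0000000000000000000000000000000000000000',
             'https://torrents.thepiratebay.se/1234567/Example.torrent']
    url = hrefs[-1] if hrefs else ''   # the last link is the .torrent one
    if url.startswith('https://'):
        url = url.replace('https://', 'http://', 1)
    print url  # http://torrents.thepiratebay.se/1234567/Example.torrent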