Commit 98854246, authored by Laurent Bachelier 🐧

Fix piratebay

Move to the .se domain, use http instead of https for downloads, and
use the correct link for downloads.
Also make some cosmetic enhancements.
parent 664e4cc0
......@@ -49,4 +49,4 @@ class PiratebayBackend(BaseBackend, ICapTorrent):
return self.browser.openurl(torrent.url.encode('utf-8')).read()
def iter_torrents(self, pattern):
return self.browser.iter_torrents(pattern.replace(' ','+'))
return self.browser.iter_torrents(pattern.replace(' ', '+'))
......@@ -30,26 +30,26 @@ __all__ = ['PiratebayBrowser']
class PiratebayBrowser(BaseBrowser):
DOMAIN = 'thepiratebay.org'
DOMAIN = 'thepiratebay.se'
PROTOCOL = 'https'
ENCODING = 'utf-8'
USER_AGENT = BaseBrowser.USER_AGENTS['wget']
PAGES = {'https://thepiratebay.org' : IndexPage,
'https://thepiratebay.org/search/.*/0/7/0' : TorrentsPage,
'https://thepiratebay.org/torrent/.*' : TorrentPage
PAGES = {'https://thepiratebay.se': IndexPage,
'https://thepiratebay.se/search/.*/0/7/0': TorrentsPage,
'https://thepiratebay.se/torrent/.*': TorrentPage
}
def home(self):
return self.location('https://thepiratebay.org')
return self.location('https://thepiratebay.se')
def iter_torrents(self, pattern):
self.location('https://thepiratebay.org/search/%s/0/7/0' % urllib.quote_plus(pattern.encode('utf-8')))
self.location('https://thepiratebay.se/search/%s/0/7/0' % urllib.quote_plus(pattern.encode('utf-8')))
assert self.is_on_page(TorrentsPage)
return self.page.iter_torrents()
def get_torrent(self, id):
self.location('https://thepiratebay.org/torrent/%s/' % id)
self.location('https://thepiratebay.se/torrent/%s/' % id)
assert self.is_on_page(TorrentPage)
return self.page.get_torrent(id)
......@@ -27,4 +27,3 @@ __all__ = ['IndexPage']
class IndexPage(BasePage):
def is_logged(self):
return 'id' in self.document.find('body').attrib
......@@ -18,7 +18,6 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BasePage
from weboob.capabilities.torrent import Torrent
......@@ -30,17 +29,16 @@ class TorrentsPage(BasePage):
def unit(self, n, u):
m = {'B': 1,
'KB': 1024,
'MB': 1024*1024,
'GB': 1024*1024*1024,
'TB': 1024*1024*1024*1024,
'MB': 1024 * 1024,
'GB': 1024 * 1024 * 1024,
'TB': 1024 * 1024 * 1024 * 1024,
}
#return float(n.replace(',', '')) * m.get(u, 1)
return float(n*m[u])
return float(n * m[u])
def iter_torrents(self):
table = self.parser.select(self.document.getroot(), 'table#searchResult', 1)
for tr in table.getiterator('tr'):
if tr.get('class','') != "header":
if tr.get('class', '') != "header":
td = tr.getchildren()[1]
div = td.getchildren()[0]
link = div.find('a').attrib['href']
......@@ -51,7 +49,7 @@ class TorrentsPage(BasePage):
url = a.attrib['href']
size = td.find('font').text.split(',')[1].strip()
u = size.split(' ')[1].split(u'\xa0')[1].replace('i','')
u = size.split(' ')[1].split(u'\xa0')[1].replace('i', '')
size = size.split(' ')[1].split(u'\xa0')[0]
seed = tr.getchildren()[2].text
......@@ -60,42 +58,47 @@ class TorrentsPage(BasePage):
torrent = Torrent(idt,
title,
url=url,
size=self.unit(float(size),u),
size=self.unit(float(size), u),
seeders=int(seed),
leechers=int(leech))
yield torrent
class TorrentPage(BasePage):
def get_torrent(self, id):
for div in self.document.getiterator('div'):
if div.attrib.get('id','') == 'title':
if div.attrib.get('id', '') == 'title':
title = div.text.strip()
elif div.attrib.get('class','') == 'download':
url = div.getchildren()[0].attrib.get('href','')
elif div.attrib.get('id','') == 'details':
elif div.attrib.get('class', '') == 'download':
# the last link is now the one with http
url = self.parser.select(div, 'a')[-1].attrib.get('href', '')
# https fails on the download server, so strip it
if url.startswith('https://'):
url = url.replace('https://', 'http://', 1)
elif div.attrib.get('id', '') == 'details':
size = float(div.getchildren()[0].getchildren()[5].text.split('(')[1].split('Bytes')[0])
if len(div.getchildren()) > 1 \
and div.getchildren()[1].attrib.get('class','') == 'col2' :
and div.getchildren()[1].attrib.get('class', '') == 'col2':
child_to_explore = div.getchildren()[1]
else:
child_to_explore = div.getchildren()[0]
prev_child_txt = "none"
seed="-1"
leech="-1"
seed = "-1"
leech = "-1"
for ch in child_to_explore.getchildren():
if prev_child_txt == "Seeders:":
seed = ch.text
if prev_child_txt == "Leechers:":
leech = ch.text
prev_child_txt = ch.text
elif div.attrib.get('class','') == 'nfo':
elif div.attrib.get('class', '') == 'nfo':
description = div.getchildren()[0].text
torrent = Torrent(id, title)
torrent.url = url
torrent.url = url or None
torrent.size = size
torrent.seeders = int(seed)
torrent.leechers = int(leech)
torrent.description = description
torrent.description = description.strip()
torrent.files = ['NYI']
return torrent
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment