Commit ad3de2eb authored by blckshrk's avatar blckshrk Committed by Florent Fourcot

Bug fixes and improved test coverage.

All of the fixes are essentially CSS selector corrections, made necessary by a few changes in the HTML structure of the pages.
parent f13f7bad
......@@ -23,6 +23,7 @@ from weboob.capabilities.base import NotAvailable, NotLoaded
from weboob.tools.browser import BasePage
from datetime import datetime
import re
__all__ = ['PersonPage', 'MovieCrewPage', 'BiographyPage', 'FilmographyPage', 'ReleasePage']
......@@ -33,11 +34,13 @@ class ReleasePage(BasePage):
'''
def get_movie_releases(self, country_filter):
result = unicode()
links = self.parser.select(self.document.getroot(), 'b a')
links = self.parser.select(self.document.getroot(), 'table#release_dates a')
for a in links:
href = a.attrib.get('href', '')
# XXX: search() could raise an exception
if href.strip('/').split('/')[0] == 'calendar' and\
(country_filter is None or href.split('region=')[-1].lower() == country_filter):
(country_filter is None or re.search('region=([a-zA-Z]+)&', href).group(1).lower() == country_filter):
country = a.text
td_date = self.parser.select(a.getparent().getparent().getparent(), 'td')[1]
date_links = self.parser.select(td_date, 'a')
......@@ -74,14 +77,15 @@ class MovieCrewPage(BasePage):
'''
def iter_persons(self, role_filter=None):
if (role_filter is None or (role_filter is not None and role_filter == 'actor')):
tables = self.parser.select(self.document.getroot(), 'table.cast')
tables = self.parser.select(self.document.getroot(), 'table.cast_list')
if len(tables) > 0:
table = tables[0]
tds = self.parser.select(table, 'td.nm')
tds = self.parser.select(table, 'td.itemprop')
for td in tds:
id = td.find('a').attrib.get('href', '').strip('/').split('/')[-1]
id = td.find('a').attrib.get('href', '').strip('/').split('/')[1]
name = unicode(td.find('a').text)
char_name = unicode(self.parser.select(td.getparent(), 'td.char', 1).text_content())
char_name = unicode(self.parser.select(td.getparent(), 'td.character', 1).text_content())
person = Person(id, name)
person.short_description = char_name
person.real_name = NotLoaded
......@@ -95,7 +99,7 @@ class MovieCrewPage(BasePage):
person.thumbnail_url = NotLoaded
yield person
for gloss_link in self.parser.select(self.document.getroot(), 'table[cellspacing=1] h5 a'):
for gloss_link in self.parser.select(self.document.getroot(), 'table[cellspacing="1"] h5 a'):
role = gloss_link.attrib.get('name', '').rstrip('s')
if (role_filter is None or (role_filter is not None and role == role_filter)):
tbody = gloss_link.getparent().getparent().getparent().getparent()
......@@ -114,12 +118,12 @@ class MovieCrewPage(BasePage):
# yield self.browser.get_person(id)
def iter_persons_ids(self):
    """Yield the id of each person listed in the page's first cast table.

    The first element matching ``table.cast_list`` is scanned for
    ``td.itemprop`` cells. For each cell, the id is the second path
    segment of the href of the cell's first ``a`` child (presumably a
    link of the form ``/name/<id>/...`` -- confirm against the live page).

    Nothing is yielded when the page has no cast table. Cells without a
    link, or whose href has fewer than two path segments, are skipped
    instead of raising.
    """
    tables = self.parser.select(self.document.getroot(), 'table.cast_list')
    if not tables:
        return
    for td in self.parser.select(tables[0], 'td.itemprop'):
        link = td.find('a')
        if link is None:
            # No anchor in this cell: there is no id to extract.
            continue
        segments = link.attrib.get('href', '').strip('/').split('/')
        if len(segments) > 1:
            yield segments[1]
......@@ -152,7 +156,7 @@ class PersonPage(BasePage):
real_name = unicode(a.text.strip())
elif 'birth_place' in href:
birth_place = unicode(a.text.lower().strip())
names = self.parser.select(td_overview, 'h1[itemprop=name]')
names = self.parser.select(td_overview, 'h1 span[itemprop=name]')
if len(names) > 0:
name = unicode(names[0].text.strip())
times = self.parser.select(td_overview, 'time[itemprop=birthDate]')
......
......@@ -19,39 +19,45 @@
from weboob.tools.test import BackendTest
class ImdbTest(BackendTest):
BACKEND = 'imdb'
def test_search_movie(self):
    # A title search for 'spiderman' must return at least one movie,
    # and every returned movie must carry a truthy id.
    found = list(self.backend.iter_movies('spiderman'))
    assert found
    assert all(entry.id for entry in found)
def test_get_movie(self):
    # Fetch a single movie by id; the object itself, its id and its
    # original title must all be truthy (checked in that order).
    film = self.backend.get_movie('tt0079980')
    assert film
    assert film.id and film.original_title
def test_search_person(self):
    # A person search for 'dewaere' must return at least one result,
    # and every result must carry a truthy id.
    matches = list(self.backend.iter_persons('dewaere'))
    assert matches
    assert all(match.id for match in matches)
def test_get_person(self):
    # Fetch a single person by id; the object, then its id, name and
    # birth date must each be truthy (checked in that order).
    someone = self.backend.get_person('nm0223033')
    assert someone
    assert someone.id and someone.name and someone.birth_date
def test_movie_persons(self):
    # The crew listing of movie 'tt0079980' must be non-empty, and each
    # entry needs a truthy id, name and short description.
    crew = list(self.backend.iter_movie_persons('tt0079980'))
    assert crew
    for member in crew:
        assert member.id and member.name and member.short_description
def test_person_movies(self):
    # The filmography of person 'nm0223033' must be non-empty, and each
    # movie needs a truthy id and original title.
    filmography = list(self.backend.iter_person_movies('nm0223033'))
    assert filmography
    for film in filmography:
        assert film.id and film.original_title
......@@ -62,6 +68,19 @@ class ImdbTest(BackendTest):
assert bio is not None
def test_get_movie_releases(self):
    # Request the releases of 'tt0079980' filtered on 'fr' so the expected
    # string can be pinned exactly. The former unfiltered call was removed:
    # its result was overwritten immediately, so it only added a redundant
    # backend call.
    rel = self.backend.get_movie_releases('tt0079980', 'fr')
    assert rel != ''
    assert rel is not None
    assert rel == 'France : 25 April 1979'
def test_iter_person_movies_ids(self):
    # The movie-id listing for person 'nm0223033' must be non-empty and
    # contain only truthy ids.
    ids = list(self.backend.iter_person_movies_ids('nm0223033'))
    assert ids
    assert all(ids)
def test_iter_movie_persons_ids(self):
    # The person-id listing for movie 'tt0079980' must be non-empty and
    # contain only truthy ids.
    ids = list(self.backend.iter_movie_persons_ids('tt0079980'))
    assert ids
    assert all(ids)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment