pax_global_header 0000666 0000000 0000000 00000000064 14575653726 0014536 g ustar 00root root 0000000 0000000 52 comment=5f3d558793b537a74480241ac6981479f5938cd3
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-imdb/ 0000775 0000000 0000000 00000000000 14575653726 0022755 5 ustar 00root root 0000000 0000000 woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-imdb/modules/ 0000775 0000000 0000000 00000000000 14575653726 0024425 5 ustar 00root root 0000000 0000000 woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-imdb/modules/imdb/ 0000775 0000000 0000000 00000000000 14575653726 0025340 5 ustar 00root root 0000000 0000000 woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-imdb/modules/imdb/__init__.py 0000664 0000000 0000000 00000001472 14575653726 0027455 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright(C) 2013 Julien Veyssier
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
from .module import ImdbModule
__all__ = ['ImdbModule']
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-imdb/modules/imdb/browser.py 0000664 0000000 0000000 00000015617 14575653726 0027407 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright(C) 2013 Julien Veyssier
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
from html import unescape
import re
from woob.browser import PagesBrowser, URL
from woob.browser.profiles import Wget
from woob.capabilities.base import NotAvailable, NotLoaded
from woob.capabilities.cinema import Movie, Person
from .pages import PersonPage, MovieCrewPage, BiographyPage, ReleasePage
from datetime import datetime
__all__ = ['ImdbBrowser']
class ImdbBrowser(PagesBrowser):
    """Browser used by the imdb module.

    Title and name searches query the ``v2.sg.media-imdb.com`` suggestion
    service, movie details are read from the OMDb JSON API, and persons,
    crews, biographies and release dates are scraped from ``www.imdb.com``.
    """

    BASEURL = 'https://www.imdb.com'
    PROFILE = Wget()

    # Every pattern exposes the identifier as the named group ``id`` because
    # the methods below navigate with ``<url>.go(id=...)``.
    movie_crew = URL(r'/title/(?P<id>tt[0-9]*)/fullcredits.*', MovieCrewPage)
    release = URL(r'/title/(?P<id>tt[0-9]*)/releaseinfo.*', ReleasePage)
    bio = URL(r'/name/(?P<id>nm[0-9]*)/bio.*', BiographyPage)
    person = URL(r'/name/(?P<id>nm[0-9]*)/*', PersonPage)

    # English month abbreviations found in OMDb's ``Released`` field, mapped
    # to month numbers.  They are substituted by hand so that parsing does
    # not depend on the process locale (``%b`` does).
    _MONTHS = {
        'Jan': '01',
        'Feb': '02',
        'Mar': '03',
        'Apr': '04',
        'May': '05',
        'Jun': '06',
        'Jul': '07',
        'Aug': '08',
        'Sep': '09',
        'Oct': '10',
        'Nov': '11',
        'Dec': '12',
    }

    def iter_movies(self, pattern):
        """Yield partially filled Movie objects suggested for *pattern*.

        Only the id, the title and (when the entry carries an image) the
        thumbnail URL are set; the other fields are marked NotLoaded.
        An empty pattern yields nothing.
        """
        if not pattern:
            # The suggestion URL is built from the first character of the
            # pattern, so there is nothing to query.
            return
        res = self.open(f'https://v2.sg.media-imdb.com/suggestion/titles/{pattern[0]}/{pattern}.json')
        # Defensive: tolerate an answer without the 'd' result list.
        for m in res.json().get('d', []):
            movie = Movie(m['id'], m['l'])
            movie.other_titles = NotLoaded
            movie.release_date = NotLoaded
            movie.duration = NotLoaded
            movie.short_description = NotLoaded
            movie.pitch = NotLoaded
            movie.country = NotLoaded
            movie.note = NotLoaded
            movie.roles = NotLoaded
            movie.all_release_dates = NotLoaded
            # Same guard as iter_persons: the image entry is optional.
            if 'i' in m:
                movie.thumbnail_url = m['i']['imageUrl']
            yield movie

    def iter_persons(self, pattern):
        """Yield partially filled Person objects suggested for *pattern*.

        Only the id, the name and (when the entry carries an image) the
        thumbnail URL are set; the other fields are marked NotLoaded.
        An empty pattern yields nothing.
        """
        if not pattern:
            return
        res = self.open(f'https://v2.sg.media-imdb.com/suggestion/names/{pattern[0]}/{pattern}.json')
        for p in res.json().get('d', []):
            person = Person(p['id'], p['l'])
            person.real_name = NotLoaded
            person.birth_place = NotLoaded
            person.birth_date = NotLoaded
            person.death_date = NotLoaded
            person.gender = NotLoaded
            person.nationality = NotLoaded
            person.short_biography = NotLoaded
            person.short_description = NotLoaded
            person.roles = NotLoaded
            if 'i' in p:
                person.thumbnail_url = p['i']['imageUrl']
            yield person

    def _parse_release_date(self, released):
        """Convert an OMDb ``Released`` string such as ``25 Apr 1979``.

        Returns a datetime, or NotAvailable when the value is ``N/A`` or
        does not follow the ``day month year`` layout.
        """
        if released == 'N/A':
            return NotAvailable
        for abbreviation, number in self._MONTHS.items():
            released = released.replace(abbreviation, number)
        try:
            return datetime.strptime(released, '%d %m %Y')
        except ValueError:
            # An unexpected date layout must not abort the whole lookup.
            return NotAvailable

    def get_movie(self, id):
        """Build a Movie from the OMDb record of the IMDb title *id*.

        :param id: IMDb title identifier (``tt...``).
        :return: a filled Movie, or None when no response was obtained or
                 the JSON answer has no ``Title`` key.
        """
        # NOTE(review): the OMDb API key is hard-coded in the URL; consider
        # moving it to the module configuration.
        res = self.open(f'https://www.omdbapi.com/?apikey=b7c56eb5&i={id}&plot=full')
        if res is None:
            return None
        jres = res.json()
        if 'Title' not in jres:
            return None

        duration = NotAvailable
        release_date = NotAvailable
        pitch = NotAvailable
        country = NotAvailable
        note = NotAvailable
        short_description = NotAvailable
        thumbnail_url = NotAvailable
        other_titles = []
        genres = []
        roles = {}

        title = unescape(str(jres['Title'].strip()))
        if 'Poster' in jres:
            thumbnail_url = str(jres['Poster'])
        if 'Director' in jres:
            # The director's name doubles as the short description.
            short_description = str(jres['Director'])
        if 'Genre' in jres:
            genres = jres['Genre'].split(', ')
        if 'Runtime' in jres:
            m = re.search(r'(\d+?) min', jres['Runtime'])
            if m:
                duration = int(m.group(1))
        if 'Released' in jres:
            release_date = self._parse_release_date(str(jres['Released']))
        if 'Country' in jres:
            country = ', '.join(jres['Country'].split(', '))
        if 'Plot' in jres:
            pitch = str(jres['Plot'])
        if 'imdbRating' in jres and 'imdbVotes' in jres:
            note = f'{jres["imdbRating"]}/10 ({jres["imdbVotes"]} votes)'
        for r in ['Actors', 'Director', 'Writer']:
            if r in jres:
                # OMDb gives names only, so the id slot is filled with 'N/A'.
                roles[r] = [('N/A', e) for e in jres[r].split(', ')]

        movie = Movie(id, title)
        movie.other_titles = other_titles
        movie.release_date = release_date
        movie.duration = duration
        movie.genres = genres
        movie.pitch = pitch
        movie.country = country
        movie.note = note
        movie.roles = roles
        movie.short_description = short_description
        movie.all_release_dates = NotLoaded
        movie.thumbnail_url = thumbnail_url
        return movie

    def get_person(self, id):
        """Open the person page of *id* and return the Person it describes."""
        self.person.go(id=id)
        assert self.person.is_here()
        return self.page.get_person(id)

    def get_person_biography(self, id):
        """Open the biography page of person *id* and return its text."""
        self.bio.go(id=id)
        assert self.bio.is_here()
        return self.page.get_biography()

    def iter_movie_persons(self, movie_id, role):
        """Yield the persons listed on the full-credits page of *movie_id*.

        *role* is passed to the page as its role filter.
        """
        self.movie_crew.go(id=movie_id)
        assert self.movie_crew.is_here()
        yield from self.page.iter_persons(role)

    def iter_person_movies(self, person_id, role):
        """Return an iterator over the movies listed on a person page.

        *role* is passed to the page as its role filter.
        """
        self.person.go(id=person_id)
        assert self.person.is_here()
        return self.page.iter_movies(role)

    def iter_person_movies_ids(self, person_id):
        """Yield the ids of the movies listed on the page of *person_id*."""
        self.person.go(id=person_id)
        assert self.person.is_here()
        yield from self.page.iter_movies_ids()

    def iter_movie_persons_ids(self, movie_id):
        """Yield the ids of the persons credited for *movie_id*."""
        self.movie_crew.go(id=movie_id)
        assert self.movie_crew.is_here()
        yield from self.page.iter_persons_ids()

    def get_movie_releases(self, id, country):
        """Return the release dates of movie *id*, filtered by *country*."""
        self.release.go(id=id)
        assert self.release.is_here()
        return self.page.get_movie_releases(country)
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-imdb/modules/imdb/favicon.png 0000664 0000000 0000000 00000003765 14575653726 0027506 0 ustar 00root root 0000000 0000000 PNG
IHDR @ @ iq bKGD pHYs tIME
ӊ IDATxmlSIHHH M[UJ!iF5\B+Ok2I,
i:A`t!%UCH$`;`KW&ϧks}y}QC`@ P (
`]:l@60 R$ І>n ׁ+@=n9w7jmfӎ_ K w 'm;A0:`kRQC`ni 0"`7,v얃wd
kmv f9U;_^-{ ɚ(+]f4
0E!)v p! &k HCN"+[l fu7P<␏ &`hNz951t}WdО͎{8\DUa4ڈ|F-scY65{QPH130 .{:>#_aN?,^i
/KØw{p1T_x{ al>' %YWۏ'Ѩ¿Gf{
^^|FmM Y7>\ p.6ldWxy/0\@`*; pB| ϿЎ%*'7'.a琳mjWE\X Y^*Nd?&T:HύC"#M}8nl^~):Çwrոq:jQQ;LRhl~"j<nrf$oiWNua-Pw˕~ &NxtZ _}=a*5'*$j~g߁;,Zʵ#pT*ڻ_y"zRRrQUsYJ˚ȝ<Tnxͮ8v@u$7oq6(mopb#"Q璖ok"@ 3y+W6ˋ.w˖VU+yxXtjkTUٺ='l._8G % Q7f0.^pv0M̜Bjd5[6i*O@^VkL!k֫TU9uƅ:ҝFNʖ'\̝###yЃ"Ex_-r"9Y=2H/~UCex >r+i̘Gcs'Gz[?+3zP)mu꣏sϞǣ$)=:^(ޘ! u9:VL(@ԒŢ:;(ZSG`1h Yi+O:oHXҝpjF/M}* 7@Ws5n{SDW;FA^>;;}p9T䗛nܗ'; |KL?4Y?WT!1-"iqO@VsEJIgdxIr9 Tg_053^DډI{XK,G(Xm`G=d~KQ9ZS&ǴDo$P!ɩa5~ց)0=W 46{ )I -Uͬ<&Ƹl̞c]
cF>>̖nh#! ơlv^fl#|!R C="$cVf|C;oC]ˀ!" }>qP7;;o
@ P o+gb( IENDB` woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-imdb/modules/imdb/module.py 0000664 0000000 0000000 00000010003 14575653726 0027171 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright(C) 2013 Julien Veyssier
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
from woob.capabilities.cinema import CapCinema, Person, Movie
from woob.tools.backend import Module
from .browser import ImdbBrowser
__all__ = ['ImdbModule']
class ImdbModule(Module, CapCinema):
    """woob module exposing IMDb through the CapCinema capability.

    Every capability method delegates to the ImdbBrowser instance.
    """

    NAME = 'imdb'
    MAINTAINER = u'Julien Veyssier'
    EMAIL = 'julien.veyssier@aiur.fr'
    VERSION = '3.6'
    DESCRIPTION = 'Internet Movie Database service'
    LICENSE = 'AGPLv3+'
    BROWSER = ImdbBrowser

    # Fields whose request triggers a reload of the person / movie record.
    # They cover everything fill_person / fill_movie copy from that record.
    _PERSON_FIELDS = (
        'real_name', 'birth_place', 'death_date', 'nationality',
        'short_biography', 'short_description', 'roles', 'birth_date',
        'thumbnail_url', 'gender',
    )
    _MOVIE_FIELDS = (
        'other_titles', 'release_date', 'duration', 'country', 'roles',
        'note', 'thumbnail_url', 'pitch', 'genres', 'short_description',
    )

    def get_movie(self, id):
        """Return the Movie with IMDb id *id*, or None if it is unknown."""
        return self.browser.get_movie(id)

    def get_person(self, id):
        """Return the Person with IMDb id *id*."""
        return self.browser.get_person(id)

    def iter_movies(self, pattern):
        """Iterate over the movies matching *pattern*."""
        return self.browser.iter_movies(pattern)

    def iter_persons(self, pattern):
        """Iterate over the persons matching *pattern*."""
        return self.browser.iter_persons(pattern)

    def iter_movie_persons(self, id, role=None):
        """Iterate over the persons credited for movie *id*."""
        return self.browser.iter_movie_persons(id, role)

    def iter_person_movies(self, id, role=None):
        """Iterate over the movies listed for person *id*."""
        return self.browser.iter_person_movies(id, role)

    def iter_person_movies_ids(self, id):
        """Iterate over the ids of the movies listed for person *id*."""
        return self.browser.iter_person_movies_ids(id)

    def iter_movie_persons_ids(self, id):
        """Iterate over the ids of the persons credited for movie *id*."""
        return self.browser.iter_movie_persons_ids(id)

    def get_person_biography(self, id):
        """Return the biography text of person *id*."""
        return self.browser.get_person_biography(id)

    def get_movie_releases(self, id, country=None):
        """Return the release dates of movie *id*, optionally for *country*."""
        return self.browser.get_movie_releases(id, country)

    def fill_person(self, person, fields):
        """Load the requested *fields* into *person* and return it.

        ``fields`` set to None means "every field".
        """
        # Test for None first: ``'x' in None`` raises TypeError.
        fill_all = fields is None
        if fill_all or any(name in fields for name in self._PERSON_FIELDS):
            per = self.get_person(person.id)
            person.real_name = per.real_name
            person.birth_date = per.birth_date
            person.death_date = per.death_date
            person.birth_place = per.birth_place
            person.gender = per.gender
            person.nationality = per.nationality
            person.short_biography = per.short_biography
            person.short_description = per.short_description
            person.roles = per.roles
            person.thumbnail_url = per.thumbnail_url
        if fill_all or 'biography' in fields:
            person.biography = self.get_person_biography(person.id)
        return person

    def fill_movie(self, movie, fields):
        """Load the requested *fields* into *movie* and return it.

        ``fields`` set to None means "every field".  When the movie record
        cannot be fetched, the record-based fields are left untouched.
        """
        fill_all = fields is None
        if fill_all or any(name in fields for name in self._MOVIE_FIELDS):
            mov = self.get_movie(movie.id)
            # get_movie() returns None for an unknown or unreadable title.
            if mov is not None:
                movie.other_titles = mov.other_titles
                movie.release_date = mov.release_date
                movie.duration = mov.duration
                movie.pitch = mov.pitch
                movie.country = mov.country
                movie.note = mov.note
                movie.roles = mov.roles
                movie.genres = mov.genres
                movie.short_description = mov.short_description
                movie.thumbnail_url = mov.thumbnail_url
        if fill_all or 'all_release_dates' in fields:
            movie.all_release_dates = self.get_movie_releases(movie.id)
        return movie

    # Maps each object class to the method that fills it.
    OBJECTS = {
        Person: fill_person,
        Movie: fill_movie,
    }
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-imdb/modules/imdb/pages.py 0000664 0000000 0000000 00000021105 14575653726 0027010 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright(C) 2013 Julien Veyssier
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
from datetime import datetime
import re
from woob.capabilities.cinema import Person, Movie
from woob.capabilities.base import NotAvailable, NotLoaded
from woob.browser.pages import HTMLPage
from woob.browser.filters.html import CleanHTML
class ReleasePage(HTMLPage):
    """Page containing the releases of a movie."""

    def get_movie_releases(self, country_filter):
        """Return the release dates as ``country : date`` lines.

        :param country_filter: region code compared (case-insensitively)
            with the ``region=`` parameter of each calendar link, or None
            to keep every country.
        :return: the newline-separated lines, or NotAvailable when no
            release matches.
        """
        wanted = None if country_filter is None else country_filter.lower()
        lines = []
        for a in self.doc.xpath('//div[@id="releaseinfo_content"]//a'):
            href = a.attrib.get('href', '')
            # Only links into the release calendar describe a release.
            if href.strip('/').split('/')[0] != 'calendar':
                continue
            if wanted is not None:
                region = re.search('region=([a-zA-Z]+)', href)
                # A calendar link without a region cannot match the filter.
                if region is None or region.group(1).lower() != wanted:
                    continue
            date_cells = a.xpath('./../../td[has-class("release-date-item__date")]')
            if not date_cells:
                # No date cell next to the link: nothing to report.
                continue
            country = (a.text or '').strip()
            date = date_cells[0].text
            lines.append(f'{country} : {date}')
        if not lines:
            return NotAvailable
        return '\n'.join(lines).strip()
class BiographyPage(HTMLPage):
''' Page containing the biography of a person.
'''
# NOTE(review): indentation is lost in this view of the file, so the exact
# nesting of the statements inside the loop below cannot be confirmed here.
def get_biography(self):
# Returns the concatenated CleanHTML text of the children of
# div#bio_content collected from the 'mini_bio' marker onwards; the result
# is '' when no child carries name="mini_bio".
bio = ''
start = False
# Raises IndexError when the page has no div#bio_content.
tn = self.doc.xpath('//div[@id="bio_content"]')[0]
for el in tn.xpath('./*'):
# The element named 'mini_bio' switches collection on.
if el.attrib.get('name') == 'mini_bio':
start = True
if start:
bio += CleanHTML('.')(el)
# Names of the markers that end the scan.
# NOTE(review): whether this check sits inside ``if start:`` or at loop
# level is not recoverable from this view -- confirm against the indented
# source. Either way it runs after the append above.
content_after_bio = ['family', 'trademark', 'trivia', 'salary']
if el.attrib.get('name') in content_after_bio:
break
return bio
class MovieCrewPage(HTMLPage):
''' Page listing all the persons related to a movie.
'''
# NOTE(review): indentation is lost in this view of the file; comments below
# describe the nesting only where the data flow makes it unambiguous.
def iter_persons(self, role_filter=None):
# Generator of Person objects. role_filter=None keeps every role; otherwise
# only entries whose role equals role_filter are produced.
# Part 1: the cast table, read only when no filter is given or the filter
# is 'actor'.
if (role_filter is None or (role_filter is not None and role_filter == 'actor')):
tables = self.doc.xpath('//table[has-class("cast_list")]')
if len(tables) > 0:
table = tables[0]
trs = table.xpath('.//tr')
for tr in trs:
a = tr.xpath('.//a')
# Only rows with exactly three links are used: a[0] holds the thumbnail
# <img>, a[1] is the person link (id and name), a[2] supplies the text
# stored as short_description.
if len(a) == 3:
# The id is the second path component of the person link's href.
id = a[1].attrib.get('href', '').strip('/').split('/')[1]
name = a[1].text
char_name = a[2].text
thumbnail_url = a[0].xpath('.//img')[0].attrib.get('src')
person = Person(id, name)
person.short_description = char_name
person.real_name = NotLoaded
person.birth_place = NotLoaded
person.birth_date = NotLoaded
person.death_date = NotLoaded
person.gender = NotLoaded
person.nationality = NotLoaded
person.short_biography = NotLoaded
person.roles = NotLoaded
person.thumbnail_url = thumbnail_url
yield person
# Part 2: the other crew sections. The role is the h5 link's ``name``
# attribute with trailing 's' characters stripped.
for gloss_link in self.doc.xpath('//table[@cellspacing="1"]//h5//a'):
role = gloss_link.attrib.get('name', '').rstrip('s')
if (role_filter is None or (role_filter is not None and role == role_filter)):
# Four levels up from the heading link; its rows after the first are scanned.
tbody = gloss_link.getparent().getparent().getparent().getparent()
for line in tbody.xpath('.//tr')[1:]:
for a in line.xpath('.//a'):
role_detail = NotAvailable
href = a.attrib.get('href', '')
# Person links give the id (last path component) and the name.
if '/name/nm' in href:
id = href.strip('/').split('/')[-1]
name = a.text
# Glossary links give the role detail text.
if 'glossary' in href:
role_detail = a.text
# NOTE(review): it is not recoverable from this view whether the Person is
# built once per link or once per row. ``id`` and ``name`` are only
# assigned for '/name/nm' links, so a link of another kind met first would
# use unbound or stale values -- confirm against the indented source.
person = Person(id, name)
person.short_description = role_detail
yield person
# yield self.browser.get_person(id)
def iter_persons_ids(self):
# Yields, for each td.character cell of the first cast_list table, the
# second path component of the href of the cell's first <a> child.
tables = self.doc.xpath('//table[has-class("cast_list")]')
if len(tables) > 0:
table = tables[0]
tds = table.xpath('.//td[has-class("character")]')
for td in tds:
# NOTE(review): td.find('a') would be None for a cell without a link,
# which makes the attribute access below fail -- verify such cells cannot
# occur.
id = td.find('a').attrib.get('href', '').strip('/').split('/')[1]
yield id
class PersonPage(HTMLPage):
    """Page informing about a person.

    It is used to build a Person instance and to get the movie list related
    to a person.
    """

    # Captures the ``tt...`` identifier of a title link.  The digits-only
    # group keeps trailing path segments or query strings out of the id.
    _TITLE_ID_RE = re.compile(r'/title/(tt[0-9]+)')

    @staticmethod
    def _read_date(info_div):
        """Return the date of the first ``<time>`` element under *info_div*.

        The ``datetime`` attribute is expected as ``year-month-day``.
        NotAvailable is returned when the element is missing, the value is
        malformed or incomplete (e.g. a zero month), or the year is before
        1900.
        """
        time_tags = info_div.xpath('.//time')
        if not time_tags:
            return NotAvailable
        parts = time_tags[0].attrib.get('datetime', '').split('-')
        if len(parts) != 3:
            return NotAvailable
        try:
            year, month, day = (int(part) for part in parts)
            # Years before 1900 are reported as NotAvailable, as before.
            # NOTE(review): presumably a leftover of old strftime limits --
            # confirm whether the cut-off is still wanted.
            if year < 1900:
                return NotAvailable
            return datetime(year, month, day)
        except ValueError:
            return NotAvailable

    def get_person(self, id):
        """Build the Person with identifier *id* from the overview table.

        ``real_name``, ``gender``, ``nationality`` and ``short_description``
        are not read from the page and are always NotAvailable.

        :raises IndexError: when the page has no overview table.
        """
        name = NotAvailable
        short_biography = NotAvailable
        short_description = NotAvailable
        birth_place = NotAvailable
        birth_date = NotAvailable
        death_date = NotAvailable
        real_name = NotAvailable
        gender = NotAvailable
        thumbnail_url = NotAvailable
        nationality = NotAvailable

        td_overview = self.doc.xpath('//table[@id="name-overview-widget-layout"]')[0]

        names = td_overview.xpath('.//h1//span[@class="itemprop"]')
        if names:
            name = names[0].text.strip()

        descs = td_overview.xpath('.//div[has-class("name-trivia-bio-text")]//div[has-class("inline")]')
        if descs:
            short_biography = CleanHTML('.')(descs[0])

        birth = td_overview.xpath('.//div[@id="name-born-info"]')
        if birth:
            birth_date = self._read_date(birth[0])
            # The birth place is the third link of the block; it is read
            # independently of the date so that an unusable date does not
            # hide it.
            birth_links = birth[0].xpath('.//a')
            if len(birth_links) > 2:
                birth_place = birth_links[2].text

        death = td_overview.xpath('.//div[@id="name-death-info"]')
        if death:
            death_date = self._read_date(death[0])

        img_thumbnail = td_overview.xpath('.//td[@id="img_primary"]//img[@id="name-poster"]')
        if img_thumbnail:
            thumbnail_url = img_thumbnail[0].attrib.get('src', '')

        person = Person(id, name)
        person.real_name = real_name
        person.birth_date = birth_date
        person.death_date = death_date
        person.birth_place = birth_place
        person.gender = gender
        person.nationality = nationality
        person.short_biography = short_biography
        person.short_description = short_description
        person.roles = self.get_roles()
        person.thumbnail_url = thumbnail_url
        return person

    def iter_movies_ids(self):
        """Yield the title id of every title link in the filmography."""
        for role_div in self.doc.xpath('//div[@id="filmography"]//div[has-class("filmo-category-section")]/div'):
            for a in role_div.xpath('.//a'):
                m = self._TITLE_ID_RE.search(a.attrib.get('href', ''))
                if m:
                    yield m.group(1)

    def get_roles(self):
        """Return ``{role name: [('N/A', entry text), ...]}``.

        Role names come from the filmography section heads; an entry belongs
        to a role when the head's ``data-category`` value occurs in the
        entry's ``id`` attribute.
        """
        roles = {}
        # The entry list does not depend on the role, so fetch it once.
        entries = self.doc.xpath('//div[@id="filmography"]/div[has-class("filmo-category-section")]/div')
        for role_div in self.doc.xpath('//div[@id="filmography"]/div[has-class("head")]'):
            head_links = role_div.xpath('.//a')
            if not head_links:
                continue
            role = head_links[-1].text
            roles[role] = []
            category = role_div.attrib.get('data-category')
            if not category:
                # Without a category no entry can be attached to this role.
                continue
            for infos in entries:
                if category in infos.attrib.get('id', ''):
                    roles[role].append(('N/A', infos.text_content().replace('\n', ' ').strip()))
        return roles

    def iter_movies(self, role_filter=None):
        """Yield a Movie (id and link text) for every filmography title link.

        NOTE(review): *role_filter* is accepted but not applied; all roles
        are returned.
        """
        for role_div in self.doc.xpath('//div[@id="filmography"]/div[has-class("filmo-category-section")]/div'):
            for a in role_div.xpath('.//a'):
                m = self._TITLE_ID_RE.search(a.attrib.get('href', ''))
                if m:
                    yield Movie(m.group(1), a.text)
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-imdb/modules/imdb/requirements.txt 0000664 0000000 0000000 00000000014 14575653726 0030617 0 ustar 00root root 0000000 0000000 woob ~= 3.2
woob-master-5f3d558793b537a74480241ac6981479f5938cd3-modules-imdb/modules/imdb/test.py 0000664 0000000 0000000 00000006720 14575653726 0026676 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright(C) 2013 Julien Veyssier
#
# This file is part of a woob module.
#
# This woob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This woob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
from woob.tools.test import BackendTest
from woob.capabilities.cinema import Person, Movie
class ImdbTest(BackendTest):
    """Exercises the capability methods of the imdb backend."""

    MODULE = 'imdb'

    def test_search_movie(self):
        """A title search returns movies that all carry an id."""
        found = list(self.backend.iter_movies('spiderman'))
        assert len(found) > 0
        for entry in found:
            assert entry.id

    def test_get_movie(self):
        """A known title id resolves to a movie with id and title."""
        film = self.backend.get_movie('tt0079980')
        assert film
        assert film.id
        assert film.original_title

    def test_search_person(self):
        """A name search returns persons that all carry an id."""
        found = list(self.backend.iter_persons('dewaere'))
        assert len(found) > 0
        for entry in found:
            assert entry.id

    def test_get_person(self):
        """A known person id resolves to a person with its main fields set."""
        who = self.backend.get_person('nm0223033')
        assert who
        assert who.id
        assert who.name
        assert who.short_biography
        assert who.birth_date
        assert who.birth_place
        assert who.death_date
        assert who.thumbnail_url

    def test_movie_persons(self):
        """The crew of a movie is non-empty and each member is described."""
        crew = list(self.backend.iter_movie_persons('tt0079980'))
        assert len(crew) > 0
        for member in crew:
            assert member.id
            assert member.name
            assert member.short_description

    def test_person_movies(self):
        """The filmography of a person is non-empty and titled."""
        films = list(self.backend.iter_person_movies('nm0223033'))
        assert len(films) > 0
        for film in films:
            assert film.id
            assert film.original_title

    def test_get_person_biography(self):
        """The biography of a known person is a non-empty value."""
        text = self.backend.get_person_biography('nm0223033')
        assert text != ''
        assert text is not None

    def test_get_movie_releases(self):
        """The French release of the reference movie has the expected date."""
        releases = self.backend.get_movie_releases('tt0079980', 'fr')
        assert releases != ''
        assert releases is not None
        assert releases == 'France : 25 April 1979'

    def test_iter_person_movies_ids(self):
        """Movie ids of a person are non-empty values."""
        ids = list(self.backend.iter_person_movies_ids('nm0223033'))
        assert len(ids) > 0
        for one_id in ids:
            assert one_id

    def test_iter_movie_persons_ids(self):
        """Person ids of a movie are non-empty values."""
        ids = list(self.backend.iter_movie_persons_ids('tt0079980'))
        assert len(ids) > 0
        for one_id in ids:
            assert one_id

    def test_fill_person(self):
        """fillobj loads the requested person fields."""
        who = Person('nm0223033', 'dewaere')
        self.backend.fillobj(who, ['birth_place', 'biography'])
        assert who.birth_place
        assert who.biography

    def test_fill_movie(self):
        """fillobj loads the requested movie fields."""
        film = Movie('tt0079980', 'serie noire')
        self.backend.fillobj(film, ['release_date', 'all_release_dates'])
        assert film.release_date
        assert film.all_release_dates