diff --git a/modules/750g/backend.py b/modules/750g/backend.py
index 30083cdad5a50335a57850e34e55e5fff79d3baf..e888c17952765bd521d7730d14d4115706f8f179 100644
--- a/modules/750g/backend.py
+++ b/modules/750g/backend.py
@@ -17,7 +17,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 
-from weboob.capabilities.recipe import ICapRecipe,Recipe
+from weboob.capabilities.recipe import ICapRecipe, Recipe
 from weboob.tools.backend import BaseBackend
 
 from .browser import SevenFiftyGramsBrowser
@@ -46,16 +46,16 @@ def iter_recipes(self, pattern):
     def fill_recipe(self, recipe, fields):
         if 'nb_person' in fields or 'instructions' in fields:
             rec = self.get_recipe(recipe.id)
-            recipe.picture_url      = rec.picture_url
-            recipe.instructions     = rec.instructions
-            recipe.ingredients      = rec.ingredients
-            recipe.comments         = rec.comments
-            recipe.nb_person        = rec.nb_person
-            recipe.cooking_time     = rec.cooking_time
+            recipe.picture_url = rec.picture_url
+            recipe.instructions = rec.instructions
+            recipe.ingredients = rec.ingredients
+            recipe.comments = rec.comments
+            recipe.nb_person = rec.nb_person
+            recipe.cooking_time = rec.cooking_time
             recipe.preparation_time = rec.preparation_time
 
         return recipe
 
     OBJECTS = {
-        Recipe:fill_recipe,
-        }
+        Recipe: fill_recipe,
+    }
diff --git a/modules/750g/browser.py b/modules/750g/browser.py
index 69ed93e7d7d02d2c5dcf445bdaaca066ef6e8f61..9636625e700473ce8c20e6bff7fa339a93e6041b 100644
--- a/modules/750g/browser.py
+++ b/modules/750g/browser.py
@@ -34,10 +34,10 @@ class SevenFiftyGramsBrowser(BaseBrowser):
     PAGES = {
         'http://www.750g.com/recettes_.*.htm': ResultsPage,
         'http://www.750g.com/fiche_de_cuisine_complete.htm\?recettes_id=[0-9]*': RecipePage,
-        }
+    }
 
     def iter_recipes(self, pattern):
-        self.location('http://www.750g.com/recettes_%s.htm' % (pattern.replace(' ','_')))
+        self.location('http://www.750g.com/recettes_%s.htm' % (pattern.replace(' ', '_')))
         assert self.is_on_page(ResultsPage)
         return self.page.iter_recipes()
diff --git a/modules/750g/pages.py b/modules/750g/pages.py
index 670bc73b41a03c0abb98346d0e7a559e089f4097..428078f138485787711a153c21acde610f4bd0ab 100644
--- a/modules/750g/pages.py
+++ b/modules/750g/pages.py
@@ -30,35 +30,36 @@ class ResultsPage(BasePage):
     """ Page which contains results as a list of recipies
     """
     def iter_recipes(self):
-        for div in self.parser.select(self.document.getroot(),'div.recette_description > div.data'):
-            links = self.parser.select(div,'div.info > p.title > a.fn')
+        for div in self.parser.select(self.document.getroot(), 'div.recette_description > div.data'):
+            links = self.parser.select(div, 'div.info > p.title > a.fn')
             if len(links) > 0:
                 link = links[0]
                 title = unicode(link.text)
-                #id = unicode(link.attrib.get('href','').strip('/').replace('.htm','htm'))
-                id = unicode(self.parser.select(div,'div.carnet-add a',1).attrib.get('href','').split('=')[-1])
+                # id = unicode(link.attrib.get('href','').strip('/').replace('.htm','htm'))
+                id = unicode(self.parser.select(div, 'div.carnet-add a', 1).attrib.get('href', '').split('=')[-1])
                 thumbnail_url = NotAvailable
                 short_description = NotAvailable
-                imgs = self.parser.select(div,'img.recipe-image')
+                imgs = self.parser.select(div, 'img.recipe-image')
                 if len(imgs) > 0:
-                    thumbnail_url = unicode(imgs[0].attrib.get('src',''))
-                short_description = unicode(' '.join(self.parser.select(div,'div.infos_column',1).text_content().split()).strip())
-                imgs_cost = self.parser.select(div,'div.infos_column img')
+                    thumbnail_url = unicode(imgs[0].attrib.get('src', ''))
+                short_description = unicode(' '.join(self.parser.select(
+                    div, 'div.infos_column', 1).text_content().split()).strip())
+                imgs_cost = self.parser.select(div, 'div.infos_column img')
                 cost_tot = len(imgs_cost)
                 cost_on = 0
                 for img in imgs_cost:
-                    if img.attrib.get('src','').endswith('euro_on.png'):
+                    if img.attrib.get('src', '').endswith('euro_on.png'):
                         cost_on += 1
-                short_description += u' %s/%s'%(cost_on,cost_tot)
+                short_description += u' %s/%s' % (cost_on, cost_tot)
 
-                recipe = Recipe(id,title)
+                recipe = Recipe(id, title)
                 recipe.thumbnail_url = thumbnail_url
-                recipe.short_description= short_description
-                recipe.instructions     = NotLoaded
-                recipe.ingredients      = NotLoaded
-                recipe.nb_person        = NotLoaded
-                recipe.cooking_time     = NotLoaded
+                recipe.short_description = short_description
+                recipe.instructions = NotLoaded
+                recipe.ingredients = NotLoaded
+                recipe.nb_person = NotLoaded
+                recipe.cooking_time = NotLoaded
                 recipe.preparation_time = NotLoaded
                 yield recipe
@@ -76,10 +77,10 @@ def get_recipe(self, id):
         instructions = NotAvailable
         comments = []
 
-        title = unicode(self.parser.select(self.document.getroot(),'head > title',1).text.split(' - ')[1])
-        main = self.parser.select(self.document.getroot(),'div.recette_description',1)
+        title = unicode(self.parser.select(self.document.getroot(), 'head > title', 1).text.split(' - ')[1])
+        main = self.parser.select(self.document.getroot(), 'div.recette_description', 1)
 
-        rec_infos = self.parser.select(self.document.getroot(),'div.recette_infos div.infos_column strong')
+        rec_infos = self.parser.select(self.document.getroot(), 'div.recette_infos div.infos_column strong')
         for info_title in rec_infos:
             if u'Temps de préparation' in unicode(info_title.text):
                 if info_title.tail.strip() != '':
@@ -96,31 +97,31 @@ def get_recipe(self, id):
                 nb_person = int(info_title.tail)
 
         ingredients = []
-        p_ing = self.parser.select(main,'div.data.top.left > div.content p')
+        p_ing = self.parser.select(main, 'div.data.top.left > div.content p')
         for ing in p_ing:
             ingtxt = unicode(ing.text_content().strip())
             if ingtxt != '':
                 ingredients.append(ingtxt)
 
-        lines_instr = self.parser.select(main,'div.data.top.right div.content li')
+        lines_instr = self.parser.select(main, 'div.data.top.right div.content li')
         if len(lines_instr) > 0:
             instructions = u''
             for line in lines_instr:
                 inst = ' '.join(line.text_content().strip().split())
-                instructions += '%s\n'% inst
+                instructions += '%s\n' % inst
             instructions = instructions.strip('\n')
 
-        imgillu = self.parser.select(self.document.getroot(),'div.resume_recette_illustree img.photo')
+        imgillu = self.parser.select(self.document.getroot(), 'div.resume_recette_illustree img.photo')
         if len(imgillu) > 0:
-            picture_url = unicode(imgillu[0].attrib.get('src',''))
+            picture_url = unicode(imgillu[0].attrib.get('src', ''))
 
-        for divcom in self.parser.select(self.document.getroot(),'div.comment-outer'):
+        for divcom in self.parser.select(self.document.getroot(), 'div.comment-outer'):
             comtxt = unicode(' '.join(divcom.text_content().strip().split()))
             if u'| Répondre' in comtxt:
-                comtxt = comtxt.strip('0123456789').replace(u' | Répondre','')
+                comtxt = comtxt.strip('0123456789').replace(u' | Répondre', '')
             comments.append(comtxt)
 
-        recipe = Recipe(id,title)
+        recipe = Recipe(id, title)
         recipe.preparation_time = preparation_time
         recipe.cooking_time = cooking_time
         recipe.nb_person = nb_person
diff --git a/modules/attilasub/backend.py b/modules/attilasub/backend.py
index 06aa4265935632725b262236f33152fe550fd68b..720e30551af11cc579f92035b39828bb91d6113f 100644
--- a/modules/attilasub/backend.py
+++ b/modules/attilasub/backend.py
@@ -17,7 +17,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 
-from weboob.capabilities.subtitle import ICapSubtitle,LanguageNotSupported
+from weboob.capabilities.subtitle import ICapSubtitle, LanguageNotSupported
 from weboob.tools.backend import BaseBackend
 
 from .browser import AttilasubBrowser
@@ -53,4 +53,4 @@ def get_subtitle_file(self, id):
     def iter_subtitles(self, language, pattern):
         if language not in self.LANGUAGE_LIST:
             raise LanguageNotSupported()
-        return self.browser.iter_subtitles(language,quote_plus(pattern.encode('utf-8')))
+        return self.browser.iter_subtitles(language, quote_plus(pattern.encode('utf-8')))
diff --git a/modules/attilasub/browser.py b/modules/attilasub/browser.py
index 26b19d43ff4dac1c32e861a8d3d35ac9b930c0c3..3e018868dd339c6900ee29692de09107d59b0bfc 100644
--- a/modules/attilasub/browser.py
+++ b/modules/attilasub/browser.py
@@ -34,12 +34,13 @@ class AttilasubBrowser(BaseBrowser):
     PAGES = {
         'http://search.freefind.com/find.html.*': SearchPage,
         'http://davidbillemont3.free.fr/.*.htm': SubtitlesPage,
-        }
+    }
 
     def iter_subtitles(self, language, pattern):
-        self.location('http://search.freefind.com/find.html?id=81131980&_charset_=&bcd=%%F7&scs=1&pageid=r&query=%s&mode=Find%%20pages%%20matching%%20ALL%%20words' % pattern.encode('utf-8'))
+        self.location('http://search.freefind.com/find.html?id=81131980&_charset_=&bcd=%%F7&scs=1&pageid=r&query=%s&mode=Find%%20pages%%20matching%%20ALL%%20words' %
+                      pattern.encode('utf-8'))
         assert self.is_on_page(SearchPage)
-        return self.page.iter_subtitles(language,pattern)
+        return self.page.iter_subtitles(language, pattern)
 
     def get_subtitle(self, id):
         url_end = id.split('|')[0]
diff --git a/modules/attilasub/pages.py b/modules/attilasub/pages.py
index 31b2092a87b3f4dddd60c84972c1ed0b2e86e9ce..6828b25fd5eecaf65dd61a35e4599e314a7bcf60 100644
--- a/modules/attilasub/pages.py
+++ b/modules/attilasub/pages.py
@@ -23,16 +23,16 @@
 from weboob.tools.browser import BasePage
 
 
-__all__ = ['SubtitlesPage','SearchPage']
+__all__ = ['SubtitlesPage', 'SearchPage']
 
 
 class SearchPage(BasePage):
     def iter_subtitles(self, language, pattern):
-        fontresult = self.parser.select(self.document.getroot(),'div.search-results font.search-results')
+        fontresult = self.parser.select(self.document.getroot(), 'div.search-results font.search-results')
         # for each result in freefind, explore the subtitle list page to iter subtitles
         for res in fontresult:
-            a = self.parser.select(res,'a',1)
-            url = a.attrib.get('href','')
+            a = self.parser.select(res, 'a', 1)
+            url = a.attrib.get('href', '')
             self.browser.location(url)
             assert self.browser.is_on_page(SubtitlesPage)
             # subtitles page does the job
@@ -41,15 +41,15 @@ def iter_subtitles(self, language, pattern):
 
 
 class SubtitlesPage(BasePage):
-    def get_subtitle(self,id):
+    def get_subtitle(self, id):
         href = id.split('|')[1]
         # we have to find the 'tr' which contains the link to this address
-        a = self.parser.select(self.document.getroot(),'a[href="%s"]'%href,1)
+        a = self.parser.select(self.document.getroot(), 'a[href="%s"]' % href, 1)
         line = a.getparent().getparent().getparent().getparent().getparent()
-        cols = self.parser.select(line,'td')
-        traduced_title = self.parser.select(cols[0],'font',1).text.lower()
-        original_title = self.parser.select(cols[1],'font',1).text.lower()
-        nb_cd = self.parser.select(cols[2],'font',1).text.strip()
+        cols = self.parser.select(line, 'td')
+        traduced_title = self.parser.select(cols[0], 'font', 1).text.lower()
+        original_title = self.parser.select(cols[1], 'font', 1).text.lower()
+        nb_cd = self.parser.select(cols[2], 'font', 1).text.strip()
         nb_cd = int(nb_cd.split()[0])
 
         traduced_title_words = traduced_title.split()
@@ -59,30 +59,30 @@ def get_subtitle(self,id):
         traduced_title = " ".join(traduced_title_words)
         original_title = " ".join(original_title_words)
 
-        name = unicode('%s (%s)'%(original_title,traduced_title))
-        url = unicode('http://davidbillemont3.free.fr/%s'%href)
-        subtitle = Subtitle(id,name)
+        name = unicode('%s (%s)' % (original_title, traduced_title))
+        url = unicode('http://davidbillemont3.free.fr/%s' % href)
+        subtitle = Subtitle(id, name)
         subtitle.url = url
         subtitle.language = unicode('fr')
         subtitle.nb_cd = nb_cd
         subtitle.description = NotAvailable
         return subtitle
 
-    def iter_subtitles(self,language, pattern):
-        pattern = pattern.strip().replace('+',' ').lower()
+    def iter_subtitles(self, language, pattern):
+        pattern = pattern.strip().replace('+', ' ').lower()
         pattern_words = pattern.split()
-        tab = self.parser.select(self.document.getroot(),'table[bordercolor="#B8C0B2"]')
+        tab = self.parser.select(self.document.getroot(), 'table[bordercolor="#B8C0B2"]')
         if len(tab) == 0:
-            tab = self.parser.select(self.document.getroot(),'table[bordercolordark="#B8C0B2"]')
+            tab = self.parser.select(self.document.getroot(), 'table[bordercolordark="#B8C0B2"]')
             if len(tab) == 0:
                 return
         # some results of freefind point on useless pages
-        if tab[0].attrib.get('width','') != '100%':
+        if tab[0].attrib.get('width', '') != '100%':
             return
 
         for line in tab[0].getiterator('tr'):
-            cols = self.parser.select(line,'td')
-            traduced_title = self.parser.select(cols[0],'font',1).text.lower()
-            original_title = self.parser.select(cols[1],'font',1).text.lower()
+            cols = self.parser.select(line, 'td')
+            traduced_title = self.parser.select(cols[0], 'font', 1).text.lower()
+            original_title = self.parser.select(cols[1], 'font', 1).text.lower()
             traduced_title_words = traduced_title.split()
             original_title_words = original_title.split()
@@ -98,13 +98,13 @@ def iter_subtitles(self,language, pattern):
             traduced_title = " ".join(traduced_title_words)
             original_title = " ".join(original_title_words)
 
-            nb_cd = self.parser.select(cols[2],'font',1).text.strip()
+            nb_cd = self.parser.select(cols[2], 'font', 1).text.strip()
             nb_cd = int(nb_cd.strip(' CD'))
-            name = unicode('%s (%s)'%(original_title,traduced_title))
-            href = self.parser.select(cols[3],'a',1).attrib.get('href','')
-            url = unicode('http://davidbillemont3.free.fr/%s'%href)
-            id = unicode('%s|%s'%(self.browser.geturl().split('/')[-1],href))
-            subtitle = Subtitle(id,name)
+            name = unicode('%s (%s)' % (original_title, traduced_title))
+            href = self.parser.select(cols[3], 'a', 1).attrib.get('href', '')
+            url = unicode('http://davidbillemont3.free.fr/%s' % href)
+            id = unicode('%s|%s' % (self.browser.geturl().split('/')[-1], href))
+            subtitle = Subtitle(id, name)
             subtitle.url = url
             subtitle.language = unicode('fr')
             subtitle.nb_cd = nb_cd
diff --git a/modules/attilasub/test.py b/modules/attilasub/test.py
index 2746db658278a73f03392b5799e5ab5d9565654e..2f3eb6ef1c496732af80ddbf3d280bf50dd70513 100644
--- a/modules/attilasub/test.py
+++ b/modules/attilasub/test.py
@@ -27,7 +27,7 @@ class AttilasubTest(BackendTest):
     BACKEND = 'attilasub'
 
     def test_subtitle(self):
-        subtitles = list(self.backend.iter_subtitles('fr','spiderman'))
+        subtitles = list(self.backend.iter_subtitles('fr', 'spiderman'))
         assert (len(subtitles) > 0)
         for subtitle in subtitles:
             path, qs = urllib.splitquery(subtitle.url)
diff --git a/modules/imdb/backend.py b/modules/imdb/backend.py
index cbe258614feb5eb3834f55fc6bf35ee4f1ad9386..782eb9832b02a28f45cce0c689666f66e2f23040 100644
--- a/modules/imdb/backend.py
+++ b/modules/imdb/backend.py
@@ -67,25 +67,25 @@ def get_person_biography(self, id):
         return self.browser.get_person_biography(id)
 
     def get_movie_releases(self, id, country=None):
-        return self.browser.get_movie_releases(id,country)
+        return self.browser.get_movie_releases(id, country)
 
     def fill_person(self, person, fields):
         if 'real_name' in fields or 'birth_place' in fields\
-        or 'death_date' in fields or 'nationality' in fields\
-        or 'short_biography' in fields or 'roles' in fields\
-        or 'birth_date' in fields or 'thumbnail_url' in fields\
-        or 'gender' in fields or fields is None:
+                or 'death_date' in fields or 'nationality' in fields\
+                or 'short_biography' in fields or 'roles' in fields\
+                or 'birth_date' in fields or 'thumbnail_url' in fields\
+                or 'gender' in fields or fields is None:
             per = self.get_person(person.id)
-            person.real_name       = per.real_name
-            person.birth_date      = per.birth_date
-            person.death_date      = per.death_date
-            person.birth_place     = per.birth_place
-            person.gender          = per.gender
-            person.nationality     = per.nationality
+            person.real_name = per.real_name
+            person.birth_date = per.birth_date
+            person.death_date = per.death_date
+            person.birth_place = per.birth_place
+            person.gender = per.gender
+            person.nationality = per.nationality
             person.short_biography = per.short_biography
             person.short_description = per.short_description
-            person.roles           = per.roles
-            person.thumbnail_url   = per.thumbnail_url
+            person.roles = per.roles
+            person.thumbnail_url = per.thumbnail_url
 
         if 'biography' in fields:
             person.biography = self.get_person_biography(person.id)
@@ -94,19 +94,19 @@ def fill_person(self, person, fields):
 
     def fill_movie(self, movie, fields):
         if 'other_titles' in fields or 'release_date' in fields\
-        or 'duration' in fields or 'country' in fields\
-        or 'roles' in fields or 'note' in fields\
-        or 'thumbnail_url' in fields:
+                or 'duration' in fields or 'country' in fields\
+                or 'roles' in fields or 'note' in fields\
+                or 'thumbnail_url' in fields:
             mov = self.get_movie(movie.id)
-            movie.other_titles  = mov.other_titles
-            movie.release_date  = mov.release_date
-            movie.duration      = mov.duration
-            movie.pitch         = mov.pitch
-            movie.country       = mov.country
-            movie.note          = mov.note
-            movie.roles         = mov.roles
-            movie.short_description= mov.short_description
-            movie.thumbnail_url = mov.thumbnail_url
+            movie.other_titles = mov.other_titles
+            movie.release_date = mov.release_date
+            movie.duration = mov.duration
+            movie.pitch = mov.pitch
+            movie.country = mov.country
+            movie.note = mov.note
+            movie.roles = mov.roles
+            movie.short_description = mov.short_description
+            movie.thumbnail_url = mov.thumbnail_url
 
         if 'all_release_dates' in fields:
             movie.all_release_dates = self.get_movie_releases(movie.id)
@@ -114,6 +114,6 @@ def fill_movie(self, movie, fields):
         return movie
 
     OBJECTS = {
-        Person:fill_person,
-        Movie:fill_movie
-        }
+        Person: fill_person,
+        Movie: fill_movie
+    }
diff --git a/modules/imdb/browser.py b/modules/imdb/browser.py
index 7d6a985333593058c6e52c8e88cbedcbee641d65..41175aa3269418e4ea0efa94ee2573fd18f19c00 100644
--- a/modules/imdb/browser.py
+++ b/modules/imdb/browser.py
@@ -42,53 +42,55 @@ class ImdbBrowser(BaseBrowser):
         'http://www.imdb.com/name/nm[0-9]*/*': PersonPage,
         'http://www.imdb.com/name/nm[0-9]*/bio.*': BiographyPage,
         'http://www.imdb.com/name/nm[0-9]*/filmo.*': FilmographyPage,
-        }
+    }
 
     def iter_movies(self, pattern):
         res = self.readurl('http://www.imdb.com/xml/find?json=1&nr=1&tt=on&q=%s' % pattern.encode('utf-8'))
         jres = json.loads(res)
-        for cat in ['title_popular','title_exact','title_approx']:
+        for cat in ['title_popular', 'title_exact', 'title_approx']:
             if cat in jres:
                 for m in jres[cat]:
                     tdesc = unicode(m['title_description'])
                     if '' in tdesc:
-                        short_description = u'%s %s'%(tdesc.split('<')[0].strip(', '), tdesc.split('>')[1].split('<')[0])
+                        short_description = u'%s %s' % (tdesc.split('<')[
+                            0].strip(', '), tdesc.split('>')[1].split('<')[0])
                     else:
                         short_description = tdesc.strip(', ')
 
-                    movie = Movie(m['id'],latin2unicode(m['title']))
-                    movie.other_titles     = NotLoaded
-                    movie.release_date     = NotLoaded
-                    movie.duration         = NotLoaded
+                    movie = Movie(m['id'], latin2unicode(m['title']))
+                    movie.other_titles = NotLoaded
+                    movie.release_date = NotLoaded
+                    movie.duration = NotLoaded
                     movie.short_description = latin2unicode(short_description)
-                    movie.pitch            = NotLoaded
-                    movie.country          = NotLoaded
-                    movie.note             = NotLoaded
-                    movie.roles            = NotLoaded
-                    movie.all_release_dates= NotLoaded
-                    movie.thumbnail_url    = NotLoaded
+                    movie.pitch = NotLoaded
+                    movie.country = NotLoaded
+                    movie.note = NotLoaded
+                    movie.roles = NotLoaded
+                    movie.all_release_dates = NotLoaded
+                    movie.thumbnail_url = NotLoaded
                     yield movie
 
     def iter_persons(self, pattern):
        res = self.readurl('http://www.imdb.com/xml/find?json=1&nr=1&nm=on&q=%s' % pattern.encode('utf-8'))
        jres = json.loads(res)
-        for cat in ['name_popular','name_exact','name_approx']:
+        for cat in ['name_popular', 'name_exact', 'name_approx']:
            if cat in jres:
                for p in jres[cat]:
-                    person = Person(p['id'],latin2unicode(p['name']))
-                    person.real_name      = NotLoaded
-                    person.birth_place    = NotLoaded
-                    person.birth_date     = NotLoaded
-                    person.death_date     = NotLoaded
-                    person.gender         = NotLoaded
-                    person.nationality    = NotLoaded
-                    person.short_biography= NotLoaded
-                    person.short_description= latin2unicode(p['description'])
-                    person.roles          = NotLoaded
-                    person.thumbnail_url  = NotLoaded
+                    person = Person(p['id'], latin2unicode(p['name']))
+                    person.real_name = NotLoaded
+                    person.birth_place = NotLoaded
+                    person.birth_date = NotLoaded
+                    person.death_date = NotLoaded
+                    person.gender = NotLoaded
+                    person.nationality = NotLoaded
+                    person.short_biography = NotLoaded
+                    person.short_description = latin2unicode(p['description'])
+                    person.roles = NotLoaded
+                    person.thumbnail_url = NotLoaded
                     yield person
 
     def get_movie(self, id):
-        res = self.readurl('http://imdbapi.org/?id=%s&type=json&plot=simple&episode=1&lang=en-US&aka=full&release=simple&business=0&tech=0' % id )
+        res = self.readurl(
+            'http://imdbapi.org/?id=%s&type=json&plot=simple&episode=1&lang=en-US&aka=full&release=simple&business=0&tech=0' % id)
         if res is not None:
             jres = json.loads(res)
         else:
@@ -122,7 +124,7 @@ def get_movie(self, id):
         if 'also_known_as' in jres:
             for other_t in jres['also_known_as']:
                 if 'country' in other_t and 'title' in other_t:
-                    other_titles.append('%s : %s' % (other_t['country'],htmlparser.unescape(other_t['title'])))
+                    other_titles.append('%s : %s' % (other_t['country'], htmlparser.unescape(other_t['title'])))
         if 'release_date' in jres:
             dstr = str(jres['release_date'])
             year = int(dstr[:4])
@@ -134,31 +136,31 @@ def get_movie(self, id):
             day = int(dstr[-2:])
             if day == 0:
                 day = 1
-            release_date = datetime(year,month,day)
+            release_date = datetime(year, month, day)
         if 'country' in jres:
             country = u''
             for c in jres['country']:
-                country += '%s, '%c
+                country += '%s, ' % c
             country = country[:-2]
         if 'plot_simple' in jres:
             pitch = unicode(jres['plot_simple'])
         if 'rating' in jres and 'rating_count' in jres:
-            note = u'%s/10 (%s votes)'%(jres['rating'],jres['rating_count'])
-        for r in ['actor','director','writer']:
-            if '%ss'%r in jres:
-                roles['%s'%r] = list(jres['%ss'%r])
-
-        movie = Movie(id,title)
-        movie.other_titles     = other_titles
-        movie.release_date     = release_date
-        movie.duration         = duration
-        movie.pitch            = pitch
-        movie.country          = country
-        movie.note             = note
-        movie.roles            = roles
-        movie.short_description= short_description
-        movie.all_release_dates= NotLoaded
-        movie.thumbnail_url    = thumbnail_url
+            note = u'%s/10 (%s votes)' % (jres['rating'], jres['rating_count'])
+        for r in ['actor', 'director', 'writer']:
+            if '%ss' % r in jres:
+                roles['%s' % r] = list(jres['%ss' % r])
+
+        movie = Movie(id, title)
+        movie.other_titles = other_titles
+        movie.release_date = release_date
+        movie.duration = duration
+        movie.pitch = pitch
+        movie.country = country
+        movie.note = note
+        movie.roles = roles
+        movie.short_description = short_description
+        movie.all_release_dates = NotLoaded
+        movie.thumbnail_url = thumbnail_url
         return movie
 
     def get_person(self, id):
@@ -175,7 +177,7 @@ def get_person_biography(self, id):
         return self.page.get_biography()
 
     def iter_movie_persons(self, movie_id, role):
-        self.location('http://www.imdb.com/title/%s/fullcredits'%movie_id)
+        self.location('http://www.imdb.com/title/%s/fullcredits' % movie_id)
         assert self.is_on_page(MovieCrewPage)
         for p in self.page.iter_persons(role):
             yield p
@@ -192,13 +194,13 @@ def iter_person_movies_ids(self, person_id):
             yield movie
 
     def iter_movie_persons_ids(self, movie_id):
-        self.location('http://www.imdb.com/title/%s/fullcredits'%movie_id)
+        self.location('http://www.imdb.com/title/%s/fullcredits' % movie_id)
         assert self.is_on_page(MovieCrewPage)
         for person in self.page.iter_persons_ids():
             yield person
 
-    def get_movie_releases(self,id, country):
-        self.location('http://www.imdb.com/title/%s/releaseinfo'%id)
+    def get_movie_releases(self, id, country):
+        self.location('http://www.imdb.com/title/%s/releaseinfo' % id)
         assert self.is_on_page(ReleasePage)
         return self.page.get_movie_releases(country)
@@ -222,5 +224,5 @@ def get_movie_releases(self,id, country):
 
 def latin2unicode(word):
     for key in dict_hex.keys():
-        word = word.replace(key,dict_hex[key])
+        word = word.replace(key, dict_hex[key])
     return unicode(word)
diff --git a/modules/imdb/pages.py b/modules/imdb/pages.py
index cdf11ca641c820cd612f78dc5122df578885a1c7..43f5171582c3b2612f67b8a02ad3bf4488599f71 100644
--- a/modules/imdb/pages.py
+++ b/modules/imdb/pages.py
@@ -25,28 +25,28 @@
 from datetime import datetime
 
 
-__all__ = ['PersonPage','MovieCrewPage','BiographyPage','FilmographyPage','ReleasePage']
+__all__ = ['PersonPage', 'MovieCrewPage', 'BiographyPage', 'FilmographyPage', 'ReleasePage']
 
 
 class ReleasePage(BasePage):
     ''' Page containing releases of a movie
     '''
-    def get_movie_releases(self,country_filter):
+    def get_movie_releases(self, country_filter):
         result = unicode()
-        links = self.parser.select(self.document.getroot(),'b a')
+        links = self.parser.select(self.document.getroot(), 'b a')
         for a in links:
-            href = a.attrib.get('href','')
+            href = a.attrib.get('href', '')
             if href.strip('/').split('/')[0] == 'calendar' and\
-            (country_filter is None or href.split('region=')[-1].lower() == country_filter):
+               (country_filter is None or href.split('region=')[-1].lower() == country_filter):
                 country = a.text
-                td_date = self.parser.select(a.getparent().getparent().getparent(),'td')[1]
-                date_links = self.parser.select(td_date,'a')
+                td_date = self.parser.select(a.getparent().getparent().getparent(), 'td')[1]
+                date_links = self.parser.select(td_date, 'a')
                 if len(date_links) > 1:
-                    date = date_links[1].attrib.get('href','').strip('/').split('/')[-1]
-                    date += '-'+date_links[0].attrib.get('href','').strip('/').split('/')[-1]
+                    date = date_links[1].attrib.get('href', '').strip('/').split('/')[-1]
+                    date += '-'+date_links[0].attrib.get('href', '').strip('/').split('/')[-1]
                 else:
-                    date = unicode(self.parser.select(a.getparent().getparent().getparent(),'td')[1].text_content())
-                result += '%s : %s\n' % (country,date)
+                    date = unicode(self.parser.select(a.getparent().getparent().getparent(), 'td')[1].text_content())
+                result += '%s : %s\n' % (country, date)
         if result == u'':
             result = NotAvailable
         else:
@@ -59,11 +59,11 @@ class BiographyPage(BasePage):
     '''
     def get_biography(self):
         bio = unicode()
-        tn = self.parser.select(self.document.getroot(),'div#tn15content',1)
+        tn = self.parser.select(self.document.getroot(), 'div#tn15content', 1)
         # we only read paragraphs, titles and links
         for ch in tn.getchildren():
-            if ch.tag in ['p','h5','a']:
-                bio += '%s\n\n'%ch.text_content().strip()
+            if ch.tag in ['p', 'h5', 'a']:
+                bio += '%s\n\n' % ch.text_content().strip()
         if bio == u'':
             bio = NotAvailable
         return bio
@@ -74,52 +74,52 @@ class MovieCrewPage(BasePage):
     '''
     def iter_persons(self, role_filter=None):
         if (role_filter is None or (role_filter is not None and role_filter == 'actor')):
-            tables = self.parser.select(self.document.getroot(),'table.cast')
+            tables = self.parser.select(self.document.getroot(), 'table.cast')
             if len(tables) > 0:
                 table = tables[0]
-                tds = self.parser.select(table,'td.nm')
+                tds = self.parser.select(table, 'td.nm')
                 for td in tds:
-                    id = td.find('a').attrib.get('href','').strip('/').split('/')[-1]
+                    id = td.find('a').attrib.get('href', '').strip('/').split('/')[-1]
                     name = unicode(td.find('a').text)
-                    char_name = unicode(self.parser.select(td.getparent(),'td.char',1).text_content())
-                    person = Person(id,name)
+                    char_name = unicode(self.parser.select(td.getparent(), 'td.char', 1).text_content())
+                    person = Person(id, name)
                     person.short_description = char_name
-                    person.real_name      = NotLoaded
-                    person.birth_place    = NotLoaded
-                    person.birth_date     = NotLoaded
-                    person.death_date     = NotLoaded
-                    person.gender         = NotLoaded
-                    person.nationality    = NotLoaded
-                    person.short_biography= NotLoaded
-                    person.roles          = NotLoaded
-                    person.thumbnail_url  = NotLoaded
+                    person.real_name = NotLoaded
+                    person.birth_place = NotLoaded
+                    person.birth_date = NotLoaded
+                    person.death_date = NotLoaded
+                    person.gender = NotLoaded
+                    person.nationality = NotLoaded
+                    person.short_biography = NotLoaded
+                    person.roles = NotLoaded
+                    person.thumbnail_url = NotLoaded
                     yield person
 
-        for gloss_link in self.parser.select(self.document.getroot(),'table[cellspacing=1] h5 a'):
-            role = gloss_link.attrib.get('name','').rstrip('s')
+        for gloss_link in self.parser.select(self.document.getroot(), 'table[cellspacing=1] h5 a'):
+            role = gloss_link.attrib.get('name', '').rstrip('s')
             if (role_filter is None or (role_filter is not None and role == role_filter)):
                 tbody = gloss_link.getparent().getparent().getparent().getparent()
-                for line in self.parser.select(tbody,'tr')[1:]:
-                    for a in self.parser.select(line,'a'):
+                for line in self.parser.select(tbody, 'tr')[1:]:
+                    for a in self.parser.select(line, 'a'):
                         role_detail = NotAvailable
-                        href = a.attrib.get('href','')
+                        href = a.attrib.get('href', '')
                         if '/name/nm' in href:
                             id = href.strip('/').split('/')[-1]
                             name = unicode(a.text)
                         if 'glossary' in href:
                             role_detail = unicode(a.text)
-                        person = Person(id,name)
+                        person = Person(id, name)
                         person.short_description = role_detail
                         yield person
-                    #yield self.browser.get_person(id)
+                    # yield self.browser.get_person(id)
 
     def iter_persons_ids(self):
-        tables = self.parser.select(self.document.getroot(),'table.cast')
+        tables = self.parser.select(self.document.getroot(), 'table.cast')
         if len(tables) > 0:
             table = tables[0]
-            tds = self.parser.select(table,'td.nm')
+            tds = self.parser.select(table, 'td.nm')
             for td in tds:
-                id = td.find('a').attrib.get('href','').strip('/').split('/')[-1]
+                id = td.find('a').attrib.get('href', '').strip('/').split('/')[-1]
                 yield id
@@ -127,7 +127,7 @@ class PersonPage(BasePage):
     ''' Page giving informations about a person
     It is used to build a Person instance and to get the movie list related to a person
     '''
-    def get_person(self,id):
+    def get_person(self, id):
         name = NotAvailable
         short_biography = NotAvailable
         short_description = NotAvailable
@@ -139,52 +139,52 @@ def get_person(self,id):
         thumbnail_url = NotAvailable
         roles = {}
         nationality = NotAvailable
-        td_overview = self.parser.select(self.document.getroot(),'td#overview-top',1)
-        descs = self.parser.select(td_overview,'span[itemprop=description]')
+        td_overview = self.parser.select(self.document.getroot(), 'td#overview-top', 1)
+        descs = self.parser.select(td_overview, 'span[itemprop=description]')
         if len(descs) > 0:
             short_biography = unicode(descs[0].text)
-        rname_block = self.parser.select(td_overview,'div.txt-block h4.inline')
+        rname_block = self.parser.select(td_overview, 'div.txt-block h4.inline')
         if len(rname_block) > 0 and "born" in rname_block[0].text.lower():
-            links = self.parser.select(rname_block[0].getparent(),'a')
+            links = self.parser.select(rname_block[0].getparent(), 'a')
             for a in links:
-                href = a.attrib.get('href','').strip()
+                href = a.attrib.get('href', '').strip()
                 if href == 'bio':
                     real_name = unicode(a.text.strip())
                 elif 'birth_place' in href:
                     birth_place = unicode(a.text.lower().strip())
-        names = self.parser.select(td_overview,'h1[itemprop=name]')
+        names = self.parser.select(td_overview, 'h1[itemprop=name]')
         if len(names) > 0:
             name = unicode(names[0].text.strip())
-        times = self.parser.select(td_overview,'time[itemprop=birthDate]')
+        times = self.parser.select(td_overview, 'time[itemprop=birthDate]')
         if len(times) > 0:
-            time = times[0].attrib.get('datetime','').split('-')
+            time = times[0].attrib.get('datetime', '').split('-')
             if len(time) == 3 and int(time[0]) >= 1900:
-                birth_date = datetime(int(time[0]),int(time[1]),int(time[2]))
-        dtimes = self.parser.select(td_overview,'time[itemprop=deathDate]')
+                birth_date = datetime(int(time[0]), int(time[1]), int(time[2]))
+        dtimes = self.parser.select(td_overview, 'time[itemprop=deathDate]')
         if len(dtimes) > 0:
-            dtime = dtimes[0].attrib.get('datetime','').split('-')
+            dtime = dtimes[0].attrib.get('datetime', '').split('-')
             if len(dtime) == 3 and int(dtime[0]) >= 1900:
-                death_date = datetime(int(dtime[0]),int(dtime[1]),int(dtime[2]))
-        img_thumbnail = self.parser.select(self.document.getroot(),'td#img_primary img')
+                death_date = datetime(int(dtime[0]), int(dtime[1]), int(dtime[2]))
+        img_thumbnail = self.parser.select(self.document.getroot(), 'td#img_primary img')
         if len(img_thumbnail) > 0:
-            thumbnail_url = unicode(img_thumbnail[0].attrib.get('src',''))
+            thumbnail_url = unicode(img_thumbnail[0].attrib.get('src', ''))
 
         # go to the filmography page
-        self.browser.location('http://www.imdb.com/name/%s/filmotype'%id)
+        self.browser.location('http://www.imdb.com/name/%s/filmotype' % id)
         assert self.browser.is_on_page(FilmographyPage)
         roles = self.browser.page.get_roles()
 
-        person = Person(id,name)
-        person.real_name       = real_name
-        person.birth_date      = birth_date
-        person.death_date      = death_date
-        person.birth_place     = birth_place
-        person.gender          = gender
-        person.nationality     = nationality
+        person = Person(id, name)
+        person.real_name = real_name
+        person.birth_date = birth_date
+        person.death_date = death_date
+        person.birth_place = birth_place
+        person.gender = gender
+        person.nationality = nationality
         person.short_biography = short_biography
         person.short_description = short_description
-        person.roles           = roles
-        person.thumbnail_url   = thumbnail_url
+        person.roles = roles
+        person.thumbnail_url = thumbnail_url
         return person
@@ -193,39 +193,39 @@ class FilmographyPage(BasePage):
     This page is easier to parse than the main person page filmography
     '''
     def iter_movies_ids(self):
-        for role_div in self.parser.select(self.document.getroot(),'div.filmo'):
-            for a in self.parser.select(role_div,'ol > li > a'):
-                id = a.attrib.get('href','').strip('/').split('/')[-1]
+        for role_div in self.parser.select(self.document.getroot(), 'div.filmo'):
+            for a in self.parser.select(role_div, 'ol > li > a'):
+                id = a.attrib.get('href', '').strip('/').split('/')[-1]
                 if id.startswith('tt'):
                     yield id
 
     def get_roles(self):
         roles = {}
-        for role_div in self.parser.select(self.document.getroot(),'div.filmo'):
-            role = self.parser.select(role_div,'h5 a',1).text.replace(':','')
+        for role_div in self.parser.select(self.document.getroot(), 'div.filmo'):
+            role = self.parser.select(role_div, 'h5 a', 1).text.replace(':', '')
             roles[role] = []
-            for a in self.parser.select(role_div,'ol > li > a'):
-                id = a.attrib.get('href','').strip('/').split('/')[-1]
+            for a in self.parser.select(role_div, 'ol > li > a'):
+                id = a.attrib.get('href', '').strip('/').split('/')[-1]
                 if id.startswith('tt'):
                     if '(' in a.tail and ')' in a.tail:
                         between_p = a.tail.split(')')[0].split('(')[1]
                     else:
                        between_p = '????'
-                    roles[role].append('(%s) %s'%(between_p,a.text))
+                    roles[role].append('(%s) %s' % (between_p, a.text))
         return roles
 
     def iter_movies(self, role_filter=None):
-        for role_div in self.parser.select(self.document.getroot(),'div.filmo'):
-            role = self.parser.select(role_div,'h5 a',1).text.replace(':','')
+        for role_div in self.parser.select(self.document.getroot(), 'div.filmo'):
+            role = self.parser.select(role_div, 'h5 a', 1).text.replace(':', '')
             if (role_filter is None or (role_filter is not None and role.lower().strip() == role_filter))\
-            and role != 'In Development':
-                for a in self.parser.select(role_div,'ol > li > a'):
-                    id = a.attrib.get('href','').strip('/').split('/')[-1]
+                    and role != 'In Development':
+                for a in self.parser.select(role_div, 'ol > li > a'):
+                    id = a.attrib.get('href', '').strip('/').split('/')[-1]
                     if id.startswith('tt'):
                         title = unicode(a.text)
                         role_detail = NotAvailable
                         if len(a.tail) > 0:
-                            role_detail = unicode(' '.join(a.tail.replace('..','').split()))
-                        movie = Movie(id,title)
+                            role_detail = unicode(' '.join(a.tail.replace('..', '').split()))
+                        movie = Movie(id, title)
                         movie.short_description = role_detail
                         yield movie
diff --git a/modules/isohunt/backend.py b/modules/isohunt/backend.py
index 9ac47c4f484936fe4338df0b0d02a5384f7aebde..54293587298e3137c88ff1d63153883d4edde21d 100644
--- a/modules/isohunt/backend.py
+++ b/modules/isohunt/backend.py
@@ -49,7 +49,7 @@ def get_torrent_file(self, id):
         return self.browser.openurl(torrent.url.encode('utf-8')).read()
 
     def iter_torrents(self, pattern):
-        return self.browser.iter_torrents(pattern.replace(' ','+'))
+        return self.browser.iter_torrents(pattern.replace(' ', '+'))
 
     def fill_torrent(self, torrent, fields):
         if 'description' in fields or 'files' in fields:
@@ -61,5 +61,5 @@ def fill_torrent(self, torrent, fields):
         return torrent
 
     OBJECTS = {
-        Torrent:fill_torrent
+        Torrent: fill_torrent
     }
diff --git a/modules/isohunt/browser.py b/modules/isohunt/browser.py
index 51b436cc48277257e7bd516785ca1d3588484937..292a23e823f83421129a3a074f2c752917251bb9 100644
--- a/modules/isohunt/browser.py
+++ b/modules/isohunt/browser.py
@@ -32,9 +32,9 @@ class IsohuntBrowser(BaseBrowser):
     ENCODING = 'utf-8'
     USER_AGENT = BaseBrowser.USER_AGENTS['wget']
     PAGES = {
-        'https://isohunt.com/torrents/.*iht=-1&ihp=1&ihs1=1&iho1=d' : TorrentsPage,
-        'https://isohunt.com/torrent_details.*tab=summary' : TorrentPage,
-        }
+        'https://isohunt.com/torrents/.*iht=-1&ihp=1&ihs1=1&iho1=d': TorrentsPage,
+        'https://isohunt.com/torrent_details.*tab=summary': TorrentPage,
+    }
 
     def home(self):
         return self.location('https://isohunt.com')
diff --git a/modules/isohunt/pages/torrents.py b/modules/isohunt/pages/torrents.py
index 4b01a2bf9f4cfa5f208dde594c7c3c338202304c..2c4a4e50a9481b38c1b4e08f934cc5f29865cb8e 100644
--- a/modules/isohunt/pages/torrents.py
+++ b/modules/isohunt/pages/torrents.py
@@ -73,7 +73,8 @@ def get_torrent(self, id):
         title = NotAvailable
         size = NotAvailable
         url = 'https://isohunt.com/download/%s/%s.torrent' % (id, id)
-        title = unicode(self.parser.select(self.document.getroot(),'head > meta[name=title]',1).attrib.get('content',''))
+        title = unicode(self.parser.select(
+            self.document.getroot(), 'head > meta[name=title]', 1).attrib.get('content', ''))
         seed = NotAvailable
         leech = NotAvailable
         tip_id = "none"
diff --git a/modules/kickass/backend.py b/modules/kickass/backend.py
index 1838d6c8a23609eff88a902026d33d4c82587a0f..8fcab2f54df5a0b53261126aadaaf1f625c06583 100644
--- a/modules/kickass/backend.py
+++ b/modules/kickass/backend.py
@@ -72,5 +72,5 @@ def fill_torrent(self, torrent, fields):
         return torrent
 
     OBJECTS = {
-        Torrent:fill_torrent
+        Torrent: fill_torrent
     }
diff --git a/modules/kickass/browser.py b/modules/kickass/browser.py
index 5b2aaefd3e28059b43e172d9a08054613aee7e59..5cea0ac78a0ff86d095b138074f664784fed253e 100644
--- a/modules/kickass/browser.py
+++ b/modules/kickass/browser.py
@@ -34,7 +34,7 @@ class KickassBrowser(BaseBrowser):
     PAGES = {
         'http://kat.ph/usearch/.*field=seeders&sorder=desc': TorrentsPage,
         'http://kat.ph/.*.html': TorrentPage,
-        }
+    }
 
     def home(self):
         return self.location('http://kat.ph')
diff --git a/modules/kickass/pages.py b/modules/kickass/pages.py
index 92a60ae3e8f02804e0ae8761b07ca0c98f1bd1ea..5f120bdabcc468ec600641938dc7d03d3c5eb434 100644
--- a/modules/kickass/pages.py
+++ b/modules/kickass/pages.py
@@ -31,7 +31,7 @@
 from weboob.tools.misc import get_bytes_size
 
 
-__all__ = ['TorrentsPage','TorrentPage']
+__all__ = ['TorrentsPage', 'TorrentPage']
 
 
 class TorrentsPage(BasePage):
@@ -53,14 +53,14 @@ def iter_torrents(self):
                 .replace('.html', '')
 
             # look for url
-            for a in self.parser.select(tr,'div.iaconbox a'):
+            for a in self.parser.select(tr, 'div.iaconbox a'):
                 href = a.attrib.get('href', '')
                 if href.startswith('magnet'):
                     magnet = unicode(href)
                 elif href.startswith('http'):
                     url = unicode(href)
                 elif href.startswith('//'):
-                    url = u'http:%s'%href
+                    url = u'http:%s' % href
 
             size = tr.getchildren()[1].text
             u = tr.getchildren()[1].getchildren()[0].text
@@ -107,16 +107,16 @@ def get_torrent(self, id):
         leech = 0
 
         title = self.parser.select(self.document.getroot(),
-                'h1.torrentName span', 1)
+                                   'h1.torrentName span', 1)
         title = unicode(title.text)
 
         for a in self.parser.select(self.document.getroot(),
-                'div.downloadButtonGroup a'):
+                                    'div.downloadButtonGroup a'):
             href = a.attrib.get('href', '')
             if href.startswith('magnet'):
                 magnet = unicode(href)
             elif href.startswith('//'):
-                url = u'http:%s'%href
+                url = u'http:%s' % href
             elif href.startswith('http'):
                 url = unicode(href)
@@ -127,7 +127,7 @@ def get_torrent(self, id):
             # is enough to know if this is the right span
             if (span.attrib.get('class', '') == 'folder'
                     or span.attrib.get('class', '') == 'folderopen') \
-                and len(span.getchildren()) > 2:
+                    and len(span.getchildren()) > 2:
                 size = span.getchildren()[1].tail
                 u = span.getchildren()[2].text
                 size = float(size.split(': ')[1].replace(',', '.'))
diff --git a/modules/marmiton/backend.py b/modules/marmiton/backend.py
index 965dd698212e0f7aadc63fccbef880665ea32508..d36b0f3211cc0aa433614fcbc5c6a124aa0d99ac 100644
--- a/modules/marmiton/backend.py
+++ b/modules/marmiton/backend.py
@@ -17,7 +17,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 
-from weboob.capabilities.recipe import ICapRecipe,Recipe
+from weboob.capabilities.recipe import ICapRecipe, Recipe
 from weboob.tools.backend import BaseBackend
 
 from .browser import MarmitonBrowser
@@ -48,16 +48,16 @@ def iter_recipes(self, pattern):
     def fill_recipe(self, recipe, fields):
         if 'nb_person' in fields or 'instructions' in fields:
             rec = self.get_recipe(recipe.id)
-            recipe.picture_url      = rec.picture_url
-            recipe.instructions     = rec.instructions
-            recipe.ingredients      = rec.ingredients
-            recipe.comments         = rec.comments
-            recipe.nb_person        = rec.nb_person
-            recipe.cooking_time     = rec.cooking_time
+            recipe.picture_url = rec.picture_url
+            recipe.instructions = rec.instructions
+            recipe.ingredients = rec.ingredients
+            recipe.comments = rec.comments
+            recipe.nb_person = rec.nb_person
+            recipe.cooking_time = rec.cooking_time
             recipe.preparation_time = rec.preparation_time
 
         return recipe
 
     OBJECTS = {
-        Recipe:fill_recipe,
-        }
+        Recipe: fill_recipe,
+    }
diff --git a/modules/marmiton/browser.py b/modules/marmiton/browser.py
index 9c6a50b5f11fe396fa770cf642d47e5de818eae3..2f606406d1f211f12b3e7d87405d60330eeddf5f 100644
--- a/modules/marmiton/browser.py
+++ b/modules/marmiton/browser.py
@@ -34,7 +34,7 @@ class MarmitonBrowser(BaseBrowser):
     PAGES = {
         'http://www.marmiton.org/recettes/recherche.aspx.*': ResultsPage,
         'http://www.marmiton.org/recettes/recette_.*': RecipePage,
-        }
+    }
 
     def iter_recipes(self, pattern):
         self.location('http://www.marmiton.org/recettes/recherche.aspx?st=5&cli=1&aqt=%s' % (pattern))
diff --git a/modules/marmiton/pages.py b/modules/marmiton/pages.py
index 69e31a56cb063e59f3ed1134119f90d79aa3d531..d1a6a0284a95e1197acbc375b6a4aa109113464e 100644
--- a/modules/marmiton/pages.py
+++ b/modules/marmiton/pages.py
@@ -30,27 +30,28 @@ class ResultsPage(BasePage):
     """ Page which contains results as a list of recipies
     """
     def iter_recipes(self):
-        for div in self.parser.select(self.document.getroot(),'div.m_search_result'):
-            tds = self.parser.select(div,'td')
+        for div in self.parser.select(self.document.getroot(), 'div.m_search_result'):
+            tds = self.parser.select(div, 'td')
             if len(tds) == 2:
                 title = NotAvailable
                 thumbnail_url = NotAvailable
                 short_description = NotAvailable
-                imgs = self.parser.select(tds[0],'img')
+                imgs = self.parser.select(tds[0], 'img')
                 if len(imgs) > 0:
-                    thumbnail_url = unicode(imgs[0].attrib.get('src',''))
-                link = self.parser.select(tds[1],'div.m_search_titre_recette a',1)
+                    thumbnail_url = unicode(imgs[0].attrib.get('src', ''))
+                link = self.parser.select(tds[1], 'div.m_search_titre_recette a', 1)
                 title = unicode(link.text)
-                id = link.attrib.get('href','').replace('.aspx','').replace('/recettes/recette_','')
-                short_description = unicode(' '.join(self.parser.select(tds[1],'div.m_search_result_part4',1).text.strip().split('\n')))
+                id = link.attrib.get('href', '').replace('.aspx', '').replace('/recettes/recette_', '')
+                short_description = unicode(' '.join(self.parser.select(tds[
+                    1], 'div.m_search_result_part4', 1).text.strip().split('\n')))
 
-                recipe = Recipe(id,title)
+                recipe = Recipe(id, title)
                 recipe.thumbnail_url = thumbnail_url
-                recipe.short_description= short_description
-                recipe.instructions     = NotLoaded
-                recipe.ingredients      = NotLoaded
-                recipe.nb_person        = NotLoaded
-                recipe.cooking_time     = NotLoaded
+                recipe.short_description = short_description
+                recipe.instructions = NotLoaded
+                recipe.ingredients = NotLoaded
+                recipe.nb_person = NotLoaded
+                recipe.cooking_time = NotLoaded
                 recipe.preparation_time = NotLoaded
                 yield recipe
@@ -68,30 +69,30 @@ def get_recipe(self, id):
         instructions = NotAvailable
         comments = []
 
-        title = unicode(self.parser.select(self.document.getroot(),'h1.m_title',1).text_content().strip())
-        main = self.parser.select(self.document.getroot(),'div.m_content_recette_main',1)
-        preparation_time = int(self.parser.select(main,'p.m_content_recette_info span.preptime',1).text_content())
-        cooking_time = int(self.parser.select(main,'p.m_content_recette_info span.cooktime',1).text_content())
-        ing_header_line = self.parser.select(main,'p.m_content_recette_ingredients span',1).text_content()
+        title = unicode(self.parser.select(self.document.getroot(), 'h1.m_title', 1).text_content().strip())
+        main = self.parser.select(self.document.getroot(), 'div.m_content_recette_main', 1)
+        preparation_time = int(self.parser.select(main, 'p.m_content_recette_info span.preptime', 1).text_content())
+        cooking_time = int(self.parser.select(main, 'p.m_content_recette_info span.cooktime', 1).text_content())
+        ing_header_line = self.parser.select(main, 'p.m_content_recette_ingredients span', 1).text_content()
         if '(pour' in ing_header_line and ')' in ing_header_line:
             nb_person = int(ing_header_line.split('pour ')[-1].split('personnes)')[0].split()[0])
-        ingredients = self.parser.select(main,'p.m_content_recette_ingredients',1).text_content().strip().split('- ')
-        ingredients=ingredients[1:]
-        rinstructions = self.parser.select(main,'div.m_content_recette_todo',1).text_content().strip()
+        ingredients = self.parser.select(main, 'p.m_content_recette_ingredients', 1).text_content().strip().split('- ')
+        ingredients = ingredients[1:]
+        rinstructions = self.parser.select(main, 'div.m_content_recette_todo', 1).text_content().strip()
         instructions = u''
         for line in rinstructions.split('\n'):
-            instructions += '%s\n'%line.strip()
+            instructions += '%s\n' % line.strip()
         instructions = instructions.strip('\n')
-        imgillu = self.parser.select(self.document.getroot(),'a.m_content_recette_illu img')
+        imgillu = self.parser.select(self.document.getroot(), 'a.m_content_recette_illu img')
         if len(imgillu) > 0:
-            picture_url = unicode(imgillu[0].attrib.get('src',''))
-        for divcom in self.parser.select(self.document.getroot(),'div.m_commentaire_row'):
-            note = self.parser.select(divcom,'div.m_commentaire_note span',1).text.strip()
-            user = self.parser.select(divcom,'div.m_commentaire_content span',1).text.strip()
-            content = self.parser.select(divcom,'div.m_commentaire_content p',1).text.strip()
-            comments.append(u'user: %s, note: %s, comment: %s'%(user,note,content))
+            picture_url = unicode(imgillu[0].attrib.get('src', ''))
+        for divcom in self.parser.select(self.document.getroot(), 'div.m_commentaire_row'):
+            note = self.parser.select(divcom, 'div.m_commentaire_note span', 1).text.strip()
+            user = self.parser.select(divcom, 'div.m_commentaire_content span', 1).text.strip()
+            content = self.parser.select(divcom, 'div.m_commentaire_content p', 1).text.strip()
+            comments.append(u'user: %s, note: %s, comment: %s' % (user, note, content))
 
-        recipe = Recipe(id,title)
+        recipe = Recipe(id, title)
         recipe.preparation_time = preparation_time
         recipe.cooking_time = cooking_time
         recipe.nb_person = nb_person
diff --git a/modules/opensubtitles/backend.py b/modules/opensubtitles/backend.py
index 68f5f336debdf4efb7355decfc4bb4b3893d5ae6..d9f7bf7a5f4a37f90ba41dfb39838c8aeeab517a 100644
--- a/modules/opensubtitles/backend.py
+++ b/modules/opensubtitles/backend.py
@@ -17,7 +17,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 
-from weboob.capabilities.subtitle import ICapSubtitle,LanguageNotSupported,Subtitle
+from weboob.capabilities.subtitle import ICapSubtitle, LanguageNotSupported, Subtitle
 from weboob.applications.suboob.suboob import LANGUAGE_CONV
 from weboob.tools.backend import BaseBackend
@@ -53,15 +53,15 @@ def get_subtitle_file(self, id):
     def iter_subtitles(self, language, pattern):
         if language not in LANGUAGE_CONV.keys():
             raise LanguageNotSupported()
-        return self.browser.iter_subtitles(language,quote_plus(pattern.encode('utf-8')))
+        return self.browser.iter_subtitles(language, quote_plus(pattern.encode('utf-8')))
 
     def fill_subtitle(self, subtitle, fields):
         if 'description' in fields:
             sub = self.get_subtitle(subtitle.id)
-            subtitle.description   = sub.description
+            subtitle.description = sub.description
 
         return subtitle
 
     OBJECTS = {
-        Subtitle:fill_subtitle,
-        }
+        Subtitle: fill_subtitle,
+    }
diff --git a/modules/opensubtitles/browser.py b/modules/opensubtitles/browser.py
index 19b523e5d4bd500f94848590eb52a693c136b27a..5a29a0299a72c24869469fc17bf9113c6381f8b5 100644
--- a/modules/opensubtitles/browser.py
+++ b/modules/opensubtitles/browser.py
@@ -35,13 +35,14 @@ class OpensubtitlesBrowser(BaseBrowser):
     PAGES = {
         'http://www.opensubtitles.org.*search2/sublanguageid.*moviename.*': SearchPage,
         'http://www.opensubtitles.org.*search/sublanguageid.*idmovie.*': SubtitlesPage,
-        'http://www.opensubtitles.org.*search/imdbid.*/sublanguageid.*/moviename.*' : SubtitlesPage,
-        'http://www.opensubtitles.org.*subtitles/[0-9]*/.*' : SubtitlePage
-        }
+        'http://www.opensubtitles.org.*search/imdbid.*/sublanguageid.*/moviename.*': SubtitlesPage,
+        'http://www.opensubtitles.org.*subtitles/[0-9]*/.*': SubtitlePage
+    }
 
     def iter_subtitles(self, language, pattern):
         lang = LANGUAGE_CONV[language]
-        self.location('http://www.opensubtitles.org/search2/sublanguageid-%s/moviename-%s' % (lang,pattern.encode('utf-8')))
+        self.location('http://www.opensubtitles.org/search2/sublanguageid-%s/moviename-%s' % (
+            lang, pattern.encode('utf-8')))
         assert self.is_on_page(SearchPage) or self.is_on_page(SubtitlesPage) or self.is_on_page(SubtitlePage)
         return self.page.iter_subtitles()
diff --git a/modules/opensubtitles/pages.py b/modules/opensubtitles/pages.py
index e7b90d19df98c80babc4b5b5c3556c6fe3d89102..d3058eb91fb99f86aee0db7686a2b08dd2c3c90a 100644
--- a/modules/opensubtitles/pages.py
+++ b/modules/opensubtitles/pages.py
@@ -24,24 +24,24 @@
 from weboob.applications.suboob.suboob import LANGUAGE_CONV
 
 
-__all__ = ['SubtitlesPage','SubtitlePage','SearchPage']
+__all__ = ['SubtitlesPage', 'SubtitlePage', 'SearchPage']
 
 
 class SearchPage(BasePage):
     """ Page which contains results as a list of movies
     """
     def iter_subtitles(self):
-        tabresults = self.parser.select(self.document.getroot(),'table#search_results')
+        tabresults = self.parser.select(self.document.getroot(), 'table#search_results')
         if len(tabresults) > 0:
             table = tabresults[0]
             # for each result line, explore the subtitle list page to iter subtitles
-            for line in self.parser.select(table,'tr'):
-                links = self.parser.select(line,'a')
+            for line in self.parser.select(table, 'tr'):
+                links = self.parser.select(line, 'a')
                 if len(links) > 0:
                     a = links[0]
-                    url = a.attrib.get('href','')
+                    url = a.attrib.get('href', '')
                     if "ads.opensubtitles" not in url:
-                        self.browser.location("http://www.opensubtitles.org%s"%url)
+                        self.browser.location("http://www.opensubtitles.org%s" % url)
                         assert self.browser.is_on_page(SubtitlesPage) or self.browser.is_on_page(SubtitlePage)
                         # subtitles page does the job
                         for subtitle in self.browser.page.iter_subtitles():
@@ -52,48 +52,48 @@ class SubtitlesPage(BasePage):
     """ Page which contains several subtitles for a single movie
     """
     def iter_subtitles(self):
-        tabresults = self.parser.select(self.document.getroot(),'table#search_results')
+        tabresults = self.parser.select(self.document.getroot(), 'table#search_results')
         if len(tabresults) > 0:
             table = tabresults[0]
             # for each result line, get informations
             # why following line doesn't work all the time (for example 'search fr sopranos guy walks' ?
-            #for line in self.parser.select(table,'tr'):
+            # for line in self.parser.select(table,'tr'):
             for line in table.getiterator('tr'):
                 # some tr are useless, specially ads
-                if line.attrib.get('id','').startswith('name'):
+                if line.attrib.get('id', '').startswith('name'):
                     yield self.get_subtitle_from_line(line)
 
-    def get_subtitle_from_line(self,line):
-        cells = self.parser.select(line,'td')
+    def get_subtitle_from_line(self, line):
+        cells = self.parser.select(line, 'td')
         if len(cells) > 0:
-            links = self.parser.select(line,'a')
+            links = self.parser.select(line, 'a')
             a = links[0]
             name = u" ".join(a.text.strip().split())
             first_cell = cells[0]
-            spanlist = self.parser.select(first_cell,'span')
+            spanlist = self.parser.select(first_cell, 'span')
             if len(spanlist) > 0:
-                long_name = spanlist[0].attrib.get('title','')
+                long_name = spanlist[0].attrib.get('title', '')
             else:
                 texts = first_cell.itertext()
                 long_name = texts.next()
                 long_name = texts.next()
                 if "Download at 25" in long_name:
                     long_name = "---"
-            name = "%s (%s)"%(name,long_name)
+            name = "%s (%s)" % (name, long_name)
 
             second_cell = cells[1]
-            link = self.parser.select(second_cell,'a',1)
-            lang = link.attrib.get('href','').split('/')[-1].split('-')[-1]
-            for lshort,llong in LANGUAGE_CONV.items():
+            link = self.parser.select(second_cell, 'a', 1)
+            lang = link.attrib.get('href', '').split('/')[-1].split('-')[-1]
+            for lshort, llong in LANGUAGE_CONV.items():
                 if lang == llong:
                     lang = unicode(lshort)
                     break
 
-            nb_cd = int(cells[2].text.strip().lower().replace('cd',''))
+            nb_cd = int(cells[2].text.strip().lower().replace('cd', ''))
             cell_dl = cells[4]
-            href = self.parser.select(cell_dl,'a',1).attrib.get('href','')
-            url = unicode('http://www.opensubtitles.org%s'%href)
+            href = self.parser.select(cell_dl, 'a', 1).attrib.get('href', '')
+            url = unicode('http://www.opensubtitles.org%s' % href)
             id = href.split('/')[-1]
 
-            subtitle = Subtitle(id,name)
+            subtitle = Subtitle(id, name)
             subtitle.url = url
             subtitle.language = lang
             subtitle.nb_cd = nb_cd
@@ -106,15 +106,15 @@ class SubtitlePage(BasePage):
     """
     def get_subtitle(self):
         desc = NotAvailable
-        father = self.parser.select(self.document.getroot(),'a#app_link',1).getparent()
-        a = self.parser.select(father,'a')[1]
-        id = a.attrib.get('href','').split('/')[-1]
-        url = unicode('http://www.opensubtitles.org/subtitleserve/sub/%s'%id)
-        link = self.parser.select(self.document.getroot(),'link[rel=bookmark]',1)
-        title = unicode(link.attrib.get('title',''))
+        father = self.parser.select(self.document.getroot(), 'a#app_link', 1).getparent()
+        a = self.parser.select(father, 'a')[1]
+        id = a.attrib.get('href', '').split('/')[-1]
+        url = unicode('http://www.opensubtitles.org/subtitleserve/sub/%s' % id)
+        link = self.parser.select(self.document.getroot(), 'link[rel=bookmark]', 1)
+        title = unicode(link.attrib.get('title', ''))
         nb_cd = int(title.lower().split('cd')[0].split()[-1])
         lang = unicode(title.split('(')[1].split(')')[0])
-        file_names = self.parser.select(self.document.getroot(),"img[title~=filename]")
+        file_names = self.parser.select(self.document.getroot(), "img[title~=filename]")
         if len(file_names) > 0:
             file_name = file_names[0].getparent().text_content()
             file_name = ' '.join(file_name.split())
@@ -122,11 +122,11 @@ def get_subtitle(self):
             for f in file_names:
                 desc_line = f.getparent().text_content()
                 desc += '\n'+' '.join(desc_line.split())
-            name = unicode('%s (%s)'%(title,file_name))
+            name = unicode('%s (%s)' % (title, file_name))
 
-        subtitle = Subtitle(id,name)
+        subtitle = Subtitle(id, name)
         subtitle.url = url
-        for lshort,llong in LANGUAGE_CONV.items():
+        for lshort, llong in LANGUAGE_CONV.items():
             if lang == llong:
                 lang = unicode(lshort)
                 break
diff --git a/modules/opensubtitles/test.py b/modules/opensubtitles/test.py
index 66e7e568df186101159be37f7badd8a6c2d5fd23..0fc2526388aa1b5486af5219ba484f190f90722d 100644
--- a/modules/opensubtitles/test.py
+++ b/modules/opensubtitles/test.py
@@ -27,7 +27,7 @@ class OpensubtitlesTest(BackendTest):
 
     def test_subtitle(self):
         lsub = []
-        subtitles = self.backend.iter_subtitles('fr','spiderman')
+        subtitles = self.backend.iter_subtitles('fr', 'spiderman')
         for i in range(5):
             subtitle = subtitles.next()
             lsub.append(subtitle)
diff --git a/modules/parolesmania/backend.py b/modules/parolesmania/backend.py
index 9723237e211da2d981ae0186f52f0bcad1fb029b..59f95047185d119540fab940f3f674153da9f5a1 100644
--- a/modules/parolesmania/backend.py
+++ b/modules/parolesmania/backend.py
@@ -43,7 +43,7 @@ def get_lyrics(self, id):
         return self.browser.get_lyrics(id)
 
     def iter_lyrics(self, criteria, pattern):
-        return self.browser.iter_lyrics(criteria,quote_plus(pattern.encode('utf-8')))
+        return self.browser.iter_lyrics(criteria, quote_plus(pattern.encode('utf-8')))
 
     def fill_songlyrics(self, songlyrics, fields):
         if 'content' in fields:
@@ -52,5 +52,5 @@ def fill_songlyrics(self, songlyrics, fields):
         return songlyrics
 
     OBJECTS = {
-        SongLyrics:fill_songlyrics
+        SongLyrics: fill_songlyrics
     }
diff --git a/modules/parolesmania/browser.py b/modules/parolesmania/browser.py
index f01f6e9d210655f88d09e05ee67f9aa79ced8fd1..e6f2b3567919b14ba19ec1fb5e5537659b237f5e 100644
--- a/modules/parolesmania/browser.py
+++ b/modules/parolesmania/browser.py
@@ -36,13 +36,13 @@ class ParolesmaniaBrowser(BaseBrowser):
         'http://www.parolesmania.com/recherche.php\?c=artist.*': ArtistResultsPage,
         'http://www.parolesmania.com/paroles.*[0-9]*/paroles.*': SonglyricsPage,
         'http://www.parolesmania.com/paroles[^/]*.html': ArtistSongsPage,
-        }
+    }
 
     def iter_lyrics(self, criteria, pattern):
         crit = 'artist'
         if criteria != 'artist':
             crit = 'title'
-        self.location('http://www.parolesmania.com/recherche.php?c=%s&k=%s'%(crit,pattern))
+        self.location('http://www.parolesmania.com/recherche.php?c=%s&k=%s' % (crit, pattern))
         assert self.is_on_page(SongResultsPage) or self.is_on_page(ArtistResultsPage)\
             or self.is_on_page(ArtistSongsPage)
         for lyr in self.page.iter_lyrics():
@@ -50,6 +50,6 @@ def iter_lyrics(self, criteria, pattern):
 
     def get_lyrics(self, id):
         ids = id.split('|')
-        self.location('http://www.parolesmania.com/paroles_%s/paroles_%s.html' % (ids[0],ids[1]))
+        self.location('http://www.parolesmania.com/paroles_%s/paroles_%s.html' % (ids[0], ids[1]))
         assert self.is_on_page(SonglyricsPage)
         return self.page.get_lyrics(id)
diff --git a/modules/parolesmania/pages.py b/modules/parolesmania/pages.py
index b35b946796d8b46eeeadc31ac4db491ba58c7e71..e075d13373be72003cba41cb54dd8f923effdcf4 100644
--- a/modules/parolesmania/pages.py
+++ b/modules/parolesmania/pages.py
@@ -23,32 +23,32 @@
 from weboob.tools.browser import BasePage
 
 
-__all__ = ['SongResultsPage','SonglyricsPage', 'ArtistResultsPage', 'ArtistSongsPage']
+__all__ = ['SongResultsPage', 'SonglyricsPage', 'ArtistResultsPage', 'ArtistSongsPage']
 
 
 class ArtistResultsPage(BasePage):
     def iter_lyrics(self):
-        for link in self.parser.select(self.document.getroot(),'div#albums > h1 a'):
+        for link in self.parser.select(self.document.getroot(), 'div#albums > h1 a'):
             artist = unicode(link.text_content())
-            href = link.attrib.get('href','')
+            href = link.attrib.get('href', '')
             if href.startswith('/paroles'):
-                self.browser.location('http://www.parolesmania.com%s'%href)
+                self.browser.location('http://www.parolesmania.com%s' % href)
                 assert self.browser.is_on_page(ArtistSongsPage)
                 for lyr in self.browser.page.iter_lyrics(artist):
                     yield lyr
 
 
 class ArtistSongsPage(BasePage):
-    def iter_lyrics(self,artist=None):
+    def iter_lyrics(self, artist=None):
         if artist is None:
-            artist = self.parser.select(self.document.getroot(),'head > title',1).text.replace('Paroles ','')
-        for link in self.parser.select(self.document.getroot(),'div#albums a'):
-            href = link.attrib.get('href','')
-            titleattrib = link.attrib.get('title','')
+            artist = self.parser.select(self.document.getroot(), 'head > title', 1).text.replace('Paroles ', '')
+        for link in self.parser.select(self.document.getroot(), 'div#albums a'):
+            href = link.attrib.get('href', '')
+            titleattrib = link.attrib.get('title', '')
             if href.startswith('/paroles') and not href.endswith('alpha.html') and titleattrib.startswith('Paroles '):
                 title = unicode(link.text)
-                ids = href.replace('/','').replace('.html','').split('paroles_')
-                id = '%s|%s'%(ids[1],ids[2])
+                ids = href.replace('/', '').replace('.html', '').split('paroles_')
+                id = '%s|%s' % (ids[1], ids[2])
                 songlyrics = SongLyrics(id, title)
                 songlyrics.artist = artist
                 songlyrics.content = NotLoaded
@@ -57,13 +57,13 @@ def iter_lyrics(self,artist=None):
 
 class SongResultsPage(BasePage):
     def iter_lyrics(self):
-        for link in self.parser.select(self.document.getroot(),'div#albums a'):
+        for link in self.parser.select(self.document.getroot(), 'div#albums a'):
             artist = NotAvailable
             title = unicode(link.text.split(' - ')[0])
-            href = link.attrib.get('href','')
+            href = link.attrib.get('href', '')
             if href.startswith('/paroles') and not href.endswith('alpha.html'):
-                ids = href.replace('/','').replace('.html','').split('paroles_')
-                id = '%s|%s'%(ids[1],ids[2])
+                ids = href.replace('/', '').replace('.html', '').split('paroles_')
+                id = '%s|%s' % (ids[1], ids[2])
                 artist = unicode(link.text.split(' - ')[1])
                 songlyrics = SongLyrics(id, title)
                 songlyrics.artist = artist
@@ -76,12 +76,12 @@ def get_lyrics(self, id):
         content = NotAvailable
         artist = NotAvailable
         title = NotAvailable
-        lyrdiv = self.parser.select(self.document.getroot(),'div#songlyrics_h')
+        lyrdiv = self.parser.select(self.document.getroot(), 'div#songlyrics_h')
         if len(lyrdiv) > 0:
             content = unicode(lyrdiv[0].text_content().strip())
-        infos = self.parser.select(self.document.getroot(),'head > title',1).text
+        infos = self.parser.select(self.document.getroot(), 'head > title', 1).text
         artist = unicode(infos.split(' - ')[1])
-        title = unicode(infos.split(' - ')[0].replace('Paroles ',''))
+        title = unicode(infos.split(' - ')[0].replace('Paroles ', ''))
         songlyrics = SongLyrics(id, title)
         songlyrics.artist = artist
         songlyrics.content = content
diff --git a/modules/parolesmania/test.py b/modules/parolesmania/test.py
index d397eae2bcfb9e2503e030e0ed1f29ae3c494989..5d5089aacf1a5b85ee3d225b93bc15bf4a81bc78 100644
--- a/modules/parolesmania/test.py
+++ b/modules/parolesmania/test.py
@@ -25,7 +25,7 @@ class ParolesmaniaTest(BackendTest):
     BACKEND = 'parolesmania'
 
     def test_search_song_n_get(self):
-        l_lyrics = list(self.backend.iter_lyrics('song','chien'))
+        l_lyrics = list(self.backend.iter_lyrics('song', 'chien'))
         for songlyrics in l_lyrics:
             assert songlyrics.id
             assert songlyrics.title
@@ -38,7 +38,7 @@ def test_search_song_n_get(self):
             assert full_lyr.content is not NotLoaded
 
     def test_search_artist(self):
-        l_lyrics = list(self.backend.iter_lyrics('artist','boris'))
+        l_lyrics = list(self.backend.iter_lyrics('artist', 'boris'))
         for songlyrics in l_lyrics:
             assert songlyrics.id
             assert songlyrics.title
diff --git a/modules/parolesmusique/backend.py b/modules/parolesmusique/backend.py
index be427e65aa33b6e68eb6846f01ad405b04ad693a..663d4789b0f7d9eb2e4e8d871c12ab5153edf59a 100644
--- a/modules/parolesmusique/backend.py
+++ b/modules/parolesmusique/backend.py
@@ -41,7 +41,7 @@ def get_lyrics(self, id):
         return self.browser.get_lyrics(id)
 
     def iter_lyrics(self, criteria, pattern):
-        return self.browser.iter_lyrics(criteria,pattern.encode('utf-8'))
+        return self.browser.iter_lyrics(criteria, pattern.encode('utf-8'))
 
     def fill_songlyrics(self, songlyrics, fields):
         if 'content' in fields:
@@ -50,5 +50,5 @@ def fill_songlyrics(self, songlyrics, fields):
         return songlyrics
 
     OBJECTS = {
-        SongLyrics:fill_songlyrics
+        SongLyrics: fill_songlyrics
     }
diff --git a/modules/parolesmusique/browser.py b/modules/parolesmusique/browser.py
index df8ec4ce0820ec635bcb058faa948b87e3f940f2..a4259054cb317f829a55694898aa5decd4a5f40e 100644
--- a/modules/parolesmusique/browser.py
+++ b/modules/parolesmusique/browser.py
@@ -37,12 +37,12 @@ class ParolesmusiqueBrowser(BaseBrowser):
         'http://www.paroles-musique.com/lyrics-paroles-.*-0,0.php': ArtistResultsPage,
         'http://www.paroles-musique.com/paroles-.*p[0-9]*': SonglyricsPage,
         'http://www.paroles-musique.com/paroles-.*-lyrics,a[0-9]*': ArtistSongsPage,
-        }
+    }
 
     def iter_lyrics(self, criteria, pattern):
         self.location('http://www.paroles-musique.com')
         assert self.is_on_page(HomePage)
-        return self.page.iter_lyrics(criteria,pattern)
+        return self.page.iter_lyrics(criteria, pattern)
 
     def get_lyrics(self, id):
         self.location('http://www.paroles-musique.com/paroles-%s' % id)
diff --git a/modules/parolesmusique/pages.py b/modules/parolesmusique/pages.py
index 7aad55e76393ae8ac59b42330fb7465c516d3f93..aa3c048bf017a9a952ed1a184f66ce6504d64dec 100644
--- a/modules/parolesmusique/pages.py
+++ b/modules/parolesmusique/pages.py
@@ -23,11 +23,11 @@
 from weboob.tools.browser import BasePage
 
 
-__all__ = ['SongResultsPage','SonglyricsPage', 'ArtistResultsPage', 'ArtistSongsPage', 'HomePage']
+__all__ = ['SongResultsPage', 'SonglyricsPage', 'ArtistResultsPage', 'ArtistSongsPage', 'HomePage']
 
 
 class HomePage(BasePage):
-    def iter_lyrics(self,criteria,pattern):
+    def iter_lyrics(self, criteria, pattern):
         self.browser.select_form(name='rechercher')
         if criteria == 'artist':
             self.browser['termes_a'] = pattern
@@ -41,21 +41,21 @@ def iter_lyrics(self,criteria,pattern):
 
 class ArtistResultsPage(BasePage):
     def iter_lyrics(self):
-        for link in self.parser.select(self.document.getroot(),'div.cont_cat table a.std'):
+        for link in self.parser.select(self.document.getroot(), 'div.cont_cat table a.std'):
             artist = unicode(link.text_content())
-            self.browser.location('http://www.paroles-musique.com%s'%link.attrib.get('href',''))
+
self.browser.location('http://www.paroles-musique.com%s' % link.attrib.get('href', '')) assert self.browser.is_on_page(ArtistSongsPage) for lyr in self.browser.page.iter_lyrics(artist): yield lyr class ArtistSongsPage(BasePage): - def iter_lyrics(self,artist): - for link in self.parser.select(self.document.getroot(),'div.cont_catA div.art_scroll a'): - href = link.attrib.get('href','') + def iter_lyrics(self, artist): + for link in self.parser.select(self.document.getroot(), 'div.cont_catA div.art_scroll a'): + href = link.attrib.get('href', '') if href.startswith('./paroles'): title = unicode(link.text) - id = href.replace('./paroles-','') + id = href.replace('./paroles-', '') songlyrics = SongLyrics(id, title) songlyrics.artist = artist songlyrics.content = NotLoaded @@ -65,14 +65,14 @@ def iter_lyrics(self,artist): class SongResultsPage(BasePage): def iter_lyrics(self): first = True - for tr in self.parser.select(self.document.getroot(),'div.cont_cat table tr'): + for tr in self.parser.select(self.document.getroot(), 'div.cont_cat table tr'): if first: first = False continue artist = NotAvailable - links = self.parser.select(tr,'a.std') + links = self.parser.select(tr, 'a.std') title = unicode(links[0].text) - id = links[0].attrib.get('href','').replace('/paroles-','') + id = links[0].attrib.get('href', '').replace('/paroles-', '') artist = unicode(links[1].text) songlyrics = SongLyrics(id, title) songlyrics.artist = artist @@ -84,8 +84,8 @@ class SonglyricsPage(BasePage): def get_lyrics(self, id): artist = NotAvailable title = NotAvailable - content = unicode(self.parser.select(self.document.getroot(),'div#lyr_scroll',1).text_content().strip()) - infos = self.parser.select(self.document.getroot(),'h2.lyrics > font') + content = unicode(self.parser.select(self.document.getroot(), 'div#lyr_scroll', 1).text_content().strip()) + infos = self.parser.select(self.document.getroot(), 'h2.lyrics > font') artist = unicode(infos[0].text) title = unicode(infos[1].text) songlyrics = SongLyrics(id, title) diff --git a/modules/parolesmusique/test.py b/modules/parolesmusique/test.py index 1008b769f3aba24d6a2469c70ac39618d3e375b0..fb02c0ee2ad9e3276fe9c002ee45623ee1393daa 100644 --- a/modules/parolesmusique/test.py +++ b/modules/parolesmusique/test.py @@ -25,7 +25,7 @@ class ParolesmusiqueTest(BackendTest): BACKEND = 'parolesmusique' def test_search_song_n_get(self): - l_lyrics = list(self.backend.iter_lyrics('song','chien')) + l_lyrics = list(self.backend.iter_lyrics('song', 'chien')) for songlyrics in l_lyrics: assert songlyrics.id assert songlyrics.title @@ -38,7 +38,7 @@ def test_search_song_n_get(self): assert full_lyr.content is not NotLoaded def test_search_artist(self): - l_lyrics = list(self.backend.iter_lyrics('artist','boris')) + l_lyrics = list(self.backend.iter_lyrics('artist', 'boris')) for songlyrics in l_lyrics: assert songlyrics.id assert songlyrics.title diff --git a/modules/seeklyrics/backend.py b/modules/seeklyrics/backend.py index beb7b27987cf68eb3dbec6414eb91cb06fc56bcc..4030bc755e6c2fc3f22eee24dcababc39736e36c 100644 --- a/modules/seeklyrics/backend.py +++ b/modules/seeklyrics/backend.py @@ -43,7 +43,7 @@ def get_lyrics(self, id): return self.browser.get_lyrics(id) def iter_lyrics(self, criteria, pattern): - return self.browser.iter_lyrics(criteria,quote_plus(pattern.encode('iso-8859-1'))) + return self.browser.iter_lyrics(criteria, quote_plus(pattern.encode('iso-8859-1'))) def fill_songlyrics(self, songlyrics, fields): if 'content' in fields: @@ -52,5 +52,5 @@ def 
fill_songlyrics(self, songlyrics, fields): return songlyrics OBJECTS = { - SongLyrics:fill_songlyrics + SongLyrics: fill_songlyrics } diff --git a/modules/seeklyrics/browser.py b/modules/seeklyrics/browser.py index 224f8a436b47284e214aea58804b766ed973fd0c..22c04775378a24ed1bd8f24a87f14e3cb0fb5781 100644 --- a/modules/seeklyrics/browser.py +++ b/modules/seeklyrics/browser.py @@ -36,14 +36,14 @@ class SeeklyricsBrowser(BaseBrowser): 'http://www.seeklyrics.com/search.php.*t=2': ArtistResultsPage, 'http://www.seeklyrics.com/lyrics/.*html': SonglyricsPage, 'http://www.seeklyrics.com/lyrics/.*/': ArtistSongsPage, - } + } def iter_lyrics(self, criteria, pattern): if criteria == 'artist': type = 2 else: type = 1 - self.location('http://www.seeklyrics.com/search.php?q=%s&t=%s' % (pattern,type)) + self.location('http://www.seeklyrics.com/search.php?q=%s&t=%s' % (pattern, type)) assert self.is_on_page(ArtistResultsPage) or self.is_on_page(SongResultsPage) return self.page.iter_lyrics() diff --git a/modules/seeklyrics/pages.py b/modules/seeklyrics/pages.py index 6856c68a2174a2e5bb70c39de5233b984c615a52..2134f00b8231c1dd03e38a9566e28ff7efe9125e 100644 --- a/modules/seeklyrics/pages.py +++ b/modules/seeklyrics/pages.py @@ -23,27 +23,27 @@ from weboob.tools.browser import BasePage -__all__ = ['SongResultsPage','SonglyricsPage', 'ArtistResultsPage', 'ArtistSongsPage'] +__all__ = ['SongResultsPage', 'SonglyricsPage', 'ArtistResultsPage', 'ArtistSongsPage'] class ArtistResultsPage(BasePage): def iter_lyrics(self): - for link in self.parser.select(self.document.getroot(),'table[title~=Results] a.tlink'): + for link in self.parser.select(self.document.getroot(), 'table[title~=Results] a.tlink'): artist = unicode(link.text_content()) - self.browser.location('http://www.seeklyrics.com%s'%link.attrib.get('href','')) + self.browser.location('http://www.seeklyrics.com%s' % link.attrib.get('href', '')) assert self.browser.is_on_page(ArtistSongsPage) for lyr in self.browser.page.iter_lyrics(artist): yield lyr class ArtistSongsPage(BasePage): - def iter_lyrics(self,artist): - for th in self.parser.select(self.document.getroot(),'th.text'): + def iter_lyrics(self, artist): + for th in self.parser.select(self.document.getroot(), 'th.text'): txt = th.text_content() if txt.startswith('Top') and txt.endswith('Lyrics'): - for link in self.parser.select(th.getparent().getparent(),'a.tlink'): - title = unicode(link.attrib.get('title','').replace(' Lyrics','')) - id = link.attrib.get('href','').replace('/lyrics/','').replace('.html','') + for link in self.parser.select(th.getparent().getparent(), 'a.tlink'): + title = unicode(link.attrib.get('title', '').replace(' Lyrics', '')) + id = link.attrib.get('href', '').replace('/lyrics/', '').replace('.html', '') songlyrics = SongLyrics(id, title) songlyrics.artist = artist songlyrics.content = NotLoaded @@ -53,15 +53,15 @@ def iter_lyrics(self,artist): class SongResultsPage(BasePage): def iter_lyrics(self): first = True - for tr in self.parser.select(self.document.getroot(),'table[title~=Results] tr'): + for tr in self.parser.select(self.document.getroot(), 'table[title~=Results] tr'): if first: first = False continue artist = NotAvailable - ftitle = self.parser.select(tr,'a > font > font',1) + ftitle = self.parser.select(tr, 'a > font > font', 1) title = unicode(ftitle.getparent().getparent().text_content()) - id = ftitle.getparent().getparent().attrib.get('href','').replace('/lyrics/','').replace('.html','') - aartist = self.parser.select(tr,'a')[-1] + id = 
ftitle.getparent().getparent().attrib.get('href', '').replace('/lyrics/', '').replace('.html', '') + aartist = self.parser.select(tr, 'a')[-1] artist = unicode(aartist.text) songlyrics = SongLyrics(id, title) songlyrics.artist = artist @@ -73,12 +73,12 @@ class SonglyricsPage(BasePage): def get_lyrics(self, id): artist = NotAvailable title = NotAvailable - l_artitle = self.parser.select(self.document.getroot(),'table.text td > b > h2') + l_artitle = self.parser.select(self.document.getroot(), 'table.text td > b > h2') if len(l_artitle) > 0: artitle = l_artitle[0].text.split(' Lyrics by ') artist = unicode(artitle[1]) title = unicode(artitle[0]) - content = unicode(self.parser.select(self.document.getroot(),'div#songlyrics',1).text_content().strip()) + content = unicode(self.parser.select(self.document.getroot(), 'div#songlyrics', 1).text_content().strip()) songlyrics = SongLyrics(id, title) songlyrics.artist = artist songlyrics.content = content diff --git a/modules/seeklyrics/test.py b/modules/seeklyrics/test.py index d0d4056eda8edcae8a880b2091b7f1019b27b6fa..9a0cb9a7109e24036d6db0cfb71d5348650ce8e5 100644 --- a/modules/seeklyrics/test.py +++ b/modules/seeklyrics/test.py @@ -25,7 +25,7 @@ class SeeklyricsTest(BackendTest): BACKEND = 'seeklyrics' def test_search_song_n_get(self): - l_lyrics = list(self.backend.iter_lyrics('song','Complainte')) + l_lyrics = list(self.backend.iter_lyrics('song', 'Complainte')) for songlyrics in l_lyrics: assert songlyrics.id assert songlyrics.title @@ -38,7 +38,7 @@ def test_search_song_n_get(self): assert full_lyr.content is not NotLoaded def test_search_artist(self): - l_lyrics = list(self.backend.iter_lyrics('artist','boris vian')) + l_lyrics = list(self.backend.iter_lyrics('artist', 'boris vian')) for songlyrics in l_lyrics: assert songlyrics.id assert songlyrics.title diff --git a/modules/tvsubtitles/backend.py b/modules/tvsubtitles/backend.py index d7cbaa6ee166ed55bffe724be080bccc9f938607..4b8e89339395b87151f3ab5bcc43a0821b7e1ec6 100644 --- a/modules/tvsubtitles/backend.py +++ b/modules/tvsubtitles/backend.py @@ -17,10 +17,10 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . 
 
-from weboob.capabilities.subtitle import ICapSubtitle,LanguageNotSupported
+from weboob.capabilities.subtitle import ICapSubtitle, LanguageNotSupported
 from weboob.tools.backend import BaseBackend
 
-from .browser import TvsubtitlesBrowser,LANGUAGE_LIST
+from .browser import TvsubtitlesBrowser, LANGUAGE_LIST
 
 from urllib import quote_plus
 
@@ -52,4 +52,4 @@ def get_subtitle_file(self, id):
     def iter_subtitles(self, language, pattern):
         if language not in LANGUAGE_LIST:
             raise LanguageNotSupported()
-        return self.browser.iter_subtitles(language,quote_plus(pattern.encode('utf-8')))
+        return self.browser.iter_subtitles(language, quote_plus(pattern.encode('utf-8')))
diff --git a/modules/tvsubtitles/browser.py b/modules/tvsubtitles/browser.py
index 079e9e44e379e399221ef6e5ef76f1cbc364189a..73c1714a6609882f5e20ef6f8d31196057f0a614 100644
--- a/modules/tvsubtitles/browser.py
+++ b/modules/tvsubtitles/browser.py
@@ -20,14 +20,14 @@
 
 from weboob.tools.browser import BaseBrowser
 
-from .pages import SeriePage, SearchPage, SeasonPage,HomePage
+from .pages import SeriePage, SearchPage, SeasonPage, HomePage
 
 
 __all__ = ['TvsubtitlesBrowser']
 
-LANGUAGE_LIST = ['en','es','fr','de','br','ru','ua','it','gr',
-                 'ar','hu','pl','tr','nl','pt','sv','da','fi',
-                 'ko','cn','jp','bg','cz','ro']
+LANGUAGE_LIST = ['en', 'es', 'fr', 'de', 'br', 'ru', 'ua', 'it', 'gr',
+                 'ar', 'hu', 'pl', 'tr', 'nl', 'pt', 'sv', 'da', 'fi',
+                 'ko', 'cn', 'jp', 'bg', 'cz', 'ro']
 
 
 class TvsubtitlesBrowser(BaseBrowser):
@@ -39,13 +39,13 @@ class TvsubtitlesBrowser(BaseBrowser):
         'http://www.tvsubtitles.net': HomePage,
         'http://www.tvsubtitles.net/search.php': SearchPage,
         'http://www.tvsubtitles.net/tvshow-.*.html': SeriePage,
-        'http://www.tvsubtitles.net/subtitle-[0-9]*-[0-9]*-.*.html' : SeasonPage
-        }
+        'http://www.tvsubtitles.net/subtitle-[0-9]*-[0-9]*-.*.html': SeasonPage
+    }
 
     def iter_subtitles(self, language, pattern):
         self.location('http://www.tvsubtitles.net')
         assert self.is_on_page(HomePage)
-        return self.page.iter_subtitles(language,pattern)
+        return self.page.iter_subtitles(language, pattern)
 
     def get_subtitle(self, id):
         self.location('http://www.tvsubtitles.net/subtitle-%s.html' % id)
diff --git a/modules/tvsubtitles/pages.py b/modules/tvsubtitles/pages.py
index e2ca29d8529bfdfa058f24fb1ba74c8e39258a2e..0e64015030be0e9e81673a70e8995538b392e315 100644
--- a/modules/tvsubtitles/pages.py
+++ b/modules/tvsubtitles/pages.py
@@ -22,11 +22,11 @@
 from weboob.tools.browser import BasePage
 
 
-__all__ = ['HomePage','SearchPage','SeriePage','SeasonPage']
+__all__ = ['HomePage', 'SearchPage', 'SeriePage', 'SeasonPage']
 
 
 class HomePage(BasePage):
-    def iter_subtitles(self,language,pattern):
+    def iter_subtitles(self, language, pattern):
         self.browser.select_form(nr=0)
         self.browser['q'] = pattern.encode('utf-8')
         self.browser.submit()
@@ -38,15 +38,15 @@ def iter_subtitles(self,language,pattern):
 
 class SearchPage(BasePage):
     """ Page which contains results as a list of series """
-    def iter_subtitles(self,language):
-        list_result = self.parser.select(self.document.getroot(),'div.left_articles ul')
+    def iter_subtitles(self, language):
+        list_result = self.parser.select(self.document.getroot(), 'div.left_articles ul')
         if len(list_result) > 0:
-            li_result = self.parser.select(list_result[0],'li')
+            li_result = self.parser.select(list_result[0], 'li')
             for line in li_result:
-                if len(self.parser.select(line,'img[alt=%s]'%language)) > 0:
-                    link = self.parser.select(line,'a',1)
-                    href = link.attrib.get('href','')
-                    self.browser.location("http://%s%s"%(self.browser.DOMAIN,href))
+                if len(self.parser.select(line, 'img[alt=%s]' % language)) > 0:
+                    link = self.parser.select(line, 'a', 1)
+                    href = link.attrib.get('href', '')
+                    self.browser.location("http://%s%s" % (self.browser.DOMAIN, href))
                     assert self.browser.is_on_page(SeriePage)
                     for subtitle in self.browser.page.iter_subtitles(language):
                         yield subtitle
@@ -55,26 +55,27 @@ def iter_subtitles(self,language):
 
 class SeriePage(BasePage):
     """ Page of all seasons """
-    def iter_subtitles(self,language,only_one_season=False):
+    def iter_subtitles(self, language, only_one_season=False):
         # handle the current season
-        last_table_line = self.parser.select(self.document.getroot(),'table#table5 tr')[-1]
-        amount = int(self.parser.select(last_table_line,'td')[2].text_content())
+        last_table_line = self.parser.select(self.document.getroot(), 'table#table5 tr')[-1]
+        amount = int(self.parser.select(last_table_line, 'td')[2].text_content())
         if amount > 0:
-            my_lang_img = self.parser.select(last_table_line,'img[alt=%s]'%language)
+            my_lang_img = self.parser.select(last_table_line, 'img[alt=%s]' % language)
             if len(my_lang_img) > 0:
-                url_current_season = self.browser.geturl().split('/')[-1].replace('tvshow','subtitle').replace('.html','-%s.html'%language)
+                url_current_season = self.browser.geturl().split('/')[-1].replace(
+                    'tvshow', 'subtitle').replace('.html', '-%s.html' % language)
                 self.browser.location(url_current_season)
                 assert self.browser.is_on_page(SeasonPage)
                 yield self.browser.page.iter_subtitles()
 
        if not only_one_season:
             # handle the other seasons by following top links
-            other_seasons_links = self.parser.select(self.document.getroot(),'p.description a')
+            other_seasons_links = self.parser.select(self.document.getroot(), 'p.description a')
             for link in other_seasons_links:
-                href = link.attrib.get('href','')
-                self.browser.location("http://%s/%s"%(self.browser.DOMAIN,href))
+                href = link.attrib.get('href', '')
+                self.browser.location("http://%s/%s" % (self.browser.DOMAIN, href))
                 assert self.browser.is_on_page(SeriePage)
-                for subtitle in self.browser.page.iter_subtitles(language,True):
+                for subtitle in self.browser.page.iter_subtitles(language, True):
                     yield subtitle
 
 
@@ -82,19 +83,19 @@ class SeasonPage(BasePage):
     """
     Page of a season with the right language
    """
     def get_subtitle(self):
-        filename_line = self.parser.select(self.document.getroot(),'img[alt=filename]',1).getparent().getparent()
-        name = unicode(self.parser.select(filename_line,'td')[2].text)
-        id = self.browser.geturl().split('/')[-1].replace('.html','').replace('subtitle-','')
-        url = unicode('http://%s/download-%s.html'%(self.browser.DOMAIN,id))
-        amount_line = self.parser.select(self.document.getroot(),'tr[title~=amount]',1)
-        nb_cd = int(self.parser.select(amount_line,'td')[2].text)
+        filename_line = self.parser.select(self.document.getroot(), 'img[alt=filename]', 1).getparent().getparent()
+        name = unicode(self.parser.select(filename_line, 'td')[2].text)
+        id = self.browser.geturl().split('/')[-1].replace('.html', '').replace('subtitle-', '')
+        url = unicode('http://%s/download-%s.html' % (self.browser.DOMAIN, id))
+        amount_line = self.parser.select(self.document.getroot(), 'tr[title~=amount]', 1)
+        nb_cd = int(self.parser.select(amount_line, 'td')[2].text)
         lang = unicode(url.split('-')[-1].split('.html')[0])
-        filenames_line = self.parser.select(self.document.getroot(),'tr[title~=list]',1)
-        file_names = self.parser.select(filenames_line,'td')[2].text_content().strip().replace('.srt','.srt\n')
+        filenames_line = self.parser.select(self.document.getroot(), 'tr[title~=list]', 1)
+        file_names = self.parser.select(filenames_line, 'td')[2].text_content().strip().replace('.srt', '.srt\n')
         desc = u"files :\n"
         desc += file_names
-        subtitle = Subtitle(id,name)
+        subtitle = Subtitle(id, name)
         subtitle.url = url
         subtitle.language = lang
         subtitle.nb_cd = nb_cd
diff --git a/modules/tvsubtitles/test.py b/modules/tvsubtitles/test.py
index 2432ee0026cc52c716ca694d9c73764278c884ae..e1515efd6daa78ffdf5d6976f221e0ffd4d31461 100644
--- a/modules/tvsubtitles/test.py
+++ b/modules/tvsubtitles/test.py
@@ -26,7 +26,7 @@ class TvsubtitlesTest(BackendTest):
     BACKEND = 'tvsubtitles'
 
     def test_subtitle(self):
-        subtitles = list(self.backend.iter_subtitles('fr','sopranos'))
+        subtitles = list(self.backend.iter_subtitles('fr', 'sopranos'))
         assert (len(subtitles) > 0)
         for subtitle in subtitles:
             assert subtitle.url.startswith('http')