From 4798fdd4119fb3ff10b48b7ee846aab7b878473c Mon Sep 17 00:00:00 2001 From: Julien Veyssier Date: Thu, 14 Mar 2013 20:18:13 +0100 Subject: [PATCH] new backend marmiton --- modules/marmiton/__init__.py | 22 +++++ modules/marmiton/backend.py | 63 ++++++++++++++ modules/marmiton/browser.py | 46 ++++++++++ modules/marmiton/pages.py | 103 +++++++++++++++++++++++ modules/marmiton/test.py | 33 ++++++++ scripts/cookboob | 27 ++++++ weboob/applications/cookboob/cookboob.py | 24 +++--- weboob/capabilities/recipe.py | 7 +- 8 files changed, 312 insertions(+), 13 deletions(-) create mode 100644 modules/marmiton/__init__.py create mode 100644 modules/marmiton/backend.py create mode 100644 modules/marmiton/browser.py create mode 100644 modules/marmiton/pages.py create mode 100644 modules/marmiton/test.py create mode 100755 scripts/cookboob diff --git a/modules/marmiton/__init__.py b/modules/marmiton/__init__.py new file mode 100644 index 0000000000..713a4d9261 --- /dev/null +++ b/modules/marmiton/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2013 Julien Veyssier +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>.
class MarmitonBackend(BaseBackend, ICapRecipe):
    """Backend for the Marmiton recipe website.

    Every operation is delegated to the ``MarmitonBrowser`` instance
    reachable through ``self.browser``.
    """

    NAME = 'marmiton'
    MAINTAINER = u'Julien Veyssier'
    EMAIL = 'julien.veyssier@aiur.fr'
    VERSION = '0.f'
    DESCRIPTION = 'Marmiton recipe website'
    LICENSE = 'AGPLv3+'
    BROWSER = MarmitonBrowser

    def create_default_browser(self):
        """Return the result of ``create_browser()`` called without arguments."""
        browser = self.create_browser()
        return browser

    def get_recipe(self, id):
        """Return whatever the browser's ``get_recipe`` yields for ``id``."""
        return self.browser.get_recipe(id)

    def iter_recipes(self, pattern):
        """Search recipes matching ``pattern``.

        The pattern is encoded to UTF-8 and URL-quoted before being handed
        to the browser, which inserts it into the search URL.
        """
        quoted_pattern = quote_plus(pattern.encode('utf-8'))
        return self.browser.iter_recipes(quoted_pattern)

    def fill_recipe(self, recipe, fields):
        """Complete ``recipe`` in place from its detail page and return it.

        The detail page is fetched only when ``'thumbnail_url'`` or
        ``'instructions'`` is among ``fields``; otherwise ``recipe`` is
        returned untouched.
        """
        triggers = ('thumbnail_url', 'instructions')
        if not any(name in fields for name in triggers):
            return recipe

        detailed = self.get_recipe(recipe.id)
        copied_attributes = (
            'picture_url',
            'instructions',
            'ingredients',
            'comments',
            'nb_person',
            'cooking_time',
            'preparation_time',
        )
        for attribute in copied_attributes:
            setattr(recipe, attribute, getattr(detailed, attribute))
        return recipe

    # Maps an object class to the method used to fill its missing fields.
    OBJECTS = {
        Recipe: fill_recipe,
    }
class MarmitonBrowser(BaseBrowser):
    """Browser for www.marmiton.org search-result and recipe pages."""

    DOMAIN = 'www.marmiton.org'
    PROTOCOL = 'http'
    ENCODING = 'utf-8'
    USER_AGENT = BaseBrowser.USER_AGENTS['wget']
    # URL pattern -> page class handling the matching document.
    PAGES = {
        'http://www.marmiton.org/recettes/recherche.aspx.*': ResultsPage,
        'http://www.marmiton.org/recettes/recette_.*': RecipePage,
    }

    def iter_recipes(self, pattern):
        """Open the search URL for ``pattern`` and return the page's recipes.

        ``pattern`` is interpolated into the query string as-is, so the
        caller is expected to have URL-quoted it already.
        """
        search_url = 'http://www.marmiton.org/recettes/recherche.aspx?st=5&cli=1&aqt=%s' % pattern
        self.location(search_url)
        assert self.is_on_page(ResultsPage)
        results_page = self.page
        return results_page.iter_recipes()

    def get_recipe(self, id):
        """Open the recipe URL built from ``id`` and return the parsed recipe."""
        recipe_url = 'http://www.marmiton.org/recettes/recette_%s.aspx' % id
        self.location(recipe_url)
        assert self.is_on_page(RecipePage)
        recipe_page = self.page
        return recipe_page.get_recipe(id)
class ResultsPage(BasePage):
    """Page which contains results as a list of recipes."""

    def iter_recipes(self):
        """Yield one partially-filled ``Recipe`` per search result.

        Only ``div.m_search_result`` blocks holding exactly two ``td`` cells
        are considered: the first cell may carry the thumbnail image, the
        second the title link and the short description. Fields that are
        only present on the recipe page itself are set to ``NotLoaded``.
        """
        for div in self.parser.select(self.document.getroot(), 'div.m_search_result'):
            tds = self.parser.select(div, 'td')
            if len(tds) != 2:
                continue

            thumbnail_url = NotAvailable
            imgs = self.parser.select(tds[0], 'img')
            if len(imgs) > 0:
                # ``attrib`` is a mapping, not a callable: calling it raised
                # TypeError for every result that had a thumbnail.
                thumbnail_url = unicode(imgs[0].attrib.get('src', ''))

            link = self.parser.select(tds[1], 'div.m_search_titre_recette a', 1)
            title = unicode(link.text)
            # The recipe id is the href stripped of its fixed prefix/suffix.
            href = link.attrib.get('href', '')
            recipe_id = href.replace('.aspx', '').replace('/recettes/recette_', '')

            description_node = self.parser.select(tds[1], 'div.m_search_result_part4', 1)
            # Collapse the multi-line description onto a single line.
            short_description = unicode(' '.join(description_node.text.strip().split('\n')))

            recipe = Recipe(recipe_id, title)
            recipe.thumbnail_url = thumbnail_url
            recipe.short_description = short_description
            recipe.instructions = NotLoaded
            recipe.ingredients = NotLoaded
            recipe.nb_person = NotLoaded
            recipe.cooking_time = NotLoaded
            recipe.preparation_time = NotLoaded
            yield recipe


class RecipePage(BasePage):
    """Page which contains a recipe."""

    def get_recipe(self, id):
        """Build a ``Recipe`` from the current recipe page.

        :param id: identifier stored on the returned ``Recipe``.
        :returns: a ``Recipe`` with title, preparation and cooking times,
            ingredients, instructions and comments filled in.
            ``nb_person`` and ``picture_url`` stay ``NotAvailable`` when the
            page does not provide them; ``thumbnail_url`` is ``NotLoaded``.
        :raises ValueError: from ``int()`` when a time or the person count
            is not a plain integer.
        """
        root = self.document.getroot()
        # Defaults for the two values that are only conditionally present.
        nb_person = NotAvailable
        picture_url = NotAvailable
        comments = []

        title = unicode(self.parser.select(root, 'h1.m_title', 1).text_content().strip())
        main = self.parser.select(root, 'div.m_content_recette_main', 1)
        preparation_time = int(self.parser.select(main, 'p.m_content_recette_info span.preptime', 1).text_content())
        cooking_time = int(self.parser.select(main, 'p.m_content_recette_info span.cooktime', 1).text_content())

        # The ingredients header contains "(pour N personnes)"; keep the
        # first token following "pour ".
        ing_header_line = self.parser.select(main, 'p.m_content_recette_ingredients span', 1).text_content()
        if '(pour' in ing_header_line and ')' in ing_header_line:
            nb_person = int(ing_header_line.split('pour ')[-1].split('personnes)')[0].split()[0])

        # Ingredients are "- "-separated; the chunk before the first
        # separator is dropped.
        ingredients = self.parser.select(main, 'p.m_content_recette_ingredients', 1).text_content().strip().split('- ')
        ingredients = ingredients[1:]

        # Strip every instruction line and terminate each with a newline.
        rinstructions = self.parser.select(main, 'div.m_content_recette_todo', 1).text_content().strip()
        instructions = u''.join('%s\n' % line.strip() for line in rinstructions.split('\n'))

        imgillu = self.parser.select(root, 'a.m_content_recette_illu img')
        if len(imgillu) > 0:
            picture_url = unicode(imgillu[0].attrib.get('src', ''))

        for divcom in self.parser.select(root, 'div.m_commentaire_row'):
            note = self.parser.select(divcom, 'div.m_commentaire_note span', 1).text.strip()
            user = self.parser.select(divcom, 'div.m_commentaire_content span', 1).text.strip()
            content = self.parser.select(divcom, 'div.m_commentaire_content p', 1).text.strip()
            comments.append(u'user: %s, note: %s, comment: %s' % (user, note, content))

        recipe = Recipe(id, title)
        recipe.preparation_time = preparation_time
        recipe.cooking_time = cooking_time
        recipe.nb_person = nb_person
        recipe.ingredients = ingredients
        recipe.instructions = instructions
        recipe.picture_url = picture_url
        recipe.comments = comments
        recipe.thumbnail_url = NotLoaded
        return recipe
class MarmitonTest(BackendTest):
    """Backend test: search for 'fondue', then fetch every result in full."""

    BACKEND = 'marmiton'

    def test_recipe(self):
        """Each recipe found must come back with its main fields non-empty."""
        required_fields = ('instructions', 'ingredients', 'title', 'preparation_time')
        for found in self.backend.iter_recipes('fondue'):
            full_recipe = self.backend.get_recipe(found.id)
            for field_name in required_fields:
                assert getattr(full_recipe, field_name)
+ + +from weboob.applications.cookboob import Cookboob + + +if __name__ == '__main__': + Cookboob.run() diff --git a/weboob/applications/cookboob/cookboob.py b/weboob/applications/cookboob/cookboob.py index 1d986d0203..5209e1bf37 100644 --- a/weboob/applications/cookboob/cookboob.py +++ b/weboob/applications/cookboob/cookboob.py @@ -22,16 +22,16 @@ import sys from weboob.capabilities.recipe import ICapRecipe +from weboob.capabilities.base import NotAvailable, NotLoaded from weboob.tools.application.repl import ReplApplication from weboob.tools.application.formatters.iformatter import IFormatter, PrettyFormatter -from weboob.core import CallErrors __all__ = ['Cookboob'] class RecipeInfoFormatter(IFormatter): - MANDATORY_FIELDS = ('id', 'title', 'preparation_time', 'cooking_time', 'ingredients', 'instructions', 'nb_person') + MANDATORY_FIELDS = ('id', 'title', 'preparation_time', 'cooking_time', 'ingredients', 'instructions', 'nb_person', 'comments') def format_obj(self, obj, alias): result = u'%s%s%s\n' % (self.BOLD, obj.title, self.NC) @@ -39,12 +39,14 @@ def format_obj(self, obj, alias): result += 'Preparation time: %s\n' % obj.preparation_time result += 'Cooking time: %s\n' % obj.cooking_time result += 'Amount of people: %s\n' % obj.nb_person - result += '\n%Ingredients%s\n' % (self.BOLD, self.NC) + result += '\n%sIngredients%s\n' % (self.BOLD, self.NC) for i in obj.ingredients: - result += ' * %s'%i - result += '\n\n%Instructions%s\n' % (self.BOLD, self.NC) - for i in obj.instructions: - result += ' * %s'%i + result += ' * %s\n'%i + result += '\n%sInstructions%s\n' % (self.BOLD, self.NC) + result += '%s\n'%obj.instructions + result += '\n%sComments%s\n' % (self.BOLD, self.NC) + for c in obj.comments: + result += ' * %s\n'%c return result @@ -56,10 +58,10 @@ def get_title(self, obj): def get_description(self, obj): result = u'' - if obj.short_description != NotAvailable: - result += 'description: %s '%obj.short_description - if obj.preparation_time != 
NotAvailable: + if obj.preparation_time != NotAvailable and obj.preparation_time != NotLoaded: result += 'prep time: %smin'%obj.preparation_time + if obj.short_description != NotAvailable: + result += 'description: %s\n'%obj.short_description return result @@ -90,7 +92,7 @@ def do_info(self, id): """ recipe = self.get_object(id, 'get_recipe') - if not recipee: + if not recipe: print >>sys.stderr, 'Recipe not found: %s' % id return 3 diff --git a/weboob/capabilities/recipe.py b/weboob/capabilities/recipe.py index 3d649c8fce..5bc51716fe 100644 --- a/weboob/capabilities/recipe.py +++ b/weboob/capabilities/recipe.py @@ -18,7 +18,7 @@ # along with weboob. If not, see . -from .base import IBaseCap, CapBaseObject, StringField, IntField, UserError, Field +from .base import IBaseCap, CapBaseObject, StringField, IntField, Field __all__ = ['Recipe', 'ICapRecipe'] @@ -30,11 +30,14 @@ class Recipe(CapBaseObject): """ title = StringField('Title of the recipe') thumbnail_url = StringField('Direct url to recipe thumbnail') + picture_url = StringField('Direct url to recipe picture') + short_description = StringField('Short description of a recipe') nb_person = IntField('The recipe was made for this amount of persons') preparation_time = IntField('Preparation time of the recipe in minutes') cooking_time = IntField('Cooking time of the recipe in minutes') ingredients = Field('Ingredient list necessary for the recipe',list) - instructions = Field('Instruction step list of the recipe',list) + instructions = StringField('Instruction step list of the recipe') + comments = Field('User comments about the recipe',list) def __init__(self, id, title): CapBaseObject.__init__(self, id) -- GitLab