From 3c993099b0da3b7b33bc9d4268729ef1db9c7588 Mon Sep 17 00:00:00 2001 From: Julien Veyssier Date: Thu, 28 Mar 2013 20:28:34 +0100 Subject: [PATCH] new backend allrecipes --- modules/allrecipes/__init__.py | 22 +++++++ modules/allrecipes/backend.py | 62 ++++++++++++++++++ modules/allrecipes/browser.py | 51 +++++++++++++++ modules/allrecipes/pages.py | 115 +++++++++++++++++++++++++++++++++ modules/allrecipes/test.py | 32 +++++++++ 5 files changed, 282 insertions(+) create mode 100644 modules/allrecipes/__init__.py create mode 100644 modules/allrecipes/backend.py create mode 100644 modules/allrecipes/browser.py create mode 100644 modules/allrecipes/pages.py create mode 100644 modules/allrecipes/test.py diff --git a/modules/allrecipes/__init__.py b/modules/allrecipes/__init__.py new file mode 100644 index 0000000000..c66314c304 --- /dev/null +++ b/modules/allrecipes/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2013 Julien Veyssier +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. + +from .backend import AllrecipesBackend + +__all__ = ['AllrecipesBackend'] diff --git a/modules/allrecipes/backend.py b/modules/allrecipes/backend.py new file mode 100644 index 0000000000..c588bf5ea5 --- /dev/null +++ b/modules/allrecipes/backend.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2013 Julien Veyssier +# +# This file is part of weboob. 
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.capabilities.recipe import ICapRecipe, Recipe
+from weboob.tools.backend import BaseBackend
+
+from .browser import AllrecipesBrowser
+
+__all__ = ['AllrecipesBackend']
+
+
+class AllrecipesBackend(BaseBackend, ICapRecipe):
+    """Recipe backend for allrecipes.com.
+
+    Every capability method delegates to the AllrecipesBrowser instance
+    reached through ``self.browser``.
+    """
+    NAME = 'allrecipes'
+    MAINTAINER = u'Julien Veyssier'
+    EMAIL = 'julien.veyssier@aiur.fr'
+    VERSION = '0.g'
+    DESCRIPTION = u'Allrecipes English recipe website'
+    LICENSE = 'AGPLv3+'
+    BROWSER = AllrecipesBrowser
+
+    def create_default_browser(self):
+        """Build the browser with no extra arguments."""
+        return self.create_browser()
+
+    def get_recipe(self, id):
+        """Return whatever the browser's get_recipe(id) returns.
+
+        The browser returns None when the page is not found or is not a
+        recipe page, so callers must be ready for None.
+        """
+        return self.browser.get_recipe(id)
+
+    def iter_recipes(self, pattern):
+        """Iterate over the recipes matching ``pattern``.
+
+        The pattern is handed to the browser encoded as UTF-8.
+        """
+        return self.browser.iter_recipes(pattern.encode('utf-8'))
+
+    def fill_recipe(self, recipe, fields):
+        """Complete ``recipe`` in place from its detail page and return it.
+
+        The detail page is fetched only when 'nb_person' or 'instructions'
+        is among ``fields``.  If the lookup yields nothing, ``recipe`` is
+        returned unchanged.
+        """
+        if 'nb_person' in fields or 'instructions' in fields:
+            rec = self.get_recipe(recipe.id)
+            # get_recipe() may return None (page gone / unexpected page);
+            # copying attributes from None would raise AttributeError.
+            if rec is not None:
+                for name in ('picture_url', 'instructions', 'ingredients',
+                             'comments', 'author', 'nb_person',
+                             'cooking_time', 'preparation_time'):
+                    setattr(recipe, name, getattr(rec, name))
+
+        return recipe
+
+    # Maps each fillable object type to the method that fills it.
+    OBJECTS = {
+        Recipe: fill_recipe,
+    }
diff --git a/modules/allrecipes/browser.py b/modules/allrecipes/browser.py
new file mode 100644
index 0000000000..eccc2306cf
--- /dev/null
+++ b/modules/allrecipes/browser.py
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Julien Veyssier
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.browser import BaseBrowser, BrowserHTTPNotFound
+
+from .pages import RecipePage, ResultsPage, FourOFourPage
+
+
+__all__ = ['AllrecipesBrowser']
+
+
+class AllrecipesBrowser(BaseBrowser):
+    """Browser for allrecipes.com: keyword search and recipe detail pages."""
+    DOMAIN = 'allrecipes.com'
+    PROTOCOL = 'http'
+    ENCODING = 'utf-8'
+    USER_AGENT = BaseBrowser.USER_AGENTS['wget']
+    # URL regex -> page class.  Raw strings, with '.' and '?' escaped so
+    # they only match themselves.
+    PAGES = {
+        r'http://allrecipes\.com/search/default\.aspx\?qt=k&wt=.*&rt=r&origin=.*': ResultsPage,
+        r'http://allrecipes\.com/Recipe/.*/Detail\.aspx': RecipePage,
+        r'http://allrecipes\.com/404\.aspx.*': FourOFourPage,
+    }
+
+    def iter_recipes(self, pattern):
+        """Run a keyword search and return the results page's recipe iterator.
+
+        ``pattern`` is the search text (UTF-8 bytes, or unicode which is
+        encoded here).  It is percent-encoded before being placed in the
+        query string so characters such as '&' or '#' cannot alter the URL.
+        """
+        # Python 2 stdlib; imported locally so the module's import block
+        # does not have to change.
+        import urllib
+
+        if isinstance(pattern, unicode):
+            pattern = pattern.encode('utf-8')
+        query = urllib.quote_plus(pattern)
+        self.location('http://allrecipes.com/search/default.aspx?qt=k&wt=%s&rt=r&origin=Home%%20Page' % query)
+        assert self.is_on_page(ResultsPage)
+        return self.page.iter_recipes()
+
+    def get_recipe(self, id):
+        """Return the recipe parsed from the detail page of ``id``.
+
+        Returns None when the request raises BrowserHTTPNotFound or the
+        browser does not end up on a RecipePage.
+        """
+        try:
+            self.location('http://allrecipes.com/Recipe/%s/Detail.aspx' % id)
+        except BrowserHTTPNotFound:
+            return None
+        if self.is_on_page(RecipePage):
+            return self.page.get_recipe(id)
+        return None
diff --git a/modules/allrecipes/pages.py b/modules/allrecipes/pages.py
new file mode 100644
index 0000000000..220e964022
--- /dev/null
+++ b/modules/allrecipes/pages.py
@@ -0,0 +1,115 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Julien Veyssier
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.capabilities.recipe import Recipe, Comment
+from weboob.capabilities.base import NotAvailable, NotLoaded
+from weboob.tools.browser import BasePage
+
+
+__all__ = ['RecipePage', 'ResultsPage', 'FourOFourPage']
+
+
+def _parse_int(text):
+    """Return int(text), or NotAvailable when text is None or not a number."""
+    try:
+        return int(text)
+    except (TypeError, ValueError):
+        return NotAvailable
+
+
+class FourOFourPage(BasePage):
+    """Page matched by the site's 404.aspx URL; nothing is parsed from it."""
+
+
+class ResultsPage(BasePage):
+    """ Page which contains results as a list of recipes
+    """
+    def iter_recipes(self):
+        """Yield one partially filled Recipe per 'div.recipe-info' element.
+
+        Only id, title and thumbnail_url are read from the page; the
+        detail attributes are left as NotLoaded.  Entries whose link does
+        not carry a recipe id are skipped.
+        """
+        for div in self.parser.select(self.document.getroot(), 'div.recipe-info'):
+            thumbnail_url = NotAvailable
+            imgs = self.parser.select(div.getparent(), 'img')
+            if len(imgs) > 0:
+                url = unicode(imgs[0].attrib.get('src', ''))
+                # only absolute http URLs are kept as thumbnails
+                if url.startswith('http://'):
+                    thumbnail_url = url
+
+            link = self.parser.select(div, 'a.title', 1)
+            title = unicode(link.text)
+            # the id is the third '/'-separated piece of the href
+            parts = link.attrib.get('href', '').split('/')
+            if len(parts) < 3 or not parts[2]:
+                # no id to load the recipe with later: skip instead of
+                # raising IndexError and aborting the whole listing
+                continue
+            recipe_id = unicode(parts[2])
+
+            recipe = Recipe(recipe_id, title)
+            recipe.thumbnail_url = thumbnail_url
+            recipe.short_description = NotAvailable
+            recipe.instructions = NotLoaded
+            recipe.ingredients = NotLoaded
+            recipe.nb_person = NotLoaded
+            recipe.cooking_time = NotLoaded
+            recipe.preparation_time = NotLoaded
+            recipe.author = NotLoaded
+            yield recipe
+
+
+class RecipePage(BasePage):
+    """ Page which contains a recipe
+    """
+    def get_recipe(self, id):
+        """Build a Recipe with the given ``id`` from this page.
+
+        Numeric fields (preparation_time, cooking_time, nb_person) are
+        NotAvailable when their element is missing or its text is not a
+        number.  author is always NotAvailable and comments always an
+        empty list: this page class does not parse them.
+        """
+        root = self.document.getroot()
+        select = self.parser.select
+
+        title = unicode(select(root, 'h1#itemTitle', 1).text)
+
+        picture_url = NotAvailable
+        imgillu = select(root, 'img#imgPhoto')
+        if len(imgillu) > 0:
+            picture_url = unicode(imgillu[0].attrib.get('src', ''))
+
+        # keep only non-empty ingredient lines
+        ingredients = []
+        for ing in select(root, 'li#liIngredient'):
+            ingtxt = unicode(ing.text_content().strip())
+            if ingtxt != '':
+                ingredients.append(ingtxt)
+
+        # numbered steps, one per line, starting at 1
+        instructions = u''.join(
+            '%s: %s\n' % (num_instr, inst.text_content())
+            for num_instr, inst in enumerate(select(root, 'div.directLeft li'), 1))
+
+        preparation_time = NotAvailable
+        emprep = select(root, 'span#prepMinsSpan em')
+        if len(emprep) > 0:
+            preparation_time = _parse_int(emprep[0].text)
+
+        cooking_time = NotAvailable
+        emcooktime = select(root, 'span#cookMinsSpan em')
+        if len(emcooktime) > 0:
+            cooking_time = _parse_int(emcooktime[0].text)
+
+        nb_person = NotAvailable
+        l_nbpers = select(root, 'span#lblYield[itemprop=recipeYield]')
+        if len(l_nbpers) > 0:
+            # the count is the first whitespace-separated word of the text
+            words = (l_nbpers[0].text or '').split()
+            if words:
+                nb_person = _parse_int(words[0])
+
+        recipe = Recipe(id, title)
+        recipe.preparation_time = preparation_time
+        recipe.cooking_time = cooking_time
+        recipe.nb_person = nb_person
+        recipe.ingredients = ingredients
+        recipe.instructions = instructions
+        recipe.picture_url = picture_url
+        recipe.comments = []
+        recipe.author = NotAvailable
+        recipe.thumbnail_url = NotLoaded
+        return recipe
diff --git a/modules/allrecipes/test.py b/modules/allrecipes/test.py
new file mode 100644
index 0000000000..4f0a33578b
--- /dev/null
+++ b/modules/allrecipes/test.py
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Julien Veyssier
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.tools.test import BackendTest
+
+
+class AllrecipesTest(BackendTest):
+    """Search-then-fetch check for the allrecipes backend."""
+    BACKEND = 'allrecipes'
+
+    def test_recipe(self):
+        """Search 'french fries' and fetch every result's full recipe."""
+        recipes = list(self.backend.iter_recipes('french fries'))
+        # without this the loop below would pass vacuously on no results
+        assert recipes, 'search returned no recipe'
+        for recipe in recipes:
+            full_recipe = self.backend.get_recipe(recipe.id)
+            # get_recipe() may return None; fail with a clear message
+            assert full_recipe is not None, 'no recipe for id %s' % recipe.id
+            assert full_recipe.instructions
+            assert full_recipe.ingredients
+            assert full_recipe.title
-- 
GitLab