diff --git a/modules/allrecipes/browser.py b/modules/allrecipes/browser.py
index d45e643353d639facd7c77fc40241d4e265afb7b..0281b800538cbf887b653cda1e10f50843e5dad8 100644
--- a/modules/allrecipes/browser.py
+++ b/modules/allrecipes/browser.py
@@ -16,36 +16,24 @@
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-
-
-from weboob.deprecated.browser import Browser, BrowserHTTPNotFound
-
-from .pages import RecipePage, ResultsPage, FourOFourPage
-
+from weboob.browser import PagesBrowser, URL
+from .pages import ResultsPage, RecipePage
__all__ = ['AllrecipesBrowser']
-class AllrecipesBrowser(Browser):
- DOMAIN = 'allrecipes.com'
- PROTOCOL = 'http'
- ENCODING = 'utf-8'
- USER_AGENT = Browser.USER_AGENTS['wget']
- PAGES = {
- 'http://allrecipes.com/search/default.aspx\?qt=k&wt=.*&rt=r&origin=.*': ResultsPage,
- 'http://allrecipes.com/Recipe/.*/Detail.aspx': RecipePage,
- 'http://allrecipes.com/404.aspx.*': FourOFourPage
- }
+class AllrecipesBrowser(PagesBrowser):
+ """Browser for allrecipes.com: recipe search results and recipe pages."""
+
+ BASEURL = 'http://allrecipes.com'
+ # Raw strings keep the regex escapes (\? and \&) literal instead of
+ # relying on Python passing unknown string escapes through.
+ # The named group has to be "pattern": iter_recipes() builds the search
+ # URL with results.go(pattern=...).
+ results = URL(r'search/results/\?wt=(?P<pattern>.*)\&sort=re',
+ r'recipes/.*', ResultsPage)
+ # "_id" is the keyword get_recipe() passes to recipe.go().
+ recipe = URL(r'recipe/(?P<_id>.*)/', RecipePage)
def iter_recipes(self, pattern):
- self.location('http://allrecipes.com/search/default.aspx?qt=k&wt=%s&rt=r&origin=Home%%20Page' % (pattern))
- assert self.is_on_page(ResultsPage)
- return self.page.iter_recipes()
-
- def get_recipe(self, id):
- try:
- self.location('http://allrecipes.com/Recipe/%s/Detail.aspx' % id)
- except BrowserHTTPNotFound:
- return
- if self.is_on_page(RecipePage):
- return self.page.get_recipe(id)
+ # Opens the search results for `pattern` and returns the page's recipe
+ # iterator. `pattern` is placed in the URL as given; the module
+ # URL-quotes it before calling this method.
+ return self.results.go(pattern=pattern).iter_recipes()
+
+ def get_recipe(self, _id, obj=None):
+ """Open the recipe page for `_id` and return the parsed recipe.
+
+ `obj` is forwarded to RecipePage.get_recipe(). Comments found on the
+ page are attached to the recipe only when there is at least one.
+ """
+ recipe = self.recipe.go(_id=_id).get_recipe(obj=obj)
+ comments = list(self.page.get_comments())
+ if comments:
+ recipe.comments = comments
+ return recipe
diff --git a/modules/allrecipes/module.py b/modules/allrecipes/module.py
index f629b90fa3350cbdd6316b88b348078633c708d1..f4623ad2549981f898ca94c42dec48d042df6266 100644
--- a/modules/allrecipes/module.py
+++ b/modules/allrecipes/module.py
@@ -43,19 +43,8 @@ def iter_recipes(self, pattern):
return self.browser.iter_recipes(quote_plus(pattern.encode('utf-8')))
def fill_recipe(self, recipe, fields):
- if 'nb_person' in fields or 'instructions' in fields:
- rec = self.get_recipe(recipe.id)
- recipe.picture_url = rec.picture_url
- recipe.instructions = rec.instructions
- recipe.ingredients = rec.ingredients
- recipe.comments = rec.comments
- recipe.author = rec.author
- recipe.nb_person = rec.nb_person
- recipe.cooking_time = rec.cooking_time
- recipe.preparation_time = rec.preparation_time
-
+ # Re-read the recipe page (filling the given object) only when one of
+ # these fields is requested; otherwise the recipe is returned untouched.
+ detail_fields = ('nb_person', 'instructions', 'thumbnail_url')
+ if any(name in fields for name in detail_fields):
+ recipe = self.browser.get_recipe(recipe.id, recipe)
return recipe
- OBJECTS = {
- Recipe: fill_recipe,
- }
+ # Associates Recipe objects with fill_recipe (presumably the table weboob
+ # consults when asked to fill an object -- confirm against the framework).
+ OBJECTS = {Recipe: fill_recipe}
diff --git a/modules/allrecipes/pages.py b/modules/allrecipes/pages.py
index 8fc91e35f4788bc2e269a47e86a26cfc8c419387..9ab519ae72db773a35b6b6b50cbb0df663da4650 100644
--- a/modules/allrecipes/pages.py
+++ b/modules/allrecipes/pages.py
@@ -18,109 +18,80 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-from weboob.capabilities.recipe import Recipe
-from weboob.capabilities.base import NotAvailable, NotLoaded
-from weboob.deprecated.browser import Page
-
-
-class FourOFourPage(Page):
- pass
-
-
-class ResultsPage(Page):
- """ Page which contains results as a list of recipies
- """
-
- def iter_recipes(self):
- for div in self.parser.select(self.document.getroot(), 'div.recipe-info'):
- thumbnail_url = NotAvailable
- short_description = NotAvailable
- imgs = self.parser.select(div.getparent(), 'img')
- if len(imgs) > 0:
- url = unicode(imgs[0].attrib.get('src', ''))
- if url.startswith('http://'):
- thumbnail_url = url
-
- link = self.parser.select(div, 'a.title', 1)
- title = unicode(link.text)
- id = unicode(link.attrib.get('href', '').split('/')[2])
-
- recipe = Recipe(id, title)
- recipe.thumbnail_url = thumbnail_url
- recipe.short_description = short_description
- recipe.instructions = NotLoaded
- recipe.ingredients = NotLoaded
- recipe.nb_person = NotLoaded
- recipe.cooking_time = NotLoaded
- recipe.preparation_time = NotLoaded
- recipe.author = NotLoaded
- yield recipe
-
-
-class RecipePage(Page):
- """ Page which contains a recipe
- """
-
- def get_recipe(self, id):
- title = NotAvailable
- preparation_time = NotAvailable
- cooking_time = NotAvailable
- author = NotAvailable
- nb_person = NotAvailable
- ingredients = NotAvailable
- picture_url = NotAvailable
- instructions = NotAvailable
- comments = NotAvailable
-
- title = unicode(self.parser.select(self.document.getroot(), 'h1#itemTitle', 1).text)
- imgillu = self.parser.select(self.document.getroot(), 'img#imgPhoto')
- if len(imgillu) > 0:
- picture_url = unicode(imgillu[0].attrib.get('src', ''))
-
- ingredients = []
- l_ing = self.parser.select(self.document.getroot(), 'li#liIngredient')
- for ing in l_ing:
- ingtxt = unicode(ing.text_content().strip())
- if ingtxt != '':
- ingredients.append(ingtxt)
-
- instructions = u''
- l_divinst = self.parser.select(self.document.getroot(), 'div.directLeft li')
- num_instr = 1
- for inst in l_divinst:
- instructions += '%s: %s\n' % (num_instr, inst.text_content())
- num_instr += 1
-
- prepmin = 0
- emprep = self.parser.select(self.document.getroot(), 'span#prepHoursSpan em')
- if len(emprep) > 0:
- prepmin += int(emprep[0].text) * 60
- emprep = self.parser.select(self.document.getroot(), 'span#prepMinsSpan em')
- if len(emprep) > 0:
- prepmin += int(emprep[0].text)
- if prepmin != 0:
- preparation_time = prepmin
- cookmin = 0
- emcooktime = self.parser.select(self.document.getroot(), 'span#cookHoursSpan em')
- if len(emcooktime) > 0:
- cookmin += int(emcooktime[0].text) * 60
- emcooktime = self.parser.select(self.document.getroot(), 'span#cookMinsSpan em')
- if len(emcooktime) > 0:
- cookmin += int(emcooktime[0].text)
- if cookmin != 0:
- cooking_time = cookmin
- l_nbpers = self.parser.select(self.document.getroot(), 'span#lblYield[itemprop=recipeYield]')
- if len(l_nbpers) > 0 and 'servings' in l_nbpers[0].text:
- nb_person = [int(l_nbpers[0].text.split()[0])]
-
- recipe = Recipe(id, title)
- recipe.preparation_time = preparation_time
- recipe.cooking_time = cooking_time
- recipe.nb_person = nb_person
- recipe.ingredients = ingredients
- recipe.instructions = instructions
- recipe.picture_url = picture_url
- recipe.comments = comments
- recipe.author = author
- recipe.thumbnail_url = NotLoaded
- return recipe
+from weboob.browser.pages import HTMLPage, pagination
+from weboob.browser.elements import ItemElement, ListElement, method
+from weboob.capabilities.recipe import Recipe, Comment
+from weboob.capabilities.base import NotAvailable
+from weboob.browser.filters.standard import Regexp, CleanText, Env, Duration
+from weboob.browser.filters.html import CleanHTML
+
+import re
+
+
+class CookingDuration(Duration):
+ """Duration filter for values of the form "PT<h>H<m>M<s>S" (each part optional)."""
+
+ # The named groups are restored here: "(?P\d+)" is not valid regex syntax
+ # and re.compile() rejects it. The names hh/mm/ss are assumed to be the
+ # ones the parent Duration filter reads from the match -- TODO confirm
+ # against weboob.browser.filters.standard.
+ _regexp = re.compile(r'PT((?P<hh>\d+)H)?((?P<mm>\d+)M)?((?P<ss>\d+)S)?')
+
+
+class ResultsPage(HTMLPage):
+ """Search results page; yields one Recipe per result tile."""
+
+ @pagination
+ @method
+ class iter_recipes(ListElement):
+ # Each matching <article> tile becomes one item.
+ item_xpath = '//article[@class="grid-col--fixed-tiles"]'
+
+ def next_page(self):
+ # href of the "more results" button; presumably what @pagination
+ # follows to load the next page -- confirm against weboob.
+ more_results = CleanText('//button[@id="btnMoreResults"]/@href')
+ return more_results(self)
+
+ class item(ItemElement):
+ klass = Recipe
+
+ # The id is the part of the first link's href captured after
+ # "/recipe/" (greedy, up to a closing "/").
+ obj_id = Regexp(CleanText('./a[1]/@href'), '/recipe/(.*)/')
+ obj_title = CleanText('./a/h3')
+ obj_short_description = CleanText('./a/div/div[@class="rec-card__description"]')
+
+class RecipePage(HTMLPage):
+ """Recipe detail page: exposes the recipe itself and its reviews."""
+
+ @method
+ class get_recipe(ItemElement):
+ klass = Recipe
+
+ # Taken from the element environment (presumably the "_id" captured by
+ # the browser's recipe URL -- confirm against weboob).
+ obj_id = Env('_id')
+ obj_title = CleanText('//h1[@itemprop="name"]')
+
+ def obj_preparation_time(self):
+ # Whole minutes parsed from the prepTime <time> datetime attribute.
+ prep = CookingDuration(CleanText('//time[@itemprop="prepTime"]/@datetime'))
+ return int(prep(self).total_seconds() / 60)
+
+ def obj_cooking_time(self):
+ # Whole minutes parsed from the cookTime <time> datetime attribute.
+ cook = CookingDuration(CleanText('//time[@itemprop="cookTime"]/@datetime'))
+ return int(cook(self).total_seconds() / 60)
+
+ def obj_nb_person(self):
+ # One-element list holding the servings text, or NotAvailable when
+ # the meta tag yields nothing.
+ servings = CleanText('//meta[@id="metaRecipeServings"]/@content')(self)
+ return NotAvailable if not servings else [servings]
+
+ def obj_ingredients(self):
+ # Cleaned text of every ingredient <span>, empty entries dropped.
+ spans = self.el.xpath('//ul[has-class("checklist")]/li/label/span[@itemprop="ingredients"]')
+ cleaned = [CleanText('.')(span) for span in spans]
+ return [text for text in cleaned if text]
+
+ obj_instructions = CleanHTML('//ol[@itemprop="recipeInstructions"]')
+ # The thumbnail and the picture are read from the same hero-photo image.
+ obj_thumbnail_url = CleanText('//section[has-class("hero-photo")]/span/a/img/@src')
+
+ obj_picture_url = CleanText('//section[has-class("hero-photo")]/span/a/img/@src')
+
+ @method
+ class get_comments(ListElement):
+ # One Comment per review block on the page.
+ item_xpath = '//div[@itemprop="review"]'
+ ignore_duplicate = True
+
+ class item(ItemElement):
+ klass = Comment
+
+ obj_author = CleanText('./article/a/div/a/ul/li/h4[@itemprop="author"]')
+ obj_rate = CleanText('./article/div/div[@class="rating-stars"]/@data-ratingstars')
+ obj_text = CleanText('./p[@itemprop="reviewBody"]')
+ # The review link's href serves as the comment id.
+ obj_id = CleanText('./article/a/@href')
diff --git a/modules/allrecipes/test.py b/modules/allrecipes/test.py
index 799098769d44ed7142a7291d5d75c4815cb40389..fc04c144ec28b5b138bfbea3f3752fbcb55542b2 100644
--- a/modules/allrecipes/test.py
+++ b/modules/allrecipes/test.py
@@ -19,14 +19,16 @@
from weboob.tools.test import BackendTest
+import itertools
+
class AllrecipesTest(BackendTest):
MODULE = 'allrecipes'
def test_recipe(self):
- recipes = self.backend.iter_recipes('french fries')
- for recipe in recipes:
- full_recipe = self.backend.get_recipe(recipe.id)
- assert full_recipe.instructions
- assert full_recipe.ingredients
- assert full_recipe.title
+ # Take at most the first 20 search results, then check that the first
+ # one can be fetched as a full recipe.
+ found = list(itertools.islice(self.backend.iter_recipes('french fries'), 20))
+ assert found
+ full_recipe = self.backend.get_recipe(found[0].id)
+ assert full_recipe.instructions
+ assert full_recipe.ingredients
+ assert full_recipe.title