diff --git a/modules/marmiton/browser.py b/modules/marmiton/browser.py
index 0863774ec75f004574a65b0b2347b77efb45b6b3..9db9b32bd29e0aad3f4d4b35c04031844bdf3726 100644
--- a/modules/marmiton/browser.py
+++ b/modules/marmiton/browser.py
@@ -16,33 +16,34 @@
#
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
+import re
-from woob.browser.exceptions import BrowserHTTPNotFound
-from woob.browser import PagesBrowser, URL
-
-from .pages import RecipePage, ResultsPage, CommentsPage
+from woob.browser import URL, PagesBrowser
+from .pages import CommentsPage, RecipePage, ResultsPage
__all__ = ['MarmitonBrowser']
class MarmitonBrowser(PagesBrowser):
BASEURL = 'https://www.marmiton.org'
- search = URL('/recettes/recherche.aspx\?aqt=(?P.*)&start=(?P\d*)',
- '/recettes/recherche.aspx\?aqt=.*',
+ search = URL(r'/recettes/recherche.aspx\?aqt=(?P.*)&start=(?P\d*)&page=(?P\d*)',
+ r'/recettes/recherche.aspx\?aqt=.*',
ResultsPage)
- recipe = URL('/recettes/recette_(?P.*).aspx', RecipePage)
- comment = URL('/recettes/recette-avis_(?P.*).aspx', CommentsPage)
+ recipe = URL(r'/recettes/recette_(?P.*).aspx', RecipePage)
+ comment = URL(r'https://api-uno.marmiton.org/origin/(?P<_id>\d*)/top-reviews\?originType=RECIPE', CommentsPage)
def iter_recipes(self, pattern):
- return self.search.go(pattern=pattern, start=0).iter_recipes(pattern=pattern)
+ return self.search.go(pattern=pattern, start=0, page=0).iter_recipes(pattern=pattern)
def get_recipe(self, id, recipe=None):
- try:
- recipe = self.recipe.go(id=id).get_recipe(obj=recipe)
- comments = list(self.comment.go(id=id).get_comments())
+ recipe = self.recipe.go(id=id).get_recipe(obj=recipe)
+
+ m = re.match(r'.*_(\d*)$', recipe.id, re.DOTALL)
+ if m:
+ _id = m.group(1)
+ self.session.headers['x-site-id'] = '13'
+ comments = list(self.comment.go(_id=_id).get_comments())
if comments:
recipe.comments = comments
return recipe
- except BrowserHTTPNotFound:
- return
diff --git a/modules/marmiton/pages.py b/modules/marmiton/pages.py
index 7049486d1b6250d0a65636537cf9ddc3bacfbd10..47931b8ccfda39d0148dd14027f746f6fd3f0f7f 100644
--- a/modules/marmiton/pages.py
+++ b/modules/marmiton/pages.py
@@ -17,9 +17,9 @@
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
-from woob.browser.pages import HTMLPage, pagination
-from woob.browser.elements import ItemElement, ListElement, method
-from woob.browser.filters.standard import Regexp, CleanText, Format, Env, CleanDecimal, Eval
+from woob.browser.pages import HTMLPage, pagination, JsonPage
+from woob.browser.elements import ItemElement, method, DictElement
+from woob.browser.filters.standard import BrowserURL, Regexp, CleanText, Format, Env, CleanDecimal, Eval
from woob.browser.filters.html import XPath
from woob.browser.filters.json import Dict
from woob.capabilities.recipe import Recipe, Comment
@@ -30,23 +30,32 @@
class ResultsPage(HTMLPage):
""" Page which contains results as a list of recipies
"""
+
+ ENCODING = 'utf-8'
+
+ def build_doc(self, content):
+ content = HTMLPage.build_doc(self, content)
+ return json.loads(CleanText('//script[@id="__NEXT_DATA__"]')(content))
+
@pagination
@method
- class iter_recipes(ListElement):
- item_xpath = "//a[@class='recipe-card-link']"
+ class iter_recipes(DictElement):
+ item_xpath = "props/pageProps/searchResults/hits"
def next_page(self):
- return CleanText('//nav/ul/li[@class="next-page"]/a/@href', default="")(self)
+ current_page = int(Env('page')(self))
+ if Dict('props/pageProps/searchResults/nbPages')(self) >= current_page:
+ return BrowserURL('search', pattern=Env('pattern'), start=Env('start'), page=current_page + 1)(self)
class item(ItemElement):
klass = Recipe
- obj_id = Regexp(CleanText('./@href'),
+ obj_id = Regexp(Dict('url'),
'/recettes/recette_(.*).aspx')
- obj_title = CleanText('./div/h4')
- obj_short_description = Format('%s. %s',
- CleanText('./div/div[@class="recipe-card__description"]',
- replace=[(u'Ingrédients : ', ''), ('...', '')]),
- CleanText('./div/div[@class="recipe-card__duration"]'))
+ obj_title = Dict('title')
+ obj_short_description = Format('%s - %s - Nutriscore : %s',
+ Dict('dishType'),
+ Dict('cookingType'),
+ Dict('nutriScore'))
class RecipePage(HTMLPage):
@@ -58,12 +67,12 @@ class get_recipe(ItemElement):
klass = Recipe
def parse(self, el):
- item = XPath(u'//script[@type="application/ld+json"]')(self)
-
- json_content = CleanText(u'.',
- replace=[('//', '')])(item[1])
- self.el = json.loads(json_content)
+ items = XPath(u'//script[@type="application/ld+json"]')(self)
+ for item in items:
+ content = json.loads(CleanText(u'.')(item))
+ if content['@type'] == "Recipe":
+ self.el = content
+ break
obj_id = Env('id')
obj_title = Dict('name')
@@ -72,7 +81,10 @@ def parse(self, el):
class obj_picture(ItemElement):
klass = BaseImage
- obj_url = Dict('image')
+ def obj_url(self):
+ url = Dict('image', default='')(self)
+ return url[0] if url else url
+
obj_thumbnail = Eval(Thumbnail, obj_url)
def obj_instructions(self):
@@ -88,22 +100,18 @@ def obj_nb_person(self):
return [Dict('recipeYield')(self)]
-class CommentsPage(HTMLPage):
+class CommentsPage(JsonPage):
- """ Page which contains a comments
+ """ Page which contains the comments of a recipe, read from a JSON document
"""
@method
- class get_comments(ListElement):
- item_xpath = '//div[@class="commentaire"]/div/table/tr'
- ignore_duplicate = True
+ class get_comments(DictElement):
+ # One Comment is built per entry found under the 'reviews' key.
+ item_xpath = 'reviews'
class item(ItemElement):
klass = Comment
- obj_author = CleanText('./td/div[@class="txtCommentaire"]/div[1]')
- obj_rate = CleanText('./td/div[@class="bulle"]')
-
- def obj_text(self):
- return CleanText('./td/div[@class="txtCommentaire"]')(self)
-
- obj_id = CleanText('./td/div[@class="txtCommentaire"]/div[1]')
+ # Fields are read from the review entry's keys; 'rating' is passed
+ # through CleanText before being stored as the rate.
+ obj_author = Dict('username')
+ obj_rate = CleanText(Dict('rating'))
+ obj_text = Dict('content')
+ obj_id = Dict('reviewId')