diff --git a/modules/750g/browser.py b/modules/750g/browser.py
index cd19a36b9f8b34053354a7aacf287d1597194532..7cfa159c75d196036a21494462fd7b4f9cad84e6 100644
--- a/modules/750g/browser.py
+++ b/modules/750g/browser.py
@@ -17,9 +17,11 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this woob module. If not, see <http://www.gnu.org/licenses/>.
 
-from woob.browser.exceptions import BrowserHTTPNotFound
+import re
+
 from woob.browser import PagesBrowser, URL
-from .pages import RecipePage, ResultsPage
+from .pages import RecipePage, ResultsPage, CommentPage
+from woob.tools.compat import quote_plus
 
 __all__ = ['SevenFiftyGramsBrowser']
 
@@ -28,29 +30,29 @@ class SevenFiftyGramsBrowser(PagesBrowser):
 
     BASEURL = 'https://www.750g.com'
 
-    search = URL('/recettes_(?P<pattern>.*).htm', ResultsPage)
+    comment = URL('/recipe/(?P<_id>.*)/sort/lastest/comments.json', CommentPage)
+    search = URL(r'/recherche/\?q=(?P<pattern>.*)&page=(?P<page>\d*)', ResultsPage)
     recipe = URL('/(?P<id>.*).htm', RecipePage)
 
     def iter_recipes(self, pattern):
-        try:
-            self.search.go(pattern=pattern.replace(' ', '_'))
-        except BrowserHTTPNotFound:
-            return []
-
-        if isinstance(self.page, ResultsPage):
-            return self.page.iter_recipes()
-        return [self.get_recipe_content()]
+        return self.search.go(pattern=quote_plus(pattern.encode('utf-8')), page=1).iter_recipes()
 
     def get_recipe(self, id, recipe=None):
-        try:
-            self.recipe.go(id=id)
-            return self.get_recipe_content(recipe)
-        except BrowserHTTPNotFound:
-            return
+        self.recipe.go(id=id)
+        return self.get_recipe_content(recipe)
+
+    def get_comments(self, id):
+        # The comments URL is built from the digits that follow the last "r"
+        # of the recipe id. Require at least one digit so an id without them
+        # yields no comments instead of a request for an empty id.
+        m = re.match(r'.*r(\d+)', id, re.DOTALL)
+        if not m:
+            return []
+        return self.comment.go(_id=m.group(1)).get_comments()
 
     def get_recipe_content(self, recipe=None):
         recipe = self.page.get_recipe(obj=recipe)
-        comments = list(self.page.get_comments())
+        comments = self.get_comments(recipe.id)
         if comments:
-            recipe.comments = comments
+            recipe.comments = list(comments)
         return recipe
diff --git a/modules/750g/module.py b/modules/750g/module.py
index bcb71b1bce053e435cf49369bcc459187b3ff627..0af12c7bb539ab35bf1b8e98007184703f07aa0f 100644
--- a/modules/750g/module.py
+++ b/modules/750g/module.py
@@ -19,15 +19,9 @@
 
 from woob.capabilities.recipe import CapRecipe, Recipe
 from woob.tools.backend import Module
-from woob.tools.compat import unicode
 
 from .browser import SevenFiftyGramsBrowser
 
-import unicodedata
-
-
-def strip_accents(s):
-    return ''.join(c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn')
 
 __all__ = ['SevenFiftyGramsModule']
 
@@ -45,7 +39,7 @@ def get_recipe(self, id):
         return self.browser.get_recipe(id)
 
     def iter_recipes(self, pattern):
-        return self.browser.iter_recipes(strip_accents(unicode(pattern)).encode('utf-8'))
+        return self.browser.iter_recipes(pattern)
 
     def fill_recipe(self, recipe, fields):
         if 'nb_person' in fields or 'instructions' in fields:
diff --git a/modules/750g/pages.py b/modules/750g/pages.py
index c86d3ed5775edaa8cad0beb74cfe47511a80bb3e..28a150f84e39b81ce14126da08277cf429d9d50f 100644
--- a/modules/750g/pages.py
+++ b/modules/750g/pages.py
@@ -21,9 +21,9 @@
 from woob.capabilities.recipe import Recipe, Comment
 from woob.capabilities.base import NotAvailable
 from woob.capabilities.image import BaseImage, Thumbnail
-from woob.browser.pages import HTMLPage, pagination
-from woob.browser.elements import ItemElement, ListElement, method
-from woob.browser.filters.standard import CleanText, Regexp, Env, CleanDecimal, Eval
+from woob.browser.pages import HTMLPage, JsonPage, pagination
+from woob.browser.elements import DictElement, ItemElement, ListElement, method
+from woob.browser.filters.standard import CleanText, Regexp, Env, CleanDecimal, Eval, BrowserURL
 from woob.browser.filters.json import Dict, NotFound
 from datetime import datetime, date, time
 from dateutil.parser import parse as parse_date
@@ -47,54 +47,58 @@ class ResultsPage(HTMLPage):
     @pagination
     @method
     class iter_recipes(ListElement):
-        item_xpath = '//section[has-class("c-recipe-row")]'
+        item_xpath = '//article/div'
 
         def next_page(self):
-            return CleanText('//li[@class="suivante"]/a/@href')(self)
+            suivant = CleanText(
+                '//li[@class="pagination-item"]/span/span[@class="pagination-txt" and text()="Suivant"]',
+                default="")(self)
+            if suivant == "Suivant":
+                page = Env('page')(self)
+                return BrowserURL('search', pattern=Env('pattern'), page=int(page) + 1)(self)
 
         class item(ItemElement):
             klass = Recipe
 
-            def condition(self):
-                return not CleanText('./div[@class="c-recipe-row__media"]/span[@class="c-recipe-row__video"]/@class',
-                                     default=None)(self) and CleanText('./div/h2/a/@href')(self)
+            obj_id = Regexp(CleanText('./div[@class="card-content"]/strong/a/@href'),
+                            'https://www.750g.com/(.*).htm')
 
-            obj_id = Regexp(CleanText('./div/h2/a/@href'),
-                            '/(.*).htm')
+            obj_title = CleanText('./div[@class="card-content"]/strong/a')
 
-            obj_title = CleanText('./div/h2/a')
+            obj_short_description = CleanText('./div[@class="card-content"]/p[@class="card-text"]')
 
             class obj_picture(ItemElement):
                 klass = BaseImage
-                obj_thumbnail = Eval(Thumbnail, CleanText('./div/img/@src'))
+                obj_thumbnail = Eval(Thumbnail,
+                                     CleanText('./div[@class="card-media-wrapper"]/div/picture/@data-srcset'))
 
-            obj_short_description = CleanText('./div/p')
-
 
-class RecipePage(HTMLPage):
-    """ Page which contains a recipe
+class CommentPage(JsonPage):
+    """ Page which contains the comments of a recipe
     """
 
     @method
-    class get_comments(ListElement):
-        item_xpath = '//div[has-class("c-comment__row")]'
+    class get_comments(DictElement):
+        item_xpath = "comments"
 
         class item(ItemElement):
             klass = Comment
 
-            def validate(self, obj):
-                return obj.id
+            obj_id = Dict('@id')
+            obj_author = Dict('author/nickname')
+            obj_text = Dict('content')
+
 
-            obj_id = CleanText('./@data-id')
-            obj_author = CleanText('./article/div/header/strong/span[@itemprop="author"]')
-            obj_text = CleanText('./article/div/div/p')
+class RecipePage(HTMLPage):
+    """ Page which contains a recipe
+    """
 
     @method
     class get_recipe(ItemElement):
         klass = Recipe
 
         def parse(self, el):
-            json_content = CleanText('//head/script[@type="application/ld+json"]')(el)
+            json_content = CleanText('(//script[@type="application/ld+json"])[1]')(el)
             self.el = json.loads(json_content)
 
         obj_id = Env('id')
@@ -104,15 +108,17 @@ def parse(self, el):
         obj_preparation_time = Time('prepTime')
 
         def obj_nb_person(self):
-            return [CleanDecimal(Dict('recipeYield'), default=0)(self)]
+            return [CleanDecimal(Dict('recipeYield', default=0))(self)]
 
         obj_instructions = Dict('recipeInstructions')
         obj_author = Dict('author/name', default=NotAvailable)
 
         def obj_picture(self):
             img = BaseImage()
             try:
-                img.url = self.el['image']
-            except KeyError:
+                # 'image' is read as a mapping with a 'url' key; a recipe where
+                # it is missing or has another shape simply gets no picture.
+                img.url = self.el['image']['url']
+            except (KeyError, TypeError):
                 return
             return img