The new woob repository is here: https://gitlab.com/woob/woob. This gitlab will be removed soon.

The new woob repository is here: https://gitlab.com/woob/woob. This gitlab will be removed soon.

Commit 178c24d8 authored by Bezleputh, committed by Vincent A

[750g] fix module website got updated

parent 613d5083
......@@ -17,9 +17,11 @@
# You should have received a copy of the GNU Affero General Public License
# along with this woob module. If not, see <http://www.gnu.org/licenses/>.
from woob.browser.exceptions import BrowserHTTPNotFound
import re
from woob.browser import PagesBrowser, URL
from .pages import RecipePage, ResultsPage
from .pages import RecipePage, ResultsPage, CommentPage
from woob.tools.compat import quote_plus
__all__ = ['SevenFiftyGramsBrowser']
......@@ -28,29 +30,26 @@
class SevenFiftyGramsBrowser(PagesBrowser):
BASEURL = 'https://www.750g.com'
search = URL('/recettes_(?P<pattern>.*).htm', ResultsPage)
comment = URL('/recipe/(?P<_id>.*)/sort/lastest/comments.json', CommentPage)
search = URL(r'/recherche/\?q=(?P<pattern>.*)&page=(?P<page>\d*)', ResultsPage)
recipe = URL('/(?P<id>.*).htm', RecipePage)
def iter_recipes(self, pattern):
try:
self.search.go(pattern=pattern.replace(' ', '_'))
except BrowserHTTPNotFound:
return []
if isinstance(self.page, ResultsPage):
return self.page.iter_recipes()
return [self.get_recipe_content()]
return self.search.go(pattern=quote_plus(pattern.encode('utf-8')), page=1).iter_recipes()
def get_recipe(self, id, recipe=None):
try:
self.recipe.go(id=id)
return self.get_recipe_content(recipe)
except BrowserHTTPNotFound:
return
self.recipe.go(id=id)
return self.get_recipe_content(recipe)
def get_comments(self, id):
    """Fetch the comments attached to the recipe identified by *id*.

    The numeric identifier is taken from the last ``r<digits>`` group
    found in *id*; it is then used to open the ``comment`` URL and the
    result of that page's ``get_comments()`` is returned.

    Returns ``None`` when *id* contains no ``r`` followed by at least one
    digit, so no request is issued with an empty identifier.
    """
    # `\d+` (not `\d*`): an empty capture would build a URL with an empty
    # recipe number. The greedy `.*` still selects the right-most match.
    m = re.match(r'.*r(\d+)', id, re.DOTALL)
    if m is None:
        return None
    return self.comment.go(_id=m.group(1)).get_comments()
def get_recipe_content(self, recipe=None):
recipe = self.page.get_recipe(obj=recipe)
comments = list(self.page.get_comments())
comments = self.get_comments(recipe.id)
if comments:
recipe.comments = comments
recipe.comments = list(comments)
return recipe
......@@ -19,15 +19,9 @@
from woob.capabilities.recipe import CapRecipe, Recipe
from woob.tools.backend import Module
from woob.tools.compat import unicode
from .browser import SevenFiftyGramsBrowser
import unicodedata
def strip_accents(s):
    """Return *s* with its nonspacing marks (category ``Mn``) removed.

    The string is first decomposed with Unicode NFD normalization, so an
    accented letter becomes a base letter followed by combining marks;
    the marks are then dropped.
    """
    decomposed = unicodedata.normalize('NFD', s)
    return ''.join(c for c in decomposed if unicodedata.category(c) != 'Mn')
__all__ = ['SevenFiftyGramsModule']
......@@ -45,7 +39,7 @@ def get_recipe(self, id):
return self.browser.get_recipe(id)
def iter_recipes(self, pattern):
return self.browser.iter_recipes(strip_accents(unicode(pattern)).encode('utf-8'))
return self.browser.iter_recipes(pattern)
def fill_recipe(self, recipe, fields):
if 'nb_person' in fields or 'instructions' in fields:
......
......@@ -21,9 +21,9 @@
from woob.capabilities.recipe import Recipe, Comment
from woob.capabilities.base import NotAvailable
from woob.capabilities.image import BaseImage, Thumbnail
from woob.browser.pages import HTMLPage, pagination
from woob.browser.elements import ItemElement, ListElement, method
from woob.browser.filters.standard import CleanText, Regexp, Env, CleanDecimal, Eval
from woob.browser.pages import HTMLPage, JsonPage, pagination
from woob.browser.elements import DictElement, ItemElement, ListElement, method
from woob.browser.filters.standard import CleanText, Regexp, Env, CleanDecimal, Eval, BrowserURL
from woob.browser.filters.json import Dict, NotFound
from datetime import datetime, date, time
from dateutil.parser import parse as parse_date
......@@ -47,54 +47,58 @@ class ResultsPage(HTMLPage):
@pagination
@method
class iter_recipes(ListElement):
item_xpath = '//section[has-class("c-recipe-row")]'
item_xpath = '//article/div'
def next_page(self):
return CleanText('//li[@class="suivante"]/a/@href')(self)
suivant = CleanText(
'//li[@class="pagination-item"]/span/span[@class="pagination-txt" and text()="Suivant"]',
default="")(self)
if suivant == "Suivant":
page = Env('page')(self)
return BrowserURL('search', pattern=Env('pattern'), page=int(page) + 1)(self)
class item(ItemElement):
klass = Recipe
def condition(self):
return not CleanText('./div[@class="c-recipe-row__media"]/span[@class="c-recipe-row__video"]/@class',
default=None)(self) and CleanText('./div/h2/a/@href')(self)
obj_id = Regexp(CleanText('./div[@class="card-content"]/strong/a/@href'),
'https://www.750g.com/(.*).htm')
obj_id = Regexp(CleanText('./div/h2/a/@href'),
'/(.*).htm')
obj_title = CleanText('./div[@class="card-content"]/strong/a')
obj_title = CleanText('./div/h2/a')
obj_short_description = CleanText('./div[@class="card-content"]/p[@class="card-text"]')
class obj_picture(ItemElement):
klass = BaseImage
obj_thumbnail = Eval(Thumbnail, CleanText('./div/img/@src'))
obj_thumbnail = Eval(Thumbnail,
CleanText('./div[@class="card-media-wrapper"]/div/picture/@data-srcset'))
obj_short_description = CleanText('./div/p')
class RecipePage(HTMLPage):
""" Page which contains a recipe
class CommentPage(JsonPage):
""" Page which contains a comments
"""
@method
class get_comments(ListElement):
item_xpath = '//div[has-class("c-comment__row")]'
class get_comments(DictElement):
item_xpath = "comments"
class item(ItemElement):
klass = Comment
def validate(self, obj):
return obj.id
obj_id = Dict('@id')
obj_author = Dict('author/nickname')
obj_text = Dict('content')
obj_id = CleanText('./@data-id')
obj_author = CleanText('./article/div/header/strong/span[@itemprop="author"]')
obj_text = CleanText('./article/div/div/p')
class RecipePage(HTMLPage):
""" Page which contains a recipe
"""
@method
class get_recipe(ItemElement):
klass = Recipe
def parse(self, el):
json_content = CleanText('//head/script[@type="application/ld+json"]')(el)
json_content = CleanText('(//script[@type="application/ld+json"])[1]')(el)
self.el = json.loads(json_content)
obj_id = Env('id')
......@@ -104,15 +108,12 @@ def parse(self, el):
obj_preparation_time = Time('prepTime')
def obj_nb_person(self):
return [CleanDecimal(Dict('recipeYield'), default=0)(self)]
return [CleanDecimal(Dict('recipeYield', default=0))(self)]
obj_instructions = Dict('recipeInstructions')
obj_author = Dict('author/name', default=NotAvailable)
def obj_picture(self):
img = BaseImage()
try:
img.url = self.el['image']
except KeyError:
return
img.url = self.el['image']['url']
return img
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment