diff --git a/modules/allrecipes/browser.py b/modules/allrecipes/browser.py index 0281b800538cbf887b653cda1e10f50843e5dad8..cd5ede063c3541c369e26dfc593691aa5e5c49fa 100644 --- a/modules/allrecipes/browser.py +++ b/modules/allrecipes/browser.py @@ -17,21 +17,42 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see <http://www.gnu.org/licenses/>. from weboob.browser import PagesBrowser, URL -from .pages import ResultsPage, RecipePage +from .pages import ResultsPage, RecipePage, HomePage + +import urllib __all__ = ['AllrecipesBrowser'] class AllrecipesBrowser(PagesBrowser): - BASEURL = 'http://allrecipes.com' - results = URL('search/results/\?wt=(?P<pattern>.*)\&sort=re', - 'recipes/.*', ResultsPage) - recipe = URL('recipe/(?P<_id>.*)/', RecipePage) + BASEURL = 'https://apps.allrecipes.com' + results = URL('/v1/recipes\?(?P<query>.*)', ResultsPage) + recipe = URL('/v1/recipes/(?P<_id>.*)/', RecipePage) + home = URL('http://allrecipes.com', HomePage) + + TOKEN = None + + def fill_token(self): + self.home.open() + self.TOKEN = 'Bearer %s' % self.session.cookies.get('ARToken') + self.session.headers['X-Requested-With'] = 'XMLHttpRequest' + self.session.headers['Authorization'] = self.TOKEN def iter_recipes(self, pattern): - return self.results.go(pattern=pattern).iter_recipes() + query = {'query': pattern, + 'page': 1, + 'pagesize': 20, + 'sort': 're'} + + if not self.TOKEN: + self.fill_token() + + return self.results.go(query=urllib.urlencode(query)).iter_recipes() def get_recipe(self, _id, obj=None): + if not self.TOKEN: + self.fill_token() + recipe = self.recipe.go(_id=_id).get_recipe(obj=obj) comments = list(self.page.get_comments()) if comments: diff --git a/modules/allrecipes/pages.py b/modules/allrecipes/pages.py index 9ab519ae72db773a35b6b6b50cbb0df663da4650..1cb28252b26f4118479f065dba2ef1f4bed39999 100644 --- a/modules/allrecipes/pages.py +++ b/modules/allrecipes/pages.py @@ -18,80 +18,75 @@ # along with weboob. If not, see <http://www.gnu.org/licenses/>. 
-from weboob.browser.pages import HTMLPage, pagination -from weboob.browser.elements import ItemElement, ListElement, method +from weboob.browser.pages import HTMLPage, JsonPage, pagination +from weboob.browser.elements import ItemElement, DictElement, method from weboob.capabilities.recipe import Recipe, Comment from weboob.capabilities.base import NotAvailable -from weboob.browser.filters.standard import Regexp, CleanText, Env, Duration -from weboob.browser.filters.html import CleanHTML +from weboob.browser.filters.standard import Env, Format, Join +from weboob.browser.filters.json import Dict -import re +class HomePage(HTMLPage): + pass -class CookingDuration(Duration): - _regexp = re.compile(r'PT((?P<hh>\d+)H)?((?P<mm>\d+)M)?((?P<ss>\d+)S)?') - -class ResultsPage(HTMLPage): +class ResultsPage(JsonPage): @pagination @method - class iter_recipes(ListElement): - item_xpath = '//article[@class="grid-col--fixed-tiles"]' + class iter_recipes(DictElement): + + item_xpath = 'recipes' def next_page(self): - return CleanText('//button[@id="btnMoreResults"]/@href')(self) + return Dict('links/next/href', default=None)(self.page.doc) class item(ItemElement): klass = Recipe - obj_id = Regexp(CleanText('./a[1]/@href'), - '/recipe/(.*)/') - obj_title = CleanText('./a/h3') - obj_short_description = CleanText('./a/div/div[@class="rec-card__description"]') + obj_id = Dict('recipeID') + obj_title = Dict('title') + obj_short_description = Dict('description') -class RecipePage(HTMLPage): +class RecipePage(JsonPage): @method class get_recipe(ItemElement): klass = Recipe obj_id = Env('_id') - obj_title = CleanText('//h1[@itemprop="name"]') - - def obj_preparation_time(self): - dt = CookingDuration(CleanText('//time[@itemprop="prepTime"]/@datetime'))(self) - return int(dt.total_seconds() / 60) - - def obj_cooking_time(self): - dt = CookingDuration(CleanText('//time[@itemprop="cookTime"]/@datetime'))(self) - return int(dt.total_seconds() / 60) + obj_title = Dict('title') + obj_short_description = 
Dict('description') + obj_preparation_time = Dict('prepMinutes') + obj_cooking_time = Dict('cookMinutes') def obj_nb_person(self): - nb_pers = CleanText('//meta[@id="metaRecipeServings"]/@content')(self) + nb_pers = u'%s' % Dict('servings', default='')(self) return [nb_pers] if nb_pers else NotAvailable def obj_ingredients(self): ingredients = [] - for el in self.el.xpath('//ul[has-class("checklist")]/li/label/span[@itemprop="ingredients"]'): - ing = CleanText('.')(el) - if ing: - ingredients.append(ing) + for el in Dict('ingredients')(self): + ing = Format('%s (%s gramm)', + Dict('displayValue'), + Dict('grams'))(el) + ingredients.append(ing) return ingredients - obj_instructions = CleanHTML('//ol[@itemprop="recipeInstructions"]') - obj_thumbnail_url = CleanText('//section[has-class("hero-photo")]/span/a/img/@src') + def obj_instructions(self): + ins = [Dict('displayValue')(el) for el in Dict('directions')(self)] + return Join('\n * ', ins, addBefore=' * ', addAfter='\n')(self) - obj_picture_url = CleanText('//section[has-class("hero-photo")]/span/a/img/@src') + obj_thumbnail_url = Dict('photo/photoDetailUrl') + obj_picture_url = Dict('photo/photoDetailUrl') @method - class get_comments(ListElement): - item_xpath = '//div[@itemprop="review"]' - ignore_duplicate = True + class get_comments(DictElement): + item_xpath = 'topReviews' class item(ItemElement): klass = Comment - obj_author = CleanText('./article/a/div/a/ul/li/h4[@itemprop="author"]') - obj_rate = CleanText('./article/div/div[@class="rating-stars"]/@data-ratingstars') - obj_text = CleanText('./p[@itemprop="reviewBody"]') - obj_id = CleanText('./article/a/@href') + obj_author = Dict('submitter/name') + obj_rate = Dict('rating') + obj_text = Dict('text') + obj_id = Dict('reviewID')