Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
weboob
Project overview
Project overview
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
180
Issues
180
List
Boards
Labels
Milestones
Merge Requests
53
Merge Requests
53
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
weboob
weboob
Commits
159311bf
Commit
159311bf
authored
Sep 16, 2015
by
Bezleputh
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[allrecipes] adapt to new version of website / bump to browser2
parent
7408950a
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
103 additions
and
153 deletions
+103
-153
modules/allrecipes/browser.py
modules/allrecipes/browser.py
+15
-27
modules/allrecipes/module.py
modules/allrecipes/module.py
+3
-14
modules/allrecipes/pages.py
modules/allrecipes/pages.py
+77
-106
modules/allrecipes/test.py
modules/allrecipes/test.py
+8
-6
No files found.
modules/allrecipes/browser.py
View file @
159311bf
...
...
@@ -16,36 +16,24 @@
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from
weboob.deprecated.browser
import
Browser
,
BrowserHTTPNotFound
from
.pages
import
RecipePage
,
ResultsPage
,
FourOFourPage
from
weboob.browser
import
PagesBrowser
,
URL
from
.pages
import
ResultsPage
,
RecipePage
__all__
=
[
'AllrecipesBrowser'
]
class AllrecipesBrowser(Browser):
    """Browser for allrecipes.com built on the deprecated ``Browser`` class.

    This part of the class only carries configuration: site location,
    encoding setting, user agent and the table mapping URL patterns to page
    classes.
    """

    # Site location and encoding setting.
    DOMAIN = 'allrecipes.com'
    PROTOCOL = 'http'
    ENCODING = 'utf-8'

    # Taken from the 'wget' entry of the base class' USER_AGENTS table.
    USER_AGENT = Browser.USER_AGENTS['wget']

    # URL pattern -> page class.
    PAGES = {
        'http://allrecipes.com/search/default.aspx\?qt=k&wt=.*&rt=r&origin=.*':
            ResultsPage,
        'http://allrecipes.com/Recipe/.*/Detail.aspx':
            RecipePage,
        'http://allrecipes.com/404.aspx.*':
            FourOFourPage,
    }
class AllrecipesBrowser(PagesBrowser):
    """Browser for allrecipes.com declared with ``PagesBrowser`` and ``URL``."""

    BASEURL = 'http://allrecipes.com'

    # Search results: a query URL capturing ``pattern`` plus any
    # 'recipes/...' path, both handled by ResultsPage.
    results = URL(
        'search/results/\?wt=(?P<pattern>.*)\&sort=re',
        'recipes/.*',
        ResultsPage
    )

    # Single recipe; the captured group is named ``_id``.
    recipe = URL(
        'recipe/(?P<_id>.*)/',
        RecipePage
    )
def iter_recipes(self, pattern):
    """Open the search page for ``pattern`` and return the page's recipes.

    ``pattern`` is interpolated into the query string as given.  An
    ``AssertionError`` is raised when the browser does not report being on
    a ``ResultsPage`` after the request.
    """
    search_url = 'http://allrecipes.com/search/default.aspx?qt=k&wt=%s&rt=r&origin=Home%%20Page' % (pattern)
    self.location(search_url)
    assert self.is_on_page(ResultsPage)

    results_page = self.page
    return results_page.iter_recipes()
def get_recipe(self, id):
    """Load the detail page of recipe ``id`` and return the parsed recipe.

    Returns ``None`` when the request raises ``BrowserHTTPNotFound`` or when
    the browser does not end up on a ``RecipePage``.
    """
    detail_url = 'http://allrecipes.com/Recipe/%s/Detail.aspx' % id
    try:
        self.location(detail_url)
    except BrowserHTTPNotFound:
        return None

    # Any other landing page yields no recipe.
    if not self.is_on_page(RecipePage):
        return None
    return self.page.get_recipe(id)
return
self
.
results
.
go
(
pattern
=
pattern
)
.
iter_recipes
()
def get_recipe(self, _id, obj=None):
    """Fetch recipe ``_id`` and attach its comments when there are any.

    ``obj`` is forwarded to the page's ``get_recipe``.  ``recipe.comments``
    is only assigned when the current page yields at least one comment.
    """
    recipe_page = self.recipe.go(_id=_id)
    recipe = recipe_page.get_recipe(obj=obj)

    # Comments are read from the browser's current page.
    found_comments = list(self.page.get_comments())
    if found_comments:
        recipe.comments = found_comments
    return recipe
modules/allrecipes/module.py
View file @
159311bf
...
...
@@ -43,19 +43,8 @@ class AllrecipesModule(Module, CapRecipe):
return
self
.
browser
.
iter_recipes
(
quote_plus
(
pattern
.
encode
(
'utf-8'
)))
# Complete `recipe` when some of the requested `fields` need a full fetch,
# then return it.
# NOTE(review): two guarded bodies follow one another below. This looks like a
# rendered diff showing the previous and the replacement revision of the same
# method -- confirm which one is current before relying on either.
def
fill_recipe
(
self
,
recipe
,
fields
):
# First body: runs when 'nb_person' or 'instructions' is requested; fetches a
# recipe through self.get_recipe() and copies its attributes one by one.
if
'nb_person'
in
fields
or
'instructions'
in
fields
:
rec
=
self
.
get_recipe
(
recipe
.
id
)
recipe
.
picture_url
=
rec
.
picture_url
recipe
.
instructions
=
rec
.
instructions
recipe
.
ingredients
=
rec
.
ingredients
recipe
.
comments
=
rec
.
comments
recipe
.
author
=
rec
.
author
recipe
.
nb_person
=
rec
.
nb_person
recipe
.
cooking_time
=
rec
.
cooking_time
recipe
.
preparation_time
=
rec
.
preparation_time
# Second body: also runs for 'thumbnail_url'; passes the recipe object itself
# to the browser and rebinds `recipe` to whatever the browser returns.
if
'nb_person'
in
fields
or
'instructions'
in
fields
or
'thumbnail_url'
in
fields
:
recipe
=
self
.
browser
.
get_recipe
(
recipe
.
id
,
recipe
)
# The recipe is returned whether or not any branch above ran.
return
recipe
OBJECTS
=
{
Recipe
:
fill_recipe
,
}
OBJECTS
=
{
Recipe
:
fill_recipe
}
modules/allrecipes/pages.py
View file @
159311bf
...
...
@@ -18,109 +18,80 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from
weboob.capabilities.recipe
import
Recipe
from
weboob.capabilities.base
import
NotAvailable
,
NotLoaded
from
weboob.deprecated.browser
import
Page
class FourOFourPage(Page):
    """ Page class without any behaviour of its own
    """
class ResultsPage(Page):
    """ Page which contains results as a list of recipes
    """

    def iter_recipes(self):
        """ Yield one partially filled Recipe per 'div.recipe-info' element.

        Id, title and thumbnail come from the listing; the short description
        is NotAvailable and all detail fields are left as NotLoaded.
        """
        for info in self.parser.select(self.document.getroot(), 'div.recipe-info'):
            # Thumbnail: first <img> below the parent node, kept only when
            # its src starts with 'http://'.
            thumbnail_url = NotAvailable
            images = self.parser.select(info.getparent(), 'img')
            if images:
                src = unicode(images[0].attrib.get('src', ''))
                if src.startswith('http://'):
                    thumbnail_url = src

            anchor = self.parser.select(info, 'a.title', 1)
            title = unicode(anchor.text)
            # The identifier is the third '/'-separated piece of the href.
            href = anchor.attrib.get('href', '')
            recipe_id = unicode(href.split('/')[2])

            recipe = Recipe(recipe_id, title)
            recipe.thumbnail_url = thumbnail_url
            recipe.short_description = NotAvailable
            for field in ('instructions', 'ingredients', 'nb_person',
                          'cooking_time', 'preparation_time', 'author'):
                setattr(recipe, field, NotLoaded)
            yield recipe
class RecipePage(Page):
    """ Page which contains a recipe
    """

    def _css(self, selector, *extra):
        """ Run self.parser.select on the document root for `selector`.
        """
        return self.parser.select(self.document.getroot(), selector, *extra)

    def _minutes(self, hours_selector, minutes_selector):
        """ Sum the hours (times 60) and minutes found by the two selectors.

        A selector without match contributes nothing, so the result may be 0.
        """
        total = 0
        hours = self._css(hours_selector)
        if hours:
            total += int(hours[0].text) * 60
        minutes = self._css(minutes_selector)
        if minutes:
            total += int(minutes[0].text)
        return total

    def get_recipe(self, id):
        """ Build a Recipe with identifier `id` from the current document.

        Values that are not found stay NotAvailable; comments and author are
        never extracted here and thumbnail_url is left as NotLoaded.
        """
        title = unicode(self._css('h1#itemTitle', 1).text)

        picture_url = NotAvailable
        photos = self._css('img#imgPhoto')
        if photos:
            picture_url = unicode(photos[0].attrib.get('src', ''))

        # Ingredients: stripped text of each matching item, blanks dropped.
        ingredients = []
        for item in self._css('li#liIngredient'):
            text = unicode(item.text_content().strip())
            if text:
                ingredients.append(text)

        # Instructions: one numbered line per step, numbering starts at 1.
        instructions = u''
        for number, step in enumerate(self._css('div.directLeft li'), 1):
            instructions += '%s: %s\n' % (number, step.text_content())

        # Durations are expressed in minutes; a total of 0 means "unknown".
        preparation_time = NotAvailable
        prep_minutes = self._minutes('span#prepHoursSpan em', 'span#prepMinsSpan em')
        if prep_minutes != 0:
            preparation_time = prep_minutes

        cooking_time = NotAvailable
        cook_minutes = self._minutes('span#cookHoursSpan em', 'span#cookMinsSpan em')
        if cook_minutes != 0:
            cooking_time = cook_minutes

        # Servings: only read when the yield text mentions 'servings'; the
        # first whitespace-separated word is converted to int.
        nb_person = NotAvailable
        yields = self._css('span#lblYield[itemprop=recipeYield]')
        if yields and 'servings' in yields[0].text:
            nb_person = [int(yields[0].text.split()[0])]

        recipe = Recipe(id, title)
        recipe.preparation_time = preparation_time
        recipe.cooking_time = cooking_time
        recipe.nb_person = nb_person
        recipe.ingredients = ingredients
        recipe.instructions = instructions
        recipe.picture_url = picture_url
        recipe.comments = NotAvailable
        recipe.author = NotAvailable
        recipe.thumbnail_url = NotLoaded
        return recipe
from
weboob.browser.pages
import
HTMLPage
,
pagination
from
weboob.browser.elements
import
ItemElement
,
ListElement
,
method
from
weboob.capabilities.recipe
import
Recipe
,
Comment
from
weboob.capabilities.base
import
NotAvailable
from
weboob.browser.filters.standard
import
Regexp
,
CleanText
,
Env
,
Duration
from
weboob.browser.filters.html
import
CleanHTML
import
re
class CookingDuration(Duration):
    """``Duration`` variant whose pattern reads 'PT<h>H<m>M<s>S' style text.

    Each of the three components is optional in the pattern; they are
    captured in the named groups ``hh``, ``mm`` and ``ss``.
    """

    _regexp = re.compile(
        r'PT((?P<hh>\d+)H)?((?P<mm>\d+)M)?((?P<ss>\d+)S)?'
    )
class ResultsPage(HTMLPage):
    """Search results page listing recipes."""

    @pagination
    @method
    class iter_recipes(ListElement):
        """One ``Recipe`` per <article class="grid-col--fixed-tiles">."""

        item_xpath = '//article[@class="grid-col--fixed-tiles"]'

        def next_page(self):
            """Return the cleaned href of the 'btnMoreResults' button."""
            # NOTE(review): the value is returned as extracted, whatever it
            # is -- confirm how the pagination machinery treats an empty one.
            more_results = CleanText('//button[@id="btnMoreResults"]/@href')
            return more_results(self)

        class item(ItemElement):
            """Fields read from a single result tile."""

            klass = Recipe

            # Identifier: the part of the first link's href matched between
            # '/recipe/' and the closing '/'.
            obj_id = Regexp(
                CleanText('./a[1]/@href'),
                '/recipe/(.*)/'
            )
            obj_title = CleanText('./a/h3')
            obj_short_description = CleanText(
                './a/div/div[@class="rec-card__description"]'
            )
class RecipePage(HTMLPage):
    """Page of a single recipe, including its reviews."""

    @method
    class get_recipe(ItemElement):
        """Fill a ``Recipe`` from the elements of the page."""

        klass = Recipe

        obj_id = Env('_id')
        obj_title = CleanText('//h1[@itemprop="name"]')

        def obj_preparation_time(self):
            """Preparation time as an int number of minutes."""
            extract = CookingDuration(
                CleanText('//time[@itemprop="prepTime"]/@datetime')
            )
            elapsed = extract(self)
            return int(elapsed.total_seconds() / 60)

        def obj_cooking_time(self):
            """Cooking time as an int number of minutes."""
            extract = CookingDuration(
                CleanText('//time[@itemprop="cookTime"]/@datetime')
            )
            elapsed = extract(self)
            return int(elapsed.total_seconds() / 60)

        def obj_nb_person(self):
            """One-element list holding the servings text, or NotAvailable."""
            servings = CleanText('//meta[@id="metaRecipeServings"]/@content')(self)
            if not servings:
                return NotAvailable
            return [servings]

        def obj_ingredients(self):
            """List of the non-empty cleaned ingredient texts."""
            nodes = self.el.xpath('//ul[has-class("checklist")]/li/label/span[@itemprop="ingredients"]')
            texts = (CleanText('.')(node) for node in nodes)
            return [text for text in texts if text]

        obj_instructions = CleanHTML('//ol[@itemprop="recipeInstructions"]')
        # Thumbnail and picture are read from the same hero-photo image.
        obj_thumbnail_url = CleanText('//section[has-class("hero-photo")]/span/a/img/@src')
        obj_picture_url = CleanText('//section[has-class("hero-photo")]/span/a/img/@src')

    @method
    class get_comments(ListElement):
        """One ``Comment`` per <div itemprop="review">."""

        item_xpath = '//div[@itemprop="review"]'
        ignore_duplicate = True

        class item(ItemElement):
            """Fields read from a single review."""

            klass = Comment

            obj_author = CleanText(
                './article/a/div/a/ul/li/h4[@itemprop="author"]'
            )
            obj_rate = CleanText(
                './article/div/div[@class="rating-stars"]/@data-ratingstars'
            )
            obj_text = CleanText('./p[@itemprop="reviewBody"]')
            obj_id = CleanText('./article/a/@href')
modules/allrecipes/test.py
View file @
159311bf
...
...
@@ -19,14 +19,16 @@
from
weboob.tools.test
import
BackendTest
import
itertools
class AllrecipesTest(BackendTest):
    """Backend test for the 'allrecipes' module."""

    MODULE = 'allrecipes'

    def test_recipe(self):
        """Search 'french fries' and load every result in full.

        Each loaded recipe must have truthy instructions, ingredients and
        title.
        """
        for found in self.backend.iter_recipes('french fries'):
            detailed = self.backend.get_recipe(found.id)
            assert detailed.instructions
            assert detailed.ingredients
            assert detailed.title
recipes
=
list
(
itertools
.
islice
(
self
.
backend
.
iter_recipes
(
'french fries'
),
0
,
20
)
)
assert
len
(
recipes
)
full_recipe
=
self
.
backend
.
get_recipe
(
recipes
[
0
]
.
id
)
assert
full_recipe
.
instructions
assert
full_recipe
.
ingredients
assert
full_recipe
.
title
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment