Commit f586f546, authored by Florian Duguet, committed by Vincent A

[impotsgouvfrpar] prevent infinite loop in iter_documents pagination

parent e1016ac5
......@@ -132,9 +132,31 @@ class DocumentsPage(LoggedPage, HTMLPage):
item_xpath = '//ul[has-class("documents")]/li'
def next_page(self):
previous_year = CleanText('//li[has-class("blocAnnee") and has-class("selected")]/following-sibling::li[1]/a')(
# only if previous_year, else we return to page with current year and fall to an infinite loop
previous_year = CleanText(
'//li[has-class("blocAnnee") and has-class("selected")]/following-sibling::li[1]/a',
# Only continue if previous_year is not empty and is different from the current year;
# otherwise we return to the current year's page and fall into an infinite loop.
if previous_year:
previous_year = int(Regexp(None, r'(\d{4})').filter(previous_year))
current_year = int(Regexp(CleanText(
'//li[has-class("blocAnnee") and has-class("selected")]/a',
), r'(\d{4})')(
if previous_year >= current_year:
# If the previous year is passed as something like 'something 2078', the website
# returns the page of the current year, so previous_year must contain nothing but digits.
# Do not return anything, to avoid falling into an infinite loop; reaching this point
# means something unexpected has happened.
"pagination loop, previous_year: %s pagination is unexpectedly superior or equal to current_year: %s",
previous_year, current_year
return{'n': previous_year})
class item(ItemElement):
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment