Commit bb3e3c7d authored by thibault douge, committed by Vincent A

[impotsgouvfrpar] fix the regex for the date

parent ba22e921
......@@ -151,11 +151,11 @@ class DocumentsPage(LoggedPage, HTMLPage):
def parse(self, el):
label_ct = CleanText('./div[has-class("texte")]')
date = Regexp(label_ct, 'le ([\w\/]+)', default=None)(self)
date = Regexp(label_ct, r'le ([\w\/]+?),', default=NotAvailable)(self)
self.env['label'] = label_ct(self)
if not date:
year = Regexp(label_ct, '\s(\d{4})', default=None)(self)
year = Regexp(label_ct, r'\s(\d{4})', default=NotAvailable)(self)
if 'sur les revenus de' in self.env['label']:
# this kind of document always appears in July (but we don't know the day)
date = '%s-07-01' % year
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment