From 9d36e7a0e66d5d37b96e28c53943fc2e2dfec179 Mon Sep 17 00:00:00 2001 From: Bezleputh Date: Tue, 16 Sep 2014 20:39:31 +0200 Subject: [PATCH] [minutes20] fix parsing of insolite pages --- modules/minutes20/pages/article.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/minutes20/pages/article.py b/modules/minutes20/pages/article.py index 56d3f02917..5bf1347af8 100644 --- a/modules/minutes20/pages/article.py +++ b/modules/minutes20/pages/article.py @@ -28,7 +28,7 @@ def on_loaded(self): self.main_div = self.document.getroot() self.element_title_selector = "h1" self.element_author_selector = "div.mna-signature" - self.element_body_selector = "div.mna-body" + self.element_body_selector = "div[role=main], div.mna-body" def get_body(self): try: @@ -38,6 +38,9 @@ def get_body(self): else: try_remove(self.parser, element_body, "div.mna-tools") try_remove(self.parser, element_body, "div.mna-comment-call") + try_remove(self.parser, element_body, "ul[class^=content-related]") + try_remove(self.parser, element_body, "ul[class^=content-related]") + try_remove(self.parser, element_body, "p.author-sign") try: element_body.remove(self.get_element_author()) except NoAuthorElement: -- GitLab