Commit fcd84320 authored by Florent Fourcot

Use clean_relative_urls

parent b100f77f
......@@ -19,7 +19,7 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.capabilities.messages.genericArticle import GenericNewsPage,\
try_drop_tree
try_drop_tree, clean_relativ_urls
class ArticlePage(GenericNewsPage):
......@@ -33,15 +33,7 @@ class ArticlePage(GenericNewsPage):
def get_body(self):
div = self.document.getroot().find('.//div[@class="sectbody"]')
try_drop_tree(self.parser, div, "div.anchor")
for a in div.findall('.//a'):
try:
if a.attrib["href"][0:7] != "http://":
a.attrib["href"] = "http://taz.de/" + a.attrib["href"]
except:
continue
for img in div.findall('.//img'):
if img.attrib["src"][0:7] != "http://":
img.attrib["src"] = "http://taz.de/" + img.attrib["src"]
clean_relativ_urls(div, "http://taz.de")
return self.parser.tostring(div)
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment