pax_global_header 0000666 0000000 0000000 00000000064 13434577412 0014524 g ustar 00root root 0000000 0000000 52 comment=fc7621de12700bbbac66134da0acc19e3493cbf7
woob-fc7621de12700bbbac66134da0acc19e3493cbf7-modules-presseurop/ 0000775 0000000 0000000 00000000000 13434577412 0023540 5 ustar 00root root 0000000 0000000 woob-fc7621de12700bbbac66134da0acc19e3493cbf7-modules-presseurop/modules/ 0000775 0000000 0000000 00000000000 13434577412 0025210 5 ustar 00root root 0000000 0000000 woob-fc7621de12700bbbac66134da0acc19e3493cbf7-modules-presseurop/modules/presseurop/ 0000775 0000000 0000000 00000000000 13434577412 0027417 5 ustar 00root root 0000000 0000000 woob-fc7621de12700bbbac66134da0acc19e3493cbf7-modules-presseurop/modules/presseurop/__init__.py 0000664 0000000 0000000 00000001603 13434577412 0031530 0 ustar 00root root 0000000 0000000 "NewspaperPresseuropModule init"
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Florent Fourcot
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from .module import NewspaperPresseuropModule
__all__ = ['NewspaperPresseuropModule']
woob-fc7621de12700bbbac66134da0acc19e3493cbf7-modules-presseurop/modules/presseurop/browser.py 0000664 0000000 0000000 00000003472 13434577412 0031462 0 ustar 00root root 0000000 0000000 "browser for presseurop website"
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Florent Fourcot
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from datetime import date, datetime, time
from .pages import PresseuropPage
from weboob.browser.browsers import AbstractBrowser
from weboob.browser.url import URL
class NewspaperPresseuropBrowser(AbstractBrowser):
    """Browser for the Presseurop website, served from BASEURL.

    The class names 'genericnewspaper' as its PARENT and routes every
    path to PresseuropPage.
    """

    PARENT = 'genericnewspaper'
    BASEURL = 'http://www.voxeurop.eu'

    # Catch-all pattern: any path is handled by PresseuropPage.
    presseurop_page = URL("/.*", PresseuropPage)

    def __init__(self, *args, **kwargs):
        """Keep a reference to the ``weboob`` keyword argument, then
        forward all arguments to the parent initializer.

        Raises KeyError when ``weboob`` is not given as a keyword.
        """
        self.weboob = kwargs['weboob']
        super(NewspaperPresseuropBrowser, self).__init__(*args, **kwargs)

    def get_daily_date(self, _id):
        """Open ``_id`` and return the loaded page's ``get_daily_date()``."""
        self.location(_id)
        current_page = self.page
        return current_page.get_daily_date()

    def get_daily_infos(self, _id):
        """Load the daily page for ``_id`` and describe it.

        ``_id`` is split on '-' and each piece is converted to int to
        build a date, so it is expected to look like 'YYYY-MM-DD'.

        Returns a ``(url, title, datetime)`` tuple where ``datetime`` is
        that date at 00:00:00.
        """
        url = "http://www.voxeurop.eu/fr/today/" + _id
        self.location(url)
        title = self.page.get_title()

        # The date is only parsed after the page was fetched, as before.
        date_parts = [int(piece) for piece in _id.split('-')]
        midnight = time(0, 0, 0)
        article_datetime = datetime.combine(date(*date_parts), midnight)
        return url, title, article_datetime
woob-fc7621de12700bbbac66134da0acc19e3493cbf7-modules-presseurop/modules/presseurop/favicon.png 0000664 0000000 0000000 00000000553 13434577412 0031555 0 ustar 00root root 0000000 0000000 PNG
IHDR @ @ XGl sRGB PLTEnE ]y$~4 pHYs tIME(S IDATHŕa
0e7ExWjB[%ݠ{+>-(%XO 0I5'U1Z! +@N9 aYUf f\' ; H :SzI- |͊ \s)%7LŲBc
l%|%ۀCrUIMH/XL}],Ea^VWo!l IENDB` woob-fc7621de12700bbbac66134da0acc19e3493cbf7-modules-presseurop/modules/presseurop/module.py 0000664 0000000 0000000 00000006100 13434577412 0031253 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright(C) 2012 Florent Fourcot
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
"backend for http://www.presseurop.eu"
from weboob.capabilities.messages import CapMessages, Thread
from weboob.tools.backend import AbstractModule
from weboob.tools.backend import BackendConfig
from weboob.tools.value import Value
from .browser import NewspaperPresseuropBrowser
from .tools import rssid, url2id
from weboob.tools.newsfeed import Newsfeed
class NewspaperPresseuropModule(AbstractModule, CapMessages):
    """Messages backend that turns the voxeurop.eu RSS feed into threads.

    The feed language is taken from the ``lang`` configuration value.
    """

    MAINTAINER = u'Florent Fourcot'
    EMAIL = 'weboob@flo.fourcot.fr'
    VERSION = '1.5'
    LICENSE = 'AGPLv3+'
    STORAGE = {'seen': {}}
    NAME = 'presseurop'
    DESCRIPTION = u'Presseurop website'
    BROWSER = NewspaperPresseuropBrowser
    # Wrapped in staticmethod so they are not bound to the instance when
    # read through ``self``.
    RSSID = staticmethod(rssid)
    URL2ID = staticmethod(url2id)
    RSSSIZE = 300
    PARENT = 'genericnewspaper'
    CONFIG = BackendConfig(Value('lang', label='Lang of articles',
                                 choices={'fr': 'fr', 'de': 'de', 'en': 'en',
                                          'cs': 'cs', 'es': 'es', 'it': 'it', 'nl': 'nl',
                                          'pl': 'pl', 'pt': 'pt', 'ro': 'ro'},
                                 default='fr'))

    def __init__(self, *args, **kwargs):
        """Initialize the parent module and derive the RSS feed URL from
        the configured language."""
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(NewspaperPresseuropModule, self).__init__(*args, **kwargs)
        self.RSS_FEED = 'http://www.voxeurop.eu/%s/rss.xml' % self.config['lang'].get()

    @staticmethod
    def _thread_from_article(article):
        """Build a Thread whose id, title and date come straight from a
        feed entry."""
        thread = Thread(article.link)
        thread.title = article.title
        thread.date = article.datetime
        return thread

    def iter_threads(self):
        """Yield one Thread per feed entry.

        Entries whose link contains "/news-brief/" are grouped by the
        daily date reported by the browser: the first entry seen for a
        given day yields a thread describing that day (its id is the
        daily URL, a '#', and the entry's local id), later entries of the
        same day yield nothing. News-brief entries without a daily date
        (``None``) and all other entries yield a thread built directly
        from the feed entry.
        """
        seen_days = set()
        for article in Newsfeed(self.RSS_FEED, self.RSSID).iter_entries():
            if "/news-brief/" not in article.link:
                yield self._thread_from_article(article)
                continue

            day = self.browser.get_daily_date(article.link)
            if day is None:
                yield self._thread_from_article(article)
            elif day and day not in seen_days:
                localid = url2id(article.link)
                seen_days.add(day)
                daily_url, title, daily_date = self.browser.get_daily_infos(day)
                thread = Thread(daily_url + "#" + localid)
                thread.title = title
                thread.date = daily_date
                yield thread
            # Any other case (day already handled, or a falsy non-None
            # value) intentionally produces no thread.
woob-fc7621de12700bbbac66134da0acc19e3493cbf7-modules-presseurop/modules/presseurop/pages.py 0000664 0000000 0000000 00000005211 13434577412 0031067 0 ustar 00root root 0000000 0000000 "ArticlePage object for presseurope"
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Florent Fourcot
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.browser.pages import AbstractPage
from weboob.browser.filters.html import CSS, CleanHTML
class PresseuropPage(AbstractPage):
    """Article page of the Presseurop website.

    Declares 'genericnewspaper' / 'generic_news_page' as its parent and
    uses CSS selectors.
    """

    _selector = CSS
    PARENT = 'genericnewspaper'
    PARENT_URL = 'generic_news_page'

    def on_loaded(self):
        """Record the document root and the CSS selectors for the title,
        author and body elements.

        NOTE(review): these attributes are presumably consumed by the
        parent page's get_title/get_author/get_element_body - confirm.
        """
        self.main_div = self.doc.getroot()
        self.element_title_selector = "title"
        self.element_author_selector = "a[rel=author], div.profilecartoontext>p>a"
        self.element_body_selector = "div.block, div.panel, div.bodytext"

    def get_body(self):
        """Return the article body as cleaned HTML.

        Unwanted sub-trees are dropped from the body element first, then
        relative URLs are rewritten against "http://presseurop.eu".
        """
        element_body = self.get_element_body()
        # Same selectors, in the same order, as the original sequence of
        # calls. "div.sharecount" was listed twice there; it is kept twice
        # because try_drop_tree is defined outside this file.
        # TODO(review): drop the duplicate once confirmed to be a no-op.
        dropped_selectors = (
            "li.button-social",
            "div.sharecount",
            "p.ruledtop",
            "p.ctafeedback",
            "aside.articlerelated",
            "div.sharecount",
            "iframe",
        )
        for selector in dropped_selectors:
            self.try_drop_tree(element_body, selector)
        self.clean_relativ_urls(element_body, "http://presseurop.eu")
        return CleanHTML('.')(element_body)

    def get_title(self):
        """Return the parent's title, cut before the first '|'."""
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        title = super(PresseuropPage, self).get_title()
        return title.split('|')[0]

    def get_author(self):
        """Return the parent's author, followed by " | <source>" when the
        page has a ``span.sourceinfo`` link.

        Falls back to the parent's author alone when that link is absent
        or when the author or the link text cannot be concatenated.
        """
        author = super(PresseuropPage, self).get_author()
        try:
            source = self.doc.getroot().xpath(
                "//span[@class='sourceinfo']/a")[0].text
            return author + " | " + source
        except (IndexError, TypeError):
            # IndexError: no source link on the page.
            # TypeError: author or link text is not a string (e.g. None).
            # A bare "except:" would also hide KeyboardInterrupt/SystemExit.
            return author

    def get_daily_date(self):
        """Return the 4th '/'-separated piece of the href of the first
        link found in the first ``p.w200`` element, or None when the page
        has no such paragraph.

        NOTE(review): with a relative href such as "/fr/today/2013-01-31"
        that piece is the date - confirm against real pages.
        """
        plink = self.doc.getroot().xpath("//p[@class='w200']")
        if not plink:
            return None
        link = plink[0].xpath('a')[0]
        return link.attrib['href'].split('/')[3]
woob-fc7621de12700bbbac66134da0acc19e3493cbf7-modules-presseurop/modules/presseurop/test.py 0000664 0000000 0000000 00000002206 13434577412 0030750 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
# Copyright(C) 2012 Florent Fourcot
#
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest
from weboob.tools.value import Value
class PresseuropTest(BackendTest):
    """Smoke test for the 'presseurop' backend."""

    MODULE = 'presseurop'

    def setUp(self):
        """Use the French feed when the backend has not been configured."""
        if self.is_backend_configured():
            return
        self.backend.config['lang'] = Value(value='fr')

    def test_new_messages(self):
        """Walk through every unread message; nothing is asserted beyond
        the iteration completing."""
        for _message in self.backend.iter_unread_messages():
            continue
woob-fc7621de12700bbbac66134da0acc19e3493cbf7-modules-presseurop/modules/presseurop/tools.py 0000664 0000000 0000000 00000002106 13434577412 0031130 0 ustar 00root root 0000000 0000000 "tools for presseurop backend"
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Florent Fourcot
# This file is part of a weboob module.
#
# This weboob module is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This weboob module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this weboob module. If not, see <http://www.gnu.org/licenses/>.
import re
def url2id(url):
    """Return the identifier embedded in a presseurop URL.

    For URLs containing "/today/", the identifier is the text between
    the first and the second '#'. For any other URL it is the run of
    digits that follows a '-' and precedes a '?' in the last path
    segment (e.g. ".../some-title-4242?xtor=RSS-9" gives "4242").

    Raises:
        IndexError: a "/today/" URL has no '#'.
        AttributeError: another URL does not match the expected
            "...-<digits>?..." shape.
    """
    if "/today/" in url:
        return url.split("#")[1]
    # Raw string: "\?" in a plain string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    match = re.match(r".*/.*-([0-9]+)\?.*", url)
    return match.group(1)
def rssid(entry):
    """Return the identifier of a feed entry, computed by url2id from
    the entry's ``link`` attribute."""
    link = entry.link
    return url2id(link)