Commit 4065f7ef authored by Romain Bignon

renamed dlfp2mail to weboob

parent 7906433c
# -*- coding: utf-8 -*-
"""
Copyright(C) 2010 Romain Bignon
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, version 3 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
"""
class DLFPUnavailable(Exception):
    """Raised when a request to the site fails or cannot be completed.

    In the visible code it is raised after HTTP/URL errors and after an
    invalid browser state has been detected.
    """


class DLFPBanned(DLFPUnavailable):
    """Specialised form of DLFPUnavailable.

    NOTE(review): nothing in the visible code raises this; presumably it
    signals that the account or address has been banned -- confirm.
    """
class DLFPIncorrectPassword(Exception):
    """Raised when the login page reports an error after a login attempt."""
class DLFPRetry(Exception):
    """Control-flow signal asking the browser to repeat the current request.

    Raised once a login page has loaded without error, so that the caller
    can request the originally wanted location again.
    """
...@@ -18,21 +18,23 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ...@@ -18,21 +18,23 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
""" """
from mechanize import FormNotFoundError from weboob.tools.browser import Browser
from weboob.backends.dlfp.pages.index import IndexPage, LoginPage
class BasePage: class DLFP(Browser):
def __init__(self, dlfp, document, url=''):
self.dlfp = dlfp
self.document = document
self.url = url
def loaded(self): DOMAIN = 'linuxfr.org'
pass PAGES = {'https://linuxfr.org/': IndexPage,
'https://linuxfr.org/pub/': IndexPage,
'https://linuxfr.org/my/': IndexPage,
'https://linuxfr.org/login.html': LoginPage,
}
def isLogged(self): def home(self):
forms = self.document.getElementsByTagName('form') return self.location('https://linuxfr.org')
for form in forms:
if form.getAttribute('id') == 'formulaire': def login(self):
return False self.location('/login.html', 'login=%s&passwd=%s&isauto=1' % (self.username, self.password))
return True def isLogged(self):
return (self.page and self.page.isLogged())
...@@ -24,14 +24,15 @@ import re ...@@ -24,14 +24,15 @@ import re
class Article: class Article:
RSS = None RSS = None
def __init__(self, _id, title, author, datetime): def __init__(self, _id, url, title, author, datetime):
self._id = _id self._id = _id
self.url = url
self.title = title self.title = title
self.author = author self.author = author
self.datetime = datetime self.datetime = datetime
class Newspaper(Article): class Newspaper(Article):
RSS = 'http://linuxfr.org/backend/news/rss20.rss' RSS = 'https://linuxfr.org/backend/news/rss20.rss'
class Telegram(Article): class Telegram(Article):
RSS = 'https://linuxfr.org/backend/journaux/rss20.rss' RSS = 'https://linuxfr.org/backend/journaux/rss20.rss'
...@@ -58,5 +59,5 @@ class ArticlesList: ...@@ -58,5 +59,5 @@ class ArticlesList:
warning('Unable to parse ID from link \'%s\'' % item['link']) warning('Unable to parse ID from link \'%s\'' % item['link'])
continue continue
_id = m.group(1) _id = m.group(1)
article = klass(_id, item['title'], item['author'], item['date_parsed']) article = klass(_id, item['link'], item['title'], item['author'], item['date_parsed'])
print _id, item['author'], item['title'] print _id, item['author'], item['title']
...@@ -18,12 +18,26 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ...@@ -18,12 +18,26 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
""" """
from dlfp.pages.base import BasePage from weboob.tools.browser import BrowserIncorrectPassword, BasePage
class IndexPage(BasePage): class DLFPPage(BasePage):
def isLogged(self):
    """Tell whether the parsed page belongs to a logged-in session.

    The page is considered logged out as soon as it contains a ``<form>``
    element whose ``id`` attribute equals ``'formulaire'``.
    NOTE(review): presumably that is the site's login form -- confirm
    against the real markup.

    Returns:
        False if such a form is present in ``self.document``, True otherwise.
    """
    forms = self.document.getElementsByTagName('form')
    return not any(form.getAttribute('id') == 'formulaire' for form in forms)
class IndexPage(DLFPPage):
pass pass
class LoginPage(BasePage): class LoginPage(DLFPPage):
def loaded(self):
    """Hook run by the browser right after this page has been built.

    Raises:
        BrowserIncorrectPassword: if ``self.hasError()`` reports an error
            on the login page.
    """
    if self.hasError():
        raise BrowserIncorrectPassword()
def hasError(self): def hasError(self):
plist = self.document.getElementsByTagName('p') plist = self.document.getElementsByTagName('p')
for p in plist: for p in plist:
......
...@@ -18,7 +18,8 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ...@@ -18,7 +18,8 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
""" """
from mechanize import Browser, response_seek_wrapper, BrowserStateError import mechanize.Browser
from mechanize import response_seek_wrapper, BrowserStateError
import urllib2 import urllib2
import html5lib import html5lib
from html5lib import treebuilders from html5lib import treebuilders
...@@ -27,9 +28,16 @@ import time ...@@ -27,9 +28,16 @@ import time
from logging import warning, error from logging import warning, error
from copy import copy from copy import copy
from dlfp.pages.login import IndexPage, LoginPage from weboob.tools.firefox_cookies import FirefoxCookieJar
from dlfp.exceptions import DLFPIncorrectPassword, DLFPUnavailable, DLFPRetry
from dlfp.firefox_cookies import FirefoxCookieJar class BrowserIncorrectPassword(Exception):
pass
class BrowserUnavailable(Exception):
    """Raised when a request fails or the browser state is unusable.

    The browser raises it after HTTP/URL errors and after recovering from
    an invalid state by going back to the home page.
    """
class BrowserRetry(Exception):
    """Control-flow signal asking the browser to repeat the current request.

    The browser's navigation methods catch it; page classes are expected
    to raise it. NOTE(review): no raiser is visible in this view -- confirm.
    """
class NoHistory: class NoHistory:
def __init__(self): pass def __init__(self): pass
...@@ -38,48 +46,67 @@ class NoHistory: ...@@ -38,48 +46,67 @@ class NoHistory:
def clear(self): pass def clear(self): pass
def close(self): pass def close(self): pass
class DLFP(Browser): class BasePage:
def __init__(self, browser, document, url=''):
    """Remember the context a page object was created in.

    Args:
        browser: the browser instance that fetched the page.
        document: the parsed document for the page.
        url: address the page was loaded from (empty string by default).
    """
    self.browser = browser
    self.document = document
    self.url = url
def loaded(self):
    """Hook called by the browser once the page object has been built.

    Does nothing by default; page subclasses override it to react to the
    freshly parsed document.
    """
    pass
class Browser(mechanize.Browser):
# ------ Class attributes --------------------------------------
DOMAIN = None
PAGES = {}
USER_AGENT = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.4) Gecko/2008111318 Ubuntu/8.10 (intrepid) Firefox/3.0.3'
# ------ Abstract methods --------------------------------------
# Go to home
def home(self):
    """Navigate to the website's home page.

    Abstract: this base implementation always raises, so concrete
    browsers must override it.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError()
# Login to the website
def login(self):
    """Log in to the website.

    Abstract: this base implementation always raises, so concrete
    browsers must override it.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError()
# Return True if we are logged on website
def isLogged(self):
    """Return True if the browser is currently logged in to the website.

    Abstract: this base implementation always raises, so concrete
    browsers must override it.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError()
pages = {'https://linuxfr.org/': IndexPage, # ------ Browser methods ---------------------------------------
'https://linuxfr.org/pub/': IndexPage,
'https://linuxfr.org/my/': IndexPage,
'https://linuxfr.org/login.html': LoginPage,
}
def __init__(self, username, password=None, firefox_cookies=None): def __init__(self, username, password=None, firefox_cookies=None):
Browser.__init__(self, history=NoHistory()) mechanize.Browser.__init__(self, history=NoHistory())
self.addheaders = [ self.addheaders = [
['User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.4) Gecko/2008111318 Ubuntu/8.10 (intrepid) Firefox/3.0.3'] ['User-agent', self.USER_AGENT]
] ]
# Share cookies with firefox # Share cookies with firefox
if firefox_cookies: if firefox_cookies:
self.__cookie = FirefoxCookieJar(firefox_cookies) self.__cookie = FirefoxCookieJar(self.DOMAIN, firefox_cookies)
self.__cookie.load() self.__cookie.load()
self.set_cookiejar(self.__cookie) self.set_cookiejar(self.__cookie)
else: else:
self.__cookie = None self.__cookie = None
self.__parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("dom")) self.__parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("dom"))
self.__page = None self.page = None
self.__last_update = 0.0 self.last_update = 0.0
self.username = username self.username = username
self.password = password self.password = password
if self.password: if self.password:
try: try:
self.home() self.home()
except DLFPUnavailable: except BrowserUnavailable:
pass pass
def page(self):
return self.__page
def home(self):
return self.location('https://linuxfr.org')
def pageaccess(func): def pageaccess(func):
def inner(self, *args, **kwargs): def inner(self, *args, **kwargs):
if not self.__page or not self.__page.isLogged() and self.password: if not self.page or not self.page.isLogged() and self.password:
self.home() self.home()
return func(self, *args, **kwargs) return func(self, *args, **kwargs)
...@@ -89,57 +116,65 @@ class DLFP(Browser): ...@@ -89,57 +116,65 @@ class DLFP(Browser):
def keepalive(self): def keepalive(self):
self.home() self.home()
def login(self): def change_location(func):
self.location('/login.html', 'login=%s&passwd=%s&isauto=1' % (self.username, self.password)) def inner(self, *args, **kwargs):
if args and args[0][0] == '/' and (not self.request or self.request.host != self.DOMAIN):
args = ('%s://%s%s' % (self.PROTOCOL, self.DOMAIN, args[0]),) + args[1:]
print args
return func(self, *args, **kwargs)
return inner
@change_location
def openurl(self, *args, **kwargs): def openurl(self, *args, **kwargs):
try: try:
return Browser.open(self, *args, **kwargs) return mechanize.Browser.open(self, *args, **kwargs)
except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e: except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e:
error(e) error(e)
raise DLFPUnavailable() raise BrowserUnavailable()
except BrowserStateError: except BrowserStateError:
self.home() self.home()
return Browser.open(self, *args, **kwargs) return mechanize.Browser.open(self, *args, **kwargs)
def submit(self, *args, **kwargs): def submit(self, *args, **kwargs):
try: try:
self.__changeLocation(Browser.submit(self, *args, **kwargs)) self.__changeLocation(mechanize.Browser.submit(self, *args, **kwargs))
except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e: except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e:
error(e) error(e)
self.__page = None self.page = None
raise DLFPUnavailable() raise BrowserUnavailable()
except (BrowserStateError,DLFPRetry): except (BrowserStateError,BrowserRetry):
self.home() self.home()
raise DLFPUnavailable() raise BrowserUnavailable()
def isOnPage(self, pageCls): def isOnPage(self, pageCls):
return isinstance(self.__page, pageCls) return isinstance(self.page, pageCls)
def follow_link(self, *args, **kwargs): def follow_link(self, *args, **kwargs):
try: try:
self.__changeLocation(Browser.follow_link(self, *args, **kwargs)) self.__changeLocation(mechanize.Browser.follow_link(self, *args, **kwargs))
except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e: except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e:
error(e) error(e)
self.__page = None self.page = None
raise DLFPUnavailable() raise BrowserUnavailable()
except (BrowserStateError,DLFPRetry): except (BrowserStateError,BrowserRetry):
self.home() self.home()
raise DLFPUnavailable() raise BrowserUnavailable()
@change_location
def location(self, *args, **kwargs): def location(self, *args, **kwargs):
keep_args = copy(args) keep_args = copy(args)
keep_kwargs = kwargs.copy() keep_kwargs = kwargs.copy()
try: try:
self.__changeLocation(Browser.open(self, *args, **kwargs)) self.__changeLocation(mechanize.Browser.open(self, *args, **kwargs))
except DLFPRetry: except BrowserRetry:
if not self.__page or not args or self.__page.url != args[0]: if not self.page or not args or self.page.url != args[0]:
self.location(keep_args, keep_kwargs) self.location(keep_args, keep_kwargs)
except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e: except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e:
error(e) error(e)
self.__page = None self.page = None
raise DLFPUnavailable() raise BrowserUnavailable()
except BrowserStateError: except BrowserStateError:
self.home() self.home()
self.location(*keep_args, **keep_kwargs) self.location(*keep_args, **keep_kwargs)
...@@ -147,7 +182,7 @@ class DLFP(Browser): ...@@ -147,7 +182,7 @@ class DLFP(Browser):
def __changeLocation(self, result): def __changeLocation(self, result):
# Find page from url # Find page from url
pageCls = None pageCls = None
for key, value in self.pages.items(): for key, value in self.PAGES.items():
regexp = re.compile('^%s$' % key) regexp = re.compile('^%s$' % key)
m = regexp.match(result.geturl()) m = regexp.match(result.geturl())
if m: if m:
...@@ -156,7 +191,7 @@ class DLFP(Browser): ...@@ -156,7 +191,7 @@ class DLFP(Browser):
# Not found # Not found
if not pageCls: if not pageCls:
self.__page = None self.page = None
r = result.read() r = result.read()
if isinstance(r, unicode): if isinstance(r, unicode):
r = r.encode('iso-8859-15', 'replace') r = r.encode('iso-8859-15', 'replace')
...@@ -165,18 +200,13 @@ class DLFP(Browser): ...@@ -165,18 +200,13 @@ class DLFP(Browser):
return return
print '[%s] Gone on %s' % (self.username, result.geturl()) print '[%s] Gone on %s' % (self.username, result.geturl())
self.__last_update = time.time() self.last_update = time.time()
document = self.__parser.parse(result, encoding='iso-8859-1') document = self.__parser.parse(result, encoding='iso-8859-1')
self.__page = pageCls(self, document, result.geturl()) self.page = pageCls(self, document, result.geturl())
self.__page.loaded() self.page.loaded()
# Special pages if not self.isLogged() and self.password:
if isinstance(self.__page, LoginPage):
if self.__page.hasError():
raise DLFPIncorrectPassword()
raise DLFPRetry()
if not self.__page.isLogged() and self.password:
print '!! Relogin !!' print '!! Relogin !!'
self.login() self.login()
return return
......
...@@ -31,11 +31,10 @@ from mechanize import CookieJar, Cookie ...@@ -31,11 +31,10 @@ from mechanize import CookieJar, Cookie
#logger.setLevel(logging.DEBUG) #logger.setLevel(logging.DEBUG)
class FirefoxCookieJar(CookieJar): class FirefoxCookieJar(CookieJar):
def __init__(self, domain, sqlite_file=None, policy=None):
def __init__(self, sqlite_file=None, policy=None):
CookieJar.__init__(self, policy) CookieJar.__init__(self, policy)
self.domain = domain
self.sqlite_file = sqlite_file self.sqlite_file = sqlite_file
def __connect(self): def __connect(self):
...@@ -47,7 +46,6 @@ class FirefoxCookieJar(CookieJar): ...@@ -47,7 +46,6 @@ class FirefoxCookieJar(CookieJar):
return db return db
def load(self): def load(self):
db = self.__connect() db = self.__connect()
...@@ -55,7 +53,7 @@ class FirefoxCookieJar(CookieJar): ...@@ -55,7 +53,7 @@ class FirefoxCookieJar(CookieJar):
cookies = db.execute("""SELECT host, path, name, value, expiry, lastAccessed, isSecure cookies = db.execute("""SELECT host, path, name, value, expiry, lastAccessed, isSecure
FROM moz_cookies FROM moz_cookies
WHERE host LIKE '%linuxfr%'""") WHERE host LIKE '%%%s%%'""" % self.domain)
for entry in cookies: for entry in cookies:
...@@ -91,7 +89,7 @@ class FirefoxCookieJar(CookieJar): ...@@ -91,7 +89,7 @@ class FirefoxCookieJar(CookieJar):
db = self.__connect() db = self.__connect()
if not db: return if not db: return
db.execute("DELETE FROM moz_cookies WHERE host LIKE '%linuxfr%'") db.execute("DELETE FROM moz_cookies WHERE host LIKE '%%%s%%'" % self.domain)
for cookie in self: for cookie in self:
if cookie.secure: secure = 1 if cookie.secure: secure = 1
else: secure = 0 else: secure = 0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment