From 4065f7efcd5aba245426b072f071e9b905a509f9 Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Sat, 13 Feb 2010 17:59:41 +0100 Subject: [PATCH] renamed dlfp2mail to weboob --- catin/modules/dlfp/exceptions.py | 31 ---- {catin => weboob}/__init__.py | 0 .../pages => weboob/backends}/__init__.py | 0 .../backends}/dlfp/__init__.py | 0 .../backends/dlfp/browser.py | 30 ++-- .../modules => weboob/backends}/dlfp/feeds.py | 7 +- weboob/backends/dlfp/pages/__init__.py | 0 .../backends/dlfp/pages/index.py | 20 ++- weboob/frontends/__init__.py | 0 weboob/tools/__init__.py | 0 .../modules/dlfp => weboob/tools}/browser.py | 142 +++++++++++------- .../dlfp => weboob/tools}/firefox_cookies.py | 10 +- catin2mail => weboob2mail | 0 13 files changed, 127 insertions(+), 113 deletions(-) delete mode 100644 catin/modules/dlfp/exceptions.py rename {catin => weboob}/__init__.py (100%) rename {catin/modules/dlfp/pages => weboob/backends}/__init__.py (100%) rename {catin/modules => weboob/backends}/dlfp/__init__.py (100%) rename catin/modules/dlfp/pages/base.py => weboob/backends/dlfp/browser.py (53%) rename {catin/modules => weboob/backends}/dlfp/feeds.py (87%) create mode 100644 weboob/backends/dlfp/pages/__init__.py rename catin/modules/dlfp/pages/login.py => weboob/backends/dlfp/pages/index.py (68%) create mode 100644 weboob/frontends/__init__.py create mode 100644 weboob/tools/__init__.py rename {catin/modules/dlfp => weboob/tools}/browser.py (54%) rename {catin/modules/dlfp => weboob/tools}/firefox_cookies.py (93%) rename catin2mail => weboob2mail (100%) diff --git a/catin/modules/dlfp/exceptions.py b/catin/modules/dlfp/exceptions.py deleted file mode 100644 index bb602f151b..0000000000 --- a/catin/modules/dlfp/exceptions.py +++ /dev/null @@ -1,31 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -Copyright(C) 2010 Romain Bignon - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, version 3 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - -""" - -class DLFPUnavailable(Exception): - pass - -class DLFPBanned(DLFPUnavailable): - pass - -class DLFPIncorrectPassword(Exception): - pass - -class DLFPRetry(Exception): - pass diff --git a/catin/__init__.py b/weboob/__init__.py similarity index 100% rename from catin/__init__.py rename to weboob/__init__.py diff --git a/catin/modules/dlfp/pages/__init__.py b/weboob/backends/__init__.py similarity index 100% rename from catin/modules/dlfp/pages/__init__.py rename to weboob/backends/__init__.py diff --git a/catin/modules/dlfp/__init__.py b/weboob/backends/dlfp/__init__.py similarity index 100% rename from catin/modules/dlfp/__init__.py rename to weboob/backends/dlfp/__init__.py diff --git a/catin/modules/dlfp/pages/base.py b/weboob/backends/dlfp/browser.py similarity index 53% rename from catin/modules/dlfp/pages/base.py rename to weboob/backends/dlfp/browser.py index 126b96225b..1f48387afd 100644 --- a/catin/modules/dlfp/pages/base.py +++ b/weboob/backends/dlfp/browser.py @@ -18,21 +18,23 @@ """ -from mechanize import FormNotFoundError +from weboob.tools.browser import Browser +from weboob.backends.dlfp.pages.index import IndexPage, LoginPage -class BasePage: - def __init__(self, dlfp, document, url=''): - self.dlfp = dlfp - self.document = document - self.url = url +class DLFP(Browser): - def loaded(self): - pass + DOMAIN = 'linuxfr.org' + PAGES = {'https://linuxfr.org/': IndexPage, + 'https://linuxfr.org/pub/': IndexPage, + 'https://linuxfr.org/my/': IndexPage, + 'https://linuxfr.org/login.html': LoginPage, + } - def isLogged(self): - forms = self.document.getElementsByTagName('form') - for form in forms: - if form.getAttribute('id') == 'formulaire': - return False + def home(self): + return self.location('https://linuxfr.org') + + def login(self): + self.location('/login.html', 'login=%s&passwd=%s&isauto=1' % (self.username, self.password)) - return True + def isLogged(self): + return (self.page and self.page.isLogged()) diff --git a/catin/modules/dlfp/feeds.py b/weboob/backends/dlfp/feeds.py similarity index 87% rename from catin/modules/dlfp/feeds.py rename to weboob/backends/dlfp/feeds.py index beb74d905c..e77f4fdadf 100644 --- a/catin/modules/dlfp/feeds.py +++ b/weboob/backends/dlfp/feeds.py @@ -24,14 +24,15 @@ class Article: RSS = None - def __init__(self, _id, title, author, datetime): + def __init__(self, _id, url, title, author, datetime): self._id = _id + self.url = url self.title = title self.author = author self.datetime = datetime class Newspaper(Article): - RSS = 'http://linuxfr.org/backend/news/rss20.rss' + RSS = 'https://linuxfr.org/backend/news/rss20.rss' class Telegram(Article): RSS = 'https://linuxfr.org/backend/journaux/rss20.rss' @@ -58,5 +59,5 @@ def fetch(self): warning('Unable to parse ID from link \'%s\'' % item['link']) continue _id = m.group(1) - article = klass(_id, item['title'], item['author'], item['date_parsed']) + article = klass(_id, item['link'], item['title'], item['author'], item['date_parsed']) print _id, item['author'], item['title'] diff --git a/weboob/backends/dlfp/pages/__init__.py b/weboob/backends/dlfp/pages/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/catin/modules/dlfp/pages/login.py b/weboob/backends/dlfp/pages/index.py similarity index 68% rename from catin/modules/dlfp/pages/login.py rename to weboob/backends/dlfp/pages/index.py index 2c890564d9..e0dea65d67 100644 --- a/catin/modules/dlfp/pages/login.py +++ b/weboob/backends/dlfp/pages/index.py @@ -18,12 +18,26 @@ """ -from dlfp.pages.base import BasePage +from weboob.tools.browser import BrowserIncorrectPassword, BasePage -class IndexPage(BasePage): +class DLFPPage(BasePage): + def isLogged(self): + forms = self.document.getElementsByTagName('form') + for form in forms: + if form.getAttribute('id') == 'formulaire': + return False + + return True + +class IndexPage(DLFPPage): pass -class LoginPage(BasePage): +class LoginPage(DLFPPage): + + def loaded(self): + if self.hasError(): + raise BrowserIncorrectPassword() + def hasError(self): plist = self.document.getElementsByTagName('p') for p in plist: diff --git a/weboob/frontends/__init__.py b/weboob/frontends/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/weboob/tools/__init__.py b/weboob/tools/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/catin/modules/dlfp/browser.py b/weboob/tools/browser.py similarity index 54% rename from catin/modules/dlfp/browser.py rename to weboob/tools/browser.py index 5479d086e7..36d34f60fd 100644 --- a/catin/modules/dlfp/browser.py +++ b/weboob/tools/browser.py @@ -18,7 +18,8 @@ """ -from mechanize import Browser, response_seek_wrapper, BrowserStateError +import mechanize.Browser +from mechanize import response_seek_wrapper, BrowserStateError import urllib2 import html5lib from html5lib import treebuilders @@ -27,9 +28,16 @@ from logging import warning, error from copy import copy -from dlfp.pages.login import IndexPage, LoginPage -from dlfp.exceptions import DLFPIncorrectPassword, DLFPUnavailable, DLFPRetry -from dlfp.firefox_cookies import FirefoxCookieJar +from weboob.tools.firefox_cookies import FirefoxCookieJar + +class BrowserIncorrectPassword(Exception): + pass + +class BrowserUnavailable(Exception): + pass + +class BrowserRetry(Exception): + pass class NoHistory: def __init__(self): pass @@ -38,48 +46,67 @@ def back(self, n, _response): pass def clear(self): pass def close(self): pass -class DLFP(Browser): +class BasePage: + def __init__(self, browser, document, url=''): + self.browser = browser + self.document = document + self.url = url + + def loaded(self): + pass + +class Browser(mechanize.Browser): + + # ------ Class attributes -------------------------------------- + + DOMAIN = None + PAGES = {} + USER_AGENT = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.4) Gecko/2008111318 Ubuntu/8.10 (intrepid) Firefox/3.0.3' + + # ------ Abstract methods -------------------------------------- + + # Go to home + def home(self): + raise NotImplementedError() + + # Login to the website + def login(self): + raise NotImplementedError() + + # Return True if we are logged on website + def isLogged(self): + raise NotImplementedError() - pages = {'https://linuxfr.org/': IndexPage, - 'https://linuxfr.org/pub/': IndexPage, - 'https://linuxfr.org/my/': IndexPage, - 'https://linuxfr.org/login.html': LoginPage, - } + # ------ Browser methods --------------------------------------- def __init__(self, username, password=None, firefox_cookies=None): - Browser.__init__(self, history=NoHistory()) + mechanize.Browser.__init__(self, history=NoHistory()) self.addheaders = [ - ['User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.4) Gecko/2008111318 Ubuntu/8.10 (intrepid) Firefox/3.0.3'] + ['User-agent', self.USER_AGENT] ] # Share cookies with firefox if firefox_cookies: - self.__cookie = FirefoxCookieJar(firefox_cookies) + self.__cookie = FirefoxCookieJar(self.DOMAIN, firefox_cookies) self.__cookie.load() self.set_cookiejar(self.__cookie) else: self.__cookie = None self.__parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("dom")) - self.__page = None - self.__last_update = 0.0 + self.page = None + self.last_update = 0.0 self.username = username self.password = password if self.password: try: self.home() - except DLFPUnavailable: + except BrowserUnavailable: pass - def page(self): - return self.__page - - def home(self): - return self.location('https://linuxfr.org') - def pageaccess(func): def inner(self, *args, **kwargs): - if not self.__page or not self.__page.isLogged() and self.password: + if not self.page or not self.page.isLogged() and self.password: self.home() return func(self, *args, **kwargs) @@ -89,57 +116,65 @@ def inner(self, *args, **kwargs): def keepalive(self): self.home() - def login(self): - self.location('/login.html', 'login=%s&passwd=%s&isauto=1' % (self.username, self.password)) + def change_location(func): + def inner(self, *args, **kwargs): + if args and args[0][0] == '/' and (not self.request or self.request.host != self.DOMAIN): + args = ('%s://%s%s' % (self.PROTOCOL, self.DOMAIN, args[0]),) + args[1:] + print args + return func(self, *args, **kwargs) + return inner + + @change_location def openurl(self, *args, **kwargs): try: - return Browser.open(self, *args, **kwargs) + return mechanize.Browser.open(self, *args, **kwargs) except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e: error(e) - raise DLFPUnavailable() + raise BrowserUnavailable() except BrowserStateError: self.home() - return Browser.open(self, *args, **kwargs) + return mechanize.Browser.open(self, *args, **kwargs) def submit(self, *args, **kwargs): try: - self.__changeLocation(Browser.submit(self, *args, **kwargs)) + self.__changeLocation(mechanize.Browser.submit(self, *args, **kwargs)) except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e: error(e) - self.__page = None - raise DLFPUnavailable() - except (BrowserStateError,DLFPRetry): + self.page = None + raise BrowserUnavailable() + except (BrowserStateError,BrowserRetry): self.home() - raise DLFPUnavailable() + raise BrowserUnavailable() def isOnPage(self, pageCls): - return isinstance(self.__page, pageCls) + return isinstance(self.page, pageCls) def follow_link(self, *args, **kwargs): try: - self.__changeLocation(Browser.follow_link(self, *args, **kwargs)) + self.__changeLocation(mechanize.Browser.follow_link(self, *args, **kwargs)) except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e: error(e) - self.__page = None - raise DLFPUnavailable() - except (BrowserStateError,DLFPRetry): + self.page = None + raise BrowserUnavailable() + except (BrowserStateError,BrowserRetry): self.home() - raise DLFPUnavailable() + raise BrowserUnavailable() + @change_location def location(self, *args, **kwargs): keep_args = copy(args) keep_kwargs = kwargs.copy() try: - self.__changeLocation(Browser.open(self, *args, **kwargs)) - except DLFPRetry: - if not self.__page or not args or self.__page.url != args[0]: + self.__changeLocation(mechanize.Browser.open(self, *args, **kwargs)) + except BrowserRetry: + if not self.page or not args or self.page.url != args[0]: self.location(keep_args, keep_kwargs) except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e: error(e) - self.__page = None - raise DLFPUnavailable() + self.page = None + raise BrowserUnavailable() except BrowserStateError: self.home() self.location(*keep_args, **keep_kwargs) @@ -147,7 +182,7 @@ def location(self, *args, **kwargs): def __changeLocation(self, result): # Find page from url pageCls = None - for key, value in self.pages.items(): + for key, value in self.PAGES.items(): regexp = re.compile('^%s$' % key) m = regexp.match(result.geturl()) if m: @@ -156,7 +191,7 @@ def __changeLocation(self, result): # Not found if not pageCls: - self.__page = None + self.page = None r = result.read() if isinstance(r, unicode): r = r.encode('iso-8859-15', 'replace') @@ -165,18 +200,13 @@ def __changeLocation(self, result): return print '[%s] Gone on %s' % (self.username, result.geturl()) - self.__last_update = time.time() + self.last_update = time.time() document = self.__parser.parse(result, encoding='iso-8859-1') - self.__page = pageCls(self, document, result.geturl()) - self.__page.loaded() - - # Special pages - if isinstance(self.__page, LoginPage): - if self.__page.hasError(): - raise DLFPIncorrectPassword() - raise DLFPRetry() - if not self.__page.isLogged() and self.password: + self.page = pageCls(self, document, result.geturl()) + self.page.loaded() + + if not self.isLogged() and self.password: print '!! Relogin !!' self.login() return diff --git a/catin/modules/dlfp/firefox_cookies.py b/weboob/tools/firefox_cookies.py similarity index 93% rename from catin/modules/dlfp/firefox_cookies.py rename to weboob/tools/firefox_cookies.py index 75e477e1b9..a8466b3823 100644 --- a/catin/modules/dlfp/firefox_cookies.py +++ b/weboob/tools/firefox_cookies.py @@ -31,11 +31,10 @@ #logger.setLevel(logging.DEBUG) class FirefoxCookieJar(CookieJar): - - def __init__(self, sqlite_file=None, policy=None): - + def __init__(self, domain, sqlite_file=None, policy=None): CookieJar.__init__(self, policy) + self.domain = domain self.sqlite_file = sqlite_file def __connect(self): @@ -47,7 +46,6 @@ def __connect(self): return db - def load(self): db = self.__connect() @@ -55,7 +53,7 @@ def load(self): cookies = db.execute("""SELECT host, path, name, value, expiry, lastAccessed, isSecure FROM moz_cookies - WHERE host LIKE '%linuxfr%'""") + WHERE host LIKE '%%%s%%'""" % self.domain) for entry in cookies: @@ -91,7 +89,7 @@ def save(self): db = self.__connect() if not db: return - db.execute("DELETE FROM moz_cookies WHERE host LIKE '%linuxfr%'") + db.execute("DELETE FROM moz_cookies WHERE host LIKE '%%%s%%'" % self.domain) for cookie in self: if cookie.secure: secure = 1 else: secure = 0 diff --git a/catin2mail b/weboob2mail similarity index 100% rename from catin2mail rename to weboob2mail -- GitLab