From 815bff0f1a5a8066a9b4fc2209b62fbf3b3867f2 Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Fri, 3 Feb 2012 13:46:33 +0100 Subject: [PATCH] fix handling urls --- modules/dlfp/browser.py | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/modules/dlfp/browser.py b/modules/dlfp/browser.py index 4f896d19ff..1ac86cef8a 100644 --- a/modules/dlfp/browser.py +++ b/modules/dlfp/browser.py @@ -21,7 +21,7 @@ import urllib import re -from weboob.tools.browser import BaseBrowser, BrowserHTTPNotFound, BrowserHTTPError, BrowserIncorrectPassword +from weboob.tools.browser import BaseBrowser, BrowserHTTPNotFound, BrowserHTTPError, BrowserIncorrectPassword, BrokenPageError from weboob.capabilities.messages import CantSendMessage from .pages.index import IndexPage, LoginPage @@ -34,22 +34,22 @@ class DLFP(BaseBrowser): DOMAIN = 'linuxfr.org' PROTOCOL = 'https' - PAGES = {'https?://.*linuxfr.org/?': IndexPage, - 'https?://.*linuxfr.org/compte/connexion': LoginPage, - 'https?://.*linuxfr.org/news/[^\.]+': ContentPage, - 'https?://.*linuxfr.org/wiki/(?!nouveau)[^/]+': ContentPage, - 'https?://.*linuxfr.org/wiki': WikiEditPage, - 'https?://.*linuxfr.org/wiki/nouveau': WikiEditPage, - 'https?://.*linuxfr.org/wiki/[^\.]+/modifier': WikiEditPage, - 'https?://.*linuxfr.org/suivi/[^\.]+': ContentPage, - 'https?://.*linuxfr.org/sondages/[^\.]+': ContentPage, - 'https?://.*linuxfr.org/users/[^\./]+/journaux/[^\.]+': ContentPage, - 'https?://.*linuxfr.org/forums/[^\./]+/posts/[^\.]+': ContentPage, - 'https?://.*linuxfr.org/nodes/(\d+)/comments/(\d+)': CommentPage, - 'https?://.*linuxfr.org/nodes/(\d+)/comments/nouveau': NewCommentPage, - 'https?://.*linuxfr.org/nodes/(\d+)/comments': NodePage, - 'https?://.*linuxfr.org/nodes/(\d+)/tags/nouveau': NewTagPage, - 'https?://.*linuxfr.org/board/index.xml': BoardIndexPage, + PAGES = {'https?://[^/]*linuxfr\.org/?': IndexPage, + 'https?://[^/]*linuxfr\.org/compte/connexion': LoginPage, + 'https?://[^/]*linuxfr\.org/news/[^\.]+': ContentPage, + 'https?://[^/]*linuxfr\.org/wiki/(?!nouveau)[^/]+': ContentPage, + 'https?://[^/]*linuxfr\.org/wiki': WikiEditPage, + 'https?://[^/]*linuxfr\.org/wiki/nouveau': WikiEditPage, + 'https?://[^/]*linuxfr\.org/wiki/[^\.]+/modifier': WikiEditPage, + 'https?://[^/]*linuxfr\.org/suivi/[^\.]+': ContentPage, + 'https?://[^/]*linuxfr\.org/sondages/[^\.]+': ContentPage, + 'https?://[^/]*linuxfr\.org/users/[^\./]+/journaux/[^\.]+': ContentPage, + 'https?://[^/]*linuxfr\.org/forums/[^\./]+/posts/[^\.]+': ContentPage, + 'https?://[^/]*linuxfr\.org/nodes/(\d+)/comments/(\d+)': CommentPage, + 'https?://[^/]*linuxfr\.org/nodes/(\d+)/comments/nouveau': NewCommentPage, + 'https?://[^/]*linuxfr\.org/nodes/(\d+)/comments': NodePage, + 'https?://[^/]*linuxfr\.org/nodes/(\d+)/tags/nouveau': NewTagPage, + 'https?://[^/]*linuxfr\.org/board/index.xml': BoardIndexPage, } last_board_msg_id = None @@ -145,6 +145,8 @@ def get_content(self, _id): content = self.page.get_comment(int(m.group(1))) else: content = self.page.get_article() + else: + raise BrokenPageError('Not on a content or comment page (%r)' % self.page) if _id is not None: content.id = _id -- GitLab