diff --git a/weboob/deprecated/__init__.py b/weboob/deprecated/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/weboob/deprecated/browser/__init__.py b/weboob/deprecated/browser/__init__.py
deleted file mode 100644
index e46bbda7b02374751b352a0aa8793241b626e2a5..0000000000000000000000000000000000000000
--- a/weboob/deprecated/browser/__init__.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright(C) 2010-2011 Christophe Benz
-#
-# This file is part of weboob.
-#
-# weboob is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# weboob is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with weboob. If not, see .
-
-
-from weboob.deprecated.browser.browser import BrowserIncorrectPassword, BrowserBanned, \
- BrowserUnavailable, BrowserRetry, \
- BrowserHTTPNotFound, BrowserHTTPError, \
- Page, Browser, BrokenPageError, \
- StandardBrowser, BrowserPasswordExpired, \
- BrowserForbidden, StateBrowser
-
-
-__all__ = ['BrowserIncorrectPassword', 'BrowserPasswordExpired', 'BrowserBanned',
- 'BrowserUnavailable', 'BrowserRetry', 'BrowserHTTPNotFound', 'BrowserHTTPError',
- 'Page', 'Browser', 'BrokenPageError', 'StandardBrowser', 'BrowserForbidden',
- 'StateBrowser']
diff --git a/weboob/deprecated/browser/browser.py b/weboob/deprecated/browser/browser.py
deleted file mode 100644
index 4d2fe80bebdfac0a2c24c9bb0ad4cb03a3fd5480..0000000000000000000000000000000000000000
--- a/weboob/deprecated/browser/browser.py
+++ /dev/null
@@ -1,809 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright(C) 2010-2014 Romain Bignon
-#
-# This file is part of weboob.
-#
-# weboob is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# weboob is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with weboob. If not, see .
-
-
-from __future__ import print_function
-import sys
-
-if sys.version_info >= (3,0):
- raise ImportError("This module isn't compatible with python3")
-
-
-from copy import copy
-from httplib import BadStatusLine
-
-try:
- import mechanize
-except ImportError:
- raise ImportError('Please install python-mechanize')
-
-import base64
-import hashlib
-import httplib
-import logging
-import mimetypes
-import os
-import pickle
-import re
-import socket
-import ssl
-import time
-import urllib
-import urllib2
-import warnings
-import zlib
-from contextlib import closing
-from gzip import GzipFile
-from threading import RLock
-
-from weboob.exceptions import BrowserUnavailable, BrowserIncorrectPassword, BrowserPasswordExpired, BrowserForbidden, BrowserBanned, BrowserHTTPNotFound, BrowserHTTPError, FormFieldConversionWarning, BrowserSSLError
-from weboob.tools.compat import basestring, unicode
-from weboob.tools.decorators import retry
-from weboob.tools.log import getLogger
-from weboob.deprecated.browser.parsers import get_parser
-
-__all__ = ['BrowserIncorrectPassword', 'BrowserForbidden', 'BrowserBanned', 'BrowserUnavailable', 'BrowserRetry',
- 'BrowserPasswordExpired', 'BrowserHTTPNotFound', 'BrowserHTTPError', 'BrokenPageError', 'Page',
- 'StandardBrowser', 'Browser', 'StateBrowser']
-
-
-class BrowserRetry(Exception):
- pass
-
-
-class NoHistory(object):
- """
- We don't want to fill memory with history
- """
-
- def __init__(self):
- pass
-
- def add(self, request, response):
- pass
-
- def back(self, n, _response):
- pass
-
- def clear(self):
- pass
-
- def close(self):
- pass
-
-
-class BrokenPageError(Exception):
- pass
-
-
-class Page(object):
- """
- Base page
- """
-
- ENCODING = None
-
- def __init__(self, browser, document, url='', groups=None, group_dict=None, logger=None):
- self.browser = browser
- self.parser = browser.parser
- self.document = document
- self.url = url
- self.groups = groups
- self.group_dict = group_dict
- self.logger = getLogger('page', logger)
-
- def on_loaded(self):
- """
- Called when the page is loaded.
- """
- pass
-
-
-def check_location(func):
- def inner(self, *args, **kwargs):
- if args and isinstance(args[0], basestring):
- url = args[0]
- if url.startswith('/') and hasattr(self, 'DOMAIN') and (not self.request or self.request.host != self.DOMAIN):
- url = '%s://%s%s' % (self.PROTOCOL, self.DOMAIN, url)
- url = re.sub('(.*)#.*', r'\1', url)
-
- if isinstance(url, unicode):
- url = url.encode('utf-8')
- args = (url,) + args[1:]
- return func(self, *args, **kwargs)
- return inner
-
-
-class StandardBrowser(mechanize.Browser):
- """
- Standard Browser.
-
- :param firefox_cookies: path to cookies sqlite file
- :type firefox_cookies: str
- :param parser: parser to use on HTML files
- :type parser: :class:`weboob.deprecated.browser.parsers.iparser.IParser`
- :param history: history manager; default value is an object which
- does not keep history
- :type history: object
- :param proxy: proxy URL to use
- :type proxy: str
- :param factory: mechanize factory. None to use Mechanize's default
- :type factory: object
- """
-
- # ------ Class attributes --------------------------------------
-
- ENCODING = 'utf-8'
- USER_AGENTS = {
- 'desktop_firefox': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
- 'android': 'Mozilla/5.0 (Linux; U; Android 2.1; en-us; Nexus One Build/ERD62) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17',
- 'microb': 'Mozilla/5.0 (X11; U; Linux armv7l; fr-FR; rv:1.9.2.3pre) Gecko/20100723 Firefox/3.5 Maemo Browser 1.7.4.8 RX-51 N900',
- 'wget': 'Wget/1.11.4',
- }
- USER_AGENT = USER_AGENTS['desktop_firefox']
- DEBUG_HTTP = False
- DEBUG_MECHANIZE = False
- DEFAULT_TIMEOUT = 15
- INSECURE = False # if True, do not validate SSL
-
- logger = None
-
- # ------ Browser methods ---------------------------------------
-
- # I'm not a robot, so disable the check of permissions in robots.txt.
- default_features = copy(mechanize.Browser.default_features)
- default_features.remove('_robots')
- default_features.remove('_refresh')
-
- def __init__(self, firefox_cookies=None, parser=None, history=NoHistory(), proxy=None, logger=None, factory=None, responses_dirname=None):
- mechanize.Browser.__init__(self, history=history, factory=factory)
- self.logger = getLogger('browser', logger)
- self.responses_dirname = responses_dirname
- self.save_responses = responses_dirname is not None
- self.responses_count = 0
-
- self.addheaders = [
- ['User-agent', self.USER_AGENT]
- ]
-
- # Use a proxy
- self.proxy = proxy
- if proxy is not None:
- self.set_proxies(proxy)
-
- # Share cookies with firefox
- if firefox_cookies:
- # Try to load cookies
- try:
- from .firefox_cookies import FirefoxCookieJar
- self._cookie = FirefoxCookieJar(self.DOMAIN, firefox_cookies)
- self._cookie.load()
- self.set_cookiejar(self._cookie)
- except ImportError as e:
- logging.warning("Unable to store Firefox cookies: %s", e)
- self._cookie = None
- else:
- self._cookie = None
-
- if parser is None:
- parser = get_parser()()
- elif isinstance(parser, (tuple,list,basestring)):
- parser = get_parser(parser)()
- self.parser = parser
- self.lock = RLock()
-
- if self.DEBUG_HTTP:
- # display messages from httplib
- self.set_debug_http(True)
-
- if logging.root.level <= logging.DEBUG:
- # Enable log messages from mechanize.Browser
- self.set_debug_redirects(True)
- mech_logger = logging.getLogger("mechanize")
- mech_logger.setLevel(logging.INFO)
-
- def __enter__(self):
- self.lock.acquire()
-
- def __exit__(self, t, v, tb):
- self.lock.release()
-
- def _openurl(self, *args, **kwargs):
- return mechanize.Browser.open(self, *args, **kwargs)
-
- @check_location
- @retry(BrowserHTTPError, tries=3)
- def openurl(self, *args, **kwargs):
- """
- Open an URL but do not create a Page object.
- """
- if_fail = kwargs.pop('if_fail', 'raise')
- self.logger.debug('Opening URL "%s", %s' % (args, kwargs))
-
- kwargs['timeout'] = kwargs.get('timeout', self.DEFAULT_TIMEOUT)
-
- try:
- return self._openurl(*args, **kwargs)
- except (mechanize.BrowserStateError, mechanize.response_seek_wrapper,
- urllib2.HTTPError, urllib2.URLError, BadStatusLine, ssl.SSLError) as e:
- if isinstance(e, mechanize.BrowserStateError) and hasattr(self, 'home'):
- self.home()
- return self._openurl(*args, **kwargs)
- elif if_fail == 'raise':
- raise self.get_exception(e)('%s (url="%s")' % (e, args and args[0] or 'None'))
- else:
- return None
- except BrowserRetry as e:
- return self._openurl(*args, **kwargs)
-
- def get_exception(self, e):
- if isinstance(e, urllib2.HTTPError) and hasattr(e, 'getcode'):
- if e.getcode() in (404, 403):
- return BrowserHTTPNotFound
- if e.getcode() == 401:
- return BrowserIncorrectPassword
- elif isinstance(e, mechanize.BrowserStateError):
- return BrowserHTTPNotFound
-
- return BrowserHTTPError
-
- def readurl(self, url, *args, **kwargs):
- """
- Download URL data specifying what to do on failure (nothing by default).
- """
- if 'if_fail' not in kwargs:
- kwargs['if_fail'] = None
- result = self.openurl(url, *args, **kwargs)
-
- if result:
- if self.save_responses:
- self.save_response(result)
- return result.read()
- else:
- return None
-
- def save_response(self, result, warning=False):
- """
- Save a stream to a temporary file, and log its name.
- The stream is rewinded after saving.
- """
- if self.responses_dirname is None:
- import tempfile
- self.responses_dirname = tempfile.mkdtemp(prefix='weboob_session_')
- print('Debug data will be saved in this directory: %s' % self.responses_dirname, file=sys.stderr)
- elif not os.path.isdir(self.responses_dirname):
- os.makedirs(self.responses_dirname)
- # get the content-type, remove optionnal charset part
- mimetype = result.info().get('Content-Type', '').split(';')[0]
- # due to http://bugs.python.org/issue1043134
- if mimetype == 'text/plain':
- ext = '.txt'
- else:
- # try to get an extension (and avoid adding 'None')
- ext = mimetypes.guess_extension(mimetype, False) or ''
- response_filepath = os.path.join(self.responses_dirname, unicode(self.responses_count)+ext)
- with open(response_filepath, 'w') as f:
- f.write(result.read())
- result.seek(0)
- match_filepath = os.path.join(self.responses_dirname, 'url_response_match.txt')
- with open(match_filepath, 'a') as f:
- f.write('%s\t%s\n' % (result.geturl(), os.path.basename(response_filepath)))
- self.responses_count += 1
-
- msg = u'Response saved to %s' % response_filepath
- if warning:
- self.logger.warning(msg)
- else:
- self.logger.info(msg)
-
- def get_document(self, result, parser=None, encoding=None):
- """
- Get a parsed document from a stream.
-
- :param result: HTML page stream
- :type result: stream
- """
- if parser is None:
- parser = self.parser
- elif isinstance(parser, (basestring, list, tuple)):
- parser = get_parser(parser)()
-
- if encoding is None:
- encoding = self.ENCODING
-
- return parser.parse(result, encoding)
-
- def location(self, *args, **kwargs):
- """
- Go on an URL and get the related document.
- """
- return self.get_document(self.openurl(*args, **kwargs))
-
- @staticmethod
- def buildurl(base, *args, **kwargs):
- """
- Build an URL and escape arguments.
-
- You can give a serie of tuples in args (and the order is keept), or
- a dict in kwargs (but the order is lost).
-
- Example:
-
- >>> StandardBrowser.buildurl('/blah.php', ('a', '&'), ('b', '='))
- '/blah.php?a=%26&b=%3D'
- >>> StandardBrowser.buildurl('/blah.php', a='&', b='=')
- '/blah.php?b=%3D&a=%26'
- """
-
- if not args:
- args = kwargs
- if not args:
- return base
- else:
- return '%s?%s' % (base, urllib.urlencode(args))
-
- def str(self, s):
- if isinstance(s, unicode):
- s = s.encode('iso-8859-15', 'replace')
- return s
-
- def set_field(self, args, label, field=None, value=None, is_list=False):
- """
- Set a value to a form field.
-
- :param args: arguments where to look for value
- :type args: dict
- :param label: label in args
- :type label: str
- :param field: field name. If None, use label instead
- :type field: str
- :param value: value to give on field
- :type value: str
- :param is_list: the field is a list
- :type is_list: bool
- """
- try:
- if not field:
- field = label
- if args.get(label, None) is not None:
- if not value:
- if is_list:
- if isinstance(is_list, (list, tuple)):
- try:
- value = [self.str(is_list.index(args[label]))]
- except ValueError as e:
- if args[label]:
- print('[%s] %s: %s' % (label, args[label], e), file=sys.stderr)
- return
- else:
- value = [self.str(args[label])]
- else:
- value = self.str(args[label])
- self[field] = value
- except mechanize.ControlNotFoundError:
- return
-
- def lowsslcheck(self, domain, hsh):
- if self.INSECURE or (self.logger is not None and self.logger.settings['ssl_insecure']) or self.proxy is not None:
- return
- certhash = self._certhash(domain)
- if self.logger:
- self.logger.debug('Found %s as certificate hash' % certhash)
- if isinstance(hsh, basestring):
- hsh = [hsh]
- if certhash not in hsh:
- raise BrowserSSLError()
-
- def _certhash(self, domain, port=443):
- for proto in HTTPSConnection2._PROTOCOLS:
- try:
- certs = ssl.get_server_certificate((domain, port), ssl_version=proto)
- except ssl.SSLError as e:
- continue
- else:
- return hashlib.sha256(certs).hexdigest()
- raise e
-
- def __setitem__(self, key, value):
- if isinstance(value, unicode):
- value = value.encode(self.ENCODING or 'utf-8')
- warnings.warn('Implicit conversion of form field %r from unicode to str' % key,
- FormFieldConversionWarning, stacklevel=2)
- if self.form is None:
- raise AttributeError('Please select a form before setting values to fields')
- return self.form.__setitem__(key, value)
-
-
-class Browser(StandardBrowser):
- """
- Base browser class to navigate on a website.
-
- :param username: username on website
- :type username: str
- :param password: password on website. If it is None, Browser will
- not try to login
- :type password: str
- :param firefox_cookies: path to cookies sqlite file
- :type firefox_cookies: str
- :param parser: parser to use on HTML files
- :type parser: :class:`weboob.deprecated.browser.parsers.iparser.IParser`
- :param history: history manager; default value is an object which
- does not keep history
- :type history: object
- :param proxy: proxy URL to use
- :type proxy: dictionnary
- :param logger: logger to use for logging
- :type logger: :class:`logging.Logger`
- :param factory: mechanize factory. None to use Mechanize's default
- :type factory: object
- :param get_home: try to get the homepage.
- :type get_homme: bool
- :param responses_dirname: directory to store responses
- :type responses_dirname: str
- """
-
- # ------ Class attributes --------------------------------------
-
- DOMAIN = None
- PROTOCOL = 'http'
- PAGES = {}
-
- # SHA-256 hash of server certificate. If set, it will automatically check it,
- # and raise a SSLError exception if it doesn't match.
- CERTHASH = None
-
- # ------ Abstract methods --------------------------------------
-
- def home(self):
- """
- Go to the home page.
- """
- if self.DOMAIN is not None:
- self.location('%s://%s/' % (self.PROTOCOL, self.DOMAIN))
-
- def login(self):
- """
- Login to the website.
-
- This function is called when is_logged() returns False and the password
- attribute is not None.
- """
- raise NotImplementedError()
-
- def is_logged(self):
- """
- Return True if we are logged on website. When Browser tries to access
- to a page, if this method returns False, it calls login().
-
- It is never called if the password attribute is None.
- """
- raise NotImplementedError()
-
- # ------ Browser methods ---------------------------------------
-
- def __init__(self, username=None, password=None, firefox_cookies=None,
- parser=None, history=NoHistory(), proxy=None, logger=None,
- factory=None, get_home=True, responses_dirname=None):
- StandardBrowser.__init__(self, firefox_cookies, parser, history, proxy, logger, factory, responses_dirname)
- self.page = None
- self.last_update = 0.0
- self.username = username
- self.password = password
-
- if self.CERTHASH is not None and self.DOMAIN is not None:
- self.lowsslcheck(self.DOMAIN, self.CERTHASH)
-
- if self.password and get_home:
- try:
- self.home()
- # Do not abort the build of browser when the website is down.
- except BrowserUnavailable:
- pass
-
- def submit(self, *args, **kwargs):
- """
- Submit the selected form.
- """
- no_login = kwargs.pop('nologin', kwargs.pop('no_login', False))
- try:
- self._change_location(mechanize.Browser.submit(self, *args, **kwargs), no_login=no_login)
- except (mechanize.response_seek_wrapper, urllib2.HTTPError, urllib2.URLError, BadStatusLine, ssl.SSLError) as e:
- self.page = None
- raise self.get_exception(e)(e)
- except (mechanize.BrowserStateError, BrowserRetry) as e:
- raise BrowserUnavailable(e)
-
- def is_on_page(self, pageCls):
- """
- Check the current page.
-
- :param pageCls: class of the page to check
- :type pageCls: :class:`Page`
- :rtype: bool
- """
- return isinstance(self.page, pageCls)
-
- def absurl(self, rel):
- """
- Get an absolute URL from a relative one.
- """
- if rel is None:
- return None
- if not rel.startswith('/'):
- rel = '/' + rel
- return '%s://%s%s' % (self.PROTOCOL, self.DOMAIN, rel)
-
- def follow_link(self, *args, **kwargs):
- """
- Follow a link on the page.
- """
- try:
- self._change_location(mechanize.Browser.follow_link(self, *args, **kwargs))
- except (mechanize.response_seek_wrapper, urllib2.HTTPError, urllib2.URLError, BadStatusLine, ssl.SSLError) as e:
- self.page = None
- raise self.get_exception(e)('%s (url="%s")' % (e, args and args[0] or 'None'))
- except (mechanize.BrowserStateError, BrowserRetry) as e:
- self.home()
- raise BrowserUnavailable(e)
-
- def _openurl(self, *args, **kwargs):
- return mechanize.Browser.open_novisit(self, *args, **kwargs)
-
- @check_location
- @retry(BrowserHTTPError, tries=3)
- def location(self, *args, **kwargs):
- """
- Change location of browser on an URL.
-
- When the page is loaded, it looks up PAGES to find a regexp which
- matches, and create the object. Then, the 'on_loaded' method of
- this object is called.
-
- If a password is set, and is_logged() returns False, it tries to login
- with login() and reload the page.
- """
- keep_args = copy(args)
- keep_kwargs = kwargs.copy()
-
- no_login = kwargs.pop('no_login', kwargs.pop('nologin', False))
- kwargs['timeout'] = kwargs.get('timeout', self.DEFAULT_TIMEOUT)
-
- try:
- self._change_location(mechanize.Browser.open(self, *args, **kwargs), no_login=no_login)
- except BrowserRetry:
- if not self.page or not args or self.page.url != args[0]:
- keep_kwargs['no_login'] = True
- self.location(*keep_args, **keep_kwargs)
- except (mechanize.response_seek_wrapper, urllib2.HTTPError, urllib2.URLError, BadStatusLine, ssl.SSLError) as e:
- self.page = None
- raise self.get_exception(e)('%s (url="%s")' % (e, args and args[0] or 'None'))
- except mechanize.BrowserStateError:
- self.home()
- self.location(*keep_args, **keep_kwargs)
-
- # DO NOT ENABLE THIS FUCKING PEACE OF CODE EVEN IF IT WOULD BE BETTER
- # TO SANITARIZE FUCKING HTML.
- #def _set_response(self, response, *args, **kwargs):
- # import time
- # if response and hasattr(response, 'set_data'):
- # print time.time()
- # r = response.read()
- # start = 0
- # end = 0
- # new = ''
- # lowr = r.lower()
- # start = lowr[end:].find('')
- # new += r[start_stop:end].replace('<', '<').replace('>', '>')
- # start = end + lowr[end:].find('