#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright(C) 2012  Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.

21
from __future__ import print_function
Romain Bignon's avatar
Romain Bignon committed
22

Laurent Bachelier's avatar
Laurent Bachelier committed
23
import itertools
24
import logging
25
import os
Laurent Bachelier's avatar
Laurent Bachelier committed
26
import re
Romain Bignon's avatar
Romain Bignon committed
27
import sys
28
import urllib
Laurent Bachelier's avatar
Laurent Bachelier committed
29 30 31 32 33 34 35
import urlparse
from datetime import datetime, timedelta
from math import log
from random import choice, randint
from threading import Event, Thread

from dateutil.parser import parse as parse_date
36
from irc.bot import SingleServerIRCBot
Romain Bignon's avatar
Romain Bignon committed
37

Romain Bignon's avatar
Romain Bignon committed
38 39 40
from weboob.browser import Browser
from weboob.browser.exceptions import HTTPNotFound
from weboob.browser.pages import HTMLPage
Laurent Bachelier's avatar
Laurent Bachelier committed
41 42
from weboob.core import Weboob
from weboob.exceptions import BrowserHTTPError, BrowserUnavailable
Romain Bignon's avatar
Romain Bignon committed
43
from weboob.tools.application.base import ApplicationStorage
Laurent Bachelier's avatar
Laurent Bachelier committed
44 45
from weboob.tools.misc import get_backtrace, to_unicode
from weboob.tools.storage import StandardStorage
Romain Bignon's avatar
Romain Bignon committed
46

47 48
def _split_env(name, default, strip=False):
    """Return the comma-separated items of environment variable *name*.

    Empty items are dropped, so an empty variable yields an empty list
    instead of ``['']``. When *strip* is true, surrounding whitespace is
    removed from every item before the emptiness check.
    """
    items = os.getenv(name, default).split(',')
    if strip:
        items = [item.strip() for item in items]
    return [item for item in items if item]


# Runtime configuration, overridable through BOOBOT_* environment variables.
# The first channel of the list is used as the bot's main channel; fall back
# to the default when the variable contains no usable channel name.
IRC_CHANNELS = _split_env('BOOBOT_CHANNELS', '#weboob', strip=True) or ['#weboob']
IRC_NICKNAME = os.getenv('BOOBOT_NICKNAME', 'boobot')
IRC_SERVER = os.getenv('BOOBOT_SERVER', 'dickson.freenode.net')
# Regular expressions searched in the message source; a match means the
# message is ignored. Empty patterns are skipped because an empty regex
# matches every source, which would make the bot ignore everybody.
IRC_IGNORE = [re.compile(pattern) for pattern in _split_env('BOOBOT_IGNORE', '!~?irker@')]
STORAGE_FILE = os.getenv('BOOBOT_STORAGE', 'boobot.storage')
Romain Bignon's avatar
Romain Bignon committed
52

53

54
def fixurl(url):
    """Return *url* rebuilt as an encoded byte string.

    The URL is converted with ``to_unicode``, the ``/#!/`` marker is replaced
    by ``/``, and every component is encoded separately: user and password
    are UTF-8 encoded then percent-quoted, the host is IDNA encoded, and the
    remaining parts are UTF-8 encoded. Occurrences of ``//`` in the path are
    replaced by ``/``.
    """
    def utf8(text):
        return text.encode('utf8')

    # Drop the javascript "hashbang" marker before parsing.
    cleaned = to_unicode(url).replace('/#!/', '/')
    parts = urlparse.urlsplit(cleaned)

    # Break "user:pass@host:port" down into its elements.
    credentials, at_sign, location = parts.netloc.rpartition('@')
    username, cred_sep, password = credentials.partition(':')
    hostname, port_sep, port = location.partition(':')

    netloc = ''.join((
        urllib.quote(utf8(username)),
        utf8(cred_sep),
        urllib.quote(utf8(password)),
        utf8(at_sign),
        hostname.encode('idna'),
        utf8(port_sep),
        utf8(port),
    ))

    # A double slash in the path is valid, but most likely a mistake.
    path = utf8(parts.path).replace('//', '/')

    return urlparse.urlunsplit((
        utf8(parts.scheme),
        netloc,
        path,
        utf8(parts.query),
        utf8(parts.fragment),
    ))


Romain Bignon's avatar
Romain Bignon committed
88 89
class BoobotBrowser(Browser):
    """Browser used to describe a URL: content type, size and page title."""

    TIMEOUT = 3.0

    def urlinfo(self, url, maxback=2):
        """Fetch *url* and return ``(content_type, human_size, title)``.

        A HEAD request is tried first; when the server answers 501 or 405
        the URL is fetched with a regular request instead. On a "not found"
        error, up to *maxback* trailing non-alphanumeric characters are
        removed from the URL one at a time and the lookup is retried.

        ``human_size`` is ``None`` when no usable ``Content-Length`` header is
        available and the document is not HTML. ``title`` is ``None`` unless
        the document is HTML and a title (or, on twitter.com, a tweet text)
        was found.

        Errors other than the ones described above are re-raised.
        """
        if urlparse.urlsplit(url).netloc == 'mobile.twitter.com':
            url = url.replace('mobile.twitter.com', 'twitter.com', 1)
        try:
            r = self.open(url, method='HEAD')
            body = False
        except HTTPNotFound:
            if maxback and not url[-1].isalnum():
                return self.urlinfo(url[:-1], maxback-1)
            # Bare raise keeps the original traceback.
            raise
        except BrowserHTTPError as e:
            if e.response.status_code in (501, 405):
                # HEAD is not implemented/allowed: fall back to a full request.
                r = self.open(url)
                body = True
            else:
                raise

        content_type = r.headers.get('Content-Type')
        try:
            size = int(r.headers.get('Content-Length'))
        except (TypeError, ValueError):
            # Header missing (None) or not a number.
            hsize = None
        else:
            hsize = self.human_size(size)

        if content_type:
            is_html = 'html' in content_type
        else:
            # No Content-Type header: guess from the extension of the URL path.
            # re.search is required here, re.match would only test the
            # beginning of the string.
            is_html = re.search(r'\.x?html?$', urlparse.urlsplit(url).path) is not None

        title = None
        if is_html:
            if not body:
                r = self.open(url)
            # update size as we might not have it from headers
            hsize = self.human_size(len(r.content))

            page = HTMLPage(self, r)

            for node in page.doc.xpath('//head/title'):
                # Collapse every run of whitespace into a single space.
                title = ' '.join(to_unicode(node.text_content()).split())

            netloc = urlparse.urlsplit(url).netloc
            if netloc == 'twitter.com' or netloc.endswith('.twitter.com'):
                for node in page.doc.getroot().cssselect('.permalink-tweet .tweet-text'):
                    text = to_unicode(node.text_content()).strip()
                    title = ' '.join(text.splitlines())

        return content_type, hsize, title

    def human_size(self, size):
        """Return *size* (a number of bytes) formatted with a binary unit.

        Falsy or negative sizes give ``'0 B'``. Values beyond the largest
        known unit are expressed in that largest unit.
        """
        if not size or size < 0:
            return '0 B'

        units = ('B', 'KiB', 'MiB', 'GiB',
                 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
        value = float(size)
        exponent = 0
        # Repeated division avoids the rounding errors of a floating point
        # logarithm and cannot index past the end of the unit table.
        while value >= 1024 and exponent < len(units) - 1:
            value /= 1024
            exponent += 1
        return "%.1f %s" % (value, units[exponent])


Romain Bignon's avatar
Romain Bignon committed
144 145 146 147 148 149 150
class Task(object):
    """Plain record holding a date, a message and an optional channel."""

    def __init__(self, datetime, message, channel=None):
        # NOTE: the ``datetime`` parameter shadows the ``datetime`` class
        # inside this method only; its name is kept for keyword callers.
        self.datetime, self.message, self.channel = datetime, message, channel


Romain Bignon's avatar
Romain Bignon committed
151
class MyThread(Thread):
    """Daemon thread running the periodic weboob checks on behalf of the bot.

    It owns the ``Weboob`` instance, hands it to the bot, and reports
    scheduled tasks, DLFP activity and tweets through ``bot.send_message``.
    """

    daemon = True

    # Lowercase keywords looked for in DLFP messages, in priority order.
    _KEYWORDS = (
        'weboob', 'videoob', 'havesex', 'havedate', 'monboob', 'boobmsg',
        'flatboob', 'boobill', 'pastoob', 'radioob', 'translaboob', 'traveloob', 'handjoob',
        'boobathon', 'boobank', 'boobtracker', 'comparoob', 'wetboobs',
        'webcontentedit', 'weboorrents', 'assnet',
        'budget insight', 'budget-insight', 'budgetinsight', 'budgea',
    )

    def __init__(self, bot):
        """Create the weboob instance, load its backends and attach it to *bot*."""
        Thread.__init__(self)
        self.weboob = Weboob(storage=StandardStorage(STORAGE_FILE))
        self.weboob.load_backends()
        self.bot = bot
        self.bot.set_weboob(self.weboob)

    def run(self):
        """Wait until every channel is joined, then run the periodic checks."""
        for ev in self.bot.joined.values():
            ev.wait()

        # Intervals are passed as-is to weboob.repeat (presumably seconds).
        self.weboob.repeat(5, self.check_tasks)
        self.weboob.repeat(300, self.check_board)
        self.weboob.repeat(600, self.check_dlfp)
        self.weboob.repeat(600, self.check_twitter)

        self.weboob.loop()

    def find_keywords(self, text):
        """Return the first known keyword contained in *text*, or ``None``.

        The comparison is case-insensitive; the keyword is returned in
        lowercase.
        """
        lowered = text.lower()
        for word in self._KEYWORDS:
            if word in lowered:
                return word
        return None

    def check_twitter(self):
        """Announce unseen tweets returned by the 'weboob' twitter search."""
        nb_tweets = 10

        for backend in self.weboob.iter_backends(module='twitter'):
            tweets = itertools.islice(backend.iter_resources(None, ['search', 'weboob']),
                                      0,
                                      nb_tweets)
            for thread in list(tweets):

                if not backend.storage.get('lastpurge'):
                    backend.storage.set('lastpurge', datetime.now() - timedelta(days=60))
                    backend.storage.save()

                if thread.id not in backend.storage.get('seen', default={}) and\
                   thread.date > backend.storage.get('lastpurge'):
                    # The id is used as "<account>#<status id>".
                    _item = thread.id.split('#')
                    url = 'https://twitter.com/%s/status/%s' % (_item[0], _item[1])
                    for msg in self.bot.on_url(url):
                        self.bot.send_message('%s: %s' % (_item[0], url))
                        self.bot.send_message(msg)

                    backend.set_message_read(backend.fill_thread(thread, ['root']).root)

    def check_dlfp(self):
        """Report unread DLFP messages mentioning a keyword, then mark them read."""
        for msg in self.weboob.do('iter_unread_messages', backends=['dlfp']):
            word = self.find_keywords(msg.content)
            if word is not None:
                # str.find returns -1 when there is no link; slicing with it
                # would wrongly keep the last character of the signature.
                start = msg.signature.find('https://linuxfr')
                url = msg.signature[start:] if start >= 0 else ''
                self.bot.send_message('[DLFP] %s talks about %s: %s' % (
                    msg.sender, word, url))
            self.weboob[msg.backend].set_message_read(msg)

    def check_board(self):
        """Relay new DLFP board messages that mention a keyword.

        Messages posted by the 'moules' login are skipped. Every occurrence
        of the keyword is wrapped in ``\\002`` characters, whatever its case
        in the original message.
        """
        def iter_messages(backend):
            return backend.browser.iter_new_board_messages()

        def highlight(match):
            return '\002%s\002' % match.group(0)

        for msg in self.weboob.do(iter_messages, backends=['dlfp']):
            word = self.find_keywords(msg.message)
            if word is not None and msg.login != 'moules':
                # The keyword was found case-insensitively, so it has to be
                # highlighted case-insensitively as well.
                message = re.sub(re.escape(word), highlight, msg.message,
                                 flags=re.IGNORECASE)
                self.bot.send_message('[DLFP] <%s> %s' % (msg.login, message))

    def check_tasks(self):
        """Send and remove every queued task whose date is in the past."""
        for task in list(self.bot.tasks_queue):
            # Use the task's own tzinfo (None for naive dates) so that naive
            # and timezone-aware dates can both be compared without error.
            if task.datetime < datetime.now(task.datetime.tzinfo):
                self.bot.send_message(task.message, task.channel)
                self.bot.tasks_queue.remove(task)

    def stop(self):
        """Ask the weboob loop to stop and release the weboob instance."""
        self.weboob.want_stop()
        self.weboob.deinit()
Romain Bignon's avatar
Romain Bignon committed
233

234

235 236
class Boobot(SingleServerIRCBot):
    """IRC bot describing URLs and weboob object ids, with ``%`` commands.

    Public messages are scanned for ``id@backend`` identifiers and HTTP(S)
    URLs, and for commands of the form ``%name args`` which are dispatched to
    the matching ``cmd_<name>`` method.
    """

    def __init__(self, channels, nickname, server, port=6667):
        """Prepare the connection to *server* and the channels to join.

        The first item of *channels* becomes the default channel used by
        ``send_message``.
        """
        SingleServerIRCBot.__init__(self, [(server, port)], nickname, nickname)
        # self.connection.add_global_handler('pubmsg', self.on_pubmsg)
        self.connection.add_global_handler('join', self.on_join)
        self.connection.add_global_handler('welcome', self.on_welcome)
        self.connection.buffer_class.errors = 'replace'

        self.mainchannel = channels[0]
        # One event per channel, set once the channel has been joined.
        self.joined = dict((channel, Event()) for channel in channels)
        self.weboob = None
        self.storage = None

        self.tasks_queue = []

    def set_weboob(self, weboob):
        """Attach *weboob* to the bot and load the bot's own storage from it."""
        self.weboob = weboob
        self.storage = ApplicationStorage('boobot', weboob.storage)
        self.storage.load({})

    def on_welcome(self, c, event):
        """Join every configured channel."""
        for channel in self.joined:
            c.join(channel)

    def on_join(self, c, event):
        """Flag the joined channel as ready."""
        # irclib 5.0 compatibility
        if callable(event.target):
            channel = event.target()
        else:
            channel = event.target
        # Ignore channels that are not tracked instead of raising KeyError.
        joined = self.joined.get(channel)
        if joined is not None:
            joined.set()

    def send_message(self, msg, channel=None):
        """Send each line of *msg* to *channel* (default: the main channel).

        Every line is truncated to 450 UTF-8 bytes.
        """
        target = to_unicode(channel or self.mainchannel)
        for line in msg.splitlines():
            # Cutting the byte string may split a multi-byte character;
            # 'ignore' drops the incomplete tail instead of raising.
            line = to_unicode(line).encode('utf-8')[:450].decode('utf-8', 'ignore')
            self.connection.privmsg(target, line)

    def on_pubmsg(self, c, event):
        """Handle a public message: ids, URLs, then ``%`` commands."""
        # irclib 5.0 compatibility
        if callable(event.arguments):
            text = ' '.join(event.arguments())
            channel = event.target()
            nick = event.source()
        else:
            text = ' '.join(event.arguments)
            channel = event.target
            nick = event.source

        if any(ignore.search(nick) for ignore in IRC_IGNORE):
            return

        for m in re.findall(r'([\w\d_\-]+@\w+)', text):
            for msg in self.on_boobid(m):
                self.send_message(msg, channel)
        # NOTE(review): '+' is excluded from the URL characters by this
        # pattern; kept as-is, confirm whether that is intended.
        for m in re.findall(u'(https?://[^\\s\xa0+]+)', text):
            for msg in self.on_url(m):
                self.send_message(msg, channel)

        m = re.match(r'^%(?P<cmd>\w+)(?P<args>.*)$', text)
        if m:
            handler = getattr(self, 'cmd_%s' % m.group('cmd'), None)
            if handler is not None:
                handler(nick, channel, m.group('args').strip())

    def cmd_at(self, nick, channel, text):
        """``%at <date> <message>``: queue *message* to be sent at *date*."""
        try:
            when, message = text.split(' ', 1)
        except ValueError:
            self.send_message('Syntax: %at [YYYY-MM-DDT]HH:MM[:SS] message', channel)
            return

        try:
            date = parse_date(when)
        except (ValueError, OverflowError):
            # Answer on the channel the command came from.
            self.send_message('Unable to read date %r' % when, channel)
            return

        self.tasks_queue.append(Task(date, message, channel))

    def cmd_addquote(self, nick, channel, text):
        """``%addquote <text>``: store a quote for *channel*."""
        quotes = self.storage.get(channel, 'quotes', default=[])
        quotes.append({'author': nick, 'timestamp': datetime.now(), 'text': text})
        self.storage.set(channel, 'quotes', quotes)
        self.storage.save()
        self.send_message('Quote #%s added' % (len(quotes) - 1), channel)

    def cmd_delquote(self, nick, channel, text):
        """``%delquote <n>``: remove quote number *n* of *channel*."""
        quotes = self.storage.get(channel, 'quotes', default=[])

        try:
            n = int(text)
            quotes.pop(n)
        except (ValueError, IndexError):
            # Not a number, or no quote at that index.
            self.send_message("Quote #%s not found gros" % text, channel)
            return

        self.storage.set(channel, 'quotes', quotes)
        self.storage.save()
        self.send_message('Quote #%s removed' % n, channel)

    def cmd_searchquote(self, nick, channel, text):
        """``%searchquote <regex>``: send a random quote matching *regex*."""
        # NOTE(review): the pattern comes straight from IRC users; a crafted
        # expression could be very slow to evaluate.
        try:
            pattern = re.compile(to_unicode(text), re.IGNORECASE | re.UNICODE)
        except Exception as e:
            # Best effort: report any compilation problem to the channel.
            self.send_message(str(e), channel)
            return

        matches = [quote for quote in self.storage.get(channel, 'quotes', default=[])
                   if pattern.search(to_unicode(quote['text']))]

        if not matches:
            self.send_message('No match', channel)
            return

        self.send_message('%s' % choice(matches)['text'], channel)

    def cmd_getquote(self, nick, channel, text):
        """``%getquote [n]``: send quote *n*, or a random one without a number."""
        quotes = self.storage.get(channel, 'quotes', default=[])
        if not quotes:
            return

        try:
            n = int(text)
        except ValueError:
            n = randint(0, len(quotes)-1)

        try:
            quote = quotes[n]
        except IndexError:
            self.send_message('Unable to find quote #%s' % n, channel)
        else:
            self.send_message('[%s] %s' % (n, quote['text']), channel)

    def on_boobid(self, boobid):
        """Yield messages describing the object designated by ``id@backend``.

        Nothing is yielded when the backend is unknown or when the bot has no
        ``obj_info_*`` method for any capability of the backend.
        """
        _id, backend_name = boobid.split('@', 1)
        if backend_name in self.weboob.backend_instances:
            backend = self.weboob.backend_instances[backend_name]
            for cap in backend.iter_caps():
                # The first three characters of the capability class name are
                # dropped (presumably a "Cap" prefix).
                func = 'obj_info_%s' % cap.__name__[3:].lower()
                if hasattr(self, func):
                    try:
                        for msg in getattr(self, func)(backend, _id):
                            yield msg
                    except Exception as e:
                        print(get_backtrace())
                        yield u'Oops: [%s] %s' % (type(e).__name__, e)
                    break

    def on_url(self, url):
        """Yield a one-line description of *url* (title, or type and size).

        Any failure is reported as a yielded message instead of being raised.
        """
        try:
            # Inside the try block: normalizing the URL can fail as well.
            url = fixurl(url)
            content_type, hsize, title = BoobotBrowser().urlinfo(url)
            if title:
                yield u'URL: %s' % title
            elif hsize:
                yield u'URL (file): %s, %s' % (content_type, hsize)
            else:
                yield u'URL (file): %s' % content_type
        except BrowserUnavailable as e:
            yield u'URL (error): %s' % e
        except Exception as e:
            print(get_backtrace())
            yield u'Oops: [%s] %s' % (type(e).__name__, e)

    def obj_info_video(self, backend, id):
        """Yield the title and duration of the video *id*, if it is found."""
        v = backend.get_video(id)
        if v:
            yield u'Video: %s (%s)' % (v.title, v.duration)

    def obj_info_housing(self, backend, id):
        """Yield the title, area and cost of the housing *id*, if it is found."""
        h = backend.get_housing(id)
        if h:
            yield u'Housing: %s (%sm² / %s%s)' % (h.title, h.area, h.cost, h.currency)
Romain Bignon's avatar
Romain Bignon committed
410

411

Romain Bignon's avatar
Romain Bignon committed
412
def main():
    """Run the IRC bot together with its background checker thread.

    Blocks until the bot stops. A keyboard interrupt is reported with
    "Stopped." on standard output; in every case the checker thread is asked
    to stop before returning or propagating an error.
    """
    logging.basicConfig(level=logging.DEBUG)
    bot = Boobot(IRC_CHANNELS, IRC_NICKNAME, IRC_SERVER)

    thread = MyThread(bot)
    thread.start()

    try:
        bot.start()
    except KeyboardInterrupt:
        print("Stopped.")
    finally:
        # Always release weboob, even when the bot fails unexpectedly.
        thread.stop()


if __name__ == "__main__":
    sys.exit(main())