diff --git a/modules/reddit/__init__.py b/modules/reddit/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f32bc3f1a8f5331f15a1ba6376b54ef76605745
--- /dev/null
+++ b/modules/reddit/__init__.py
@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2017 Vincent A
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import unicode_literals
+
+
+from .module import RedditModule
+
+
+__all__ = ['RedditModule']
diff --git a/modules/reddit/browser.py b/modules/reddit/browser.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5d84a2e2a048b71fdee63d90e1e7586620fd0a2
--- /dev/null
+++ b/modules/reddit/browser.py
@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2017 Vincent A
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import unicode_literals
+
+from weboob.browser import PagesBrowser, URL
+
+from .pages import ListPage, SearchPage, EntryPage, CatchHTTP
+
+
+class RedditBrowser(PagesBrowser):
+    BASEURL = 'https://www.reddit.com/r/pics/'  # overridden per sub-reddit in __init__
+
+    listing = URL(r'(?P<cat>\w*)/?\?count=\d+&after=(?P<after>\w+)',
+                  r'(?P<cat>\w*)/?$',
+                  ListPage)
+    entry = URL(r'/comments/(?P<id>\w+)/.*', EntryPage)
+    search = URL(r'search\?sort=(?P<sort>\w+)&restrict_sr=on', SearchPage)
+    # catch-all to avoid BrowserHTTPSDowngrade
+    catch_http = URL(r'http://.*', CatchHTTP)
+
+    def __init__(self, sub, *args, **kwargs):
+        super(RedditBrowser, self).__init__(*args, **kwargs)
+        self.BASEURL = 'https://www.reddit.com/r/%s/' % sub
+
+    def iter_images(self, cat=''):
+        self.listing.go(cat=cat)  # cat: '', 'new', 'top', 'rising', 'controversial'
+        return self.page.iter_images()
+
+    def search_images(self, pattern, sort='top', nsfw=False):
+        nsfw = {True: 'yes', False: 'no'}[nsfw]
+        pattern = '%s nsfw:%s' % (pattern, nsfw)  # nsfw filter is part of the query string
+
+        self.search.go(sort=sort, params={'q': pattern})
+        return self.page.iter_images()
+
+    def iter_threads(self, cat=''):
+        self.listing.go(cat=cat)
+        return self.page.iter_threads()
+
+    def fill_thread(self, thread):
+        self.location(thread.url, params={'sort': 'old'})  # oldest-first comment order
+        assert self.entry.is_here()
+        self.page.fill_thread(thread)
+
+    def get_thread(self, id):
+        self.entry.go(id=id, params={'sort': 'old'})
+        return self.page.get_thread(id)
+
+    def get_image(self, id):
+        self.entry.go(id=id)
+        img = self.page.get_image()
+        img.id = id  # the page itself does not expose the short id
+        return img
diff --git a/modules/reddit/favicon.png b/modules/reddit/favicon.png
new file mode 100644
index 0000000000000000000000000000000000000000..418710daf08c927b5767e085a16214304e6577ad
Binary files /dev/null and b/modules/reddit/favicon.png differ
diff --git a/modules/reddit/module.py b/modules/reddit/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..f111dbae59db4f5b180bb55dfc13410f828ff70e
--- /dev/null
+++ b/modules/reddit/module.py
@@ -0,0 +1,125 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2017 Vincent A
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import unicode_literals
+
+from weboob.tools.backend import Module, BackendConfig
+from weboob.tools.value import Value
+from weboob.capabilities.image import CapImage, BaseImage, Thumbnail
+from weboob.capabilities.messages import CapMessages, Thread
+from weboob.capabilities.collection import CapCollection, Collection
+
+from .browser import RedditBrowser
+
+
+__all__ = ['RedditModule']
+
+
+def register_resources_handler(d, *path):
+    # Decorator factory: registers the decorated function in d under the
+    # collection path tuple (None entries act as wildcards in iter_resources).
+    def decorator(func):
+        d[path] = func
+
+
+class RedditModule(Module, CapImage, CapCollection, CapMessages):
+    NAME = 'reddit'
+    DESCRIPTION = u'reddit website'
+    MAINTAINER = u'Vincent A'
+    EMAIL = 'dev@indigo.re'
+    LICENSE = 'AGPLv3+'
+    VERSION = '1.4'
+    CONFIG = BackendConfig(
+        Value('subreddit', label='Name of the sub-reddit', regexp='[^/]+', default='pics'),
+    )
+
+    BROWSER = RedditBrowser
+
+    def create_default_browser(self):
+        return self.create_browser(self.config['subreddit'].get())
+
+    def get_file(self, _id):
+        raise NotImplementedError()
+
+    def search_file(self, pattern, sortby=CapImage.SEARCH_RELEVANCE):
+        # delegate to search_image so the CapImage constant is mapped to a
+        # reddit sort keyword; files search always includes nsfw results
+        return self.search_image(pattern, sortby, True)
+
+    def get_image(self, id):
+        return self.browser.get_image(id)
+
+    def search_image(self, pattern, sortby=CapImage.SEARCH_RELEVANCE, nsfw=False):
+        sorting = {
+            CapImage.SEARCH_RELEVANCE: 'relevance',
+            CapImage.SEARCH_RATING: 'top',
+            CapImage.SEARCH_VIEWS: 'top',  # not implemented
+            CapImage.SEARCH_DATE: 'new',
+        }
+        sortby = sorting[sortby]
+        return self.browser.search_images(pattern, sortby, nsfw)
+
+    def iter_threads(self, cat=''):
+        # cat defaults to '' (hot listing); iter_resources_dir passes cat=key
+        return self.browser.iter_threads(cat=cat)
+
+    def get_thread(self, id):
+        return self.browser.get_thread(id)
+
+    def iter_resources(self, objs, split_path):
+        # dispatch on path length; None entries in a registered key match anything
+        for k in self.RESOURCES:
+            if len(k) == len(split_path) and all(a is None or a == b for a, b in zip(k, split_path)):
+                f = self.RESOURCES[k]
+                return f(self, objs, *split_path)
+
+    RESOURCES = {}
+
+    @register_resources_handler(RESOURCES)
+    def iter_resources_root(self, objs):
+        return [
+            Collection(['hot'], 'Hot threads'),
+            Collection(['new'], 'New threads'),
+            Collection(['rising'], 'Rising threads'),
+            Collection(['controversial'], 'Controversial threads'),
+            Collection(['top'], 'Top threads'),
+        ]
+
+    @register_resources_handler(RESOURCES, None)
+    def iter_resources_dir(self, objs, key):
+        if key == 'hot':
+            key = ''  # 'hot' is reddit's default, unprefixed listing
+
+        if Thread in objs:
+            return self.iter_threads(cat=key)
+        if BaseImage in objs:
+            return self.browser.iter_images(cat=key)
+        return []
+
+    def fill_data(self, obj, fields):
+        if 'thumbnail' in fields and not obj.thumbnail.data:
+            obj.thumbnail.data = self.browser.open(obj.thumbnail.url).content
+        if 'data' in fields:
+            obj.data = self.browser.open(obj.url).content
+
+    def fill_thread(self, obj, fields):
+        if 'root' in fields:
+            self.browser.fill_thread(obj)
+
+    OBJECTS = {
+        BaseImage: fill_data,
+        Thumbnail: fill_data,
+        Thread: fill_thread,
+    }
diff --git a/modules/reddit/pages.py b/modules/reddit/pages.py
new file mode 100644
index 0000000000000000000000000000000000000000..db72b5d898efcfe42ad1228e3245337ddfdd8db9
--- /dev/null
+++ b/modules/reddit/pages.py
@@ -0,0 +1,208 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2017 Vincent A
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import unicode_literals
+
+from collections import OrderedDict
+
+from weboob.browser.elements import method, ListElement, ItemElement, SkipItem
+from weboob.browser.filters.standard import CleanText, Regexp, Field, DateTime
+from weboob.browser.filters.html import AbsoluteLink, Link, Attr, CleanHTML
+from weboob.browser.pages import HTMLPage, RawPage, pagination
+from weboob.capabilities.image import BaseImage, Thumbnail
+from weboob.capabilities.messages import Thread, Message
+from weboob.tools.compat import urljoin
+
+
+class list_entry(ItemElement):
+    obj_title = CleanText('.//a[has-class("title")]')
+    obj_date = DateTime(Attr('.//time[@class="live-timestamp"]', 'datetime'))
+    obj__page = AbsoluteLink('.//a[has-class("comments")]')  # permalink of the comments page
+    obj_id = Regexp(Field('_page'), '/comments/([^/]+)/')  # reddit short id, taken from the permalink
+
+
+class ListPage(HTMLPage):
+    @pagination
+    @method
+    class iter_images(ListElement):
+        item_xpath = '//div[has-class("entry")]'
+
+        class item(list_entry):
+            klass = BaseImage
+
+            obj_author = CleanText('.//a[has-class("author")]')
+
+            def obj_thumbnail(self):
+                path = Attr('..//a[has-class("thumbnail")]/img', 'src', default=None)(self)
+                if path is None:
+                    raise SkipItem('not an image thread')
+                return Thumbnail(urljoin(self.page.url, path))
+
+            def obj_url(self):
+                self.obj_thumbnail()  # raises SkipItem so non-image entries are dropped
+
+                url = urljoin(self.page.url, Link('..//a[has-class("thumbnail")]')(self))
+                if url != Field('_page')(self):
+                    return url
+                # TODO lazy load with fillobj?
+                return self.page.browser.open(url).page.get_image_url()
+
+        next_page = Link('//a[contains(@rel,"next")]', default=None)
+
+    @pagination
+    @method
+    class iter_threads(ListElement):
+        item_xpath = '//div[has-class("entry")]'
+
+        class item(list_entry):
+            klass = Thread
+
+            obj_url = Field('_page')  # a thread's url is its comments-page permalink
+
+        next_page = Link('//a[contains(@rel,"next")]', default=None)
+
+
+class SearchPage(HTMLPage):
+    @pagination
+    @method
+    class iter_images(ListElement):
+        item_xpath = '//div[has-class("search-result")]'
+
+        class item(ItemElement):
+            klass = BaseImage
+
+            obj__page = AbsoluteLink('.//a[has-class("search-comments")]')  # comments-page permalink
+            obj_id = Regexp(Field('_page'), '/comments/([^/]+)/')
+            obj_date = DateTime(Attr('.//time', 'datetime'))
+            obj_title = CleanText('.//a[has-class("search-title")]')
+            obj_author = CleanText('.//a[has-class("author")]')
+
+            def obj_thumbnail(self):
+                path = Attr('./a[has-class("thumbnail")]/img', 'src', default=None)(self)
+                if path is None:
+                    raise SkipItem('not an image thread')
+                return Thumbnail(urljoin(self.page.url, path))
+
+            def obj_url(self):
+                self.obj_thumbnail()  # raises SkipItem so non-image results are dropped
+
+                url = urljoin(self.page.url, Link('./a[has-class("thumbnail")]')(self))
+                if url != Field('_page')(self):
+                    return url
+                # TODO lazy load with fillobj?
+                return self.page.browser.open(url).page.get_image_url()
+
+
+class EntryPage(HTMLPage):
+    @method
+    class get_image(ItemElement):
+        klass = BaseImage
+
+        obj_title = CleanText('//div[@id="siteTable"]//a[has-class("title")]')
+        obj_date = DateTime(Attr('//div[@id="siteTable"]//time', 'datetime'))
+        obj_author = CleanText('//div[@id="siteTable"]//a[has-class("author")]')
+
+        def obj_thumbnail(self):
+            path = Attr('//div[@id="siteTable"]//a[has-class("thumbnail")]/img', 'src', default=None)(self)
+            if path is None:
+                raise SkipItem('not an image thread')
+            return Thumbnail(urljoin(self.page.url, path))
+
+        def obj_url(self):
+            return self.page.get_image_url()
+
+        def obj__page(self):
+            return self.page.url
+
+    def get_image_url(self):
+        if self.doc.xpath('//video[@class="preview"]'):
+            raise SkipItem('Videos are not implemented')
+        return urljoin(self.url, Link('//a[img[@class="preview"]]')(self.doc))
+
+    def get_thread(self, id):
+        thr = Thread(id=id)
+        self.fill_thread(thr)
+        thr.date = thr.root.date  # thread metadata is copied from the root message
+        thr.title = thr.root.title
+        thr.url = thr.root.url
+        return thr
+
+    def fill_thread(self, thread):
+        thread.root = None  # set below, from the message that has no permalink
+        msgs = OrderedDict()
+
+        title = CleanText('//a[has-class("title")]')(self.doc)
+
+        for m in self.iter_messages():
+            m.thread = thread
+            if not m.url:
+                # no permalink link: treat this entry as the submission (root)
+                assert not thread.root, 'there cannot be 2 roots'
+                thread.root = m
+                m.id = thread.id
+                m.parent = None
+                m.url = self.url
+            else:
+                assert m.id not in msgs
+                msgs[m.id] = m  # keyed by short id, matching _parent_part below
+                m.id = '%s.%s' % (thread.id, m.id)  # namespace comment ids under the thread
+
+        for m in msgs.values():
+            if m is thread.root:
+                continue
+
+            if m._parent_part:
+                m.parent = msgs[m._parent_part]
+            else:
+                m.parent = thread.root  # top-level comments hang off the submission
+            m.parent.children.append(m)
+            m.title = 'Re: %s' % title
+
+        thread.root.title = title
+
+    @method
+    class iter_messages(ListElement):
+        item_xpath = '//div[has-class("entry")]'
+
+        class item(ItemElement):
+            klass = Message
+
+            # TODO deleted messages, collapsed messages, pagination
+
+            def condition(self):
+                if len(self.el.xpath('./span[@class="morecomments"]')):
+                    return False  # "load more comments" stub, not a real message
+                if len(self.el.xpath('.//div[has-class("usertext")][has-class("grayed")]')):
+                    return False  # grayed text — presumably deleted/removed; skipped
+                if len(self.el.xpath('./ancestor::div[@id="siteTable_deleted"]')):
+                    return False
+                return True
+
+            obj_content = CleanHTML('.//div[has-class("usertext-body")]')
+            obj_sender = CleanText('.//a[has-class("author")]')
+            obj_date = DateTime(Attr('.//time[@class="live-timestamp"]', 'datetime'))
+            obj_url = AbsoluteLink('.//a[@data-event-action="permalink"]', default='')
+            obj_id = Regexp(Field('url'), '/(\w+)/$', default=None)
+            obj__parent_part = Regexp(Link('.//a[@data-event-action="parent"]', default=''), r'#(\w+)', default=None)
+
+            def obj_children(self):
+                return []
+
+
+class CatchHTTP(RawPage):
+    pass  # no parsing; exists only so plain http:// links match a URL (see browser.catch_http)
diff --git a/modules/reddit/test.py b/modules/reddit/test.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f2027444eefb676049669908ba7bc10e7534e11
--- /dev/null
+++ b/modules/reddit/test.py
@@ -0,0 +1,111 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2017 Vincent A
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import unicode_literals
+
+from contextlib import contextmanager
+
+from weboob.capabilities.image import BaseImage
+from weboob.tools.test import BackendTest
+
+
+@contextmanager
+def using_url(backend, url):
+    # Temporarily point the backend's browser at another sub-reddit URL.
+    old = backend.browser.BASEURL
+    try:
+        backend.browser.BASEURL = url
+        yield
+    finally:
+        backend.browser.BASEURL = old  # always restore, even on test failure
+
+
+class RedditTest(BackendTest):
+    MODULE = 'reddit'
+
+    def test_colls(self):
+        colls = list(self.backend.iter_resources((BaseImage,), []))
+        self.assertTrue(all(len(c.split_path) == 1 for c in colls))
+        self.assertSetEqual({'hot', 'top', 'new', 'controversial', 'rising'},
+                            set(c.split_path[0] for c in colls))
+
+    def test_images(self):
+        with using_url(self.backend, 'https://www.reddit.com/r/BotanicalPorn/'):
+            n = -1  # sentinel in case the listing yields nothing
+            for n, img in zip(range(10), self.backend.iter_resources((BaseImage,), ['hot'])):
+                self.assertTrue(img.id)
+                self.assertTrue(img.title)
+                self.assertTrue(img.url)
+                self.assertTrue(img.thumbnail.url)
+                self.assertTrue(img.date)
+                self.assertTrue(img.author)
+
+            self.assertEqual(n, 9)
+
+            new = self.backend.get_image(img.id)
+            self.assertEqual(new.id, img.id)
+            self.assertEqual(new.date, img.date)
+            self.assertEqual(new.title, img.title)
+            self.assertEqual(new.url, img.url)
+            self.assertEqual(new.thumbnail.url, img.thumbnail.url)
+            self.assertEqual(new.author, img.author)
+
+    def test_search(self):
+        with using_url(self.backend, 'https://www.reddit.com/r/BotanicalPorn/'):
+            n = -1  # sentinel in case the search yields nothing
+            for n, img in zip(range(10), self.backend.search_image('lily')):
+                self.assertTrue(img.id)
+                self.assertTrue(img.title)
+                self.assertTrue(img.url)
+                self.assertTrue(img.thumbnail.url)
+                self.assertTrue(img.date)
+                self.assertTrue(img.author)
+
+            self.assertEqual(n, 9)
+
+    def test_thread(self):
+        expanded = False  # becomes True once a thread with >10 messages was fully checked
+
+        for i, thr in zip(range(10), self.backend.iter_threads()):
+            self.assertTrue(thr.title)
+            self.assertTrue(thr.date)
+
+            if not expanded:
+                new = self.backend.get_thread(thr.id)
+                self.assertEqual(thr.id, new.id)
+                self.assertEqual(thr.title, new.title)
+
+                j = -1  # sentinel in case the thread has no messages
+
+                for j, msg in enumerate(new.iter_all_messages()):
+                    self.assertIs(msg.thread, new)
+                    self.assertTrue(msg.title)
+                    self.assertTrue(msg.sender)
+                    self.assertTrue(msg.id)
+                    if msg is new.root:
+                        self.assertIsNone(msg.parent)
+                    else:
+                        self.assertTrue(msg.content)
+                        self.assertTrue(msg.parent)
+                        self.assertIn(msg, msg.parent.children)
+
+                if j > 10:
+                    expanded = True
+
+        self.assertEqual(i, 9)
+
diff --git a/tools/py3-compatible.modules b/tools/py3-compatible.modules
index a407e0ab81dc94f28e92ace51924b6cf77e00472..988f4fe57e311bdf8731fd8b301804bf663c5397 100644
--- a/tools/py3-compatible.modules
+++ b/tools/py3-compatible.modules
@@ -92,6 +92,7 @@ popolemploi
pornhub
ratp
razibus
+reddit
regionsjob
relaiscolis
s2e