From 9a9b11b0f5a87dbc3a80ed4c7f31cb7f2b0d3877 Mon Sep 17 00:00:00 2001 From: Vincent A Date: Tue, 22 Aug 2017 01:06:04 +0200 Subject: [PATCH] [reddit] new CapMessages/CapImage module TODO CapMessagesPost --- modules/reddit/__init__.py | 26 +++++ modules/reddit/browser.py | 70 ++++++++++++ modules/reddit/favicon.png | Bin 0 -> 9632 bytes modules/reddit/module.py | 125 +++++++++++++++++++++ modules/reddit/pages.py | 208 +++++++++++++++++++++++++++++++++++ modules/reddit/test.py | 111 +++++++++++++++++++ tools/py3-compatible.modules | 1 + 7 files changed, 541 insertions(+) create mode 100644 modules/reddit/__init__.py create mode 100644 modules/reddit/browser.py create mode 100644 modules/reddit/favicon.png create mode 100644 modules/reddit/module.py create mode 100644 modules/reddit/pages.py create mode 100644 modules/reddit/test.py diff --git a/modules/reddit/__init__.py b/modules/reddit/__init__.py new file mode 100644 index 0000000000..7f32bc3f1a --- /dev/null +++ b/modules/reddit/__init__.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2017 Vincent A +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . 
+ +from __future__ import unicode_literals + + +from .module import RedditModule + + +__all__ = ['RedditModule'] diff --git a/modules/reddit/browser.py b/modules/reddit/browser.py new file mode 100644 index 0000000000..d5d84a2e2a --- /dev/null +++ b/modules/reddit/browser.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2017 Vincent A +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . 
from __future__ import unicode_literals

from weboob.browser import PagesBrowser, URL

from .pages import ListPage, SearchPage, EntryPage, CatchHTTP


class RedditBrowser(PagesBrowser):
    """Browser scraping the HTML pages of a single sub-reddit.

    The class-level ``BASEURL`` is only a default: ``__init__`` replaces it
    with the URL of the sub-reddit given as ``sub``.
    """

    BASEURL = 'https://www.reddit.com/r/pics/'

    # The group names matter: the methods below build URLs with
    # ``go(cat=...)``, ``go(id=...)`` and ``go(sort=...)``.  The extracted
    # text had lost them (``(?P\w*)``), which is not a valid pattern.
    # NOTE(review): ``after`` is not referenced elsewhere in this file; the
    # name mirrors the query parameter -- confirm against upstream.
    listing = URL(r'(?P<cat>\w*)/?\?count=\d+&after=(?P<after>\w+)',
                  r'(?P<cat>\w*)/?$',
                  ListPage)
    entry = URL(r'/comments/(?P<id>\w+)/.*', EntryPage)
    search = URL(r'search\?sort=(?P<sort>\w+)&restrict_sr=on', SearchPage)
    # catch-all to avoid BrowserHTTPSDowngrade
    catch_http = URL(r'http://.*', CatchHTTP)

    def __init__(self, sub, *args, **kwargs):
        """Create a browser bound to the sub-reddit named ``sub``."""
        super(RedditBrowser, self).__init__(*args, **kwargs)
        self.BASEURL = 'https://www.reddit.com/r/%s/' % sub

    def iter_images(self, cat=''):
        """Open the listing ``cat`` ('' is the default listing) and iterate its images."""
        self.listing.go(cat=cat)
        return self.page.iter_images()

    def search_images(self, pattern, sort='top', nsfw=False):
        """Search the sub-reddit for ``pattern`` and iterate the image results.

        ``sort`` is the reddit sort name placed in the URL; ``nsfw`` is
        turned into the ``nsfw:yes`` / ``nsfw:no`` search operator appended
        to the query.
        """
        # Any truthy/falsy value is accepted; the original dict lookup raised
        # KeyError for anything that was not equal to True or False.
        query = '%s nsfw:%s' % (pattern, 'yes' if nsfw else 'no')

        self.search.go(sort=sort, params={'q': query})
        return self.page.iter_images()

    def iter_threads(self, cat=''):
        """Open the listing ``cat`` and iterate its threads."""
        self.listing.go(cat=cat)
        return self.page.iter_threads()

    def fill_thread(self, thread):
        """Load ``thread.url`` (oldest comments first) and fill ``thread`` in place."""
        self.location(thread.url, params={'sort': 'old'})
        assert self.entry.is_here()
        self.page.fill_thread(thread)

    def get_thread(self, id):
        """Fetch the thread identified by ``id`` (oldest comments first)."""
        self.entry.go(id=id, params={'sort': 'old'})
        return self.page.get_thread(id)

    def get_image(self, id):
        """Fetch the image of entry ``id`` and stamp that id on the result."""
        self.entry.go(id=id)
        img = self.page.get_image()
        img.id = id
        return img


# --- remainder of the original line: patch header of the next file, kept as comments ---
# diff --git a/modules/reddit/favicon.png b/modules/reddit/favicon.png
# new file mode 100644
# index 0000000000000000000000000000000000000000..418710daf08c927b5767e085a16214304e6577ad
# GIT binary patch
# literal 9632
# zcmZ`E2UVAySqCSch?pz?pi(^f)sa1aF^ol?i8oUUH;rZ_uRu-
# zlI%ui=9NcgBb61U&{0WHK_C#ijI_8a@EG)VAR_|*dxT9!fCofVIVo|_>)YRd9VJP?
z85AdJ9aj+O9q!u!1Io-M0!|{i$tXx5ZNVcWqoBZ;jH&^r$ZgcM-NYR2?JXSKfR`YU zn2Uvpn}s>0r;VF6rId_Ik`e!+?zME9V@_Mn>51F&seIPq%G#+%)9I>1mc4*| z<4V0umU&j111yz7FzpY~uOTpIXtgk;1GVNIknFm^rvNE}+MSPf@;*1C+1T0xQxp3S z!sip`4_{Ki>P1W%WwIJz)&KMK@OO6d5lwT&5Mt?HF6I@p!8^2vz&g|*2u7DaG7DT4 z0?)&h{E)v4C2BSz4%C~HIxw{icH_HD15{u`4(c8Rk%mi_^)%%2A@X{EfFKY=82nu{ zFu}5P%0iK(8rF-5A7LjdU3kr6keR-rn{h)6T8Sg7@d)C0rQ5Kf)HI)G-y4QKhrxt# zpOy0So|ZL6iqAysCp7Z5!4ZB3Umo}Hw=finS#+hDkP(%i+(CyNVg~&#`d{`@+0=;& z);)YR{zYmGoIT&s(UJAHZ`4>sn?`JDX4cjb!ND+L;o&m0I6E#aF7NYTS)m3{2jkKO z&I(xL+fu}P{%Z1cJ(7enaMb*<#q;CMitl-czUvrovwqb(VPh|^^scTiu@pkXw!b_2 z-?6tt|D8kPOo(>jpT)SqUQLe&>~Z|Bk{b;!Yn{b5d*Q;@R9C+1|9&mb&RSS<5!d1D zUU_q;jbwCmbnL|iT+_JDf;Ea)9oVf#D=G3o5>g1TH+dK<-R@CtN^lQx{AIgoMhok#U z_tZ-vkoA>Lf8P(R9B6(u3)kfu0i&p(Dp(S^4}UD{oR2G;YQ3*4R~+t=g;&n+Tt`@& zHm)9qJ1XJcMLifIK(7~E@l~43+-!mjxS%LJpjemD<#uoGo0}Uo!|1Bx`i6!-laoRD z47+n+JhN8(w&o1;KcrhyeSdjC5$GZayMY}yK;Va^+ouMSPDA42Mpx3>fBzN^ z4$O9x3Z~m&a3xYutZ_@i|vb-E;+voW*-2_1An^^Od<}llaLnLb@meAi2y{%^@+QA6+Jd z?yAj)aMU$Eq>VVgJb4@XT}lL8e1GqBe_>L)X!im13#Jq4$?;(o7;k(CR0ygS^uZ!M z7iJCSC4`|?9E zRwKd2esr?+fEy040yilM$tW=9%#W*XNjhq3X0eyPh?6k~$lt)Vu75{gVJ~Xk(ucig z=nIvU0~6L~5&g85_D;m}Ls9s?iwC{3bmwNFO+QiwInrDoB~me9qkPS>`goE7+#OV`o)N+#u!P{zZ!pgwQ6@Rh;o zbHVXbT^WL1^fh5}8xNgI;bYnwCdd04`=ep5sbop@@mraFTfU)SYkr$lTLVgRabN7| zFNN6?!({!O#Kb#nA3!WOg(DL4gIXg=u|rNrA&rGP(GMSKJSLAmoa?a8#0`b>XW>9V zDgm9iDv@@{woUS)&T+XGjpLvR%5R|aXarb$5<&qa6;?gw9&9CS*@Vqm;ti||L~14S z+&-i>WR++e7Mf>b@!1nFT><^wKe-f{`s8^RpPu%EygML%deM_!;&VkyG8bZWD_p}#**d0%5i);>qkaJ(r zV|i9L7*oe(W_brAU&SS`5lmKd{l9s9@zxuBNkStuaf;_a=`jwy#8(?eS{b$gUoVjh zBP;4eo22)CVM7Xa4Cr-h2%Vt}F@{WsuSyX(c85M&5!A!AIy?&RB?}9ZQs9Tdh+yzH zFj&CQeKMZ%p3Q$8g1sTjo_L;hQdb&Pd+fXMUKlpVv(<^^5 z=QvN%Jy+7-xxGk9e7GB)EM&RDm*}#r!+%`9dJqgVNw(Eg#}e`kWG1{N|8Jj~+n4VF z)Q||N!RBPx%;f5n(g&}Wx|yOgH7V(#Q?*c^rZKn5G8(yQn7;5ve4QqrLSyKQzWeuF zq1zC3b39*XF+xB>O6n|-1B?x=XXE*9-Rkw_`DCHyyFr@=_Rl`qT<0Xeen%?RAvIXh zEVo~0iR0@Bqfx8lPY+i*9X`BT2JZO6xpDGzVhng&V;|Rd4*nF-;zY`h@00{JS=`#k 
zU;dltEt^`67$cd($VN~t;2FFl<08>v(5NsB&igpb(66%9o|?}`Z+6C?PlTKDiXm50 z+3b_X=)=wwsKgAbtS+gnSG2v)ZV}+`FSOj`xUZJ`h&MJi*1USAV$dBd8YJ2|ZI~JHX@Fd3+yZ|9#QWQcUkQuzu6pV|kB!nP?{=(dR`B|BHcg+gz zuG6OVaiO~vF$lsihw%joHmK0iP2az()uS9A8#B++@Q)Rxa$*3|?&`%nbjn+QJ)DNW7H{HQv{S0O=$ zFRiTHoz*h3u(mEuL~knee|Oq^D!meHj%zAR!;-nmYUJ zn^wc756L{O%E4gx?_X5Q@O}d^EHd;cxt`M+F;6fG*>bVEu1_=}5-(7RCHvI<#@~YB zkPeQIX>upF-2_xLZeXR_Hk|jaPS!fIp8wsY!9j(M(-1)RAqSPLX$^IYb`^R}v80ae z&ib?FtSlMwF1IH-u)(08gzS3&4=9|rvCS)f5}uGK{R?5L^+3=4>=}EtKXKJh>R4?y zfRV~>H2!#V{BRL2GFodfa^cfKh-}0{kTuD76$wkpkW=_h_|K_FoXhR73Rgzq@85A| z1F@G&w!bfd&rkmO^Cd7)1StFJNz^P-x)R%6E5@i0FgSD!zYnwiI$)aT-M@4BY4K?u z9)+Gwr~ho05Flnmh!1|h^x33@bJ>41;eTD}o*3@FFECUGcwL4ej`mryS9=9?-T@&+$ExY6=%2s80R>2<6QDyq35wz+QloJy~r_0}9^x zV&lvBFCjVV1O^37`HaQ6apIEc$dTRr;$p<6Wd#ER@-AbnxPk9In+zk#)Ef!1MW`)5 zAFlRs(GYFy?8q7vr+EMt4n(7SljG7W+nPs~I)RelXTp#Xm9e>~?$kBJ>0sa}&qpF@-D1{9{XwRPCv&@e10i2}<6uWbJv zR)%89h^(KI66S=+tKi##gh570hLJR6Ufg?a8l>YfvFn13hNx3-jV+hPR#a3J3Jh-h z>H!%Z79JL+-H_d*&HpQSQnjJAs=$WCp{Xq!K86<7rKQ_~lQwIN;Kc~7Vsq0382lgW zI#@m!G!C=~R^x_)B^9SzM1NDk8)A|RcysABHgrw)058s4!I`9~Zn9>S{ESPjr6 zSrZR3mCzh@y2<3BC~Zb$AZbgKa_N$WHe6byOQy+00DsGCL*HDQH(?BcM}-v^#6BodTMdL)&abykO#&5vgX~NU2}Qb ze%8(sV$OL7Ip>}Kz)V1@`2Aa6FRrOc5c0jfm{C)@+{mTDyq<~3EA7T%*@PmYFp*c84n?R z`GPXffi}3yG0iwCPr^k!D&OxT&CnW~{~5Z8^_!1$7(V=qM1jif$zso1hp+cxQQE=5 z0g#_@79`R6^DwPAu|I?{r2u&jcDLY{KH!>WMhZ^N05E!Kt>$)$05o~=YNj)m~-Ud2zC?73{M6I}74OH6ZUEEg79OJ?LM znuH;7B0h4iy@w)0;)qH6<>@LbGK@PX;yCFeNRaapk5&8wMo~hbOc(~frwu@tnVy`4 zn#BqA4<*r$1dAr*`QqFMMi__>mC=Soox2Z-f7RVXOs*j~zVyxT5};Ew?Bne!Z7q6XYRG%*1n%aOYwq zv;T7a0^eKmgk&U=h-xT*6D$n|eG^11!yfz3=Q+Jq*d)fXZqeGoVGoehOn|C-|9U?F zkgznkU<1?>-CiA-$TC=H@In4!SZPLF3%SfR4-o(6?by%FfM(ZjarqDc30x7GuZ&tB|V+at5nnzNc|A6$xq@yN1{)t;?N2~-M3-OUE z1;+_xK-Y9Xv%T+FJjO56Ju>*0( z^OwvZ88~RDwb~D6nNMQ{g%XkXKitS<4^#sv6A)CF2d-0GU&nQ@XHtP zJjL(=Wjnc53cNtp#tr=$b0Ki(lN>s14;DC(pK0#XBzyY0Cx#gVS{bdSra*91RHFQU zg^7P5-(Uu#=EF^!%$Dk9{_z_APOH>wsttS%c=?+KQD>@Qmplm+lpiWoUWTWA{j#x6 
z`8a!`PGq^`#QzpKqx){c`(drl?l5#xI@%kFFC3O!ROta{Xb-=piL88w?!q`8|8cCH z4-lc6NrrLYa&8*GC0W6O9YbbV{&Dx5A=5rbE}<>)0ev8ycqjc2=PE)PK?brebyzN) zK?|xh+%yy9a7`6)b!D~hOypnts+4?A!C->cDhvui?Il!AJJcf!>e4!X)an?sD-lMs zfe$HX8X74}OS%r750(@@2UNnZ4-?ZfGdyJ%V?1bsg`5T0(XLiQxreME=z$1*Iz^0e z|8_UMbP4CQ44#-km`9tJ7aL)eC4^R2)VhMj(vHY%L$H65BD&71SC@6qs%|P_EKZ!O zm{F8T?CY%7&!gWGH(o!CxDmoN=iPzC;dej6BPS9Z{@FE2LQlLB1fGk|0{ zQ$K8(ceuHUqZc^(m9^7@^Qw&l?6k$1{V5^^R zvJf?Rk;h0C?K_%UR68zN&8nkm{@u+0ua z9XIi)J^^=9->7BZetMdcc~=*&_Y;!484Q$BBXH519{}?=ZdTvaWRyJ<2nG0Vby?_-f*^w|VSWuRN`b;)k-q#Mp%cFE*nauyj@im0v2zjya7Kh~}f4F3lb7Lbh z<1*O70FzD^X~J#@V3Rcv#S7)J*QF+8R#=&vlho4EYU#)NY)VsAkce2VWFxUz*~FgUt(!c=5bEqYur)a9&?z=qLj!Q%KQ^|+l)CQrRN&sa zwbI&bX^QsMAD0S4F8;F0-2GG8zU*GlTYv}Q#YvSuw+3muDxfbEdya*fY!3B>t&Mzo8^3JJ&Y}0u>`utBn+U*05*Iuq{|<%O5BJNdVjZbq^asvnDX$Yt>G z5-o0jq=`Y%(_RH}MKDYXEtE+^fB+fVb;0)pDaBb;=JAU&6g98n>vcDy>2|YekqoG? zU%RPC!9HTG77_|Bx(O%%j5I^ua^~graH4jRLqlU>bflf}+NJR2@*gCbv^&Jr>6(u3#%BNseFto3W%|n>U)jL zu(sZm@9060@CR{Sh&|V`HwfZq>_g?d1QB?$?-kCVm2ifvc9T@etW3y1Lz(! zr}G84$utHPkX5YU zLi-*Yi$URktpe2RKnTJ+HWV5_jfjbfp`oD}6CncD2_Ue#t2Ra=(Rz%5U6k#CxT_V{ ziOcSH>{zIwgS##~A3wSdkb7Q0;V?&$krjUl4!zCeQxIJ~`N^bL1{ArP@DE;VwfZ|+ z9F%~&wslU=GyOM^g$;&AbH5HI(6nDq$(^i2a;0g%auAu1W%AlBR0pS~rhYy16;W>| zy+DIBjW``Y__<=6g#>M@1+|`=zd4ghzibjf6 zu0Qa@1alQSJux5G1_Ad%0kGkOa~=EV)ExW|-7>4H*gVeH#&DGcyOtdx3jAbj^Eq;S zSs(CFodI19e2)8ce0^F;1h`^}))i2)(DlBYzzLDF#+B7nEJF1$JXg>=(Fov4`u;bw zfOIhJr*PpRVMUXdro|zYR^unzo6Hjve7WD|;pMIIIMwBITpPWaH&OzS84#tkO&l>y z!dYVgdp|rlFrKe6czU^ml+@KFiBog#aUe0PX=#1zfSW?^*-#36#Qp4fR>9*vCID}S zDS@OPhX8`}B6oH&;Wqud$G`)5c{~<;JSgznCS6ZoW6Ps<5y-*+(CTHV2n+&&40~`v<+%MT^|Uu}dPzAfV`NKL_oXDa76g6}`ir_kgq? 
zh$RR7x6YTF-%7q=lbJrwW4!a#)_?M&z_YqafNwXN!7cc(lhpq7cZ7$JkD%C6zL!Ot zMytb+y~{ZMlk47w6F&#qcR(B}<_Jh=X%X&EB7+9#*WI>@%~trRlrr&I!}05*yv%g zpRi-tSE{Rj>#v=QOH1FDPH!W#fBARWJr5c3WsS}7jLo>O)-RtH7uyt%S!Nv!t%iqJRMva39RMK2>#e5bcj= zqVUg=L1mBK1#;@^>z&?q`27EQbY2&+*8n?xRn1#bC|UWJf-fb%e4^gJT!ahWt+-y_ z-NmKcd&Aw+_Xd>$+5z=Fh75E(&XGB0I%Ai}i)OYvoZ@%pn!9al^p6A}kOdtbIsN_pD!9tUBsh1sw|}$N zpTeXlyng_8UJS1wQ#e2THiRZjJX%&v(SuibgERuS3^s+R$c6;yHlz;C!S>6I&VT`` zU`tb$f-M;MA_#1nw%^R@y>*E+LmwMej72w&6^EwnOD{ll1N)IRCOwE}?N^Fb?SF@B zU7t&20b7YiDz@79&f%?Gvm85h((85i92+F(A@82>oY52)hbPrFoAB-(p{&u)RUHi$ zet?#@TWw7NqzvF;@>GgecLO$Rq&Ri^-J0)PF#uo93}`^yxB)4fu*)EoY=^2s4IYI8 zz>&ab)^vKlnXmgc%k;*2075!5D5PM z?OuSM9lR2aLD63z=B$wAdHA88aTJA=8bmXl?NvOl?M1OEA&+6COeLcmzT zGF50=rJ$fFE-4AerB|Gsp8jq^gb<_5=)-`ZPMz={1d=;GuBe#J|An87E_b5fSm0`J zY`gWi$_yZV6*_grpd%O8Ncx@=Hvt3V`;84s!1n`sx+~!6VUP=mGvG0Sd@7`Z(&4UA zw@wPyi1_%%a~M z|3NMl&k~`rzW%LKbPqOVoLhJv+5A`$#@O@SG9P4I*N~dAI!=Z?Gx}ECH7Z zzyV~y%X(4yT*4s zLGJppoL&`m*DMfVTO7zTzN|`)sx_b{J=!jPIuIyrGW#>Qt@wR!S%BgNd}rLBeE`M* z;SH%z%{q2`wrdxoVVfh7X`qlGt9qz4fBBr84a1QIsvbnpkK;!@Er|FeW=QApcvDXs zBWpDB={o>FRSQO~&vTX>(xrAc*Uv9`ZG`z`EL?ERUbs6Siycu8xfk;xU?BYdNM$i{CWl}fThtM(a!!6SJj7ch^nhv92N>I!EDNrf`GhiL`)gQ>`E*y zrx~t(NIsQuY_ik5k}SVLYq_F7z9r_22a_)YYx*G{sYXT;+m&%?NO#LF_NIw}yH~Iv z>V;lP*f)FPcu-L;XtSxCsa?dX$V&suu8Td{C!nJG9Ue+ZBB%zFEHdn&wtUJJCkd$xq|KPz3AiwX$}!5{=KL zDh^T0$ZnTWm3b1{?T{8}ez(Q*ilOAuDvqE-Z{$itgqji|_0rNFx653$YGI;TqRd=u z2C}JRFXy2j`tUy&Gk&8vzZXAjFPCV4J3o97H$0i1N{DQdE|itsC_%FNcBlvoiP0iT rwft~IMz#F^zoY1X?=zB_ens+rL^N+|dYyW^QA$QaQM~G_amfDw8B?;2 literal 0 HcmV?d00001 diff --git a/modules/reddit/module.py b/modules/reddit/module.py new file mode 100644 index 0000000000..f111dbae59 --- /dev/null +++ b/modules/reddit/module.py @@ -0,0 +1,125 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2017 Vincent A +# +# This file is part of weboob. 
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.

from __future__ import unicode_literals

from weboob.tools.backend import Module, BackendConfig
from weboob.tools.value import Value
from weboob.capabilities.image import CapImage, BaseImage, Thumbnail
from weboob.capabilities.messages import CapMessages, Thread
from weboob.capabilities.collection import CapCollection, Collection

from .browser import RedditBrowser


__all__ = ['RedditModule']


def register_resources_handler(d, *path):
    """Return a decorator that stores the decorated function in ``d[path]``.

    ``path`` is the tuple of positional arguments given after ``d``; the
    decorated function is returned unchanged.
    """
    def decorator(func):
        d[path] = func
        return func
    return decorator


class RedditModule(Module, CapImage, CapCollection, CapMessages):
    """Backend exposing one sub-reddit as images, threads and collections."""

    NAME = 'reddit'
    DESCRIPTION = u'reddit website'
    MAINTAINER = u'Vincent A'
    EMAIL = 'dev@indigo.re'
    LICENSE = 'AGPLv3+'
    VERSION = '1.4'
    CONFIG = BackendConfig(
        Value('subreddit', label='Name of the sub-reddit', regexp='[^/]+', default='pics'),
    )

    BROWSER = RedditBrowser

    # Maps a path pattern (tuple) to the handler serving it; filled by the
    # ``register_resources_handler`` decorators further down.
    RESOURCES = {}

    def create_default_browser(self):
        """Build the browser for the configured ``subreddit`` value."""
        return self.create_browser(self.config['subreddit'].get())

    def get_file(self, _id):
        raise NotImplementedError()

    def get_image(self, id):
        """Return the image attached to the entry ``id``."""
        return self.browser.get_image(id)

    def search_file(self, pattern, sortby=CapImage.SEARCH_RELEVANCE):
        """Search images matching ``pattern``, NSFW results included.

        Delegates to ``search_image`` so that ``sortby`` is translated to a
        reddit sort name; the raw ``CapImage.SEARCH_*`` constant used to be
        passed straight into the search URL.
        """
        return self.search_image(pattern, sortby, nsfw=True)

    def search_image(self, pattern, sortby=CapImage.SEARCH_RELEVANCE, nsfw=False):
        """Search images matching ``pattern``.

        Raises ``KeyError`` if ``sortby`` is not one of the four
        ``CapImage.SEARCH_*`` constants listed below.
        """
        sorting = {
            CapImage.SEARCH_RELEVANCE: 'relevance',
            CapImage.SEARCH_RATING: 'top',
            CapImage.SEARCH_VIEWS: 'top',  # not implemented
            CapImage.SEARCH_DATE: 'new',
        }
        return self.browser.search_images(pattern, sorting[sortby], nsfw)

    def iter_threads(self):
        """Iterate the threads of the default listing."""
        return self.browser.iter_threads()

    def get_thread(self, id):
        """Return the thread ``id`` with its messages."""
        return self.browser.get_thread(id)

    def iter_resources(self, objs, split_path):
        """Dispatch ``split_path`` to the first matching ``RESOURCES`` handler.

        A pattern matches when it has the same length as ``split_path`` and
        each of its elements is either ``None`` (wildcard) or equal to the
        corresponding path component.
        """
        for pattern, handler in self.RESOURCES.items():
            if len(pattern) != len(split_path):
                continue
            if all(a is None or a == b for a, b in zip(pattern, split_path)):
                return handler(self, objs, *split_path)
        # No handler: give callers something iterable instead of the implicit
        # ``None`` the original fell through to.
        return []

    @register_resources_handler(RESOURCES)
    def iter_resources_root(self, objs):
        """List the top-level collections, one per reddit listing."""
        return [
            Collection(['hot'], 'Hot threads'),
            Collection(['new'], 'New threads'),
            Collection(['rising'], 'Rising threads'),
            Collection(['controversial'], 'Controversial threads'),
            Collection(['top'], 'Top threads'),
        ]

    @register_resources_handler(RESOURCES, None)
    def iter_resources_dir(self, objs, key):
        """Iterate the threads or images of the listing named ``key``."""
        # 'hot' is the listing served at the sub-reddit root, i.e. ''.
        if key == 'hot':
            key = ''

        if Thread in objs:
            # ``self.iter_threads()`` accepts no ``cat`` argument, so calling
            # it with one raised TypeError; go through the browser, as the
            # image branch below already does.
            return self.browser.iter_threads(cat=key)
        if BaseImage in objs:
            return self.browser.iter_images(cat=key)
        return []

    def fill_data(self, obj, fields):
        """Download the binary payloads requested in ``fields`` into ``obj``."""
        if 'thumbnail' in fields and not obj.thumbnail.data:
            obj.thumbnail.data = self.browser.open(obj.thumbnail.url).content
        if 'data' in fields:
            obj.data = self.browser.open(obj.url).content

    def fill_thread(self, obj, fields):
        """Load the message tree of ``obj`` when ``root`` is requested."""
        if 'root' in fields:
            self.browser.fill_thread(obj)

    OBJECTS = {
        BaseImage: fill_data,
        Thumbnail: fill_data,
        Thread: fill_thread,
    }


# --- remainder of the original line: patch header of the next file, kept as comments ---
# diff --git a/modules/reddit/pages.py b/modules/reddit/pages.py
# new file mode 100644
# index 0000000000..db72b5d898
# --- /dev/null
# +++ b/modules/reddit/pages.py
# @@ -0,0 +1,208 @@
# -*- coding: utf-8 -*-
#
# Copyright(C) 2017 Vincent A
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.

from __future__ import unicode_literals

from collections import OrderedDict

from weboob.browser.elements import method, ListElement, ItemElement, SkipItem
from weboob.browser.filters.standard import CleanText, Regexp, Field, DateTime
from weboob.browser.filters.html import AbsoluteLink, Link, Attr, CleanHTML
from weboob.browser.pages import HTMLPage, RawPage, pagination
from weboob.capabilities.image import BaseImage, Thumbnail
from weboob.capabilities.messages import Thread, Message
from weboob.tools.compat import urljoin


class list_entry(ItemElement):
    """Fields shared by the image and thread items of a listing page."""

    obj_title = CleanText('.//a[has-class("title")]')
    obj_date = DateTime(Attr('.//time[@class="live-timestamp"]', 'datetime'))
    # Link to the comments page; the entry id is extracted from it.
    obj__page = AbsoluteLink('.//a[has-class("comments")]')
    obj_id = Regexp(Field('_page'), '/comments/([^/]+)/')


class ListPage(HTMLPage):
    """A sub-reddit listing, readable either as images or as threads."""

    @pagination
    @method
    class iter_images(ListElement):
        item_xpath = '//div[has-class("entry")]'

        class item(list_entry):
            klass = BaseImage

            obj_author = CleanText('.//a[has-class("author")]')

            def obj_thumbnail(self):
                # The thumbnail link is looked up from the parent of the
                # entry div ('..//'); entries without one are skipped.
                path = Attr('..//a[has-class("thumbnail")]/img', 'src', default=None)(self)
                if path is None:
                    raise SkipItem('not an image thread')
                return Thumbnail(urljoin(self.page.url, path))

            def obj_url(self):
                # Called only for its SkipItem side effect on non-image entries.
                self.obj_thumbnail()

                url = urljoin(self.page.url, Link('..//a[has-class("thumbnail")]')(self))
                if url != Field('_page')(self):
                    return url
                # The thumbnail points at the comments page itself: open it
                # to find the image URL.
                # TODO lazy load with fillobj?
                return self.page.browser.open(url).page.get_image_url()

        next_page = Link('//a[contains(@rel,"next")]', default=None)

    @pagination
    @method
    class iter_threads(ListElement):
        item_xpath = '//div[has-class("entry")]'

        class item(list_entry):
            klass = Thread

            obj_url = Field('_page')

        next_page = Link('//a[contains(@rel,"next")]', default=None)


class SearchPage(HTMLPage):
    """Search results of a sub-reddit, read as images."""

    @pagination
    @method
    class iter_images(ListElement):
        item_xpath = '//div[has-class("search-result")]'

        class item(ItemElement):
            klass = BaseImage

            obj__page = AbsoluteLink('.//a[has-class("search-comments")]')
            obj_id = Regexp(Field('_page'), '/comments/([^/]+)/')
            obj_date = DateTime(Attr('.//time', 'datetime'))
            obj_title = CleanText('.//a[has-class("search-title")]')
            obj_author = CleanText('.//a[has-class("author")]')

            def obj_thumbnail(self):
                path = Attr('./a[has-class("thumbnail")]/img', 'src', default=None)(self)
                if path is None:
                    raise SkipItem('not an image thread')
                return Thumbnail(urljoin(self.page.url, path))

            def obj_url(self):
                # Called only for its SkipItem side effect on non-image results.
                self.obj_thumbnail()

                url = urljoin(self.page.url, Link('./a[has-class("thumbnail")]')(self))
                if url != Field('_page')(self):
                    return url
                # TODO lazy load with fillobj?
                return self.page.browser.open(url).page.get_image_url()


class EntryPage(HTMLPage):
    """Comments page of one entry: gives its image and its message tree."""

    @method
    class get_image(ItemElement):
        klass = BaseImage

        obj_title = CleanText('//div[@id="siteTable"]//a[has-class("title")]')
        obj_date = DateTime(Attr('//div[@id="siteTable"]//time', 'datetime'))
        obj_author = CleanText('//div[@id="siteTable"]//a[has-class("author")]')

        def obj_thumbnail(self):
            path = Attr('//div[@id="siteTable"]//a[has-class("thumbnail")]/img', 'src', default=None)(self)
            if path is None:
                raise SkipItem('not an image thread')
            return Thumbnail(urljoin(self.page.url, path))

        def obj_url(self):
            return self.page.get_image_url()

        def obj__page(self):
            return self.page.url

    def get_image_url(self):
        """Return the absolute URL of the previewed image.

        Raises ``SkipItem`` when the preview is a ``<video>`` element.
        """
        if self.doc.xpath('//video[@class="preview"]'):
            raise SkipItem('Videos are not implemented')
        return urljoin(self.url, Link('//a[img[@class="preview"]]')(self.doc))

    def get_thread(self, id):
        """Build a ``Thread`` for ``id``; date, title and url come from its root message."""
        thr = Thread(id=id)
        self.fill_thread(thr)
        thr.date = thr.root.date
        thr.title = thr.root.title
        thr.url = thr.root.url
        return thr

    def fill_thread(self, thread):
        """Attach the messages of this page to ``thread`` as a tree.

        The message without a permalink is taken as the root (the submission
        itself); every other message is linked to the comment named by its
        "parent" link, or to the root when it has none.
        """
        thread.root = None
        # Replies indexed by their short id, in page order.
        replies = OrderedDict()

        title = CleanText('//a[has-class("title")]')(self.doc)

        for msg in self.iter_messages():
            msg.thread = thread
            if not msg.url:
                assert not thread.root, 'there cannot be 2 roots'
                thread.root = msg
                msg.id = thread.id
                msg.parent = None
                msg.url = self.url
            else:
                assert msg.id not in replies
                # Index by the short id first: ``_parent_part`` of the other
                # messages refers to it, not to the qualified id set below.
                replies[msg.id] = msg
                msg.id = '%s.%s' % (thread.id, msg.id)

        # The root is never stored in ``replies``, so no identity check
        # against it is needed here.
        for msg in replies.values():
            if msg._parent_part:
                msg.parent = replies[msg._parent_part]
            else:
                msg.parent = thread.root
            msg.parent.children.append(msg)
            msg.title = 'Re: %s' % title

        thread.root.title = title

    @method
    class iter_messages(ListElement):
        item_xpath = '//div[has-class("entry")]'

        class item(ItemElement):
            klass = Message

            # TODO deleted messages, collapsed messages, pagination

            def condition(self):
                # Skip "load more comments" stubs, grayed-out bodies and
                # anything under the deleted-entries table.
                skipped = (
                    './span[@class="morecomments"]',
                    './/div[has-class("usertext")][has-class("grayed")]',
                    './ancestor::div[@id="siteTable_deleted"]',
                )
                return not any(self.el.xpath(xpath) for xpath in skipped)

            obj_content = CleanHTML('.//div[has-class("usertext-body")]')
            obj_sender = CleanText('.//a[has-class("author")]')
            obj_date = DateTime(Attr('.//time[@class="live-timestamp"]', 'datetime'))
            # Empty for the submission itself, which has no permalink.
            obj_url = AbsoluteLink('.//a[@data-event-action="permalink"]', default='')
            # Raw string: '\w' in a plain literal is an invalid escape sequence.
            obj_id = Regexp(Field('url'), r'/(\w+)/$', default=None)
            obj__parent_part = Regexp(Link('.//a[@data-event-action="parent"]', default=''), r'#(\w+)', default=None)

            def obj_children(self):
                return []


class CatchHTTP(RawPage):
    """Page class attached to the browser's catch-all ``http://`` URL."""


# --- remainder of the original line: patch header of the next file, kept as comments ---
# diff --git a/modules/reddit/test.py b/modules/reddit/test.py
# new file mode 100644
# index 0000000000..0f2027444e
# --- /dev/null
# +++ b/modules/reddit/test.py
# @@ -0,0 +1,111 @@
# -*- coding: utf-8 -*-
#
# Copyright(C) 2017 Vincent A
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals

from contextlib import contextmanager

from weboob.capabilities.image import BaseImage
from weboob.tools.test import BackendTest


@contextmanager
def using_url(backend, url):
    """Temporarily replace ``backend.browser.BASEURL`` with ``url``.

    The previous value is restored on exit, even if the body raises.
    """
    old = backend.browser.BASEURL
    try:
        backend.browser.BASEURL = url
        yield
    finally:
        backend.browser.BASEURL = old


class RedditTest(BackendTest):
    """Tests of the reddit backend."""

    MODULE = 'reddit'

    def _check_image(self, img):
        # Every image yielded by a listing or a search must carry these fields.
        self.assertTrue(img.id)
        self.assertTrue(img.title)
        self.assertTrue(img.url)
        self.assertTrue(img.thumbnail.url)
        self.assertTrue(img.date)
        self.assertTrue(img.author)

    def test_colls(self):
        colls = list(self.backend.iter_resources((BaseImage,), []))
        self.assertTrue(all(len(c.split_path) == 1 for c in colls))
        self.assertSetEqual({'hot', 'top', 'new', 'controversial', 'rising'},
                            {c.split_path[0] for c in colls})

    def test_images(self):
        with using_url(self.backend, 'https://www.reddit.com/r/BotanicalPorn/'):
            n = -1
            for n, img in zip(range(10), self.backend.iter_resources((BaseImage,), ['hot'])):
                self._check_image(img)

            self.assertEqual(n, 9)

            # Fetching the last image by id must give the same data.
            new = self.backend.get_image(img.id)
            self.assertEqual(new.id, img.id)
            self.assertEqual(new.date, img.date)
            self.assertEqual(new.title, img.title)
            self.assertEqual(new.url, img.url)
            self.assertEqual(new.thumbnail.url, img.thumbnail.url)
            self.assertEqual(new.author, img.author)

    def test_search(self):
        with using_url(self.backend, 'https://www.reddit.com/r/BotanicalPorn/'):
            n = -1
            for n, img in zip(range(10), self.backend.search_image('lily')):
                self._check_image(img)

            self.assertEqual(n, 9)

    def test_thread(self):
        # Set once a thread with more than 11 messages has been walked, so
        # the remaining threads are not fetched.
        expanded = False

        # Initialised like ``n`` and ``j`` elsewhere, so that an empty
        # listing fails the final assertion instead of raising NameError.
        i = -1
        for i, thr in zip(range(10), self.backend.iter_threads()):
            self.assertTrue(thr.title)
            self.assertTrue(thr.date)

            if not expanded:
                new = self.backend.get_thread(thr.id)
                self.assertEqual(thr.id, new.id)
                self.assertEqual(thr.title, new.title)

                j = -1

                for j, msg in enumerate(new.iter_all_messages()):
                    self.assertIs(msg.thread, new)
                    self.assertTrue(msg.title)
                    self.assertTrue(msg.sender)
                    self.assertTrue(msg.id)
                    if msg is new.root:
                        self.assertIsNone(msg.parent)
                    else:
                        self.assertTrue(msg.content)
                        self.assertTrue(msg.parent)
                        self.assertIn(msg, msg.parent.children)

                if j > 10:
                    expanded = True

        self.assertEqual(i, 9)


# --- remainder of the original line: final hunk of the patch, kept as comments ---
# diff --git a/tools/py3-compatible.modules b/tools/py3-compatible.modules
# index a407e0ab81..988f4fe57e 100644
# --- a/tools/py3-compatible.modules
# +++ b/tools/py3-compatible.modules
# @@ -92,6 +92,7 @@ popolemploi
#  pornhub
#  ratp
#  razibus
# +reddit
#  regionsjob
#  relaiscolis
#  s2e
# --
# GitLab