From d24ca46ef61df797d189eb56c5d8794422d6ec95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Revol?= Date: Fri, 31 Aug 2012 19:23:29 +0200 Subject: [PATCH] Add a video module for gdcvault.com MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For now it only fetches the speaker video, but each page can have both a speaker and slides video feed. TODO: search Signed-off-by: François Revol Signed-off-by: Romain Bignon --- modules/gdcvault/__init__.py | 3 + modules/gdcvault/backend.py | 82 ++++++++++++++++++ modules/gdcvault/browser.py | 52 ++++++++++++ modules/gdcvault/favicon.png | Bin 0 -> 3482 bytes modules/gdcvault/favicon.xcf | Bin 0 -> 5024 bytes modules/gdcvault/pages.py | 159 +++++++++++++++++++++++++++++++++++ modules/gdcvault/test.py | 42 +++++++++ modules/gdcvault/video.py | 44 ++++++++++ 8 files changed, 382 insertions(+) create mode 100644 modules/gdcvault/__init__.py create mode 100644 modules/gdcvault/backend.py create mode 100644 modules/gdcvault/browser.py create mode 100644 modules/gdcvault/favicon.png create mode 100644 modules/gdcvault/favicon.xcf create mode 100644 modules/gdcvault/pages.py create mode 100644 modules/gdcvault/test.py create mode 100644 modules/gdcvault/video.py diff --git a/modules/gdcvault/__init__.py b/modules/gdcvault/__init__.py new file mode 100644 index 0000000000..c6833aff7e --- /dev/null +++ b/modules/gdcvault/__init__.py @@ -0,0 +1,3 @@ +from .backend import GDCVaultBackend + +__all__ = ['GDCVaultBackend'] diff --git a/modules/gdcvault/backend.py b/modules/gdcvault/backend.py new file mode 100644 index 0000000000..f031d0e37a --- /dev/null +++ b/modules/gdcvault/backend.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2010-2011 Romain Bignon +# Copyright(C) 2012 François Revol +# +# This file is part of weboob. 
class GDCVaultBackend(BaseBackend, ICapVideo, ICapCollection):
    """Weboob backend exposing GDC Vault talks as videos.

    Only retrieval of a single video through the browser is functional.
    Searching and the 'latest' listing rely on browser methods that are
    still to be written (see the TODO notes below).
    """

    NAME = 'gdcvault'
    MAINTAINER = u'François Revol'
    EMAIL = 'revol@free.fr'
    VERSION = '0.d'
    DESCRIPTION = 'Game Developers Conferences Vault video streaming website'
    LICENSE = 'AGPLv3+'
    BROWSER = GDCVaultBrowser

    # Sort criteria for the planned search_videos(); presumably indexed by
    # the ICapVideo.SEARCH_* constants -- TODO confirm when search lands.
    SORTBY = ['relevance', 'rating', 'views', 'time']

    def get_video(self, _id):
        """Return the video for *_id*, as produced by the browser.

        *_id* is forwarded untouched to ``self.browser.get_video``; the
        result is whatever the browser returns, which can be None when the
        page does not contain a recognisable player.
        """
        with self.browser:
            return self.browser.get_video(_id)

    # TODO: search_videos(pattern, sortby, nsfw, max_results) once the
    # browser implements search_videos(); it should pass
    # self.SORTBY[sortby] to the browser.

    def fill_video(self, video, fields):
        """Complete *video* with the requested *fields* and return it.

        :param video: the GDCVaultVideo to complete
        :param fields: iterable of field names requested by the caller
        :returns: the completed video (the same object the caller passed
                  when the page could not be parsed)
        """
        # Anything other than "thumbnail only" requires loading the page.
        # list() makes the comparison work for tuples as well as lists.
        if list(fields) != ['thumbnail']:
            with self.browser:
                fetched = self.browser.get_video(
                    GDCVaultVideo.id2url(video.id), video)
            # The browser returns None for pages it cannot parse; keep the
            # caller's object instead of dereferencing None below.
            if fetched is not None:
                video = fetched
        if 'thumbnail' in fields and video.thumbnail:
            with self.browser:
                video.thumbnail.data = self.browser.readurl(video.thumbnail.url)

        return video

    def iter_resources(self, objs, split_path):
        """Yield the sub-collections or videos found at *split_path*.

        At the root a single 'latest' collection is advertised. Inside
        'latest' the browser is asked for the newest videos, but only when
        it actually provides ``latest_videos``.
        """
        if BaseVideo not in objs:
            return
        collection = self.get_collection(objs, split_path)
        if collection.path_level == 0:
            yield self.get_collection(objs, [u'latest'])
        if collection.split_path == [u'latest']:
            # The browser's latest_videos() is not implemented yet; list
            # nothing rather than failing with AttributeError.
            latest_videos = getattr(self.browser, 'latest_videos', None)
            if latest_videos is None:
                return
            for video in latest_videos():
                yield video

    def validate_collection(self, objs, collection):
        """Accept the root and the 'latest' video collection only.

        :raises CollectionNotFound: for any other path
        """
        if collection.path_level == 0:
            return
        if BaseVideo in objs and collection.split_path == [u'latest']:
            collection.title = u'Latest GDCVault videos'
            return
        raise CollectionNotFound(collection.split_path)

    # Maps an object class to the method able to fill its missing fields.
    OBJECTS = {GDCVaultVideo: fill_video}
class GDCVaultBrowser(BaseBrowser):
    """Browser for gdcvault.com; it only recognises video ("play") pages."""

    DOMAIN = 'gdcvault.com'
    ENCODING = None
    # The named group 'id' is mandatory: VideoPage builds the video from
    # self.group_dict['id']. Dots in the host name are escaped so that they
    # only match a literal '.'.
    PAGES = {
        r'http://[w\.]*gdcvault\.com/play/(?P<id>[\d]+)/?.*': VideoPage,
    }

    @id2url(GDCVaultVideo.id2url)
    def get_video(self, url, video=None):
        """Load *url* and return the video described by that page.

        :param url: location to load, after processing by the id2url
                    decorator
        :param video: optional existing GDCVaultVideo to fill in place
        :returns: the video, or None when the loaded page is not a
                  VideoPage or holds no usable player
        """
        self.location(url)
        # A redirect or an unexpected URL leaves us on no known page.
        if not self.is_on_page(VideoPage):
            return None
        return self.page.get_video(video)

    # TODO: search_videos(pattern, sortby) and latest_videos(); both need
    # an IndexPage class (with iter_videos()) that does not exist yet.
zr&idWOi3b48c#}~wP3AjKQjtZ9y0*#V!@|O%$dSyjt_fSrAaxl)GzqTnPV(3l%$+_ zZ)=Zt)_3qp-FCXPH$ZE&24(Jlp3|m%{`|2e&L3am_)>>&UOdG+>pSdDJef?#Dh6di z#})ka!%c3~@$ms5a!cn=LuBeBR>SGV9w!#M6wb2LDd-oL$|v62*x_G)c9}Ze8?+&3 zl+s8Bawe0JyQU>zRJ8)?TUqh+%7Ep5$=A=FBuXMB+H{(Pok@)~216u0E&#HCkBL#` zxjwDQ0;MrnB`Bo`BGf)Gsw2PL*yZx>m=N#3oJk!xY-;KlxG||1R{^;Xy?cS3NyC5q z`~%Ld4mp2($gopTC_~CZ9V2^<=i2V%p5o~@G1Tpi5d{=R(JL(dPC>_5oM~gDO;Z^k z5@R2zVkA5yAgzL4Vd)hGF-NK-)J@<)VooVVVHE@C=sAngZAQqnhcXR`sSo&+A7cmr zO%i+^7&QrVvmOwVHi!sWa+~iTK}J#Fu!$Tt5u;||^^o}@f)B!eY&d8FN~xK45D;lA zN_os#;C3#ElDUI!@~=-mpLOB#>D-igQe$q3%>now9BO8t>Bq$ZzpbM%uuwRt zQ=h4Qn_0AK-K#t!vCqmocL1O@6xvW&eNzqknD}k^0YTR}zVp>*>6V(!!-oI5w2sdi zl`@^e(sPEw7{*Ox;v0PWoB&v*`R>;)aB4Upi}0WCt@D$Qcc{a?to+9p*7&QJpCtoV zZ;bf+*WP^mH^3T(J;!RlLrgH}IBrZEFp5E8`SuG>v$EJD_`pB?=v{nDpN|8SQha!Q zM7OXkcT2u}dWC-B?&$+q;2Y=GZoYo~@+RgoKC;Xv4t*ci#^7TrN~>0g6Hbgg}$@&N=Jx^+TW#T9tJ=X;jj*Tv^!$4ThMWa3rB~nA53VXZ#U`S@s;x|^*hu)@{d1! zmvPfBwX}-A`?If8Xu~u#eDC`|A^X%S!_2Ix5{yy|jOFELPV&srB^I2SfvfqOKYfuX z5Mt(g|NaL4v!=m1MlNmbQ~8Y5nx|F<44lJhjRlSr1s9JmBLbr)^7hpoM1+nptS$6g z_a|kLL}?VOgAUKHE%EBJr#QLPqvI^r7?wIEgHFlOeo4m}28Fwg>Ed*ZFFm|DM4wuWi5#?sRM6Z-eBwCkZ)dWO`1_1`M0+&vo)$1 z*ZvM8=8*toVA3S6+!%3ep+~2*oLw34@!l9Mii;;#aR#PU!~53{2ziFOQoO&t&th4i zv_hkZQ8=h-3Nxgu3swg`I@U6tkwHcv3n>X!t9GMVa840Yn|^%;W7^XeEoh}tvqobQ zn8ZjG6TQN5P}MAyhQr!3ssr1jDL&@UFNAyw{Az2DFP~m!*eN-=IN;<$4{hN1phFhe zuRI^!7!h)!Fq%PWIeTP*)5{AimJVYzPHPsrj?!t2ZY}WEamu7Dvo>fO(^`X3imah9 znwS$QL&y=U^ekQt8bKR{)|!}wPwj}9X1&KO#GJAErb++n007&gimii+h0-zXmRwjH zqE+h^c*%TnIOd@8#H1ONh8NaWc=6OK-O`ehP=|obm|B#<8bin0n+=nY5>~YvX)}gG zD}~jXzO`ggq@0K;;oxRz-rlaZuMm^8Kq?_gMn1KaXV#wP(XF%xO5w^!-rwBi?DBwK zS#WM;5v8E(4EvLYjlBsIACa8txRSF+7g<>>kqmFFZ}Gv-0Wk^7eaD}_^eje0r_}h+ zF1q#+nXR{9I=a@-a|Y`S$A=xP*5nK+3EG$zDAVpnXxsKV3o56ZF0(<<8m!ioMpHUX zLP#k;P#QlFfF@=x?Huw)F)=9N=t36}sA6VR2iEs1DxZn8t#6|hPH9B9o2z`p8pR8z zRxz_vb6OK55;KfLVp0cWQCR3$UO2go($Fs)&z)GoY2BJ6F{7qqdlB00YG`zd9$3aySQ=0)Mb>PSEuM=bB#gj)_?iP$w z;H_)d`N5@iUV8di8^ua7ZsewA_WG3#l!8AvbCf}+;IQ)ie0`T|yOXwOXf!*cnyHWE 
zEL1+SeJ~+}#O|b~iUF+^ISZ4Nc=O5@XvJEui?twHV^y0%<-4<6PyDu2Vf4s9Dq3ha{%T5%mJ7KFb7}`z#M=%0AIlIze{Bb-m5q9z5oCK07*qo IM6N<$g5_6x-2eap literal 0 HcmV?d00001 diff --git a/modules/gdcvault/favicon.xcf b/modules/gdcvault/favicon.xcf new file mode 100644 index 0000000000000000000000000000000000000000..32695aaa5cfdb5863c913b5e077dbbc5859cf43d GIT binary patch literal 5024 zcmZ{m*>@Y)dB6oqc5F#*-PBtWr%~RV)7r;5$8qaErR_scA9_yz1x1zyJ9kklDT$IP zS(QXdj;bU|oY)#{1h_9E#YGp(Ut`(I;Za7T+BCje&4;{ zdI3bCX*qW0#HnMa6zY=);PuPrLz4dr4w(#o{WTn~{B!@IN78`u8*m(g<8?URdM-=c zn~>MQ`)deX_Wcvh8qG;<2Yfytz5M>aw4FTOaq`n+U5XC*v48!yfBz5Z&FlN9LUSBB z`99L7_*9yMwCuop?Hy;;Cy%u^s}-OA?C^W1kQ2@7=C)(+?cd&SLw~F;LR}zf<4L{JE+WhnJD#8qM+N5jpV2haY_O3;6lulTZHP!;e1t3~)?f0?mLT=OXNQ1nCG>IE^j>L<}*=rp=uE=Jl z!j$4G!I=x{mMgYmrGgi8x%Cd@ZDdW}5cgVJEZ@bJTPEgH?)$J^!btvJcLw^Jxm;vd zuBqiM_m^{ld+i9)d@E3hK4@1W?;^bB_BylGD^X|5!p=1n(tHn6_uC+KH&hDWgVc`@ zR^IV&xfs5of;(rf5W>bK6{5WxE-c<{MG*DPKr#BTQw6!0`s^eBuIElOg0y_a^6oD9 z_ErR6>4%u|k~Npm_rQq%5J_t;{NB0le0aXC>$BY&{X6KrqAy0SLPZ%uc6^m6E#1A) zs%`0LRUvO8745}GVAVhL=r_g~&H0+4v3&0~z4i^QJm<(}gOBg^KYVOH2Wdic{=t+l z=ye6Qas^u}auDIWCe!7Zo@8%(*eWe`| zoYq*P7thII^mx^l*w8Cstx%UX$d&%~*4tZL<_v@~%Bwp}>Yf%B$W>mX&MMXIUFR;G z?a)FkLDO*tx*|i03awf#zr0SY-oJ3=4_T4C(yoGTxKG>g(j-i$W~NqmwQ2sawYwb& zOy2BND$fVw5m-}NoGG*@uS6@W53bK|Ga^TnvKr(S#I5~(npo~ttGc%Z{I{*0liTs_ z?cHtfoi^>w4JO`$AYFt=o`IUg+JhVOJIo$i$;DeFO-1)gCGn@u&Oa8b(My^x6Z+`k z*I0UIqPwksgO2qokc%Y|Kc~974mG>4>(hMxx4m6jXsxe(yifqw`W!@Y{Ji?~wfkQ@ z_;RX{zUhbl*cY|EJ-`;s|joP@Sff?{=ZtYZJU;b$W-*|Ak?FtGO zLu*N@y4u%zD@Lu|(5kN|Yst&1Yg=^mPIL3^6+CiRiyTFKU7m8@=1AwPeQn*ASaK&C z$&kXq&o5T#H#{d3Eghj^ad$%=;j@N zRV#XtcV^5-aVDw$mI1 zOC_Ox98IklZU&dejCFU9GU~s&aKR>|{hPhc%p( zYg;>7+Rk1+-=^-obmI~{V3e&F`|jNCZC4y0*y;wu2;;A(-V$zT77D}aJ zsgTPg*9Y}FgK2&Xr>OjWWSu;ol*Wz=XI-=sK(rYNFd z*#~9DtubezO2#IJ$Dho3D+0b~otw0nCf2EH4x5~ubFU1dZ=+d#U99V&bz^z9>>4&0 z%nP|{1`L^GCUV$SV$w6v2&2Vff~;T`#F`P6p=p!5%5IDqQNw(ONzR*es9_>X65bIV z`eRghLhR8N=wD`>u4Sf-sA(ZZ?arA{)G!mLb6`M+zKim@xqPiK2C1xtX%wdnsBs}p 
zenZVB=`6&5jPk~X6qBCXUy5tw$00PIp_BVdO(&V$Gq}``&<&m0wo{7F!VsGrwR(II zZuB(6rsf7vR6ic0aQBD-Vrl(|V>7pH9kL8gmKwYb;<_h!E)Df$C~3xUIypaV)EUgs zn>W#lVbs2!*$UtXx~8lVmQL{Z2=x(TI*HYhsbnwtYIzXigCei*FmB(}E_1%i$Z zpa)TYti)wzjS{OHT`!gMxpZoGZCGM;<|p3pS|qx;xiSm4g6u##C<0O6x5k65rX5 zg{_0=TPST9oSdGWn|(57F`_TQkeNq^EjsiN%IHi(^g8ra z_<+8GHgpD~%-Ddn)gxZM)#)L21Qql~i`j_&6s0UX+4XUg-ZDA?7dwpd zhB#ivOwgE`TNI0Md1|CkP0gB2V_U`afJ7L547)ol5d~57NW^G?OD~uQW=l142ojub zvc%!@QlStNqR0-R2J_(1(6B{{v|uz3?YoV`1dQjanT4@QJ1w%4(%PQESe0HLf{9!e z_u?i4mWTgPcEi(Vy?!K7&YEGFaso4ej%V4-v&pq0FV;Agf^NG__DVH9u1AMUdn_;x zEEkJ~V!60IYc@?6_|hnf4sv4Ef{shIJ-uG$MWITSO;FP_ny!|e<^cy;&x{%@evi%S z-mKu^G4ouW&rhQ0FeB2#x`{$H^K9gaw^Ac*lY@Gw=o?t3nDxntZH8JNHVjV9KYQi~ zQ%rmcDzcd;2HgZHl4JU*BAZ>XSmrZq{;3g0-e;J_*{lsK(D`SB0~4*PdoR6kl2D)U3*UV_h0 z8D~mD;fYQ+o#6^#&?uFMcdX3UglgjHfN>(pRs++syMjoMpq8Y-MV?_vic3E=PZxyz z6pBtHxk_+u6c*-T)N6{e^*XUKYS39i6kBDeSY71IsLoa8s+H^p!KS9oPjhT$0!1xr z6`~jqjp^Zj+zehRreMFwP{tC-66K`D z$3PRvdbmc}Zv*xvoGS zunfzVBB9{Y+NJ}%1xROjr%=dci)58c+rdG=d*~XzC`q&{OVcEdmx@UjBq@6!x|2*K zGnvGa1LmD~6!Emj1|`N3PUMP(%+9LB(N@eC3@!x&Ud#$!1~rG*iGi0u&1!S`0#|AdwbiT+Xzg1-u-WZ4E9jB^6R2T67X}@YS^omm>=aEN6UnE9UmOz!AXP zvNRR3LGx-+Q6!~MIZ=|Tyo`5GRu2MMTUw*@GX@d7_lCzl);Eba=dD5&Mk zb8ISIWmA5qXOm!P9B1kx<+J+9dS%gph3Gm7wa7XVwA&YPp%Rj`e6~onPEkC$?smYg z;&wtyt95C4H(6&wF7JA^P6TXzvOz2Y5GV^|7?VtFc&NIpz9f{EoOWMP;5P${`G&|k z0amK<$ZY_B*STjoOnOgjSOJLgHI6Q& z8LqVG3|4q4s?H=`CGv6DutxwFpcuNe-=7S&NpjRSJVEwg6(pZU%XB#wER<4S%$F*q zT(J54sdBlv>jkd?!Qpa2gTD`C4$AIu`CJa0)4S-EikpLvivhP4d7~1LA)J3ci%9 literal 0 HcmV?d00001 diff --git a/modules/gdcvault/pages.py b/modules/gdcvault/pages.py new file mode 100644 index 0000000000..73ac80a782 --- /dev/null +++ b/modules/gdcvault/pages.py @@ -0,0 +1,159 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2010-2011 Romain Bignon +# Copyright(C) 2012 François Revol +# +# This file is part of weboob. 
class VideoPage(BasePage):
    """A gdcvault.com "play" page.

    The page embeds the player in an iframe whose URL points at an XML
    configuration file; that file carries the stream host, the speaker and
    slides stream paths, the date and the duration.
    """

    # Patterns are raw strings so that \s, \d and \? reach the regex engine
    # unchanged instead of relying on unknown string escapes.
    _TITLE_RE = re.compile(r'GDC Vault\s+-\s+(.*)')
    _PLAYER_RE = re.compile(
        r'(http:.*)player\.html\?.*xmlURL=([^&]+)&token=([^&]+)')
    _DURATION_RE = re.compile(r'(\d\d):(\d\d):(\d\d)')
    _AVAILABLE_RE = re.compile(r'(\d\d)-(\d\d)-(\d\d\d\d)\s*(\d\d):(\d\d)')
    _SCHEDULE_RE = re.compile(
        r'(\d\d):(\d\d)\s*/\s*(\d\d):(\d\d)\s*-\s*(\d\d)-(\d\d)-(\d\d\d\d)')

    def get_video(self, video=None):
        """Build (or complete) the video described by this page.

        :param video: existing GDCVaultVideo to fill; when None a new one
                      is created from the 'id' group of the page URL
        :returns: the video, or None when the page has no player iframe or
                  the iframe URL is not of the expected form
        :raises BrokenPageError: when the XML config lacks the stream host
                                 or the speaker stream
        """
        if video is None:
            video = GDCVaultVideo(self.group_dict['id'])

        # The config file has the title too, but in CDATA.
        titles = self.parser.select(self.document.getroot(), 'title')
        if len(titles) > 0 and titles[0].text:
            title = titles[0].text.strip()
            m = self._TITLE_RE.match(title)
            if m:
                title = m.group(1)
            video.title = unicode(title)

        # Everything else comes from the player's XML config file.
        iframe = self.parser.select(self.document.getroot(), 'iframe', 1)
        if iframe is None:
            return None
        m = self._PLAYER_RE.match(iframe.attrib.get('src', ''))
        if not m:
            return None
        # group(1) is the player base URL, group(2) the relative XML path.
        config_url = m.group(1) + m.group(2)
        config = self.browser.get_document(self.browser.openurl(config_url))
        root = config.getroot()

        node = self.parser.select(root, 'akamaihost', 1)
        host = node.text if node is not None else None
        if host is None:
            raise BrokenPageError('Missing <akamaihost> tag in xml config file')

        # A talk can expose two feeds; keep those that are present.
        videos = {}
        for feed, tag in (('speaker', 'speakervideo'), ('slides', 'slidevideo')):
            node = self.parser.select(root, tag, 1)
            if node is not None and node.text:
                videos[feed] = 'rtmp://' + host + '/' + node.text
        if 'speaker' not in videos:
            raise BrokenPageError('Missing <speakervideo> tag in xml config file')

        node = self.parser.select(root, 'date', 1)
        if node is not None and node.text:
            video.date = parse_dt(node.text)

        node = self.parser.select(root, 'duration', 1)
        m = self._DURATION_RE.match(node.text or '') if node is not None else None
        if m:
            hours, minutes, seconds = [int(part) for part in m.groups()]
            video.duration = datetime.timedelta(hours=hours,
                                                minutes=minutes,
                                                seconds=seconds)

        # TODO: video.author from the <speaker> tag (stored as CDATA).

        # XXX: only the speaker feed is exposed for now; the slides feed
        # would need a distinct video id.
        video.url = unicode(videos['speaker'])

        video.set_empty_fields(NotAvailable)
        return video

    def set_details(self, v):
        """Fill ``v.date`` and ``v.duration`` from markup of the HTML page.

        NOTE(review): nothing calls this method at present, and the
        selectors used ('meta[name=available]', 'span.ep_subtitle') do not
        look specific to gdcvault.com -- confirm before enabling it.

        :raises BrokenPageError: when a date string is present but does not
                                 have the expected layout
        """
        root = self.document.getroot()

        meta = self.parser.select(root, 'meta[name=available]', 1)
        if meta is not None:
            value = meta.attrib['content']
            m = self._AVAILABLE_RE.match(value)
            if not m:
                raise BrokenPageError('Unable to parse datetime: %r' % value)
            # Layout: DD-MM-YYYY HH:MM
            day, month, year, hour, minute = [int(g) for g in m.groups()]
            v.date = datetime.datetime(year=year, month=month, day=day,
                                       hour=hour, minute=minute)

        subtitle = self.parser.select(root, 'span.ep_subtitle', 1)
        if subtitle is not None:
            span = self.parser.select(subtitle, 'span.ep_date', 1)
            value = span.text or u''
            m = self._SCHEDULE_RE.match(value)
            if not m:
                raise BrokenPageError('Unable to parse datetime: %r' % value)
            # Layout: HH:MM / HH:MM - DD-MM-YYYY (begin / end - day)
            bhour, bminute, ehour, eminute, day, month, year = \
                [int(g) for g in m.groups()]

            start = datetime.datetime(year=year, month=month, day=day,
                                      hour=bhour, minute=bminute)
            end = datetime.datetime(year=year, month=month, day=day,
                                    hour=ehour, minute=eminute)

            v.duration = end - start
class GDCVaultTest(BackendTest):
    """Functional tests for the gdcvault backend.

    No test is enabled yet: the two drafts below depend on backend
    features (search and the 'latest' listing) that are still commented
    out in the backend and browser.
    """
    # Name of the backend to test.
    BACKEND = 'gdcvault'

    # NOTE(review): both drafts expect v.url to start with 'http://', while
    # the page parser builds 'rtmp://' stream URLs -- adjust the assertion
    # before enabling them.

    # def test_search(self):
    #     l = list(self.backend.search_videos('linux'))
    #     self.assertTrue(len(l) > 0)
    #     v = l[0]
    #     self.backend.fillobj(v, ('url',))
    #     self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
    #     self.backend.browser.openurl(v.url)

    # def test_latest(self):
    #     l = list(self.backend.iter_resources([BaseVideo], [u'latest']))
    #     self.assertTrue(len(l) > 0)
    #     v = l[0]
    #     self.backend.fillobj(v, ('url',))
    #     self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
class GDCVaultVideo(BaseVideo):
    """A video hosted on gdcvault.com; streams are announced as FLV."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to BaseVideo and set the extension."""
        BaseVideo.__init__(self, *args, **kwargs)
        self.ext = u'flv'

    @classmethod
    def id2url(cls, _id):
        """Return the play-page URL for *_id*.

        Plain ids ('1234') map to ``.../play/1234``. The id namespace is
        enlarged with a feed suffix to tell apart the two videos of one
        page: '1234#speaker' and '1234#slides' map to the same page with
        the feed kept as the URL fragment.
        """
        # Split the id so the fragment is emitted once: formatting the
        # whole id and then appending the suffix again would produce
        # '.../play/1234#speaker#speaker'.
        m = re.match(r'(\d+)#(speaker|slides)$', _id)
        if m:
            return u'http://www.gdcvault.com/play/%s#%s' % m.groups()
        return u'http://www.gdcvault.com/play/%s' % _id