Commit 33675ba4 authored by ntome

[larousse] site was updated

parent 81be9edb
......@@ -25,13 +25,10 @@ from .pages import LangList, WordPage
class LarousseBrowser(PagesBrowser):
BASEURL = 'http://www.larousse.fr'
BASEURL = 'https://www.larousse.fr'
langlist = URL('/dictionnaires/bilingues$', LangList)
# warning: the order of params is important...
word = URL(r'/dictionnaires/rechercher\?q=(?P<word>.*)&l=(?P<src>\w+)-(?P<dst>\w+)&culture=',
r'/dictionnaires/(?P<src>\w+)-(?P<dst>\w+)/(?P<word>[^/]+)(?:/(?P<id>\d+))?',
WordPage)
word = URL(r'/dictionnaires/(?P<src>\w+)-(?P<dst>\w+)/(?P<word>.*)', WordPage)
LANGS = None
......
......@@ -41,14 +41,12 @@ RCODES = {v: k for k, v in CODES.items()}
class LangList(HTMLPage):
def get_langs(self):
res = {}
for a in self.doc.xpath('//ul[@class="menu-items"]/li//a'):
for a in self.doc.xpath('//a[@class="item-dico-bil"]'):
url = a.attrib['href']
mtc = re.search(r'/dictionnaires/(\w+)-(\w+)', url)
if not mtc:
continue
src, dst = mtc.groups()
if dst == 'monolingue':
continue
res[CODES[src], CODES[dst]] = (src, dst)
return res
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment