From 7c573b47a6dcc007fd0e72ea69364bfddfbc5b3b Mon Sep 17 00:00:00 2001 From: lucien Date: Sun, 1 Apr 2012 18:46:13 +0200 Subject: [PATCH] * translaboob is a weboob application for automatic translation * new capability is defined, ICapTranslate * googletranslate is a backend for the translaboob application --- modules/googletranslate/__init__.py | 24 +++++++ modules/googletranslate/backend.py | 38 ++++++++++ modules/googletranslate/browser.py | 64 +++++++++++++++++ modules/googletranslate/pages.py | 31 ++++++++ modules/googletranslate/tools.py | 41 +++++++++++ scripts/translaboob | 26 +++++++ weboob/applications/translaboob/__init__.py | 23 ++++++ .../applications/translaboob/translaboob.py | 71 +++++++++++++++++++ weboob/capabilities/translate.py | 56 +++++++++++++++ 9 files changed, 374 insertions(+) create mode 100644 modules/googletranslate/__init__.py create mode 100644 modules/googletranslate/backend.py create mode 100644 modules/googletranslate/browser.py create mode 100644 modules/googletranslate/pages.py create mode 100644 modules/googletranslate/tools.py create mode 100755 scripts/translaboob create mode 100644 weboob/applications/translaboob/__init__.py create mode 100644 weboob/applications/translaboob/translaboob.py create mode 100644 weboob/capabilities/translate.py diff --git a/modules/googletranslate/__init__.py b/modules/googletranslate/__init__.py new file mode 100644 index 0000000000..9d568e9d3b --- /dev/null +++ b/modules/googletranslate/__init__.py @@ -0,0 +1,24 @@ +"GoogleTranslateBackend init" +# -*- coding: utf-8 -*- + +# Copyright(C) 2012 Lucien Loiseau +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from .backend import GoogleTranslateBackend
+
+
+__all__ = ['GoogleTranslateBackend']
diff --git a/modules/googletranslate/backend.py b/modules/googletranslate/backend.py
new file mode 100644
index 0000000000..50f567030f
--- /dev/null
+++ b/modules/googletranslate/backend.py
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Lucien Loiseau
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+"backend for http://translate.google.com"
+
+from weboob.capabilities.translate import ICapTranslate
+from weboob.tools.backend import BaseBackend, BackendConfig
+from .browser import GoogleTranslateBrowser
+
+__all__ = ['GoogleTranslateBackend']
+
+class GoogleTranslateBackend(BaseBackend, ICapTranslate):
+    """
+    Backend providing the ICapTranslate capability on top of
+    GoogleTranslateBrowser.
+    """
+    # Backend metadata.
+    MAINTAINER = 'Lucien Loiseau'
+    EMAIL = 'loiseau.lucien@gmail.com'
+    VERSION = '0.c'
+    LICENSE = 'AGPLv3+'
+    STORAGE = {'seen': {}}
+    NAME = 'googletranslate'
+    DESCRIPTION = u'Google translation web service'
+    # Browser class (presumably what BaseBackend exposes as self.browser --
+    # confirm against BaseBackend).
+    BROWSER = GoogleTranslateBrowser
+
+    def translate(self, lan_from, lan_to, text):
+        """
+        Translate 'text' from one language to another.
+
+        The call is forwarded unchanged to the browser's translate() method.
+
+        :param lan_from: source language
+        :param lan_to: destination language
+        :param text: text to translate
+        :returns: whatever self.browser.translate() returns
+        """
+        return self.browser.translate(lan_from, lan_to, text)
diff --git a/modules/googletranslate/browser.py b/modules/googletranslate/browser.py
new file mode 100644
index 0000000000..eb768f7b86
--- /dev/null
+++ b/modules/googletranslate/browser.py
@@ -0,0 +1,64 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Lucien Loiseau
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.browser import BaseBrowser, BrowserHTTPNotFound, BrowserIncorrectPassword
+from weboob.tools.browser.decorators import id2url, check_url
+from weboob.capabilities.translate import TranslationFail
+
+from .pages import TranslatePage
+
+import urllib
+
+__all__ = ['GoogleTranslateBrowser']
+
+class GoogleTranslateBrowser(BaseBrowser):
+    """
+    Browser sending translation requests to translate.google.com and reading
+    the result from the returned TranslatePage.
+    """
+    DOMAIN = 'translate.google.com'
+    ENCODING = 'UTF-8'
+    USER_AGENT = BaseBrowser.USER_AGENTS['desktop_firefox']
+    PAGES = {
+        # Raw string so that '\.' reaches the regexp engine untouched.
+        r'https?://translate\.google\.com': TranslatePage
+        }
+
+    def __init__(self, *args, **kwargs):
+        BaseBrowser.__init__(self, *args, **kwargs)
+
+    @staticmethod
+    def _utf8(value):
+        """
+        Return 'value' encoded as a UTF-8 byte string when it is a unicode
+        object; any other value is returned unchanged.
+        """
+        if isinstance(value, unicode):
+            return value.encode('utf-8')
+        return value
+
+    def translate(self, source, to, text):
+        """
+        translate 'text' from 'source' language to 'to' language
+
+        :param source: source language, sent as the 'sl' field
+        :param to: destination language, sent as the 'tl' field
+        :param text: text to translate (byte string or unicode)
+        :returns: the value of the page's get_translation(), or the string
+                  "no translation available" if TranslationFail is raised
+        """
+        # urllib.urlencode() calls str() on every value, which raises
+        # UnicodeEncodeError for unicode objects holding non-ASCII characters.
+        # Encode them to UTF-8 (the charset announced in 'ie') beforehand.
+        d = {
+            'sl': self._utf8(source),
+            'tl': self._utf8(to),
+            'js': 'n',
+            'prev': '_t',
+            'hl': 'en',
+            'ie': 'UTF-8',
+            'layout': '2',
+            'eotf': '1',
+            'text': self._utf8(text),
+            }
+        # Only the request and the page parsing are guarded.
+        try:
+            self.location('http://' + self.DOMAIN, urllib.urlencode(d))
+            return self.page.get_translation()
+        except TranslationFail:
+            return "no translation available"
diff --git a/modules/googletranslate/pages.py b/modules/googletranslate/pages.py
new file mode 100644
index 0000000000..4710fed7f8
--- /dev/null
+++ b/modules/googletranslate/pages.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Lucien Loiseau
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob.
+# If not, see <http://www.gnu.org/licenses/>.
+
+from datetime import date
+from weboob.tools.browser import BasePage
+
+
+class TranslatePage(BasePage):
+    """
+    Page holding the result of a translation request.
+    """
+    def get_translation(self):
+        """
+        Return the translated text found in the span whose id is 'result_box'.
+
+        The texts of all children of that span are concatenated. Returning
+        only the first child would truncate the result whenever the box holds
+        several children (presumably one per sentence -- confirm against the
+        live markup).
+
+        :returns: the concatenated text, or None when no 'result_box' span
+                  with children exists in the document
+        """
+        for span in self.document.getiterator('span'):
+            if span.attrib.get('id', '') != 'result_box':
+                continue
+            # A child without text contributes an empty string.
+            pieces = [child.text or '' for child in span.getchildren()]
+            if pieces:
+                return ''.join(pieces)
+        return None
diff --git a/modules/googletranslate/tools.py b/modules/googletranslate/tools.py
new file mode 100644
index 0000000000..15d6b9ec9e
--- /dev/null
+++ b/modules/googletranslate/tools.py
@@ -0,0 +1,53 @@
+"tools for lefigaro backend"
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011 Julien Hebert
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+# NOTE(review): this module looks copied from another backend: the docstring
+# names lefigaro, id2url() builds 20minutes.fr URLs, and nothing else in this
+# patch imports it. Confirm whether googletranslate needs it at all.
+
+import re
+
+# Compiled once at import time. The '.' separators are unescaped, so any single
+# character is accepted between the three groups.
+_ID_REGEXP = re.compile(r"(\w+).([0-9]+).(.*$)")
+
+
+def id2url(_id):
+    """
+    Return the 20minutes.fr URL built from the three parts of an id.
+
+    :param _id: id made of a word, a number and a remainder
+    :returns: 'http://www.20minutes.fr/<word>/<number>/<remainder>'
+    :raises ValueError: if the id does not have that shape
+    """
+    match = _ID_REGEXP.match(_id)
+    if match is None:
+        raise ValueError("id doesn't match")
+    return 'http://www.20minutes.fr/%s/%s/%s' % match.groups()
+
+
+def url2id(url):
+    """Return the id of an url; the url itself is used as the id."""
+    return url
+
+
+def rssid(entry):
+    """Return the id of a feed entry, computed from its 'id' attribute."""
+    return url2id(entry.id)
diff --git a/scripts/translaboob b/scripts/translaboob
new file mode 100755
index 0000000000..0f86a8ad21
--- /dev/null
+++ b/scripts/translaboob
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Lucien Loiseau
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.applications.translaboob import Translaboob
+
+
+# Start the Translaboob application only when this file is executed as a
+# script, not when it is imported.
+if __name__ == '__main__':
+    Translaboob.run()
diff --git a/weboob/applications/translaboob/__init__.py b/weboob/applications/translaboob/__init__.py
new file mode 100644
index 0000000000..eea16995ab
--- /dev/null
+++ b/weboob/applications/translaboob/__init__.py
@@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Lucien Loiseau
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from .translaboob import Translaboob
+
+__all__ = ['Translaboob']
diff --git a/weboob/applications/translaboob/translaboob.py b/weboob/applications/translaboob/translaboob.py
new file mode 100644
index 0000000000..93608a56e0
--- /dev/null
+++ b/weboob/applications/translaboob/translaboob.py
@@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Lucien Loiseau
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+import sys
+
+import os
+import sys
+import codecs
+import locale
+
+from weboob.capabilities.translate import ICapTranslate
+from weboob.tools.application.repl import ReplApplication
+
+__all__ = ['Translaboob']
+
+class Translaboob(ReplApplication):
+    """
+    Console application translating text through the backends that implement
+    ICapTranslate.
+    """
+    APPNAME = 'translaboob'
+    VERSION = '0.c'
+    COPYRIGHT = 'Copyright(C) 2012 Lucien Loiseau'
+    DESCRIPTION = 'Console application to translate text from one language to another'
+    CAPS = ICapTranslate
+
+    def main(self, argv):
+        return ReplApplication.main(self, argv)
+
+    def do_translate(self, line):
+        """
+        translate FROM TO [TEXT]
+
+        Translate TEXT from one language to another.
+        FROM : source language
+        TO   : destination language
+        TEXT : text to translate, standard input if - is given
+        """
+        # The usage text above must be the first statement of the function to
+        # be its docstring; it used to follow the argument parsing and was
+        # therefore a dead string expression.
+        # NOTE(review): the argument placeholders were missing from the
+        # original text; FROM/TO/TEXT are reconstructed -- confirm the wording.
+        #
+        # Both FROM and TO are concatenated into the header below, so two
+        # arguments are required instead of one.
+        # NOTE(review): assumes the third parameter of parse_command_args() is
+        # the minimum number of arguments -- confirm against ReplApplication.
+        lan_from, lan_to, text = self.parse_command_args(line, 3, 2)
+
+        if not text or text == '-':
+            text = self.acquire_input()
+
+        print "from : " + lan_from + " to : " + lan_to
+        print ""
+        print text
+        print ""
+
+        # self.do() yields one (backend, translation) pair per result.
+        for backend, translation in self.do('translate', lan_from, lan_to, text):
+            print ""
+            print translation
+            print ""
diff --git a/weboob/capabilities/translate.py b/weboob/capabilities/translate.py
new file mode 100644
index 0000000000..753b293d46
--- /dev/null
+++ b/weboob/capabilities/translate.py
@@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Lucien Loiseau
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from datetime import date, datetime
+
+from .base import CapBaseObject, Field, StringField, DateField, DecimalField, IntField
+from .collection import ICapCollection
+
+
+__all__ = ['TranslationFail', 'ICapTranslate']
+
+
+class TranslationFail(Exception):
+    """
+    Raised when no translation matches the given request.
+    """
+
+    def __init__(self, msg='No Translation Available'):
+        # Forward the message (or its default) to the Exception constructor.
+        Exception.__init__(self, msg)
+
+
+class ICapTranslate(ICapCollection):
+    """
+    Capability of online translation websites to translate a word or a
+    sentence.
+    """
+    def translate(self, source_language, destination_language, request):
+        """
+        Perform a translation.
+
+        The implementation in this class does nothing but raise
+        TranslationFail.
+
+        :param source_language: language in which the request is written
+        :param destination_language: language to translate the request into
+        :param request: the sentence to be translated
+        :raises TranslationFail: always, in this base implementation
+        """
+        raise TranslationFail()
+
+
+
+
+
-- 
GitLab