#!/usr/bin/env python3

# Copyright(C) 2017  Vincent A
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.

from __future__ import print_function

import ast
import fnmatch
import os
import traceback

import lxml.etree
from weboob.browser.filters import standard


class Error(SyntaxError):
    """Problem found in an XPath literal, tied to a source location.

    The exception text has the form ``<file>:<line>: <message>``; the
    location is also kept on the ``file`` and ``line`` attributes.
    """

    def __init__(self, file, line, message):
        text = '%s:%s: %s' % (file, line, message)
        super(Error, self).__init__(text)
        self.file, self.line = file, line


def do_visits(*funcs):
    """Build a ``visit_*`` method out of several per-node checks.

    The returned function calls every item of *funcs* as
    ``func(self, node)``, in the order given, and then calls
    ``self.generic_visit(node)`` so that child nodes are traversed too.
    """
    def wrapper(self, node):
        for check in funcs:
            check(self, node)
        # Keep walking: the checks only look at the node itself.
        self.generic_visit(node)

    return wrapper


class Visitor(ast.NodeVisitor):
    """AST visitor that validates XPath string literals found in one module.

    Three kinds of string literals are compiled with ``lxml.etree.XPath``:

    * the value of an ``item_xpath = '...'`` assignment located directly in
      a class deriving from ``ListElement`` or ``TableElement``;
    * the first positional argument of any ``<expr>.xpath('...')`` call;
    * the first positional argument of a call to a name listed in
      ``self.filters``.

    A literal that fails to compile raises ``Error``. When warnings are
    enabled, an XPath that does not start with ``.`` inside an
    ``ItemElement`` nested in a list/table element is reported on stdout.
    """

    # Base-class names that open an element scope tracked by the visitor.
    _ELEMENT_BASES = ('ListElement', 'ItemElement', 'TableElement')
    # Element scopes in which ``item_xpath`` is checked.
    _LIST_BASES = ('TableElement', 'ListElement')

    def __init__(self, file, *args, **kwargs):
        """Create a visitor for the source file named *file*.

        :param file: file name used as prefix in errors and warnings
        :param warnings: keyword-only flag (default ``False``) enabling the
                         "missing leading dot" warning
        """
        self.warnings = kwargs.pop('warnings', False)
        super(Visitor, self).__init__(*args, **kwargs)
        self.file = file

        # Names of callables whose first argument is treated as an XPath:
        # every class of the ``standard`` filters module that derives from
        # CleanText (CleanText included), plus a few explicit names.
        self.filters = []
        self.filters.extend(
            name for name in dir(standard)
            if isinstance(getattr(standard, name), type)
            and issubclass(getattr(standard, name), standard.CleanText)
        )
        self.filters.extend(['Regexp', 'XPath', 'Attr', 'Link'])

        # Stack of element base-class names for the classes being visited,
        # outermost first.
        self.element_context = []

    @staticmethod
    def _str_literal(node):
        """Return the text of *node* if it is a string literal, else ``None``.

        ``ast.Str`` is deprecated since Python 3.8 and removed in 3.14, so
        the attribute is never looked up on the ``ast`` module here.
        """
        constant_cls = getattr(ast, 'Constant', None)
        if constant_cls is not None and isinstance(node, constant_cls):
            return node.value if isinstance(node.value, str) else None
        # Interpreters older than 3.8 still produce dedicated ``Str`` nodes.
        if type(node).__name__ == 'Str':
            return node.s
        return None

    def _first_arg_literal(self, node):
        """Return the first positional argument of call *node* when it is a
        string literal, else ``None``."""
        if not node.args:
            return None
        return self._str_literal(node.args[0])

    def check_xpath(self, s, lineno):
        """Compile the XPath expression *s* found at line *lineno*.

        :raises Error: if lxml rejects the expression
        """
        try:
            lxml.etree.XPath(s)
        except lxml.etree.XPathSyntaxError as exc:
            raise Error(self.file, lineno, exc)

        if not self.warnings:
            return
        # Leading parentheses are ignored when looking for the initial ".".
        if s.lstrip('(').startswith('.') or len(self.element_context) < 2:
            return
        inner, outer = self.element_context[-1], self.element_context[-2]
        if inner == 'ItemElement' and outer in self._LIST_BASES:
            print('%s:%s: probable missing "." at start of XPath' % (self.file, lineno))

    def _item_xpath(self, node):
        """Check ``item_xpath = '<literal>'`` inside a list/table element."""
        if len(node.targets) != 1:
            return
        target = node.targets[0]
        if not isinstance(target, ast.Name) or target.id != 'item_xpath':
            return
        if not self.element_context or self.element_context[-1] not in self._LIST_BASES:
            return
        xpath = self._str_literal(node.value)
        if xpath is None:
            return

        self.check_xpath(xpath, node.lineno)

    visit_Assign = do_visits(_item_xpath)

    def _xpath_call(self, node):
        """Check the literal first argument of ``<expr>.xpath(...)`` calls."""
        if not isinstance(node.func, ast.Attribute) or node.func.attr != 'xpath':
            return
        xpath = self._first_arg_literal(node)
        if xpath is None:
            return

        self.check_xpath(xpath, node.lineno)

    def _filter_call(self, node):
        """Check the literal first argument of calls to a known filter name."""
        if not isinstance(node.func, ast.Name) or node.func.id not in self.filters:
            return
        xpath = self._first_arg_literal(node)
        if xpath is None:
            return

        self.check_xpath(xpath, node.lineno)

    visit_Call = do_visits(_xpath_call, _filter_call)

    def visit_ClassDef(self, node):
        """Visit a class body, tracking which element base it derives from.

        Only bases written as plain names are considered; the first one
        matching a known element class is pushed on ``element_context`` for
        the duration of the visit.
        """
        element_base = next(
            (base.id for base in node.bases
             if isinstance(base, ast.Name) and base.id in self._ELEMENT_BASES),
            None,
        )
        if element_base is None:
            self.generic_visit(node)
            return

        self.element_context.append(element_base)
        try:
            self.generic_visit(node)
        finally:
            # Keep the stack balanced even when a check raises.
            self.element_context.pop()


def search_py(root):
    """Yield the path of every ``*.py`` file found below *root*.

    Sub-directories are descended in sorted order; paths are built by
    joining the walked directory with the file name.
    """
    for dirpath, subdirs, filenames in os.walk(root):
        # Sorting in place changes the order in which os.walk descends.
        subdirs.sort()
        python_files = fnmatch.filter(filenames, '*.py')
        for name in python_files:
            yield os.path.join(dirpath, name)


if __name__ == '__main__':
    # Command-line entry point: parse every Python file under the modules
    # directory and validate the XPath literals found in it.
    import argparse

    parser = argparse.ArgumentParser(description="Check XPath definitions")
    parser.add_argument('-w', '--warnings', action='store_true')
    args = parser.parse_args()

    # WEBOOB_MODULES, when set, replaces the default "../modules" directory
    # resolved relative to this script's own directory.
    modpath = os.getenv('WEBOOB_MODULES', os.path.normpath(os.path.dirname(__file__) + '/../modules'))
    for fn in search_py(modpath):
        # Read raw bytes so the parser decodes the source itself (encoding
        # declaration or UTF-8 default) instead of using the locale encoding.
        with open(fn, 'rb') as fd:
            source = fd.read()

        try:
            node = ast.parse(source, fn)
        except SyntaxError:
            print('In file', fn)
            # print_exc() reports the exception being handled; its first
            # parameter is a traceback limit, not an exception object.
            traceback.print_exc()
            # Nothing was parsed for this file: do not visit a stale tree.
            continue

        try:
            Visitor(fn, warnings=args.warnings).visit(node)
        except SyntaxError as exc:
            # Error derives from SyntaxError; its text already carries the
            # file and line prefix.
            print(exc)