From 54c2d50ecd8f94231ab99ec8231320cdb3d629d7 Mon Sep 17 00:00:00 2001 From: Vincent Ardisson Date: Thu, 6 Aug 2020 15:58:58 +0200 Subject: [PATCH] weboob.capabilities.bank: add capability method for diffing accounts PFMs need to keep track of accounts, which is done using various generic methods. However, complicated cases may occur on some sites, where no generic method works, without harming other modules. A new capability method is introduced to allow module-specific diff behavior. Essentially, a diff consists in: - matching accounts, pairs of new/old objects - new accounts, that were not seen before and are known to be new - obsolete accounts, that are known to have disappeared - unknown accounts, which could simply not be matched, but may be mapped by another method or manually by a user --- weboob/capabilities/bank/pfm.py | 100 +++++++++++++++ .../tools/capabilities/bank/data_matching.py | 120 ++++++++++++++++++ 2 files changed, 220 insertions(+) create mode 100644 weboob/capabilities/bank/pfm.py create mode 100644 weboob/tools/capabilities/bank/data_matching.py diff --git a/weboob/capabilities/bank/pfm.py b/weboob/capabilities/bank/pfm.py new file mode 100644 index 0000000000..23a4d47dac --- /dev/null +++ b/weboob/capabilities/bank/pfm.py @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2010-2016 Romain Bignon +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with weboob. 
# If not, see <http://www.gnu.org/licenses/>.

from .base import CapBank

__all__ = [
    'CapBankMatching', 'AccountDiff',
]


class AccountDiff:
    """Difference between 2 accounts lists.

    Plain container describing how freshly fetched accounts relate to the
    accounts of a previous synchronisation. Every attribute starts out as an
    empty list and is filled by whoever computes the diff.
    """

    def __init__(self):
        self.matching = []
        """List of new-old account pairs matching together"""

        self.obsolete = []
        """Accounts from the previous state that are not present in the latest state"""

        self.new = []
        """Accounts from the latest state that are not present in the previous state"""

        self.unknown = []
        """Accounts that could not be matched, but may still be mapped by another method or manually by a user"""


class CapBankMatching(CapBank):
    """
    Capability for matching data between synchronisations.

    This is mostly useful for PFM which have to compare states across time.
    For example, a PFM has to compare accounts freshly returned to accounts
    returned in a previous sync.
    """

    def diff_accounts(self, new_accounts, old_accounts):
        """Compute difference between 2 states of accounts lists.

        This function will remove elements from `new_accounts` and
        `old_accounts` as they are matched and put in the resulting
        `AccountDiff` object. Both containers must therefore be mutable
        and provide a `remove()` method.

        Only `AccountDiff.matching` is filled by this default implementation;
        each pair is found by calling :meth:`match_account`.

        Limitations may apply to the fields of `old_accounts` objects, see
        documentation of :meth:`match_account`.

        :param new_accounts: list of freshly fetched, not-matched-yet accounts
        :type new_accounts: iter[:class:`Account`]
        :param old_accounts: list of old, not-matched-yet accounts
        :type old_accounts: iter[:class:`Account`]
        :rtype: :class:`AccountDiff`
        :raises NotImplementedError: if :meth:`match_account` is not
            implemented by the module and there is at least one new account
        """

        diff = AccountDiff()
        # Iterate over a snapshot: matched accounts are removed from
        # `new_accounts` inside the loop, and mutating a container while
        # iterating over it either skips elements (list) or raises (dict).
        for new_account in list(new_accounts):
            old_account = self.match_account(new_account, old_accounts)
            if not old_account:
                # Falsy result (normally None) means "no match found".
                continue

            old_accounts.remove(old_account)
            new_accounts.remove(new_account)
            diff.matching.append((new_account, old_account))
        return diff

    def match_account(self, account, old_accounts):
        """Search an account in `old_accounts` corresponding to `account`.

        `old_accounts` is a list of accounts found in a previous
        synchronisation.

        However, they may not be the exact same objects but only reconstructed
        objects with the same data, and even that data could be partial.
        For example, they may have been marshalled, sometimes loosely, thus some
        attributes may be missing (like `_private` attributes) or unset (some
        PFM may choose not to even save all attributes).
        Also, `old_accounts` may not contain all accounts from previous state,
        but only accounts which have not been matched yet.

        :param account: fresh account to search for
        :type account: :class:`Account`
        :param old_accounts: candidates accounts from previous sync
        :type old_accounts: iter[:class:`Account`]
        :return: the corresponding account from `old_accounts`, or `None` if none matches
        :rtype: :class:`Account`
        :raises NotImplementedError: always, in this base implementation;
            modules are expected to override it
        """

        raise NotImplementedError()


# ---------------------------------------------------------------------------
# Patch metadata and header of the second file, kept from the original text:
#
# diff --git a/weboob/tools/capabilities/bank/data_matching.py b/weboob/tools/capabilities/bank/data_matching.py
# new file mode 100644
# index 0000000000..c25962e6ad
# --- /dev/null
# +++ b/weboob/tools/capabilities/bank/data_matching.py
# @@ -0,0 +1,120 @@
# ---------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Copyright(C) 2010-2020 weboob project
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.

# AccountDiff is defined in weboob/capabilities/bank/pfm.py; the
# `weboob.tools.capabilities.bank` package does not provide it.
from weboob.capabilities.bank.pfm import AccountDiff


__all__ = ['diff_accounts']


def group_by(iterable, func):
    """Group the elements of `iterable` by the key computed by `func`.

    :param iterable: objects to group
    :param func: callable returning the (hashable) grouping key of an object
    :return: mapping of each key to the list of objects sharing that key,
             each list being in iteration order
    :rtype: dict
    """
    grouped = {}
    for obj in iterable:
        grouped.setdefault(func(obj), []).append(obj)
    return grouped


def _unique_by_key(accounts, func):
    """Map each truthy key of `func` to its account, for unambiguous keys only."""
    # even though the key should be unique and maybe required, don't error if
    # it's missing or not unique: just ignore bad groups
    return {
        key: accs[0]
        for key, accs in group_by(accounts, func).items()
        if key and len(accs) == 1
    }


def match_unique(new_accounts, old_accounts, func):
    """Pair new and old accounts sharing a key that is unique on both sides.

    Accounts whose key is falsy, or shared by several accounts of the same
    side, are ignored. Every matched account is removed from its container.

    This is a generator: grouping and removals only happen while it is being
    consumed. Pairs follow the dict order of the new accounts' keys, rather
    than the hash order of a set intersection.

    :param new_accounts: mutable container of fresh accounts (needs `remove()`)
    :param old_accounts: mutable container of old accounts (needs `remove()`)
    :param func: callable returning the matching key of an account
    :return: iterator of `(new, old)` pairs
    """
    new_by_key = _unique_by_key(new_accounts, func)
    old_by_key = _unique_by_key(old_accounts, func)

    for key, new in new_by_key.items():
        if key not in old_by_key:
            continue
        old = old_by_key[key]

        new_accounts.remove(new)
        old_accounts.remove(old)
        yield new, old


class IdSet:
    """A set for unhashable objects, expecting to pass the same objects

    Elements are keyed by `id()`, so membership is a matter of identity,
    not equality. The container keeps a reference to every element.
    """

    def __init__(self, elements=()):
        self.container = {}
        for el in elements:
            self.add(el)

    def __iter__(self):
        # `__iter__` must return an iterator; a dict view is only an iterable.
        # A snapshot is iterated so that elements may be removed from the set
        # while looping over it.
        return iter(list(self.container.values()))

    def __contains__(self, el):
        return id(el) in self.container

    def __len__(self):
        return len(self.container)

    def add(self, el):
        """Add `el`; adding the same object twice has no effect."""
        self.container[id(el)] = el

    def remove(self, el):
        """Remove `el`.

        :raises KeyError: if `el` is not in the set
        """
        del self.container[id(el)]

    def discard(self, el):
        """Remove `el` if present, do nothing otherwise."""
        try:
            self.remove(el)
        except KeyError:
            pass


def _extend_missing(target, accounts):
    """Append to list `target` the `accounts` it does not hold yet (by identity)."""
    seen = IdSet(target)
    for account in accounts:
        if account not in seen:
            seen.add(account)
            target.append(account)


def diff_accounts(backend, new_accounts, old_accounts):
    """Compare accounts between a sync and previous sync

    Tries to match accounts just fetched with accounts fetched from a previous
    run of `iter_accounts()`.

    Accounts are first paired on a unique `id`, then on a unique `iban`.
    If the backend has a `diff_accounts` method, the accounts still unmatched
    are handed to it and its result is merged in; a `NotImplementedError`
    from the backend is ignored. Accounts left over are then classified:

    - no new account left: remaining old accounts are obsolete
    - new and old accounts left: remaining new accounts are unknown
    - only new accounts left: they are new

    The containers passed by the caller are copied, not modified.

    :param backend: backend from which the objects come
    :type backend: :class:`weboob.tools.backend.Module`
    :type new_accounts: iter[:class:`weboob.capabilities.bank.Account`]
    :type old_accounts: iter[:class:`weboob.capabilities.bank.Account`]
    :rtype: :class:`weboob.capabilities.bank.pfm.AccountDiff`
    """

    new_accounts = IdSet(new_accounts)
    old_accounts = IdSet(old_accounts)

    diff = AccountDiff()
    diff.matching.extend(match_unique(new_accounts, old_accounts, lambda acc: acc.id))
    diff.matching.extend(match_unique(new_accounts, old_accounts, lambda acc: acc.iban))

    if hasattr(backend, 'diff_accounts'):
        try:
            module_diff = backend.diff_accounts(new_accounts, old_accounts)
        except NotImplementedError:
            pass
        else:
            for new, old in module_diff.matching:
                # the backend may or may not have removed them itself
                new_accounts.discard(new)
                old_accounts.discard(old)
                diff.matching.append((new, old))

            # Copy rather than alias the backend's lists: they are extended
            # below and the backend's own object must not change.
            diff.obsolete = list(module_diff.obsolete)
            diff.new = list(module_diff.new)
            diff.unknown = list(module_diff.unknown)

    # Merge the generic classification into what the backend reported,
    # instead of overwriting it.
    if not new_accounts:
        _extend_missing(diff.obsolete, old_accounts)
    elif old_accounts:
        _extend_missing(diff.unknown, new_accounts)
    else:
        _extend_missing(diff.new, new_accounts)

    return diff

# -- GitLab (trailer of the original patch)