From 6f4cd82d5b650b5521091e5749f42e41acfa36c4 Mon Sep 17 00:00:00 2001
From: Quentin Defenouillere
Date: Fri, 23 Nov 2018 11:28:39 +0100
Subject: [PATCH] [weboob.browser.filters] Add colspan attribute to TableCell class

The "colspan" attribute enables handling of <td> tags that have a
"colspan" attribute that is higher than 1. These columns occupy more
than one slot in the table, creating a column shift that we must handle
otherwise the col_names will not fit anymore with the col heads.
---
 weboob/browser/filters/standard.py | 40 +++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/weboob/browser/filters/standard.py b/weboob/browser/filters/standard.py
index 3a982bf1a2..d6ee5478a5 100644
--- a/weboob/browser/filters/standard.py
+++ b/weboob/browser/filters/standard.py
@@ -191,10 +191,17 @@ class TableCell(_Filter):
     ...         obj_date = Date(TableCell('date'))
     ...         obj_label = CleanText(TableCell('label'))
     ...
+
+    The 'colspan' variable enables the handling of table tags that have
+    a "colspan" attribute that modifies the width of the column:
+    for example <td colspan="2"> will occupy two columns instead of one,
+    creating a column shift for all the next columns that must be taken
+    into consideration when trying to match column values with column heads.
     """
 
     def __init__(self, *names, **kwargs):
         support_th = kwargs.pop('support_th', False)
+        self.colspan = kwargs.pop('colspan', False)
         super(TableCell, self).__init__(**kwargs)
         self.names = names
 
@@ -203,7 +210,15 @@ def __init__(self, *names, **kwargs):
         else:
             self.td = './td[%s]'
 
-    def __call__(self, item):
+    """
+    The two methods below are used to verify that modifying TableCell
+    to handle colspans does not modify the class behavior in weboob modules.
+    The "assert" should crash if a module does not return the same results
+    with and without handling colspans.
+    """
+
+    def call_without_colspan(self, item):
+        # Former behavior without handling colspans > 1
         for name in self.names:
             idx = item.parent.get_colnum(name)
             if idx is not None:
@@ -211,9 +226,47 @@ def __call__(self, item):
                 for el in ret:
                     self.highlight_el(el, item)
                 return ret
+        return self.default_or_raise(ColumnNotFound('Unable to find column %s' % ' or '.join(self.names)))
 
+    def call_with_colspan(self, item):
+        """Return the cell for the first resolvable name, honouring colspans.
+
+        Cells are walked from the left, adding up their "colspan" widths
+        (1 when the attribute is absent), and the first cell starting at
+        or after the wanted column is returned. A row that runs out of
+        cells first yields the empty xpath result instead of failing on
+        ret[0] with an IndexError.
+        """
+        for name in self.names:
+            col_idx = item.parent.get_colnum(name)
+            if col_idx is not None:
+                current_col = 0
+                for td_idx in range(col_idx + 1):
+                    ret = item.xpath(self.td % (td_idx + 1))
+                    # An empty match means the row has no more cells: stop
+                    # here rather than reading ret[0] below.
+                    if col_idx <= current_col or not ret:
+                        for el in ret:
+                            self.highlight_el(el, item)
+                        return ret
+                    current_col += int(ret[0].attrib.get('colspan', 1))
         return self.default_or_raise(ColumnNotFound('Unable to find column %s' % ' or '.join(self.names)))
 
+    def __call__(self, item):
+        """Apply the filter to a table row.
+
+        With colspan=True only the colspan-aware lookup runs. Otherwise both
+        lookups run and must agree; the assert flags modules whose results
+        would change (note that "python -O" strips it).
+        """
+        if self.colspan:
+            return self.call_with_colspan(item)
+
+        ret_without_colspan = self.call_without_colspan(item)
+        ret_with_colspan = self.call_with_colspan(item)
+        assert ret_without_colspan == ret_with_colspan, 'Different behavior with and without colspan in TableCell'
+        return ret_with_colspan
+
 
 class RawText(Filter):
     """Get raw text from an element.
-- 
GitLab