From 72f9366689df92e291b11667a64fb5877a2068c2 Mon Sep 17 00:00:00 2001 From: Vincent A Date: Sat, 5 May 2018 00:00:19 +0200 Subject: [PATCH] docs/.../cookbook: add another method for json + describe table parsing --- docs/source/guides/cookbook.rst | 64 ++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 4 deletions(-) diff --git a/docs/source/guides/cookbook.rst b/docs/source/guides/cookbook.rst index b9af648b00..b8c36dc27e 100644 --- a/docs/source/guides/cookbook.rst +++ b/docs/source/guides/cookbook.rst @@ -36,7 +36,7 @@ POST JSON Data must be encoded as a string:: - browser.location('/quux', data=json.dumps({'foo': 'bar'}), headers={'Content-Type': 'application/json'}) + browser.location('/quux', json={'foo': 'bar'}) Will do:: @@ -45,9 +45,9 @@ Will do:: {"foo": "bar"} -.. - Alternatively:: - browser.location('/quux', json={'foo': 'bar'}) +Equivalent to:: + + browser.location('/quux', data=json.dumps({'foo': 'bar'}), headers={'Content-Type': 'application/json'}) Add custom headers for one request @@ -191,6 +191,62 @@ Some sites do not even do that and may use Javascript to follow a link. 
The ``on def on_load(self): self.browser.location(Link('//a[@id="target"]')(self.doc)) +Parse data from an HTML table +----------------------------- + +This example code isn't very semantic and could fail silently if columns are changed:: + + class MyPage(HTMLPage): + @method + class iter_stuff(ListElement): + item_xpath = '//table/tr[position() > 1]' # data rows + + class item(ItemElement): + klass = Stuff + + obj_id = CleanText('./td[1]') + obj_foo = CleanText('./td[2]') + +It can be improved by using the column labels:: + + class MyPage(HTMLPage): + @method + class iter_stuff(TableElement): + head_xpath = '//table/tr/th' # where to look for column titles + + # these are the column titles from the site + col_id = 'Identifier' # Exact match + col_foo = re.compile(r'^Foo value for today \(.*\)') # regexp for finer matching + col_bar = ['Bar', 'Barr'] # Multiple exact matches + + item_xpath = '//table/tr[position() > 1]' # data rows + + class item(ItemElement): + klass = Stuff + + obj_id = CleanText(TableCell('id')) + obj_foo = CleanText(TableCell('foo')) + +Handle multiple tables with similar headers +------------------------------------------- + +Sometimes, you might encounter a page with multiple tables to parse. The columns are titled the same, but they aren't at the same column index. +So, it's required to restart :class:`weboob.browser.elements.TableElement` column processing for each table. It's possible to encapsulate elements in other elements:: + + class MultiPage(HTMLPage): + @method + class iter_stuff(ListElement): + item_xpath = '//table' + + class one_table(TableElement): + head_xpath = './thead/tr/th' + item_xpath = './tbody/tr' + + col_foo = 'Foo' + + class item(ItemElement): + obj_foo = CleanText(TableCell('foo')) + Handle pagination ----------------- -- GitLab