From 99ca803bd2b1d9ac96286b5ec7215005a6db109f Mon Sep 17 00:00:00 2001 From: Vincent Ardisson Date: Mon, 10 Aug 2020 12:03:23 +0200 Subject: [PATCH] weboob/browser/browsers: when dumping HAR, add a key to flag binary data For binary responses, HAR allows base64 encoding, so the data is left untouched. But no such thing exists for binary posted data. Some generators write JSON with the binary posted data as if it were latin-1 encoded text. But when reading, what can we do? Assume it's binary and encode it back to latin-1? Or assume it's text? HAR gives no answer, so we add a non-standard "x-binary" key to indicate that "text" is actually binary data disguised as latin-1 text. --- weboob/browser/browsers.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/weboob/browser/browsers.py b/weboob/browser/browsers.py index e4e054162d..5395bf5387 100644 --- a/weboob/browser/browsers.py +++ b/weboob/browser/browsers.py @@ -286,6 +286,8 @@ def save_response(self, response, warning=False, **kwargs): 'content': { 'mimeType': response.headers.get('Content-Type', ''), 'size': len(response.content), + # systematically use base64 to avoid more content alteration + # than there already is... 'encoding': "base64", 'text': base64.b64encode(response.content).decode('ascii'), }, @@ -316,7 +318,10 @@ def save_response(self, response, warning=False, **kwargs): if isinstance(request.body, str): har_entry['request']['postData']['text'] = request.body else: + # HAR format has no proper way to encode posted binary data! har_entry['request']['postData']['text'] = request.body.decode('latin-1') + # add a non-standard key to indicate how should "text" be decoded. + har_entry['request']['postData']['x-binary'] = True if request.headers.get('Content-Type') == 'application/x-www-form-urlencoded': har_entry['request']['postData']['params'] = [ -- GitLab