From 4b9d48a391b5060e98062e5dcb644a4d36a136b8 Mon Sep 17 00:00:00 2001 From: Vincent Texier Date: Thu, 19 Jun 2014 17:56:38 +0200 Subject: [PATCH] Fix bug bad characters in titles The parser returns double-encoded unicode titles that we have to convert to utf-8 --- modules/ina/pages/search.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/ina/pages/search.py b/modules/ina/pages/search.py index 06f461157f..4ab0acf895 100644 --- a/modules/ina/pages/search.py +++ b/modules/ina/pages/search.py @@ -48,7 +48,8 @@ def iter_videos(self): video.thumbnail = BaseImage(u'http://boutique.ina.fr%s' % url) video.thumbnail.url = video.thumbnail.id - video.title = unicode(self.parser.select(li, 'p.titre', 1).text) + # The title is poorly encoded in the source, we have to encode/decode it again + video.title = unicode(self.parser.select(li, 'p.titre', 1).text).encode('raw_unicode_escape').decode('utf8') date = self.parser.select(li, 'p.date', 1).text day, month, year = [int(s) for s in date.split('/')] -- GitLab