commit: dac532df96cb16626f4f1656b5aa2f82b8383c8d Author: Mart Raudsepp <leio <AT> gentoo <DOT> org> AuthorDate: Sun Dec 4 07:59:39 2016 +0000 Commit: Mart Raudsepp <leio <AT> gentoo <DOT> org> CommitDate: Sun Dec 4 07:59:39 2016 +0000 URL: https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=dac532df
sync: Fix UTF-8 handling for projects.xml import Need to feed response.content bytestring into ElementTree, not response.text. With the latter ET seems to figure it's already decoded and goes all latin-1 on us. From response.content bytestream it notices the UTF-8 encoding XML markup and does things right. Diagnosed-by: Doug Freed <dwfreed <AT> mtu.edu> backend/lib/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 4894315..22419bf 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -13,7 +13,7 @@ def get_project_data(): if not data: print("Failed retrieving projects.xml") return - root = ET.fromstring(data.text) + root = ET.fromstring(data.content) projects = {} # Parsing is based on http://www.gentoo.org/dtd/projects.dtd as of 2016-11-10 if root.tag.lower() != 'projects':