commit: 57d8ef85943af7ddfeaa51d0cacdacba80a75a13
Author: Magnus Granberg <zorry <AT> gentoo <DOT> org>
AuthorDate: Thu Feb 3 21:23:28 2022 +0000
Commit: Magnus Granberg <zorry <AT> gentoo <DOT> org>
CommitDate: Thu Feb 3 21:23:28 2022 +0000
URL: https://gitweb.gentoo.org/proj/tinderbox-cluster.git/commit/?id=57d8ef85
Fix UnicodeDecodeError in log parser

Signed-off-by: Magnus Granberg <zorry <AT> gentoo.org>

 buildbot_gentoo_ci/logs/log_parser.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/buildbot_gentoo_ci/logs/log_parser.py b/buildbot_gentoo_ci/logs/log_parser.py
index 7531532..448192c 100644
--- a/buildbot_gentoo_ci/logs/log_parser.py
+++ b/buildbot_gentoo_ci/logs/log_parser.py
@@ -166,13 +166,10 @@ def runLogParser(args):
     # Is stored in a db instead of files.
     log_search_pattern_list = get_log_search_pattern(Session, args.uuid, config['default_uuid'])
     Session.close()
-    #FIXME: UnicodeDecodeError: 'utf-8' codec can't decode byte ... in some logs
-    with io.TextIOWrapper(io.BufferedReader(gzip.open(args.file, 'rb'))) as f:
-        for text_line in f:
-            logfile_text_dict[index] = text_line.strip('\n')
-            index = index + 1
-        max_text_lines = index
-        f.close()
+    for text_line in io.TextIOWrapper(io.BufferedReader(gzip.open(args.file)), encoding='utf8', errors='ignore'):
+        logfile_text_dict[index] = text_line.strip('\n')
+        index = index + 1
+    max_text_lines = index
     # run the parse patten on the line
     for tmp_index, text in logfile_text_dict.items():
         res = mp_pool.apply_async(search_buildlog, (log_search_pattern_list, logfile_text_dict, tmp_index, max_text_lines,))