[PATCH] D106792: [clang-tidy] Always open files using UTF-8 encoding

Andy Yankovsky via Phabricator via cfe-commits Thu, 29 Jul 2021 01:50:49 -0700

werat updated this revision to Diff 362678.
werat added a comment.

Fix incorrect replace



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106792/new/

https://reviews.llvm.org/D106792

Files:
  clang-tools-extra/clang-tidy/add_new_check.py
  clang-tools-extra/clang-tidy/rename_check.py

Index: clang-tools-extra/clang-tidy/rename_check.py
===================================================================
--- clang-tools-extra/clang-tidy/rename_check.py
+++ clang-tools-extra/clang-tidy/rename_check.py
@@ -10,20 +10,25 @@
 
 import argparse
 import glob
+import io
 import os
 import re
 
+# The documentation files are encoded using UTF-8, however on Windows the
+# default encoding might be different (e.g. CP-1252). To make sure UTF-8 is
+# always used, use `io.open(filename, mode, encoding='utf8')` for reading and
+# writing files.
 
 def replaceInFileRegex(fileName, sFrom, sTo):
   if sFrom == sTo:
     return
   txt = None
-  with open(fileName, "r") as f:
+  with io.open(fileName, 'r', encoding='utf8') as f:
     txt = f.read()
 
   txt = re.sub(sFrom, sTo, txt)
   print("Replacing '%s' -> '%s' in '%s'..." % (sFrom, sTo, fileName))
-  with open(fileName, "w") as f:
+  with io.open(fileName, 'w', encoding='utf8') as f:
     f.write(txt)
 
 
@@ -31,7 +36,7 @@
   if sFrom == sTo:
     return
   txt = None
-  with open(fileName, "r") as f:
+  with io.open(fileName, 'r', encoding='utf8') as f:
     txt = f.read()
 
   if sFrom not in txt:
@@ -39,7 +44,7 @@
 
   txt = txt.replace(sFrom, sTo)
   print("Replacing '%s' -> '%s' in '%s'..." % (sFrom, sTo, fileName))
-  with open(fileName, "w") as f:
+  with io.open(fileName, 'w', encoding='utf8') as f:
     f.write(txt)
 
 
@@ -70,7 +75,7 @@
 
 def deleteMatchingLines(fileName, pattern):
   lines = None
-  with open(fileName, "r") as f:
+  with io.open(fileName, 'r', encoding='utf8') as f:
     lines = f.readlines()
 
   not_matching_lines = [l for l in lines if not re.search(pattern, l)]
@@ -79,7 +84,7 @@
 
   print("Removing lines matching '%s' in '%s'..." % (pattern, fileName))
   print('  ' + '  '.join([l for l in lines if re.search(pattern, l)]))
-  with open(fileName, "w") as f:
+  with io.open(fileName, 'w', encoding='utf8') as f:
     f.writelines(not_matching_lines)
 
   return True
@@ -101,7 +106,7 @@
 # entry and 'False' if the entry already existed.
 def adapt_cmake(module_path, check_name_camel):
   filename = os.path.join(module_path, 'CMakeLists.txt')
-  with open(filename, 'r') as f:
+  with io.open(filename, 'r', encoding='utf8') as f:
     lines = f.readlines()
 
   cpp_file = check_name_camel + '.cpp'
@@ -112,7 +117,7 @@
       return False
 
   print('Updating %s...' % filename)
-  with open(filename, 'wb') as f:
+  with io.open(filename, 'wb', encoding='utf8') as f:
     cpp_found = False
     file_added = False
     for line in lines:
@@ -130,11 +135,11 @@
 def adapt_module(module_path, module, check_name, check_name_camel):
   modulecpp = next(filter(lambda p: p.lower() == module.lower() + 'tidymodule.cpp', os.listdir(module_path)))
   filename = os.path.join(module_path, modulecpp)
-  with open(filename, 'r') as f:
+  with io.open(filename, 'r', encoding='utf8') as f:
     lines = f.readlines()
 
   print('Updating %s...' % filename)
-  with open(filename, 'wb') as f:
+  with io.open(filename, 'wb', encoding='utf8') as f:
     header_added = False
     header_found = False
     check_added = False
@@ -169,7 +174,7 @@
 def add_release_notes(clang_tidy_path, old_check_name, new_check_name):
   filename = os.path.normpath(os.path.join(clang_tidy_path,
                                            '../docs/ReleaseNotes.rst'))
-  with open(filename, 'r') as f:
+  with io.open(filename, 'r', encoding='utf8') as f:
     lines = f.readlines()
 
   lineMatcher = re.compile('Renamed checks')
@@ -177,7 +182,7 @@
   checkMatcher = re.compile('- The \'(.*)')
 
   print('Updating %s...' % filename)
-  with open(filename, 'wb') as f:
+  with io.open(filename, 'wb', encoding='utf8') as f:
     note_added = False
     header_found = False
     add_note_here = False
Index: clang-tools-extra/clang-tidy/add_new_check.py
===================================================================
--- clang-tools-extra/clang-tidy/add_new_check.py
+++ clang-tools-extra/clang-tidy/add_new_check.py
@@ -11,16 +11,21 @@
 from __future__ import print_function
 
 import argparse
+import io
 import os
 import re
 import sys
 
+# The documentation files are encoded using UTF-8, however on Windows the
+# default encoding might be different (e.g. CP-1252). To make sure UTF-8 is
+# always used, use `io.open(filename, mode, encoding='utf8')` for reading and
+# writing files
 
 # Adapts the module's CMakelist file. Returns 'True' if it could add a new
 # entry and 'False' if the entry already existed.
 def adapt_cmake(module_path, check_name_camel):
   filename = os.path.join(module_path, 'CMakeLists.txt')
-  with open(filename, 'r') as f:
+  with io.open(filename, 'r', encoding='utf8') as f:
     lines = f.readlines()
 
   cpp_file = check_name_camel + '.cpp'
@@ -31,7 +36,7 @@
       return False
 
   print('Updating %s...' % filename)
-  with open(filename, 'w') as f:
+  with io.open(filename, 'w', encoding='utf8') as f:
     cpp_found = False
     file_added = False
     for line in lines:
@@ -51,7 +56,7 @@
   check_name_dashes = module + '-' + check_name
   filename = os.path.join(module_path, check_name_camel) + '.h'
   print('Creating %s...' % filename)
-  with open(filename, 'w') as f:
+  with io.open(filename, 'w', encoding='utf8') as f:
     header_guard = ('LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_' + module.upper() + '_'
                     + check_name_camel.upper() + '_H')
     f.write('//===--- ')
@@ -104,7 +109,7 @@
 def write_implementation(module_path, module, namespace, check_name_camel):
   filename = os.path.join(module_path, check_name_camel) + '.cpp'
   print('Creating %s...' % filename)
-  with open(filename, 'w') as f:
+  with io.open(filename, 'w', encoding='utf8') as f:
     f.write('//===--- ')
     f.write(os.path.basename(filename))
     f.write(' - clang-tidy ')
@@ -158,11 +163,11 @@
       lambda p: p.lower() == module.lower() + 'tidymodule.cpp',
       os.listdir(module_path)))[0]
   filename = os.path.join(module_path, modulecpp)
-  with open(filename, 'r') as f:
+  with io.open(filename, 'r', encoding='utf8') as f:
     lines = f.readlines()
 
   print('Updating %s...' % filename)
-  with open(filename, 'w') as f:
+  with io.open(filename, 'w', encoding='utf8') as f:
     header_added = False
     header_found = False
     check_added = False
@@ -217,7 +222,7 @@
   check_name_dashes = module + '-' + check_name
   filename = os.path.normpath(os.path.join(module_path,
                                            '../../docs/ReleaseNotes.rst'))
-  with open(filename, 'r') as f:
+  with io.open(filename, 'r', encoding='utf8') as f:
     lines = f.readlines()
 
   lineMatcher = re.compile('New checks')
@@ -225,7 +230,7 @@
   checkMatcher = re.compile('- New :doc:`(.*)')
 
   print('Updating %s...' % filename)
-  with open(filename, 'w') as f:
+  with io.open(filename, 'w', encoding='utf8') as f:
     note_added = False
     header_found = False
     add_note_here = False
@@ -271,7 +276,7 @@
   filename = os.path.normpath(os.path.join(module_path, '../../test/clang-tidy/checkers',
                                            check_name_dashes + '.' + test_extension))
   print('Creating %s...' % filename)
-  with open(filename, 'w') as f:
+  with io.open(filename, 'w', encoding='utf8') as f:
     f.write("""// RUN: %%check_clang_tidy %%s %(check_name_dashes)s %%t
 
 // FIXME: Add something that triggers the check here.
@@ -307,7 +312,7 @@
   docs_dir = os.path.join(clang_tidy_path, '../docs/clang-tidy/checks')
   filename = os.path.normpath(os.path.join(docs_dir, 'list.rst'))
   # Read the content of the current list.rst file
-  with open(filename, 'r') as f:
+  with io.open(filename, 'r', encoding='utf8') as f:
     lines = f.readlines()
   # Get all existing docs
   doc_files = list(filter(lambda s: s.endswith('.rst') and s != 'list.rst',
@@ -323,7 +328,7 @@
     if not os.path.isfile(checkerCode):
       return ""
 
-    with open(checkerCode) as f:
+    with io.open(checkerCode, encoding='utf8') as f:
       code = f.read()
       if 'FixItHint' in code or "ReplacementText" in code or "fixit" in code:
         # Some simple heuristics to figure out if a checker has an autofix or not.
@@ -333,7 +338,7 @@
   def process_doc(doc_file):
     check_name = doc_file.replace('.rst', '')
 
-    with open(os.path.join(docs_dir, doc_file), 'r') as doc:
+    with io.open(os.path.join(docs_dir, doc_file), 'r', encoding='utf8') as doc:
       content = doc.read()
       match = re.search('.*:orphan:.*', content)
 
@@ -376,7 +381,7 @@
   checks_alias = map(format_link_alias, doc_files)
 
   print('Updating %s...' % filename)
-  with open(filename, 'w') as f:
+  with io.open(filename, 'w', encoding='utf8') as f:
     for line in lines:
       f.write(line)
       if line.strip() == ".. csv-table::":
@@ -397,7 +402,7 @@
   filename = os.path.normpath(os.path.join(
       module_path, '../../docs/clang-tidy/checks/', check_name_dashes + '.rst'))
   print('Creating %s...' % filename)
-  with open(filename, 'w') as f:
+  with io.open(filename, 'w', encoding='utf8') as f:
     f.write(""".. title:: clang-tidy - %(check_name_dashes)s
 
 %(check_name_dashes)s

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D106792: [clang-tidy] Always open files using UTF-8 encoding

Reply via email to