[PATCH] D81563: [analyzer] SATest: Move from csv to json project maps

Valeriy Savchenko via Phabricator via cfe-commits Tue, 16 Jun 2020 03:52:17 -0700

This revision was automatically updated to reflect the committed changes.
Closed by commit rGfb4b565212b0: [analyzer] SATest: Move from csv to json 
project maps (authored by vsavchenko).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D81563/new/

https://reviews.llvm.org/D81563

Files:
  clang/utils/analyzer/ProjectMap.py
  clang/utils/analyzer/SATestAdd.py
  clang/utils/analyzer/SATestBuild.py
  clang/utils/analyzer/SATestUpdateDiffs.py

Index: clang/utils/analyzer/SATestUpdateDiffs.py
===================================================================
--- clang/utils/analyzer/SATestUpdateDiffs.py
+++ clang/utils/analyzer/SATestUpdateDiffs.py
@@ -4,6 +4,7 @@
 Update reference results for static analyzer.
 """
 import SATestBuild
+from ProjectMap import ProjectInfo, ProjectMap
 
 import os
 import shutil
@@ -14,9 +15,9 @@
 Verbose = 0
 
 
-def update_reference_results(project_name: str, build_mode: int):
-    project_info = SATestBuild.ProjectInfo(project_name, build_mode)
-    tester = SATestBuild.ProjectTester(project_info)
+def update_reference_results(project: ProjectInfo):
+    test_info = SATestBuild.TestInfo(project)
+    tester = SATestBuild.ProjectTester(test_info)
     project_dir = tester.get_project_dir()
 
     tester.is_reference_build = True
@@ -54,7 +55,7 @@
         SATestBuild.run_cleanup_script(project_dir, build_log_file)
 
         SATestBuild.normalize_reference_results(
-            project_dir, ref_results_path, build_mode)
+            project_dir, ref_results_path, project.mode)
 
         # Clean up the generated difference results.
         SATestBuild.cleanup_reference_results(ref_results_path)
@@ -62,6 +63,7 @@
         run_cmd(f"git add '{ref_results_path}'")
 
 
+# TODO: use argparse
 def main(argv):
     if len(argv) == 2 and argv[1] in ("-h", "--help"):
         print("Update static analyzer reference results based "
@@ -70,9 +72,9 @@
               file=sys.stderr)
         sys.exit(1)
 
-    with open(SATestBuild.get_project_map_path(), "r") as f:
-        for project_name, build_mode in SATestBuild.get_projects(f):
-            update_reference_results(project_name, int(build_mode))
+    project_map = ProjectMap()
+    for project in project_map.projects:
+        update_reference_results(project)
 
 
 if __name__ == '__main__':
Index: clang/utils/analyzer/SATestBuild.py
===================================================================
--- clang/utils/analyzer/SATestBuild.py
+++ clang/utils/analyzer/SATestBuild.py
@@ -44,9 +44,9 @@
 """
 import CmpRuns
 import SATestUtils
+from ProjectMap import ProjectInfo, ProjectMap
 
 import argparse
-import csv
 import glob
 import logging
 import math
@@ -59,9 +59,11 @@
 import time
 
 from queue import Queue
+# mypy has problems finding InvalidFileException in the module
+# and this is how we can shush that false positive
+from plistlib import InvalidFileException  # type:ignore
 from subprocess import CalledProcessError, check_call
-from typing import (cast, Dict, Iterable, IO, List, NamedTuple, Optional,
-                    Tuple, TYPE_CHECKING)
+from typing import Dict, IO, List, NamedTuple, Optional, TYPE_CHECKING
 
 
 ###############################################################################
@@ -105,9 +107,6 @@
 # Number of jobs.
 MAX_JOBS = int(math.ceil(multiprocessing.cpu_count() * 0.75))
 
-# Project map stores info about all the "registered" projects.
-PROJECT_MAP_FILE = "projectMap.csv"
-
 # Names of the project specific scripts.
 # The script that downloads the project.
 DOWNLOAD_SCRIPT = "download_project.sh"
@@ -187,18 +186,6 @@
 ###############################################################################
 
 
-def get_project_map_path(should_exist: bool = True) -> str:
-    project_map_path = os.path.join(os.path.abspath(os.curdir),
-                                    PROJECT_MAP_FILE)
-
-    if should_exist and not os.path.exists(project_map_path):
-        stderr(f"Error: Cannot find the project map file {project_map_path}"
-               f"\nRunning script for the wrong directory?\n")
-        sys.exit(1)
-
-    return project_map_path
-
-
 def run_cleanup_script(directory: str, build_log_file: IO):
     """
     Run pre-processing script if any.
@@ -268,12 +255,11 @@
         sys.exit(1)
 
 
-class ProjectInfo(NamedTuple):
+class TestInfo(NamedTuple):
     """
     Information about a project and settings for its analysis.
     """
-    name: str
-    build_mode: int
+    project: ProjectInfo
     override_compiler: bool = False
     extra_analyzer_config: str = ""
     is_reference_build: bool = False
@@ -287,9 +273,9 @@
 # It is a common workaround for this situation:
 # https://mypy.readthedocs.io/en/stable/common_issues.html#using-classes-that-are-generic-in-stubs-but-not-at-runtime
 if TYPE_CHECKING:
-    ProjectQueue = Queue[ProjectInfo]  # this is only processed by mypy
+    TestQueue = Queue[TestInfo]  # this is only processed by mypy
 else:
-    ProjectQueue = Queue  # this will be executed at runtime
+    TestQueue = Queue  # this will be executed at runtime
 
 
 class RegressionTester:
@@ -306,25 +292,24 @@
         self.strictness = strictness
 
     def test_all(self) -> bool:
-        projects_to_test: List[ProjectInfo] = []
-
-        with open(get_project_map_path(), "r") as map_file:
-            validate_project_file(map_file)
-
-            # Test the projects.
-            for proj_name, proj_build_mode in get_projects(map_file):
-                projects_to_test.append(
-                    ProjectInfo(proj_name, int(proj_build_mode),
-                                self.override_compiler,
-                                self.extra_analyzer_config,
-                                self.regenerate, self.strictness))
+        projects_to_test: List[TestInfo] = []
+
+        project_map = ProjectMap()
+
+        # Test the projects.
+        for project in project_map.projects:
+            projects_to_test.append(
+                TestInfo(project,
+                         self.override_compiler,
+                         self.extra_analyzer_config,
+                         self.regenerate, self.strictness))
         if self.jobs <= 1:
             return self._single_threaded_test_all(projects_to_test)
         else:
             return self._multi_threaded_test_all(projects_to_test)
 
     def _single_threaded_test_all(self,
-                                  projects_to_test: List[ProjectInfo]) -> bool:
+                                  projects_to_test: List[TestInfo]) -> bool:
         """
         Run all projects.
         :return: whether tests have passed.
@@ -336,7 +321,7 @@
         return success
 
     def _multi_threaded_test_all(self,
-                                 projects_to_test: List[ProjectInfo]) -> bool:
+                                 projects_to_test: List[TestInfo]) -> bool:
         """
         Run each project in a separate thread.
 
@@ -345,7 +330,7 @@
 
         :return: whether tests have passed.
         """
-        tasks_queue = ProjectQueue()
+        tasks_queue = TestQueue()
 
         for project_info in projects_to_test:
             tasks_queue.put(project_info)
@@ -370,13 +355,12 @@
     """
     A component aggregating testing for one project.
     """
-    def __init__(self, project_info: ProjectInfo):
-        self.project_name = project_info.name
-        self.build_mode = project_info.build_mode
-        self.override_compiler = project_info.override_compiler
-        self.extra_analyzer_config = project_info.extra_analyzer_config
-        self.is_reference_build = project_info.is_reference_build
-        self.strictness = project_info.strictness
+    def __init__(self, test_info: TestInfo):
+        self.project = test_info.project
+        self.override_compiler = test_info.override_compiler
+        self.extra_analyzer_config = test_info.extra_analyzer_config
+        self.is_reference_build = test_info.is_reference_build
+        self.strictness = test_info.strictness
 
     def test(self) -> bool:
         """
@@ -384,7 +368,11 @@
         :return tests_passed: Whether tests have passed according
         to the :param strictness: criteria.
         """
-        stdout(f" \n\n--- Building project {self.project_name}\n")
+        if not self.project.enabled:
+            stdout(f" \n\n--- Skipping disabled project {self.project.name}\n")
+            return True
+
+        stdout(f" \n\n--- Building project {self.project.name}\n")
 
         start_time = time.time()
 
@@ -405,13 +393,13 @@
         else:
             passed = run_cmp_results(project_dir, self.strictness)
 
-        stdout(f"Completed tests for project {self.project_name} "
+        stdout(f"Completed tests for project {self.project.name} "
                f"(time: {time.time() - start_time:.2f}).\n")
 
         return passed
 
     def get_project_dir(self) -> str:
-        return os.path.join(os.path.abspath(os.curdir), self.project_name)
+        return os.path.join(os.path.abspath(os.curdir), self.project.name)
 
     def get_output_dir(self) -> str:
         if self.is_reference_build:
@@ -441,7 +429,7 @@
 
         # Build and analyze the project.
         with open(build_log_path, "w+") as build_log_file:
-            if self.build_mode == 1:
+            if self.project.mode == 1:
                 download_and_patch(directory, build_log_file)
                 run_cleanup_script(directory, build_log_file)
                 self.scan_build(directory, output_dir, build_log_file)
@@ -451,7 +439,7 @@
             if self.is_reference_build:
                 run_cleanup_script(directory, build_log_file)
                 normalize_reference_results(directory, output_dir,
-                                            self.build_mode)
+                                            self.project.mode)
 
         stdout(f"Build complete (time: {time.time() - time_start:.2f}). "
                f"See the log for more details: {build_log_path}\n")
@@ -549,7 +537,7 @@
         prefix += " -Xclang -analyzer-config "
         prefix += f"-Xclang {self.generate_config()} "
 
-        if self.build_mode == 2:
+        if self.project.mode == 2:
             prefix += "-std=c++11 "
 
         plist_path = os.path.join(directory, output_dir, "date")
@@ -601,7 +589,7 @@
 
 
 class TestProjectThread(threading.Thread):
-    def __init__(self, tasks_queue: ProjectQueue,
+    def __init__(self, tasks_queue: TestQueue,
                  results_differ: threading.Event,
                  failure_flag: threading.Event):
         """
@@ -621,13 +609,13 @@
     def run(self):
         while not self.tasks_queue.empty():
             try:
-                project_info = self.tasks_queue.get()
+                test_info = self.tasks_queue.get()
 
-                Logger = logging.getLogger(project_info.name)
+                Logger = logging.getLogger(test_info.project.name)
                 LOCAL.stdout = StreamToLogger(Logger, logging.INFO)
                 LOCAL.stderr = StreamToLogger(Logger, logging.ERROR)
 
-                tester = ProjectTester(project_info)
+                tester = ProjectTester(test_info)
                 if not tester.test():
                     self.results_differ.set()
 
@@ -841,7 +829,7 @@
                 os.remove(plist)
                 continue
 
-        except plistlib.InvalidFileException as e:
+        except InvalidFileException as e:
             stderr(f"Error parsing plist file {plist}: {str(e)}")
             continue
 
@@ -856,36 +844,6 @@
             os.removedirs(subdir)
 
 
-def get_projects(map_file: IO) -> Iterable[Tuple[str, str]]:
-    """
-    Iterate over all projects defined in the project file handler `map_file`
-    from the start.
-    """
-    map_file.seek(0)
-    # TODO: csv format is not very readable, change it to JSON
-    for project_info in csv.reader(map_file):
-        if SATestUtils.is_comment_csv_line(project_info):
-            continue
-        # suppress mypy error
-        yield cast(Tuple[str, str], project_info)
-
-
-def validate_project_file(map_file: IO):
-    """
-    Validate project file.
-    """
-    for project_info in get_projects(map_file):
-        if len(project_info) != 2:
-            stderr("Error: Rows in the project map file "
-                   "should have 2 entries.")
-            raise Exception()
-
-        if project_info[1] not in ('0', '1', '2'):
-            stderr("Error: Second entry in the project map file should be 0"
-                   " (single file), 1 (project), or 2(single file c++11).")
-            raise Exception()
-
-
 if __name__ == "__main__":
     # Parse command line arguments.
     parser = argparse.ArgumentParser(
Index: clang/utils/analyzer/SATestAdd.py
===================================================================
--- clang/utils/analyzer/SATestAdd.py
+++ clang/utils/analyzer/SATestAdd.py
@@ -43,13 +43,11 @@
                                               > changes_for_analyzer.patch
 """
 import SATestBuild
+from ProjectMap import ProjectMap, ProjectInfo
 
-import csv
 import os
 import sys
 
-from typing import IO
-
 
 def add_new_project(name: str, build_mode: int):
     """
@@ -57,9 +55,10 @@
     :param name: is a short string used to identify a project.
     """
 
-    project_info = SATestBuild.ProjectInfo(name, build_mode,
-                                           is_reference_build=True)
-    tester = SATestBuild.ProjectTester(project_info)
+    project_info = ProjectInfo(name, build_mode)
+    test_info = SATestBuild.TestInfo(project_info,
+                                     is_reference_build=True)
+    tester = SATestBuild.ProjectTester(test_info)
 
     project_dir = tester.get_project_dir()
     if not os.path.exists(project_dir):
@@ -70,33 +69,20 @@
     tester.test()
 
     # Add the project name to the project map.
-    project_map_path = SATestBuild.get_project_map_path(should_exist=False)
+    project_map = ProjectMap(should_exist=False)
 
-    if os.path.exists(project_map_path):
-        file_mode = "r+"
+    if is_existing_project(project_map, name):
+        print(f"Warning: Project with name '{name}' already exists.",
+              file=sys.stdout)
+        print("Reference output has been regenerated.", file=sys.stdout)
     else:
-        print("Warning: Creating the project map file!")
-        file_mode = "w+"
-
-    with open(project_map_path, file_mode) as map_file:
-        if is_existing_project(map_file, name):
-            print(f"Warning: Project with name '{name}' already exists.",
-                  file=sys.stdout)
-            print("Reference output has been regenerated.", file=sys.stdout)
-        else:
-            map_writer = csv.writer(map_file)
-            map_writer.writerow((name, build_mode))
-            print(f"The project map is updated: {project_map_path}")
-
-
-def is_existing_project(map_file: IO, project_name: str) -> bool:
-    map_reader = csv.reader(map_file)
+        project_map.projects.append(project_info)
+        project_map.save()
 
-    for raw_info in map_reader:
-        if project_name == raw_info[0]:
-            return True
 
-    return False
+def is_existing_project(project_map: ProjectMap, project_name: str) -> bool:
+    return any(existing_project.name == project_name
+               for existing_project in project_map.projects)
 
 
 # TODO: Use argparse
Index: clang/utils/analyzer/ProjectMap.py
===================================================================
--- /dev/null
+++ clang/utils/analyzer/ProjectMap.py
@@ -0,0 +1,94 @@
+import json
+import os
+
+from typing import Any, Dict, List, NamedTuple, Optional
+
+
+JSON = Dict[str, Any]
+
+
+DEFAULT_MAP_FILE = "projects.json"
+
+
+class ProjectInfo(NamedTuple):
+    """
+    Information about a project to analyze.
+    """
+    name: str
+    mode: int
+    enabled: bool = True
+
+
+class ProjectMap:
+    """
+    Project map stores info about all the "registered" projects.
+    """
+    def __init__(self, path: Optional[str] = None, should_exist: bool = True):
+        """
+        :param path: optional path to a project JSON file, when None defaults
+                     to DEFAULT_MAP_FILE.
+        :param should_exist: flag to tell if it's an exceptional situation when
+                             the project file doesn't exist. When True, a
+                             missing file raises ValueError; when False, an
+                             empty project map file is created instead.
+        """
+        if path is None:
+            path = os.path.join(os.path.abspath(os.curdir), DEFAULT_MAP_FILE)
+
+        if not os.path.exists(path):
+            if should_exist:
+                raise ValueError(
+                    f"Cannot find the project map file {path}"
+                    f"\nRunning script for the wrong directory?\n")
+            else:
+                self._create_empty(path)
+
+        self.path = path
+        self._load_projects()
+
+    def save(self):
+        """
+        Save project map back to its original file.
+        """
+        self._save(self.projects, self.path)
+
+    def _load_projects(self):
+        with open(self.path) as raw_data:
+            raw_projects = json.load(raw_data)
+
+            if not isinstance(raw_projects, list):
+                raise ValueError(
+                    "Project map should be a list of JSON objects")
+
+            self.projects = self._parse(raw_projects)
+
+    @staticmethod
+    def _parse(raw_projects: List[JSON]) -> List[ProjectInfo]:
+        return [ProjectMap._parse_project(raw_project)
+                for raw_project in raw_projects]
+
+    @staticmethod
+    def _parse_project(raw_project: JSON) -> ProjectInfo:
+        try:
+            name: str = raw_project["name"]
+            build_mode: int = raw_project["mode"]
+            enabled: bool = raw_project.get("enabled", True)
+            return ProjectInfo(name, build_mode, enabled)
+
+        except KeyError as e:
+            raise ValueError(
+                f"Project info is required to have a '{e.args[0]}' field")
+
+    @staticmethod
+    def _create_empty(path: str):
+        ProjectMap._save([], path)
+
+    @staticmethod
+    def _save(projects: List[ProjectInfo], path: str):
+        with open(path, "w") as output:
+            json.dump(ProjectMap._convert_infos_to_dicts(projects),
+                      output, indent=2)
+
+    @staticmethod
+    def _convert_infos_to_dicts(projects: List[ProjectInfo]) -> List[JSON]:
+        return [project._asdict() for project in projects]

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D81563: [analyzer] SATest: Move from csv to json project maps

Reply via email to