[PATCH] D83942: [analyzer][tests] Add a notion of project sizes

Valeriy Savchenko via Phabricator via cfe-commits Mon, 24 Aug 2020 06:13:44 -0700

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGaec12c1264ac: [analyzer][tests] Add a notion of project 
sizes (authored by vsavchenko).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83942/new/

https://reviews.llvm.org/D83942

Files:
  clang/utils/analyzer/ProjectMap.py
  clang/utils/analyzer/SATest.py
  clang/utils/analyzer/projects/projects.json

Index: clang/utils/analyzer/projects/projects.json
===================================================================
--- clang/utils/analyzer/projects/projects.json
+++ clang/utils/analyzer/projects/projects.json
@@ -4,139 +4,159 @@
     "mode": 1,
     "source": "git",
     "origin": "https://github.com/jarro2783/cxxopts.git",
-    "commit": "794c975"
+    "commit": "794c975",
+    "size": "tiny"
   },
   {
     "name": "box2d",
     "mode": 1,
     "source": "git",
     "origin": "https://github.com/erincatto/box2d.git",
-    "commit": "1025f9a"
+    "commit": "1025f9a",
+    "size": "small"
   },
   {
     "name": "tinyexpr",
     "mode": 1,
     "source": "git",
     "origin": "https://github.com/codeplea/tinyexpr.git",
-    "commit": "ffb0d41"
+    "commit": "ffb0d41",
+    "size": "tiny"
   },
   {
     "name": "symengine",
     "mode": 1,
     "source": "git",
     "origin": "https://github.com/symengine/symengine.git",
-    "commit": "4f669d59"
+    "commit": "4f669d59",
+    "size": "small"
   },
   {
     "name": "termbox",
     "mode": 1,
     "source": "git",
     "origin": "https://github.com/nsf/termbox.git",
-    "commit": "0df1355"
+    "commit": "0df1355",
+    "size": "tiny"
   },
   {
     "name": "tinyvm",
     "mode": 1,
     "source": "git",
     "origin": "https://github.com/jakogut/tinyvm.git",
-    "commit": "10c25d8"
+    "commit": "10c25d8",
+    "size": "tiny"
   },
   {
     "name": "tinyspline",
     "mode": 1,
     "source": "git",
     "origin": "https://github.com/msteinbeck/tinyspline.git",
-    "commit": "f8b1ab7"
+    "commit": "f8b1ab7",
+    "size": "tiny"
   },
   {
     "name": "oatpp",
     "mode": 1,
     "source": "git",
     "origin": "https://github.com/oatpp/oatpp.git",
-    "commit": "d3e60fb"
+    "commit": "d3e60fb",
+    "size": "small"
   },
   {
     "name": "libsoundio",
     "mode": 1,
     "source": "git",
     "origin": "https://github.com/andrewrk/libsoundio.git",
-    "commit": "b810bf2"
+    "commit": "b810bf2",
+    "size": "tiny"
   },
   {
     "name": "zstd",
     "mode": 1,
     "source": "git",
     "origin": "https://github.com/facebook/zstd.git",
-    "commit": "2af4e073"
+    "commit": "2af4e073",
+    "size": "small"
   },
   {
     "name": "simbody",
     "mode": 1,
     "source": "git",
     "origin": "https://github.com/simbody/simbody.git",
-    "commit": "5cf513d"
+    "commit": "5cf513d",
+    "size": "big"
   },
   {
     "name": "duckdb",
     "mode": 1,
     "source": "git",
     "origin": "https://github.com/cwida/duckdb.git",
-    "commit": "d098c9f"
+    "commit": "d098c9f",
+    "size": "big"
   },
   {
     "name": "drogon",
     "mode": 1,
     "source": "git",
     "origin": "https://github.com/an-tao/drogon.git",
-    "commit": "fd2a612"
+    "commit": "fd2a612",
+    "size": "small"
   },
   {
     "name": "fmt",
     "mode": 1,
     "source": "git",
     "origin": "https://github.com/fmtlib/fmt.git",
-    "commit": "5e7c70e"
+    "commit": "5e7c70e",
+    "size": "small"
   },
   {
     "name": "re2",
     "mode": 1,
     "source": "git",
     "origin": "https://github.com/google/re2.git",
-    "commit": "2b25567"
+    "commit": "2b25567",
+    "size": "small"
   },
   {
     "name": "cppcheck",
     "mode": 1,
     "source": "git",
     "origin": "https://github.com/danmar/cppcheck.git",
-    "commit": "5fa3d53"
+    "commit": "5fa3d53",
+    "size": "small"
   },
   {
     "name": "harfbuzz",
     "mode": 1,
     "source": "git",
     "origin": "https://github.com/harfbuzz/harfbuzz.git",
-    "commit": "f8d345e"
+    "commit": "f8d345e",
+    "size": "small"
   },
   {
     "name": "capnproto",
     "mode": 1,
     "source": "git",
     "origin": "https://github.com/capnproto/capnproto.git",
-    "commit": "8be1c9f"
+    "commit": "8be1c9f",
+    "size": "small"
   },
   {
     "name": "tmux",
     "mode": 1,
     "source": "git",
     "origin": "https://github.com/tmux/tmux.git",
-    "commit": "a5f99e1"
+    "commit": "a5f99e1",
+    "size": "big"
   },
   {
     "name": "faiss",
     "mode": 1,
     "source": "git",
     "origin": "https://github.com/facebookresearch/faiss.git",
-    "commit": "9e5d5b7"
+    "commit": "9e5d5b7",
+    "size": "small"
   }
 ]
Index: clang/utils/analyzer/SATest.py
===================================================================
--- clang/utils/analyzer/SATest.py
+++ clang/utils/analyzer/SATest.py
@@ -37,7 +37,7 @@
 
     SATestBuild.VERBOSE = args.verbose
 
-    projects = get_projects(parser, args.projects)
+    projects = get_projects(parser, args)
     tester = SATestBuild.RegressionTester(args.jobs,
                                           projects,
                                           args.override_compiler,
@@ -84,7 +84,7 @@
 def benchmark(parser, args):
     from SATestBenchmark import Benchmark
 
-    projects = get_projects(parser, args.projects)
+    projects = get_projects(parser, args)
     benchmark = Benchmark(projects, args.iterations, args.output)
     benchmark.run()
 
@@ -94,14 +94,19 @@
     SATestBenchmark.compare(args.old, args.new, args.output)
 
 
-def get_projects(parser, projects_str):
-    from ProjectMap import ProjectMap
+def get_projects(parser, args):
+    from ProjectMap import ProjectMap, Size
 
     project_map = ProjectMap()
     projects = project_map.projects
 
-    if projects_str:
-        projects_arg = projects_str.split(",")
+    def filter_projects(projects, predicate, force=False):
+        return [project.with_fields(enabled=(force or project.enabled) and
+                                    predicate(project))
+                for project in projects]
+
+    if args.projects:
+        projects_arg = args.projects.split(",")
         available_projects = [project.name
                               for project in projects]
 
@@ -113,8 +118,17 @@
                              "{all}.".format(project=manual_project,
                                              all=available_projects))
 
-        projects = [project.with_fields(enabled=project.name in projects_arg)
-                    for project in projects]
+        projects = filter_projects(projects, lambda project:
+                                   project.name in projects_arg,
+                                   force=True)
+
+    try:
+        max_size = Size.from_str(args.max_size)
+    except ValueError as e:
+        parser.error("{}".format(e))
+
+    projects = filter_projects(projects, lambda project:
+                               project.size <= max_size)
 
     return projects
 
@@ -238,6 +252,8 @@
                               help="Arguments passed to to -analyzer-config")
     build_parser.add_argument("--projects", action="store", default="",
                               help="Comma-separated list of projects to test")
+    build_parser.add_argument("--max-size", action="store", default=None,
+                              help="Maximum size for the projects to test")
     build_parser.add_argument("-v", "--verbose", action="count", default=0)
     build_parser.set_defaults(func=build)
 
@@ -318,6 +334,8 @@
                               help="Output csv file for the benchmark results")
     bench_parser.add_argument("--projects", action="store", default="",
                               help="Comma-separated list of projects to test")
+    bench_parser.add_argument("--max-size", action="store", default=None,
+                              help="Maximum size for the projects to test")
     bench_parser.set_defaults(func=benchmark)
 
     bench_subparsers = bench_parser.add_subparsers()
Index: clang/utils/analyzer/ProjectMap.py
===================================================================
--- clang/utils/analyzer/ProjectMap.py
+++ clang/utils/analyzer/ProjectMap.py
@@ -1,7 +1,7 @@
 import json
 import os
 
-from enum import Enum
+from enum import auto, Enum
 from typing import Any, Dict, List, NamedTuple, Optional, Tuple
 
 
@@ -17,6 +17,64 @@
     SCRIPT = "script"
 
 
+class Size(int, Enum):
+    """
+    Size of the project.
+
+    Sizes do not directly correspond to the number of lines or files in the
+    project.  The key factor that is important for the developers of the
+    analyzer is the time it takes to analyze the project.  Here is how
+    the following sizes map to times:
+
+    TINY:  <1min
+    SMALL: 1min-10min
+    BIG:   10min-1h
+    HUGE:  >1h
+
+    The borders are a bit of a blur, especially because analysis time varies
+    from one machine to another.  However, the relative times will stay pretty
+    similar, and these groupings will still be helpful.
+
+    UNSPECIFIED is a very special case, which is intentionally last in the list
+    of possible sizes.  If the user wants to filter projects by one of the
+    possible sizes, we want projects with UNSPECIFIED size to be filtered out
+    for any given size.
+    """
+    TINY = auto()
+    SMALL = auto()
+    BIG = auto()
+    HUGE = auto()
+    UNSPECIFIED = auto()
+
+    @staticmethod
+    def from_str(raw_size: Optional[str]) -> "Size":
+        """
+        Construct a Size object from an optional string.
+
+        :param raw_size: optional string representation of the desired Size
+                         object.  None will produce UNSPECIFIED size.
+
+        This method is case-insensitive, so raw sizes 'tiny', 'TINY', and
+        'TiNy' will produce the same result.
+        """
+        if raw_size is None:
+            return Size.UNSPECIFIED
+
+        raw_size_upper = raw_size.upper()
+        # The implementation is decoupled from the actual values of the enum,
+        # so we can easily add or modify it without bothering about this
+        # function.
+        for possible_size in Size:
+            if possible_size.name == raw_size_upper:
+                return possible_size
+
+        possible_sizes = [size.name.lower() for size in Size
+                          # no need in showing our users this size
+                          if size != Size.UNSPECIFIED]
+        raise ValueError(f"Incorrect project size '{raw_size}'. "
+                         f"Available sizes are {possible_sizes}")
+
+
 class ProjectInfo(NamedTuple):
     """
     Information about a project to analyze.
@@ -27,6 +85,7 @@
     origin: str = ""
     commit: str = ""
     enabled: bool = True
+    size: Size = Size.UNSPECIFIED
 
     def with_fields(self, **kwargs) -> "ProjectInfo":
         """
@@ -98,6 +157,7 @@
             build_mode: int = raw_project["mode"]
             enabled: bool = raw_project.get("enabled", True)
             source: DownloadType = raw_project.get("source", "zip")
+            size = Size.from_str(raw_project.get("size", None))
 
             if source == DownloadType.GIT:
                 origin, commit = ProjectMap._get_git_params(raw_project)
@@ -105,7 +165,7 @@
                 origin, commit = "", ""
 
             return ProjectInfo(name, build_mode, source, origin, commit,
-                               enabled)
+                               enabled, size)
 
         except KeyError as e:
             raise ValueError(

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D83942: [analyzer][tests] Add a notion of project sizes

Reply via email to