Re: [llvm-branch-commits] [clang-tools-extra] 85c1c6a - [clangd] Add Random Forest runtime for code completion.

Eric Christopher via llvm-branch-commits Fri, 18 Sep 2020 15:48:59 -0700

On Fri, Sep 18, 2020 at 5:51 PM Eric Christopher <echri...@gmail.com> wrote:


> Hi Utkarsh,
>
> I've temporarily reverted this here:
>
> echristo@athyra ~/s/llvm-project (master)> git push
> To github.com:llvm/llvm-project.git
>    1f0b43638ed..549e55b3d56  master -> master
>
> the decision forest header file referenced in the unittest doesn't appear
> to have made it into the commit?
>
>
Aha. I see how this is supposed to work. It's a bit of a complicated
system. Is there any way we can break this down into smaller chunks to test?

-eric


> Thanks and feel free to follow up if I've missed something.
>
> -eric
>
> On Fri, Sep 18, 2020 at 12:38 PM Utkarsh Saxena via llvm-branch-commits <
> llvm-branch-commits@lists.llvm.org> wrote:
>
>>
>> Author: Utkarsh Saxena
>> Date: 2020-09-18T18:27:42+02:00
>> New Revision: 85c1c6a4ba4eebbd3f5cefb1512498b9f8a5bb7a
>>
>> URL:
>> https://github.com/llvm/llvm-project/commit/85c1c6a4ba4eebbd3f5cefb1512498b9f8a5bb7a
>> DIFF:
>> https://github.com/llvm/llvm-project/commit/85c1c6a4ba4eebbd3f5cefb1512498b9f8a5bb7a.diff
>>
>> LOG: [clangd] Add Random Forest runtime for code completion.
>>
>> Summary:
>> [WIP]
>> - Proposes a json format for representing Random Forest model.
>> - Proposes a way to test the generated runtime using a test model.
>>
>> TODO:
>> - Add generated source code snippet for easier review.
>> - Fix unused label warning.
>> - Figure out required using declarations for CATEGORICAL columns from
>> Features.json.
>> - Necessary Google3 internal modifications for blaze before landing.
>> - Add documentation for format of the model.
>> - Document more.
>>
>> Subscribers: mgorny, ilya-biryukov, MaskRay, jkorous, arphaman, kadircet,
>> cfe-commits
>>
>> Tags: #clang
>>
>> Differential Revision: https://reviews.llvm.org/D83814
>>
>> Added:
>>     clang-tools-extra/clangd/quality/CompletionModel.cmake
>>     clang-tools-extra/clangd/quality/CompletionModelCodegen.py
>>     clang-tools-extra/clangd/quality/README.md
>>     clang-tools-extra/clangd/quality/model/features.json
>>     clang-tools-extra/clangd/quality/model/forest.json
>>     clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
>>
>> clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
>>     clang-tools-extra/clangd/unittests/decision_forest_model/features.json
>>     clang-tools-extra/clangd/unittests/decision_forest_model/forest.json
>>
>> Modified:
>>     clang-tools-extra/clangd/CMakeLists.txt
>>     clang-tools-extra/clangd/unittests/CMakeLists.txt
>>     clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
>>
>> Removed:
>>
>>
>>
>>
>> ################################################################################
>> diff  --git a/clang-tools-extra/clangd/CMakeLists.txt
>> b/clang-tools-extra/clangd/CMakeLists.txt
>> index 3a1a034ed17b..9d2ab5be222a 100644
>> --- a/clang-tools-extra/clangd/CMakeLists.txt
>> +++ b/clang-tools-extra/clangd/CMakeLists.txt
>> @@ -28,6 +28,9 @@ set(LLVM_LINK_COMPONENTS
>>    FrontendOpenMP
>>    Option
>>    )
>> +
>> +include(${CMAKE_CURRENT_SOURCE_DIR}/quality/CompletionModel.cmake)
>> +gen_decision_forest(${CMAKE_CURRENT_SOURCE_DIR}/quality/model
>> CompletionModel clang::clangd::Example)
>>
>>  if(MSVC AND NOT CLANG_CL)
>>   set_source_files_properties(CompileCommands.cpp PROPERTIES
>> COMPILE_FLAGS -wd4130) # disables C4130: logical operation on address of
>> string constant
>> @@ -77,6 +80,7 @@ add_clang_library(clangDaemon
>>    TUScheduler.cpp
>>    URI.cpp
>>    XRefs.cpp
>> +  ${CMAKE_CURRENT_BINARY_DIR}/CompletionModel.cpp
>>
>>    index/Background.cpp
>>    index/BackgroundIndexLoader.cpp
>> @@ -117,6 +121,11 @@ add_clang_library(clangDaemon
>>    omp_gen
>>    )
>>
>> +# Include generated CompletionModel headers.
>> +target_include_directories(clangDaemon PUBLIC
>> +  $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
>> +)
>> +
>>  clang_target_link_libraries(clangDaemon
>>    PRIVATE
>>    clangAST
>>
>> diff  --git a/clang-tools-extra/clangd/quality/CompletionModel.cmake
>> b/clang-tools-extra/clangd/quality/CompletionModel.cmake
>> new file mode 100644
>> index 000000000000..60c6d2aa8433
>> --- /dev/null
>> +++ b/clang-tools-extra/clangd/quality/CompletionModel.cmake
>> @@ -0,0 +1,37 @@
>> +# Run the Completion Model Codegenerator on the model present in the
>> +# ${model} directory.
>> +# Produces a pair of files called ${filename}.h and  ${filename}.cpp in
>> the
>> +# ${CMAKE_CURRENT_BINARY_DIR}. The generated header
>> +# will define a C++ class called ${cpp_class} - which may be a
>> +# namespace-qualified class name.
>> +function(gen_decision_forest model filename cpp_class)
>> +  set(model_compiler
>> ${CMAKE_SOURCE_DIR}/../clang-tools-extra/clangd/quality/CompletionModelCodegen.py)
>> +
>> +  set(output_dir ${CMAKE_CURRENT_BINARY_DIR})
>> +  set(header_file ${output_dir}/${filename}.h)
>> +  set(cpp_file ${output_dir}/${filename}.cpp)
>> +
>> +  add_custom_command(OUTPUT ${header_file} ${cpp_file}
>> +    COMMAND "${Python3_EXECUTABLE}" ${model_compiler}
>> +      --model ${model}
>> +      --output_dir ${output_dir}
>> +      --filename ${filename}
>> +      --cpp_class ${cpp_class}
>> +    COMMENT "Generating code completion model runtime..."
>> +    DEPENDS ${model_compiler} ${model}/forest.json ${model}/features.json
>> +    VERBATIM )
>> +
>> +  set_source_files_properties(${header_file} PROPERTIES
>> +    GENERATED 1)
>> +  set_source_files_properties(${cpp_file} PROPERTIES
>> +    GENERATED 1)
>> +
>> +  # Disable unused label warning for generated files.
>> +  if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
>> +    set_source_files_properties(${cpp_file} PROPERTIES
>> +      COMPILE_FLAGS /wd4102)
>> +  else()
>> +    set_source_files_properties(${cpp_file} PROPERTIES
>> +      COMPILE_FLAGS -Wno-unused)
>> +  endif()
>> +endfunction()
>>
>> diff  --git a/clang-tools-extra/clangd/quality/CompletionModelCodegen.py
>> b/clang-tools-extra/clangd/quality/CompletionModelCodegen.py
>> new file mode 100644
>> index 000000000000..8f8234f6ebbc
>> --- /dev/null
>> +++ b/clang-tools-extra/clangd/quality/CompletionModelCodegen.py
>> @@ -0,0 +1,283 @@
>> +"""Code generator for Code Completion Model Inference.
>> +
>> +Tool runs on the Decision Forest model defined in {model} directory.
>> +It generates two files: {output_dir}/{filename}.h and
>> {output_dir}/{filename}.cpp
>> +The generated files defines the Example class named {cpp_class} having
>> all the features as class members.
>> +The generated runtime provides an `Evaluate` function which can be used
>> to score a code completion candidate.
>> +"""
>> +
>> +import argparse
>> +import json
>> +import struct
>> +from enum import Enum
>> +
>> +
>> +class CppClass:
>> +    """Holds class name and names of the enclosing namespaces."""
>> +
>> +    def __init__(self, cpp_class):
>> +        ns_and_class = cpp_class.split("::")
>> +        self.ns = [ns for ns in ns_and_class[0:-1] if len(ns) > 0]
>> +        self.name = ns_and_class[-1]
>> +        if len(self.name) == 0:
>> +            raise ValueError("Empty class name.")
>> +
>> +    def ns_begin(self):
>> +        """Returns snippet for opening namespace declarations."""
>> +        open_ns = [f"namespace {ns} {{" for ns in self.ns]
>> +        return "\n".join(open_ns)
>> +
>> +    def ns_end(self):
>> +        """Returns snippet for closing namespace declarations."""
>> +        close_ns = [
>> +            f"}} // namespace {ns}" for ns in reversed(self.ns)]
>> +        return "\n".join(close_ns)
>> +
>> +
>> +def header_guard(filename):
>> +    '''Returns the header guard for the generated header.'''
>> +    return f"GENERATED_DECISION_FOREST_MODEL_{filename.upper()}_H"
>> +
>> +
>> +def boost_node(n, label, next_label):
>> +    """Returns code snippet for a leaf/boost node.
>> +    Adds value of leaf to the score and jumps to the root of the next
>> tree."""
>> +    return f"{label}: Score += {n['score']}; goto {next_label};"
>> +
>> +
>> +def if_greater_node(n, label, next_label):
>> +    """Returns code snippet for a if_greater node.
>> +    Jumps to true_label if the Example feature (NUMBER) is greater than
>> the threshold.
>> +    Comparing integers is much faster than comparing floats. Assuming
>> floating points
>> +    are represented as IEEE 754, it order-encodes the floats to integers
>> before comparing them.
>> +    Control falls through if condition is evaluated to false."""
>> +    threshold = n["threshold"]
>> +    return f"{label}: if (E.{n['feature']} >= {order_encode(threshold)}
>> /*{threshold}*/) goto {next_label};"
>> +
>> +
>> +def if_member_node(n, label, next_label):
>> +    """Returns code snippet for a if_member node.
>> +    Jumps to true_label if the Example feature (ENUM) is present in the
>> set of enum values
>> +    described in the node.
>> +    Control falls through if condition is evaluated to false."""
>> +    members = '|'.join([
>> +        f"BIT({n['feature']}_type::{member})"
>> +        for member in n["set"]
>> +    ])
>> +    return f"{label}: if (E.{n['feature']} & ({members})) goto
>> {next_label};"
>> +
>> +
>> +def node(n, label, next_label):
>> +    """Returns code snippet for the node."""
>> +    return {
>> +        'boost': boost_node,
>> +        'if_greater': if_greater_node,
>> +        'if_member': if_member_node,
>> +    }[n['operation']](n, label, next_label)
>> +
>> +
>> +def tree(t, tree_num: int, node_num: int):
>> +    """Returns code for inferencing a Decision Tree.
>> +    Also returns the size of the decision tree.
>> +
>> +    A tree starts with its label `t{tree#}`.
>> +    A node of the tree starts with label `t{tree#}_n{node#}`.
>> +
>> +    The tree contains two types of node: Conditional node and Leaf node.
>> +    -   Conditional node evaluates a condition. If true, it jumps to the
>> true node/child.
>> +        Code is generated using pre-order traversal of the tree
>> considering
>> +        false node as the first child. Therefore the false node is
>> always the
>> +        immediately next label.
>> +    -   Leaf node adds the value to the score and jumps to the next tree.
>> +    """
>> +    label = f"t{tree_num}_n{node_num}"
>> +    code = []
>> +    if node_num == 0:
>> +        code.append(f"t{tree_num}:")
>> +
>> +    if t["operation"] == "boost":
>> +        code.append(node(t, label=label, next_label=f"t{tree_num+1}"))
>> +        return code, 1
>> +
>> +    false_code, false_size = tree(
>> +        t['else'], tree_num=tree_num, node_num=node_num+1)
>> +
>> +    true_node_num = node_num+false_size+1
>> +    true_label = f"t{tree_num}_n{true_node_num}"
>> +
>> +    true_code, true_size = tree(
>> +        t['then'], tree_num=tree_num, node_num=true_node_num)
>> +
>> +    code.append(node(t, label=label, next_label=true_label))
>> +
>> +    return code+false_code+true_code, 1+false_size+true_size
>> +
>> +
>> +def gen_header_code(features_json: list, cpp_class, filename: str):
>> +    """Returns code for header declaring the inference runtime.
>> +
>> +    Declares the Example class named {cpp_class} inside relevant
>> namespaces.
>> +    The Example class contains all the features as class members. This
>> +    class can be used to represent a code completion candidate.
>> +    Provides `float Evaluate()` function which can be used to score the
>> Example.
>> +    """
>> +    setters = []
>> +    for f in features_json:
>> +        feature = f["name"]
>> +        if f["kind"] == "NUMBER":
>> +            # Floats are order-encoded to integers for faster comparison.
>> +            setters.append(
>> +                f"void set{feature}(float V) {{ {feature} =
>> OrderEncode(V); }}")
>> +        elif f["kind"] == "ENUM":
>> +            setters.append(
>> +                f"void set{feature}(unsigned V) {{ {feature} = 1 << V;
>> }}")
>> +        else:
>> +            raise ValueError("Unhandled feature type.", f["kind"])
>> +
>> +    # Class members represent all the features of the Example.
>> +    class_members = [f"uint32_t {f['name']} = 0;" for f in features_json]
>> +
>> +    nline = "\n  "
>> +    guard = header_guard(filename)
>> +    return f"""#ifndef {guard}
>> +#define {guard}
>> +#include <cstdint>
>> +
>> +{cpp_class.ns_begin()}
>> +class {cpp_class.name} {{
>> +public:
>> +  {nline.join(setters)}
>> +
>> +private:
>> +  {nline.join(class_members)}
>> +
>> +  // Produces an integer that sorts in the same order as F.
>> +  // That is: a < b <==> orderEncode(a) < orderEncode(b).
>> +  static uint32_t OrderEncode(float F);
>> +  friend float Evaluate(const {cpp_class.name}&);
>> +}};
>> +
>> +float Evaluate(const {cpp_class.name}&);
>> +{cpp_class.ns_end()}
>> +#endif // {guard}
>> +"""
>> +
>> +
>> +def order_encode(v: float):
>> +    i = struct.unpack('<I', struct.pack('<f', v))[0]
>> +    TopBit = 1 << 31
>> +    # IEEE 754 floats compare like sign-magnitude integers.
>> +    if (i & TopBit):  # Negative float
>> +        return (1 << 32) - i  # low half of integers, order reversed.
>> +    return TopBit + i  # top half of integers
>> +
>> +
>> +def evaluate_func(forest_json: list, cpp_class: CppClass):
>> +    """Generates code for `float Evaluate(const {Example}&)` function.
>> +    The generated function can be used to score an Example."""
>> +    code = f"float Evaluate(const {cpp_class.name}& E) {{\n"
>> +    lines = []
>> +    lines.append("float Score = 0;")
>> +    tree_num = 0
>> +    for tree_json in forest_json:
>> +        lines.extend(tree(tree_json, tree_num=tree_num, node_num=0)[0])
>> +        lines.append("")
>> +        tree_num += 1
>> +
>> +    lines.append(f"t{len(forest_json)}: // No such tree.")
>> +    lines.append("return Score;")
>> +    code += "  " + "\n  ".join(lines)
>> +    code += "\n}"
>> +    return code
>> +
>> +
>> +def gen_cpp_code(forest_json: list, features_json: list, filename: str,
>> +                 cpp_class: CppClass):
>> +    """Generates code for the .cpp file."""
>> +    # Headers
>> +    # Required by OrderEncode(float F).
>> +    angled_include = [
>> +        f'#include <{h}>'
>> +        for h in ["cstring", "limits"]
>> +    ]
>> +
>> +    # Include generated header.
>> +    qouted_headers = {f"{filename}.h", "llvm/ADT/bit.h"}
>> +    # Headers required by ENUM features used by the model.
>> +    qouted_headers |= {f["header"]
>> +                       for f in features_json if f["kind"] == "ENUM"}
>> +    quoted_include = [f'#include "{h}"' for h in sorted(qouted_headers)]
>> +
>> +    # using-decl for ENUM features.
>> +    using_decls = "\n".join(f"using {feature['name']}_type =
>> {feature['type']};"
>> +                            for feature in features_json
>> +                            if feature["kind"] == "ENUM")
>> +    nl = "\n"
>> +    return f"""{nl.join(angled_include)}
>> +
>> +{nl.join(quoted_include)}
>> +
>> +#define BIT(X) (1 << X)
>> +
>> +{cpp_class.ns_begin()}
>> +
>> +{using_decls}
>> +
>> +uint32_t {cpp_class.name}::OrderEncode(float F) {{
>> +  static_assert(std::numeric_limits<float>::is_iec559, "");
>> +  constexpr uint32_t TopBit = ~(~uint32_t{{0}} >> 1);
>> +
>> +  // Get the bits of the float. Endianness is the same as for integers.
>> +  uint32_t U = llvm::bit_cast<uint32_t>(F);
>> +  std::memcpy(&U, &F, sizeof(U));
>> +  // IEEE 754 floats compare like sign-magnitude integers.
>> +  if (U & TopBit)    // Negative float.
>> +    return 0 - U;    // Map onto the low half of integers, order
>> reversed.
>> +  return U + TopBit; // Positive floats map onto the high half of
>> integers.
>> +}}
>> +
>> +{evaluate_func(forest_json, cpp_class)}
>> +{cpp_class.ns_end()}
>> +"""
>> +
>> +
>> +def main():
>> +    parser = argparse.ArgumentParser('DecisionForestCodegen')
>> +    parser.add_argument('--filename', help='output file name.')
>> +    parser.add_argument('--output_dir', help='output directory.')
>> +    parser.add_argument('--model', help='path to model directory.')
>> +    parser.add_argument(
>> +        '--cpp_class',
>> +        help='The name of the class (which may be a namespace-qualified)
>> created in generated header.'
>> +    )
>> +    ns = parser.parse_args()
>> +
>> +    output_dir = ns.output_dir
>> +    filename = ns.filename
>> +    header_file = f"{output_dir}/{filename}.h"
>> +    cpp_file = f"{output_dir}/{filename}.cpp"
>> +    cpp_class = CppClass(cpp_class=ns.cpp_class)
>> +
>> +    model_file = f"{ns.model}/forest.json"
>> +    features_file = f"{ns.model}/features.json"
>> +
>> +    with open(features_file) as f:
>> +        features_json = json.load(f)
>> +
>> +    with open(model_file) as m:
>> +        forest_json = json.load(m)
>> +
>> +    with open(cpp_file, 'w+t') as output_cc:
>> +        output_cc.write(
>> +            gen_cpp_code(forest_json=forest_json,
>> +                         features_json=features_json,
>> +                         filename=filename,
>> +                         cpp_class=cpp_class))
>> +
>> +    with open(header_file, 'w+t') as output_h:
>> +        output_h.write(gen_header_code(
>> +            features_json=features_json, cpp_class=cpp_class,
>> filename=filename))
>> +
>> +
>> +if __name__ == '__main__':
>> +    main()
>>
>> diff  --git a/clang-tools-extra/clangd/quality/README.md
>> b/clang-tools-extra/clangd/quality/README.md
>> new file mode 100644
>> index 000000000000..36fa37320e54
>> --- /dev/null
>> +++ b/clang-tools-extra/clangd/quality/README.md
>> @@ -0,0 +1,220 @@
>> +# Decision Forest Code Completion Model
>> +
>> +## Decision Forest
>> +A **decision forest** is a collection of many decision trees. A
>> **decision tree** is a full binary tree that provides a quality prediction
>> for an input (code completion item). Internal nodes represent a **binary
>> decision** based on the input data, and leaf nodes represent a prediction.
>> +
>> +In order to predict the relevance of a code completion item, we traverse
>> each of the decision trees beginning with their roots until we reach a
>> leaf.
>> +
>> +An input (code completion candidate) is characterized as a set of
>> **features**, such as the *type of symbol* or the *number of existing
>> references*.
>> +
>> +At every non-leaf node, we evaluate the condition to decide whether to
>> go left or right. The condition compares one **feature** of the input
>> against a constant. The condition can be of two types:
>> +- **if_greater**: Checks whether a numerical feature is **>=** a
>> **threshold**.
>> +- **if_member**: Checks whether the **enum** feature is contained in the
>> **set** defined in the node.
>> +
>> +A leaf node contains the value **score**.
>> +To compute an overall **quality** score, we traverse each tree in this
>> way and add up the scores.
>> +
>> +## Model Input Format
>> +The input model is represented in json format.
>> +
>> +### Features
>> +The file **features.json** defines the features available to the model.
>> +It is a json list of features. The features can be of the following two
>> kinds.
>> +
>> +#### Number
>> +```
>> +{
>> +  "name": "a_numerical_feature",
>> +  "kind": "NUMBER"
>> +}
>> +```
>> +#### Enum
>> +```
>> +{
>> +  "name": "an_enum_feature",
>> +  "kind": "ENUM",
>> +  "type": "fully::qualified::enum",
>> +  "header": "path/to/HeaderDeclaringEnum.h"
>> +}
>> +```
>> +The field `type` specifies the fully qualified name of the enum.
>> +The maximum cardinality of the enum can be **32**.
>> +
>> +The field `header` specifies the header containing the declaration of
>> the enum.
>> +This header is included by the inference runtime.
>> +
>> +
>> +### Decision Forest
>> +The file `forest.json` defines the  decision forest. It is a json list
>> of **DecisionTree**.
>> +
>> +**DecisionTree** is one of **IfGreaterNode**, **IfMemberNode**,
>> **LeafNode**.
>> +#### IfGreaterNode
>> +```
>> +{
>> +  "operation": "if_greater",
>> +  "feature": "a_numerical_feature",
>> +  "threshold": A real number,
>> +  "then": {A DecisionTree},
>> +  "else": {A DecisionTree}
>> +}
>> +```
>> +#### IfMemberNode
>> +```
>> +{
>> +  "operation": "if_member",
>> +  "feature": "an_enum_feature",
>> +  "set": ["enum_value1", "enum_value2", ...],
>> +  "then": {A DecisionTree},
>> +  "else": {A DecisionTree}
>> +}
>> +```
>> +#### LeafNode
>> +```
>> +{
>> +  "operation": "boost",
>> +  "score": A real number
>> +}
>> +```
>> +
>> +## Code Generator for Inference
>> +The implementation of inference runtime is split across:
>> +
>> +### Code generator
>> +The code generator `CompletionModelCodegen.py` takes the
>> `${model}` directory as input and generates the inference library:
>> +- `${output_dir}/{filename}.h`
>> +- `${output_dir}/{filename}.cpp`
>> +
>> +Invocation
>> +```
>> +python3 CompletionModelCodegen.py \
>> +        --model path/to/model/dir \
>> +        --output_dir path/to/output/dir \
>> +        --filename OutputFileName \
>> +        --cpp_class clang::clangd::YourExampleClass
>> +```
>> +### Build System
>> +`CompletionModel.cmake` provides the `gen_decision_forest` function.
>> +A client intending to use the CompletionModel for inference can use this
>> to trigger the code generator and generate the inference library.
>> +It can then use the generated API by including and depending on this
>> library.
>> +
>> +### Generated API for inference
>> +The code generator defines the Example `class` inside relevant
>> namespaces as specified in option `${cpp_class}`.
>> +
>> +The members of this generated class comprise all the features mentioned
>> in `features.json`.
>> +Thus this class can represent a code completion candidate that needs to
>> be scored.
>> +
>> +The API also provides `float Evaluate(const MyClass&)` which can be used
>> to score the completion candidate.
>> +
>> +
>> +## Example
>> +### model/features.json
>> +```
>> +[
>> +  {
>> +    "name": "ANumber",
>> +    "kind": "NUMBER"
>> +  },
>> +  {
>> +    "name": "AFloat",
>> +    "kind": "NUMBER"
>> +  },
>> +  {
>> +    "name": "ACategorical",
>> +    "kind": "ENUM",
>> +    "type": "ns1::ns2::TestEnum",
>> +    "header": "model/CategoricalFeature.h"
>> +  }
>> +]
>> +```
>> +### model/forest.json
>> +```
>> +[
>> +  {
>> +    "operation": "if_greater",
>> +    "feature": "ANumber",
>> +    "threshold": 200.0,
>> +    "then": {
>> +      "operation": "if_greater",
>> +      "feature": "AFloat",
>> +      "threshold": -1,
>> +      "then": {
>> +        "operation": "boost",
>> +        "score": 10.0
>> +      },
>> +      "else": {
>> +        "operation": "boost",
>> +        "score": -20.0
>> +      }
>> +    },
>> +    "else": {
>> +      "operation": "if_member",
>> +      "feature": "ACategorical",
>> +      "set": [
>> +        "A",
>> +        "C"
>> +      ],
>> +      "then": {
>> +        "operation": "boost",
>> +        "score": 3.0
>> +      },
>> +      "else": {
>> +        "operation": "boost",
>> +        "score": -4.0
>> +      }
>> +    }
>> +  },
>> +  {
>> +    "operation": "if_member",
>> +    "feature": "ACategorical",
>> +    "set": [
>> +      "A",
>> +      "B"
>> +    ],
>> +    "then": {
>> +      "operation": "boost",
>> +      "score": 5.0
>> +    },
>> +    "else": {
>> +      "operation": "boost",
>> +      "score": -6.0
>> +    }
>> +  }
>> +]
>> +```
>> +### DecisionForestRuntime.h
>> +```
>> +...
>> +namespace ns1 {
>> +namespace ns2 {
>> +namespace test {
>> +class Example {
>> +public:
>> +  void setANumber(float V) { ... }
>> +  void setAFloat(float V) { ... }
>> +  void setACategorical(unsigned V) { ... }
>> +
>> +private:
>> +  ...
>> +};
>> +
>> +float Evaluate(const Example&);
>> +} // namespace test
>> +} // namespace ns2
>> +} // namespace ns1
>> +```
>> +
>> +### CMake Invocation
>> +In order to use the inference runtime, one can use the `gen_decision_forest`
>> function
>> +described in `CompletionModel.cmake`, which invokes
>> `CompletionModelCodegen.py` with the appropriate arguments.
>> +
>> +For example, the following invocation reads the model present in
>> `path/to/model` and creates
>> +`${CMAKE_CURRENT_BINARY_DIR}/myfilename.h` and
>> `${CMAKE_CURRENT_BINARY_DIR}/myfilename.cpp`
>> +describing a `class` named `MyClass` in namespace `fully::qualified`.
>> +
>> +
>> +
>> +```
>> +gen_decision_forest(path/to/model
>> +  myfilename
>> +  ::fully::qualified::MyClass)
>> +```
>> \ No newline at end of file
>>
>> diff  --git a/clang-tools-extra/clangd/quality/model/features.json
>> b/clang-tools-extra/clangd/quality/model/features.json
>> new file mode 100644
>> index 000000000000..e91eccd1ce20
>> --- /dev/null
>> +++ b/clang-tools-extra/clangd/quality/model/features.json
>> @@ -0,0 +1,8 @@
>> +[
>> +    {
>> +        "name": "ContextKind",
>> +        "kind": "ENUM",
>> +        "type": "clang::CodeCompletionContext::Kind",
>> +        "header": "clang/Sema/CodeCompleteConsumer.h"
>> +    }
>> +]
>> \ No newline at end of file
>>
>> diff  --git a/clang-tools-extra/clangd/quality/model/forest.json
>> b/clang-tools-extra/clangd/quality/model/forest.json
>> new file mode 100644
>> index 000000000000..78a1524e2d81
>> --- /dev/null
>> +++ b/clang-tools-extra/clangd/quality/model/forest.json
>> @@ -0,0 +1,18 @@
>> +[
>> +    {
>> +        "operation": "if_member",
>> +        "feature": "ContextKind",
>> +        "set": [
>> +            "CCC_DotMemberAccess",
>> +            "CCC_ArrowMemberAccess"
>> +        ],
>> +        "then": {
>> +            "operation": "boost",
>> +            "score": 3.0
>> +        },
>> +        "else": {
>> +            "operation": "boost",
>> +            "score": 1.0
>> +        }
>> +    }
>> +]
>> \ No newline at end of file
>>
>> diff  --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt
>> b/clang-tools-extra/clangd/unittests/CMakeLists.txt
>> index 2167b5e210e2..a84fd0b71ca5 100644
>> --- a/clang-tools-extra/clangd/unittests/CMakeLists.txt
>> +++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt
>> @@ -28,6 +28,9 @@ if (CLANGD_ENABLE_REMOTE)
>>    set(REMOTE_TEST_SOURCES remote/MarshallingTests.cpp)
>>  endif()
>>
>> +include(${CMAKE_CURRENT_SOURCE_DIR}/../quality/CompletionModel.cmake)
>> +gen_decision_forest(${CMAKE_CURRENT_SOURCE_DIR}/decision_forest_model
>> DecisionForestRuntimeTest ::ns1::ns2::test::Example)
>> +
>>  add_custom_target(ClangdUnitTests)
>>  add_unittest(ClangdUnitTests ClangdTests
>>    Annotations.cpp
>> @@ -44,6 +47,7 @@ add_unittest(ClangdUnitTests ClangdTests
>>    ConfigCompileTests.cpp
>>    ConfigProviderTests.cpp
>>    ConfigYAMLTests.cpp
>> +  DecisionForestTests.cpp
>>    DexTests.cpp
>>    DiagnosticsTests.cpp
>>    DraftStoreTests.cpp
>> @@ -89,6 +93,7 @@ add_unittest(ClangdUnitTests ClangdTests
>>    TweakTesting.cpp
>>    URITests.cpp
>>    XRefsTests.cpp
>> +  ${CMAKE_CURRENT_BINARY_DIR}/DecisionForestRuntimeTest.cpp
>>
>>    support/CancellationTests.cpp
>>    support/ContextTests.cpp
>> @@ -103,6 +108,11 @@ add_unittest(ClangdUnitTests ClangdTests
>>    $<TARGET_OBJECTS:obj.clangDaemonTweaks>
>>    )
>>
>> +# Include generated CompletionModel headers.
>> +target_include_directories(ClangdTests PUBLIC
>> +  $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
>> +)
>> +
>>  clang_target_link_libraries(ClangdTests
>>    PRIVATE
>>    clangAST
>>
>> diff  --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
>> b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
>> index 635e036039a0..460976d64f9f 100644
>> --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
>> +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
>> @@ -10,6 +10,7 @@
>>  #include "ClangdServer.h"
>>  #include "CodeComplete.h"
>>  #include "Compiler.h"
>> +#include "CompletionModel.h"
>>  #include "Matchers.h"
>>  #include "Protocol.h"
>>  #include "Quality.h"
>> @@ -47,6 +48,7 @@ using ::testing::HasSubstr;
>>  using ::testing::IsEmpty;
>>  using ::testing::Not;
>>  using ::testing::UnorderedElementsAre;
>> +using ContextKind = CodeCompletionContext::Kind;
>>
>>  // GMock helpers for matching completion items.
>>  MATCHER_P(Named, Name, "") { return arg.Name == Name; }
>> @@ -161,6 +163,16 @@ Symbol withReferences(int N, Symbol S) {
>>    return S;
>>  }
>>
>> +TEST(DecisionForestRuntime, SanityTest) {
>> +  using Example = clangd::Example;
>> +  using clangd::Evaluate;
>> +  Example E1;
>> +  E1.setContextKind(ContextKind::CCC_ArrowMemberAccess);
>> +  Example E2;
>> +  E2.setContextKind(ContextKind::CCC_SymbolOrNewName);
>> +  EXPECT_GT(Evaluate(E1), Evaluate(E2));
>> +}
>> +
>>  TEST(CompletionTest, Limit) {
>>    clangd::CodeCompleteOptions Opts;
>>    Opts.Limit = 2;
>>
>> diff  --git a/clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
>> b/clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
>> new file mode 100644
>> index 000000000000..d29c8a4a0358
>> --- /dev/null
>> +++ b/clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
>> @@ -0,0 +1,29 @@
>> +#include "DecisionForestRuntimeTest.h"
>> +#include "decision_forest_model/CategoricalFeature.h"
>> +#include "gtest/gtest.h"
>> +
>> +namespace clang {
>> +namespace clangd {
>> +
>> +TEST(DecisionForestRuntime, Evaluate) {
>> +  using Example = ::ns1::ns2::test::Example;
>> +  using Cat = ::ns1::ns2::TestEnum;
>> +  using ::ns1::ns2::test::Evaluate;
>> +
>> +  Example E;
>> +  E.setANumber(200);         // True
>> +  E.setAFloat(0);            // True: +10.0
>> +  E.setACategorical(Cat::A); // True: +5.0
>> +  EXPECT_EQ(Evaluate(E), 15.0);
>> +
>> +  E.setANumber(200);         // True
>> +  E.setAFloat(-2.5);         // False: -20.0
>> +  E.setACategorical(Cat::B); // True: +5.0
>> +  EXPECT_EQ(Evaluate(E), -15.0);
>> +
>> +  E.setANumber(100);         // False
>> +  E.setACategorical(Cat::C); // True: +3.0, False: -6.0
>> +  EXPECT_EQ(Evaluate(E), -3.0);
>> +}
>> +} // namespace clangd
>> +} // namespace clang
>>
>> diff  --git
>> a/clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
>> b/clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
>> new file mode 100644
>> index 000000000000..dfb6ab3b199d
>> --- /dev/null
>> +++
>> b/clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
>> @@ -0,0 +1,5 @@
>> +namespace ns1 {
>> +namespace ns2 {
>> +enum TestEnum { A, B, C, D };
>> +} // namespace ns2
>> +} // namespace ns1
>>
>> diff  --git
>> a/clang-tools-extra/clangd/unittests/decision_forest_model/features.json
>> b/clang-tools-extra/clangd/unittests/decision_forest_model/features.json
>> new file mode 100644
>> index 000000000000..7f159f192e19
>> --- /dev/null
>> +++
>> b/clang-tools-extra/clangd/unittests/decision_forest_model/features.json
>> @@ -0,0 +1,16 @@
>> +[
>> +    {
>> +        "name": "ANumber",
>> +        "kind": "NUMBER"
>> +    },
>> +    {
>> +        "name": "AFloat",
>> +        "kind": "NUMBER"
>> +    },
>> +    {
>> +        "name": "ACategorical",
>> +        "kind": "ENUM",
>> +        "type": "ns1::ns2::TestEnum",
>> +        "header": "decision_forest_model/CategoricalFeature.h"
>> +    }
>> +]
>> \ No newline at end of file
>>
>> diff  --git
>> a/clang-tools-extra/clangd/unittests/decision_forest_model/forest.json
>> b/clang-tools-extra/clangd/unittests/decision_forest_model/forest.json
>> new file mode 100644
>> index 000000000000..26f071da485d
>> --- /dev/null
>> +++ b/clang-tools-extra/clangd/unittests/decision_forest_model/forest.json
>> @@ -0,0 +1,52 @@
>> +[
>> +    {
>> +        "operation": "if_greater",
>> +        "feature": "ANumber",
>> +        "threshold": 200.0,
>> +        "then": {
>> +            "operation": "if_greater",
>> +            "feature": "AFloat",
>> +            "threshold": -1,
>> +            "then": {
>> +                "operation": "boost",
>> +                "score": 10.0
>> +            },
>> +            "else": {
>> +                "operation": "boost",
>> +                "score": -20.0
>> +            }
>> +        },
>> +        "else": {
>> +            "operation": "if_member",
>> +            "feature": "ACategorical",
>> +            "set": [
>> +                "A",
>> +                "C"
>> +            ],
>> +            "then": {
>> +                "operation": "boost",
>> +                "score": 3.0
>> +            },
>> +            "else": {
>> +                "operation": "boost",
>> +                "score": -4.0
>> +            }
>> +        }
>> +    },
>> +    {
>> +        "operation": "if_member",
>> +        "feature": "ACategorical",
>> +        "set": [
>> +            "A",
>> +            "B"
>> +        ],
>> +        "then": {
>> +            "operation": "boost",
>> +            "score": 5.0
>> +        },
>> +        "else": {
>> +            "operation": "boost",
>> +            "score": -6.0
>> +        }
>> +    }
>> +]
>> \ No newline at end of file
>>
>>
>>
>> _______________________________________________
>> llvm-branch-commits mailing list
>> llvm-branch-commits@lists.llvm.org
>> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
>>
>

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Re: [llvm-branch-commits] [clang-tools-extra] 85c1c6a - [clangd] Add Random Forest runtime for code completion.

Reply via email to