On Fri, Sep 18, 2020 at 5:51 PM Eric Christopher <echri...@gmail.com> wrote:
> Hi Utkarsh, > > I've temporarily reverted this here: > > echristo@athyra ~/s/llvm-project (master)> git push > To github.com:llvm/llvm-project.git > 1f0b43638ed..549e55b3d56 master -> master > > the decision forest header file referenced in the unittest doesn't appear > to have made it into the commit? > > Aha. I see how this is supposed to work. It's a bit of a complicated system. Is there any way we can break this down into smaller chunks to test? -eric > Thanks and feel free to follow up if I've missed something. > > -eric > > On Fri, Sep 18, 2020 at 12:38 PM Utkarsh Saxena via llvm-branch-commits < > llvm-branch-commits@lists.llvm.org> wrote: > >> >> Author: Utkarsh Saxena >> Date: 2020-09-18T18:27:42+02:00 >> New Revision: 85c1c6a4ba4eebbd3f5cefb1512498b9f8a5bb7a >> >> URL: >> https://github.com/llvm/llvm-project/commit/85c1c6a4ba4eebbd3f5cefb1512498b9f8a5bb7a >> DIFF: >> https://github.com/llvm/llvm-project/commit/85c1c6a4ba4eebbd3f5cefb1512498b9f8a5bb7a.diff >> >> LOG: [clangd] Add Random Forest runtime for code completion. >> >> Summary: >> [WIP] >> - Proposes a json format for representing Random Forest model. >> - Proposes a way to test the generated runtime using a test model. >> >> TODO: >> - Add generated source code snippet for easier review. >> - Fix unused label warning. >> - Figure out required using declarations for CATEGORICAL columns from >> Features.json. >> - Necessary Google3 internal modifications for blaze before landing. >> - Add documentation for format of the model. >> - Document more. 
>> >> Subscribers: mgorny, ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, >> cfe-commits >> >> Tags: #clang >> >> Differential Revision: https://reviews.llvm.org/D83814 >> >> Added: >> clang-tools-extra/clangd/quality/CompletionModel.cmake >> clang-tools-extra/clangd/quality/CompletionModelCodegen.py >> clang-tools-extra/clangd/quality/README.md >> clang-tools-extra/clangd/quality/model/features.json >> clang-tools-extra/clangd/quality/model/forest.json >> clang-tools-extra/clangd/unittests/DecisionForestTests.cpp >> >> clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h >> clang-tools-extra/clangd/unittests/decision_forest_model/features.json >> clang-tools-extra/clangd/unittests/decision_forest_model/forest.json >> >> Modified: >> clang-tools-extra/clangd/CMakeLists.txt >> clang-tools-extra/clangd/unittests/CMakeLists.txt >> clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp >> >> Removed: >> >> >> >> >> ################################################################################ >> diff --git a/clang-tools-extra/clangd/CMakeLists.txt >> b/clang-tools-extra/clangd/CMakeLists.txt >> index 3a1a034ed17b..9d2ab5be222a 100644 >> --- a/clang-tools-extra/clangd/CMakeLists.txt >> +++ b/clang-tools-extra/clangd/CMakeLists.txt >> @@ -28,6 +28,9 @@ set(LLVM_LINK_COMPONENTS >> FrontendOpenMP >> Option >> ) >> + >> +include(${CMAKE_CURRENT_SOURCE_DIR}/quality/CompletionModel.cmake) >> +gen_decision_forest(${CMAKE_CURRENT_SOURCE_DIR}/quality/model >> CompletionModel clang::clangd::Example) >> >> if(MSVC AND NOT CLANG_CL) >> set_source_files_properties(CompileCommands.cpp PROPERTIES >> COMPILE_FLAGS -wd4130) # disables C4130: logical operation on address of >> string constant >> @@ -77,6 +80,7 @@ add_clang_library(clangDaemon >> TUScheduler.cpp >> URI.cpp >> XRefs.cpp >> + ${CMAKE_CURRENT_BINARY_DIR}/CompletionModel.cpp >> >> index/Background.cpp >> index/BackgroundIndexLoader.cpp >> @@ -117,6 +121,11 @@ add_clang_library(clangDaemon >> 
omp_gen >> ) >> >> +# Include generated CompletionModel headers. >> +target_include_directories(clangDaemon PUBLIC >> + $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}> >> +) >> + >> clang_target_link_libraries(clangDaemon >> PRIVATE >> clangAST >> >> diff --git a/clang-tools-extra/clangd/quality/CompletionModel.cmake >> b/clang-tools-extra/clangd/quality/CompletionModel.cmake >> new file mode 100644 >> index 000000000000..60c6d2aa8433 >> --- /dev/null >> +++ b/clang-tools-extra/clangd/quality/CompletionModel.cmake >> @@ -0,0 +1,37 @@ >> +# Run the Completion Model Codegenerator on the model present in the >> +# ${model} directory. >> +# Produces a pair of files called ${filename}.h and ${filename}.cpp in >> the >> +# ${CMAKE_CURRENT_BINARY_DIR}. The generated header >> +# will define a C++ class called ${cpp_class} - which may be a >> +# namespace-qualified class name. >> +function(gen_decision_forest model filename cpp_class) >> + set(model_compiler >> ${CMAKE_SOURCE_DIR}/../clang-tools-extra/clangd/quality/CompletionModelCodegen.py) >> + >> + set(output_dir ${CMAKE_CURRENT_BINARY_DIR}) >> + set(header_file ${output_dir}/${filename}.h) >> + set(cpp_file ${output_dir}/${filename}.cpp) >> + >> + add_custom_command(OUTPUT ${header_file} ${cpp_file} >> + COMMAND "${Python3_EXECUTABLE}" ${model_compiler} >> + --model ${model} >> + --output_dir ${output_dir} >> + --filename ${filename} >> + --cpp_class ${cpp_class} >> + COMMENT "Generating code completion model runtime..." >> + DEPENDS ${model_compiler} ${model}/forest.json ${model}/features.json >> + VERBATIM ) >> + >> + set_source_files_properties(${header_file} PROPERTIES >> + GENERATED 1) >> + set_source_files_properties(${cpp_file} PROPERTIES >> + GENERATED 1) >> + >> + # Disable unused label warning for generated files. 
>> + if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") >> + set_source_files_properties(${cpp_file} PROPERTIES >> + COMPILE_FLAGS /wd4102) >> + else() >> + set_source_files_properties(${cpp_file} PROPERTIES >> + COMPILE_FLAGS -Wno-unused) >> + endif() >> +endfunction() >> >> diff --git a/clang-tools-extra/clangd/quality/CompletionModelCodegen.py >> b/clang-tools-extra/clangd/quality/CompletionModelCodegen.py >> new file mode 100644 >> index 000000000000..8f8234f6ebbc >> --- /dev/null >> +++ b/clang-tools-extra/clangd/quality/CompletionModelCodegen.py >> @@ -0,0 +1,283 @@ >> +"""Code generator for Code Completion Model Inference. >> + >> +Tool runs on the Decision Forest model defined in {model} directory. >> +It generates two files: {output_dir}/{filename}.h and >> {output_dir}/{filename}.cpp >> +The generated files defines the Example class named {cpp_class} having >> all the features as class members. >> +The generated runtime provides an `Evaluate` function which can be used >> to score a code completion candidate. 
>> +""" >> + >> +import argparse >> +import json >> +import struct >> +from enum import Enum >> + >> + >> +class CppClass: >> + """Holds class name and names of the enclosing namespaces.""" >> + >> + def __init__(self, cpp_class): >> + ns_and_class = cpp_class.split("::") >> + self.ns = [ns for ns in ns_and_class[0:-1] if len(ns) > 0] >> + self.name = ns_and_class[-1] >> + if len(self.name) == 0: >> + raise ValueError("Empty class name.") >> + >> + def ns_begin(self): >> + """Returns snippet for opening namespace declarations.""" >> + open_ns = [f"namespace {ns} {{" for ns in self.ns] >> + return "\n".join(open_ns) >> + >> + def ns_end(self): >> + """Returns snippet for closing namespace declarations.""" >> + close_ns = [ >> + f"}} // namespace {ns}" for ns in reversed(self.ns)] >> + return "\n".join(close_ns) >> + >> + >> +def header_guard(filename): >> + '''Returns the header guard for the generated header.''' >> + return f"GENERATED_DECISION_FOREST_MODEL_{filename.upper()}_H" >> + >> + >> +def boost_node(n, label, next_label): >> + """Returns code snippet for a leaf/boost node. >> + Adds value of leaf to the score and jumps to the root of the next >> tree.""" >> + return f"{label}: Score += {n['score']}; goto {next_label};" >> + >> + >> +def if_greater_node(n, label, next_label): >> + """Returns code snippet for a if_greater node. >> + Jumps to true_label if the Example feature (NUMBER) is greater than >> the threshold. >> + Comparing integers is much faster than comparing floats. Assuming >> floating points >> + are represented as IEEE 754, it order-encodes the floats to integers >> before comparing them. >> + Control falls through if condition is evaluated to false.""" >> + threshold = n["threshold"] >> + return f"{label}: if (E.{n['feature']} >= {order_encode(threshold)} >> /*{threshold}*/) goto {next_label};" >> + >> + >> +def if_member_node(n, label, next_label): >> + """Returns code snippet for a if_member node. 
>> + Jumps to true_label if the Example feature (ENUM) is present in the >> set of enum values >> + described in the node. >> + Control falls through if condition is evaluated to false.""" >> + members = '|'.join([ >> + f"BIT({n['feature']}_type::{member})" >> + for member in n["set"] >> + ]) >> + return f"{label}: if (E.{n['feature']} & ({members})) goto >> {next_label};" >> + >> + >> +def node(n, label, next_label): >> + """Returns code snippet for the node.""" >> + return { >> + 'boost': boost_node, >> + 'if_greater': if_greater_node, >> + 'if_member': if_member_node, >> + }[n['operation']](n, label, next_label) >> + >> + >> +def tree(t, tree_num: int, node_num: int): >> + """Returns code for inferencing a Decision Tree. >> + Also returns the size of the decision tree. >> + >> + A tree starts with its label `t{tree#}`. >> + A node of the tree starts with label `t{tree#}_n{node#}`. >> + >> + The tree contains two types of node: Conditional node and Leaf node. >> + - Conditional node evaluates a condition. If true, it jumps to the >> true node/child. >> + Code is generated using pre-order traversal of the tree >> considering >> + false node as the first child. Therefore the false node is >> always the >> + immediately next label. >> + - Leaf node adds the value to the score and jumps to the next tree. 
>> + """ >> + label = f"t{tree_num}_n{node_num}" >> + code = [] >> + if node_num == 0: >> + code.append(f"t{tree_num}:") >> + >> + if t["operation"] == "boost": >> + code.append(node(t, label=label, next_label=f"t{tree_num+1}")) >> + return code, 1 >> + >> + false_code, false_size = tree( >> + t['else'], tree_num=tree_num, node_num=node_num+1) >> + >> + true_node_num = node_num+false_size+1 >> + true_label = f"t{tree_num}_n{true_node_num}" >> + >> + true_code, true_size = tree( >> + t['then'], tree_num=tree_num, node_num=true_node_num) >> + >> + code.append(node(t, label=label, next_label=true_label)) >> + >> + return code+false_code+true_code, 1+false_size+true_size >> + >> + >> +def gen_header_code(features_json: list, cpp_class, filename: str): >> + """Returns code for header declaring the inference runtime. >> + >> + Declares the Example class named {cpp_class} inside relevant >> namespaces. >> + The Example class contains all the features as class members. This >> + class can be used to represent a code completion candidate. >> + Provides `float Evaluate()` function which can be used to score the >> Example. >> + """ >> + setters = [] >> + for f in features_json: >> + feature = f["name"] >> + if f["kind"] == "NUMBER": >> + # Floats are order-encoded to integers for faster comparison. >> + setters.append( >> + f"void set{feature}(float V) {{ {feature} = >> OrderEncode(V); }}") >> + elif f["kind"] == "ENUM": >> + setters.append( >> + f"void set{feature}(unsigned V) {{ {feature} = 1 << V; >> }}") >> + else: >> + raise ValueError("Unhandled feature type.", f["kind"]) >> + >> + # Class members represent all the features of the Example. 
>> + class_members = [f"uint32_t {f['name']} = 0;" for f in features_json] >> + >> + nline = "\n " >> + guard = header_guard(filename) >> + return f"""#ifndef {guard} >> +#define {guard} >> +#include <cstdint> >> + >> +{cpp_class.ns_begin()} >> +class {cpp_class.name} {{ >> +public: >> + {nline.join(setters)} >> + >> +private: >> + {nline.join(class_members)} >> + >> + // Produces an integer that sorts in the same order as F. >> + // That is: a < b <==> orderEncode(a) < orderEncode(b). >> + static uint32_t OrderEncode(float F); >> + friend float Evaluate(const {cpp_class.name}&); >> +}}; >> + >> +float Evaluate(const {cpp_class.name}&); >> +{cpp_class.ns_end()} >> +#endif // {guard} >> +""" >> + >> + >> +def order_encode(v: float): >> + i = struct.unpack('<I', struct.pack('<f', v))[0] >> + TopBit = 1 << 31 >> + # IEEE 754 floats compare like sign-magnitude integers. >> + if (i & TopBit): # Negative float >> + return (1 << 32) - i # low half of integers, order reversed. >> + return TopBit + i # top half of integers >> + >> + >> +def evaluate_func(forest_json: list, cpp_class: CppClass): >> + """Generates code for `float Evaluate(const {Example}&)` function. >> + The generated function can be used to score an Example.""" >> + code = f"float Evaluate(const {cpp_class.name}& E) {{\n" >> + lines = [] >> + lines.append("float Score = 0;") >> + tree_num = 0 >> + for tree_json in forest_json: >> + lines.extend(tree(tree_json, tree_num=tree_num, node_num=0)[0]) >> + lines.append("") >> + tree_num += 1 >> + >> + lines.append(f"t{len(forest_json)}: // No such tree.") >> + lines.append("return Score;") >> + code += " " + "\n ".join(lines) >> + code += "\n}" >> + return code >> + >> + >> +def gen_cpp_code(forest_json: list, features_json: list, filename: str, >> + cpp_class: CppClass): >> + """Generates code for the .cpp file.""" >> + # Headers >> + # Required by OrderEncode(float F). 
>> + angled_include = [ >> + f'#include <{h}>' >> + for h in ["cstring", "limits"] >> + ] >> + >> + # Include generated header. >> + qouted_headers = {f"{filename}.h", "llvm/ADT/bit.h"} >> + # Headers required by ENUM features used by the model. >> + qouted_headers |= {f["header"] >> + for f in features_json if f["kind"] == "ENUM"} >> + quoted_include = [f'#include "{h}"' for h in sorted(qouted_headers)] >> + >> + # using-decl for ENUM features. >> + using_decls = "\n".join(f"using {feature['name']}_type = >> {feature['type']};" >> + for feature in features_json >> + if feature["kind"] == "ENUM") >> + nl = "\n" >> + return f"""{nl.join(angled_include)} >> + >> +{nl.join(quoted_include)} >> + >> +#define BIT(X) (1 << X) >> + >> +{cpp_class.ns_begin()} >> + >> +{using_decls} >> + >> +uint32_t {cpp_class.name}::OrderEncode(float F) {{ >> + static_assert(std::numeric_limits<float>::is_iec559, ""); >> + constexpr uint32_t TopBit = ~(~uint32_t{{0}} >> 1); >> + >> + // Get the bits of the float. Endianness is the same as for integers. >> + uint32_t U = llvm::bit_cast<uint32_t>(F); >> + std::memcpy(&U, &F, sizeof(U)); >> + // IEEE 754 floats compare like sign-magnitude integers. >> + if (U & TopBit) // Negative float. >> + return 0 - U; // Map onto the low half of integers, order >> reversed. >> + return U + TopBit; // Positive floats map onto the high half of >> integers. >> +}} >> + >> +{evaluate_func(forest_json, cpp_class)} >> +{cpp_class.ns_end()} >> +""" >> + >> + >> +def main(): >> + parser = argparse.ArgumentParser('DecisionForestCodegen') >> + parser.add_argument('--filename', help='output file name.') >> + parser.add_argument('--output_dir', help='output directory.') >> + parser.add_argument('--model', help='path to model directory.') >> + parser.add_argument( >> + '--cpp_class', >> + help='The name of the class (which may be a namespace-qualified) >> created in generated header.' 
>> + ) >> + ns = parser.parse_args() >> + >> + output_dir = ns.output_dir >> + filename = ns.filename >> + header_file = f"{output_dir}/{filename}.h" >> + cpp_file = f"{output_dir}/{filename}.cpp" >> + cpp_class = CppClass(cpp_class=ns.cpp_class) >> + >> + model_file = f"{ns.model}/forest.json" >> + features_file = f"{ns.model}/features.json" >> + >> + with open(features_file) as f: >> + features_json = json.load(f) >> + >> + with open(model_file) as m: >> + forest_json = json.load(m) >> + >> + with open(cpp_file, 'w+t') as output_cc: >> + output_cc.write( >> + gen_cpp_code(forest_json=forest_json, >> + features_json=features_json, >> + filename=filename, >> + cpp_class=cpp_class)) >> + >> + with open(header_file, 'w+t') as output_h: >> + output_h.write(gen_header_code( >> + features_json=features_json, cpp_class=cpp_class, >> filename=filename)) >> + >> + >> +if __name__ == '__main__': >> + main() >> >> diff --git a/clang-tools-extra/clangd/quality/README.md >> b/clang-tools-extra/clangd/quality/README.md >> new file mode 100644 >> index 000000000000..36fa37320e54 >> --- /dev/null >> +++ b/clang-tools-extra/clangd/quality/README.md >> @@ -0,0 +1,220 @@ >> +# Decision Forest Code Completion Model >> + >> +## Decision Forest >> +A **decision forest** is a collection of many decision trees. A >> **decision tree** is a full binary tree that provides a quality prediction >> for an input (code completion item). Internal nodes represent a **binary >> decision** based on the input data, and leaf nodes represent a prediction. >> + >> +In order to predict the relevance of a code completion item, we traverse >> each of the decision trees beginning with their roots until we reach a >> leaf. >> + >> +An input (code completion candidate) is characterized as a set of >> **features**, such as the *type of symbol* or the *number of existing >> references*. >> + >> +At every non-leaf node, we evaluate the condition to decide whether to >> go left or right. 
The condition compares one **feature** of the input >> against a constant. The condition can be of two types: >> +- **if_greater**: Checks whether a numerical feature is **>=** a >> **threshold**. >> +- **if_member**: Checks whether the **enum** feature is contained in the >> **set** defined in the node. >> + >> +A leaf node contains the value **score**. >> +To compute an overall **quality** score, we traverse each tree in this >> way and add up the scores. >> + >> +## Model Input Format >> +The input model is represented in json format. >> + >> +### Features >> +The file **features.json** defines the features available to the model. >> +It is a json list of features. The features can be of the following two >> kinds. >> + >> +#### Number >> +``` >> +{ >> + "name": "a_numerical_feature", >> + "kind": "NUMBER" >> +} >> +``` >> +#### Enum >> +``` >> +{ >> + "name": "an_enum_feature", >> + "kind": "ENUM", >> + "enum": "fully::qualified::enum", >> + "header": "path/to/HeaderDeclaringEnum.h" >> +} >> +``` >> +The field `enum` specifies the fully qualified name of the enum. >> +The maximum cardinality of the enum can be **32**. >> + >> +The field `header` specifies the header containing the declaration of >> the enum. >> +This header is included by the inference runtime. >> + >> + >> +### Decision Forest >> +The file `forest.json` defines the decision forest. It is a json list >> of **DecisionTree**. >> + >> +**DecisionTree** is one of **IfGreaterNode**, **IfMemberNode**, >> **LeafNode**. 
>> +#### IfGreaterNode >> +``` >> +{ >> + "operation": "if_greater", >> + "feature": "a_numerical_feature", >> + "threshold": A real number, >> + "then": {A DecisionTree}, >> + "else": {A DecisionTree} >> +} >> +``` >> +#### IfMemberNode >> +``` >> +{ >> + "operation": "if_member", >> + "feature": "an_enum_feature", >> + "set": ["enum_value1", "enum_value2", ...], >> + "then": {A DecisionTree}, >> + "else": {A DecisionTree} >> +} >> +``` >> +#### LeafNode >> +``` >> +{ >> + "operation": "boost", >> + "score": A real number >> +} >> +``` >> + >> +## Code Generator for Inference >> +The implementation of inference runtime is split across: >> + >> +### Code generator >> +The code generator `CompletionModelCodegen.py` takes input the >> `${model}` dir and generates the inference library: >> +- `${output_dir}/{filename}.h` >> +- `${output_dir}/{filename}.cpp` >> + >> +Invocation >> +``` >> +python3 CompletionModelCodegen.py \ >> + --model path/to/model/dir \ >> + --output_dir path/to/output/dir \ >> + --filename OutputFileName \ >> + --cpp_class clang::clangd::YourExampleClass >> +``` >> +### Build System >> +`CompletionModel.cmake` provides `gen_decision_forest` method . >> +Client intending to use the CompletionModel for inference can use this >> to trigger the code generator and generate the inference library. >> +It can then use the generated API by including and depending on this >> library. >> + >> +### Generated API for inference >> +The code generator defines the Example `class` inside relevant >> namespaces as specified in option `${cpp_class}`. >> + >> +Members of this generated class comprises of all the features mentioned >> in `features.json`. >> +Thus this class can represent a code completion candidate that needs to >> be scored. >> + >> +The API also provides `float Evaluate(const MyClass&)` which can be used >> to score the completion candidate. 
>> + >> + >> +## Example >> +### model/features.json >> +``` >> +[ >> + { >> + "name": "ANumber", >> + "type": "NUMBER" >> + }, >> + { >> + "name": "AFloat", >> + "type": "NUMBER" >> + }, >> + { >> + "name": "ACategorical", >> + "type": "ENUM", >> + "enum": "ns1::ns2::TestEnum", >> + "header": "model/CategoricalFeature.h" >> + } >> +] >> +``` >> +### model/forest.json >> +``` >> +[ >> + { >> + "operation": "if_greater", >> + "feature": "ANumber", >> + "threshold": 200.0, >> + "then": { >> + "operation": "if_greater", >> + "feature": "AFloat", >> + "threshold": -1, >> + "then": { >> + "operation": "boost", >> + "score": 10.0 >> + }, >> + "else": { >> + "operation": "boost", >> + "score": -20.0 >> + } >> + }, >> + "else": { >> + "operation": "if_member", >> + "feature": "ACategorical", >> + "set": [ >> + "A", >> + "C" >> + ], >> + "then": { >> + "operation": "boost", >> + "score": 3.0 >> + }, >> + "else": { >> + "operation": "boost", >> + "score": -4.0 >> + } >> + } >> + }, >> + { >> + "operation": "if_member", >> + "feature": "ACategorical", >> + "set": [ >> + "A", >> + "B" >> + ], >> + "then": { >> + "operation": "boost", >> + "score": 5.0 >> + }, >> + "else": { >> + "operation": "boost", >> + "score": -6.0 >> + } >> + } >> +] >> +``` >> +### DecisionForestRuntime.h >> +``` >> +... >> +namespace ns1 { >> +namespace ns2 { >> +namespace test { >> +class Example { >> +public: >> + void setANumber(float V) { ... } >> + void setAFloat(float V) { ... } >> + void setACategorical(unsigned V) { ... } >> + >> +private: >> + ... >> +}; >> + >> +float Evaluate(const Example&); >> +} // namespace test >> +} // namespace ns2 >> +} // namespace ns1 >> +``` >> + >> +### CMake Invocation >> +Inorder to use the inference runtime, one can use `gen_decision_forest` >> function >> +described in `CompletionModel.cmake` which invokes >> `CodeCompletionCodegen.py` with the appropriate arguments. 
>> + >> +For example, the following invocation reads the model present in >> `path/to/model` and creates >> +`${CMAKE_CURRENT_BINARY_DIR}/myfilename.h` and >> `${CMAKE_CURRENT_BINARY_DIR}/myfilename.cpp` >> +describing a `class` named `MyClass` in namespace `fully::qualified`. >> + >> + >> + >> +``` >> +gen_decision_forest(path/to/model >> + myfilename >> + ::fully::qualified::MyClass) >> +``` >> \ No newline at end of file >> >> diff --git a/clang-tools-extra/clangd/quality/model/features.json >> b/clang-tools-extra/clangd/quality/model/features.json >> new file mode 100644 >> index 000000000000..e91eccd1ce20 >> --- /dev/null >> +++ b/clang-tools-extra/clangd/quality/model/features.json >> @@ -0,0 +1,8 @@ >> +[ >> + { >> + "name": "ContextKind", >> + "kind": "ENUM", >> + "type": "clang::CodeCompletionContext::Kind", >> + "header": "clang/Sema/CodeCompleteConsumer.h" >> + } >> +] >> \ No newline at end of file >> >> diff --git a/clang-tools-extra/clangd/quality/model/forest.json >> b/clang-tools-extra/clangd/quality/model/forest.json >> new file mode 100644 >> index 000000000000..78a1524e2d81 >> --- /dev/null >> +++ b/clang-tools-extra/clangd/quality/model/forest.json >> @@ -0,0 +1,18 @@ >> +[ >> + { >> + "operation": "if_member", >> + "feature": "ContextKind", >> + "set": [ >> + "CCC_DotMemberAccess", >> + "CCC_ArrowMemberAccess" >> + ], >> + "then": { >> + "operation": "boost", >> + "score": 3.0 >> + }, >> + "else": { >> + "operation": "boost", >> + "score": 1.0 >> + } >> + } >> +] >> \ No newline at end of file >> >> diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt >> b/clang-tools-extra/clangd/unittests/CMakeLists.txt >> index 2167b5e210e2..a84fd0b71ca5 100644 >> --- a/clang-tools-extra/clangd/unittests/CMakeLists.txt >> +++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt >> @@ -28,6 +28,9 @@ if (CLANGD_ENABLE_REMOTE) >> set(REMOTE_TEST_SOURCES remote/MarshallingTests.cpp) >> endif() >> >> 
+include(${CMAKE_CURRENT_SOURCE_DIR}/../quality/CompletionModel.cmake) >> +gen_decision_forest(${CMAKE_CURRENT_SOURCE_DIR}/decision_forest_model >> DecisionForestRuntimeTest ::ns1::ns2::test::Example) >> + >> add_custom_target(ClangdUnitTests) >> add_unittest(ClangdUnitTests ClangdTests >> Annotations.cpp >> @@ -44,6 +47,7 @@ add_unittest(ClangdUnitTests ClangdTests >> ConfigCompileTests.cpp >> ConfigProviderTests.cpp >> ConfigYAMLTests.cpp >> + DecisionForestTests.cpp >> DexTests.cpp >> DiagnosticsTests.cpp >> DraftStoreTests.cpp >> @@ -89,6 +93,7 @@ add_unittest(ClangdUnitTests ClangdTests >> TweakTesting.cpp >> URITests.cpp >> XRefsTests.cpp >> + ${CMAKE_CURRENT_BINARY_DIR}/DecisionForestRuntimeTest.cpp >> >> support/CancellationTests.cpp >> support/ContextTests.cpp >> @@ -103,6 +108,11 @@ add_unittest(ClangdUnitTests ClangdTests >> $<TARGET_OBJECTS:obj.clangDaemonTweaks> >> ) >> >> +# Include generated ComletionModel headers. >> +target_include_directories(ClangdTests PUBLIC >> + $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}> >> +) >> + >> clang_target_link_libraries(ClangdTests >> PRIVATE >> clangAST >> >> diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp >> b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp >> index 635e036039a0..460976d64f9f 100644 >> --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp >> +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp >> @@ -10,6 +10,7 @@ >> #include "ClangdServer.h" >> #include "CodeComplete.h" >> #include "Compiler.h" >> +#include "CompletionModel.h" >> #include "Matchers.h" >> #include "Protocol.h" >> #include "Quality.h" >> @@ -47,6 +48,7 @@ using ::testing::HasSubstr; >> using ::testing::IsEmpty; >> using ::testing::Not; >> using ::testing::UnorderedElementsAre; >> +using ContextKind = CodeCompletionContext::Kind; >> >> // GMock helpers for matching completion items. 
>> MATCHER_P(Named, Name, "") { return arg.Name == Name; } >> @@ -161,6 +163,16 @@ Symbol withReferences(int N, Symbol S) { >> return S; >> } >> >> +TEST(DecisionForestRuntime, SanityTest) { >> + using Example = clangd::Example; >> + using clangd::Evaluate; >> + Example E1; >> + E1.setContextKind(ContextKind::CCC_ArrowMemberAccess); >> + Example E2; >> + E2.setContextKind(ContextKind::CCC_SymbolOrNewName); >> + EXPECT_GT(Evaluate(E1), Evaluate(E2)); >> +} >> + >> TEST(CompletionTest, Limit) { >> clangd::CodeCompleteOptions Opts; >> Opts.Limit = 2; >> >> diff --git a/clang-tools-extra/clangd/unittests/DecisionForestTests.cpp >> b/clang-tools-extra/clangd/unittests/DecisionForestTests.cpp >> new file mode 100644 >> index 000000000000..d29c8a4a0358 >> --- /dev/null >> +++ b/clang-tools-extra/clangd/unittests/DecisionForestTests.cpp >> @@ -0,0 +1,29 @@ >> +#include "DecisionForestRuntimeTest.h" >> +#include "decision_forest_model/CategoricalFeature.h" >> +#include "gtest/gtest.h" >> + >> +namespace clang { >> +namespace clangd { >> + >> +TEST(DecisionForestRuntime, Evaluate) { >> + using Example = ::ns1::ns2::test::Example; >> + using Cat = ::ns1::ns2::TestEnum; >> + using ::ns1::ns2::test::Evaluate; >> + >> + Example E; >> + E.setANumber(200); // True >> + E.setAFloat(0); // True: +10.0 >> + E.setACategorical(Cat::A); // True: +5.0 >> + EXPECT_EQ(Evaluate(E), 15.0); >> + >> + E.setANumber(200); // True >> + E.setAFloat(-2.5); // False: -20.0 >> + E.setACategorical(Cat::B); // True: +5.0 >> + EXPECT_EQ(Evaluate(E), -15.0); >> + >> + E.setANumber(100); // False >> + E.setACategorical(Cat::C); // True: +3.0, False: -6.0 >> + EXPECT_EQ(Evaluate(E), -3.0); >> +} >> +} // namespace clangd >> +} // namespace clang >> >> diff --git >> a/clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h >> b/clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h >> new file mode 100644 >> index 000000000000..dfb6ab3b199d >> --- /dev/null >> 
+++ >> b/clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h >> @@ -0,0 +1,5 @@ >> +namespace ns1 { >> +namespace ns2 { >> +enum TestEnum { A, B, C, D }; >> +} // namespace ns2 >> +} // namespace ns1 >> >> diff --git >> a/clang-tools-extra/clangd/unittests/decision_forest_model/features.json >> b/clang-tools-extra/clangd/unittests/decision_forest_model/features.json >> new file mode 100644 >> index 000000000000..7f159f192e19 >> --- /dev/null >> +++ >> b/clang-tools-extra/clangd/unittests/decision_forest_model/features.json >> @@ -0,0 +1,16 @@ >> +[ >> + { >> + "name": "ANumber", >> + "kind": "NUMBER" >> + }, >> + { >> + "name": "AFloat", >> + "kind": "NUMBER" >> + }, >> + { >> + "name": "ACategorical", >> + "kind": "ENUM", >> + "type": "ns1::ns2::TestEnum", >> + "header": "decision_forest_model/CategoricalFeature.h" >> + } >> +] >> \ No newline at end of file >> >> diff --git >> a/clang-tools-extra/clangd/unittests/decision_forest_model/forest.json >> b/clang-tools-extra/clangd/unittests/decision_forest_model/forest.json >> new file mode 100644 >> index 000000000000..26f071da485d >> --- /dev/null >> +++ b/clang-tools-extra/clangd/unittests/decision_forest_model/forest.json >> @@ -0,0 +1,52 @@ >> +[ >> + { >> + "operation": "if_greater", >> + "feature": "ANumber", >> + "threshold": 200.0, >> + "then": { >> + "operation": "if_greater", >> + "feature": "AFloat", >> + "threshold": -1, >> + "then": { >> + "operation": "boost", >> + "score": 10.0 >> + }, >> + "else": { >> + "operation": "boost", >> + "score": -20.0 >> + } >> + }, >> + "else": { >> + "operation": "if_member", >> + "feature": "ACategorical", >> + "set": [ >> + "A", >> + "C" >> + ], >> + "then": { >> + "operation": "boost", >> + "score": 3.0 >> + }, >> + "else": { >> + "operation": "boost", >> + "score": -4.0 >> + } >> + } >> + }, >> + { >> + "operation": "if_member", >> + "feature": "ACategorical", >> + "set": [ >> + "A", >> + "B" >> + ], >> + "then": { >> + "operation": "boost", 
>> + "score": 5.0 >> + }, >> + "else": { >> + "operation": "boost", >> + "score": -6.0 >> + } >> + } >> +] >> \ No newline at end of file >> >> >> >> _______________________________________________ >> llvm-branch-commits mailing list >> llvm-branch-commits@lists.llvm.org >> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits >> >
_______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits