teemperor updated this revision to Diff 208569.
teemperor retitled this revision from "[lldb] Fix crash due to dollar character 
in variable names." to "[lldb] Fix crash due to unicode characters and dollars 
in variable names.".
teemperor edited the summary of this revision.
teemperor added a comment.

- Rewrote the patch to use the Clang lexer, dropped our ad-hoc lexer.
- Make sure we only lex the expression once, not for each local variable.
- Added tests for unicode characters and a bunch of dollar-related corner 
cases. The current behavior seems reasonable and what people expect in the 
comments, so I just keep them around.

I would prefer if we could just consider this patch as a regression fix and not 
some official support for dollar-variables in user code (which seems like a 
mess to define properly). I added some tests that at least make sure the 
current behavior is tested, we can decide later if that's really what we want 
to do (but the current behavior seems quite reasonable and similar to what 
people suggest).


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64194/new/

https://reviews.llvm.org/D64194

Files:
  
lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/Makefile
  
lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/TestDollarInVariable.py
  
lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/main.c
  
lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/Makefile
  
lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/TestUnicodeInVariable.py
  
lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/main.cpp
  lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.cpp

Index: lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.cpp
===================================================================
--- lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.cpp
+++ lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.cpp
@@ -9,6 +9,8 @@
 #include "ClangExpressionSourceCode.h"
 
 #include "clang/Basic/CharInfo.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Lexer.h"
 #include "llvm/ADT/StringRef.h"
 
 #include "Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.h"
@@ -164,44 +166,87 @@
   }
 }
 
-/// Checks if the expression body contains the given variable as a token.
-/// \param body The expression body.
-/// \param var The variable token we are looking for.
-/// \return True iff the expression body containes the variable as a token.
-static bool ExprBodyContainsVar(llvm::StringRef body, llvm::StringRef var) {
-  assert(var.find_if([](char c) { return !clang::isIdentifierBody(c); }) ==
-             llvm::StringRef::npos &&
-         "variable contains non-identifier chars?");
-
-  size_t start = 0;
-  // Iterate over all occurences of the variable string in our expression.
-  while ((start = body.find(var, start)) != llvm::StringRef::npos) {
-    // We found our variable name in the expression. Check that the token
-    // that contains our needle is equal to our variable and not just contains
-    // the character sequence by accident.
-    // Prevents situations where we for example inlcude the variable 'FOO' in an
-    // expression like 'FOObar + 1'.
-    bool has_characters_before =
-        start != 0 && clang::isIdentifierBody(body[start - 1]);
-    bool has_characters_after =
-        start + var.size() < body.size() &&
-        clang::isIdentifierBody(body[start + var.size()]);
-
-    // Our token just contained the variable name as a substring. Continue
-    // searching the rest of the expression.
-    if (has_characters_before || has_characters_after) {
-      ++start;
+namespace {
+/// Allows checking if a token is contained in a given expression.
+class TokenVerifier {
+  /// The tokens we found in the expression.
+  llvm::StringSet<> m_tokens;
+
+public:
+  TokenVerifier(std::string body);
+  /// Returns true iff the given expression body contained a token with the
+  /// given content.
+  bool hasToken(llvm::StringRef token) const {
+    return m_tokens.find(token) != m_tokens.end();
+  }
+};
+} // namespace
+
+TokenVerifier::TokenVerifier(std::string body) {
+  using namespace clang;
+
+  // We only care about tokens and not their original source locations. If we
+  // move the whole expression to only be in one line we can simplify the
+  // following code that extracts the token contents.
+  std::replace(body.begin(), body.end(), '\n', ' ');
+  std::replace(body.begin(), body.end(), '\r', ' ');
+
+  FileSystemOptions file_opts;
+  FileManager file_mgr(file_opts,
+                       FileSystem::Instance().GetVirtualFileSystem());
+
+  // Let's build the actual source code Clang needs and setup some utility
+  // objects.
+  llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_ids(new DiagnosticIDs());
+  llvm::IntrusiveRefCntPtr<DiagnosticOptions> diags_opts(
+      new DiagnosticOptions());
+  DiagnosticsEngine diags(diag_ids, diags_opts);
+  clang::SourceManager SM(diags, file_mgr);
+  auto buf = llvm::MemoryBuffer::getMemBuffer(body);
+
+  FileID FID = SM.createFileID(clang::SourceManager::Unowned, buf.get());
+
+  // Let's just enable the latest ObjC and C++ which should get most tokens
+  // right.
+  LangOptions Opts;
+  Opts.ObjC = true;
+  Opts.DollarIdents = true;
+  Opts.CPlusPlus17 = true;
+  Opts.LineComment = true;
+
+  Lexer lex(FID, buf.get(), SM, Opts);
+
+  Token token;
+  bool exit = false;
+  while (!exit) {
+    // Returns true if this is the last token we get from the lexer.
+    exit = lex.LexFromRawLexer(token);
+
+    // Extract the column number which we need to extract the token content.
+    // Our expression is just one line, so we don't need to handle any line
+    // numbers here.
+    bool invalid = false;
+    unsigned start = SM.getSpellingColumnNumber(token.getLocation(), &invalid);
+    if (invalid)
       continue;
-    }
-    return true;
+    // Column numbers start at 1, but indexes in our string start at 0.
+    --start;
+
+    // Annotations don't have a length, so let's skip them.
+    if (token.isAnnotation())
+      continue;
+
+    // Extract the token string from our source code and store it.
+    m_tokens.insert(body.substr(start, token.getLength()));
   }
-  return false;
 }
 
 static void AddLocalVariableDecls(const lldb::VariableListSP &var_list_sp,
                                   StreamString &stream,
                                   const std::string &expr,
                                   lldb::LanguageType wrapping_language) {
+  TokenVerifier tokens(expr);
+
   for (size_t i = 0; i < var_list_sp->GetSize(); i++) {
     lldb::VariableSP var_sp = var_list_sp->GetVariableAtIndex(i);
 
@@ -213,7 +258,7 @@
     if (!var_name || var_name == ".block_descriptor")
       continue;
 
-    if (!expr.empty() && !ExprBodyContainsVar(expr, var_name.GetStringRef()))
+    if (!expr.empty() && !tokens.hasToken(var_name.GetStringRef()))
       continue;
 
     if ((var_name == "self" || var_name == "_cmd") &&
Index: lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/main.cpp
===================================================================
--- /dev/null
+++ lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/main.cpp
@@ -0,0 +1,17 @@
+// Make sure we correctly handle unicode in variable names.
+
+struct A {
+  // We need a member variable in the context that could shadow our local
+  // variable. If our optimization code fails to handle this, then we won't
+  // correctly inject our local variable so that it won't get shadowed.
+  int foob\u00E1r = 2;
+  int foo() {
+    int foob\u00E1r = 3;
+    return foob\u00E1r; //%self.expect("expr foobár", substrs=['(int)', ' = 3'])
+  }
+};
+
+int main() {
+  A a;
+  return a.foo();
+}
Index: lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/TestUnicodeInVariable.py
===================================================================
--- /dev/null
+++ lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/TestUnicodeInVariable.py
@@ -0,0 +1,5 @@
+from lldbsuite.test import lldbinline
+from lldbsuite.test import decorators
+
+lldbinline.MakeInlineTest(__file__, globals(), None)
+
Index: lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/Makefile
===================================================================
--- /dev/null
+++ lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/Makefile
@@ -0,0 +1,4 @@
+LEVEL = ../../make
+CXX_SOURCES := main.cpp
+CXX_FLAGS_EXTRA := -finput-charset=UTF-8 -fextended-identifiers
+include $(LEVEL)/Makefile.rules
Index: lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/main.c
===================================================================
--- /dev/null
+++ lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/main.c
@@ -0,0 +1,21 @@
+// Make sure we correctly handle $ in variable names.
+
+int main() {
+  // Some variables that might conflict with our variables below.
+  int __lldb_expr_result = 2;
+  int $$foo = 1;
+  int R0 = 2;
+
+  // Some variables with dollar signs that should work (and shadow
+  // any built-in LLDB variables).
+  int $__lldb_expr_result = 11;
+  int $foo = 12;
+  int $R0 = 13;
+  int $0 = 14;
+
+  //%self.expect("expr $__lldb_expr_result", substrs=['(int) $0 = 11'])
+  //%self.expect("expr $foo", substrs=['(int)', ' = 12'])
+  //%self.expect("expr $R0", substrs=['(int)', ' = 13'])
+  //%self.expect("expr int $foo = 123", error=True, substrs=["declaration conflicts"])
+  return 0; //%self.expect("expr $0", substrs=['(int)', ' = 14'])
+}
Index: lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/TestDollarInVariable.py
===================================================================
--- /dev/null
+++ lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/TestDollarInVariable.py
@@ -0,0 +1,5 @@
+from lldbsuite.test import lldbinline
+from lldbsuite.test import decorators
+
+lldbinline.MakeInlineTest(__file__, globals(), None)
+
Index: lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/Makefile
===================================================================
--- /dev/null
+++ lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/Makefile
@@ -0,0 +1,3 @@
+LEVEL = ../../make
+C_SOURCES := main.c
+include $(LEVEL)/Makefile.rules
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to