llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lldb

Author: John Harrison (ashgti)

<details>
<summary>Changes</summary>

I ran into this while debugging a different issue, but when we calcualte the 
'value' field of a variable we were not ensuring the contents were valid utf8. 
If assertions are enabled then llvm::json::Value will assert that the string 
contains invalid utf8.

To address this I added a wrapper type (`lldb_dap::protocol::SanitizedString`) 
that can be used as a thin wrapper around `std::string` to ensure a field 
contains valid utf8.

I've used it in a handful of places that I believe could contain invalid utf8 
output, but we can add it to other places as well in the protocol types.

---
Full diff: https://github.com/llvm/llvm-project/pull/181261.diff


7 Files Affected:

- (modified) lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py 
(+33-13) 
- (modified) lldb/test/API/tools/lldb-dap/variables/main.cpp (+4) 
- (modified) lldb/tools/lldb-dap/DAP.cpp (+4-3) 
- (modified) lldb/tools/lldb-dap/Protocol/ProtocolBase.cpp (+17) 
- (modified) lldb/tools/lldb-dap/Protocol/ProtocolBase.h (+19) 
- (modified) lldb/tools/lldb-dap/Protocol/ProtocolRequests.h (+1-1) 
- (modified) lldb/tools/lldb-dap/Protocol/ProtocolTypes.h (+2-1) 


``````````diff
diff --git a/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py 
b/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py
index 1dbb0143e7a55..f737d41eaecec 100644
--- a/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py
+++ b/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py
@@ -188,6 +188,8 @@ def do_test_scopes_variables_setVariable_evaluate(
                 },
                 "readOnly": True,
             },
+            "valid_str": {},
+            "malformed_str": {},
             "x": {"equals": {"type": "int"}},
         }
 
@@ -340,9 +342,9 @@ def do_test_scopes_variables_setVariable_evaluate(
 
         verify_locals["argc"]["equals"]["value"] = "123"
         verify_locals["pt"]["children"]["x"]["equals"]["value"] = "111"
-        verify_locals["x @ main.cpp:19"] = {"equals": {"type": "int", "value": 
"89"}}
-        verify_locals["x @ main.cpp:21"] = {"equals": {"type": "int", "value": 
"42"}}
-        verify_locals["x @ main.cpp:23"] = {"equals": {"type": "int", "value": 
"72"}}
+        verify_locals["x @ main.cpp:23"] = {"equals": {"type": "int", "value": 
"89"}}
+        verify_locals["x @ main.cpp:25"] = {"equals": {"type": "int", "value": 
"42"}}
+        verify_locals["x @ main.cpp:27"] = {"equals": {"type": "int", "value": 
"72"}}
 
         self.verify_variables(verify_locals, 
self.dap_server.get_local_variables())
 
@@ -350,22 +352,22 @@ def do_test_scopes_variables_setVariable_evaluate(
         self.assertFalse(self.set_local("x2", 9)["success"])
         self.assertFalse(self.set_local("x @ main.cpp:0", 9)["success"])
 
-        self.assertTrue(self.set_local("x @ main.cpp:19", 19)["success"])
-        self.assertTrue(self.set_local("x @ main.cpp:21", 21)["success"])
-        self.assertTrue(self.set_local("x @ main.cpp:23", 23)["success"])
+        self.assertTrue(self.set_local("x @ main.cpp:23", 19)["success"])
+        self.assertTrue(self.set_local("x @ main.cpp:25", 21)["success"])
+        self.assertTrue(self.set_local("x @ main.cpp:27", 23)["success"])
 
         # The following should have no effect
-        self.assertFalse(self.set_local("x @ main.cpp:23", 
"invalid")["success"])
+        self.assertFalse(self.set_local("x @ main.cpp:27", 
"invalid")["success"])
 
-        verify_locals["x @ main.cpp:19"]["equals"]["value"] = "19"
-        verify_locals["x @ main.cpp:21"]["equals"]["value"] = "21"
-        verify_locals["x @ main.cpp:23"]["equals"]["value"] = "23"
+        verify_locals["x @ main.cpp:23"]["equals"]["value"] = "19"
+        verify_locals["x @ main.cpp:25"]["equals"]["value"] = "21"
+        verify_locals["x @ main.cpp:27"]["equals"]["value"] = "23"
 
         self.verify_variables(verify_locals, 
self.dap_server.get_local_variables())
 
         # The plain x variable shold refer to the innermost x
         self.assertTrue(self.set_local("x", 22)["success"])
-        verify_locals["x @ main.cpp:23"]["equals"]["value"] = "22"
+        verify_locals["x @ main.cpp:27"]["equals"]["value"] = "22"
 
         self.verify_variables(verify_locals, 
self.dap_server.get_local_variables())
 
@@ -382,9 +384,9 @@ def do_test_scopes_variables_setVariable_evaluate(
         names = [var["name"] for var in locals]
         # The first shadowed x shouldn't have a suffix anymore
         verify_locals["x"] = {"equals": {"type": "int", "value": "19"}}
-        self.assertNotIn("x @ main.cpp:19", names)
-        self.assertNotIn("x @ main.cpp:21", names)
         self.assertNotIn("x @ main.cpp:23", names)
+        self.assertNotIn("x @ main.cpp:25", names)
+        self.assertNotIn("x @ main.cpp:27", names)
 
         self.verify_variables(verify_locals, locals)
 
@@ -455,6 +457,22 @@ def do_test_scopes_and_evaluate_expansion(self, 
enableAutoVariableSummaries: boo
                 },
                 "readOnly": True,
             },
+            "valid_str": {
+                "equals": {
+                    "type": "const char *",
+                },
+                "matches": {
+                    "value": re.compile(r'0x\w+ "𐌢𐌰L𐌾𐍈 CπˆπŒΌπŒ΄πƒ"'),
+                },
+            },
+            "malformed_str": {
+                "equals": {
+                    "type": "const char *",
+                },
+                "matches": {
+                    "value": re.compile(r'0x\w+ "lone trailing \\x81\\x82 
bytes"'),
+                },
+            },
             "x": {
                 "equals": {"type": "int"},
                 "missing": ["indexedVariables"],
@@ -678,6 +696,8 @@ def test_return_variables(self):
             "argc": {},
             "argv": {},
             "pt": {"readOnly": True},
+            "valid_str": {},
+            "malformed_str": {},
             "x": {},
             "return_result": {"equals": {"type": "int"}},
         }
diff --git a/lldb/test/API/tools/lldb-dap/variables/main.cpp 
b/lldb/test/API/tools/lldb-dap/variables/main.cpp
index 0e363001f2f42..04fc62f02c22f 100644
--- a/lldb/test/API/tools/lldb-dap/variables/main.cpp
+++ b/lldb/test/API/tools/lldb-dap/variables/main.cpp
@@ -5,6 +5,7 @@ struct PointType {
   int y;
   int buffer[BUFFER_SIZE];
 };
+#include <cstdio>
 #include <vector>
 int g_global = 123;
 static int s_global = 234;
@@ -16,6 +17,9 @@ int main(int argc, char const *argv[]) {
   PointType pt = {11, 22, {0}};
   for (int i = 0; i < BUFFER_SIZE; ++i)
     pt.buffer[i] = i;
+  const char *valid_str = "𐌢𐌰L𐌾𐍈 CπˆπŒΌπŒ΄πƒ";
+  const char *malformed_str = "lone trailing \x81\x82 bytes";
+  printf("print malformed utf8 %s %s\n", valid_str, malformed_str);
   int x = s_global - g_global - pt.y; // breakpoint 1
   {
     int x = 42;
diff --git a/lldb/tools/lldb-dap/DAP.cpp b/lldb/tools/lldb-dap/DAP.cpp
index b76b05c5d1459..a629453dd5347 100644
--- a/lldb/tools/lldb-dap/DAP.cpp
+++ b/lldb/tools/lldb-dap/DAP.cpp
@@ -411,9 +411,10 @@ void DAP::SendOutput(OutputType o, const llvm::StringRef 
output) {
     if (end == llvm::StringRef::npos)
       end = output.size() - 1;
     llvm::json::Object event(CreateEventObject("output"));
-    llvm::json::Object body;
-    body.try_emplace("category", category);
-    EmplaceSafeString(body, "output", output.slice(idx, end + 1).str());
+    llvm::json::Object body{
+        {"category", category},
+        {"output", protocol::SanitizedString(output.slice(idx, end + 
1).str())},
+    };
     event.try_emplace("body", std::move(body));
     SendJSON(llvm::json::Value(std::move(event)));
     idx = end + 1;
diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolBase.cpp 
b/lldb/tools/lldb-dap/Protocol/ProtocolBase.cpp
index 72359214c8537..a9a929f9b88d2 100644
--- a/lldb/tools/lldb-dap/Protocol/ProtocolBase.cpp
+++ b/lldb/tools/lldb-dap/Protocol/ProtocolBase.cpp
@@ -57,6 +57,23 @@ bool fromJSON(const json::Value &Params, MessageType &M, 
json::Path P) {
   return true;
 }
 
+json::Value toJSON(const SanitizedString &S) {
+  if (LLVM_LIKELY(llvm::json::isUTF8(std::string(S))))
+    return std::string(S);
+  llvm::errs() << "Here!\n";
+  return llvm::json::fixUTF8(std::string(S));
+}
+
+bool fromJSON(const llvm::json::Value &Param, SanitizedString &Str,
+              llvm::json::Path Path) {
+  if (auto s = Param.getAsString()) {
+    Str = *s;
+    return true;
+  }
+  Path.report("expected string");
+  return false;
+}
+
 json::Value toJSON(const Request &R) {
   assert(R.seq != kCalculateSeq && "invalid seq");
 
diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolBase.h 
b/lldb/tools/lldb-dap/Protocol/ProtocolBase.h
index 09ce6802b17c0..3053312c4660f 100644
--- a/lldb/tools/lldb-dap/Protocol/ProtocolBase.h
+++ b/lldb/tools/lldb-dap/Protocol/ProtocolBase.h
@@ -20,6 +20,7 @@
 #ifndef LLDB_TOOLS_LLDB_DAP_PROTOCOL_PROTOCOL_BASE_H
 #define LLDB_TOOLS_LLDB_DAP_PROTOCOL_PROTOCOL_BASE_H
 
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/JSON.h"
 #include <cstdint>
 #include <optional>
@@ -37,6 +38,24 @@ using Id = uint64_t;
 /// the current session.
 static constexpr Id kCalculateSeq = UINT64_MAX;
 
+/// A wrapper around a string to ensure the contents are sanitized as utf8
+/// during serialization. This value should be used for any strings that may
+/// contain raw data like variable values.
+class SanitizedString {
+public:
+  SanitizedString(std::string str) : m_str(str) {}
+  SanitizedString(llvm::StringRef str) : m_str(str.str()) {}
+  SanitizedString(const char *str) : m_str(str) {}
+  SanitizedString() = default;
+
+  operator std::string() const { return m_str; }
+
+private:
+  std::string m_str;
+};
+llvm::json::Value toJSON(const SanitizedString &s);
+bool fromJSON(const llvm::json::Value &, SanitizedString &, llvm::json::Path);
+
 /// A client or debug adapter initiated request.
 struct Request {
   /// The command to execute.
diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h 
b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h
index 28c9f48200e0c..af50e77ce6ab2 100644
--- a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h
+++ b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h
@@ -452,7 +452,7 @@ bool fromJSON(const llvm::json::Value &, 
SetVariableArguments &,
 /// Response to `setVariable` request.
 struct SetVariableResponseBody {
   /// The new value of the variable.
-  std::string value;
+  SanitizedString value;
 
   /// The type of the new value. Typically shown in the UI when hovering over
   /// the value.
diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h 
b/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h
index 71046d24c9787..e627b02e37bd1 100644
--- a/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h
+++ b/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h
@@ -21,6 +21,7 @@
 #define LLDB_TOOLS_LLDB_DAP_PROTOCOL_PROTOCOL_TYPES_H
 
 #include "Protocol/DAPTypes.h"
+#include "Protocol/ProtocolBase.h"
 #include "lldb/lldb-defines.h"
 #include "lldb/lldb-types.h"
 #include "llvm/ADT/DenseSet.h"
@@ -942,7 +943,7 @@ struct Variable {
   /// its children are not yet visible.
   ///
   /// An empty string can be used if no value should be shown in the UI.
-  std::string value;
+  SanitizedString value;
 
   /// The type of the variable's value. Typically shown in the UI when hovering
   /// over the value.

``````````

</details>


https://github.com/llvm/llvm-project/pull/181261
_______________________________________________
lldb-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to