llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lldb

Author: Jonas Devlieghere (JDevlieghere)

<details>
<summary>Changes</summary>

This conditionally reimplements `PythonString::AsUTF8` using 
`PyUnicode_AsUTF8String` instead of `PyUnicode_AsUTF8AndSize`. The latter is 
slightly more efficient because it caches the result, which also means we can 
return an `llvm::StringRef` instead of having to allocate a `std::string`.

We pick the most efficient one depending on whether or not we're targeting the 
Python 3.10 stable API, however we now always return the result as a 
`std::string`.

---
Full diff: https://github.com/llvm/llvm-project/pull/152212.diff


4 Files Affected:

- (modified) lldb/bindings/python/python-wrapper.swig (+7-7) 
- (modified) lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp 
(+25-13) 
- (modified) lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h 
(+2-2) 
- (modified) 
lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp (+4-4) 


``````````diff
diff --git a/lldb/bindings/python/python-wrapper.swig 
b/lldb/bindings/python/python-wrapper.swig
index 2c30d536a753d..7f00ef43913f8 100644
--- a/lldb/bindings/python/python-wrapper.swig
+++ b/lldb/bindings/python/python-wrapper.swig
@@ -173,7 +173,7 @@ bool 
lldb_private::python::SWIGBridge::LLDBSwigPythonCallTypeScript(
   else
     result = pfunc(value_arg, dict, SWIGBridge::ToSWIGWrapper(*options_sp));
 
-  retval = result.Str().GetString().str();
+  retval = result.Str().GetString();
 
   return true;
 }
@@ -604,7 +604,7 @@ 
lldb_private::python::SWIGBridge::LLDBSwigPythonGetRepeatCommandForScriptedComma
   if (result.IsNone())
     return std::nullopt;
 
-  return result.Str().GetString().str();
+  return result.Str().GetString();
 }
 
 StructuredData::DictionarySP
@@ -807,7 +807,7 @@ bool 
lldb_private::python::SWIGBridge::LLDBSWIGPythonRunScriptKeywordProcess(
 
   auto result = pfunc(SWIGBridge::ToSWIGWrapper(process), dict);
 
-  output = result.Str().GetString().str();
+  output = result.Str().GetString();
 
   return true;
 }
@@ -831,7 +831,7 @@ std::optional<std::string> 
lldb_private::python::SWIGBridge::LLDBSWIGPythonRunSc
 
   auto result = pfunc(SWIGBridge::ToSWIGWrapper(std::move(thread)), dict);
 
-  return result.Str().GetString().str();
+  return result.Str().GetString();
 }
 
 bool lldb_private::python::SWIGBridge::LLDBSWIGPythonRunScriptKeywordTarget(
@@ -854,7 +854,7 @@ bool 
lldb_private::python::SWIGBridge::LLDBSWIGPythonRunScriptKeywordTarget(
 
   auto result = pfunc(SWIGBridge::ToSWIGWrapper(target), dict);
 
-  output = result.Str().GetString().str();
+  output = result.Str().GetString();
 
   return true;
 }
@@ -878,7 +878,7 @@ std::optional<std::string> 
lldb_private::python::SWIGBridge::LLDBSWIGPythonRunSc
 
   auto result = pfunc(SWIGBridge::ToSWIGWrapper(std::move(frame)), dict);
 
-  return result.Str().GetString().str();
+  return result.Str().GetString();
 }
 
 bool lldb_private::python::SWIGBridge::LLDBSWIGPythonRunScriptKeywordValue(
@@ -901,7 +901,7 @@ bool 
lldb_private::python::SWIGBridge::LLDBSWIGPythonRunScriptKeywordValue(
 
   auto result = pfunc(SWIGBridge::ToSWIGWrapper(value), dict);
 
-  output = result.Str().GetString().str();
+  output = result.Str().GetString();
 
   return true;
 }
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp 
b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
index 4a45673a6ebbc..7845e56c8c05c 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
@@ -405,28 +405,40 @@ bool PythonString::Check(PyObject *py_obj) {
   return false;
 }
 
-llvm::StringRef PythonString::GetString() const {
+std::string PythonString::GetString() const {
   auto s = AsUTF8();
   if (!s) {
     llvm::consumeError(s.takeError());
-    return llvm::StringRef("");
+    return "";
   }
   return s.get();
 }
 
-Expected<llvm::StringRef> PythonString::AsUTF8() const {
+Expected<std::string> PythonString::AsUTF8() const {
   if (!IsValid())
     return nullDeref();
 
-  Py_ssize_t size;
-  const char *data;
+  // PyUnicode_AsUTF8AndSize caches the UTF-8 representation of the string in
+  // the Unicode object, making it more efficient than PyUnicode_AsUTF8String.
+#if defined(Py_LIMITED_API) && (Py_LIMITED_API < 0x030a0000)
+  PyObject *py_bytes = PyUnicode_AsUTF8String(m_py_obj);
+  if (!py_bytes)
+    return exception();
 
-  data = PyUnicode_AsUTF8AndSize(m_py_obj, &size);
+  auto release_py_str =
+      llvm::make_scope_exit([py_bytes] { Py_DECREF(py_bytes); });
 
-  if (!data)
+  Py_ssize_t size = PyBytes_Size(py_bytes);
+  const char *str = PyBytes_AsString(py_bytes);
+#else
+  Py_ssize_t size;
+  const char *str = PyUnicode_AsUTF8AndSize(m_py_obj, &size);
+#endif
+
+  if (!str)
     return exception();
 
-  return llvm::StringRef(data, size);
+  return std::string(str, size);
 }
 
 size_t PythonString::GetSize() const {
@@ -1248,12 +1260,12 @@ class TextPythonFile : public PythonIOFile {
       // EOF
       return Status();
     }
-    auto stringref = pystring.get().AsUTF8();
-    if (!stringref)
+    auto str = pystring.get().AsUTF8();
+    if (!str)
       // Cloning since the wrapped exception may still reference the PyThread.
-      return Status::FromError(stringref.takeError()).Clone();
-    num_bytes = stringref.get().size();
-    memcpy(buf, stringref.get().begin(), num_bytes);
+      return Status::FromError(str.takeError()).Clone();
+    num_bytes = str->size();
+    memcpy(buf, str->c_str(), num_bytes);
     return Status();
   }
 };
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h 
b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h
index 45bb499248329..9586eedd03176 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h
@@ -460,9 +460,9 @@ class PythonString : public TypedPythonObject<PythonString> 
{
 
   static bool Check(PyObject *py_obj);
 
-  llvm::StringRef GetString() const; // safe, empty string on error
+  std::string GetString() const; // safe, empty string on error
 
-  llvm::Expected<llvm::StringRef> AsUTF8() const;
+  llvm::Expected<std::string> AsUTF8() const;
 
   size_t GetSize() const;
 
diff --git 
a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp 
b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
index 24d604f22a765..b98ff035e5798 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
@@ -2715,8 +2715,8 @@ bool 
ScriptInterpreterPythonImpl::GetShortHelpForCommandObject(
 
   if (py_return.IsAllocated() && PythonString::Check(py_return.get())) {
     PythonString py_string(PyRefType::Borrowed, py_return.get());
-    llvm::StringRef return_data(py_string.GetString());
-    dest.assign(return_data.data(), return_data.size());
+    std::string return_data = py_string.GetString();
+    dest.assign(return_data.c_str(), return_data.size());
     return true;
   }
 
@@ -2996,8 +2996,8 @@ bool 
ScriptInterpreterPythonImpl::GetLongHelpForCommandObject(
   bool got_string = false;
   if (py_return.IsAllocated() && PythonString::Check(py_return.get())) {
     PythonString str(PyRefType::Borrowed, py_return.get());
-    llvm::StringRef str_data(str.GetString());
-    dest.assign(str_data.data(), str_data.size());
+    std::string str_data = str.GetString();
+    dest.assign(str_data.c_str(), str_data.size());
     got_string = true;
   }
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/152212
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to