This is an automated email from the ASF dual-hosted git repository.
xiazcy pushed a commit to branch 3.7-dev
in repository https://gitbox.apache.org/repos/asf/tinkerpop.git
The following commit(s) were added to refs/heads/3.7-dev by this push:
new 47a555b358 coerce set into list when it contains non-hashable elements
for python (#3359)
47a555b358 is described below
commit 47a555b3580701ca7715adb2af371a24342afcef
Author: Yang Xia <[email protected]>
AuthorDate: Mon Mar 30 13:57:37 2026 -0700
coerce set into list when it contains non-hashable elements for python
(#3359)
---
CHANGELOG.asciidoc | 1 +
docs/src/reference/gremlin-variants.asciidoc | 4 ++
docs/src/upgrade/release-3.7.x.asciidoc | 9 ++++
.../gremlin_python/structure/io/graphbinaryV1.py | 8 ++-
.../gremlin_python/structure/io/graphsonV3d0.py | 11 ++++-
.../driver/test_driver_remote_connection.py | 16 ++++++
.../python/tests/unit/io/test_graphbinaryV1.py | 42 ++++++++++++++++
.../main/python/tests/unit/io/test_graphsonV3d0.py | 57 ++++++++++++++++++++++
8 files changed, 146 insertions(+), 2 deletions(-)
diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc
index 5b5558d1c3..f2ca7eb3e4 100644
--- a/CHANGELOG.asciidoc
+++ b/CHANGELOG.asciidoc
@@ -35,6 +35,7 @@
image::https://raw.githubusercontent.com/apache/tinkerpop/master/docs/static/ima
* Added `SessionedChildClient` that borrows connections from a different
`Client` for use with `Sessions`.
* Added `reuseConnectionsForSessions` to Java GLV settings to decide whether
to use `SessionedChildClient` for remote transactions.
* Added support for Node 22 and 24 alongside Node 20.
+* Deserialize `set` into `list` when it contains non-hashable elements in
`gremlin-python`.
* Fixed `cap()` step throwing an error when used mid-traversal in OLAP.
[[release-3-7-5]]
diff --git a/docs/src/reference/gremlin-variants.asciidoc
b/docs/src/reference/gremlin-variants.asciidoc
index 413c9243b6..55f9f0da8b 100644
--- a/docs/src/reference/gremlin-variants.asciidoc
+++ b/docs/src/reference/gremlin-variants.asciidoc
@@ -2852,6 +2852,10 @@ is different from JVM languages which produces different
`Set` results when thos
is detected during deserialization, the `Set` is coerced to a `List` so that
traversals return consistent
results within a collection across different languages. If a `Set` is needed
then convert `List` results
to `Set` manually.
+* Traversals that return a `Set` containing non-hashable items, such as
`Dictionary`, `Set` and `List`, will be coerced
+into a `List` during deserialization. Python requires set elements to be
hashable, whereas Gremlin does not. If a
+`Set` is needed, convert elements to hashable equivalents manually (e.g.
`dict` to `HashableDict`, `list` to `tuple`,
+`set` to `frozenset`).
* Gremlin is capable of returning `Dictionary` results that use non-hashable
keys (e.g. Dictionary as a key) and Python
does not support that at a language level. Using GraphSON 3.0 or GraphBinary
(after 3.5.0) makes it possible to return
such results. In all other cases, Gremlin that returns such results will need
to be re-written to avoid that sort of
diff --git a/docs/src/upgrade/release-3.7.x.asciidoc
b/docs/src/upgrade/release-3.7.x.asciidoc
index 524179c87e..9a72acefc5 100644
--- a/docs/src/upgrade/release-3.7.x.asciidoc
+++ b/docs/src/upgrade/release-3.7.x.asciidoc
@@ -59,6 +59,15 @@ See:
link:https://issues.apache.org/jira/browse/TINKERPOP-3235[TINKERPOP-3235]
Gremlin Javascript now supports Node 22 and 24 alongside Node 20.
+==== Python Set Deserialization with Non-Hashable Elements
+
+Traversals that return a `Set` containing non-hashable items (such as
`Dictionary`, `Set`, or `List`) previously caused
+a `TypeError` during deserialization in Gremlin-Python. These results are now
coerced to a `List` to avoid errors. This
+applies to both GraphSON 3.0 and GraphBinary serialization formats. If a `Set`
is needed, convert the elements of the `List` results to
+Python hashable types manually (e.g. `dict` to `HashableDict`, `list` to
`tuple`, `set` to `frozenset`).
+
+See: link:https://issues.apache.org/jira/browse/TINKERPOP-3232[TINKERPOP-3232]
+
=== Upgrading for Providers
==== Graph System Providers
diff --git
a/gremlin-python/src/main/python/gremlin_python/structure/io/graphbinaryV1.py
b/gremlin-python/src/main/python/gremlin_python/structure/io/graphbinaryV1.py
index 33ec78431b..39a0c3a678 100644
---
a/gremlin-python/src/main/python/gremlin_python/structure/io/graphbinaryV1.py
+++
b/gremlin-python/src/main/python/gremlin_python/structure/io/graphbinaryV1.py
@@ -520,7 +520,13 @@ class SetDeserializer(ListIO):
@classmethod
def objectify(cls, buff, reader, nullable=True):
- return set(ListIO.objectify(buff, reader, nullable))
+ the_list = ListIO.objectify(buff, reader, nullable)
+ try:
+ return set(the_list)
+ except TypeError:
+ log.warning("Coercing Set to list as it contains unhashable
elements (e.g. dict, list). "
+ "See TINKERPOP-3232 for more details.")
+ return the_list
class MapIO(_GraphBinaryTypeIO):
diff --git
a/gremlin-python/src/main/python/gremlin_python/structure/io/graphsonV3d0.py
b/gremlin-python/src/main/python/gremlin_python/structure/io/graphsonV3d0.py
index 03415c69e2..c032e2fd88 100644
--- a/gremlin-python/src/main/python/gremlin_python/structure/io/graphsonV3d0.py
+++ b/gremlin-python/src/main/python/gremlin_python/structure/io/graphsonV3d0.py
@@ -438,9 +438,18 @@ class SetIO(_GraphSONTypeIO):
In case Java returns numeric values of different types which
python don't recognize, coerce and return a list.
See comments of TINKERPOP-1844 for more details
+
+ In case the set contains non-hashable elements (e.g. dict, list),
+ coerce and return a list.
+ See TINKERPOP-3232 for more details
"""
new_list = [reader.to_object(obj) for obj in s]
- new_set = set(new_list)
+ try:
+ new_set = set(new_list)
+ except TypeError:
+ log.warning("Coercing g:Set to list as it contains unhashable
elements (e.g. dict, list). "
+ "See TINKERPOP-3232 for more details.")
+ return new_list
if len(new_list) != len(new_set):
log.warning("Coercing g:Set to list due to java numeric values. "
"See TINKERPOP-1844 for more details.")
diff --git
a/gremlin-python/src/main/python/tests/integration/driver/test_driver_remote_connection.py
b/gremlin-python/src/main/python/tests/integration/driver/test_driver_remote_connection.py
index e59ec942ec..77e5516614 100644
---
a/gremlin-python/src/main/python/tests/integration/driver/test_driver_remote_connection.py
+++
b/gremlin-python/src/main/python/tests/integration/driver/test_driver_remote_connection.py
@@ -26,6 +26,7 @@ from gremlin_python.process.traversal import Traverser
from gremlin_python.process.traversal import TraversalStrategy
from gremlin_python.process.traversal import Bindings
from gremlin_python.process.traversal import P, Order, T
+from gremlin_python.process.traversal import Scope
from gremlin_python.process.graph_traversal import __
from gremlin_python.process.anonymous_traversal import traversal
from gremlin_python.structure.graph import Vertex
@@ -143,6 +144,21 @@ class TestDriverRemoteConnection(object):
assert p.objects[1].properties is not None and
len(p.objects[1].properties) > 0
assert p.objects[2].properties is not None and
len(p.objects[2].properties) > 0
+ def test_set_with_unhashable_elements(self, remote_connection):
+ # test that a query returning a Set containing non-hashable elements
(maps) can be
+ # deserialized without a TypeError - see TINKERPOP-3232
+ # GraphSON v2 does not have a Set type so it deserializes as list -
skip for v2
+ if isinstance(remote_connection._client._message_serializer,
GraphSONSerializersV2d0):
+ return
+ g = traversal().withRemote(remote_connection)
+ # g.V().valueMap().dedup(Scope.local) returns a Set of Map results
which previously
+ # threw TypeError because Python sets cannot contain unhashable dict
elements.
+ # The Set is now coerced to a list when it contains unhashable
elements.
+ results = g.V().valueMap().dedup(Scope.local).toList()
+ assert len(results) > 0
+ for r in results:
+ assert isinstance(r, list)
+
def test_lambda_traversals(self, remote_connection):
statics.load_statics(globals())
assert "remoteconnection[{},gmodern]".format(test_no_auth_url) ==
str(remote_connection)
diff --git a/gremlin-python/src/main/python/tests/unit/io/test_graphbinaryV1.py
b/gremlin-python/src/main/python/tests/unit/io/test_graphbinaryV1.py
index 92c8edc4ba..b1234369c1 100644
--- a/gremlin-python/src/main/python/tests/unit/io/test_graphbinaryV1.py
+++ b/gremlin-python/src/main/python/tests/unit/io/test_graphbinaryV1.py
@@ -122,6 +122,48 @@ class TestGraphSONWriter(object):
output =
self.graphbinary_reader.read_object(self.graphbinary_writer.write_object(x))
assert x == output
+ def test_set_with_unhashable_dict_elements(self):
+ # test that sets containing dicts are coerced to list - see
TINKERPOP-3232
+ x = [{"name": "marko", "age": 29}, {"name": "josh", "age": 32}]
+ list_payload = self.graphbinary_writer.write_object(x)
+ # patch outer type from list (0x09) to set (0x0b)
+ set_payload = bytearray(list_payload)
+ set_payload[0] = 0x0b
+ output = self.graphbinary_reader.read_object(set_payload)
+ assert isinstance(output, list)
+ assert len(output) == 2
+
+ def test_set_with_unhashable_list_elements(self):
+ # test that sets containing lists are coerced to list - see
TINKERPOP-3232
+ list_payload = self.graphbinary_writer.write_object([["marko",
"josh"], ["vadas", "peter"]])
+ # the first byte is the DataType for list (0x09), change it to set
(0x0b)
+ set_payload = bytearray(list_payload)
+ set_payload[0] = 0x0b
+ output = self.graphbinary_reader.read_object(set_payload)
+ assert isinstance(output, list)
+ assert len(output) == 2
+
+ def test_set_with_mixed_hashable_and_unhashable_elements(self):
+ # test that sets containing a mix of hashable and unhashable elements
are coerced to list - see TINKERPOP-3232
+ x = ["marko", {"name": "josh"}, 42]
+ list_payload = self.graphbinary_writer.write_object(x)
+ set_payload = bytearray(list_payload)
+ set_payload[0] = 0x0b
+ output = self.graphbinary_reader.read_object(set_payload)
+ assert isinstance(output, list)
+ assert len(output) == 3
+
+ def test_set_with_nested_unhashable_elements(self):
+ # test that sets containing dicts with list values are coerced to list
- see TINKERPOP-3232
+ x = [{"name": "marko", "langs": ["java", "python"]}, {"name": "josh",
"langs": ["gremlin"]}]
+ list_payload = self.graphbinary_writer.write_object(x)
+ # patch outer type from list (0x09) to set (0x0b)
+ set_payload = bytearray(list_payload)
+ set_payload[0] = 0x0b
+ output = self.graphbinary_reader.read_object(set_payload)
+ assert isinstance(output, list)
+ assert len(output) == 2
+
def test_dict(self):
x = {"yo": "what?",
"go": "no!",
diff --git a/gremlin-python/src/main/python/tests/unit/io/test_graphsonV3d0.py
b/gremlin-python/src/main/python/tests/unit/io/test_graphsonV3d0.py
index fd98da313d..3a03fff7cd 100644
--- a/gremlin-python/src/main/python/tests/unit/io/test_graphsonV3d0.py
+++ b/gremlin-python/src/main/python/tests/unit/io/test_graphsonV3d0.py
@@ -86,6 +86,63 @@ class TestGraphSONReader:
assert x.count("marko") == 1
assert x.count("josh") == 3
+ def test_set_with_unhashable_dict_elements(self):
+ # test that sets containing dicts are coerced to list - see
TINKERPOP-3232
+ x = self.graphson_reader.read_object(
+ json.dumps({"@type": "g:Set", "@value": [
+ {"@type": "g:Map", "@value": ["name", "marko", "age",
{"@type": "g:Int32", "@value": 29}]},
+ {"@type": "g:Map", "@value": ["name", "josh", "age", {"@type":
"g:Int32", "@value": 32}]}
+ ]}))
+ assert isinstance(x, list)
+ assert len(x) == 2
+
+ def test_set_with_unhashable_list_elements(self):
+ # test that sets containing lists are coerced to list - see
TINKERPOP-3232
+ x = self.graphson_reader.read_object(
+ json.dumps({"@type": "g:Set", "@value": [
+ {"@type": "g:List", "@value": ["marko", "josh"]},
+ {"@type": "g:List", "@value": ["vadas", "peter"]}
+ ]}))
+ assert isinstance(x, list)
+ assert len(x) == 2
+
+ def test_set_with_unhashable_set_elements(self):
+ # test that sets containing sets are coerced to list - see
TINKERPOP-3232
+ x = self.graphson_reader.read_object(
+ json.dumps({"@type": "g:Set", "@value": [
+ {"@type": "g:Set", "@value": ["a", "b"]},
+ {"@type": "g:Set", "@value": ["c", "d"]}
+ ]}))
+ assert isinstance(x, list)
+ assert len(x) == 2
+
+ def test_set_with_mixed_hashable_and_unhashable_elements(self):
+ # test that sets containing a mix of hashable and unhashable elements
are coerced to list - see TINKERPOP-3232
+ x = self.graphson_reader.read_object(
+ json.dumps({"@type": "g:Set", "@value": [
+ "marko",
+ {"@type": "g:Map", "@value": ["name", "josh"]},
+ {"@type": "g:Int32", "@value": 42}
+ ]}))
+ assert isinstance(x, list)
+ assert len(x) == 3
+
+ def test_set_with_nested_unhashable_elements(self):
+ # test that sets containing dicts with list values are coerced to list
- see TINKERPOP-3232
+ x = self.graphson_reader.read_object(
+ json.dumps({"@type": "g:Set", "@value": [
+ {"@type": "g:Map", "@value": [
+ "name", "marko",
+ "langs", {"@type": "g:List", "@value": ["java", "python"]}
+ ]},
+ {"@type": "g:Map", "@value": [
+ "name", "josh",
+ "langs", {"@type": "g:List", "@value": ["gremlin"]}
+ ]}
+ ]}))
+ assert isinstance(x, list)
+ assert len(x) == 2
+
def test_number_input(self):
x = self.graphson_reader.read_object(json.dumps({
"@type": "gx:Byte",