This is an automated email from the ASF dual-hosted git repository. xiazcy pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/tinkerpop.git
commit b575e979dfcf95c6f66a2e83e781dbbe6f13a687 Merge: 8a1b2c8aa7 d73b824330 Author: Yang Xia <[email protected]> AuthorDate: Mon Mar 30 14:23:00 2026 -0700 Merge branch '3.8-dev' # Conflicts: # gremlin-python/src/main/python/tests/integration/driver/test_driver_remote_connection.py # gremlin-python/src/main/python/tests/unit/structure/io/test_graphsonV4.py CHANGELOG.asciidoc | 1 + docs/src/reference/gremlin-variants.asciidoc | 4 ++ docs/src/upgrade/release-3.7.x.asciidoc | 9 ++++ .../gremlin_python/structure/io/graphbinaryV4.py | 8 ++- .../gremlin_python/structure/io/graphsonV4.py | 11 ++++- .../driver/test_driver_remote_connection.py | 12 ++++- .../tests/unit/structure/io/test_graphbinaryV4.py | 42 ++++++++++++++++ .../tests/unit/structure/io/test_graphsonV4.py | 57 ++++++++++++++++++++++ 8 files changed, 141 insertions(+), 3 deletions(-) diff --cc docs/src/reference/gremlin-variants.asciidoc index f3144c1ff9,595366b638..655a1fbc19 --- a/docs/src/reference/gremlin-variants.asciidoc +++ b/docs/src/reference/gremlin-variants.asciidoc @@@ -2881,25 -2944,14 +2881,29 @@@ is different from JVM languages which p is detected during deserialization, the `Set` is coerced to a `List` so that traversals return consistent results within a collection across different languages. If a `Set` is needed then convert `List` results to `Set` manually. + * Traversals that return a `Set` containing non-hashable items, such as `Dictionary`, `Set` and `List`, will be coerced + into a `List` during deserialization. Python requires set elements to be hashable, whereas Gremlin does not. If a + `Set` is needed, convert elements to hashable equivalents manually (e.g. `dict` to `HashableDict`, `list` to `tuple`, + `set` to `frozenset`). * Gremlin is capable of returning `Dictionary` results that use non-hashable keys (e.g. Dictionary as a key) and Python does not support that at a language level. Using GraphSON 3.0 or GraphBinary (after 3.5.0) makes it possible to return -such results. 
In all other cases, Gremlin that returns such results will need to be re-written to avoid that sort of -key. +such results. However, it may not be possible to serialize these maps so they can't be re-inserted (or round tripped). +In all other cases, Gremlin that returns such results will need to be re-written to avoid that sort of key. +* The `float` type in Python is a double precision floating point number which is commonly referred to in other +languages as a `double`. This means that single precision floating point values will be deserialized into a double, so +there will be a precision difference. +* Gremlin supports multiple fixed-width integers such as byte (1-byte), short (2-byte), and long (8-byte). These are +deserialized into Python's variable size `int` type. These numbers can't be exactly round tripped because the original +type information is lost during deserialization. During serialization, these numbers will try to be converted into a +4-byte integer which may throw exceptions if the value is too large or waste space if the value is very small. +* The Gremlin Char type is deserialized as a string and therefore can't be round tripped as it will attempt to +be serialized as a string. +* Date and Duration types in Gremlin are deserialized as `datetime.datetime` and `datetime.timedelta` respectively in +`gremlin-python`. This can lead to errors for large values because they exceed the maximum size allowed for `datetime` +and `timedelta`. +* In Gremlin, 1 isn't equal to the boolean true value and 0 isn't equal to the boolean false value, but they are equal +in Python. This means that in `gremlin-python` if these values are in a `Set`, you will get a different behavior than +what is intended by Gremlin, since it follows Python's behavior. * The `subgraph()`-step is not supported by any variant that is not running on the Java Virtual Machine as there is no `Graph` instance to deserialize a result into on the client-side. 
A workaround is to replace the step with `aggregate(local)` and then convert those results to something the client can use locally. diff --cc gremlin-python/src/main/python/tests/integration/driver/test_driver_remote_connection.py index 5de3825064,31271ac481..2e72e4a486 --- a/gremlin-python/src/main/python/tests/integration/driver/test_driver_remote_connection.py +++ b/gremlin-python/src/main/python/tests/integration/driver/test_driver_remote_connection.py @@@ -23,110 -21,28 +23,110 @@@ import pytes from gremlin_python import statics from gremlin_python.driver import serializer from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection -from gremlin_python.driver.protocol import GremlinServerError -from gremlin_python.statics import long, bigint -from gremlin_python.process.traversal import Traverser, Operator, Barrier -from gremlin_python.process.traversal import TraversalStrategy -from gremlin_python.process.traversal import Bindings -from gremlin_python.process.traversal import P, Order, T -from gremlin_python.process.traversal import Scope +from gremlin_python.statics import long - from gremlin_python.process.traversal import TraversalStrategy, P, Order, T, DT, GValue, Cardinality ++from gremlin_python.process.traversal import TraversalStrategy, P, Order, T, DT, GValue, Cardinality, Scope from gremlin_python.process.graph_traversal import __ from gremlin_python.process.anonymous_traversal import traversal -from gremlin_python.structure.graph import Vertex -from gremlin_python.process.strategies import SubgraphStrategy, ReservedKeysVerificationStrategy, SeedStrategy +from gremlin_python.structure.graph import Vertex, Edge, Graph +from gremlin_python.process.strategies import SubgraphStrategy, SeedStrategy, ReservedKeysVerificationStrategy from gremlin_python.structure.io.util import HashableDict -from gremlin_python.driver.serializer import GraphSONSerializersV2d0 - -__author__ = 'Marko A. 
Rodriguez (http://markorodriguez.com)' - -from integration.conftest import anonymous_url +from gremlin_python.driver.protocol import GremlinServerError -gremlin_server_url = os.environ.get('GREMLIN_SERVER_URL', 'ws://localhost:{}/gremlin') +gremlin_server_url = os.environ.get('GREMLIN_SERVER_URL', 'http://localhost:{}/') test_no_auth_url = gremlin_server_url.format(45940) + class TestDriverRemoteConnection(object): + + # this is a temporary test for basic graphSONV4 connectivity, once all types are implemented, enable graphSON testing + # in conftest.py and remove this + def test_graphSONV4_temp(self): + remote_conn = DriverRemoteConnection(test_no_auth_url, 'gmodern', + request_serializer=serializer.GraphSONSerializersV4(), + response_serializer=serializer.GraphSONSerializersV4()) + g = traversal().with_(remote_conn) + assert long(6) == g.V().count().to_list()[0] + # # + assert Vertex(1) == g.V(1).next() + assert Vertex(1) == g.V(Vertex(1)).next() + assert 1 == g.V(1).id_().next() + # assert Traverser(Vertex(1)) == g.V(1).nextTraverser() # TODO check back after bulking added back + assert 1 == len(g.V(1).to_list()) + assert isinstance(g.V(1).to_list(), list) + results = g.V().repeat(__.out()).times(2).name + results = results.to_list() + assert 2 == len(results) + assert "lop" in results + assert "ripple" in results + # # + assert 10 == g.V().repeat(__.both()).times(5)[0:10].count().next() + assert 1 == g.V().repeat(__.both()).times(5)[0:1].count().next() + assert 0 == g.V().repeat(__.both()).times(5)[0:0].count().next() + assert 4 == g.V()[2:].count().next() + assert 2 == g.V()[:2].count().next() + # # + results = g.with_side_effect('a', ['josh', 'peter']).V(1).out('created').in_('created').values('name').where( + P.within('a')).to_list() + assert 2 == len(results) + assert 'josh' in results + assert 'peter' in results + # # + results = g.V().has('name', 'peter').as_('a').out('created').as_('b').select('a', 'b').by( + __.value_map()).to_list() + assert 1 == 
len(results) + assert 'peter' == results[0]['a']['name'][0] + assert 35 == results[0]['a']['age'][0] + assert 'lop' == results[0]['b']['name'][0] + assert 'java' == results[0]['b']['lang'][0] + assert 2 == len(results[0]['a']) + assert 2 == len(results[0]['b']) + # # + results = g.V(1, 2).inject().values('name').to_list() + assert 2 == len(results) + assert 'marko' in results + assert 'vadas' in results + # # + # this test just validates that the underscored versions of steps conflicting with Gremlin work + # properly and can be removed when the old steps are removed - TINKERPOP-2272 + results = g.V().filter_(__.values('age').sum_().and_( + __.max_().is_(P.gt(0)), __.min_().is_(P.gt(0)))).range_(0, 1).id_().next() + assert 1 == results + # # + # test dict keys + # types for dict + results = g.V().has('person', 'name', 'marko').element_map("name").group_count().next() + assert {HashableDict.of({T.id: 1, T.label: 'person', 'name': 'marko'}): 1} == results + results = g.V().has('person', 'name', 'marko').both('knows').group_count().by(__.values('name').fold()).next() + assert {tuple(['vadas']): 1, tuple(['josh']): 1} == results + + def test_bulk_request_option(self, remote_connection): + g = traversal().with_(remote_connection) + result = g.inject(1,2,3,2,1).to_list() + assert 5 == len(result) + bulked_results = g.with_("language", "gremlin-lang").with_("bulkResults", True).inject(1,2,3,2,1).to_list() + assert 5 == len(bulked_results) + + def test_extract_request_options(self, remote_connection): + g = traversal().with_(remote_connection) + t = g.with_("evaluationTimeout", 1000).with_("batchSize", 100).V().count() + assert remote_connection.extract_request_options(t.gremlin_lang) == {'batchSize': 100, + 'evaluationTimeout': 1000, + 'bulkResults': True} + assert 6 == t.to_list()[0] + + @pytest.mark.skip(reason="investigate why 'ids' parameter name fails to parse in gremlin-lang") + def test_extract_request_options_with_params(self, remote_connection): + g = 
traversal().with_(remote_connection) + t = g.with_("evaluationTimeout", + 1000).with_("batchSize", 100).with_("userAgent", + "test").V(GValue('ids', [1, 2, 3])).count() + assert remote_connection.extract_request_options(t.gremlin_lang) == {'batchSize': 100, + 'evaluationTimeout': 1000, + 'userAgent': 'test', + 'bulkResults': True, + 'params': {'ids': [1, 2, 3]}} + assert 3 == t.to_list()[0] + def test_traversals(self, remote_connection): statics.load_statics(globals()) g = traversal().with_(remote_connection) @@@ -213,20 -137,40 +213,30 @@@ assert len(p.objects[1].properties) == 0 assert len(p.objects[2].properties) == 0 # # - # test materializeProperties in Path - 'all' should materialize properties on each element - p = g.with_("materializeProperties", "all").V().has('name', 'marko').outE().inV().has_label('software').path().next() - assert 3 == len(p.objects) - assert p.objects[0].properties is not None and len(p.objects[0].properties) > 0 - # edges have dict-like properties; ensure not empty - assert p.objects[1].properties is not None and len(p.objects[1].properties) > 0 - assert p.objects[2].properties is not None and len(p.objects[2].properties) > 0 + # subgraph - skipping GraphSON for now. 
we can remove this carve-out when we remove the GraphSON support which + # was meant to be temporary + if not isinstance(remote_connection._client._response_serializer, serializer.GraphSONSerializersV4): + sg = g.E().has_label('knows').subgraph('sg').cap('sg').next() + assert isinstance(sg, Graph) + assert len(sg.vertices) == 3 + assert len(sg.edges) == 2 + for v in sg.vertices.values(): + assert isinstance(v, Vertex) + assert v.label == 'person' + for e in sg.edges.values(): + assert isinstance(e, Edge) + assert e.label == 'knows' + def test_set_with_unhashable_elements(self, remote_connection): - # test that a query returning a Set containing non-hashable elements (maps) can be - # deserialized without a TypeError - see TINKERPOP-3232 - # GraphSON v2 does not have a Set type so it deserializes as list - skip for v2 - if isinstance(remote_connection._client._message_serializer, GraphSONSerializersV2d0): - return + g = traversal().withRemote(remote_connection) + # g.V().valueMap().dedup(Scope.local) returns a Set of Map results which previously + # threw TypeError because Python sets cannot contain unhashable dict elements. + # The Set is now coerced to a list when it contains unhashable elements. 
+ results = g.V().valueMap().dedup(Scope.local).toList() + assert len(results) > 0 + for r in results: + assert isinstance(r, list) + - def test_lambda_traversals(self, remote_connection): - statics.load_statics(globals()) - assert "remoteconnection[{},gmodern]".format(test_no_auth_url) == str(remote_connection) - g = traversal().with_(remote_connection) - - assert 24.0 == g.withSack(1.0, lambda: ("x -> x + 1", "gremlin-groovy")).V().both().sack().sum_().next() - assert 24.0 == g.withSack(lambda: ("{1.0d}", "gremlin-groovy"), lambda: ("x -> x + 1", "gremlin-groovy")).V().both().sack().sum_().next() - - assert 48.0 == g.withSack(1.0, lambda: ("x, y -> x + y + 1", "gremlin-groovy")).V().both().sack().sum_().next() - assert 48.0 == g.withSack(lambda: ("{1.0d}", "gremlin-groovy"), lambda: ("x, y -> x + y + 1", "gremlin-groovy")).V().both().sack().sum_().next() - def test_iteration(self, remote_connection): statics.load_statics(globals()) g = traversal().with_(remote_connection) diff --cc gremlin-python/src/main/python/tests/unit/structure/io/test_graphsonV4.py index ac742abe05,362f9de5c9..3a51014f4e --- a/gremlin-python/src/main/python/tests/unit/structure/io/test_graphsonV4.py +++ b/gremlin-python/src/main/python/tests/unit/structure/io/test_graphsonV4.py @@@ -77,9 -77,75 +77,66 @@@ class TestGraphSONReader assert x['b'] == "marko" assert len(x) == 2 - # BulkSet gets coerced to a List - both have the same behavior - x = self.graphson_reader.read_object( - json.dumps({"@type": "g:BulkSet", - "@value": ["marko", {"@type": "g:Int64", "@value": 1}, "josh", {"@type": "g:Int64", "@value": 3}]})) - assert isinstance(x, list) - assert len(x) == 4 - assert x.count("marko") == 1 - assert x.count("josh") == 3 - + def test_set_with_unhashable_dict_elements(self): + # test that sets containing dicts are coerced to list - see TINKERPOP-3232 + x = self.graphson_reader.read_object( + json.dumps({"@type": "g:Set", "@value": [ + {"@type": "g:Map", "@value": ["name", "marko", "age", 
{"@type": "g:Int32", "@value": 29}]}, + {"@type": "g:Map", "@value": ["name", "josh", "age", {"@type": "g:Int32", "@value": 32}]} + ]})) + assert isinstance(x, list) + assert len(x) == 2 + + def test_set_with_unhashable_list_elements(self): + # test that sets containing lists are coerced to list - see TINKERPOP-3232 + x = self.graphson_reader.read_object( + json.dumps({"@type": "g:Set", "@value": [ + {"@type": "g:List", "@value": ["marko", "josh"]}, + {"@type": "g:List", "@value": ["vadas", "peter"]} + ]})) + assert isinstance(x, list) + assert len(x) == 2 + + def test_set_with_unhashable_set_elements(self): + # test that sets containing sets are coerced to list - see TINKERPOP-3232 + x = self.graphson_reader.read_object( + json.dumps({"@type": "g:Set", "@value": [ + {"@type": "g:Set", "@value": ["a", "b"]}, + {"@type": "g:Set", "@value": ["c", "d"]} + ]})) + assert isinstance(x, list) + assert len(x) == 2 + + def test_set_with_mixed_hashable_and_unhashable_elements(self): + # test that sets containing a mix of hashable and unhashable elements are coerced to list - see TINKERPOP-3232 + x = self.graphson_reader.read_object( + json.dumps({"@type": "g:Set", "@value": [ + "marko", + {"@type": "g:Map", "@value": ["name", "josh"]}, + {"@type": "g:Int32", "@value": 42} + ]})) + assert isinstance(x, list) + assert len(x) == 3 + + def test_set_with_nested_unhashable_elements(self): + # test that sets containing dicts with list values are coerced to list - see TINKERPOP-3232 + x = self.graphson_reader.read_object( + json.dumps({"@type": "g:Set", "@value": [ + {"@type": "g:Map", "@value": [ + "name", "marko", + "langs", {"@type": "g:List", "@value": ["java", "python"]} + ]}, + {"@type": "g:Map", "@value": [ + "name", "josh", + "langs", {"@type": "g:List", "@value": ["gremlin"]} + ]} + ]})) + assert isinstance(x, list) + assert len(x) == 2 + def test_number_input(self): x = self.graphson_reader.read_object(json.dumps({ - "@type": "gx:Byte", + "@type": "g:Byte", "@value": 1 })) 
assert isinstance(x, SingleByte)
