Dear Spark Development Community, are there any known issues with SparkSession.createDataFrame in PySpark 4.0 with Python 3.12.3?
In my environment spark.range() and spark.read.json() work as expected, but spark.createDataFrame() throws an exception: Example: from pyspark.sql import Row, SparkSession def test_df(spark: SparkSession): df = spark.range(5) df.show() # works df = spark.read.option("multiline", "true").json(r"\path\to\testdata.json") df.show() # works df = spark.createDataFrame([Row(a=1, b=2), Row(a=3, b=4)]) df.show() # throws exception My Environment: Windows 11 Java 17 Python 3.12.3 PySpark 4.0.0 Exception: .venv\Lib\site-packages\pyspark\sql\classic\dataframe.py:285: in show print(self._show_string(n, truncate, vertical)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .venv\Lib\site-packages\pyspark\sql\classic\dataframe.py:303: in _show_string return self._jdf.showString(n, 20, vertical) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .venv\Lib\site-packages\py4j\java_gateway.py:1362: in __call__ return_value = get_return_value( .venv\Lib\site-packages\pyspark\errors\exceptions\captured.py:282: in deco return f(*a, **kw) ^^^^^^^^^^^ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ answer = 'xro74' gateway_client = <py4j.clientserver.JavaClient object at 0x00000239DC8548F0> target_id = 'o73', name = 'showString' def get_return_value(answer, gateway_client, target_id=None, name=None): """Converts an answer received from the Java gateway into a Python object. For example, string representation of integers are converted to Python integer, string representation of objects are converted to JavaObject instances, etc. :param answer: the string returned by the Java gateway :param gateway_client: the gateway client used to communicate with the Java Gateway. Only necessary if the answer is a reference (e.g., object, list, map) :param target_id: the name of the object from which the answer comes from (e.g., *object1* in `object1.hello()`). Optional. :param name: the name of the member from which the answer comes from (e.g., *hello* in `object1.hello()`). Optional. 
""" if is_error(answer)[0]: if len(answer) > 1: type = answer[1] value = OUTPUT_CONVERTER[type](answer[2:], gateway_client) if answer[1] == REFERENCE_TYPE: > raise Py4JJavaError( "An error occurred while calling {0}{1}{2}.\n". format(target_id, ".", name), value) E py4j.protocol.Py4JJavaError: An error occurred while calling o73.showString. Kind regards, Eyck