ueshin commented on code in PR #49790:
URL: https://github.com/apache/spark/pull/49790#discussion_r1941764561


##########
python/pyspark/pandas/typedef/typehints.py:
##########
@@ -296,7 +296,13 @@ def spark_type_to_pandas_dtype(
     elif isinstance(spark_type, (types.TimestampType, types.TimestampNTZType)):
         return np.dtype("datetime64[ns]")
     else:
-        return np.dtype(to_arrow_type(spark_type).to_pandas_dtype())
+        from pyspark.pandas.utils import default_session
+
+        prefers_large_var_types = (
+            default_session().conf.get("spark.sql.execution.arrow.useLargeVarTypes", "true")

Review Comment:
   Need `.lower()`, just in case?



##########
python/pyspark/worker.py:
##########
@@ -384,7 +393,9 @@ def wrapped(left_key_series, left_value_series, right_key_series, right_value_se

         return result

-    return lambda kl, vl, kr, vr: [(wrapped(kl, vl, kr, vr), to_arrow_type(return_type))]
+    return lambda kl, vl, kr, vr: [
+        (wrapped(kl, vl, kr, vr), to_arrow_type(return_type, _use_large_var_types))

Review Comment:
   nit: not directly related to this, but should we define the arrow return type outside of this `lambda`?



##########
python/pyspark/worker.py:
##########
@@ -502,7 +514,7 @@ def wrapped(key_series, value_series):

         return result

-    return lambda k, v: [(wrapped(k, v), to_arrow_type(return_type))]
+    return lambda k, v: [(wrapped(k, v), to_arrow_type(return_type, _use_large_var_types))]

Review Comment:
   ditto.



##########
python/pyspark/worker.py:
##########
@@ -627,13 +640,15 @@ def verify_element(result):
             state,
         )

-    return lambda k, v, s: [(wrapped(k, v, s), to_arrow_type(return_type))]
+    return lambda k, v, s: [(wrapped(k, v, s), to_arrow_type(return_type, _use_large_var_types))]

Review Comment:
   ditto.
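
A minimal sketch of the case-insensitive check ueshin is asking about in the `typehints.py` hunk, assuming the config value comes back as a string that a user may have set as `"True"` or `"TRUE"`; the helper name `_prefers_large_var_types` is illustrative and not part of the PR:

```python
from pyspark.pandas.utils import default_session


def _prefers_large_var_types() -> bool:
    # Hypothetical helper mirroring the reviewed hunk: RuntimeConfig.get
    # returns the value as a string, so normalize its case before comparing.
    value = default_session().conf.get(
        "spark.sql.execution.arrow.useLargeVarTypes", "true"
    )
    return (value or "true").lower() == "true"
```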
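
On the `worker.py` nit about defining the arrow return type outside the `lambda`: a sketch of the hoisting ueshin suggests, keeping the call shape from the PR's hunks (`to_arrow_type(return_type, _use_large_var_types)`); the surrounding builder function is illustrative, not the actual worker.py code:

```python
from pyspark.sql.pandas.types import to_arrow_type


def _wrap_grouped_map(wrapped, return_type, _use_large_var_types):
    # Illustrative wrapper builder: resolving the arrow type here means
    # to_arrow_type runs once when the UDF is set up, instead of on every
    # batch the returned lambda processes.
    arrow_return_type = to_arrow_type(return_type, _use_large_var_types)
    return lambda k, v: [(wrapped(k, v), arrow_return_type)]
```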
########## python/pyspark/pandas/typedef/typehints.py: ########## @@ -296,7 +296,13 @@ def spark_type_to_pandas_dtype( elif isinstance(spark_type, (types.TimestampType, types.TimestampNTZType)): return np.dtype("datetime64[ns]") else: - return np.dtype(to_arrow_type(spark_type).to_pandas_dtype()) + from pyspark.pandas.utils import default_session + + prefers_large_var_types = ( + default_session().conf.get("spark.sql.execution.arrow.useLargeVarTypes", "true") Review Comment: Need `.lower()`, just in case? ########## python/pyspark/worker.py: ########## @@ -384,7 +393,9 @@ def wrapped(left_key_series, left_value_series, right_key_series, right_value_se return result - return lambda kl, vl, kr, vr: [(wrapped(kl, vl, kr, vr), to_arrow_type(return_type))] + return lambda kl, vl, kr, vr: [ + (wrapped(kl, vl, kr, vr), to_arrow_type(return_type, _use_large_var_types)) Review Comment: nit: not directly related to this, but should we define the arrow return type outside of this `lambda`? ########## python/pyspark/worker.py: ########## @@ -502,7 +514,7 @@ def wrapped(key_series, value_series): return result - return lambda k, v: [(wrapped(k, v), to_arrow_type(return_type))] + return lambda k, v: [(wrapped(k, v), to_arrow_type(return_type, _use_large_var_types))] Review Comment: ditto. ########## python/pyspark/worker.py: ########## @@ -627,13 +640,15 @@ def verify_element(result): state, ) - return lambda k, v, s: [(wrapped(k, v, s), to_arrow_type(return_type))] + return lambda k, v, s: [(wrapped(k, v, s), to_arrow_type(return_type, _use_large_var_types))] Review Comment: ditto. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org