wengh commented on code in PR #50313: URL: https://github.com/apache/spark/pull/50313#discussion_r2011011661
########## python/pyspark/errors/exceptions/base.py: ########## @@ -449,3 +449,35 @@ def summary(self) -> str: Summary of the exception cause. """ ... + + +T = TypeVar("T", bound=PySparkException) + + +def recover_python_exception(e: T) -> T: + """ + Recover Python exception stack trace. + + Many JVM exceptions types may wrap Python exceptions. For example: + - UDFs can cause PythonException + - UDTFs and Data Sources can cause AnalysisException + """ + python_exception_header = "Traceback (most recent call last):" + try: + from pyspark.errors.exceptions.tblib import Traceback + + message = str(e) + start = message.find(python_exception_header) + if start == -1: + # No Python exception found + return e + + # The message contains a Python exception. Parse it to use it as the exception's traceback. + # This allows richer error messages, for example showing line content in Python UDF. + python_exception_string = message[start:] + tb = Traceback.from_string(python_exception_string) + tb.populate_linecache() + return e.with_traceback(tb.as_traceback()) + except Exception: Review Comment: Here's a quick benchmark with different traceback sizes (1, 10, 100, 500, 1000) https://perfpy.com/985 | Benchmark | Runtime | | --------- | ------- | | 1 frame | 297 us | | 10 frames | 335 us | | 100 frames | 2.41 ms | | 500 frames | 12.4 ms | | 1000 frames | 28.7 ms | | 1000 frames, failed to parse | 3.13 ms | The default recursion limit is 1000, so tracebacks with more than 1000 frames should be unlikely. Also, the results I see when I benchmark locally are around 1/5 of the above. Therefore performance shouldn't be a concern. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org