wengh commented on code in PR #50313: URL: https://github.com/apache/spark/pull/50313#discussion_r2010597106
##########
python/pyspark/errors/exceptions/base.py:
##########
@@ -449,3 +449,35 @@ def summary(self) -> str:
         Summary of the exception cause.
         """
         ...
+
+
+T = TypeVar("T", bound=PySparkException)
+
+
+def recover_python_exception(e: T) -> T:
+    """
+    Recover Python exception stack trace.
+
+    Many JVM exceptions types may wrap Python exceptions. For example:
+    - UDFs can cause PythonException
+    - UDTFs and Data Sources can cause AnalysisException
+    """
+    python_exception_header = "Traceback (most recent call last):"
+    try:
+        from pyspark.errors.exceptions.tblib import Traceback
+
+        message = str(e)
+        start = message.find(python_exception_header)
+        if start == -1:
+            # No Python exception found
+            return e
+
+        # The message contains a Python exception. Parse it to use it as the exception's traceback.
+        # This allows richer error messages, for example showing line content in Python UDF.
+        python_exception_string = message[start:]
+        tb = Traceback.from_string(python_exception_string)
+        tb.populate_linecache()
+        return e.with_traceback(tb.as_traceback())
+    except Exception:

Review Comment:
   > Parsing exceptions can potentially cause a lot of performance overhead

   Parsing should take time linear in the size of the error message, so I don't worry much about performance. Let me do a quick benchmark to validate this.

   > In addition, it would have to be BaseException if you absolutely want to catch all the exceptions.

   Good point :+1: (although I don't think it could possibly throw stuff like `KeyboardInterrupt` or `SystemExit`, but let's be safe)

-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org