itholic commented on code in PR #49572: URL: https://github.com/apache/spark/pull/49572#discussion_r1922940544
########## python/pyspark/sql/session.py: ########## @@ -2100,16 +2104,36 @@ def addArtifacts( file : bool Add a file to be downloaded with this Spark job on every node. The ``path`` passed can only be a local file for now. - - Notes - ----- - This is an API dedicated to Spark Connect client only. With regular Spark Session, it throws - an exception. """ - raise PySparkRuntimeError( - errorClass="ONLY_SUPPORTED_WITH_SPARK_CONNECT", - messageParameters={"feature": "SparkSession.addArtifact(s)"}, - ) + if sum([file, pyfile, archive]) > 1: + raise PySparkValueError( + errorClass="INVALID_MULTIPLE_ARGUMENT_CONDITIONS", + messageParameters={ + "arg_names": "'pyfile', 'archive' and/or 'file'", + "condition": "True together", + }, + ) + for p in path: + normalized_path = os.path.abspath(p) + target_dir = os.path.join( + SparkFiles.getRootDirectory(), os.path.basename(normalized_path) + ) + + # Check if the target path already exists + if os.path.exists(target_dir): + # Compare the contents of the files. If identical, skip adding this file. + # If different, raise an exception. + if filecmp.cmp(normalized_path, target_dir, shallow=False): Review Comment: Yes, this matches behavior with Spark Connect Python client and Spark Core -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org