abhiNB-star commented on issue #13410:
URL: https://github.com/apache/hudi/issues/13410#issuecomment-2955044830

   My YAML file:
   
   # ScheduledSparkApplication that runs the Hudi multi-table streamer daily at 12:59.
   apiVersion: sparkoperator.k8s.io/v1beta2
   kind: ScheduledSparkApplication
   metadata:
     annotations:
       # Machine-managed by `kubectl apply`; rejoined onto one line after e-mail wrapping split it.
       kubectl.kubernetes.io/last-applied-configuration: |
         {"apiVersion":"sparkoperator.k8s.io/v1beta2","kind":"ScheduledSparkApplication","metadata":{"annotations":{},"name":"gate-v4-20231130124511","namespace":"spark-hood"},"spec":{"concurrencyPolicy":"Replace","failedRunHistoryLimit":3,"schedule":"01 01 * * *","successfulRunHistoryLimit":1,"template":{"arguments":["--op","UPSERT","--props","gs://starship-hood-prod/nbdata/config/v4/mysql/gate-v4-20231130124511/common.properties","--source-ordering-field","dummy_date_field","--table-type","COPY_ON_WRITE","--transformer-class","transferman.StarshipTransferMan","--source-class","transferman.StarshipSourceReader","--config-folder","gs://starship-hood-prod/nbdata/config/v4/mysql/gate-v4-20231130124511/","--target-table","dummy_table","--enable-hive-sync","--base-path-prefix","dummy_path","--source-limit","100000"],"deps":{"jars":["gs://starship-hood-prod/nbdata/resources/jars/v4/spark-jars.jar","gs://starship-hood-prod/nbdata/resources/jars/v4/calcite-core-1.16.0.jar","gs://starship-hood-prod/nbdata/resources/jars/v4/calcite-linq4j-1.16.0.jar","gs://starship-hood-prod/nbdata/resources/jars/v4/spark-avro_2.12-3.3.0.jar","gs://starship-hood-prod/nbdata/resources/jars/v4/spark-catalyst_2.12-3.2.2.jar","gs://starship-hood-prod/nbdata/resources/jars/release-jars-final/transferman-4.9-SNAPSHOT.jar"]},"driver":{"coreLimit":"240m","coreRequest":"240m","initContainers":[{"args":["gate-v4-20231130124511","gs://starship-hood-prod/nbdata/config/v4/mysql/gate-v4-20231130124511/common.properties"],"image":"gcr.io/hood-starship/prevalidation:latest","name":"prevalidation-check-init-container"}],"labels":{"app":"driver"},"memory":"1000M","nodeSelector":{"node-selector":"preemptible-spark-driver"},"serviceAccount":"spark-operator-spark"},"executor":{"coreLimit":"2000m","coreRequest":"2000m","instances":3,"labels":{"app":"executor"},"memory":"10000M","nodeSelector":{"node-selector":"preemptible-large"}},"image":"asia.gcr.io/hood-starship/spark:3.1.1-gcs","imagePullPolicy":"Always","mainApplicationFile":"gs://starship-hood-prod/nbdata/resources/jars/release-jars-final/hudi-utilities-bundle_2.12-0.13.1.jar","mainClass":"org.apache.hudi.utilities.deltastreamer.HoodieMultiTableDeltaStreamer","mode":"cluster","restartPolicy":{"onFailureRetries":3,"onFailureRetryInterval":120,"onSubmissionFailureRetries":1,"onSubmissionFailureRetryInterval":120,"type":"OnFailure"},"sparkConf":{"spark.kubernetes.executor.podNamePrefix":"gate-v4-20231130124511","spark.serializer":"org.apache.spark.serializer.KryoSerializer"},"sparkVersion":"3.1.1","type":"Scala"}}}
     creationTimestamp: "2023-11-30T07:15:36Z"
     generation: 1158
     name: testing-actualparty-new-job
     namespace: spark-hood
     resourceVersion: "2257236246"
     uid: 3d1d7484-21c4-4404-b652-581f8e0d6a91
   spec:
     concurrencyPolicy: Replace
     failedRunHistoryLimit: 3
     # Quoted: a cron expression is a string; leaving it plain invites YAML tooling surprises.
     schedule: "59 12 * * *"
     successfulRunHistoryLimit: 1
     template:
       # CLI arguments passed to the Hudi utilities streamer.
       arguments:
         - --op
         - UPSERT
         - --props
         - gs://starship-hood-prod/nbdata/config/v4/test/actual-party/new/common.properties
         - --source-ordering-field
         - starship_offset
         - --table-type
         - COPY_ON_WRITE
         - --transformer-class
         - transferman.StarshipTransferMan
         - --source-class
         - transferman.StarshipSourceReader
         - --config-folder
         - gs://starship-hood-prod/nbdata/config/v4/test/actual-party/new/
         - --target-table
         - dummy_table
         - --enable-hive-sync
         - --base-path-prefix
         - dummy_path
         - --source-limit
         - "500000"
         - --continuous
       deps:
         jars:
           - gs://{my}/nbdata/resources/jars/v4/spark-jars.jar
           - gs://{my}/nbdata/resources/jars/test/spark-avro_2.12-3.5.0.jar
           - gs://{my}/nbdata/resources/jars/test/json-simple-1.1.1.jar
           - gs://{my}/nbdata/resources/jars/test/abhi_transferman_dbsnapshot-1.0.jar
       driver:
         coreLimit: 600m
         coreRequest: 600m
         labels:
           app: driver
         memory: 1200M
         nodeSelector:
           node-selector: preemptible-spark-driver
         serviceAccount: spark-operator-spark
       executor:
         coreLimit: 3000m
         coreRequest: 3000m
         instances: 3
         labels:
           app: executor
         memory: 2000M
         nodeSelector:
           node-selector: preemptible-large
       image: asia.gcr.io/{my}/spark:3.5.5-gcs
       imagePullPolicy: Always
       mainApplicationFile: gs://{my}/nbdata/resources/jars/test/hudi-utilities-bundle_2.12-1.0.2.jar
       mainClass: org.apache.hudi.utilities.streamer.HoodieMultiTableStreamer
       mode: cluster
       restartPolicy:
         onFailureRetries: 3
         onFailureRetryInterval: 120
         onSubmissionFailureRetries: 1
         onSubmissionFailureRetryInterval: 120
         type: OnFailure
       sparkConf:
         # Existing configurations
         spark.executor.memoryOverhead: 700M
         spark.kubernetes.executor.podNamePrefix: testing-actualparty-new-job
         spark.serializer: org.apache.spark.serializer.KryoSerializer
         # NOTE(review): the original manifest declared spark.driver.extraClassPath and
         # spark.executor.extraClassPath TWICE each. Duplicate keys are invalid YAML and
         # most parsers silently keep only the LAST value, so the long EMR classpaths were
         # being discarded. The two values are merged below. Also note: a gs:// URL is not
         # a local classpath entry, and the EMR /usr/share/aws paths look out of place in a
         # GKE image — confirm which entries this deployment actually needs (the Hudi
         # bundle is already distributed via spark.jars).
         spark.driver.extraClassPath: "/usr/lib/hadoop-lzo/lib/*:/usr/lib/hadoop/hadoop-aws.jar:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/aws-java-sdk-v2/*:/usr/share/aws/emr/goodies/lib/emr-spark-goodies.jar:/usr/share/aws/emr/security/conf:/usr/share/aws/emr/security/lib/*:/usr/share/aws/redshift/jdbc/*:/usr/share/aws/redshift/spark-redshift/lib/*:/usr/share/aws/kinesis/spark-sql-kinesis/lib/*:/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/usr/share/java/Hive-JSON-Serde/hive-openx-serde.jar:/usr/share/aws/emr/s3select/lib/emr-s3-select-spark-connector.jar:/docker/usr/lib/hadoop-lzo/lib/*:/docker/usr/lib/hadoop/hadoop-aws.jar:/docker/usr/share/aws/aws-java-sdk/*:/docker/usr/share/aws/aws-java-sdk-v2/*:/docker/usr/share/aws/emr/goodies/lib/emr-spark-goodies.jar:/docker/usr/share/aws/emr/security/conf:/docker/usr/share/aws/emr/security/lib/*:/docker/usr/share/aws/redshift/jdbc/*:/docker/usr/share/aws/redshift/spark-redshift/lib/*:/docker/usr/share/aws/kinesis/spark-sql-kinesis/lib/*:/docker/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/docker/usr/share/java/Hive-JSON-Serde/hive-openx-serde.jar:/docker/usr/share/aws/emr/s3select/lib/emr-s3-select-spark-connector.jar:/usr/lib/hudi/hudi-spark-bundle.jar:/usr/lib/hudi/hudi-aws-bundle.jar:gs://{my}/nbdata/resources/jars/test/hudi-spark3.5-bundle_2.12-1.0.2.jar"
         spark.executor.extraClassPath: "/usr/lib/hadoop-lzo/lib/*:/usr/lib/hadoop/hadoop-aws.jar:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/aws-java-sdk-v2/*:/usr/share/aws/emr/goodies/lib/emr-spark-goodies.jar:/usr/share/aws/emr/security/conf:/usr/share/aws/emr/security/lib/*:/usr/share/aws/redshift/jdbc/*:/usr/share/aws/redshift/spark-redshift/lib/*:/usr/share/aws/kinesis/spark-sql-kinesis/lib/*:/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/usr/share/java/Hive-JSON-Serde/hive-openx-serde.jar:/usr/share/aws/emr/s3select/lib/emr-s3-select-spark-connector.jar:/docker/usr/lib/hadoop-lzo/lib/*:/docker/usr/lib/hadoop/hadoop-aws.jar:/docker/usr/share/aws/aws-java-sdk/*:/docker/usr/share/aws/aws-java-sdk-v2/*:/docker/usr/share/aws/emr/goodies/lib/emr-spark-goodies.jar:/docker/usr/share/aws/emr/security/conf:/docker/usr/share/aws/emr/security/lib/*:/docker/usr/share/aws/redshift/jdbc/*:/docker/usr/share/aws/redshift/spark-redshift/lib/*:/docker/usr/share/aws/kinesis/spark-sql-kinesis/lib/*:/docker/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/docker/usr/share/java/Hive-JSON-Serde/hive-openx-serde.jar:/docker/usr/share/aws/emr/s3select/lib/emr-s3-select-spark-connector.jar:/usr/lib/hudi/hudi-spark-bundle.jar:/usr/lib/hudi/hudi-aws-bundle.jar:gs://{my}/nbdata/resources/jars/test/hudi-spark3.5-bundle_2.12-1.0.2.jar"
         # New configurations from your SparkSession builder
         spark.jars: "gs://{my}/nbdata/resources/jars/test/hudi-spark3.5-bundle_2.12-1.0.2.jar"
         spark.kryo.registrator: "org.apache.spark.HoodieSparkKryoRegistrar"
         spark.sql.catalog.spark_catalog: "org.apache.spark.sql.hudi.catalog.HoodieCatalog"
         spark.hadoop.fs.gs.impl: "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem"
         # Was "fs.AbstractFileSystem.gs.impl": a sparkConf key that does not start with
         # "spark." is rejected by spark-submit; Hadoop settings need the "spark.hadoop."
         # prefix to reach the Hadoop Configuration.
         spark.hadoop.fs.AbstractFileSystem.gs.impl: "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS"
         spark.hadoop.google.cloud.auth.service.account.enable: "true"
         spark.sql.hive.convertMetastoreParquet: "false"
         spark.sql.legacy.timeParserPolicy: "LEGACY"
         spark.hadoop.fs.s3a.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem"
         spark.sql.session.timeZone: "UTC"
       # Quoted: "3.5.5" is a version string, not a number.
       sparkVersion: "3.5.5"
       type: Scala
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to