abhiNB-star commented on issue #13410:
URL: https://github.com/apache/hudi/issues/13410#issuecomment-2955044830
My YAML file:
---
# ScheduledSparkApplication running the Hudi MultiTable streamer against GCS.
# NOTE(review): this manifest was reconstructed from a whitespace-mangled paste;
# indentation is restored per spark-operator CRD structure — verify against the
# live object (`kubectl get scheduledsparkapplication ... -o yaml`).
apiVersion: sparkoperator.k8s.io/v1beta2
kind: ScheduledSparkApplication
metadata:
  annotations:
    # Server-managed annotation written by `kubectl apply`; rejoined here from
    # the line-wrapped paste. Remove it (and the other server-managed fields
    # below) before re-applying this file.
    kubectl.kubernetes.io/last-applied-configuration: |
      {"apiVersion":"sparkoperator.k8s.io/v1beta2","kind":"ScheduledSparkApplication","metadata":{"annotations":{},"name":"gate-v4-20231130124511","namespace":"spark-hood"},"spec":{"concurrencyPolicy":"Replace","failedRunHistoryLimit":3,"schedule":"01 01 * * *","successfulRunHistoryLimit":1,"template":{"arguments":["--op","UPSERT","--props","gs://starship-hood-prod/nbdata/config/v4/mysql/gate-v4-20231130124511/common.properties","--source-ordering-field","dummy_date_field","--table-type","COPY_ON_WRITE","--transformer-class","transferman.StarshipTransferMan","--source-class","transferman.StarshipSourceReader","--config-folder","gs://starship-hood-prod/nbdata/config/v4/mysql/gate-v4-20231130124511/","--target-table","dummy_table","--enable-hive-sync","--base-path-prefix","dummy_path","--source-limit","100000"],"deps":{"jars":["gs://starship-hood-prod/nbdata/resources/jars/v4/spark-jars.jar","gs://starship-hood-prod/nbdata/resources/jars/v4/calcite-core-1.16.0.jar","gs://starship-hood-prod/nbdata/resources/jars/v4/calcite-linq4j-1.16.0.jar","gs://starship-hood-prod/nbdata/resources/jars/v4/spark-avro_2.12-3.3.0.jar","gs://starship-hood-prod/nbdata/resources/jars/v4/spark-catalyst_2.12-3.2.2.jar","gs://starship-hood-prod/nbdata/resources/jars/release-jars-final/transferman-4.9-SNAPSHOT.jar"]},"driver":{"coreLimit":"240m","coreRequest":"240m","initContainers":[{"args":["gate-v4-20231130124511","gs://starship-hood-prod/nbdata/config/v4/mysql/gate-v4-20231130124511/common.properties"],"image":"gcr.io/hood-starship/prevalidation:latest","name":"prevalidation-check-init-container"}],"labels":{"app":"driver"},"memory":"1000M","nodeSelector":{"node-selector":"preemptible-spark-driver"},"serviceAccount":"spark-operator-spark"},"executor":{"coreLimit":"2000m","coreRequest":"2000m","instances":3,"labels":{"app":"executor"},"memory":"10000M","nodeSelector":{"node-selector":"preemptible-large"}},"image":"asia.gcr.io/hood-starship/spark:3.1.1-gcs","imagePullPolicy":"Always","mainApplicationFile":"gs://starship-hood-prod/nbdata/resources/jars/release-jars-final/hudi-utilities-bundle_2.12-0.13.1.jar","mainClass":"org.apache.hudi.utilities.deltastreamer.HoodieMultiTableDeltaStreamer","mode":"cluster","restartPolicy":{"onFailureRetries":3,"onFailureRetryInterval":120,"onSubmissionFailureRetries":1,"onSubmissionFailureRetryInterval":120,"type":"OnFailure"},"sparkConf":{"spark.kubernetes.executor.podNamePrefix":"gate-v4-20231130124511","spark.serializer":"org.apache.spark.serializer.KryoSerializer"},"sparkVersion":"3.1.1","type":"Scala"}}}
  # NOTE(review): creationTimestamp/generation/resourceVersion/uid are
  # server-managed output from `kubectl get`; strip them before re-applying.
  creationTimestamp: "2023-11-30T07:15:36Z"
  generation: 1158
  name: testing-actualparty-new-job
  namespace: spark-hood
  resourceVersion: "2257236246"
  uid: 3d1d7484-21c4-4404-b652-581f8e0d6a91
spec:
  concurrencyPolicy: Replace
  failedRunHistoryLimit: 3
  # Quoted: cron expressions contain `*` and should never be left to plain-scalar parsing.
  schedule: "59 12 * * *"
  successfulRunHistoryLimit: 1
  template:
    # Arguments passed to HoodieMultiTableStreamer (main class below).
    arguments:
      - --op
      - UPSERT
      - --props
      - gs://starship-hood-prod/nbdata/config/v4/test/actual-party/new/common.properties
      - --source-ordering-field
      - starship_offset
      - --table-type
      - COPY_ON_WRITE
      - --transformer-class
      - transferman.StarshipTransferMan
      - --source-class
      - transferman.StarshipSourceReader
      - --config-folder
      - gs://starship-hood-prod/nbdata/config/v4/test/actual-party/new/
      - --target-table
      - dummy_table
      - --enable-hive-sync
      - --base-path-prefix
      - dummy_path
      - --source-limit
      - "500000"  # quoted: all-digit value must stay a string
      - --continuous
    deps:
      jars:
        - gs://{my}/nbdata/resources/jars/v4/spark-jars.jar
        - gs://{my}/nbdata/resources/jars/test/spark-avro_2.12-3.5.0.jar
        - gs://{my}/nbdata/resources/jars/test/json-simple-1.1.1.jar
        - gs://{my}/nbdata/resources/jars/test/abhi_transferman_dbsnapshot-1.0.jar
    driver:
      coreLimit: 600m
      coreRequest: 600m
      labels:
        app: driver
      memory: 1200M
      nodeSelector:
        node-selector: preemptible-spark-driver
      serviceAccount: spark-operator-spark
    executor:
      coreLimit: 3000m
      coreRequest: 3000m
      instances: 3
      labels:
        app: executor
      memory: 2000M
      nodeSelector:
        node-selector: preemptible-large
    image: asia.gcr.io/{my}/spark:3.5.5-gcs
    imagePullPolicy: Always
    mainApplicationFile: gs://{my}/nbdata/resources/jars/test/hudi-utilities-bundle_2.12-1.0.2.jar
    mainClass: org.apache.hudi.utilities.streamer.HoodieMultiTableStreamer
    mode: cluster
    restartPolicy:
      onFailureRetries: 3
      onFailureRetryInterval: 120
      onSubmissionFailureRetries: 1
      onSubmissionFailureRetryInterval: 120
      type: OnFailure
    sparkConf:
      spark.executor.memoryOverhead: "700M"
      spark.kubernetes.executor.podNamePrefix: testing-actualparty-new-job
      spark.serializer: org.apache.spark.serializer.KryoSerializer
      # BUG FIXED: spark.driver.extraClassPath and spark.executor.extraClassPath
      # were each defined TWICE in the original file. Duplicate YAML keys are
      # invalid, and most parsers silently keep the LAST value — so the second
      # definitions (a single gs:// URL) were wiping out this entire classpath.
      # The gs:// duplicates are dropped: a gs:// URI is not a valid JVM
      # classpath entry; the Hudi bundle is already shipped via spark.jars below
      # (and localized by Spark), so it does not belong on extraClassPath.
      spark.driver.extraClassPath: "/usr/lib/hadoop-lzo/lib/*:/usr/lib/hadoop/hadoop-aws.jar:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/aws-java-sdk-v2/*:/usr/share/aws/emr/goodies/lib/emr-spark-goodies.jar:/usr/share/aws/emr/security/conf:/usr/share/aws/emr/security/lib/*:/usr/share/aws/redshift/jdbc/*:/usr/share/aws/redshift/spark-redshift/lib/*:/usr/share/aws/kinesis/spark-sql-kinesis/lib/*:/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/usr/share/java/Hive-JSON-Serde/hive-openx-serde.jar:/usr/share/aws/emr/s3select/lib/emr-s3-select-spark-connector.jar:/docker/usr/lib/hadoop-lzo/lib/*:/docker/usr/lib/hadoop/hadoop-aws.jar:/docker/usr/share/aws/aws-java-sdk/*:/docker/usr/share/aws/aws-java-sdk-v2/*:/docker/usr/share/aws/emr/goodies/lib/emr-spark-goodies.jar:/docker/usr/share/aws/emr/security/conf:/docker/usr/share/aws/emr/security/lib/*:/docker/usr/share/aws/redshift/jdbc/*:/docker/usr/share/aws/redshift/spark-redshift/lib/*:/docker/usr/share/aws/kinesis/spark-sql-kinesis/lib/*:/docker/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/docker/usr/share/java/Hive-JSON-Serde/hive-openx-serde.jar:/docker/usr/share/aws/emr/s3select/lib/emr-s3-select-spark-connector.jar:/usr/lib/hudi/hudi-spark-bundle.jar:/usr/lib/hudi/hudi-aws-bundle.jar"
      spark.executor.extraClassPath: "/usr/lib/hadoop-lzo/lib/*:/usr/lib/hadoop/hadoop-aws.jar:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/aws-java-sdk-v2/*:/usr/share/aws/emr/goodies/lib/emr-spark-goodies.jar:/usr/share/aws/emr/security/conf:/usr/share/aws/emr/security/lib/*:/usr/share/aws/redshift/jdbc/*:/usr/share/aws/redshift/spark-redshift/lib/*:/usr/share/aws/kinesis/spark-sql-kinesis/lib/*:/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/usr/share/java/Hive-JSON-Serde/hive-openx-serde.jar:/usr/share/aws/emr/s3select/lib/emr-s3-select-spark-connector.jar:/docker/usr/lib/hadoop-lzo/lib/*:/docker/usr/lib/hadoop/hadoop-aws.jar:/docker/usr/share/aws/aws-java-sdk/*:/docker/usr/share/aws/aws-java-sdk-v2/*:/docker/usr/share/aws/emr/goodies/lib/emr-spark-goodies.jar:/docker/usr/share/aws/emr/security/conf:/docker/usr/share/aws/emr/security/lib/*:/docker/usr/share/aws/redshift/jdbc/*:/docker/usr/share/aws/redshift/spark-redshift/lib/*:/docker/usr/share/aws/kinesis/spark-sql-kinesis/lib/*:/docker/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/docker/usr/share/java/Hive-JSON-Serde/hive-openx-serde.jar:/docker/usr/share/aws/emr/s3select/lib/emr-s3-select-spark-connector.jar:/usr/lib/hudi/hudi-spark-bundle.jar:/usr/lib/hudi/hudi-aws-bundle.jar"
      # NOTE(review): these classpaths reference EMR/AWS paths inside a GCR
      # image — confirm they actually exist in asia.gcr.io/{my}/spark:3.5.5-gcs.
      spark.jars: "gs://{my}/nbdata/resources/jars/test/hudi-spark3.5-bundle_2.12-1.0.2.jar"
      spark.kryo.registrator: org.apache.spark.HoodieSparkKryoRegistrar
      spark.sql.catalog.spark_catalog: org.apache.spark.sql.hudi.catalog.HoodieCatalog
      spark.hadoop.fs.gs.impl: com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem
      # BUG FIXED: was `fs.AbstractFileSystem.gs.impl` — without the
      # `spark.hadoop.` prefix Spark never forwards the key to the Hadoop
      # configuration, so the gs:// AbstractFileSystem was never registered.
      spark.hadoop.fs.AbstractFileSystem.gs.impl: com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS
      spark.hadoop.google.cloud.auth.service.account.enable: "true"
      spark.sql.hive.convertMetastoreParquet: "false"
      spark.sql.legacy.timeParserPolicy: LEGACY
      spark.hadoop.fs.s3a.impl: org.apache.hadoop.fs.s3a.S3AFileSystem
      spark.sql.session.timeZone: UTC
    sparkVersion: 3.5.5
    type: Scala
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]