I am looking for something like:

// Prepare input data: a flat DataFrame with three integer columns.
val input_schema = StructType(Seq(
    StructField("col1", IntegerType),
    StructField("col2", IntegerType),
    StructField("col3", IntegerType)))
val input_data = spark.createDataFrame(
    sc.parallelize(Seq(
        Row(1, 2, 3),
        Row(4, 5, 6))),
    input_schema)  // was `schema`, which is not defined anywhere in this snippet

// Reshape the input DataFrame according to output_schema and save it to parquet.
import org.apache.spark.sql.Column
import org.apache.spark.sql.functions.{col, struct}

val output_schema = StructType(Seq(
    StructField("col1", IntegerType),
    StructField("newcol2", StructType(Seq(
        StructField("col2", IntegerType),
        StructField("col3", IntegerType))))))

// Original attempt, which does not work because createDataFrame has no
// overload that accepts an existing DataFrame:
//   val output_data = spark.createDataFrame(input_data, output_schema)

/** Builds the select expression for one field of the target schema.
  *
  * A struct field is assembled recursively from its children with `struct`;
  * any other field is read from the input column of the same name and cast
  * to the declared type.
  *
  * NOTE: leaf fields are looked up by name in the flat input, so this assumes
  * every leaf name in the target schema exists as a top-level input column.
  */
def toColumn(field: StructField): Column = field.dataType match {
    case nested: StructType =>
        struct(nested.fields.map(toColumn): _*).as(field.name)
    case leafType =>
        col(field.name).cast(leafType).as(field.name)
}

val output_data = input_data.select(output_schema.fields.map(toColumn): _*)
output_data.write.parquet("output_data.parquet")

Reply via email to