[
https://issues.apache.org/jira/browse/SPARK-18211?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15629446#comment-15629446
]
lostinoverflow commented on SPARK-18211:
----------------------------------------
It could be, although I am not sure. I tried:
{code}
import org.apache.spark.sql.SparkSession
import org.apache.spark._
/** Minimal reproduction for SPARK-18211.
  *
  * Sets the Hadoop min/max split size to the same value and prints the number
  * of partitions obtained when the same text input is read through the RDD API
  * and through the Dataset API.
  *
  * Usage: App <splitSize> <inputPath>
  */
object App {

  /** Runs the comparison.
    *
    * @param args `args(0)` is the integer written to both
    *             `mapred.min.split.size` and `mapred.max.split.size`;
    *             `args(1)` is the path of the text input to read.
    */
  def main(args: Array[String]): Unit = {
    // Read the arguments up front so bad input fails before Spark is started.
    val splitSize = args(0).toInt
    val inputPath = args(1)

    val conf = new SparkConf().setMaster("local[*]").setAppName("split_size")
    val sc = new SparkContext(conf)
    sc.hadoopConfiguration.setInt("mapred.min.split.size", splitSize)
    sc.hadoopConfiguration.setInt("mapred.max.split.size", splitSize)

    val spark = SparkSession.builder.config(conf).getOrCreate()
    try {
      // RDD API: respects Hadoop conf
      println(spark.sparkContext.textFile(inputPath).partitions.size)
      // Dataset API: doesn't respect Hadoop conf
      println(spark.read.textFile(inputPath).rdd.partitions.size)
    } finally {
      // Stop Spark even when one of the reads above throws.
      spark.stop()
    }
  }
}
{code}
but the problem persists.
> Spark SQL ignores split.size
> ----------------------------
>
> Key: SPARK-18211
> URL: https://issues.apache.org/jira/browse/SPARK-18211
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 2.0.0
> Reporter: lostinoverflow
>
> I expect that RDD and DataFrame will have the same number of partitions
> (worked in 1.6) but it looks like Spark SQL ignores Hadoop configuration.
> {code}
> import org.apache.spark.sql.SparkSession
> object App {
> def main(args: Array[String]) {
> val spark = SparkSession
> .builder()
> .master("local[*]")
> .appName("split size")
> .getOrCreate()
> spark.sparkContext.hadoopConfiguration.setInt("mapred.min.split.size",
> args(0).toInt)
> spark.sparkContext.hadoopConfiguration.setInt("mapred.max.split.size",
> args(0).toInt)
> println(spark.sparkContext.textFile(args(1)).partitions.size)
> println(spark.read.textFile(args(1)).rdd.partitions.size)
> spark.stop()
> }
> }
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]