Github user yanboliang commented on a diff in the pull request:
https://github.com/apache/spark/pull/9690#discussion_r44769440
--- Diff:
examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala
---
@@ -169,36 +169,22 @@ object DecisionTreeExample {
algo: String,
fracTest: Double): (DataFrame, DataFrame) = {
val sqlContext = new SQLContext(sc)
- import sqlContext.implicits._
// Load training data
- val origExamples: RDD[LabeledPoint] = loadData(sc, input, dataFormat)
+ val origExamples: DataFrame = loadData(sqlContext, input, dataFormat)
// Load or create test set
- val splits: Array[RDD[LabeledPoint]] = if (testInput != "") {
+ val dataframes: Array[DataFrame] = if (testInput != "") {
// Load testInput.
- val numFeatures = origExamples.take(1)(0).features.size
- val origTestExamples: RDD[LabeledPoint] =
- loadData(sc, testInput, dataFormat, Some(numFeatures))
+ val numFeatures = origExamples.first().getAs[Vector](1).size
+ val origTestExamples: DataFrame =
+ loadData(sqlContext, testInput, dataFormat, Some(numFeatures))
Array(origExamples, origTestExamples)
} else {
// Split input into training, test.
origExamples.randomSplit(Array(1.0 - fracTest, fracTest), seed =
12345)
}
- // For classification, convert labels to Strings since we will index
them later with
- // StringIndexer.
- def labelsToStrings(data: DataFrame): DataFrame = {
- algo.toLowerCase match {
- case "classification" =>
- data.withColumn("labelString", data("label").cast(StringType))
- case "regression" =>
- data
- case _ =>
- throw new IllegalArgumentException("Algo ${params.algo} not
supported.")
- }
- }
--- End diff --
`StringIndexer` automatically casts the label column to String before
indexing it, so we don't need this code snippet.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]