Github user mengxr commented on a diff in the pull request:
https://github.com/apache/spark/pull/7884#discussion_r38586934
--- Diff:
mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
---
@@ -216,43 +221,65 @@ class LogisticRegressionSuite extends SparkFunSuite
with MLlibTestSparkContext {
test("MultiClassSummarizer") {
val summarizer1 = (new MultiClassSummarizer)
.add(0.0).add(3.0).add(4.0).add(3.0).add(6.0)
- assert(summarizer1.histogram.zip(Array[Long](1, 0, 0, 2, 1, 0,
1)).forall(x => x._1 === x._2))
+ assert(summarizer1.histogram.zip(Array[Double](1, 0, 0, 2, 1, 0,
1)).forall(x => x._1 === x._2))
assert(summarizer1.countInvalid === 0)
assert(summarizer1.numClasses === 7)
val summarizer2 = (new MultiClassSummarizer)
.add(1.0).add(5.0).add(3.0).add(0.0).add(4.0).add(1.0)
- assert(summarizer2.histogram.zip(Array[Long](1, 2, 0, 1, 1,
1)).forall(x => x._1 === x._2))
+ assert(summarizer2.histogram.zip(Array[Double](1, 2, 0, 1, 1,
1)).forall(x => x._1 === x._2))
assert(summarizer2.countInvalid === 0)
assert(summarizer2.numClasses === 6)
val summarizer3 = (new MultiClassSummarizer)
.add(0.0).add(1.3).add(5.2).add(2.5).add(2.0).add(4.0).add(4.0).add(4.0).add(1.0)
- assert(summarizer3.histogram.zip(Array[Long](1, 1, 1, 0, 3)).forall(x
=> x._1 === x._2))
+ assert(summarizer3.histogram.zip(Array[Double](1, 1, 1, 0,
3)).forall(x => x._1 === x._2))
assert(summarizer3.countInvalid === 3)
assert(summarizer3.numClasses === 5)
val summarizer4 = (new MultiClassSummarizer)
.add(3.1).add(4.3).add(2.0).add(1.0).add(3.0)
- assert(summarizer4.histogram.zip(Array[Long](0, 1, 1, 1)).forall(x =>
x._1 === x._2))
+ assert(summarizer4.histogram.zip(Array[Double](0, 1, 1, 1)).forall(x
=> x._1 === x._2))
assert(summarizer4.countInvalid === 2)
assert(summarizer4.numClasses === 4)
// small map merges large one
val summarizerA = summarizer1.merge(summarizer2)
assert(summarizerA.hashCode() === summarizer2.hashCode())
- assert(summarizerA.histogram.zip(Array[Long](2, 2, 0, 3, 2, 1,
1)).forall(x => x._1 === x._2))
+ assert(summarizerA.histogram.zip(Array[Double](2, 2, 0, 3, 2, 1,
1)).forall(x => x._1 === x._2))
assert(summarizerA.countInvalid === 0)
assert(summarizerA.numClasses === 7)
// large map merges small one
val summarizerB = summarizer3.merge(summarizer4)
assert(summarizerB.hashCode() === summarizer3.hashCode())
- assert(summarizerB.histogram.zip(Array[Long](1, 2, 2, 1, 3)).forall(x
=> x._1 === x._2))
+ assert(summarizerB.histogram.zip(Array[Double](1, 2, 2, 1,
3)).forall(x => x._1 === x._2))
assert(summarizerB.countInvalid === 5)
assert(summarizerB.numClasses === 5)
}
+ test("MultiClassSummarizer with weighted samples") {
+ val summarizer1 = (new MultiClassSummarizer)
+ .add(0.0, 0.2).add(3.0, 0.8).add(4.0, 3.2).add(3.0, 1.3).add(6.0,
3.1)
--- End diff --
It would be nice to use named arguments in the first `add` call, so it is clear what each of the two values means.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]