weibozhao commented on a change in pull request #24: URL: https://github.com/apache/flink-ml/pull/24#discussion_r747282532
########## File path: flink-ml-lib/src/main/java/org/apache/flink/ml/algo/batch/knn/KnnTrainBatchOp.java ########## @@ -0,0 +1,230 @@ +package org.apache.flink.ml.algo.batch.knn; + +import org.apache.flink.api.common.functions.MapFunction; +import org.apache.flink.api.common.functions.RichMapPartitionFunction; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.common.typeinfo.Types; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.ml.algo.batch.knn.distance.BaseFastDistance; +import org.apache.flink.ml.algo.batch.knn.distance.BaseFastDistanceData; +import org.apache.flink.ml.algo.batch.knn.distance.FastDistanceMatrixData; +import org.apache.flink.ml.algo.batch.knn.distance.FastDistanceSparseData; +import org.apache.flink.ml.algo.batch.knn.distance.FastDistanceVectorData; +import org.apache.flink.ml.common.BatchOperator; +import org.apache.flink.ml.common.MapPartitionFunctionWrapper; +import org.apache.flink.ml.common.linalg.DenseVector; +import org.apache.flink.ml.common.linalg.VectorUtil; +import org.apache.flink.ml.param.Param; +import org.apache.flink.ml.param.StringParam; +import org.apache.flink.ml.params.knn.HasKnnDistanceType; +import org.apache.flink.ml.params.knn.KnnTrainParams; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.table.api.Table; +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; +import org.apache.flink.table.api.internal.TableImpl; +import org.apache.flink.table.catalog.ResolvedSchema; +import org.apache.flink.table.types.DataType; +import org.apache.flink.types.Row; +import org.apache.flink.util.Collector; +import org.apache.flink.util.Preconditions; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.apache.flink.ml.algo.batch.knn.distance.BaseFastDistanceData.pGson; + +/** + * KNN is 
to classify unlabeled observations by assigning them to the class of the most similar + * labeled examples. Note that though there is no ``training process`` in KNN, we create a ``fake + * one`` to use in pipeline model. In this operator, we do some preparation to speed up the + * inference process. + */ +public final class KnnTrainBatchOp extends BatchOperator<KnnTrainBatchOp> Review comment: KnnTrainBatchOp is the kernel of KnnClassifier. We introduce BatchOp to tell the algorithm developer that they are in batch Env mode and that everything they do in this env is a batch action. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@flink.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org