weibozhao commented on code in PR #156: URL: https://github.com/apache/flink-ml/pull/156#discussion_r1009193152
########## flink-ml-lib/src/main/java/org/apache/flink/ml/feature/vectorassembler/VectorAssemblerParams.java: ########## @@ -21,11 +21,44 @@ import org.apache.flink.ml.common.param.HasHandleInvalid; import org.apache.flink.ml.common.param.HasInputCols; import org.apache.flink.ml.common.param.HasOutputCol; +import org.apache.flink.ml.param.IntArrayParam; +import org.apache.flink.ml.param.Param; +import org.apache.flink.ml.param.ParamValidator; /** * Params of {@link VectorAssembler}. * * @param <T> The class type of this instance. */ public interface VectorAssemblerParams<T> - extends HasInputCols<T>, HasOutputCol<T>, HasHandleInvalid<T> {} + extends HasInputCols<T>, HasOutputCol<T>, HasHandleInvalid<T> { + Param<Integer[]> INPUT_SIZES = + new IntArrayParam( + "inputSizes", + "Sizes of the input elements to be assembled.", + null, + sizesValidator()); + + default Integer[] getInputSizes() { + return get(INPUT_SIZES); + } + + default T setInputSizes(Integer... value) { + return set(INPUT_SIZES, value); + } + + // Checks the inputSizes parameter. + static ParamValidator<Integer[]> sizesValidator() { + return inputSizes -> { + if (inputSizes == null) { + return false; Review Comment: We have already discussed this case. If record-1 and record-2 have different sizes, then we can't decide which one to use. Spark stores the sizes in meta info, but flink-ml has no meta info. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@flink.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org