[ https://issues.apache.org/jira/browse/FLINK-2168?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15837091#comment-15837091 ]
ASF GitHub Bot commented on FLINK-2168: --------------------------------------- Github user wuchong commented on a diff in the pull request: https://github.com/apache/flink/pull/3149#discussion_r97701834 --- Diff: flink-connectors/flink-hbase/src/main/java/org/apache/flink/addons/hbase/HBaseTableSourceInputFormat.java --- @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.addons.hbase; + +import org.apache.flink.api.common.io.InputFormat; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.java.typeutils.ResultTypeQueryable; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.types.Row; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.TableNotFoundException; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.client.ConnectionFactory; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** + * {@link InputFormat} subclass that wraps the access for HTables. 
Returns the result as {@link Row} + */ +public class HBaseTableSourceInputFormat extends TableInputFormat<Row> implements ResultTypeQueryable<Row> { + + private static final long serialVersionUID = 1L; + + private static final Logger LOG = LoggerFactory.getLogger(HBaseTableSourceInputFormat.class); + private String tableName; + private transient Connection conn; + private transient org.apache.hadoop.conf.Configuration conf; + private HBaseTableSchema schema; + + public HBaseTableSourceInputFormat(org.apache.hadoop.conf.Configuration conf, String tableName, HBaseTableSchema schema) { + this.tableName = tableName; + this.conf = conf; + this.schema = schema; + } + + @Override + public void configure(Configuration parameters) { + LOG.info("Initializing HBaseConfiguration"); + connectToTable(); + if(table != null) { + scan = getScanner(); + } + } + + @Override + protected Scan getScanner() { + // TODO : Pass 'rowkey'. For this we need FilterableTableSource + Scan scan = new Scan(); + Map<String, List<Pair>> familyMap = schema.getFamilyMap(); + for(String family : familyMap.keySet()) { + // select only the fields in the 'selectedFields' + List<Pair> colDetails = familyMap.get(family); + for(Pair<String, TypeInformation<?>> pair : colDetails) { + scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(pair.getFirst())); + } + } + return scan; + } + + @Override + public String getTableName() { + return tableName; + } + + @Override + protected Row mapResultToTuple(Result res) { + List<Object> values = new ArrayList<Object>(); + int i = 0; + Map<String, List<Pair>> familyMap = schema.getFamilyMap(); + Row[] rows = new Row[familyMap.size()]; + for(String family : familyMap.keySet()) { + List<Pair> colDetails = familyMap.get(family); + for(Pair<String, TypeInformation<?>> pair : colDetails) { + byte[] value = res.getValue(Bytes.toBytes(family), Bytes.toBytes(pair.getFirst())); + if(value != null) { + values.add(schema.deserialize(value, pair.getSecond())); + } else { + 
values.add(schema.deserializeNull(pair.getSecond())); --- End diff -- Do we really need this method to represent `null` with a special default value? Why not add null directly with `values.add(null)`? `Row` supports nullable fields, but Tuple doesn't. > Add HBaseTableSource > -------------------- > > Key: FLINK-2168 > URL: https://issues.apache.org/jira/browse/FLINK-2168 > Project: Flink > Issue Type: New Feature > Components: Table API & SQL > Affects Versions: 0.9 > Reporter: Fabian Hueske > Assignee: ramkrishna.s.vasudevan > Priority: Minor > Labels: starter > > Add a {{HBaseTableSource}} to read data from a HBase table. The > {{HBaseTableSource}} should implement the {{ProjectableTableSource}} > (FLINK-3848) and {{FilterableTableSource}} (FLINK-3849) interfaces. > The implementation can be based on Flink's {{TableInputFormat}}. -- This message was sent by Atlassian JIRA (v6.3.4#6332)