[jira] [Commented] (FLINK-2168) Add HBaseTableSource

ASF GitHub Bot (JIRA) Tue, 24 Jan 2017 19:10:42 -0800

    [ 
https://issues.apache.org/jira/browse/FLINK-2168?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15837091#comment-15837091
 ]


ASF GitHub Bot commented on FLINK-2168:
---------------------------------------

Github user wuchong commented on a diff in the pull request:

    https://github.com/apache/flink/pull/3149#discussion_r97701834
  
    --- Diff: 
flink-connectors/flink-hbase/src/main/java/org/apache/flink/addons/hbase/HBaseTableSourceInputFormat.java
 ---
    @@ -0,0 +1,160 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.flink.addons.hbase;
    +
    +import org.apache.flink.api.common.io.InputFormat;
    +import org.apache.flink.api.common.typeinfo.TypeInformation;
    +import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
    +import org.apache.flink.api.java.typeutils.RowTypeInfo;
    +import org.apache.flink.configuration.Configuration;
    +import org.apache.flink.types.Row;
    +import org.apache.hadoop.hbase.HBaseConfiguration;
    +import org.apache.hadoop.hbase.TableName;
    +import org.apache.hadoop.hbase.TableNotFoundException;
    +import org.apache.hadoop.hbase.client.HTable;
    +import org.apache.hadoop.hbase.client.Connection;
    +import org.apache.hadoop.hbase.client.ConnectionFactory;
    +import org.apache.hadoop.hbase.client.Result;
    +import org.apache.hadoop.hbase.client.Scan;
    +import org.apache.hadoop.hbase.util.Bytes;
    +import org.apache.hadoop.hbase.util.Pair;
    +import org.slf4j.Logger;
    +import org.slf4j.LoggerFactory;
    +
    +import java.io.IOException;
    +import java.util.ArrayList;
    +import java.util.List;
    +import java.util.Map;
    +
    +/**
    + * {@link InputFormat} subclass that wraps the access for HTables. Returns 
the result as {@link Row}
    + */
    +public class HBaseTableSourceInputFormat extends TableInputFormat<Row> 
implements ResultTypeQueryable<Row> {
    +
    +   private static final long serialVersionUID = 1L;
    +
    +   private static final Logger LOG = 
LoggerFactory.getLogger(HBaseTableSourceInputFormat.class);
    +   private String tableName;
    +   private transient Connection conn;
    +   private transient org.apache.hadoop.conf.Configuration conf;
    +   private HBaseTableSchema schema;
    +
    +   public HBaseTableSourceInputFormat(org.apache.hadoop.conf.Configuration 
conf, String tableName, HBaseTableSchema schema) {
    +           this.tableName = tableName;
    +           this.conf = conf;
    +           this.schema = schema;
    +   }
    +
    +   @Override
    +   public void configure(Configuration parameters) {
    +           LOG.info("Initializing HBaseConfiguration");
    +           connectToTable();
    +           if(table != null) {
    +                   scan = getScanner();
    +           }
    +   }
    +
    +   @Override
    +   protected Scan getScanner() {
    +           // TODO : Pass 'rowkey'. For this we need FilterableTableSource
    +           Scan scan = new Scan();
    +           Map<String, List<Pair>> familyMap = schema.getFamilyMap();
    +           for(String family : familyMap.keySet()) {
    +                   // select only the fields in the 'selectedFields'
    +                   List<Pair> colDetails = familyMap.get(family);
    +                   for(Pair<String, TypeInformation<?>> pair : colDetails) 
{
    +                           scan.addColumn(Bytes.toBytes(family), 
Bytes.toBytes(pair.getFirst()));
    +                   }
    +           }
    +           return scan;
    +   }
    +
    +   @Override
    +   public String getTableName() {
    +           return tableName;
    +   }
    +
    +   @Override
    +   protected Row mapResultToTuple(Result res) {
    +           List<Object> values = new ArrayList<Object>();
    +           int i = 0;
    +           Map<String, List<Pair>> familyMap = schema.getFamilyMap();
    +           Row[] rows = new Row[familyMap.size()];
    +           for(String family : familyMap.keySet()) {
    +                   List<Pair> colDetails = familyMap.get(family);
    +                   for(Pair<String, TypeInformation<?>> pair : colDetails) 
{
    +                           byte[] value = 
res.getValue(Bytes.toBytes(family), Bytes.toBytes(pair.getFirst()));
    +                           if(value != null) {
    +                                   values.add(schema.deserialize(value, 
pair.getSecond()));
    +                           } else {
    +                                   
values.add(schema.deserializeNull(pair.getSecond()));
    --- End diff --
    
    Do we really need this method to represent `null` with a special default 
value? Why not add null directly with `values.add(null)`?
    
    `Row` supports nullable fields, but `Tuple` doesn't.


> Add HBaseTableSource
> --------------------
>
>                 Key: FLINK-2168
>                 URL: https://issues.apache.org/jira/browse/FLINK-2168
>             Project: Flink
>          Issue Type: New Feature
>          Components: Table API & SQL
>    Affects Versions: 0.9
>            Reporter: Fabian Hueske
>            Assignee: ramkrishna.s.vasudevan
>            Priority: Minor
>              Labels: starter
>
> Add a {{HBaseTableSource}} to read data from an HBase table. The 
> {{HBaseTableSource}} should implement the {{ProjectableTableSource}} 
> (FLINK-3848) and {{FilterableTableSource}} (FLINK-3849) interfaces.
> The implementation can be based on Flink's {{TableInputFormat}}.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

[jira] [Commented] (FLINK-2168) Add HBaseTableSource

Reply via email to