Oleksiy Sayankin created HIVE-17098: ---------------------------------------
Summary: Race condition in Hbase tables Key: HIVE-17098 URL: https://issues.apache.org/jira/browse/HIVE-17098 Project: Hive Issue Type: Bug Reporter: Oleksiy Sayankin Assignee: Oleksiy Sayankin These steps simulate our customer production env. *STEP 1. Create test tables* {code} CREATE TABLE for_loading( key int, value string, age int, salary decimal (10,2) ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','; {code} {code} CREATE TABLE test_1( key int, value string, age int, salary decimal (10,2) ) ROW FORMAT SERDE 'org.apache.hadoop.hive.hbase.HBaseSerDe' STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ( 'hbase.columns.mapping'=':key, cf1:value, cf1:age, cf1:salary', 'serialization.format'='1') TBLPROPERTIES ( 'COLUMN_STATS_ACCURATE'='{\"BASIC_STATS\":\"true\"}', 'hbase.table.name'='test_1', 'numFiles'='0', 'numRows'='0', 'rawDataSize'='0', 'totalSize'='0', 'transient_lastDdlTime'='1495769316'); {code} {code} CREATE TABLE test_2( key int, value string, age int, salary decimal (10,2) ) ROW FORMAT SERDE 'org.apache.hadoop.hive.hbase.HBaseSerDe' STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ( 'hbase.columns.mapping'=':key, cf1:value, cf1:age, cf1:salary', 'serialization.format'='1') TBLPROPERTIES ( 'COLUMN_STATS_ACCURATE'='{\"BASIC_STATS\":\"true\"}', 'hbase.table.name'='test_2', 'numFiles'='0', 'numRows'='0', 'rawDataSize'='0', 'totalSize'='0', 'transient_lastDdlTime'='1495769316'); {code} *STEP 2. 
Create test data* {code} import java.io.IOException; import java.math.BigDecimal; import java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.StandardOpenOption; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Random; import static java.lang.String.format; public class Generator { private static List<String> lines = new ArrayList<>(); private static List<String> name = Arrays.asList("Brian", "John", "Rodger", "Max", "Freddie", "Albert", "Fedor", "Lev", "Niccolo"); private static List<BigDecimal> salary = new ArrayList<>(); public static void main(String[] args) { generateData(Integer.parseInt(args[0]), args[1]); } public static void generateData(int rowNumber, String file) { double maxValue = 20000.55; double minValue = 1000.03; Random random = new Random(); for (int i = 1; i <= rowNumber; i++) { lines.add( i + "," + name.get(random.nextInt(name.size())) + "," + (random.nextInt(62) + 18) + "," + format("%.2f", (minValue + (maxValue - minValue) * random.nextDouble()))); } Path path = Paths.get(file); try { Files.write(path, lines, Charset.forName("UTF-8"), StandardOpenOption.APPEND); } catch (IOException e) { e.printStackTrace(); } } } {code} {code} javac Generator.java java Generator 3000000 dataset.csv hadoop fs -put dataset.csv / {code} *STEP 3. Upload test data* {code} load data local inpath '/home/myuser/dataset.csv' into table for_loading; {code} {code} from for_loading insert into table test_1 select key,value,age,salary; {code} {code} from for_loading insert into table test_2 select key,value,age,salary; {code} *STEP 4. 
Run test queries* Run in 5 parallel terminals for table {{test_1}} {code} for i in {1..500}; do beeline -u "jdbc:hive2://localhost:10000/default testuser1" -e "select * from test_1 limit 10;" 1>/dev/null; done {code} Run in 5 parallel terminals for table {{test_2}} {code} for i in {1..500}; do beeline -u "jdbc:hive2://localhost:10000/default testuser2" -e "select * from test_2 limit 10;" 1>/dev/null; done {code} *EXPECTED RESULT:* All queries are OK. *ACTUAL RESULT* {code} org.apache.hive.service.cli.HiveSQLException: java.io.IOException: java.lang.IllegalStateException: The input format instance has not been properly initialized. Ensure you call initializeTable either in your constructor or initialize method at org.apache.hive.service.cli.operation.SQLOperation.getNextRowSet(SQLOperation.java:484) at org.apache.hive.service.cli.operation.OperationManager.getOperationNextRowSet(OperationManager.java:308) at org.apache.hive.service.cli.session.HiveSessionImpl.fetchResults(HiveSessionImpl.java:847) at sun.reflect.GeneratedMethodAccessor8.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:78) at org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:36) at org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:63) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1595) at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:59) at com.sun.proxy.$Proxy25.fetchResults(Unknown Source) at org.apache.hive.service.cli.CLIService.fetchResults(CLIService.java:504) at 
org.apache.hive.service.cli.thrift.ThriftCLIService.FetchResults(ThriftCLIService.java:698) at org.apache.hive.service.rpc.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1717) at org.apache.hive.service.rpc.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1702) at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39) at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39) at org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56) at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) Caused by: java.io.IOException: java.lang.IllegalStateException: The input format instance has not been properly initialized. Ensure you call initializeTable either in your constructor or initialize method at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:521) at org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:428) at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:146) at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:2099) at org.apache.hive.service.cli.operation.SQLOperation.getNextRowSet(SQLOperation.java:479) ... 24 more Caused by: java.lang.IllegalStateException: The input format instance has not been properly initialized. 
Ensure you call initializeTable either in your constructor or initialize method at org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getRegionLocator(TableInputFormatBase.java:579) at org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getStartEndKeys(TableInputFormatBase.java:225) at org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getSplits(TableInputFormatBase.java:261) at org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat.getSplitsInternal(HiveHBaseTableInputFormat.java:525) at org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat.getSplits(HiveHBaseTableInputFormat.java:452) at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextSplits(FetchOperator.java:372) at org.apache.hadoop.hive.ql.exec.FetchOperator.getRecordReader(FetchOperator.java:304) at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:459) ... 28 more {code} -- This message was sent by Atlassian JIRA (v6.4.14#64029)