[ https://issues.apache.org/jira/browse/HIVE-8247 ]
Eugene Koifman updated HIVE-8247:
---------------------------------
    Attachment: reproducer.patch

test case that reproduces the issue

Pig cursor written to Hive via HCat doesn't NULL-fill missing columns
----------------------------------------------------------------------

                 Key: HIVE-8247
                 URL: https://issues.apache.org/jira/browse/HIVE-8247
             Project: Hive
          Issue Type: Bug
          Components: HCatalog
    Affects Versions: 0.13.1
            Reporter: Eugene Koifman
         Attachments: reproducer.patch

This started out as BUG-15650, but it is no longer clear from that ticket what the real issue is, so I'm filing a new one.

Suppose a Hive table has columns (a,b,c,d).
If a Pig script writing to this table produces schema (a,b,c), it works: 'd' will be NULL.
If a Pig script writing to this table produces schema (a,b,d), it fails with the error below.

This is an old issue. There is nothing in the HCatalog documentation that indicates whether this should work.

{noformat}
Running org.apache.hive.hcatalog.pig.TestOrcHCatStorer
Tests run: 1, Failures: 0, Errors: 1, Skipped: 0, Time elapsed: 30.113 sec <<< FAILURE! - in org.apache.hive.hcatalog.pig.TestOrcHCatStorer
partialSchemaSepcification(org.apache.hive.hcatalog.pig.TestOrcHCatStorer)  Time elapsed: 29.886 sec  <<< ERROR!
org.apache.pig.impl.logicalLayer.FrontendException: Unable to store alias ABD
	at org.apache.pig.PigServer$Graph.registerQuery(PigServer.java:1635)
	at org.apache.pig.PigServer.registerQuery(PigServer.java:575)
	at org.apache.hive.hcatalog.mapreduce.HCatBaseTest.logAndRegister(HCatBaseTest.java:92)
	at org.apache.hive.hcatalog.pig.TestHCatStorer.partialSchemaSepcification(TestHCatStorer.java:1035)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
	at java.lang.reflect.Method.invoke(Method.java:597)
	at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:45)
	at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:15)
	at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:42)
	at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:20)
	at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:28)
	at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:263)
	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:68)
	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:47)
	at org.junit.runners.ParentRunner$3.run(ParentRunner.java:231)
	at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:60)
	at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:229)
	at org.junit.runners.ParentRunner.access$000(ParentRunner.java:50)
	at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:222)
	at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:28)
	at org.junit.runners.ParentRunner.run(ParentRunner.java:300)
	at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:254)
	at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:149)
	at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:124)
	at org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:200)
	at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:153)
	at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:103)
Caused by: org.apache.pig.impl.plan.VisitorException: <line 7, column 0> Output Location Validation Failed for: 'T More info to follow:
org.apache.hive.hcatalog.common.HCatException : 2007 : Invalid column position in partition schema : Expected column <c> at position 3, found column <d>
	at org.apache.pig.newplan.logical.rules.InputOutputFileValidator$InputOutputFileVisitor.visit(InputOutputFileValidator.java:75)
	at org.apache.pig.newplan.logical.relational.LOStore.accept(LOStore.java:66)
	at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:64)
	at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:66)
	at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:66)
	at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:66)
	at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:66)
	at org.apache.pig.newplan.DepthFirstWalker.walk(DepthFirstWalker.java:53)
	at org.apache.pig.newplan.PlanVisitor.visit(PlanVisitor.java:52)
	at org.apache.pig.newplan.logical.rules.InputOutputFileValidator.validate(InputOutputFileValidator.java:45)
	at org.apache.pig.backend.hadoop.executionengine.HExecutionEngine.compile(HExecutionEngine.java:303)
	at org.apache.pig.PigServer.compilePp(PigServer.java:1380)
	at org.apache.pig.PigServer.executeCompiledLogicalPlan(PigServer.java:1305)
	at org.apache.pig.PigServer.execute(PigServer.java:1297)
	at org.apache.pig.PigServer.access$400(PigServer.java:122)
	at org.apache.pig.PigServer$Graph.registerQuery(PigServer.java:1630)
	at org.apache.pig.PigServer.registerQuery(PigServer.java:575)
	at org.apache.hive.hcatalog.mapreduce.HCatBaseTest.logAndRegister(HCatBaseTest.java:92)
	at org.apache.hive.hcatalog.pig.TestHCatStorer.partialSchemaSepcification(TestHCatStorer.java:1035)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
	at java.lang.reflect.Method.invoke(Method.java:597)
	at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:45)
	at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:15)
	at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:42)
	at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:20)
	at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:28)
	at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:263)
	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:68)
	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:47)
	at org.junit.runners.ParentRunner$3.run(ParentRunner.java:231)
	at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:60)
	at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:229)
	at org.junit.runners.ParentRunner.access$000(ParentRunner.java:50)
	at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:222)
	at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:28)
	at org.junit.runners.ParentRunner.run(ParentRunner.java:300)
	at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:254)
	at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:149)
	at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:124)
	at org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:200)
	at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:153)
	at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:103)
Caused by: org.apache.hive.hcatalog.common.HCatException: org.apache.hive.hcatalog.common.HCatException : 2007 : Invalid column position in partition schema : Expected column <c> at position 3, found column <d>
	at org.apache.hive.hcatalog.common.HCatUtil.validatePartitionSchema(HCatUtil.java:258)
	at org.apache.hive.hcatalog.mapreduce.HCatBaseOutputFormat.setPartDetails(HCatBaseOutputFormat.java:231)
	at org.apache.hive.hcatalog.mapreduce.HCatOutputFormat.setSchema(HCatOutputFormat.java:244)
	at org.apache.hive.hcatalog.mapreduce.HCatOutputFormat.setSchema(HCatOutputFormat.java:231)
	at org.apache.hive.hcatalog.pig.HCatStorer.setStoreLocation(HCatStorer.java:206)
	at org.apache.pig.newplan.logical.rules.InputOutputFileValidator$InputOutputFileVisitor.visit(InputOutputFileValidator.java:68)
	at org.apache.pig.newplan.logical.relational.LOStore.accept(LOStore.java:66)
	at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:64)
	at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:66)
	at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:66)
	at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:66)
	at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:66)
	at org.apache.pig.newplan.DepthFirstWalker.walk(DepthFirstWalker.java:53)
	at org.apache.pig.newplan.PlanVisitor.visit(PlanVisitor.java:52)
	at org.apache.pig.newplan.logical.rules.InputOutputFileValidator.validate(InputOutputFileValidator.java:45)
	at org.apache.pig.backend.hadoop.executionengine.HExecutionEngine.compile(HExecutionEngine.java:303)
	at org.apache.pig.PigServer.compilePp(PigServer.java:1380)
	at org.apache.pig.PigServer.executeCompiledLogicalPlan(PigServer.java:1305)
	at org.apache.pig.PigServer.execute(PigServer.java:1297)
	at org.apache.pig.PigServer.access$400(PigServer.java:122)
	at org.apache.pig.PigServer$Graph.registerQuery(PigServer.java:1630)
	at org.apache.pig.PigServer.registerQuery(PigServer.java:575)
	at org.apache.hive.hcatalog.mapreduce.HCatBaseTest.logAndRegister(HCatBaseTest.java:92)
	at org.apache.hive.hcatalog.pig.TestHCatStorer.partialSchemaSepcification(TestHCatStorer.java:1035)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
	at java.lang.reflect.Method.invoke(Method.java:597)
	at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:45)
	at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:15)
	at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:42)
	at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:20)
	at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:28)
	at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:263)
	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:68)
	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:47)
	at org.junit.runners.ParentRunner$3.run(ParentRunner.java:231)
	at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:60)
	at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:229)
	at org.junit.runners.ParentRunner.access$000(ParentRunner.java:50)
	at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:222)
	at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:28)
	at org.junit.runners.ParentRunner.run(ParentRunner.java:300)
	at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:254)
	at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:149)
	at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:124)
	at org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:200)
	at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:153)
	at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:103)

Results :

Tests in error:
  TestOrcHCatStorer>TestHCatStorer.partialSchemaSepcification:1035->HCatBaseTest.logAndRegister:92 ? Frontend
{noformat}

Reproducer (which can be added to org.apache.hive.hcatalog.pig.TestHCatStorer):

{noformat}
@Test
public void partialSchemaSepcification() throws Exception {
  driver.run("drop table if exists T");
  String createTable = "create table T(a int, b int, c string, d string) stored as " + getStorageFormat();
  int retCode = driver.run(createTable).getResponseCode();
  if (retCode != 0) {
    throw new IOException("Failed to create table.");
  }
  String[] inputData = {"1\t20\tstr1\tstr20", "2\t30\tstr2\tstr30", "3\t40\tstr3\tstr40", "4\t50\tstr4\tstr40"};
  HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData);
  int lineNumber = 1;
  PigServer ps = createPigServer(true);
  logAndRegister(ps, "A1 = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (a:int,b:int,c:chararray,d:chararray);", lineNumber++);
  // Store schema (a,b,c): only the trailing column 'd' is missing; this works and 'd' is NULL-filled.
  logAndRegister(ps, "ROW1 = FILTER A1 BY a == 1;", lineNumber++);
  logAndRegister(ps, "ABC = FOREACH ROW1 GENERATE a,b,c;", lineNumber++);
  logAndRegister(ps, "STORE ABC INTO 'T' USING " + HCatStorer.class.getName() + "();", lineNumber++);
  // Store schema (a,b,d): the middle column 'c' is missing; this fails with
  // HCatException 2007 (see the stack trace above).
  logAndRegister(ps, "ROW2 = FILTER A1 BY a == 2;", lineNumber++);
  logAndRegister(ps, "ABD = FOREACH ROW2 GENERATE a,b,d;", lineNumber++);
  logAndRegister(ps, "STORE ABD INTO 'T' USING " + HCatStorer.class.getName() + "();", lineNumber);
  driver.run("select * from T");
  ArrayList<String> results = new ArrayList<String>();
  driver.getResults(results);
  Assert.assertEquals(2, results.size());
  driver.run("drop table T");
}
{noformat}
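For context on the 2007 error: the exception is raised in HCatUtil.validatePartitionSchema, and the message ("Expected column <c> at position 3, found column <d>") suggests the Pig output schema is compared to the table schema by ordinal position rather than by name. The following is a minimal, self-contained sketch of that kind of positional check, written only to show why (a,b,c) passes while (a,b,d) fails; it is a hypothetical illustration, not the actual HCatalog code.

{noformat}
import java.util.Arrays;
import java.util.List;

// Hypothetical illustration (not HCatalog code): a purely positional schema check
// of the kind the 2007 error message implies.
public class PositionalSchemaCheckSketch {

  // Compare each produced column against the table column at the same position.
  static void validateByPosition(List<String> tableCols, List<String> outputCols) {
    for (int i = 0; i < outputCols.size(); i++) {
      String expected = tableCols.get(i);
      String found = outputCols.get(i);
      if (!expected.equalsIgnoreCase(found)) {
        throw new IllegalArgumentException("Invalid column position in partition schema : "
            + "Expected column <" + expected + "> at position " + (i + 1)
            + ", found column <" + found + ">");
      }
    }
    // Trailing table columns that the output schema omits are never compared,
    // which is why they can be NULL-filled.
  }

  public static void main(String[] args) {
    List<String> table = Arrays.asList("a", "b", "c", "d");
    validateByPosition(table, Arrays.asList("a", "b", "c")); // passes: only trailing 'd' is missing
    validateByPosition(table, Arrays.asList("a", "b", "d")); // throws: position 3 holds 'c', not 'd'
  }
}
{noformat}

Under a check like this, only a trailing suffix of the table columns can be omitted; dropping a column from the middle shifts everything after it and trips the positional comparison. Matching columns by name and NULL-filling any missing column, not just trailing ones, would be one way to make the (a,b,d) case behave like the (a,b,c) case.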