[ https://issues.apache.org/jira/browse/HIVE-25264?focusedWorklogId=612602&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-612602 ]
ASF GitHub Bot logged work on HIVE-25264: ----------------------------------------- Author: ASF GitHub Bot Created on: 21/Jun/21 12:46 Start Date: 21/Jun/21 12:46 Worklog Time Spent: 10m Work Description: marton-bod commented on a change in pull request #2407: URL: https://github.com/apache/hive/pull/2407#discussion_r655344122 ########## File path: iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithEngine.java ########## @@ -1273,6 +1274,510 @@ public void testScanTableCaseInsensitive() throws IOException { Assert.assertArrayEquals(new Object[] {1L, "Bob", "Green"}, rows.get(1)); } + @Test + public void testAddColumnToIcebergTable() throws IOException { + // Create an Iceberg table with the columns customer_id, first_name and last_name with some initial data. + Table icebergTable = testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, + fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS); + + // Add a new column (age long) to the Iceberg table. + icebergTable.updateSchema().addColumn("age", Types.LongType.get()).commit(); + + Schema customerSchemaWithAge = new Schema(optional(1, "customer_id", Types.LongType.get()), + optional(2, "first_name", Types.StringType.get(), "This is first name"), + optional(3, "last_name", Types.StringType.get(), "This is last name"), + optional(4, "age", Types.LongType.get())); + + Schema customerSchemaWithAgeOnly = + new Schema(optional(1, "customer_id", Types.LongType.get()), optional(4, "age", Types.LongType.get())); + + // Also add a new entry to the table where the age column is set. 
+ icebergTable = testTables.loadTable(TableIdentifier.of("default", "customers")); + List<Record> newCustomerWithAge = TestHelper.RecordsBuilder.newInstance(customerSchemaWithAge) + .add(3L, "James", "Red", 34L).add(4L, "Lily", "Blue", null).build(); + testTables.appendIcebergTable(shell.getHiveConf(), icebergTable, fileFormat, null, newCustomerWithAge); + + // Do a 'select *' from Hive and check if the age column appears in the result. + // It should be null for the old data and should be filled for the data added after the column addition. + TestHelper.RecordsBuilder customersWithAgeBuilder = TestHelper.RecordsBuilder.newInstance(customerSchemaWithAge) + .add(0L, "Alice", "Brown", null).add(1L, "Bob", "Green", null).add(2L, "Trudy", "Pink", null) + .add(3L, "James", "Red", 34L).add(4L, "Lily", "Blue", null); + List<Record> customersWithAge = customersWithAgeBuilder.build(); + + List<Object[]> rows = shell.executeStatement("SELECT * FROM default.customers"); + HiveIcebergTestUtils.validateData(customersWithAge, HiveIcebergTestUtils.valueForRow(customerSchemaWithAge, rows), + 0); + + // Do a 'select customer_id, age' from Hive to check if the new column can be queried from Hive. + // The customer_id is needed because of the result sorting. + TestHelper.RecordsBuilder customerWithAgeOnlyBuilder = TestHelper.RecordsBuilder + .newInstance(customerSchemaWithAgeOnly).add(0L, null).add(1L, null).add(2L, null).add(3L, 34L).add(4L, null); + List<Record> customersWithAgeOnly = customerWithAgeOnlyBuilder.build(); + + rows = shell.executeStatement("SELECT customer_id, age FROM default.customers"); + HiveIcebergTestUtils.validateData(customersWithAgeOnly, + HiveIcebergTestUtils.valueForRow(customerSchemaWithAgeOnly, rows), 0); + + // Insert some data with age column from Hive. Insert an entry with null age and an entry with filled age. 
+ shell.executeStatement( + "INSERT INTO default.customers values (5L, 'Lily', 'Magenta', NULL), (6L, 'Roni', 'Purple', 23L)"); + + customersWithAgeBuilder.add(5L, "Lily", "Magenta", null).add(6L, "Roni", "Purple", 23L); + customersWithAge = customersWithAgeBuilder.build(); + rows = shell.executeStatement("SELECT * FROM default.customers"); + HiveIcebergTestUtils.validateData(customersWithAge, HiveIcebergTestUtils.valueForRow(customerSchemaWithAge, rows), + 0); + + customerWithAgeOnlyBuilder.add(5L, null).add(6L, 23L); + customersWithAgeOnly = customerWithAgeOnlyBuilder.build(); + rows = shell.executeStatement("SELECT customer_id, age FROM default.customers"); + HiveIcebergTestUtils.validateData(customersWithAgeOnly, + HiveIcebergTestUtils.valueForRow(customerSchemaWithAgeOnly, rows), 0); + } + + @Test + public void testAddRequiredColumnToIcebergTable() throws IOException { + // Create an Iceberg table with the columns customer_id, first_name and last_name with some initial data. + Table icebergTable = testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, + fileFormat, null); + + // Add a new required column (age long) to the Iceberg table. + icebergTable.updateSchema().allowIncompatibleChanges().addRequiredColumn("age", Types.LongType.get()).commit(); + + Schema customerSchemaWithAge = new Schema(optional(1, "customer_id", Types.LongType.get()), + optional(2, "first_name", Types.StringType.get(), "This is first name"), + optional(3, "last_name", Types.StringType.get(), "This is last name"), + required(4, "age", Types.LongType.get())); + + // Insert some data with age column from Hive. + shell.executeStatement( + "INSERT INTO default.customers values (0L, 'Lily', 'Magenta', 28L), (1L, 'Roni', 'Purple', 33L)"); + + // Do a 'select *' from Hive and check if the age column appears in the result. 
+ List<Record> customersWithAge = TestHelper.RecordsBuilder.newInstance(customerSchemaWithAge) + .add(0L, "Lily", "Magenta", 28L).add(1L, "Roni", "Purple", 33L).build(); + List<Object[]> rows = shell.executeStatement("SELECT * FROM default.customers"); + HiveIcebergTestUtils.validateData(customersWithAge, HiveIcebergTestUtils.valueForRow(customerSchemaWithAge, rows), + 0); + + // Should add test step to insert NULL value into the new required column. But at the moment it + // works inconsistently for different file types, so leave it for later when this behaviour is cleaned up. + } + + @Test + public void testAddColumnIntoStructToIcebergTable() throws IOException { + Schema schema = new Schema(required(1, "id", Types.LongType.get()), required(2, "person", Types.StructType + .of(required(3, "first_name", Types.StringType.get()), required(4, "last_name", Types.StringType.get())))); + List<Record> people = TestHelper.generateRandomRecords(schema, 3, 0L); + + // Create an Iceberg table with the columns customer_id, first_name and last_name with some initial data. + Table icebergTable = testTables.createTable(shell, "people", schema, fileFormat, people); + // Add a new column (age long) to the Iceberg table into the person struct + icebergTable.updateSchema().addColumn("person", "age", Types.LongType.get()).commit(); + + Schema schemaWithAge = new Schema(required(1, "id", Types.LongType.get()), + required(2, "person", Types.StructType.of(required(3, "first_name", Types.StringType.get()), + required(4, "last_name", Types.StringType.get()), optional(5, "age", Types.LongType.get())))); + List<Record> newPeople = TestHelper.generateRandomRecords(schemaWithAge, 2, 10L); + + // Also add a new entry to the table where the age column is set. 
+ icebergTable = testTables.loadTable(TableIdentifier.of("default", "people")); + testTables.appendIcebergTable(shell.getHiveConf(), icebergTable, fileFormat, null, newPeople); + + List<Record> sortedExpected = new ArrayList<>(people); + sortedExpected.addAll(newPeople); + sortedExpected.sort(Comparator.comparingLong(record -> (Long) record.get(0))); + List<Object[]> rows = shell + .executeStatement("SELECT id, person.first_name, person.last_name, person.age FROM default.people order by id"); + Assert.assertEquals(sortedExpected.size(), rows.size()); + for (int i = 0; i < sortedExpected.size(); i++) { + Object[] row = rows.get(i); + Long id = (Long) sortedExpected.get(i).get(0); + Record person = (Record) sortedExpected.get(i).getField("person"); + String lastName = (String) person.getField("last_name"); + String firstName = (String) person.getField("first_name"); + Long age = null; + if (person.getField("age") != null) { + age = (Long) person.getField("age"); + } + Assert.assertEquals(id, (Long) row[0]); + Assert.assertEquals(firstName, (String) row[1]); + Assert.assertEquals(lastName, (String) row[2]); + Assert.assertEquals(age, row[3]); + } + + // Insert some data with age column from Hive. Insert an entry with null age and an entry with filled age. 
+ shell.executeStatement("CREATE TABLE dummy_tbl (id bigint, first_name string, last_name string, age bigint)"); + shell.executeStatement("INSERT INTO dummy_tbl VALUES (1, 'Lily', 'Blue', 34), (2, 'Roni', 'Grey', NULL)"); + shell.executeStatement("INSERT INTO default.people SELECT id, named_struct('first_name', first_name, " + + "'last_name', last_name, 'age', age) from dummy_tbl"); + + rows = shell.executeStatement("SELECT id, person.first_name, person.last_name, person.age FROM default.people " + + "where id in (1, 2) order by id"); + Assert.assertEquals(2, rows.size()); + Assert.assertEquals((Long) 1L, (Long) rows.get(0)[0]); + Assert.assertEquals("Lily", (String) rows.get(0)[1]); + Assert.assertEquals("Blue", (String) rows.get(0)[2]); + Assert.assertEquals((Long) 34L, (Long) rows.get(0)[3]); + Assert.assertEquals((Long) 2L, (Long) rows.get(1)[0]); + Assert.assertEquals("Roni", (String) rows.get(1)[1]); + Assert.assertEquals("Grey", (String) rows.get(1)[2]); + Assert.assertNull(rows.get(1)[3]); + } + + @Test + public void testMakeColumnRequiredInIcebergTable() throws IOException { + // Create an Iceberg table with the columns customer_id, first_name and last_name with some initial data. + Table icebergTable = testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, + fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS); + + // Add a new required column (age long) to the Iceberg table. + icebergTable.updateSchema().allowIncompatibleChanges().requireColumn("last_name").commit(); + + List<Object[]> rows = shell.executeStatement("SELECT * FROM default.customers"); + HiveIcebergTestUtils.validateData(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, + HiveIcebergTestUtils.valueForRow(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, rows), 0); + + // Insert some data with last_name no NULL. 
+ shell.executeStatement("INSERT INTO default.customers values (3L, 'Lily', 'Magenta'), (4L, 'Roni', 'Purple')"); + + List<Record> customerRecords = TestHelper.RecordsBuilder + .newInstance(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA).add(0L, "Alice", "Brown") + .add(1L, "Bob", "Green").add(2L, "Trudy", "Pink").add(3L, "Lily", "Magenta").add(4L, "Roni", "Purple").build(); + + rows = shell.executeStatement("SELECT * FROM default.customers"); + HiveIcebergTestUtils.validateData(customerRecords, + HiveIcebergTestUtils.valueForRow(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, rows), 0); + + // Should add test step to insert NULL value into the new required column. But at the moment it + // works inconsistently for different file types, so leave it for later when this behaviour is cleaned up. + } + + @Test + public void testRemoveColumnFromIcebergTable() throws IOException { + // Create an Iceberg table with the columns customer_id, first_name and last_name with some initial data. + Table icebergTable = testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, + fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS); + + // Remove the first_name column from the table. + icebergTable.updateSchema().deleteColumn("first_name").commit(); + + Schema customerSchemaWithoutFirstName = new Schema(optional(1, "customer_id", Types.LongType.get()), + optional(2, "last_name", Types.StringType.get(), "This is last name")); + + TestHelper.RecordsBuilder customersWithoutFirstNameBuilder = TestHelper.RecordsBuilder + .newInstance(customerSchemaWithoutFirstName).add(0L, "Brown").add(1L, "Green").add(2L, "Pink"); + List<Record> customersWithoutFirstName = customersWithoutFirstNameBuilder.build(); + + // Run a 'select *' from Hive to see if the result doesn't contain the first_name column any more. 
+ List<Object[]> rows = shell.executeStatement("SELECT * FROM default.customers"); + HiveIcebergTestUtils.validateData(customersWithoutFirstName, + HiveIcebergTestUtils.valueForRow(customerSchemaWithoutFirstName, rows), 0); + + // Run a 'select first_name' and check if an exception is thrown. + AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, + "Invalid table alias or column reference 'first_name'", () -> { + shell.executeStatement("SELECT first_name FROM default.customers"); + }); + + // Insert an entry from Hive to check if it can be inserted without the first_name column. + shell.executeStatement("INSERT INTO default.customers values (4L, 'Magenta')"); + + rows = shell.executeStatement("SELECT * FROM default.customers"); + customersWithoutFirstNameBuilder.add(4L, "Magenta"); + customersWithoutFirstName = customersWithoutFirstNameBuilder.build(); + HiveIcebergTestUtils.validateData(customersWithoutFirstName, + HiveIcebergTestUtils.valueForRow(customerSchemaWithoutFirstName, rows), 0); + } + + @Test + public void testRemoveAndAddBackColumnFromIcebergTable() throws IOException { + // Create an Iceberg table with the columns customer_id, first_name and last_name with some initial data. + Table icebergTable = testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, + fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS); + + // Remove the first_name column + icebergTable.updateSchema().deleteColumn("first_name").commit(); + // Add a new column with the name first_name + icebergTable.updateSchema().addColumn("first_name", Types.StringType.get(), "This is new first name").commit(); + + // Add new data to the table with the new first_name column filled. 
+ icebergTable = testTables.loadTable(TableIdentifier.of("default", "customers")); + Schema customerSchemaWithNewFirstName = new Schema(optional(1, "customer_id", Types.LongType.get()), + optional(2, "last_name", Types.StringType.get(), "This is last name"), + optional(3, "first_name", Types.StringType.get(), "This is the newly added first name")); + List<Record> newCustomersWithNewFirstName = + TestHelper.RecordsBuilder.newInstance(customerSchemaWithNewFirstName).add(3L, "Red", "James").build(); + testTables.appendIcebergTable(shell.getHiveConf(), icebergTable, fileFormat, null, newCustomersWithNewFirstName); + + TestHelper.RecordsBuilder customersWithNewFirstNameBuilder = + TestHelper.RecordsBuilder.newInstance(customerSchemaWithNewFirstName).add(0L, "Brown", null) + .add(1L, "Green", null).add(2L, "Pink", null).add(3L, "Red", "James"); + List<Record> customersWithNewFirstName = customersWithNewFirstNameBuilder.build(); + + // Run a 'select *' from Hive and check if the first_name column is returned. + // It should be null for the old data and should be filled in the entry added after the column addition. + List<Object[]> rows = shell.executeStatement("SELECT * FROM default.customers"); + HiveIcebergTestUtils.validateData(customersWithNewFirstName, + HiveIcebergTestUtils.valueForRow(customerSchemaWithNewFirstName, rows), 0); + + Schema customerSchemaWithNewFirstNameOnly = new Schema(optional(1, "customer_id", Types.LongType.get()), + optional(3, "first_name", Types.StringType.get(), "This is the newly added first name")); + + TestHelper.RecordsBuilder customersWithNewFirstNameOnlyBuilder = TestHelper.RecordsBuilder + .newInstance(customerSchemaWithNewFirstNameOnly).add(0L, null).add(1L, null).add(2L, null).add(3L, "James"); + List<Record> customersWithNewFirstNameOnly = customersWithNewFirstNameOnlyBuilder.build(); + + // Run a 'select first_name' from Hive to check if the new first-name column can be queried. 
+ rows = shell.executeStatement("SELECT customer_id, first_name FROM default.customers"); + HiveIcebergTestUtils.validateData(customersWithNewFirstNameOnly, + HiveIcebergTestUtils.valueForRow(customerSchemaWithNewFirstNameOnly, rows), 0); + + // Insert data from Hive with first_name filled and with null first_name value. + shell.executeStatement("INSERT INTO default.customers values (4L, 'Magenta', 'Lily'), (5L, 'Purple', NULL)"); + + // Check if the newly inserted data is returned correctly by select statements. + customersWithNewFirstNameBuilder.add(4L, "Magenta", "Lily").add(5L, "Purple", null); + customersWithNewFirstName = customersWithNewFirstNameBuilder.build(); + rows = shell.executeStatement("SELECT * FROM default.customers"); + HiveIcebergTestUtils.validateData(customersWithNewFirstName, + HiveIcebergTestUtils.valueForRow(customerSchemaWithNewFirstName, rows), 0); + + customersWithNewFirstNameOnlyBuilder.add(4L, "Lily").add(5L, null); + customersWithNewFirstNameOnly = customersWithNewFirstNameOnlyBuilder.build(); + rows = shell.executeStatement("SELECT customer_id, first_name FROM default.customers"); + HiveIcebergTestUtils.validateData(customersWithNewFirstNameOnly, + HiveIcebergTestUtils.valueForRow(customerSchemaWithNewFirstNameOnly, rows), 0); + } + + @Test + public void testRenameColumnInIcebergTable() throws IOException { + // Create an Iceberg table with the columns customer_id, first_name and last_name with some initial data. 
+ Table icebergTable = testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, + fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS); + + // Rename the last_name column to family_name + icebergTable.updateSchema().renameColumn("last_name", "family_name").commit(); + + Schema schemaWithFamilyName = new Schema(optional(1, "customer_id", Types.LongType.get()), + optional(2, "first_name", Types.StringType.get(), "This is first name"), + optional(3, "family_name", Types.StringType.get(), "This is last name")); + + // Run a 'select *' from Hive to check if the same records are returned in the same order as before the rename. + List<Object[]> rows = shell.executeStatement("SELECT * FROM default.customers"); + HiveIcebergTestUtils.validateData(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, + HiveIcebergTestUtils.valueForRow(schemaWithFamilyName, rows), 0); + + Schema shemaWithFamilyNameOnly = new Schema(optional(1, "customer_id", Types.LongType.get()), + optional(2, "first_name", Types.StringType.get(), "This is first name")); + TestHelper.RecordsBuilder customersWithFamilyNameOnlyBuilder = TestHelper.RecordsBuilder + .newInstance(shemaWithFamilyNameOnly).add(0L, "Brown").add(1L, "Green").add(2L, "Pink"); + List<Record> customersWithFamilyNameOnly = customersWithFamilyNameOnlyBuilder.build(); + + // Run a 'select family_name' from Hive to check if the column can be queried with the new name. + rows = shell.executeStatement("SELECT customer_id, family_name FROM default.customers"); + HiveIcebergTestUtils.validateData(customersWithFamilyNameOnly, + HiveIcebergTestUtils.valueForRow(shemaWithFamilyNameOnly, rows), 0); + + // Run a 'select last_name' to check if an exception is thrown. 
+ AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, + "Invalid table alias or column reference 'last_name'", () -> { + shell.executeStatement("SELECT last_name FROM default.customers"); + }); + + // Insert some data from Hive to check if the last_name column is still can be filled. + shell.executeStatement("INSERT INTO default.customers values (3L, 'Lily', 'Magenta'), (4L, 'Roni', NULL)"); + + List<Record> newCustomers = TestHelper.RecordsBuilder.newInstance(schemaWithFamilyName).add(0L, "Alice", "Brown") + .add(1L, "Bob", "Green").add(2L, "Trudy", "Pink").add(3L, "Lily", "Magenta").add(4L, "Roni", null).build(); + rows = shell.executeStatement("SELECT * FROM default.customers"); + HiveIcebergTestUtils.validateData(newCustomers, HiveIcebergTestUtils.valueForRow(schemaWithFamilyName, rows), 0); + + customersWithFamilyNameOnlyBuilder.add(3L, "Magenta").add(4L, null); + customersWithFamilyNameOnly = customersWithFamilyNameOnlyBuilder.build(); + rows = shell.executeStatement("SELECT customer_id, family_name FROM default.customers"); + HiveIcebergTestUtils.validateData(customersWithFamilyNameOnly, + HiveIcebergTestUtils.valueForRow(shemaWithFamilyNameOnly, rows), 0); + } + + @Test + public void testMoveLastNameToFirstInIcebergTable() throws IOException { + // Create an Iceberg table with the columns customer_id, first_name and last_name with some initial data. 
+ Table icebergTable = testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, + fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS); + + // Move the last_name column in the table schema as first_column + icebergTable.updateSchema().moveFirst("last_name").commit(); + + Schema customerSchemaLastNameFirst = + new Schema(optional(1, "last_name", Types.StringType.get(), "This is last name"), + optional(2, "customer_id", Types.LongType.get()), + optional(3, "first_name", Types.StringType.get(), "This is first name")); + + TestHelper.RecordsBuilder customersWithLastNameFirstBuilder = + TestHelper.RecordsBuilder.newInstance(customerSchemaLastNameFirst).add("Brown", 0L, "Alice") + .add("Green", 1L, "Bob").add("Pink", 2L, "Trudy"); + List<Record> customersWithLastNameFirst = customersWithLastNameFirstBuilder.build(); + + // Run a 'select *' to check if the order of the column in the result has been changed. + List<Object[]> rows = shell.executeStatement("SELECT * FROM default.customers"); + HiveIcebergTestUtils.validateData(customersWithLastNameFirst, + HiveIcebergTestUtils.valueForRow(customerSchemaLastNameFirst, rows), 1); + + // Query the data with names and check if the result is the same as when the table was created. + rows = shell.executeStatement("SELECT customer_id, first_name, last_name FROM default.customers"); + HiveIcebergTestUtils.validateData(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, + HiveIcebergTestUtils.valueForRow(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, rows), 0); + + // Insert data from Hive to check if the last_name column has to be first in the values list. 
+ shell.executeStatement("INSERT INTO default.customers values ('Magenta', 3L, 'Lily')"); + + customersWithLastNameFirstBuilder.add("Magenta", 3L, "Lily"); + customersWithLastNameFirst = customersWithLastNameFirstBuilder.build(); + rows = shell.executeStatement("SELECT * FROM default.customers"); + HiveIcebergTestUtils.validateData(customersWithLastNameFirst, + HiveIcebergTestUtils.valueForRow(customerSchemaLastNameFirst, rows), 1); + } + + @Test + public void testMoveLastNameBeforeCustomerIdInIcebergTable() throws IOException { + // Create an Iceberg table with the columns customer_id, first_name and last_name with some initial data. + Table icebergTable = testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, + fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS); + + // Move the last_name column before the customer_id in the table schema. + icebergTable.updateSchema().moveBefore("last_name", "customer_id").commit(); + + Schema customerSchemaLastNameFirst = + new Schema(optional(1, "last_name", Types.StringType.get(), "This is last name"), + optional(2, "customer_id", Types.LongType.get()), + optional(3, "first_name", Types.StringType.get(), "This is first name")); + + TestHelper.RecordsBuilder customersWithLastNameFirstBuilder = + TestHelper.RecordsBuilder.newInstance(customerSchemaLastNameFirst).add("Brown", 0L, "Alice") + .add("Green", 1L, "Bob").add("Pink", 2L, "Trudy"); + List<Record> customersWithLastNameFirst = customersWithLastNameFirstBuilder.build(); + + // Run a 'select *' to check if the order of the column in the result has been changed. + List<Object[]> rows = shell.executeStatement("SELECT * FROM default.customers"); + HiveIcebergTestUtils.validateData(customersWithLastNameFirst, + HiveIcebergTestUtils.valueForRow(customerSchemaLastNameFirst, rows), 1); + + // Query the data with names and check if the result is the same as when the table was created. 
+ rows = shell.executeStatement("SELECT customer_id, first_name, last_name FROM default.customers"); + HiveIcebergTestUtils.validateData(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, + HiveIcebergTestUtils.valueForRow(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, rows), 0); + + // Insert data from Hive to check if the last_name column has to be before the customer_id in the values list. + shell.executeStatement("INSERT INTO default.customers values ('Magenta', 3L, 'Lily')"); + + customersWithLastNameFirstBuilder.add("Magenta", 3L, "Lily"); + customersWithLastNameFirst = customersWithLastNameFirstBuilder.build(); + rows = shell.executeStatement("SELECT * FROM default.customers"); + HiveIcebergTestUtils.validateData(customersWithLastNameFirst, + HiveIcebergTestUtils.valueForRow(customerSchemaLastNameFirst, rows), 1); + } + + @Test + public void testMoveCustomerIdAfterFirstNameInIcebergTable() throws IOException { + // Create an Iceberg table with the columns customer_id, first_name and last_name with some initial data. + Table icebergTable = testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, + fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS); + + // Move the last_name column before the customer_id in the table schema. 
+ icebergTable.updateSchema().moveAfter("customer_id", "first_name").commit(); + + Schema customerSchemaLastNameFirst = + new Schema(optional(1, "first_name", Types.StringType.get(), "This is first name"), + optional(2, "customer_id", Types.LongType.get()), + optional(3, "last_name", Types.StringType.get(), "This is last name")); + + TestHelper.RecordsBuilder customersWithLastNameFirstBuilder = + TestHelper.RecordsBuilder.newInstance(customerSchemaLastNameFirst).add("Alice", 0L, "Brown") + .add("Bob", 1L, "Green").add("Trudy", 2L, "Pink"); + List<Record> customersWithLastNameFirst = customersWithLastNameFirstBuilder.build(); + + // Run a 'select *' to check if the order of the column in the result has been changed. + List<Object[]> rows = shell.executeStatement("SELECT * FROM default.customers"); + HiveIcebergTestUtils.validateData(customersWithLastNameFirst, + HiveIcebergTestUtils.valueForRow(customerSchemaLastNameFirst, rows), 1); + + // Query the data with names and check if the result is the same as when the table was created. + rows = shell.executeStatement("SELECT customer_id, first_name, last_name FROM default.customers"); + HiveIcebergTestUtils.validateData(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, + HiveIcebergTestUtils.valueForRow(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, rows), 0); + + // Insert data from Hive to check if the last_name column has to be before the customer_id in the values list. 
+ shell.executeStatement("INSERT INTO default.customers values ('Lily', 3L, 'Magenta')"); + + customersWithLastNameFirstBuilder.add("Lily", 3L, "Magenta"); + customersWithLastNameFirst = customersWithLastNameFirstBuilder.build(); + rows = shell.executeStatement("SELECT * FROM default.customers"); + HiveIcebergTestUtils.validateData(customersWithLastNameFirst, + HiveIcebergTestUtils.valueForRow(customerSchemaLastNameFirst, rows), 1); + } + + @Test + public void testUpdateColumnTypeInIcebergTable() throws IOException { + // Create an Iceberg table with int, float and decimal(2,1) types with some initial records + Schema schema = new Schema(optional(1, "id", Types.LongType.get()), + optional(2, "int_col", Types.IntegerType.get(), "This is an integer type"), + optional(3, "float_col", Types.FloatType.get(), "This is a float type"), + optional(4, "decimal_col", Types.DecimalType.of(2, 1), "This is a decimal type")); + + List<Record> records = TestHelper.RecordsBuilder.newInstance(schema).add(0L, 35, 22F, BigDecimal.valueOf(13L, 1)) + .add(1L, 223344, 555.22F, BigDecimal.valueOf(22L, 1)).add(2L, -234, -342F, BigDecimal.valueOf(-12L, 1)).build(); + + Table icebergTable = testTables.createTable(shell, "types_table", schema, fileFormat, records); + + Schema schemaForResultSet = + new Schema(optional(1, "id", Types.LongType.get()), optional(2, "int_col", Types.IntegerType.get()), + optional(3, "float_col", Types.DoubleType.get()), optional(4, "decimal_col", Types.StringType.get())); Review comment: Shouldn't this be `Types.FloatType.get()` and `Types.DecimalType.of(2, 1)`? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 612602) Time Spent: 1h 40m (was: 1.5h) > Add tests to verify Hive can read/write after schema change on Iceberg table > ---------------------------------------------------------------------------- > > Key: HIVE-25264 > URL: https://issues.apache.org/jira/browse/HIVE-25264 > Project: Hive > Issue Type: Test > Reporter: Marta Kuczora > Assignee: Marta Kuczora > Priority: Major > Labels: pull-request-available > Time Spent: 1h 40m > Remaining Estimate: 0h > > We should verify if Hive can properly read/write Iceberg tables after their > schema was modified through the Iceberg API (it's like when another engine, > like Spark, has made modifications to the schema). > Unit tests should be added for the following operations offered by the > UpdateSchema interface in the Iceberg API: > - adding new top level column > - adding new nested column > - adding required column > - adding required nested column > - renaming a column > - updating a column > - making a column required > - deleting a column > - changing the order of the columns in the schema -- This message was sent by Atlassian Jira (v8.3.4#803005)