[ https://issues.apache.org/jira/browse/HIVE-26102?focusedWorklogId=753221&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-753221 ]
ASF GitHub Bot logged work on HIVE-26102: ----------------------------------------- Author: ASF GitHub Bot Created on: 06/Apr/22 07:10 Start Date: 06/Apr/22 07:10 Worklog Time Spent: 10m Work Description: pvary commented on code in PR #3131: URL: https://github.com/apache/hive/pull/3131#discussion_r843549581 ########## iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergV2.java: ########## @@ -228,6 +230,104 @@ public void testReadAndWriteFormatV2Partitioned_PosDelete_RowSupplied() throws I Assert.assertArrayEquals(new Object[] {2L, "Trudy", "Pink"}, objects.get(3)); } + @Test + public void testDeleteStatementUnpartitioned() { + Assume.assumeFalse("Iceberg DELETEs are only implemented for non-vectorized mode for now", isVectorized); + + // create and insert an initial batch of records + testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, + PartitionSpec.unpartitioned(), fileFormat, HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_2, 2); + // insert one more batch so that we have multiple data files within the same partition + shell.executeStatement(testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_1, + TableIdentifier.of("default", "customers"), false)); + + shell.executeStatement("DELETE FROM customers WHERE customer_id=3 or first_name='Joanna'"); + + List<Object[]> objects = shell.executeStatement("SELECT * FROM customers ORDER BY customer_id, last_name"); + Assert.assertEquals(6, objects.size()); + List<Record> expected = TestHelper.RecordsBuilder.newInstance(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA) + .add(1L, "Sharon", "Taylor") + .add(2L, "Jake", "Donnel") + .add(2L, "Susan", "Morrison") + .add(2L, "Bob", "Silver") + .add(4L, "Laci", "Zold") + .add(5L, "Peti", "Rozsaszin") + .build(); + HiveIcebergTestUtils.validateData(expected, + HiveIcebergTestUtils.valueForRow(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, objects), 0); + } + + @Test + public void testDeleteStatementPartitioned() { + Assume.assumeFalse("Iceberg DELETEs are only implemented for non-vectorized mode for now", isVectorized); + PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA) + .identity("last_name").bucket("customer_id", 16).build(); + + // create and insert an initial batch of records + testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, + spec, fileFormat, HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_2, 2); + // insert one more batch so that we have multiple data files within the same partition + shell.executeStatement(testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_1, + TableIdentifier.of("default", "customers"), false)); + + shell.executeStatement("DELETE FROM customers WHERE customer_id=3 or first_name='Joanna'"); + + List<Object[]> objects = shell.executeStatement("SELECT * FROM customers ORDER BY customer_id, last_name"); + Assert.assertEquals(6, objects.size()); + List<Record> expected = TestHelper.RecordsBuilder.newInstance(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA) + .add(1L, "Sharon", "Taylor") + .add(2L, "Jake", "Donnel") + .add(2L, "Susan", "Morrison") + .add(2L, "Bob", "Silver") + .add(4L, "Laci", "Zold") + .add(5L, "Peti", "Rozsaszin") + .build(); + HiveIcebergTestUtils.validateData(expected, + HiveIcebergTestUtils.valueForRow(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, objects), 0); + } + + @Test + public void testDeleteStatementWithOtherTable() { + Assume.assumeFalse("Iceberg DELETEs are only implemented for non-vectorized mode for now", isVectorized); + PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA) + .identity("last_name").bucket("customer_id", 16).build(); + + // create a couple of tables, with an initial batch of records + testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, + spec, fileFormat, HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_2, 2); + testTables.createTable(shell, "other", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, + spec, fileFormat, HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_1, 2); + + shell.executeStatement("DELETE FROM customers WHERE customer_id in (select t1.customer_id from customers t1 join " + + "other t2 on t1.customer_id = t2.customer_id) or " + + "first_name in (select first_name from customers where first_name = 'Bob')"); + + List<Object[]> objects = shell.executeStatement("SELECT * FROM customers ORDER BY customer_id, last_name"); + Assert.assertEquals(5, objects.size()); + List<Record> expected = TestHelper.RecordsBuilder.newInstance(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA) + .add(1L, "Joanna", "Pierce") + .add(1L, "Sharon", "Taylor") + .add(2L, "Jake", "Donnel") + .add(2L, "Susan", "Morrison") + .add(2L, "Joanna", "Silver") + .build(); + HiveIcebergTestUtils.validateData(expected, + HiveIcebergTestUtils.valueForRow(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, objects), 0); + } + + @Test + public void testDeleteStatementThrowsIfVectorizationEnabled() { Review Comment: Would it be possible to turn off vectorization if we are deleting from an Iceberg table? Issue Time Tracking ------------------- Worklog Id: (was: 753221) Time Spent: 4h 50m (was: 4h 40m) > Implement DELETE statements for Iceberg tables > ---------------------------------------------- > > Key: HIVE-26102 > URL: https://issues.apache.org/jira/browse/HIVE-26102 > Project: Hive > Issue Type: New Feature > Reporter: Marton Bod > Assignee: Marton Bod > Priority: Major > Labels: pull-request-available > Time Spent: 4h 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.20.1#820001)