Marta Kuczora created HIVE-22917:
------------------------------------

             Summary: Configuration for Hive to recognise non-empty destination 
folders
                 Key: HIVE-22917
                 URL: https://issues.apache.org/jira/browse/HIVE-22917
             Project: Hive
          Issue Type: Task
            Reporter: Marta Kuczora
            Assignee: Marta Kuczora


Currently, Hive overwrites the LOCATION folder on INSERT or CTAS even if the 
folder is non-empty.
Investigate this behavior and whether we can introduce a switch such that, when 
it is ON, any ALTER, INSERT, CTAS, CREATE or DROP operation/transaction is 
aborted if the LOCATION clause points at a non-empty folder.

{noformat}
>> create table test (json_data string)
 STORED AS TEXTFILE
 LOCATION 'hdfs://host-10-17-102-132.coe.>ra.com:8020/tmp/test'
 TBLPROPERTIES ('serialization.null.format' = '');

>> insert into test values('test0');
>> insert into test values('test1');
>> insert into test values('test2');

>> select * from test;
INFO : Compiling 
command(queryId=hive_20200207150101_601d6dbc-99cb-446d-86ac-6f8ce5304681): 
select * from test
INFO : Executing 
command(queryId=hive_20200207150101_601d6dbc-99cb-446d-86ac-6f8ce5304681): 
select * from test
INFO : Completed executing 
command(queryId=hive_20200207150101_601d6dbc-99cb-446d-86ac-6f8ce5304681); Time 
taken: 0.001 seconds
INFO : OK
-----------------+
test.json_data
-----------------+
test0
test1
test2
-----------------+

>> select * from test_id2;
INFO : Compiling 
command(queryId=hive_20200207145656_e99d1a0d-ea4c-4636-ae3a-dd930df14644): 
select * from test_id2
INFO : Executing 
command(queryId=hive_20200207145656_e99d1a0d-ea4c-4636-ae3a-dd930df14644): 
select * from test_id2
INFO : Completed executing 
command(queryId=hive_20200207145656_e99d1a0d-ea4c-4636-ae3a-dd930df14644); Time 
taken: 0.001 seconds
INFO : OK
--------------+
test_id2.id
--------------+
1
13
14
--------------+

>> create table test2 (json_data int)
 STORED AS TEXTFILE
 LOCATION 'hdfs://host-10-17-102-132.coe.>ra.com:8020/tmp/test'
 as SELECT * from test_id;

INFO : Completed executing 
command(queryId=hive_20200207150303_cbb57a17-1242-46dc-a98e-addf50f01c5b); Time 
taken: 13.137 seconds
INFO : OK
No rows affected (13.226 seconds)

SELECT * from test;
INFO : Compiling 
command(queryId=hive_20200207150404_d0aabd08-a15f-4e6c-99a3-e607b8a6cfd3): 
SELECT * from test
INFO : Executing 
command(queryId=hive_20200207150404_d0aabd08-a15f-4e6c-99a3-e607b8a6cfd3): 
SELECT * from test
INFO : Completed executing 
command(queryId=hive_20200207150404_d0aabd08-a15f-4e6c-99a3-e607b8a6cfd3); Time 
taken: 0.001 seconds
INFO : OK
-----------------+
test.json_data
-----------------+
1
13
14
-----------------+
3 rows selected (0.081 seconds)
{noformat}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to