bithw1 opened a new issue, #12011:
URL: https://github.com/apache/hudi/issues/12011
Hi,
I am using Hudi 0.15.0, and I find that the option
`hoodie.datasource.hive_sync.skip_ro_suffix` does not seem to work as
expected.
I first created a MOR table with
`hoodie.datasource.hive_sync.skip_ro_suffix=true`, and then looked at the create
table statement from the Hive CLI:
```
CREATE TABLE IF NOT EXISTS hudi_mor_17 (
a INT,
b INT,
c INT
)
USING hudi
tblproperties(
type='mor',
primaryKey='a',
preCombineField='c',
hoodie.datasource.hive_sync.skip_ro_suffix='true'
)
insert into hudi_mor_17 select 1,1,1;
insert into hudi_mor_17 select 2,2,2;
insert into hudi_mor_17 select 3,3,3;
insert into hudi_mor_17 select 4,4,4;
insert into hudi_mor_17 select 5,5,5;
update hudi_mor_17 set b = 11 where a = 1;
update hudi_mor_17 set b = 22 where a = 2;
```
After the table was created and some data was inserted and updated, I looked at
the table definitions:
```
CREATE TABLE `hudi_mor_17`(
`_hoodie_commit_time` string,
`_hoodie_commit_seqno` string,
`_hoodie_record_key` string,
`_hoodie_partition_path` string,
`_hoodie_file_name` string,
`a` int,
`b` int,
`c` int)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
WITH SERDEPROPERTIES (
'hoodie.query.as.ro.table'='false',
'path'='hdfs://hadoop.master:9000/user/hive/warehouse/hudi_mor_17')
STORED AS INPUTFORMAT
'org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'hdfs://hadoop.master:9000/user/hive/warehouse/hudi_mor_17'
TBLPROPERTIES (
'hoodie.datasource.hive_sync.skip_ro_suffix'='true',
'last_commit_completion_time_sync'='20240926132024516',
'last_commit_time_sync'='20240926132023403',
'preCombineField'='c',
'primaryKey'='a',
'provider'='hudi',
'spark.sql.create.version'='3.3.2',
'spark.sql.sources.provider'='hudi',
'spark.sql.sources.schema.numParts'='1',
'spark.sql.sources.schema.part.0'='{"type":"struct","fields":[{"name":"_hoodie_commit_time","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_commit_seqno","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_record_key","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_partition_path","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_file_name","type":"string","nullable":true,"metadata":{}},{"name":"a","type":"integer","nullable":true,"metadata":{}},{"name":"b","type":"integer","nullable":true,"metadata":{}},{"name":"c","type":"integer","nullable":true,"metadata":{}}]}',
'transient_lastDdlTime'='1727327985',
'type'='mor')
```
and
```
CREATE EXTERNAL TABLE `hudi_mor_17_rt`(
`_hoodie_commit_time` string COMMENT '',
`_hoodie_commit_seqno` string COMMENT '',
`_hoodie_record_key` string COMMENT '',
`_hoodie_partition_path` string COMMENT '',
`_hoodie_file_name` string COMMENT '',
`a` int COMMENT '',
`b` int COMMENT '',
`c` int COMMENT '')
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
WITH SERDEPROPERTIES (
'hoodie.query.as.ro.table'='false',
'path'='hdfs://hadoop.master:9000/user/hive/warehouse/hudi_mor_17')
STORED AS INPUTFORMAT
'org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'hdfs://hadoop.master:9000/user/hive/warehouse/hudi_mor_17'
TBLPROPERTIES (
'last_commit_completion_time_sync'='20240926132024516',
'last_commit_time_sync'='20240926132023403',
'spark.sql.create.version'='3.3.2',
'spark.sql.sources.provider'='hudi',
'spark.sql.sources.schema.numParts'='1',
'spark.sql.sources.schema.part.0'='{"type":"struct","fields":[{"name":"_hoodie_commit_time","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_commit_seqno","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_record_key","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_partition_path","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_file_name","type":"string","nullable":true,"metadata":{}},{"name":"a","type":"integer","nullable":true,"metadata":{}},{"name":"b","type":"integer","nullable":true,"metadata":{}},{"name":"c","type":"integer","nullable":true,"metadata":{}}]}',
'transient_lastDdlTime'='1727328010')
```
Both of them have the same `'hoodie.query.as.ro.table'='false'` and
`STORED AS INPUTFORMAT
'org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat'`.
It looks to me that neither of these two tables is an RO table, so I would like
to ask what the option `hoodie.datasource.hive_sync.skip_ro_suffix` does. It
looks like the RO table has been lost...
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]