[ https://issues.apache.org/jira/browse/HIVE-12955?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Luis Gonzalez updated HIVE-12955: --------------------------------- Description: Hi! We think we have hit a bug. We have tested this in many ways for a couple of hours now and there is apparently a problem with avro.schema.literal when you specify more than 50 fields. In our tests we have found that regardless of the avro file (table) we want to load, it fails with 60 fields. Some tables we use have more than 400 fields and have the same problem. For instance, if we launch the command {code:shell} hive> drop table tableName; OK Time taken: 0.162 seconds hive> > CREATE EXTERNAL TABLE tableName > ROW FORMAT > SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' > WITH SERDEPROPERTIES ('avro.schema.literal'=' > { "namespace": "cdr.avro", "type": "record", "name": "Account", "fields": [ > {"name": "Id", "type":["null", "string"],"default":null}, > {"name": "IsDeleted", "type":["null", "boolean"],"default":null}, > {"name": "MasterRecordId", "type":["null", "string"],"default":null}, > {"name": "Name", "type":["null", "string"],"default":null}, > {"name": "Type", "type":["null", "string"],"default":null}, > {"name": "RecordTypeId", "type":["null", "string"],"default":null}, > {"name": "ParentId", "type":["null", "string"],"default":null}, > {"name": "Phone", "type":["null", "string"],"default":null}, > {"name": "Fax", "type":["null", "string"],"default":null}, > {"name": "AccountNumber", "type":["null", "string"],"default":null}, > {"name": "Website", "type":["null", "string"],"default":null}, > {"name": "Industry", "type":["null", "string"],"default":null}, > {"name": "AnnualRevenue", "type":["null", "double"],"default":null}, > {"name": "NumberOfEmployees", "type":["null", "int"],"default":null}, > {"name": "Description", "type":["null", "string"],"default":null}, > {"name": "OwnerId", "type":["null", "string"],"default":null}, > {"name": "CreatedDate", "type":["null", "string"],"default":null}, > {"name": "CreatedById", "type":["null", "string"],"default":null}, > 
{"name": "LastModifiedDate", "type":["null", "string"],"default":null}, > {"name": "LastModifiedById", "type":["null", "string"],"default":null}, > {"name": "SystemModstamp", "type":["null", "string"],"default":null}, > {"name": "LastActivityDate", "type":["null", "string"],"default":null}, > {"name": "IsPartner", "type":["null", "boolean"],"default":null}, > {"name": "IsCustomerPortal", "type":["null", "boolean"],"default":null}, > {"name": "JigsawCompanyId", "type":["null", "string"],"default":null}, > {"name": "Invoice_Level__c", "type":["null", "string"],"default":null}, > {"name": "IT_Developer_Fee__c", "type":["null", "boolean"],"default":null}, > {"name": "Customer_Type__c", "type":["null", "string"],"default":null}, > {"name": "Shortname__c", "type":["null", "string"],"default":null}, > {"name": "Excluir_compensacion_por_desvio__c", "type":["null", "boolean"],"default":null}, > {"name": "Commercial_Area__c", "type":["null", "string"],"default":null}, > {"name": "Account_Status__c", "type":["null", "string"],"default":null}, > {"name": "Active_Fiscal_Details__c", "type":["null", "boolean"],"default":null}, > {"name": "Office_Code__c", "type":["null", "string"],"default":null}, > {"name": "Commercial_Brand__c", "type":["null", "string"],"default":null}, > {"name": "Agreed_payment_method__c", "type":["null", "string"],"default":null}, > {"name": "Division__c", "type":["null", "string"],"default":null}, > {"name": "Inactive_Date__c", "type":["null", "string"],"default":null}, > {"name": "SAP_Code__c", "type":["null", "string"],"default":null}, > {"name": "Country_fiscal__c", "type":["null", "string"],"default":null}, > {"name": "Fiscal_Number_1__c", "type":["null", "string"],"default":null}, > {"name": "Fiscal_Number_2__c", "type":["null", "string"],"default":null}, > {"name": "Street_Fiscal__c", "type":["null", "string"],"default":null}, > {"name": "City_fiscal__c", "type":["null", "string"],"default":null}, > {"name": "Post_Code_fiscal__c", "type":["null", 
"string"],"default":null}, > {"name": "Web_Prepayment__c", "type":["null", "boolean"],"default":null}, > {"name": "Customer_Subtype__c", "type":["null", "string"],"default":null}, > {"name": "Relationship__c", "type":["null", "string"],"default":null}, > {"name": "Market_Country__c", "type":["null", "string"],"default":null} > ] } > ') > STORED AS > INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' > OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' > LOCATION 's3://bucket.../path/to/avro'; OK Time taken: 0.412 seconds hive> > select * from tableName limit 10; OK {code} but when using the same AVRO file and more fields it fails {code:shell} hive> drop table tableName; OK Time taken: 0.146 seconds hive> > CREATE EXTERNAL TABLE tableName > ROW FORMAT > SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' > WITH SERDEPROPERTIES ('avro.schema.literal'=' > { "namespace": "cdr.avro", "type": "record", "name": "Account", "fields": [ > {"name": "Id", "type":["null", "string"],"default":null}, > {"name": "IsDeleted", "type":["null", "boolean"],"default":null}, > {"name": "MasterRecordId", "type":["null", "string"],"default":null}, > {"name": "Name", "type":["null", "string"],"default":null}, > {"name": "Type", "type":["null", "string"],"default":null}, > {"name": "RecordTypeId", "type":["null", "string"],"default":null}, > {"name": "ParentId", "type":["null", "string"],"default":null}, > {"name": "Phone", "type":["null", "string"],"default":null}, > {"name": "Fax", "type":["null", "string"],"default":null}, > {"name": "AccountNumber", "type":["null", "string"],"default":null}, > {"name": "Website", "type":["null", "string"],"default":null}, > {"name": "Industry", "type":["null", "string"],"default":null}, > {"name": "AnnualRevenue", "type":["null", "double"],"default":null}, > {"name": "NumberOfEmployees", "type":["null", "int"],"default":null}, > {"name": "Description", "type":["null", "string"],"default":null}, > {"name": 
"OwnerId", "type":["null", "string"],"default":null}, > {"name": "CreatedDate", "type":["null", "string"],"default":null}, > {"name": "CreatedById", "type":["null", "string"],"default":null}, > {"name": "LastModifiedDate", "type":["null", "string"],"default":null}, > {"name": "LastModifiedById", "type":["null", "string"],"default":null}, > {"name": "SystemModstamp", "type":["null", "string"],"default":null}, > {"name": "LastActivityDate", "type":["null", "string"],"default":null}, > {"name": "IsPartner", "type":["null", "boolean"],"default":null}, > {"name": "IsCustomerPortal", "type":["null", "boolean"],"default":null}, > {"name": "JigsawCompanyId", "type":["null", "string"],"default":null}, > {"name": "Invoice_Level__c", "type":["null", "string"],"default":null}, > {"name": "IT_Developer_Fee__c", "type":["null", "boolean"],"default":null}, > {"name": "Customer_Type__c", "type":["null", "string"],"default":null}, > {"name": "Shortname__c", "type":["null", "string"],"default":null}, > {"name": "Excluir_compensacion_por_desvio__c", "type":["null", "boolean"],"default":null}, > {"name": "Commercial_Area__c", "type":["null", "string"],"default":null}, > {"name": "Account_Status__c", "type":["null", "string"],"default":null}, > {"name": "Active_Fiscal_Details__c", "type":["null", "boolean"],"default":null}, > {"name": "Office_Code__c", "type":["null", "string"],"default":null}, > {"name": "Commercial_Brand__c", "type":["null", "string"],"default":null}, > {"name": "Agreed_payment_method__c", "type":["null", "string"],"default":null}, > {"name": "Division__c", "type":["null", "string"],"default":null}, > {"name": "Inactive_Date__c", "type":["null", "string"],"default":null}, > {"name": "SAP_Code__c", "type":["null", "string"],"default":null}, > {"name": "Country_fiscal__c", "type":["null", "string"],"default":null}, > {"name": "Fiscal_Number_1__c", "type":["null", "string"],"default":null}, > {"name": "Fiscal_Number_2__c", "type":["null", "string"],"default":null}, > 
{"name": "Street_Fiscal__c", "type":["null", "string"],"default":null}, > {"name": "City_fiscal__c", "type":["null", "string"],"default":null}, > {"name": "Post_Code_fiscal__c", "type":["null", "string"],"default":null}, > {"name": "Web_Prepayment__c", "type":["null", "boolean"],"default":null}, > {"name": "Customer_Subtype__c", "type":["null", "string"],"default":null}, > {"name": "Relationship__c", "type":["null", "string"],"default":null}, > {"name": "Market_Country__c", "type":["null", "string"],"default":null}, > {"name": "Customer_Service_Centre__c", "type":["null", "string"],"default":null}, > {"name": "Acquisition_Channel_Type__c", "type":["null", "string"],"default":null}, > {"name": "Acquisition_Channel_Description__c", "type":["null", "string"],"default":null}, > {"name": "Comments__c", "type":["null", "string"],"default":null}, > {"name": "Street_Commercial__c", "type":["null", "string"],"default":null}, > {"name": "Country_Commercial__c", "type":["null", "string"],"default":null}, > {"name": "City_Commercial__c", "type":["null", "string"],"default":null}, > {"name": "Post_Code_Commercial__c", "type":["null", "string"],"default":null}, > {"name": "Atlas_Branch_Number__c", "type":["null", "double"],"default":null}, > {"name": "Timezone__c", "type":["null", "string"],"default":null}, > {"name": "Billing_Language__c", "type":["null", "string"],"default":null}, > {"name": "Fiscal_Name__c", "type":["null", "string"],"default":null} > ] } > ') > STORED AS > INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' > OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' > LOCATION 's3://bucket/path/to/avro'; OK Time taken: 0.48 seconds hive> > select * from tableName limit 10; OK Failed with exception java.io.IOException:org.apache.avro.AvroTypeException: Found cdr.avro.Account, expecting org.apache.hadoop.hive.CannotDetermineSchemaSentinel Time taken: 0.028 seconds {code} This problem doesn't affect when we store the avro 
schema with 400 fields in a file in s3 and use the avro.schema.url field. was: Hi! We think we have hit a bug. We have tested this in many ways for a couple of hours now and there is apparently a problem with avro.schema.literal when you specify more than 50 fields. In our tests we have found that regardless of the avro file (table) we want to load, it fails with 60 fields. Some tables we use have more than 400 fields and have the same problem. For instance, if we launch the command ``` hive> drop table tableName; OK Time taken: 0.162 seconds hive> > CREATE EXTERNAL TABLE tableName > ROW FORMAT > SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' > WITH SERDEPROPERTIES ('avro.schema.literal'=' > { "namespace": "cdr.avro", "type": "record", "name": "Account", "fields": [ > {"name": "Id", "type":["null", "string"],"default":null}, > {"name": "IsDeleted", "type":["null", "boolean"],"default":null}, > {"name": "MasterRecordId", "type":["null", "string"],"default":null}, > {"name": "Name", "type":["null", "string"],"default":null}, > {"name": "Type", "type":["null", "string"],"default":null}, > {"name": "RecordTypeId", "type":["null", "string"],"default":null}, > {"name": "ParentId", "type":["null", "string"],"default":null}, > {"name": "Phone", "type":["null", "string"],"default":null}, > {"name": "Fax", "type":["null", "string"],"default":null}, > {"name": "AccountNumber", "type":["null", "string"],"default":null}, > {"name": "Website", "type":["null", "string"],"default":null}, > {"name": "Industry", "type":["null", "string"],"default":null}, > {"name": "AnnualRevenue", "type":["null", "double"],"default":null}, > {"name": "NumberOfEmployees", "type":["null", "int"],"default":null}, > {"name": "Description", "type":["null", "string"],"default":null}, > {"name": "OwnerId", "type":["null", "string"],"default":null}, > {"name": "CreatedDate", "type":["null", "string"],"default":null}, > {"name": "CreatedById", "type":["null", "string"],"default":null}, > {"name": 
"LastModifiedDate", "type":["null", "string"],"default":null}, > {"name": "LastModifiedById", "type":["null", "string"],"default":null}, > {"name": "SystemModstamp", "type":["null", "string"],"default":null}, > {"name": "LastActivityDate", "type":["null", "string"],"default":null}, > {"name": "IsPartner", "type":["null", "boolean"],"default":null}, > {"name": "IsCustomerPortal", "type":["null", "boolean"],"default":null}, > {"name": "JigsawCompanyId", "type":["null", "string"],"default":null}, > {"name": "Invoice_Level__c", "type":["null", "string"],"default":null}, > {"name": "IT_Developer_Fee__c", "type":["null", "boolean"],"default":null}, > {"name": "Customer_Type__c", "type":["null", "string"],"default":null}, > {"name": "Shortname__c", "type":["null", "string"],"default":null}, > {"name": "Excluir_compensacion_por_desvio__c", "type":["null", "boolean"],"default":null}, > {"name": "Commercial_Area__c", "type":["null", "string"],"default":null}, > {"name": "Account_Status__c", "type":["null", "string"],"default":null}, > {"name": "Active_Fiscal_Details__c", "type":["null", "boolean"],"default":null}, > {"name": "Office_Code__c", "type":["null", "string"],"default":null}, > {"name": "Commercial_Brand__c", "type":["null", "string"],"default":null}, > {"name": "Agreed_payment_method__c", "type":["null", "string"],"default":null}, > {"name": "Division__c", "type":["null", "string"],"default":null}, > {"name": "Inactive_Date__c", "type":["null", "string"],"default":null}, > {"name": "SAP_Code__c", "type":["null", "string"],"default":null}, > {"name": "Country_fiscal__c", "type":["null", "string"],"default":null}, > {"name": "Fiscal_Number_1__c", "type":["null", "string"],"default":null}, > {"name": "Fiscal_Number_2__c", "type":["null", "string"],"default":null}, > {"name": "Street_Fiscal__c", "type":["null", "string"],"default":null}, > {"name": "City_fiscal__c", "type":["null", "string"],"default":null}, > {"name": "Post_Code_fiscal__c", "type":["null", 
"string"],"default":null}, > {"name": "Web_Prepayment__c", "type":["null", "boolean"],"default":null}, > {"name": "Customer_Subtype__c", "type":["null", "string"],"default":null}, > {"name": "Relationship__c", "type":["null", "string"],"default":null}, > {"name": "Market_Country__c", "type":["null", "string"],"default":null} > ] } > ') > STORED AS > INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' > OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' > LOCATION 's3://bucket.../path/to/avro'; OK Time taken: 0.412 seconds hive> > select * from tableName limit 10; OK ``` but when using the same AVRO file and more fields it fails ``` hive> drop table tableName; OK Time taken: 0.146 seconds hive> > CREATE EXTERNAL TABLE tableName > ROW FORMAT > SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' > WITH SERDEPROPERTIES ('avro.schema.literal'=' > { "namespace": "cdr.avro", "type": "record", "name": "Account", "fields": [ > {"name": "Id", "type":["null", "string"],"default":null}, > {"name": "IsDeleted", "type":["null", "boolean"],"default":null}, > {"name": "MasterRecordId", "type":["null", "string"],"default":null}, > {"name": "Name", "type":["null", "string"],"default":null}, > {"name": "Type", "type":["null", "string"],"default":null}, > {"name": "RecordTypeId", "type":["null", "string"],"default":null}, > {"name": "ParentId", "type":["null", "string"],"default":null}, > {"name": "Phone", "type":["null", "string"],"default":null}, > {"name": "Fax", "type":["null", "string"],"default":null}, > {"name": "AccountNumber", "type":["null", "string"],"default":null}, > {"name": "Website", "type":["null", "string"],"default":null}, > {"name": "Industry", "type":["null", "string"],"default":null}, > {"name": "AnnualRevenue", "type":["null", "double"],"default":null}, > {"name": "NumberOfEmployees", "type":["null", "int"],"default":null}, > {"name": "Description", "type":["null", "string"],"default":null}, > {"name": "OwnerId", 
"type":["null", "string"],"default":null}, > {"name": "CreatedDate", "type":["null", "string"],"default":null}, > {"name": "CreatedById", "type":["null", "string"],"default":null}, > {"name": "LastModifiedDate", "type":["null", "string"],"default":null}, > {"name": "LastModifiedById", "type":["null", "string"],"default":null}, > {"name": "SystemModstamp", "type":["null", "string"],"default":null}, > {"name": "LastActivityDate", "type":["null", "string"],"default":null}, > {"name": "IsPartner", "type":["null", "boolean"],"default":null}, > {"name": "IsCustomerPortal", "type":["null", "boolean"],"default":null}, > {"name": "JigsawCompanyId", "type":["null", "string"],"default":null}, > {"name": "Invoice_Level__c", "type":["null", "string"],"default":null}, > {"name": "IT_Developer_Fee__c", "type":["null", "boolean"],"default":null}, > {"name": "Customer_Type__c", "type":["null", "string"],"default":null}, > {"name": "Shortname__c", "type":["null", "string"],"default":null}, > {"name": "Excluir_compensacion_por_desvio__c", "type":["null", "boolean"],"default":null}, > {"name": "Commercial_Area__c", "type":["null", "string"],"default":null}, > {"name": "Account_Status__c", "type":["null", "string"],"default":null}, > {"name": "Active_Fiscal_Details__c", "type":["null", "boolean"],"default":null}, > {"name": "Office_Code__c", "type":["null", "string"],"default":null}, > {"name": "Commercial_Brand__c", "type":["null", "string"],"default":null}, > {"name": "Agreed_payment_method__c", "type":["null", "string"],"default":null}, > {"name": "Division__c", "type":["null", "string"],"default":null}, > {"name": "Inactive_Date__c", "type":["null", "string"],"default":null}, > {"name": "SAP_Code__c", "type":["null", "string"],"default":null}, > {"name": "Country_fiscal__c", "type":["null", "string"],"default":null}, > {"name": "Fiscal_Number_1__c", "type":["null", "string"],"default":null}, > {"name": "Fiscal_Number_2__c", "type":["null", "string"],"default":null}, > {"name": 
"Street_Fiscal__c", "type":["null", "string"],"default":null}, > {"name": "City_fiscal__c", "type":["null", "string"],"default":null}, > {"name": "Post_Code_fiscal__c", "type":["null", "string"],"default":null}, > {"name": "Web_Prepayment__c", "type":["null", "boolean"],"default":null}, > {"name": "Customer_Subtype__c", "type":["null", "string"],"default":null}, > {"name": "Relationship__c", "type":["null", "string"],"default":null}, > {"name": "Market_Country__c", "type":["null", "string"],"default":null}, > {"name": "Customer_Service_Centre__c", "type":["null", "string"],"default":null}, > {"name": "Acquisition_Channel_Type__c", "type":["null", "string"],"default":null}, > {"name": "Acquisition_Channel_Description__c", "type":["null", "string"],"default":null}, > {"name": "Comments__c", "type":["null", "string"],"default":null}, > {"name": "Street_Commercial__c", "type":["null", "string"],"default":null}, > {"name": "Country_Commercial__c", "type":["null", "string"],"default":null}, > {"name": "City_Commercial__c", "type":["null", "string"],"default":null}, > {"name": "Post_Code_Commercial__c", "type":["null", "string"],"default":null}, > {"name": "Atlas_Branch_Number__c", "type":["null", "double"],"default":null}, > {"name": "Timezone__c", "type":["null", "string"],"default":null}, > {"name": "Billing_Language__c", "type":["null", "string"],"default":null}, > {"name": "Fiscal_Name__c", "type":["null", "string"],"default":null} > ] } > ') > STORED AS > INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' > OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' > LOCATION 's3://bucket/path/to/avro'; OK Time taken: 0.48 seconds hive> > select * from tableName limit 10; OK Failed with exception java.io.IOException:org.apache.avro.AvroTypeException: Found cdr.avro.Account, expecting org.apache.hadoop.hive.CannotDetermineSchemaSentinel Time taken: 0.028 seconds ``` This problem doesn't affect when we store the avro schema with 
400 fields in a file in s3 and use the avro.schema.url field. > avro.schema.literal don't support more than 50 fields. > ------------------------------------------------------- > > Key: HIVE-12955 > URL: https://issues.apache.org/jira/browse/HIVE-12955 > Project: Hive > Issue Type: Bug > Components: Hive > Affects Versions: 1.0.0 > Reporter: Luis Gonzalez > Priority: Minor > > Hi! > We think we have hit a bug. We have tested this in many ways for a couple > of hours now and there is apparently a problem with avro.schema.literal when > you specify more than 50 fields. In our tests we have found that regardless of > the avro file (table) we want to load, it fails with 60 fields. Some tables > we use have more than 400 fields and have the same problem. > For instance, if we launch the command > {code:shell} > hive> drop table tableName; > OK > Time taken: 0.162 seconds > hive> > > CREATE EXTERNAL TABLE tableName > > ROW FORMAT > > SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' > > WITH SERDEPROPERTIES ('avro.schema.literal'=' > > { "namespace": "cdr.avro", "type": "record", "name": "Account", > "fields": [ > > {"name": "Id", "type":["null", "string"],"default":null}, > > {"name": "IsDeleted", "type":["null", "boolean"],"default":null}, > > {"name": "MasterRecordId", "type":["null", "string"],"default":null}, > > {"name": "Name", "type":["null", "string"],"default":null}, > > {"name": "Type", "type":["null", "string"],"default":null}, > > {"name": "RecordTypeId", "type":["null", "string"],"default":null}, > > {"name": "ParentId", "type":["null", "string"],"default":null}, > > {"name": "Phone", "type":["null", "string"],"default":null}, > > {"name": "Fax", "type":["null", "string"],"default":null}, > > {"name": "AccountNumber", "type":["null", "string"],"default":null}, > > {"name": "Website", "type":["null", "string"],"default":null}, > > {"name": "Industry", "type":["null", "string"],"default":null}, > > {"name": "AnnualRevenue", "type":["null", 
"double"],"default":null}, > > {"name": "NumberOfEmployees", "type":["null", "int"],"default":null}, > > {"name": "Description", "type":["null", "string"],"default":null}, > > {"name": "OwnerId", "type":["null", "string"],"default":null}, > > {"name": "CreatedDate", "type":["null", "string"],"default":null}, > > {"name": "CreatedById", "type":["null", "string"],"default":null}, > > {"name": "LastModifiedDate", "type":["null", "string"],"default":null}, > > {"name": "LastModifiedById", "type":["null", "string"],"default":null}, > > {"name": "SystemModstamp", "type":["null", "string"],"default":null}, > > {"name": "LastActivityDate", "type":["null", "string"],"default":null}, > > {"name": "IsPartner", "type":["null", "boolean"],"default":null}, > > {"name": "IsCustomerPortal", "type":["null", > "boolean"],"default":null}, > > {"name": "JigsawCompanyId", "type":["null", "string"],"default":null}, > > {"name": "Invoice_Level__c", "type":["null", "string"],"default":null}, > > {"name": "IT_Developer_Fee__c", "type":["null", > "boolean"],"default":null}, > > {"name": "Customer_Type__c", "type":["null", "string"],"default":null}, > > {"name": "Shortname__c", "type":["null", "string"],"default":null}, > > {"name": "Excluir_compensacion_por_desvio__c", "type":["null", > "boolean"],"default":null}, > > {"name": "Commercial_Area__c", "type":["null", > "string"],"default":null}, > > {"name": "Account_Status__c", "type":["null", > "string"],"default":null}, > > {"name": "Active_Fiscal_Details__c", "type":["null", > "boolean"],"default":null}, > > {"name": "Office_Code__c", "type":["null", "string"],"default":null}, > > {"name": "Commercial_Brand__c", "type":["null", > "string"],"default":null}, > > {"name": "Agreed_payment_method__c", "type":["null", > "string"],"default":null}, > > {"name": "Division__c", "type":["null", "string"],"default":null}, > > {"name": "Inactive_Date__c", "type":["null", "string"],"default":null}, > > {"name": "SAP_Code__c", "type":["null", 
"string"],"default":null}, > > {"name": "Country_fiscal__c", "type":["null", > "string"],"default":null}, > > {"name": "Fiscal_Number_1__c", "type":["null", > "string"],"default":null}, > > {"name": "Fiscal_Number_2__c", "type":["null", > "string"],"default":null}, > > {"name": "Street_Fiscal__c", "type":["null", "string"],"default":null}, > > {"name": "City_fiscal__c", "type":["null", "string"],"default":null}, > > {"name": "Post_Code_fiscal__c", "type":["null", > "string"],"default":null}, > > {"name": "Web_Prepayment__c", "type":["null", > "boolean"],"default":null}, > > {"name": "Customer_Subtype__c", "type":["null", > "string"],"default":null}, > > {"name": "Relationship__c", "type":["null", "string"],"default":null}, > > {"name": "Market_Country__c", "type":["null", "string"],"default":null} > > ] } > > ') > > STORED AS > > INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' > > OUTPUTFORMAT > 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' > > LOCATION 's3://bucket.../path/to/avro'; > OK > Time taken: 0.412 seconds > hive> > > select * from tableName limit 10; > OK > {code} > but when using the same AVRO file and more fields it fails > {code:shell} > hive> drop table tableName; > OK > Time taken: 0.146 seconds > hive> > > CREATE EXTERNAL TABLE tableName > > ROW FORMAT > > SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' > > WITH SERDEPROPERTIES ('avro.schema.literal'=' > > { "namespace": "cdr.avro", "type": "record", "name": "Account", > "fields": [ > > {"name": "Id", "type":["null", "string"],"default":null}, > > {"name": "IsDeleted", "type":["null", "boolean"],"default":null}, > > {"name": "MasterRecordId", "type":["null", "string"],"default":null}, > > {"name": "Name", "type":["null", "string"],"default":null}, > > {"name": "Type", "type":["null", "string"],"default":null}, > > {"name": "RecordTypeId", "type":["null", "string"],"default":null}, > > {"name": "ParentId", "type":["null", "string"],"default":null}, > > 
{"name": "Phone", "type":["null", "string"],"default":null}, > > {"name": "Fax", "type":["null", "string"],"default":null}, > > {"name": "AccountNumber", "type":["null", "string"],"default":null}, > > {"name": "Website", "type":["null", "string"],"default":null}, > > {"name": "Industry", "type":["null", "string"],"default":null}, > > {"name": "AnnualRevenue", "type":["null", "double"],"default":null}, > > {"name": "NumberOfEmployees", "type":["null", "int"],"default":null}, > > {"name": "Description", "type":["null", "string"],"default":null}, > > {"name": "OwnerId", "type":["null", "string"],"default":null}, > > {"name": "CreatedDate", "type":["null", "string"],"default":null}, > > {"name": "CreatedById", "type":["null", "string"],"default":null}, > > {"name": "LastModifiedDate", "type":["null", "string"],"default":null}, > > {"name": "LastModifiedById", "type":["null", "string"],"default":null}, > > {"name": "SystemModstamp", "type":["null", "string"],"default":null}, > > {"name": "LastActivityDate", "type":["null", "string"],"default":null}, > > {"name": "IsPartner", "type":["null", "boolean"],"default":null}, > > {"name": "IsCustomerPortal", "type":["null", > "boolean"],"default":null}, > > {"name": "JigsawCompanyId", "type":["null", "string"],"default":null}, > > {"name": "Invoice_Level__c", "type":["null", "string"],"default":null}, > > {"name": "IT_Developer_Fee__c", "type":["null", > "boolean"],"default":null}, > > {"name": "Customer_Type__c", "type":["null", "string"],"default":null}, > > {"name": "Shortname__c", "type":["null", "string"],"default":null}, > > {"name": "Excluir_compensacion_por_desvio__c", "type":["null", > "boolean"],"default":null}, > > {"name": "Commercial_Area__c", "type":["null", > "string"],"default":null}, > > {"name": "Account_Status__c", "type":["null", > "string"],"default":null}, > > {"name": "Active_Fiscal_Details__c", "type":["null", > "boolean"],"default":null}, > > {"name": "Office_Code__c", "type":["null", 
"string"],"default":null}, > > {"name": "Commercial_Brand__c", "type":["null", > "string"],"default":null}, > > {"name": "Agreed_payment_method__c", "type":["null", > "string"],"default":null}, > > {"name": "Division__c", "type":["null", "string"],"default":null}, > > {"name": "Inactive_Date__c", "type":["null", "string"],"default":null}, > > {"name": "SAP_Code__c", "type":["null", "string"],"default":null}, > > {"name": "Country_fiscal__c", "type":["null", > "string"],"default":null}, > > {"name": "Fiscal_Number_1__c", "type":["null", > "string"],"default":null}, > > {"name": "Fiscal_Number_2__c", "type":["null", > "string"],"default":null}, > > {"name": "Street_Fiscal__c", "type":["null", "string"],"default":null}, > > {"name": "City_fiscal__c", "type":["null", "string"],"default":null}, > > {"name": "Post_Code_fiscal__c", "type":["null", > "string"],"default":null}, > > {"name": "Web_Prepayment__c", "type":["null", > "boolean"],"default":null}, > > {"name": "Customer_Subtype__c", "type":["null", > "string"],"default":null}, > > {"name": "Relationship__c", "type":["null", "string"],"default":null}, > > {"name": "Market_Country__c", "type":["null", > "string"],"default":null}, > > {"name": "Customer_Service_Centre__c", "type":["null", > "string"],"default":null}, > > {"name": "Acquisition_Channel_Type__c", "type":["null", > "string"],"default":null}, > > {"name": "Acquisition_Channel_Description__c", "type":["null", > "string"],"default":null}, > > {"name": "Comments__c", "type":["null", "string"],"default":null}, > > {"name": "Street_Commercial__c", "type":["null", > "string"],"default":null}, > > {"name": "Country_Commercial__c", "type":["null", > "string"],"default":null}, > > {"name": "City_Commercial__c", "type":["null", > "string"],"default":null}, > > {"name": "Post_Code_Commercial__c", "type":["null", > "string"],"default":null}, > > {"name": "Atlas_Branch_Number__c", "type":["null", > "double"],"default":null}, > > {"name": "Timezone__c", "type":["null", 
"string"],"default":null}, > > {"name": "Billing_Language__c", "type":["null", > "string"],"default":null}, > > {"name": "Fiscal_Name__c", "type":["null", "string"],"default":null} > > ] } > > ') > > STORED AS > > INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' > > OUTPUTFORMAT > 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' > > LOCATION 's3://bucket/path/to/avro'; > OK > Time taken: 0.48 seconds > hive> > > select * from tableName limit 10; > OK > Failed with exception java.io.IOException:org.apache.avro.AvroTypeException: > Found cdr.avro.Account, expecting > org.apache.hadoop.hive.CannotDetermineSchemaSentinel > Time taken: 0.028 seconds > {code} > This problem doesn't affect when we store the avro schema with 400 fields in > a file in s3 and use the avro.schema.url field. -- This message was sent by Atlassian JIRA (v6.3.4#6332)