[
https://issues.apache.org/jira/browse/HIVE-9312?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14269704#comment-14269704
]
Tom Snee commented on HIVE-9312:
--------------------------------
nested.avsc:
{
"namespace": "com.example",
"name": "BugTickler",
"type": "record",
"fields": [
{
"name": "Records",
"default": null,
"type": [
"null",
{
"type": "array",
"items": {
"name": "Record",
"type": "record",
"fields": [
{
"name": "ThreeDigits",
"default": null,
"type": ["null", "string"]
},
{
"name": "FourteenDigits",
"default": null,
"type": ["null", "string"]
},
{
"name": "ThirteenDigits",
"default": null,
"type": [
"null",
{
"type": "array",
"items": "string"
}
]
},
{
"name": "Events",
"default": null,
"type": [
"null",
{
"type": "array",
"items": {
"name": "Event",
"type": "record",
"fields": [
{
"name": "Subject",
"default": null,
"type": [
"null",
{
"name":
"CriticalSubject",
"type": "record",
"fields": [
{
"name":
"SubjectNumber",
"default":
null,
"type":
["null", "string"]
},
{
"name":
"FirstName",
"default":null,
"type":
["null", "string"]
},
{
"name":
"LastName",
"default":null,
"type":
["null", "string"]
},
{
"name":
"MiddleName",
"default":null,
"type":
["null", "string"]
},
{
"name":
"BirthDtm",
"default":null,
"type":
["null", "string"]
},
{
"name":
"SocialSecurityNumber",
"default":null,
"type":
["null", "string"]
},
{
"name":
"GenderCode",
"default":null,
"type":
["null", "string"]
},
{
"name":
"TypeCode",
"default":null,
"type":
["null", "string"]
},
{
"name":
"CodeCode",
"default":null,
"type":
["null", "string"]
},
{
"name":
"StreetAddress",
"default":null,
"type":
["null", "string"]
},
{
"name":
"StreetAddress2",
"default":null,
"type":
["null", "string"]
},
{
"name":
"City",
"default":null,
"type":
["null", "string"]
},
{
"name":
"State",
"default":null,
"type":
["null", "string"]
},
{
"name":
"StateCode",
"default":null,
"type":
["null", "string"]
},
{
"name":
"ZipCode",
"default":null,
"type":
["null", "string"]
},
{
"name":
"County",
"default":null,
"type":
["null", "string"]
},
{
"name":
"CountyCode",
"default":null,
"type":
["null", "string"]
},
{
"name":
"Country",
"default":null,
"type":
["null", "string"]
},
{
"name":
"HomePhone",
"default":null,
"type":
["null", "string"]
},
{
"name":
"BusinessPhone",
"default":null,
"type":
["null", "string"]
},
{
"name":
"PrimaryLanguage",
"default":null,
"type":
["null", "string"]
},
{
"name":
"MaritalStatusCode",
"default":null,
"type":
["null", "string"]
},
{
"name":
"ReligionCode",
"default":null,
"type":
["null", "string"]
},
{
"name":
"Citizenship",
"default":null,
"type":
["null", "string"]
},
{
"name":
"Nationality",
"default":null,
"type":
["null", "string"]
},
{
"name":
"DeathDtm",
"default":null,
"type":
["null", "string"]
},
{
"name":
"DeathIndicatorCode",
"default":null,
"type":
["null", "string"]
},
{
"name":
"AllergiesHint",
"default":null,
"type":
["null", "string"]
},
{
"name":
"SubjectHint",
"default":null,
"type":
["null", "string"]
}
]
}
]
},
{
"name": "Groups",
"default": null,
"type": [
"null",
{
"type": "array",
"items":
{
"name":
"CriticalGroup",
"type":
"record",
"fields": [
{
"name":
"SequenceNumber",
"default": null,
"type":
["null", "string"]
},
{
"name":
"ElevenDigits",
"default": null,
"type":
["null", "string"]
},
{
"name":
"EightDigits",
"default": null,
"type":
["null", "string"]
},
{
"name":
"UniqueOrderNumber",
"default": null,
"type":
["null", "string"]
},
{
"name":
"FiveDigits",
"default": null,
"type":
["null", "string"]
},
{
"name":
"ServiceCodingSystem",
"default": null,
"type":
["null", "string"]
},
{
"name":
"ServiceName",
"default": null,
"type":
["null", "string"]
},
{
"name":
"ServiceSuggestionDtm",
"default": null,
"type":
["null", "string"]
},
{
"name":
"ReportStatusDtm",
"default": null,
"type":
["null", "string"]
},
{
"name":
"ResultStatusCode",
"default": null,
"type":
["null", "string"]
},
{
"name":
"Note",
"default": null,
"type":
["null", "string"]
},
{
"name":
"Results",
"default": null,
"type":
[
"null",
{
"type": "array",
"items":
{
"name": "CriticalResult",
"type": "record",
"fields": [
{
"name": "SequenceNumber",
"default": null,
"type": ["null", "string"]
},
{
"name": "SuggestionTypeCode",
"default": null,
"type": ["null", "string"]
},
{
"name": "SuggestionTypeCodingSystemCode",
"default": null,
"type": ["null", "string"]
},
{
"name": "LoincSuggestionTypeCode",
"default": null,
"type": ["null", "string"]
},
{
"name": "SuggestionTypeName",
"default": null,
"type": ["null", "string"]
},
{
"name": "SuggestionValue",
"default": null,
"type": ["null", "string"]
},
{
"name": "UnitCode",
"default": null,
"type": ["null", "string"]
},
{
"name": "ConvertedSuggestionValue",
"default": null,
"type": ["null", "string"]
},
{
"name": "ConvertedUnitCode",
"default": null,
"type": ["null", "string"]
},
{
"name": "SuggestionDtm",
"default": null,
"type": ["null", "string"]
},
{
"name": "ResultStatusCode",
"default": null,
"type": ["null", "string"]
},
{
"name": "Note",
"default": null,
"type": ["null", "string"]
},
{
"name": "SuggestionGroupUniqueOrderNumber",
"default": null,
"type": ["null", "string"]
}
]
}
}
]
}
]
}
}
]
}
]
}
}
]
}
]
}
}
]
},
{
"name": "MasterSubjectNumber",
"default": null,
"type": ["null", "string"]
},
{
"name": "RecordUpdateDtm",
"default": null,
"type": ["null", "string"]
}
]
}
> Literal string "\n" confuses Avro SerDe
> ---------------------------------------
>
> Key: HIVE-9312
> URL: https://issues.apache.org/jira/browse/HIVE-9312
> Project: Hive
> Issue Type: Bug
> Components: Serializers/Deserializers
> Affects Versions: 0.13.0
> Environment: Hortonworks Data Platform 2.1.2.1 on Centos 6.5
> Reporter: Tom Snee
>
> Avro files with string fields that contain the literal two-character
> sequence backslash followed by 'n' (not an actual newline character)
> confuse the Avro SerDe.
> Steps to recreate:
> 1. Put attached schema nested.avsc into HDFS under /user/someone.
> 2. Convert attached JSON file example.json into Avro with avro-tools, like
> so: "java -jar avro-tools-1.7.7.jar fromjson --schema-file nested.avsc
> example.json > example.avro"
> 3. Put example.avro into HDFS under /user/someone/avro-files.
> 4. Create a Hive table with this statement:
> CREATE EXTERNAL TABLE avro_table
> ROW FORMAT SERDE
> 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
> STORED AS INPUTFORMAT
> 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
> OUTPUTFORMAT
> 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
> LOCATION
> '/user/someone/avro-files/'
> TBLPROPERTIES (
> 'avro.schema.url'='hdfs:///user/someone/nested.avsc'
> );
> 5. Observe that "select * from avro_table;" returns one row, as expected.
> 6. Observe that "select * from avro_table where
> mastersubjectnumber='A12B3CDE-FGH4-5I67-89J0-KLMN1OPQ23R4';" returns 13
> garbled rows, even though the table contains only the one row seen in
> step 5.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
