[
https://issues.apache.org/jira/browse/AVRO-2146?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
laki updated AVRO-2146:
-----------------------
Description:
Here is the schema. It has no unions, but I am getting a union error:
{
"type" : "record",
"name" : "edm_generic_publisher_avro_schema",
"namespace" : "edm.avro",
"doc" : "The generic avro schema used by publishers to publish events to the
enterprise streaming service",
"fields" : [
{"name" : "event",
"type" : {
"type" : "record",
"name" : "event_meta_data",
"fields" : [
{"name" : "event_name", "type" : "string", "doc" : "The name of the event. In
the CDC, this field is populated with the name of the data base table or
segment."}
,
{"name" : "operation_type", "type" : "string", "doc": "The operation or action
that triggered the event. e.g., Insert, Update, Delete, etc."}
,
{"name" : "transaction_identifier", "type" : "string", "default" : "NONE",
"doc" : "A unique identifier that identifies a unit or work or transaction.
Useful in relating multiple events together."}
,
{"name" : "event_publication_timestamp_millis", "type" : "string", "doc":
"timestamp when the event was published"}
,
{"name" : "event_publisher", "type" : "string", "doc" : "The system or
application that published the event"}
,
{"name" : "event_publisher_identity", "type": "string", "default" : "NONE",
"doc": "The identity (user) of the system or application that published the
event"}
,
{"name" : "event_timestamp_millis", "type" : "string", "default" : "NONE",
"doc": "timestamp when the event occured"}
,
{"name": "event_initiator", "type": "string", "default" : "NONE", "doc" : "The
system or application that initiated the event"}
,
{"name": "event_initiator_identity", "type" : "string", "default" : "NONE",
"doc": "The system id or application id that initiated the event" }
]},
"doc" : "The data about the published event"
},
{ "name" : "contents",
"type" : {
"name": "data_field_groups",
"type": "array",
"items": {
"type": "record",
"name": "data_field_group",
"fields" : [
{"name": "data_group_name", "type": "string" }
,
{
"name": "data_fields",
"type": {
"type": "array",
"items": {
"name": "data_field",
"type": "record",
"fields":[
{"name" : "data_field_name", "type" : "string", "doc" : "The field name"}
,
{"name": "data_field_type", "type": "string", "doc" : "The data type is one of
the following values: string, boolean, int, long, float, double or bytes"}
,
{"name" : "data_field_value", "type" : ["string"], "doc" : "The value"}
]
}
}
}
]
}
},
"doc" : "The datafields for the for the published event"
}
]
}
;
Here is the code that is causing the issue:
static byte[] fromJasonToAvro( InputStream json, Schema schemastr) throws
Exception {
FileOutputStream outStream;
InputStream input = json;
DataInputStream din = new DataInputStream(input);
Schema schema = schemastr;
Decoder decoder = DecoderFactory.get().jsonDecoder(schema, din);
DatumReader<Object> reader = new GenericDatumReader<Object>(schema);
Object datum = reader.read(null, decoder);
GenericDatumWriter<Object> w = new GenericDatumWriter<Object>(schema);
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
Encoder e = EncoderFactory.get().binaryEncoder(outputStream, null);
w.write(datum, e);
e.flush();
-----------------------------------------------------
json file:
{
"event": {
"event_name": "Customer Phone Number",
"operation_type": "Add",
"transaction_identifier": "1234567890",
"event_publication_timestamp_millis": "1518464452915",
"event_publisher": "CIS",
"event_publisher_identity": "System",
"event_timestamp_millis": "NONE",
"event_initiator": "NONE",
"event_initiator_identity": "NONE"
},
"content": [
{
"data_group_name": "customer_identifier",
"data_fields": [
{
"data_field_name": "company_number",
"data_field_type": "string",
"data_field_value": "069"
},
{
"data_field_name": "customer_short_name",
"data_field_type": "string",
"data_field_value": "marshben"
},
{
"data_field_name": "tie_breaker",
"data_field_type": "string",
"data_field_value": "01"
}
]
},
{
"data_group_name": "customer_phone_number",
"data_fields": [
{
"data_field_name": "phone_number",
"data_field_type": "string",
"data_field_value": "6x2-555-1x12"
},
{
"data_field_name": "phone_type",
"data_field_type": "string",
"data_field_value": "cell"
},
{
"data_field_name": "primary_indicator",
"data_field_type": "string",
"data_field_value": "yes"
}
]
}
]
}
was:
Here is the schema, no unions, but getting union error :
{
"type" : "record",
"name" : "edm_generic_publisher_avro_schema",
"namespace" : "edm.avro",
"doc" : "The generic avro schema used by publishers to publish events to the
enterprise streaming service",
"fields" : [
{"name" : "event",
"type" : {
"type" : "record",
"name" : "event_meta_data",
"fields" : [
{"name" : "event_name",
"type" : "string",
"doc" : "The name of the event. In the CDC, this field is populated with the
name of the data base table or segment."},
{"name" : "operation_type",
"type" : "string",
"doc": "The operation or action that triggered the event. e.g., Insert,
Update, Delete, etc."},
{"name" : "transaction_identifier",
"type" : "string",
"default" : "NONE",
"doc" : "A unique identifier that identifies a unit or work or transaction.
Useful in relating multiple events together."},
{"name" : "event_publication_timestamp_millis",
"type" : "string",
"doc": "timestamp when the event was published"},
{"name" : "event_publisher",
"type" : "string",
"doc" : "The system or application that published the event"},
{"name" : "event_publisher_identity",
"type": "string",
"default" : "NONE",
"doc": "The identity (user) of the system or application that published the
event"},
{"name" : "event_timestamp_millis",
"type" : "string",
"default" : "NONE",
"doc": "timestamp when the event occured"},
{"name": "event_initiator",
"type": "string",
"default" : "NONE",
"doc" : "The system or application that initiated the event"},
{"name": "event_initiator_identity",
"type" : "string",
"default" : "NONE",
"doc": "The system id or application id that initiated the event" }
]},
"doc" : "The data about the published event"
},
{ "name" : "contents",
"type" : {
"name": "data_field_groups",
"type": "array",
"items": {
"type": "record",
"name": "data_field_group",
"fields" : [
{"name": "data_group_name",
"type": "string"
},
{
"name": "data_fields",
"type": {
"type": "array",
"items": {
"name": "data_field",
"type": "record",
"fields":[
{"name" : "data_field_name",
"type" : "string",
"doc" : "The field name"},
{"name": "data_field_type",
"type": "string",
"doc" : "The data type is one of the following values: string, boolean, int,
long, float, double or bytes"},
{"name" : "data_field_value",
"type" : ["string"],
"doc" : "The value"}
]
}
}
}
]
}
},
"doc" : "The datafields for the for the published event"
}
]
}
;
here is the code that is causing the issue-------------------------------
static byte[] fromJasonToAvro( InputStream json, Schema schemastr) throws
Exception {
FileOutputStream outStream;
InputStream input = json;
DataInputStream din = new DataInputStream(input);
Schema schema = schemastr;
Decoder decoder = DecoderFactory.get().jsonDecoder(schema, din);
DatumReader<Object> reader = new GenericDatumReader<Object>(schema);
Object datum = reader.read(null, decoder);
GenericDatumWriter<Object> w = new GenericDatumWriter<Object>(schema);
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
Encoder e = EncoderFactory.get().binaryEncoder(outputStream, null);
w.write(datum, e);
e.flush();
> getting Expected start-union. Got VALUE_STRING
> ----------------------------------------------
>
> Key: AVRO-2146
> URL: https://issues.apache.org/jira/browse/AVRO-2146
> Project: Avro
> Issue Type: Bug
> Affects Versions: 1.8.2
> Environment: error message:
> Exception in thread "main" org.apache.avro.AvroTypeException: Expected
> start-union. Got VALUE_STRING
> at org.apache.avro.io.JsonDecoder.error(JsonDecoder.java:698)
> at org.apache.avro.io.JsonDecoder.readIndex(JsonDecoder.java:441)
> at org.apache.avro.io.ResolvingDecoder.doAction(ResolvingDecoder.java:290)
> at org.apache.avro.io.parsing.Parser.advance(Parser.java:88)
> at org.apache.avro.io.ResolvingDecoder.readIndex(ResolvingDecoder.java:267)
> at
> org.apache.avro.generic.GenericDatumReader.readWithoutConversion(GenericDatumReader.java:179)
> at
> org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:153)
> at
> org.apache.avro.generic.GenericDatumReader.readField(GenericDatumReader.java:232)
> at
> org.apache.avro.generic.GenericDatumReader.readRecord(GenericDatumReader.java:222)
> at
> org.apache.avro.generic.GenericDatumReader.readWithoutConversion(GenericDatumReader.java:175)
> at
> org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:153)
> at
> org.apache.avro.generic.GenericDatumReader.readField(GenericDatumReader.java:232)
> at
> org.apache.avro.generic.GenericDatumReader.readRecord(GenericDatumReader.java:222)
> at
> org.apache.avro.generic.GenericDatumReader.readWithoutConversion(GenericDatumReader.java:175)
> at
> org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:153)
> at
> org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:145)
> at myJson2Avro.fromJasonToAvro(myJson2Avro.java:81)
> at myJson2Avro.main(myJson2Avro.java:48)
> Reporter: laki
> Priority: Major
>
> Here is the schema. It has no unions, but I am getting a union error:
>
> {
> "type" : "record",
> "name" : "edm_generic_publisher_avro_schema",
> "namespace" : "edm.avro",
> "doc" : "The generic avro schema used by publishers to publish events to the
> enterprise streaming service",
> "fields" : [
> {"name" : "event",
> "type" : {
> "type" : "record",
> "name" : "event_meta_data",
> "fields" : [
> {"name" : "event_name", "type" : "string", "doc" : "The name of the event. In
> the CDC, this field is populated with the name of the data base table or
> segment."}
> ,
> {"name" : "operation_type", "type" : "string", "doc": "The operation or
> action that triggered the event. e.g., Insert, Update, Delete, etc."}
> ,
> {"name" : "transaction_identifier", "type" : "string", "default" : "NONE",
> "doc" : "A unique identifier that identifies a unit or work or transaction.
> Useful in relating multiple events together."}
> ,
> {"name" : "event_publication_timestamp_millis", "type" : "string", "doc":
> "timestamp when the event was published"}
> ,
>
> {"name" : "event_publisher", "type" : "string", "doc" : "The system or
> application that published the event"}
> ,
>
> {"name" : "event_publisher_identity", "type": "string", "default" : "NONE",
> "doc": "The identity (user) of the system or application that published the
> event"}
> ,
>
> {"name" : "event_timestamp_millis", "type" : "string", "default" : "NONE",
> "doc": "timestamp when the event occured"}
> ,
>
> {"name": "event_initiator", "type": "string", "default" : "NONE", "doc" :
> "The system or application that initiated the event"}
> ,
> {"name": "event_initiator_identity", "type" : "string", "default" : "NONE",
> "doc": "The system id or application id that initiated the event" }
> ]},
> "doc" : "The data about the published event"
> },
> { "name" : "contents",
> "type" : {
> "name": "data_field_groups",
> "type": "array",
> "items": {
> "type": "record",
> "name": "data_field_group",
> "fields" : [
> {"name": "data_group_name", "type": "string" }
> ,
> {
> "name": "data_fields",
> "type": {
> "type": "array",
> "items": {
> "name": "data_field",
> "type": "record",
> "fields":[
> {"name" : "data_field_name", "type" : "string", "doc" : "The field name"}
> ,
>
> {"name": "data_field_type", "type": "string", "doc" : "The data type is one
> of the following values: string, boolean, int, long, float, double or bytes"}
> ,
> {"name" : "data_field_value", "type" : ["string"], "doc" : "The value"}
> ]
> }
> }
> }
> ]
> }
> },
> "doc" : "The datafields for the for the published event"
> }
> ]
> }
> ;
>
>
> Here is the code that is causing the issue:
>
> static byte[] fromJasonToAvro( InputStream json, Schema schemastr) throws
> Exception {
> FileOutputStream outStream;
> InputStream input = json;
> DataInputStream din = new DataInputStream(input);
> Schema schema = schemastr;
> Decoder decoder = DecoderFactory.get().jsonDecoder(schema, din);
> DatumReader<Object> reader = new GenericDatumReader<Object>(schema);
> Object datum = reader.read(null, decoder);
> GenericDatumWriter<Object> w = new GenericDatumWriter<Object>(schema);
> ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
>
> Encoder e = EncoderFactory.get().binaryEncoder(outputStream, null);
> w.write(datum, e);
> e.flush();
>
>
> -----------------------------------------------------
>
> json file:
>
> {
> "event": {
> "event_name": "Customer Phone Number",
> "operation_type": "Add",
> "transaction_identifier": "1234567890",
> "event_publication_timestamp_millis": "1518464452915",
> "event_publisher": "CIS",
> "event_publisher_identity": "System",
> "event_timestamp_millis": "NONE",
> "event_initiator": "NONE",
> "event_initiator_identity": "NONE"
> },
> "content": [
> {
> "data_group_name": "customer_identifier",
> "data_fields": [
> {
> "data_field_name": "company_number",
> "data_field_type": "string",
> "data_field_value": "069"
> },
> {
> "data_field_name": "customer_short_name",
> "data_field_type": "string",
> "data_field_value": "marshben"
> },
> {
> "data_field_name": "tie_breaker",
> "data_field_type": "string",
> "data_field_value": "01"
> }
> ]
> },
> {
> "data_group_name": "customer_phone_number",
> "data_fields": [
> {
> "data_field_name": "phone_number",
> "data_field_type": "string",
> "data_field_value": "6x2-555-1x12"
> },
> {
> "data_field_name": "phone_type",
> "data_field_type": "string",
> "data_field_value": "cell"
> },
> {
> "data_field_name": "primary_indicator",
> "data_field_type": "string",
> "data_field_value": "yes"
> }
> ]
> }
>
> ]
> }
>
>
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)