Github user fhueske commented on a diff in the pull request:

    https://github.com/apache/flink/pull/2762#discussion_r88463790

    --- Diff: flink-streaming-connectors/flink-connector-kafka-base/src/main/java/org/apache/flink/streaming/util/serialization/AvroRowDeserializationSchema.java ---
    @@ -0,0 +1,95 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements. See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License. You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.flink.streaming.util.serialization;
    +
    +import org.apache.avro.Schema;
    +import org.apache.avro.generic.GenericData;
    +import org.apache.avro.generic.GenericDatumReader;
    +import org.apache.avro.generic.GenericRecord;
    +import org.apache.avro.io.DatumReader;
    +import org.apache.avro.io.Decoder;
    +import org.apache.avro.io.DecoderFactory;
    +import org.apache.flink.api.common.typeinfo.TypeInformation;
    +import org.apache.flink.api.table.Row;
    +
    +import java.io.ByteArrayInputStream;
    +import java.io.IOException;
    +
    +import static org.apache.flink.streaming.connectors.kafka.internals.TypeUtil.createRowAvroSchema;
    +
    +/**
    + * Deserialization schema from Avro to {@link Row}.
    + *
    + * <p>Deserializes the <code>byte[]</code> messages in Avro format and reads
    + * the specified fields.
    + *
    + * <p>Failures during deserialization are forwarded as wrapped IOExceptions.
    + */
    +public class AvroRowDeserializationSchema extends AbstractDeserializationSchema<Row> {
    +
    +	/** Field names in a row. */
    +	private final String[] fieldNames;
    +	/** Types to parse fields as. Indices match fieldNames indices. */
    +	private final TypeInformation[] fieldTypes;
    +	/** Avro deserialization schema. */
    +	private final Schema schema;
    +	/** Reader that deserializes a byte array into a record. */
    +	private final DatumReader<GenericRecord> datumReader;
    +	/** Record to deserialize the byte array to. */
    +	private final GenericRecord record;
    +
    +	/**
    +	 * Creates an Avro deserialization schema for the given type classes.
    +	 *
    +	 * @param fieldNames Names of the fields in a row.
    +	 * @param fieldTypes Type classes to parse Avro fields as.
    +	 */
    +	public AvroRowDeserializationSchema(String[] fieldNames, TypeInformation<?>[] fieldTypes) {
    +		this.schema = createRowAvroSchema(fieldNames, fieldTypes);
    +		this.fieldNames = fieldNames;
    +		this.fieldTypes = fieldTypes;
    +		this.datumReader = new GenericDatumReader<>(schema);
    +		this.record = new GenericData.Record(schema);
    +	}
    +
    +	@Override
    +	public Row deserialize(byte[] message) throws IOException {
    +		readRecord(message);
    +		return convertRecordToRow();
    +	}
    +
    +	private void readRecord(byte[] message) throws IOException {
    +		ByteArrayInputStream arrayInputStream = new ByteArrayInputStream(message);
    --- End diff --

Creating a new `ByteArrayInputStream` and `Decoder` for each record is quite expensive. Can we reuse them as you did in the serializer?
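As a minimal sketch of what that reuse could look like (not the PR's actual code): Avro's `DecoderFactory.binaryDecoder(byte[], BinaryDecoder)` accepts a previously returned decoder to reuse, and `DatumReader.read(reuse, decoder)` repopulates an existing record. The `decoder` field below is a hypothetical addition to the class:

```java
// Hypothetical reuse sketch for AvroRowDeserializationSchema.
// Requires: import org.apache.avro.io.BinaryDecoder;

/** Decoder instance reused for every record (hypothetical new field).
 *  Transient because decoders are not serializable; binaryDecoder(...)
 *  creates a fresh one when the reuse argument is null. */
private transient BinaryDecoder decoder;

private void readRecord(byte[] message) throws IOException {
	// Passing the previous decoder lets Avro reset it over the new byte[]
	// instead of allocating a ByteArrayInputStream and Decoder per message.
	decoder = DecoderFactory.get().binaryDecoder(message, decoder);
	// Reusing 'record' avoids a fresh GenericRecord allocation per message.
	datumReader.read(record, decoder);
}
```

Since the schema object is serialized when the job is distributed, keeping the reused decoder transient and letting `binaryDecoder` lazily create it on the first call seems like the natural fit here.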