[ https://issues.apache.org/jira/browse/NIFI-4142?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16124100#comment-16124100 ]
ASF GitHub Bot commented on NIFI-4142:
--------------------------------------
Github user joewitt commented on a diff in the pull request:
https://github.com/apache/nifi/pull/2015#discussion_r132786485
--- Diff:
nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateRecord.java
---
@@ -0,0 +1,457 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nifi.processors.standard;
+
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Parser;
+import org.apache.nifi.annotation.behavior.EventDriven;
+import org.apache.nifi.annotation.behavior.InputRequirement;
+import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
+import org.apache.nifi.annotation.behavior.SideEffectFree;
+import org.apache.nifi.annotation.behavior.SupportsBatching;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.avro.AvroSchemaValidator;
+import org.apache.nifi.avro.AvroTypeUtil;
+import org.apache.nifi.components.AllowableValue;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.components.ValidationContext;
+import org.apache.nifi.components.ValidationResult;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.flowfile.attributes.CoreAttributes;
+import org.apache.nifi.processor.AbstractProcessor;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.nifi.schema.access.SchemaField;
+import org.apache.nifi.schema.access.SchemaNotFoundException;
+import org.apache.nifi.schema.validation.SchemaValidationContext;
+import org.apache.nifi.schema.validation.StandardSchemaValidator;
+import org.apache.nifi.schemaregistry.services.SchemaRegistry;
+import org.apache.nifi.serialization.MalformedRecordException;
+import org.apache.nifi.serialization.RecordReader;
+import org.apache.nifi.serialization.RecordReaderFactory;
+import org.apache.nifi.serialization.RecordSetWriter;
+import org.apache.nifi.serialization.RecordSetWriterFactory;
+import org.apache.nifi.serialization.WriteResult;
+import org.apache.nifi.serialization.record.RawRecordWriter;
+import org.apache.nifi.serialization.record.Record;
+import org.apache.nifi.serialization.record.RecordSchema;
+import
org.apache.nifi.serialization.record.validation.RecordSchemaValidator;
+import
org.apache.nifi.serialization.record.validation.SchemaValidationResult;
+import org.apache.nifi.serialization.record.validation.ValidationError;
+
+@EventDriven
+@SideEffectFree
+@SupportsBatching
+@InputRequirement(Requirement.INPUT_REQUIRED)
+@Tags({"record", "schema", "validate"})
+@CapabilityDescription("Validates the Records of an incoming FlowFile
against a given schema. All records that adhere to the schema are routed to the
\"valid\" relationship while "
+ + "records that do not adhere to hte schema are routed to the
\"invalid\" relationship. It is therefore possible for a single incoming
FlowFile to be split into two individual "
+ + "FlowFiles if some records are valid according to the schema and
others are not. Any FlowFile that is routed to the \"invalid\" relationship
will emit a ROUTE Provenance Event "
+ + "with the Details field populated to explain why records were
invalid. In addition, to gain further explanation of why records were invalid,
DEBUG-level logging can be enabled "
+ + "for the \"org.apache.nifi.processors.standard.ValidateRecord\"
logger.")
+public class ValidateRecord extends AbstractProcessor {
+
+ static final AllowableValue SCHEMA_NAME_PROPERTY = new
AllowableValue("schema-name-property", "Use Schema Name Property",
+ "The schema to validate the data against is determined by looking
at the 'Schema Name' Property and looking up the schema in the configured
Schema Registry");
+ static final AllowableValue SCHEMA_TEXT_PROPERTY = new
AllowableValue("schema-text-property", "Use Schema Text Property",
+ "The schema to validate the data against is determined by looking
at the 'Schema Text' Property and parsing the schema as an Avro schema");
+ static final AllowableValue READER_SCHEMA = new
AllowableValue("reader-schema", "Used Reader's Schema",
--- End diff --
Ah, here it is: "Used Reader's Schema" should be "Use Reader Schema".
> Implement a ValidateRecord Processor
> ------------------------------------
>
> Key: NIFI-4142
> URL: https://issues.apache.org/jira/browse/NIFI-4142
> Project: Apache NiFi
> Issue Type: New Feature
> Components: Extensions
> Reporter: Mark Payne
> Assignee: Mark Payne
> Fix For: 1.4.0
>
>
> We need a processor that is capable of validating that all Records in a
> FlowFile adhere to the proper schema.
> The Processor should be configured with a Record Reader and should route each
> record to either 'valid' or 'invalid' based on whether or not the record
> adheres to the reader's schema. A record would be invalid in any of the
> following cases:
> - Missing field that is required according to the schema
> - Extra field that is not present in schema (it should be configurable
> whether or not this is a failure)
> - Field requires coercion and strict type checking is enabled (this should
> also be configurable)
> - Field is invalid, such as the value "hello" when it should be an integer
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)