[ https://issues.apache.org/jira/browse/SPARK-37849?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17471633#comment-17471633 ]
melin commented on SPARK-37849: ------------------------------- Restrictions can be added in HiveOutputWriter: {code:java} override def write(row: InternalRow): Unit = { var i = 0 while (i < fieldOIs.length) { val dataType = dataTypes(i) val value = row.get(i, dataType) if (columnMaxSize > 0) { if (value != null && value.isInstanceOf[UTF8String] && columnMaxSize > 0) { val valueSize = value.asInstanceOf[UTF8String].numBytes() if (valueSize > columnMaxSize) { throw new LimitMaxLengthException(s"column ${columnNames(i)} size $valueSize , " + s"max limit of $columnMaxSize bytes") } } else if (value != null && value.isInstanceOf[UnsafeArrayData] && columnMaxSize > 0) { val valueSize = value.asInstanceOf[UnsafeArrayData].getSizeInBytes if (valueSize > columnMaxSize) { throw new LimitMaxLengthException(s"column ${columnNames(i)} size $valueSize , " + s"max limit of $columnMaxSize bytes") } } else if (value != null && value.isInstanceOf[UnsafeMapData] && columnMaxSize > 0) { val valueSize = value.asInstanceOf[UnsafeMapData].getSizeInBytes if (valueSize > columnMaxSize) { throw new LimitMaxLengthException(s"column ${columnNames(i)} size $valueSize , " + s"max limit of $columnMaxSize bytes") } } } outputData(i) = if (row.isNullAt(i)) null else wrappers(i)(value) i += 1 } hiveWriter.write(serializer.serialize(outputData, standardOI)) } {code} > Supports limiting the maximum amount of column data > --------------------------------------------------- > > Key: SPARK-37849 > URL: https://issues.apache.org/jira/browse/SPARK-37849 > Project: Spark > Issue Type: Improvement > Components: SQL > Affects Versions: 3.2.0 > Reporter: melin > Priority: Major > > Avoid writing too much data in a column. -- This message was sent by Atlassian Jira (v8.20.1#820001) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org