[ 
https://issues.apache.org/jira/browse/SPARK-37849?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17471633#comment-17471633
 ] 

melin commented on SPARK-37849:
-------------------------------

The size restriction can be enforced in HiveOutputWriter, for example:
{code:scala}
/**
 * Writes one row through the Hive writer, optionally rejecting oversized
 * column values first.
 *
 * When `columnMaxSize` is positive, the byte size of every non-null
 * `UTF8String`, `UnsafeArrayData` and `UnsafeMapData` value is compared
 * against it. Values of any other runtime type are not measured.
 *
 * @param row the row to write; field `i` is read using `dataTypes(i)`
 * @throws LimitMaxLengthException if a measured value is larger than
 *                                 `columnMaxSize` bytes
 */
override def write(row: InternalRow): Unit = {
  var i = 0
  while (i < fieldOIs.length) {
    val value = row.get(i, dataTypes(i))

    // A non-positive columnMaxSize disables the check entirely.
    if (columnMaxSize > 0) {
      // Type patterns never match null, so null values fall through to the
      // default case together with all types that are not measured. The
      // default of 0 can never exceed the (positive) limit.
      val valueSize = value match {
        case s: UTF8String => s.numBytes()
        case a: UnsafeArrayData => a.getSizeInBytes
        case m: UnsafeMapData => m.getSizeInBytes
        case _ => 0
      }
      if (valueSize > columnMaxSize) {
        throw new LimitMaxLengthException(
          s"column ${columnNames(i)} size $valueSize , " +
            s"max limit of $columnMaxSize bytes")
      }
    }

    outputData(i) = if (row.isNullAt(i)) null else wrappers(i)(value)
    i += 1
  }
  hiveWriter.write(serializer.serialize(outputData, standardOI))
} {code}

> Supports limiting the maximum amount of column data
> ---------------------------------------------------
>
>                 Key: SPARK-37849
>                 URL: https://issues.apache.org/jira/browse/SPARK-37849
>             Project: Spark
>          Issue Type: Improvement
>          Components: SQL
>    Affects Versions: 3.2.0
>            Reporter: melin
>            Priority: Major
>
> Avoid writing too much data in a column.



--
This message was sent by Atlassian Jira
(v8.20.1#820001)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to