This is an automated email from the ASF dual-hosted git repository.
jmclean pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 9ee4099743 [#8287] feat(spark-common): support varchar/char partition values in SparkPartitionUtils (#8338)
9ee4099743 is described below
commit 9ee40997431a769e0f7d10cc61a1c2d056e4dca5
Author: Minji Ryu <[email protected]>
AuthorDate: Fri Aug 29 07:53:47 2025 +0900
[#8287] feat(spark-common): support varchar/char partition values in
SparkPartitionUtils (#8338)
### What changes were proposed in this pull request?
* Add support for `VarcharType` and `CharType` partition values in
`SparkPartitionUtils#getSparkPartitionValue`.
* Update `TestSparkPartitionUtils` to include `varchar(5)` and `char(2)`
columns in schema/internalRow/literals/hivePartitionValues, so existing
looped tests also cover these types.
### Why are the changes needed?
Currently `SparkPartitionUtils` handles `StringType` but not its
variants `VarcharType` and `CharType`.
This PR adds conversion support for these types, ensuring consistent
handling of all string variants.
Fixes #8287.
### Does this PR introduce *any* user-facing change?
No.
This change only affects internal partition value conversion utilities
and corresponding unit tests.
### How was this patch tested?
* Extended the existing unit test `TestSparkPartitionUtils` by adding
`varchar(5)` and `char(2)` columns to:
* `schema`
* `internalRow`
* `literals`
* `hivePartitionValues`
* Verified that existing test methods (`testToGravitinoLiteral`,
`testGetPartitionValueAsString`, and `testGetSparkPartitionValue`) now
also validate `VarcharType` and `CharType`.
All tests passed locally.
---
.../spark/connector/utils/SparkPartitionUtils.java | 4 ++++
.../connector/utils/TestSparkPartitionUtils.java | 19 ++++++++++++++++---
2 files changed, 20 insertions(+), 3 deletions(-)
diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/utils/SparkPartitionUtils.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/utils/SparkPartitionUtils.java
index 8789875200..232c4a7254 100644
--- a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/utils/SparkPartitionUtils.java
+++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/utils/SparkPartitionUtils.java
@@ -152,6 +152,10 @@ public class SparkPartitionUtils {
return Float.parseFloat(hivePartitionValue);
} else if (dataType instanceof DecimalType) {
return Decimal.apply(hivePartitionValue);
+ } else if (dataType instanceof VarcharType) {
+ return UTF8String.fromString(hivePartitionValue);
+ } else if (dataType instanceof CharType) {
+ return UTF8String.fromString(hivePartitionValue);
} else {
throw new UnsupportedOperationException("Unsupported partition type: "
+ dataType);
}
diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/utils/TestSparkPartitionUtils.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/utils/TestSparkPartitionUtils.java
index b041ca945d..ea411e6ac1 100644
--- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/utils/TestSparkPartitionUtils.java
+++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/utils/TestSparkPartitionUtils.java
@@ -22,13 +22,16 @@ package org.apache.gravitino.spark.connector.utils;
import java.time.LocalDate;
import org.apache.gravitino.rel.expressions.literals.Literal;
import org.apache.gravitino.rel.expressions.literals.Literals;
+import org.apache.gravitino.rel.types.Types;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
+import org.apache.spark.sql.types.CharType;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
+import org.apache.spark.sql.types.VarcharType;
import org.apache.spark.unsafe.types.UTF8String;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
@@ -46,6 +49,8 @@ public class TestSparkPartitionUtils {
float floatValue = 3.14f;
double doubleValue = 3.1415926535;
UTF8String stringValue = UTF8String.fromString("Hello World");
+ UTF8String varcharValue = UTF8String.fromString("abc");
+ UTF8String charValue = UTF8String.fromString("xy");
int date = 0;
private InternalRow internalRow =
new GenericInternalRow(
@@ -58,7 +63,9 @@ public class TestSparkPartitionUtils {
floatValue,
doubleValue,
stringValue,
- date
+ date,
+ varcharValue,
+ charValue,
});
private Literal[] literals =
new Literals.LiteralImpl[] {
@@ -71,6 +78,8 @@ public class TestSparkPartitionUtils {
Literals.doubleLiteral(doubleValue),
Literals.stringLiteral(stringValue.toString()),
Literals.dateLiteral(LocalDate.of(1970, 1, 1)),
+ Literals.varcharLiteral(5, varcharValue.toString()),
+ Literals.of(charValue, Types.FixedCharType.of(2)),
};
private String[] hivePartitionValues = {
"true",
@@ -81,7 +90,9 @@ public class TestSparkPartitionUtils {
"3.14",
"3.1415926535",
"Hello World",
- "1970-01-01"
+ "1970-01-01",
+ "abc",
+ "xy"
};
StructType schema =
new StructType(
@@ -94,7 +105,9 @@ public class TestSparkPartitionUtils {
new StructField("float", DataTypes.FloatType, false,
Metadata.empty()),
new StructField("double", DataTypes.DoubleType, false,
Metadata.empty()),
new StructField("string", DataTypes.StringType, false,
Metadata.empty()),
- new StructField("date", DataTypes.DateType, false,
Metadata.empty())
+ new StructField("date", DataTypes.DateType, false,
Metadata.empty()),
+ new StructField("varchar5", VarcharType.apply(5), false,
Metadata.empty()),
+ new StructField("char2", CharType.apply(2), false,
Metadata.empty()),
});
@Test