comphead commented on code in PR #2902:
URL: https://github.com/apache/datafusion-comet/pull/2902#discussion_r2624172493
##########
spark/src/main/scala/org/apache/comet/testing/FuzzDataGenerator.scala:
##########
@@ -88,6 +89,68 @@ object FuzzDataGenerator {
StructType(fields.toSeq)
}
+ def generateNestedSchema(
+ r: Random,
+ numCols: Int,
+ minDepth: Int,
+ maxDepth: Int,
+ options: SchemaGenOptions): StructType = {
+ assert(numCols > 0)
+ assert(minDepth >= 0)
+ assert(maxDepth >= 0)
+ assert(minDepth <= maxDepth)
+ assert(
+ options.generateArray || options.generateStruct || options.generateMap,
+ "cannot generate nested schema if options do not include generating complex types")
+
+ val counter = new AtomicLong
+
+ def generateFieldName() = {
+ s"c_${counter.incrementAndGet()}"
+ }
+
+ def generateArray(depth: Int, name: String) = {
+ val element = genField(r, depth + 1)
+ StructField(name, DataTypes.createArrayType(element.dataType, true))
+ }
+
+ def generateStruct(depth: Int, name: String) = {
+ val fields =
+ Range(1, 2 + r.nextInt(10)).map(_ => genField(r, depth + 1)).toArray
+ StructField(name, DataTypes.createStructType(fields))
+ }
+
+ def generateMap(depth: Int, name: String) = {
+ val keyField = genField(r, depth + 1)
+ val valueField = genField(r, depth + 1)
+ StructField(name, DataTypes.createMapType(keyField.dataType, valueField.dataType))
+ }
+
+ def generatePrimitive(name: String) = {
+ StructField(name, randomChoice(options.primitiveTypes, r))
+ }
+
+ def genField(r: Random, depth: Int): StructField = {
+ val name = generateFieldName()
+ val generators = new ListBuffer[() => StructField]()
+ if (options.generateArray && depth < maxDepth) {
+ generators += (() => generateArray(depth + 1, name))
+ }
+ if (options.generateStruct && depth < maxDepth) {
+ generators += (() => generateStruct(depth + 1, name))
+ }
+ if (options.generateMap && depth < maxDepth) {
+ generators += (() => generateMap(depth, name))
Review Comment:
Just wondering: why is `depth` passed unchanged here, rather than as `depth + 1` like it is for arrays and structs?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]