xiaonanyang-db commented on code in PR #50560: URL: https://github.com/apache/spark/pull/50560#discussion_r2043632968
########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala: ########## @@ -241,4 +251,168 @@ class StaxXmlGenerator( } } } + + /** + * Serialize the single Variant value to XML + */ + def write(v: VariantVal): Unit = { + writeVariant(options.rowTag, v, pos = 0) + } + + /** + * Write a Variant field to XML + * + * @param name The name of the field + * @param v The original Variant entity + * @param pos The position in the Variant data array where the field value starts + */ + private def writeVariant(name: String, v: VariantVal, pos: Int): Unit = { + VariantUtil.getType(v.getValue, pos) match { + case VariantUtil.Type.OBJECT => + writeVariantObject(name, v, pos) + case VariantUtil.Type.ARRAY => + writeVariantArray(name, v, pos) + case _ => + writeVariantPrimitive(name, v, pos) + } + } + + /** + * Write a Variant object to XML. A Variant object is serialized as an XML element, with the child + * fields serialized as XML nodes recursively. + * + * @param name The name of the object field, which is used as the XML element name + * @param v The original Variant entity + * @param pos The position in the Variant data array where the object value starts + */ + private def writeVariantObject(name: String, v: VariantVal, pos: Int): Unit = { + gen.writeStartElement(name) + VariantUtil.handleObject( + v.getValue, + pos, + (size, idSize, offsetSize, idStart, offsetStart, dataStart) => { + // Traverse the fields of the object and get their names and positions in the original + // Variant + val elementInfo = (0 until size).map { i => + val id = VariantUtil.readUnsigned(v.getValue, idStart + idSize * i, idSize) + val offset = + VariantUtil.readUnsigned(v.getValue, offsetStart + offsetSize * i, offsetSize) + val elementPos = dataStart + offset + val elementName = VariantUtil.getMetadataKey(v.getMetadata, id) + (elementName, elementPos) + } + + // Partition the fields of the object into XML attributes and elements + val (attributes, elements) = 
elementInfo.partition { + case (f, _) => + // Similar to the reader, we use attributePrefix option to determine whether the field is + // an attribute or not. + // In addition, we also check if the field is a value tag, in case the value tag also + // starts with the attribute prefix. + f.startsWith(options.attributePrefix) && f != options.valueTag + } + + // We need to write attributes first before the elements. + (attributes ++ elements).foreach { + case (field, elementPos) => + writeVariant(field, v, elementPos) + } + } + ) + gen.writeEndElement() + } + + /** + * Write a Variant array to XML. A Variant array is flattened and written as a sequence of + * XML elements with the same element name as the array field name. + * + * @param name The name of the array field + * @param v The original Variant entity + * @param pos The position in the Variant data array where the array value starts + */ + private def writeVariantArray(name: String, v: VariantVal, pos: Int): Unit = { + VariantUtil.handleArray( + v.getValue, + pos, + (size, offsetSize, offsetStart, dataStart) => { + // Traverse each item of the array and write each of them as an XML element + (0 until size).foreach { i => + val offset = + VariantUtil.readUnsigned(v.getValue, offsetStart + offsetSize * i, offsetSize) + val elementPos = dataStart + offset + // Check if the array element is also of type ARRAY + if (VariantUtil.getType(v.getValue, elementPos) == VariantUtil.Type.ARRAY) { + // For the case round trip in reading and writing XML files, [[ArrayType]] cannot have + // [[ArrayType]] as element type. It always wraps the element with [[StructType]]. So, + // this case only can happen when we convert a normal [[DataFrame]] to XML file. + // When [[ArrayType]] has [[ArrayType]] as elements, it is confusing what is element + // name for XML file. 
+ writeVariantArray(options.arrayElementName, v, elementPos) + } else { + writeVariant(name, v, elementPos) + } + } + } + ) + } + + /** + * Write a Variant primitive field to XML + * + * @param name The name of the field + * @param v The original Variant entity + * @param pos The position in the Variant data array where the field value starts + */ + private def writeVariantPrimitive(name: String, v: VariantVal, pos: Int): Unit = { + val primitiveVal = VariantUtil.getType(v.getValue, pos) match { Review Comment: ack, will do -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org