ahshahid commented on code in PR #48252: URL: https://github.com/apache/spark/pull/48252#discussion_r1844713507
########## sql/api/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala: ########## @@ -148,34 +163,180 @@ object JavaTypeInference { // TODO: we should only collect properties that have getter and setter. However, some tests // pass in scala case class as java bean class which doesn't have getter and setter. val properties = getJavaBeanReadableProperties(c) - // add type variables from inheritance hierarchy of the class - val classTV = JavaTypeUtils.getTypeArguments(c, classOf[Object]).asScala.toMap ++ - typeVariables - // Note that the fields are ordered by name. - val fields = properties.map { property => - val readMethod = property.getReadMethod - val encoder = encoderFor(readMethod.getGenericReturnType, seenTypeSet + c, classTV) - // The existence of `javax.annotation.Nonnull`, means this field is not nullable. - val hasNonNull = readMethod.isAnnotationPresent(classOf[Nonnull]) - EncoderField( - property.getName, - encoder, - encoder.nullable && !hasNonNull, - Metadata.empty, - Option(readMethod.getName), - Option(property.getWriteMethod).map(_.getName)) + + // if the properties is empty and this is not a top level enclosing class, then we + // should not consider class as bean, as otherwise it will be treated as empty schema + // and loose the data on deser. + if (properties.isEmpty && seenTypeSet.nonEmpty) { + findBestEncoder(Seq(c), seenTypeSet, typeVariables, None, serializableEncodersOnly = true) + .getOrElse(throw ExecutionErrors.cannotFindEncoderForTypeError(t.getTypeName)) + } else { + // add type variables from inheritance hierarchy of the class + val parentClassesTypeMap = + JavaTypeUtils.getTypeArguments(c, classOf[Object]).asScala.toMap + val classTV = parentClassesTypeMap ++ typeVariables + // Note that the fields are ordered by name. 
+ val fields = properties.map { property => + val readMethod = property.getReadMethod + val methodReturnType = readMethod.getGenericReturnType + val encoder = encoderFor(methodReturnType, seenTypeSet + c, classTV) + // The existence of `javax.annotation.Nonnull`, means this field is not nullable. + val hasNonNull = readMethod.isAnnotationPresent(classOf[Nonnull]) + EncoderField( + property.getName, + encoder, + encoder.nullable && !hasNonNull, + Metadata.empty, + Option(readMethod.getName), + Option(property.getWriteMethod).map(_.getName)) + } + // implies it cannot be assumed a BeanClass. + // Check if its super class or interface could be represented by an Encoder + + JavaBeanEncoder(ClassTag(c), fields.toImmutableArraySeq) } - JavaBeanEncoder(ClassTag(c), fields.toImmutableArraySeq) case _ => throw ExecutionErrors.cannotFindEncoderForTypeError(t.toString) } + private def createUDTEncoderUsingAnnotation(c: Class[_]): UDTEncoder[Any] = { + val udt = c + .getAnnotation(classOf[SQLUserDefinedType]) + .udt() + .getConstructor() + .newInstance() + .asInstanceOf[UserDefinedType[Any]] + val udtClass = udt.userClass.getAnnotation(classOf[SQLUserDefinedType]).udt() + UDTEncoder(udt, udtClass) + } + + private def createUDTEncoderUsingRegistration(c: Class[_]): UDTEncoder[Any] = { + val udt = UDTRegistration + .getUDTFor(c.getName) + .get + .getConstructor() + .newInstance() + .asInstanceOf[UserDefinedType[Any]] + UDTEncoder(udt, udt.getClass) + } + def getJavaBeanReadableProperties(beanClass: Class[_]): Array[PropertyDescriptor] = { val beanInfo = Introspector.getBeanInfo(beanClass) beanInfo.getPropertyDescriptors .filterNot(_.getName == "class") .filterNot(_.getName == "declaringClass") .filter(_.getReadMethod != null) } + + private def findBestEncoder( + typesToCheck: Seq[Class[_]], + seenTypeSet: Set[Class[_]], + typeVariables: Map[TypeVariable[_], Type], + baseClass: Option[Class[_]], + serializableEncodersOnly: Boolean = false): Option[AgnosticEncoder[_]] = + if 
(serializableEncodersOnly) { + val isClientConnect = clientConnectFlag.get + assert(typesToCheck.size == 1) + typesToCheck + .flatMap(c => { + if (!isClientConnect && classOf[KryoSerializable].isAssignableFrom(c)) { Review Comment: Will do. The explanation is provided in my response to the previous comment. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org