Copilot commented on code in PR #54572:
URL: https://github.com/apache/spark/pull/54572#discussion_r2873138675
##########
core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala:
##########
@@ -243,4 +247,134 @@ class SizeEstimatorSuite
assertResult(2015)(SizeEstimator.estimate(new DummyClass8))
assertResult(20206)(SizeEstimator.estimate(Array.fill(10)(new
DummyClass8)))
}
+
+ // Tests for JEP 450/519: Compact Object Headers
+ // With Compact Object Headers, the object header is 8 bytes (vs. 12 with
Compressed Oops,
+ // or 16 without) because the class pointer is encoded inside the mark word.
+ // Object references pointers are 4 bytes with Compressed Oops, or 8 bytes
without.
+
+ test("64-bit arch with compact object headers: simple classes") {
+ reinitializeSizeEstimator("amd64", "true", "true")
+ // objectSize = 8 (compact header), pointerSize = 4
+ // DummyClass1: 8-byte header, no fields => 8 bytes
+ assertResult(8)(SizeEstimator.estimate(new DummyClass1))
+ // DummyClass2: 8-byte header + Int(4) => 12, aligned to 16
+ assertResult(16)(SizeEstimator.estimate(new DummyClass2))
+ // DummyClass3: 8-byte header + Int(4) + Double(8) => 20, aligned to 24
+ assertResult(24)(SizeEstimator.estimate(new DummyClass3))
+ // DummyClass4: 8-byte header + Int(4) + pointer(4) => 16, aligned to 16
+ assertResult(16)(SizeEstimator.estimate(new DummyClass4(null)))
+ // DummyClass4 with DummyClass3: 16 + 24 = 40
+ assertResult(40)(SizeEstimator.estimate(new DummyClass4(new DummyClass3)))
+ }
+
+ test("64-bit arch with compact object headers: primitive wrapper objects") {
+ reinitializeSizeEstimator("amd64", "true", "true")
+ // Boolean/Byte/Char/Short/Int/Float wrappers: 8-byte header + primitive
up to 4 bytes => 16
+ assertResult(16)(SizeEstimator.estimate(java.lang.Boolean.TRUE))
+ assertResult(16)(SizeEstimator.estimate(java.lang.Byte.valueOf("1")))
+ assertResult(16)(SizeEstimator.estimate(java.lang.Character.valueOf('1')))
+ assertResult(16)(SizeEstimator.estimate(java.lang.Short.valueOf("1")))
+ assertResult(16)(SizeEstimator.estimate(java.lang.Integer.valueOf(1)))
+ assertResult(16)(SizeEstimator.estimate(java.lang.Float.valueOf(1.0f)))
+ // Long/Double wrappers: 8-byte header + 8-byte primitive => 16
+ assertResult(16)(SizeEstimator.estimate(java.lang.Long.valueOf(1)))
+ assertResult(16)(SizeEstimator.estimate(java.lang.Double.valueOf(1.0)))
+ }
+
+ test("64-bit arch with compact object headers: primitive arrays") {
+ reinitializeSizeEstimator("amd64", "true", "true")
+ // Array header = objectSize(8) + length Int(4) = 12, aligned to 16
+ // Array[Byte](10): 16 + alignSize(10*1=10) = 16 + 16 = 32
+ assertResult(32)(SizeEstimator.estimate(new Array[Byte](10)))
+ // Array[Char](10): 16 + alignSize(10*2=20) = 16 + 24 = 40
+ assertResult(40)(SizeEstimator.estimate(new Array[Char](10)))
+ // Array[Int](10): 16 + alignSize(10*4=40) = 16 + 40 = 56
+ assertResult(56)(SizeEstimator.estimate(new Array[Int](10)))
+ // Array[Long](10): 16 + alignSize(10*8=80) = 16 + 80 = 96
+ assertResult(96)(SizeEstimator.estimate(new Array[Long](10)))
+ }
+
+ test("64-bit arch with compact object headers: strings") {
+ reinitializeSizeEstimator("amd64", "true", "true")
+ // DummyString has: pointer(arr,4) + Int(hashCode,4) + Int(hash32,4) = 12
fields
+ // objectSize=8, fields=12 => shellSize=20, aligned to 24
+ // DummyString("") => DummyString(24) + Array[Char](0)(16) = 40
+ assertResult(40)(SizeEstimator.estimate(DummyString("")))
+ // DummyString("a") => 24 + Array[Char](1): 16 + alignSize(2) = 16+8=24 =>
24+24=48
+ assertResult(48)(SizeEstimator.estimate(DummyString("a")))
+ }
+
+ test("64-bit arch with compact object headers: class field blocks rounding")
{
+ reinitializeSizeEstimator("amd64", "true", "true")
+ // DummyClass1: 8-byte header, no fields => shellSize=8, alignSize(8)=8
+ // DummyClass5 extends DummyClass1: parent.shellSize=8, adds Boolean(1)
+ // alignedSize = max(8, alignSizeUp(8,1)+1) = 9, shellSize=9
+ // shellSize = alignSizeUp(9, pointerSize=4) = 12
+ // alignSize(12) = 16
+ assertResult(16)(SizeEstimator.estimate(new DummyClass5))
+ // DummyClass6 extends DummyClass5: parent.shellSize=12, adds Boolean(1)
+ // alignedSize = max(12, alignSizeUp(12,1)+1) = 13, shellSize=13
+ // shellSize = alignSizeUp(13, pointerSize=4) = 16
+ // alignSize(16) = 16
+ assertResult(16)(SizeEstimator.estimate(new DummyClass6))
+ }
+
+ test("64-bit arch with compact object headers and no compressed oops") {
+ reinitializeSizeEstimator("amd64", "false", "true")
+ // objectSize = 8, pointerSize = 8
+ // DummyClass1: 8-byte header, no fields => 8 bytes
+ assertResult(8)(SizeEstimator.estimate(new DummyClass1))
+ assertResult(16)(SizeEstimator.estimate(new DummyClass2))
+ // DummyClass3: 8-byte header + Int(4) + Double(8) => 20, aligned to 24
+ assertResult(24)(SizeEstimator.estimate(new DummyClass3))
+ // DummyClass4: 8-byte header + Int(4) + pointer(8) => 20, aligned to 24
+ assertResult(24)(SizeEstimator.estimate(new DummyClass4(null)))
+ // DummyClass4 with DummyClass3: 24 + 24 = 48
+ assertResult(48)(SizeEstimator.estimate(new DummyClass4(new DummyClass3)))
+
+ // Primitive wrapper objects
+ // Boolean/Byte/Char/Short/Int/Float wrappers:
+ // 8-byte header + primitive up to 4 bytes => 12, aligned to 16
+ assertResult(16)(SizeEstimator.estimate(java.lang.Boolean.TRUE))
+ assertResult(16)(SizeEstimator.estimate(java.lang.Byte.valueOf("1")))
+ assertResult(16)(SizeEstimator.estimate(java.lang.Character.valueOf('1')))
+ assertResult(16)(SizeEstimator.estimate(java.lang.Short.valueOf("1")))
+ assertResult(16)(SizeEstimator.estimate(java.lang.Integer.valueOf(1)))
+ assertResult(16)(SizeEstimator.estimate(java.lang.Float.valueOf(1.0f)))
+ // Long/Double wrappers: 8-byte header + 8-byte primitive => 16, aligned
to 16
+ assertResult(16)(SizeEstimator.estimate(java.lang.Long.valueOf(1)))
+ assertResult(16)(SizeEstimator.estimate(java.lang.Double.valueOf(1.0)))
+
+ // Primitive arrays
+ // Array header = objectSize(8) + length Int(4) = 12, aligned to 16
+ // Array[Byte](10): 16 + alignSize(10*1=10) = 16 + 16 = 32
+ assertResult(32)(SizeEstimator.estimate(new Array[Byte](10)))
+ // Array[Char](10): 16 + alignSize(10*2=20) = 16 + 24 = 40
+ assertResult(40)(SizeEstimator.estimate(new Array[Char](10)))
+ // Array[Int](10): 16 + alignSize(10*4=40) = 16 + 40 = 56
+ assertResult(56)(SizeEstimator.estimate(new Array[Int](10)))
+ // Array[Long](10): 16 + alignSize(10*8=80) = 16 + 80 = 96
+ assertResult(96)(SizeEstimator.estimate(new Array[Long](10)))
+
+ // Strings (DummyString)
+ // DummyString has: pointer(arr,8) + Int(hashCode,4) + Int(hash32,4) = 16
fields
+ // objectSize=8, fields=16 => shellSize=24, aligned to 24
Review Comment:
Comment says "= 16 fields" but the arithmetic is summing byte sizes, not
counting fields. Reword to something like "16 bytes of fields" to keep the
explanation accurate.
```suggestion
// DummyString has: pointer(arr,8) + Int(hashCode,4) + Int(hash32,4) =
16 bytes of fields
// objectSize=8, field bytes=16 => shellSize=24, aligned to 24
```
##########
core/src/main/scala/org/apache/spark/util/SizeEstimator.scala:
##########
@@ -136,28 +158,38 @@ object SizeEstimator extends Logging {
return System.getProperty("java.vm.info").contains("Compressed Ref")
}
+ getHotSpotVMOption("UseCompressedOops") match {
+ case Some(value) => value.contains("true")
+ case None =>
+ // Guess whether they've enabled UseCompressedOops based on whether
maxMemory < 32 GB
+ val guess = Runtime.getRuntime.maxMemory < (32L*1024*1024*1024)
+ logWarning(log"Failed to check whether UseCompressedOops is set; " +
+ log"assuming " + (if (guess) log"yes" else log"not"))
+ guess
+ }
+ }
+
+ /**
+ * Retrieves the string representation of a HotSpot VM option via the
HotSpotDiagnosticMXBean.
+ * Returns [[Some]] with the option's string value on success, or [[None]]
if the option cannot
+ * be read (e.g. on non-HotSpot JVMs or when the option does not exist).
+ */
+ private def getHotSpotVMOption(option: String): Option[String] = {
try {
val hotSpotMBeanName = "com.sun.management:type=HotSpotDiagnostic"
- val server = ManagementFactory.getPlatformMBeanServer()
+ val server = ManagementFactory.getPlatformMBeanServer
// NOTE: This should throw an exception in non-Sun JVMs
// scalastyle:off classforname
val hotSpotMBeanClass =
Class.forName("com.sun.management.HotSpotDiagnosticMXBean")
- val getVMMethod = hotSpotMBeanClass.getDeclaredMethod("getVMOption",
- Class.forName("java.lang.String"))
+ val getVMMethod = hotSpotMBeanClass.getDeclaredMethod("getVMOption",
classOf[String])
// scalastyle:on classforname
val bean = ManagementFactory.newPlatformMXBeanProxy(server,
hotSpotMBeanName, hotSpotMBeanClass)
- // TODO: We could use reflection on the VMOption returned ?
- getVMMethod.invoke(bean, "UseCompressedOops").toString.contains("true")
+ Some(getVMMethod.invoke(bean, option).toString)
Review Comment:
`getHotSpotVMOption` returns `VMOption.toString` and callers detect enabled
flags via `.contains("true")`. `VMOption.toString` is not a stable/parsable
representation of the option value (and may contain other `true` substrings
unrelated to the option value), so this can mis-detect options like
`UseCompressedOops` / `UseCompactObjectHeaders`. Prefer extracting the actual
VMOption value (e.g., via `com.sun.management.VMOption#getValue` like
`Utils.checkUseGC` does) and compare that value exactly.
```suggestion
val vmOption = getVMMethod.invoke(bean, option)
val getValueMethod = vmOption.getClass.getMethod("getValue")
val value = getValueMethod.invoke(vmOption).asInstanceOf[String]
Some(value)
```
##########
core/src/main/scala/org/apache/spark/util/SizeEstimator.scala:
##########
@@ -123,6 +131,20 @@ object SizeEstimator extends Logging {
classInfos.put(classOf[Object], new ClassInfo(objectSize, Nil))
Review Comment:
PR description says that with Compact Object Headers "pointerSize = 4
regardless of UseCompressedOops", but the implementation keeps `pointerSize =
8` when `!isCompressedOops` even if `isCompactObjectHeaders` is true. Please
reconcile: either update the description (and any related docs) or adjust the
`pointerSize` logic/tests to match the intended behavior.
##########
core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala:
##########
@@ -243,4 +247,134 @@ class SizeEstimatorSuite
assertResult(2015)(SizeEstimator.estimate(new DummyClass8))
assertResult(20206)(SizeEstimator.estimate(Array.fill(10)(new
DummyClass8)))
}
+
+ // Tests for JEP 450/519: Compact Object Headers
+ // With Compact Object Headers, the object header is 8 bytes (vs. 12 with
Compressed Oops,
+ // or 16 without) because the class pointer is encoded inside the mark word.
+ // Object references pointers are 4 bytes with Compressed Oops, or 8 bytes
without.
+
+ test("64-bit arch with compact object headers: simple classes") {
+ reinitializeSizeEstimator("amd64", "true", "true")
+ // objectSize = 8 (compact header), pointerSize = 4
+ // DummyClass1: 8-byte header, no fields => 8 bytes
+ assertResult(8)(SizeEstimator.estimate(new DummyClass1))
+ // DummyClass2: 8-byte header + Int(4) => 12, aligned to 16
+ assertResult(16)(SizeEstimator.estimate(new DummyClass2))
+ // DummyClass3: 8-byte header + Int(4) + Double(8) => 20, aligned to 24
+ assertResult(24)(SizeEstimator.estimate(new DummyClass3))
+ // DummyClass4: 8-byte header + Int(4) + pointer(4) => 16, aligned to 16
+ assertResult(16)(SizeEstimator.estimate(new DummyClass4(null)))
+ // DummyClass4 with DummyClass3: 16 + 24 = 40
+ assertResult(40)(SizeEstimator.estimate(new DummyClass4(new DummyClass3)))
+ }
+
+ test("64-bit arch with compact object headers: primitive wrapper objects") {
+ reinitializeSizeEstimator("amd64", "true", "true")
+ // Boolean/Byte/Char/Short/Int/Float wrappers: 8-byte header + primitive
up to 4 bytes => 16
+ assertResult(16)(SizeEstimator.estimate(java.lang.Boolean.TRUE))
+ assertResult(16)(SizeEstimator.estimate(java.lang.Byte.valueOf("1")))
+ assertResult(16)(SizeEstimator.estimate(java.lang.Character.valueOf('1')))
+ assertResult(16)(SizeEstimator.estimate(java.lang.Short.valueOf("1")))
+ assertResult(16)(SizeEstimator.estimate(java.lang.Integer.valueOf(1)))
+ assertResult(16)(SizeEstimator.estimate(java.lang.Float.valueOf(1.0f)))
+ // Long/Double wrappers: 8-byte header + 8-byte primitive => 16
+ assertResult(16)(SizeEstimator.estimate(java.lang.Long.valueOf(1)))
+ assertResult(16)(SizeEstimator.estimate(java.lang.Double.valueOf(1.0)))
+ }
+
+ test("64-bit arch with compact object headers: primitive arrays") {
+ reinitializeSizeEstimator("amd64", "true", "true")
+ // Array header = objectSize(8) + length Int(4) = 12, aligned to 16
+ // Array[Byte](10): 16 + alignSize(10*1=10) = 16 + 16 = 32
+ assertResult(32)(SizeEstimator.estimate(new Array[Byte](10)))
+ // Array[Char](10): 16 + alignSize(10*2=20) = 16 + 24 = 40
+ assertResult(40)(SizeEstimator.estimate(new Array[Char](10)))
+ // Array[Int](10): 16 + alignSize(10*4=40) = 16 + 40 = 56
+ assertResult(56)(SizeEstimator.estimate(new Array[Int](10)))
+ // Array[Long](10): 16 + alignSize(10*8=80) = 16 + 80 = 96
+ assertResult(96)(SizeEstimator.estimate(new Array[Long](10)))
+ }
+
+ test("64-bit arch with compact object headers: strings") {
+ reinitializeSizeEstimator("amd64", "true", "true")
+ // DummyString has: pointer(arr,4) + Int(hashCode,4) + Int(hash32,4) = 12
fields
+ // objectSize=8, fields=12 => shellSize=20, aligned to 24
Review Comment:
Comment says "= 12 fields" but the arithmetic is summing byte sizes, not
counting fields. Reword to avoid confusion when reasoning about `shellSize`
(e.g., "12 bytes of fields").
```suggestion
// DummyString has: pointer(arr,4) + Int(hashCode,4) + Int(hash32,4) =
12 bytes of fields
// objectSize=8, fields=12 bytes => shellSize=20, aligned to 24
```
##########
core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala:
##########
@@ -243,4 +247,134 @@ class SizeEstimatorSuite
assertResult(2015)(SizeEstimator.estimate(new DummyClass8))
assertResult(20206)(SizeEstimator.estimate(Array.fill(10)(new
DummyClass8)))
}
+
+ // Tests for JEP 450/519: Compact Object Headers
+ // With Compact Object Headers, the object header is 8 bytes (vs. 12 with
Compressed Oops,
+ // or 16 without) because the class pointer is encoded inside the mark word.
+ // Object references pointers are 4 bytes with Compressed Oops, or 8 bytes
without.
Review Comment:
Comment typo/grammar: "Object references pointers" should be "Object
reference pointers" (or similar).
```suggestion
// Object reference pointers are 4 bytes with Compressed Oops, or 8 bytes
without.
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]