This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git
The following commit(s) were added to refs/heads/main by this push:
new 8ae4ce522 fix: include TypeMeta header bits in hash (#3659)
8ae4ce522 is described below
commit 8ae4ce522a537a12715f40c29bbe1136dab6f973
Author: Shawn Yang <[email protected]>
AuthorDate: Fri May 8 14:22:11 2026 +0800
fix: include TypeMeta header bits in hash (#3659)
## Why?
## What does this PR do?
## Related issues
## AI Contribution Checklist
- [ ] Substantial AI assistance was used in this PR: `yes` / `no`
- [ ] If `yes`, I included a completed [AI Contribution
Checklist](https://github.com/apache/fory/blob/main/AI_POLICY.md#9-contributor-checklist-for-ai-assisted-prs)
in this PR description and the required `AI Usage Disclosure`.
- [ ] If `yes`, my PR description includes the required `ai_review`
summary and screenshot evidence of the final clean AI review results
from both fresh reviewers on the current PR diff or current HEAD after
the latest code changes.
## Does this PR introduce any user-facing change?
- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?
## Benchmark
---
cpp/fory/serialization/context.cc | 8 ++--
cpp/fory/serialization/serialization_test.cc | 55 ++++++++++++++++++++--
cpp/fory/serialization/type_resolver.cc | 30 ++++++++----
csharp/src/Fory/ReadContext.cs | 2 +-
csharp/src/Fory/TypeMeta.cs | 50 ++++++++++++++++----
csharp/tests/Fory.Tests/ForyRuntimeTests.cs | 49 +++++++++++++++++++
dart/packages/fory/lib/src/meta/type_meta.dart | 6 ++-
.../fory/lib/src/resolver/type_resolver.dart | 4 +-
dart/packages/fory/lib/src/util/hash_util.dart | 21 +++++----
dart/packages/fory/test/xlang_protocol_test.dart | 12 +++++
docs/specification/java_serialization_spec.md | 10 +++-
docs/specification/xlang_serialization_spec.md | 8 +++-
go/fory/type_def.go | 35 +++++++-------
go/fory/type_def_test.go | 29 ++++++++++++
go/fory/type_resolver.go | 2 +-
.../org/apache/fory/meta/NativeTypeDefDecoder.java | 6 +--
.../org/apache/fory/meta/NativeTypeDefEncoder.java | 22 ++++++---
.../main/java/org/apache/fory/meta/TypeDef.java | 6 +--
.../org/apache/fory/resolver/TypeResolver.java | 4 +-
.../apache/fory/meta/NativeTypeDefEncoderTest.java | 41 ++++++++++++++++
.../org/apache/fory/meta/TypeDefEncoderTest.java | 33 +++++++++++++
javascript/packages/core/lib/context.ts | 2 +-
javascript/packages/core/lib/meta/TypeMeta.ts | 20 +++++---
javascript/test/typemeta.test.ts | 46 ++++++++++++++++++
python/pyfory/meta/typedef.py | 5 +-
python/pyfory/meta/typedef_decoder.py | 4 +-
python/pyfory/meta/typedef_encoder.py | 6 +--
python/pyfory/registry.py | 2 +-
python/pyfory/serialization.pyx | 2 +-
python/pyfory/tests/test_typedef_encoding.py | 40 +++++++++++++++-
rust/fory-core/src/meta/type_meta.rs | 53 ++++++++++++++++++---
rust/fory-core/src/resolver/meta_resolver.rs | 4 +-
swift/Sources/Fory/ReadContext.swift | 6 +--
swift/Sources/Fory/TypeMeta.swift | 16 ++++---
swift/Tests/ForyTests/ForySwiftTests.swift | 36 ++++++++++++++
35 files changed, 565 insertions(+), 110 deletions(-)
diff --git a/cpp/fory/serialization/context.cc b/cpp/fory/serialization/context.cc
index ef657fcf7..6eec0267d 100644
--- a/cpp/fory/serialization/context.cc
+++ b/cpp/fory/serialization/context.cc
@@ -506,8 +506,8 @@ Result<const TypeInfo *, Error>
ReadContext::read_type_meta() {
// Check if we already parsed this type meta (cache lookup by header)
if (has_last_meta_header_ && meta_header == last_meta_header_) {
// Header-cache hits intentionally skip without rehashing. Entries reach
- // this cache only after a successful TypeMeta parse and 52-bit body-hash
- // validation.
+ // this cache only after a successful TypeMeta parse and 52-bit
+ // metadata-hash validation.
const TypeInfo *cached = last_meta_type_info_;
reading_type_infos_.push_back(cached);
FORY_RETURN_NOT_OK(
@@ -518,8 +518,8 @@ Result<const TypeInfo *, Error>
ReadContext::read_type_meta() {
auto *cache_entry = parsed_type_infos_.find(meta_header);
if (cache_entry != nullptr) {
// Header-cache hits intentionally skip without rehashing. Entries reach
- // this cache only after a successful TypeMeta parse and 52-bit body-hash
- // validation.
+ // this cache only after a successful TypeMeta parse and 52-bit
+ // metadata-hash validation.
const TypeInfo *cached = cache_entry->second;
reading_type_infos_.push_back(cached);
has_last_meta_header_ = true;
diff --git a/cpp/fory/serialization/serialization_test.cc b/cpp/fory/serialization/serialization_test.cc
index 226afe42b..27ab634fa 100644
--- a/cpp/fory/serialization/serialization_test.cc
+++ b/cpp/fory/serialization/serialization_test.cc
@@ -84,7 +84,27 @@ namespace test {
namespace {
uint64_t compute_type_meta_hash_bits_for_test(const uint8_t *meta_bytes,
- size_t meta_size) {
+ size_t meta_size,
+ uint64_t header_low_bits) {
+ constexpr uint32_t kHashShift = 12;
+ constexpr uint64_t kHashBitsMask = UINT64_MAX << kHashShift;
+ std::vector<uint8_t> hash_input(meta_size + 2);
+ std::memcpy(hash_input.data(), meta_bytes, meta_size);
+ hash_input[meta_size] = static_cast<uint8_t>(header_low_bits);
+ hash_input[meta_size + 1] = static_cast<uint8_t>(header_low_bits >> 8);
+ int64_t hash_out[2] = {0, 0};
+ MurmurHash3_x64_128(hash_input.data(), static_cast<int>(hash_input.size()),
+ 47, hash_out);
+ uint64_t shifted = static_cast<uint64_t>(hash_out[0]) << kHashShift;
+ if (static_cast<int64_t>(shifted) < 0) {
+ shifted = ~shifted + 1;
+ }
+ return shifted & kHashBitsMask;
+}
+
+uint64_t
+compute_body_only_type_meta_hash_bits_for_test(const uint8_t *meta_bytes,
+ size_t meta_size) {
constexpr uint32_t kHashShift = 12;
constexpr uint64_t kHashBitsMask = UINT64_MAX << kHashShift;
int64_t hash_out[2] = {0, 0};
@@ -829,7 +849,7 @@ TEST(SerializationTest,
TypeMetaRejectsOverConsumedDeclaredSize) {
EXPECT_EQ(parsed.error().code(), ErrorCode::InvalidData);
}
-TEST(SerializationTest, TypeMetaHeaderUses52BitBodyHash) {
+TEST(SerializationTest, TypeMetaHeaderUses52BitMetadataHash) {
std::vector<FieldInfo> fields;
fields.emplace_back(
"value", FieldType(static_cast<uint32_t>(TypeId::VARINT32), false));
@@ -869,6 +889,35 @@ TEST(SerializationTest, TypeMetaHeaderUses52BitBodyHash) {
parsed.value()->get_hash());
}
+TEST(SerializationTest, TypeMetaRejectsBodyOnlyHeaderHash) {
+ TypeMeta meta =
+ TypeMeta::from_fields(static_cast<uint32_t>(TypeId::STRUCT), "", "S",
+ false, 1, std::vector<FieldInfo>{});
+ auto bytes_result = meta.to_bytes();
+ ASSERT_TRUE(bytes_result.ok())
+ << "TypeMeta serialization failed: " << bytes_result.error().to_string();
+
+ std::vector<uint8_t> bytes = bytes_result.value();
+ ASSERT_GT(bytes.size(), sizeof(uint64_t));
+ uint64_t header = 0;
+ std::memcpy(&header, bytes.data(), sizeof(header));
+
+ constexpr uint32_t kHashShift = 12;
+ constexpr uint64_t kHashBitsMask = UINT64_MAX << kHashShift;
+ uint64_t body_only_hash = compute_body_only_type_meta_hash_bits_for_test(
+ bytes.data() + sizeof(uint64_t), bytes.size() - sizeof(uint64_t));
+ ASSERT_NE(header & kHashBitsMask, body_only_hash);
+ header = body_only_hash | (header & ~kHashBitsMask);
+ std::memcpy(bytes.data(), &header, sizeof(header));
+
+ Buffer buffer(bytes);
+ auto parsed = TypeMeta::from_bytes(buffer, nullptr);
+ ASSERT_FALSE(parsed.ok());
+ EXPECT_EQ(parsed.error().code(), ErrorCode::InvalidData);
+ EXPECT_NE(parsed.error().to_string().find("metadata hash"),
+ std::string::npos);
+}
+
TEST(SerializationTest, TypeMetaNonStructHeaderUsesDenseKindCode) {
TypeMeta meta =
TypeMeta::from_fields(static_cast<uint32_t>(TypeId::ENUM), "", "E",
false,
@@ -902,7 +951,7 @@ TEST(SerializationTest,
TypeMetaRejectsNonStructReservedKindBits) {
ASSERT_NE(header & 0xff, 0xff);
header &= ~(UINT64_MAX << 12);
header |= compute_type_meta_hash_bits_for_test(
- bytes.data() + sizeof(uint64_t), bytes.size() - sizeof(uint64_t));
+ bytes.data() + sizeof(uint64_t), bytes.size() - sizeof(uint64_t),
header);
std::memcpy(bytes.data(), &header, sizeof(header));
Buffer buffer(bytes);
diff --git a/cpp/fory/serialization/type_resolver.cc b/cpp/fory/serialization/type_resolver.cc
index 039e7e826..31252670d 100644
--- a/cpp/fory/serialization/type_resolver.cc
+++ b/cpp/fory/serialization/type_resolver.cc
@@ -378,9 +378,15 @@ inline Result<uint32_t, Error>
type_id_from_type_meta_kind(uint8_t kind_code) {
}
inline uint64_t compute_type_meta_hash_bits(const uint8_t *meta_bytes,
- size_t meta_size) {
+ size_t meta_size,
+ uint64_t header_low_bits) {
+ std::vector<uint8_t> hash_input(meta_size + 2);
+ std::memcpy(hash_input.data(), meta_bytes, meta_size);
+ hash_input[meta_size] = static_cast<uint8_t>(header_low_bits);
+ hash_input[meta_size + 1] = static_cast<uint8_t>(header_low_bits >> 8);
int64_t hash_out[2] = {0, 0};
- MurmurHash3_x64_128(meta_bytes, static_cast<int>(meta_size), 47, hash_out);
+ MurmurHash3_x64_128(hash_input.data(), static_cast<int>(hash_input.size()),
+ 47, hash_out);
uint64_t shifted = static_cast<uint64_t>(hash_out[0]) <<
TYPE_META_HASH_SHIFT;
if (static_cast<int64_t>(shifted) < 0) {
shifted = ~shifted + 1;
@@ -390,8 +396,10 @@ inline uint64_t compute_type_meta_hash_bits(const uint8_t
*meta_bytes,
inline int64_t compute_type_meta_hash(const uint8_t *meta_bytes,
size_t meta_size) {
+ uint64_t header_low_bits =
+ std::min<uint64_t>(META_SIZE_MASK, static_cast<uint64_t>(meta_size));
return static_cast<int64_t>(
- compute_type_meta_hash_bits(meta_bytes, meta_size) >>
+ compute_type_meta_hash_bits(meta_bytes, meta_size, header_low_bits) >>
TYPE_META_HASH_SHIFT);
}
@@ -434,7 +442,7 @@ read_type_meta_size(Buffer &buffer, uint64_t header, size_t
*header_size) {
inline Result<void, Error> validate_type_meta_hash(Buffer &buffer,
uint32_t body_start,
uint32_t meta_size,
- int64_t header_hash) {
+ uint64_t header) {
uint64_t body_end = static_cast<uint64_t>(body_start) + meta_size;
if (FORY_PREDICT_FALSE(body_end > buffer.reader_index() ||
body_end > buffer.size())) {
@@ -442,10 +450,11 @@ inline Result<void, Error> validate_type_meta_hash(Buffer
&buffer,
Error::invalid_data("TypeMeta body range is not readable"));
}
uint64_t computed_hash_bits = compute_type_meta_hash_bits(
- buffer.data() + body_start, static_cast<size_t>(meta_size));
+ buffer.data() + body_start, static_cast<size_t>(meta_size),
+ header & ~TYPE_META_HASH_BITS_MASK);
if (FORY_PREDICT_FALSE((computed_hash_bits >> TYPE_META_HASH_SHIFT) !=
- static_cast<uint64_t>(header_hash))) {
- return Unexpected(Error::invalid_data("TypeMeta body hash mismatch"));
+ (header >> TYPE_META_HASH_SHIFT))) {
+ return Unexpected(Error::invalid_data("TypeMeta metadata hash mismatch"));
}
return Result<void, Error>();
}
@@ -574,7 +583,8 @@ Result<std::vector<uint8_t>, Error> TypeMeta::to_bytes()
const {
uint64_t meta_size = layer_size;
uint64_t header = std::min(META_SIZE_MASK, meta_size);
- header |= compute_type_meta_hash_bits(layer_buffer.data(), layer_size);
+ header |=
+ compute_type_meta_hash_bits(layer_buffer.data(), layer_size, header);
result_buffer.write_bytes(reinterpret_cast<const uint8_t *>(&header),
sizeof(header));
@@ -700,7 +710,7 @@ TypeMeta::from_bytes(Buffer &buffer, const TypeMeta
*local_type_info) {
"TypeMeta parser did not consume declared meta size"));
}
FORY_RETURN_IF_ERROR(
- validate_type_meta_hash(buffer, body_start, meta_size, meta_hash));
+ validate_type_meta_hash(buffer, body_start, meta_size, header_bits));
auto meta = std::make_unique<TypeMeta>();
meta->hash = meta_hash;
@@ -811,7 +821,7 @@ TypeMeta::from_bytes_with_header(Buffer &buffer, int64_t
header) {
"TypeMeta parser did not consume declared meta size"));
}
FORY_RETURN_IF_ERROR(
- validate_type_meta_hash(buffer, start_pos, meta_size, meta_hash));
+ validate_type_meta_hash(buffer, start_pos, meta_size, header_bits));
auto meta = std::make_unique<TypeMeta>();
meta->hash = meta_hash;
diff --git a/csharp/src/Fory/ReadContext.cs b/csharp/src/Fory/ReadContext.cs
index 6444500c4..2192bb55c 100644
--- a/csharp/src/Fory/ReadContext.cs
+++ b/csharp/src/Fory/ReadContext.cs
@@ -205,7 +205,7 @@ public sealed class ReadContext
if (TryGetCachedReadTypeMeta(header, out TypeMeta cachedTypeMeta))
{
// Header-cache hits intentionally skip without rehashing. Entries
reach this cache only
- // after a successful TypeMeta parse and 52-bit body-hash
validation. The current body
+ // after a successful TypeMeta parse and 52-bit metadata-hash
validation. The current body
// size still comes from the current header bytes, not from the
cached TypeMeta.
TypeMeta.SkipBody(Reader, header);
StoreReadTypeMeta(cachedTypeMeta, index);
diff --git a/csharp/src/Fory/TypeMeta.cs b/csharp/src/Fory/TypeMeta.cs
index 4312a5af8..216d34dfc 100644
--- a/csharp/src/Fory/TypeMeta.cs
+++ b/csharp/src/Fory/TypeMeta.cs
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.
+using System.Buffers;
+
namespace Apache.Fory;
internal static class TypeMetaConstants
@@ -467,9 +469,8 @@ public sealed class TypeMeta : IEquatable<TypeMeta>
}
byte[] body = EncodeBody();
- ulong header = ComputeHeaderHashBits(body);
- uint bodySize = (uint)Math.Min(body.Length,
(int)TypeMetaConstants.TypeMetaSizeMask);
- header |= bodySize;
+ ulong headerLowBits = ComputeHeaderLowBits(body.Length, compressed:
false);
+ ulong header = ComputeHeaderHashBits(body, headerLowBits) |
headerLowBits;
ByteWriter writer = new(body.Length + 16);
writer.WriteUInt64(header);
if (body.Length >= (int)TypeMetaConstants.TypeMetaSizeMask)
@@ -609,18 +610,47 @@ public sealed class TypeMeta : IEquatable<TypeMeta>
reader.Skip(ReadBodySize(reader, header));
}
- private static ulong ComputeHeaderHashBits(ReadOnlySpan<byte> body)
+ private static ulong ComputeHeaderLowBits(int bodyLength, bool compressed)
+ {
+ ulong headerLowBits = (ulong)Math.Min(bodyLength,
(int)TypeMetaConstants.TypeMetaSizeMask);
+ if (compressed)
+ {
+ headerLowBits |= TypeMetaConstants.TypeMetaCompressedFlag;
+ }
+
+ return headerLowBits;
+ }
+
+ private static ulong ComputeHeaderHashBits(ReadOnlySpan<byte> body, ulong
headerLowBits)
{
- (ulong bodyHash, _) = MurmurHash3.X64_128(body,
TypeMetaConstants.TypeMetaHashSeed);
- ulong shifted = bodyHash << TypeMetaConstants.TypeMetaHashShift;
- long signed = unchecked((long)shifted);
- long absSigned = signed == long.MinValue ? signed : Math.Abs(signed);
- return unchecked((ulong)absSigned) &
TypeMetaConstants.TypeMetaHashMask;
+ int hashInputLength = body.Length + sizeof(ushort);
+ byte[]? rented = null;
+ Span<byte> hashInput = hashInputLength <= 1024
+ ? stackalloc byte[hashInputLength]
+ : (rented =
ArrayPool<byte>.Shared.Rent(hashInputLength)).AsSpan(0, hashInputLength);
+ try
+ {
+ body.CopyTo(hashInput);
+ hashInput[body.Length] = unchecked((byte)headerLowBits);
+ hashInput[body.Length + 1] = unchecked((byte)(headerLowBits >> 8));
+ (ulong bodyHash, _) = MurmurHash3.X64_128(hashInput,
TypeMetaConstants.TypeMetaHashSeed);
+ ulong shifted = bodyHash << TypeMetaConstants.TypeMetaHashShift;
+ long signed = unchecked((long)shifted);
+ long absSigned = signed == long.MinValue ? signed :
Math.Abs(signed);
+ return unchecked((ulong)absSigned) &
TypeMetaConstants.TypeMetaHashMask;
+ }
+ finally
+ {
+ if (rented is not null)
+ {
+ ArrayPool<byte>.Shared.Return(rented);
+ }
+ }
}
private static void ValidateParsedTypeMetaHash(ulong header,
ReadOnlySpan<byte> body)
{
- ulong expectedHeaderHash = ComputeHeaderHashBits(body);
+ ulong expectedHeaderHash = ComputeHeaderHashBits(body, header &
~TypeMetaConstants.TypeMetaHashMask);
ulong actualHeaderHash = header & TypeMetaConstants.TypeMetaHashMask;
if (actualHeaderHash != expectedHeaderHash)
{
diff --git a/csharp/tests/Fory.Tests/ForyRuntimeTests.cs b/csharp/tests/Fory.Tests/ForyRuntimeTests.cs
index 7e1b0bc10..511713cf3 100644
--- a/csharp/tests/Fory.Tests/ForyRuntimeTests.cs
+++ b/csharp/tests/Fory.Tests/ForyRuntimeTests.cs
@@ -16,6 +16,7 @@
// under the License.
using System.Buffers;
+using System.Buffers.Binary;
using System.Collections.Concurrent;
using System.Collections.Immutable;
using System.Threading.Tasks;
@@ -1735,6 +1736,33 @@ public sealed class ForyRuntimeTests
Assert.Contains("TypeMeta metadata hash mismatch", exception.Message,
StringComparison.Ordinal);
}
+ [Fact]
+ public void TypeMetaHeaderHashIncludesLowHeaderBits()
+ {
+ TypeMeta typeMeta = new(
+ (uint)TypeId.CompatibleStruct,
+ 201,
+ MetaString.Empty('.', '_'),
+ MetaString.Empty('$', '_'),
+ registerByName: false,
+ [new TypeMetaFieldInfo(1, "value", new
TypeMetaFieldType((uint)TypeId.String, true))]);
+ byte[] encoded = typeMeta.Encode();
+ ulong header = BinaryPrimitives.ReadUInt64LittleEndian(encoded);
+ int bodyOffset = TypeMetaBodyOffset(encoded, header);
+ ulong hashMask = ulong.MaxValue << 12;
+ ulong bodyOnlyHash =
BodyOnlyTypeMetaHashBits(encoded.AsSpan(bodyOffset));
+ Assert.NotEqual(header & hashMask, bodyOnlyHash);
+
+ byte[] malformed = (byte[])encoded.Clone();
+ BinaryPrimitives.WriteUInt64LittleEndian(
+ malformed,
+ bodyOnlyHash | (header & ~hashMask));
+
+ InvalidDataException exception =
+ Assert.Throws<InvalidDataException>(() =>
TypeMeta.Decode(malformed));
+ Assert.Contains("TypeMeta metadata hash mismatch", exception.Message,
StringComparison.Ordinal);
+ }
+
[Fact]
public void TypeMetaAssignFieldIdsPrefersIdAndFallsBackToName()
{
@@ -1889,6 +1917,27 @@ public sealed class ForyRuntimeTests
return malformed;
}
+ private static int TypeMetaBodyOffset(byte[] encoded, ulong header)
+ {
+ ByteReader reader = new(encoded);
+ _ = reader.ReadUInt64();
+ if ((header & 0xff) == 0xff)
+ {
+ _ = reader.ReadVarUInt32();
+ }
+
+ return reader.Cursor;
+ }
+
+ private static ulong BodyOnlyTypeMetaHashBits(ReadOnlySpan<byte> body)
+ {
+ (ulong bodyHash, _) = MurmurHash3.X64_128(body, 47);
+ ulong shifted = bodyHash << 12;
+ long signed = unchecked((long)shifted);
+ long absSigned = signed == long.MinValue ? signed : Math.Abs(signed);
+ return unchecked((ulong)absSigned) & (ulong.MaxValue << 12);
+ }
+
private static (int TypeMetaStart, int TypeMetaEnd, TypeMeta TypeMeta)
ReadCompatibleTypeMetaRange(byte[] payload)
{
ByteReader reader = new(payload);
diff --git a/dart/packages/fory/lib/src/meta/type_meta.dart b/dart/packages/fory/lib/src/meta/type_meta.dart
index 8b32f199f..7d77e1725 100644
--- a/dart/packages/fory/lib/src/meta/type_meta.dart
+++ b/dart/packages/fory/lib/src/meta/type_meta.dart
@@ -57,6 +57,7 @@ final class WireTypeMeta {
final class TypeHeader {
static const int _compressMetaFlag = 1 << 8;
static const int _reservedMetaFlags = 0x0e00;
+ static const int _headerLowBitsMask = 0x0fff;
static const int _hashLow32Mask = 0xfffff000;
final Int64 value;
@@ -89,7 +90,10 @@ final class TypeHeader {
@pragma('vm:prefer-inline')
void validateBodyHash(Uint8List body) {
- final expected = typeDefHeader(body);
+ final expected = typeDefHeader(
+ body,
+ headerLowBits: value.low32 & _headerLowBitsMask,
+ );
if (value.high32Unsigned != expected.high32Unsigned ||
(value.low32 & _hashLow32Mask) != (expected.low32 & _hashLow32Mask)) {
throw StateError('Invalid TypeDef metadata hash.');
diff --git a/dart/packages/fory/lib/src/resolver/type_resolver.dart b/dart/packages/fory/lib/src/resolver/type_resolver.dart
index b9d095d39..adecdca05 100644
--- a/dart/packages/fory/lib/src/resolver/type_resolver.dart
+++ b/dart/packages/fory/lib/src/resolver/type_resolver.dart
@@ -1070,7 +1070,7 @@ final class TypeResolver {
final expectedTypeDef = expectedType?.typeDef;
if (expectedTypeDef != null && expectedTypeDef.header == header.value) {
// Header-cache hits intentionally skip without rehashing. Entries reach
this cache only
- // after a successful TypeDef parse and 52-bit body-hash validation.
+ // after a successful TypeDef parse and 52-bit metadata-hash validation.
header.skipRemaining(buffer);
sharedTypes.add(expectedType!);
return wireTypeMetaForResolved(expectedType);
@@ -1078,7 +1078,7 @@ final class TypeResolver {
final cached = _parsedTypeMetaCache.lookup(header);
if (cached != null) {
// Header-cache hits intentionally skip without rehashing. Entries reach
this cache only
- // after a successful TypeDef parse and 52-bit body-hash validation.
+ // after a successful TypeDef parse and 52-bit metadata-hash validation.
header.skipRemaining(buffer);
sharedTypes.add(cached);
return wireTypeMetaForResolved(cached);
diff --git a/dart/packages/fory/lib/src/util/hash_util.dart b/dart/packages/fory/lib/src/util/hash_util.dart
index 834f6d620..e80874c49 100644
--- a/dart/packages/fory/lib/src/util/hash_util.dart
+++ b/dart/packages/fory/lib/src/util/hash_util.dart
@@ -154,18 +154,23 @@ Int64 metaStringHash(List<int> bytes, {int encoding = 0})
{
Int64 typeDefHeader(
List<int> bytes, {
bool compressed = false,
+ int? headerLowBits,
}) {
- final hash = _int64FromUint64(
- _murmurHash3X64_128Bits(bytes).$1 << _typeDefHashShift,
- );
- var header = _absSigned64Bits(hash);
- if (compressed) {
- header = header | _typeDefCompressMetaFlag;
- }
- header = header |
+ var lowBits = headerLowBits ??
(bytes.length > _typeDefMetaSizeMask
? _typeDefMetaSizeMask
: bytes.length);
+ if (compressed) {
+ lowBits |= _typeDefCompressMetaFlag;
+ }
+ final hashInput = List<int>.of(bytes, growable: true)
+ ..add(lowBits & 0xff)
+ ..add((lowBits >> 8) & 0xff);
+ final hash = _int64FromUint64(
+ _murmurHash3X64_128Bits(hashInput).$1 << _typeDefHashShift,
+ );
+ var header = _absSigned64Bits(hash);
+ header = header | lowBits;
return _int64FromUint64(header);
}
diff --git a/dart/packages/fory/test/xlang_protocol_test.dart b/dart/packages/fory/test/xlang_protocol_test.dart
index c8bb77f1a..355ef4252 100644
--- a/dart/packages/fory/test/xlang_protocol_test.dart
+++ b/dart/packages/fory/test/xlang_protocol_test.dart
@@ -172,6 +172,18 @@ void main() {
),
),
);
+
+ final headerWithDifferentLowBits = TypeHeader(header.value ^ 1);
+ expect(
+ () => headerWithDifferentLowBits.validateBodyHash(body),
+ throwsA(
+ isA<StateError>().having(
+ (error) => error.toString(),
+ 'message',
+ contains('metadata hash'),
+ ),
+ ),
+ );
});
});
}
diff --git a/docs/specification/java_serialization_spec.md b/docs/specification/java_serialization_spec.md
index c52695f1c..094dcb135 100644
--- a/docs/specification/java_serialization_spec.md
+++ b/docs/specification/java_serialization_spec.md
@@ -206,9 +206,15 @@ Header layout (lower bits on the right):
```
- size: lower 8 bits. If size equals the mask (0xFF), write extra size as
varuint32 and add it.
-- compress: bit 8, set when payload is compressed.
+- compress: bit 8, set when class meta bytes are compressed.
- reserved: bits 9-11 are reserved for future use and must be zero.
-- hash: 52-bit hash of the payload.
+- hash: 52 stored hash bits derived from MurmurHash3 x64_128 seed 47 over
+ `class meta bytes || header_low12_le`. `header_low12_le` is two
little-endian bytes containing
+ the low 12 header bits (size, compress, and reserved bits); the upper four
bits of the second
+ byte are zero. Take lane 0 of the 128-bit MurmurHash3 result as a signed
int64, left-shift it by
+ 12 with two's-complement 64-bit wraparound, apply signed absolute value
(leaving `INT64_MIN`
+ unchanged), then mask with `0xfffffffffffff000`. The final header is the
masked hash bits OR-ed
+ with the low 12 header bits.
### Class meta bytes
diff --git a/docs/specification/xlang_serialization_spec.md b/docs/specification/xlang_serialization_spec.md
index 851791edb..516622ae6 100644
--- a/docs/specification/xlang_serialization_spec.md
+++ b/docs/specification/xlang_serialization_spec.md
@@ -557,7 +557,13 @@ The 8-byte header is a little-endian uint64:
Current xlang writers MUST leave this bit unset and current xlang readers
MUST treat a set bit
as unsupported.
- Bits 9-11: reserved for future extension (must be zero).
-- High 52 bits: hash of the TypeDef body.
+- High 52 bits: stored hash bits derived from MurmurHash3 x64_128 seed 47 over
+ `TypeDef body || header_low12_le`. `header_low12_le` is two little-endian
bytes containing the low
+ 12 header bits (size, compress, and reserved bits); the upper four bits of
the second byte are
+ zero. Take lane 0 of the 128-bit MurmurHash3 result as a signed int64,
left-shift it by 12 with
+ two's-complement 64-bit wraparound, apply signed absolute value (leaving
`INT64_MIN` unchanged),
+ then mask with `0xfffffffffffff000`. The final header is the masked hash
bits OR-ed with the low
+ 12 header bits.
#### TypeDef body
diff --git a/go/fory/type_def.go b/go/fory/type_def.go
index e1ab7c3eb..7520c42a6 100644
--- a/go/fory/type_def.go
+++ b/go/fory/type_def.go
@@ -35,7 +35,7 @@ const (
/*
TypeDef represents a transportable value object containing type information
and field definitions.
typeDef are layout as following:
- - first 8 bytes: global header (52 bits hash + 1 bit compress flag + 8 bits
meta size)
+ - first 8 bytes: global header (52 bits metadata hash + 3 bits reserved + 1
bit compress flag + 8 bits meta size)
- next 1 byte: kind header
- next variable bytes: type id (varint) or ns name + type name
- next variable bytes: field definitions (see below)
@@ -286,7 +286,7 @@ func readTypeDef(fory *Fory, buffer *ByteBuffer, header
int64, err *Error) *Type
func skipTypeDef(buffer *ByteBuffer, header int64, err *Error) {
// Header-cache hits intentionally treat the current body as opaque
bytes and skip by the size in
// the current header. Parsed TypeDefs are published to the cache only
after successful body parse
- // and 52-bit body-hash validation; cache hits must not reparse or
rehash that body.
+ // and 52-bit metadata-hash validation; cache hits must not reparse or
rehash that body.
sz := int(header & META_SIZE_MASK)
if sz == META_SIZE_MASK {
sz += int(buffer.ReadVarUint32(err))
@@ -672,7 +672,7 @@ func getFieldNameEncodingIndex(encoding meta.Encoding) int {
/*
encodingTypeDef encodes a TypeDef into binary format according to the
specification
typeDef are layout as following:
-- first 8 bytes: global header (52 bits hash + 1 bit compress flag + 8 bits
meta size)
+- first 8 bytes: global header (52 bits metadata hash + 3 bits reserved + 1
bit compress flag + 8 bits meta size)
- next 1 byte: kind header
- next variable bytes: type id (varint) or ns name + type name
- next variable bytes: field defs (see below)
@@ -806,20 +806,15 @@ func encodingTypeDef(typeResolver *TypeResolver, typeDef
*TypeDef) ([]byte, erro
// prependGlobalHeader writes the 8-byte global header
func prependGlobalHeader(buffer *ByteBuffer, isCompressed bool) (*ByteBuffer,
error) {
- var header uint64
metaSize := buffer.WriterIndex()
-
- header |= typeDefHeaderHash(buffer.GetByteSlice(0, metaSize))
-
- if isCompressed {
- header |= COMPRESS_META_FLAG
+ headerLowBits := uint64(metaSize)
+ if metaSize >= META_SIZE_MASK {
+ headerLowBits = META_SIZE_MASK
}
-
- if metaSize < META_SIZE_MASK {
- header |= uint64(metaSize) & META_SIZE_MASK
- } else {
- header |= META_SIZE_MASK // Set to max value, actual size will
follow
+ if isCompressed {
+ headerLowBits |= COMPRESS_META_FLAG
}
+ header := typeDefHeaderHash(buffer.GetByteSlice(0, metaSize),
headerLowBits) | headerLowBits
result := NewByteBuffer(make([]byte, metaSize+8))
result.WriteInt64(int64(header))
@@ -990,7 +985,7 @@ func writeFieldDef(typeResolver *TypeResolver, buffer
*ByteBuffer, field FieldDe
/*
decodeTypeDef decodes a TypeDef from the buffer
typeDef are layout as following:
- - first 8 bytes: global header (52 bits hash + 1 bit compress flag + 8 bits
meta size)
+ - first 8 bytes: global header (52 bits metadata hash + 3 bits reserved + 1
bit compress flag + 8 bits meta size)
- next 1 byte: kind header
- next variable bytes: type id (varint) or ns name + type name
- next variable bytes: field definitions (see below)
@@ -1275,8 +1270,12 @@ func buildTypeDefEncoded(header int64, metaSizeBits,
extraMetaSize int, metaByte
return buffer.Bytes()
}
-func typeDefHeaderHash(data []byte) uint64 {
- hash := int64(Murmur3Sum64WithSeed(data, 47) << (64 - NUM_HASH_BITS))
+func typeDefHeaderHash(data []byte, headerLowBits uint64) uint64 {
+ hashInput := make([]byte, len(data)+2)
+ copy(hashInput, data)
+ hashInput[len(data)] = byte(headerLowBits)
+ hashInput[len(data)+1] = byte(headerLowBits >> 8)
+ hash := int64(Murmur3Sum64WithSeed(hashInput, 47) << (64 -
NUM_HASH_BITS))
if hash < 0 {
hash = -hash
}
@@ -1295,7 +1294,7 @@ func validateParsedTypeDefHash(header int64,
metaSizeBits, extraMetaSize int, en
}
hashMask := ^uint64(0)
hashMask <<= uint(64 - NUM_HASH_BITS)
- expectedHeaderHash := typeDefHeaderHash(encoded)
+ expectedHeaderHash := typeDefHeaderHash(encoded,
uint64(header)&^hashMask)
actualHeaderHash := uint64(header) & hashMask
if expectedHeaderHash != actualHeaderHash {
return fmt.Errorf("invalid TypeDef metadata hash")
diff --git a/go/fory/type_def_test.go b/go/fory/type_def_test.go
index 37b465001..ef1627d7b 100644
--- a/go/fory/type_def_test.go
+++ b/go/fory/type_def_test.go
@@ -418,6 +418,25 @@ func TestTypeDefRejectsMetadataHashMismatch(t *testing.T) {
require.Contains(t, err.Error(), "metadata hash")
}
+func TestTypeDefHeaderHashIncludesHeaderLowBits(t *testing.T) {
+ fory := NewFory()
+ body := typeDefTestBodyWithoutFields()
+ _, header := typeDefTestFrame(t, body, false)
+
+ hashMask := ^uint64(0)
+ hashMask <<= uint(64 - NUM_HASH_BITS)
+ bodyOnlyHash := bodyOnlyTypeDefHeaderHash(body)
+ require.NotEqual(t, uint64(header)&hashMask, bodyOnlyHash)
+ rewrittenHeader := int64(bodyOnlyHash | (uint64(header) &^ hashMask))
+ buffer := NewByteBuffer(nil)
+ buffer.WriteBinary(body)
+ buffer.SetReaderIndex(0)
+
+ _, err := decodeTypeDef(fory, buffer, rewrittenHeader)
+ require.Error(t, err)
+ require.Contains(t, err.Error(), "metadata hash")
+}
+
func TestTypeDefRejectsEncodedMetadataAboveMaxBinarySize(t *testing.T) {
fory := NewFory(WithMaxBinarySize(1))
body := typeDefTestBodyWithoutFields()
@@ -523,6 +542,16 @@ func TestTypeDefRejectsFieldNameLengthBeyondMetadata(t
*testing.T) {
require.Contains(t, err.Error(), "field name length")
}
+func bodyOnlyTypeDefHeaderHash(data []byte) uint64 {
+ hash := int64(Murmur3Sum64WithSeed(data, 47) << (64 - NUM_HASH_BITS))
+ if hash < 0 {
+ hash = -hash
+ }
+ hashMask := ^uint64(0)
+ hashMask <<= uint(64 - NUM_HASH_BITS)
+ return uint64(hash) & hashMask
+}
+
// TestTypeDefNestedRecursionStackOverflowPanic verifies that
readFieldTypeWithFlags
// rejects a crafted payload with 20 million nested LIST types, returning an
error
// at depth 64 instead of recursing until a goroutine stack overflow crashes
the process.
diff --git a/go/fory/type_resolver.go b/go/fory/type_resolver.go
index 3838e67e2..109087170 100644
--- a/go/fory/type_resolver.go
+++ b/go/fory/type_resolver.go
@@ -1636,7 +1636,7 @@ func (r *TypeResolver) readSharedTypeMeta(buffer
*ByteBuffer, err *Error) *TypeI
var td *TypeDef
if existingTd, exists := r.defIdToTypeDef[id]; exists {
// Header-cache hits intentionally skip without rehashing.
Entries reach this cache only
- // after a successful TypeDef parse and 52-bit body-hash
validation.
+ // after a successful TypeDef parse and 52-bit metadata-hash
validation.
skipTypeDef(buffer, id, err)
td = existingTd
} else {
diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/NativeTypeDefDecoder.java b/java/fory-core/src/main/java/org/apache/fory/meta/NativeTypeDefDecoder.java
index b4f50c2c0..a4d125601 100644
--- a/java/fory-core/src/main/java/org/apache/fory/meta/NativeTypeDefDecoder.java
+++ b/java/fory-core/src/main/java/org/apache/fory/meta/NativeTypeDefDecoder.java
@@ -38,7 +38,6 @@ import org.apache.fory.resolver.ClassResolver;
import org.apache.fory.resolver.TypeResolver;
import org.apache.fory.serializer.UnknownClass;
import org.apache.fory.type.Types;
-import org.apache.fory.util.MurmurHash3;
import org.apache.fory.util.Preconditions;
/**
@@ -259,10 +258,9 @@ class NativeTypeDefDecoder {
if (encoded.length - bodyOffset != size) {
throw new DeserializationException("Invalid TypeDef encoded size");
}
- long hash = MurmurHash3.murmurhash3_x64_128(encoded, bodyOffset, size, 47)[0];
- hash <<= (Long.SIZE - TypeDef.NUM_HASH_BITS);
long hashMask = -1L << (Long.SIZE - TypeDef.NUM_HASH_BITS);
- long expectedHeaderHash = Math.abs(hash) & hashMask;
+ long expectedHeaderHash =
+ NativeTypeDefEncoder.computeTypeDefHashBits(encoded, bodyOffset, size, id & ~hashMask);
long actualHeaderHash = id & hashMask;
if (expectedHeaderHash != actualHeaderHash) {
throw new DeserializationException("Invalid TypeDef metadata hash");
diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/NativeTypeDefEncoder.java b/java/fory-core/src/main/java/org/apache/fory/meta/NativeTypeDefEncoder.java
index 6173a9e88..4ef3979d9 100644
--- a/java/fory-core/src/main/java/org/apache/fory/meta/NativeTypeDefEncoder.java
+++ b/java/fory-core/src/main/java/org/apache/fory/meta/NativeTypeDefEncoder.java
@@ -238,14 +238,12 @@ public class NativeTypeDefEncoder {
static MemoryBuffer prependHeader(MemoryBuffer buffer, boolean isCompressed) {
int metaSize = buffer.writerIndex();
- long hash = MurmurHash3.murmurhash3_x64_128(buffer.getHeapMemory(), 0, metaSize, 47)[0];
- hash <<= (64 - NUM_HASH_BITS);
- // this id will be part of generated codec, a negative number won't be allowed in class name.
- long header = Math.abs(hash);
+ long headerLowBits = Math.min(metaSize, META_SIZE_MASKS);
if (isCompressed) {
- header |= COMPRESS_META_FLAG;
+ headerLowBits |= COMPRESS_META_FLAG;
}
- header |= Math.min(metaSize, META_SIZE_MASKS);
+ long header =
+ computeTypeDefHashBits(buffer.getHeapMemory(), 0, metaSize, headerLowBits) | headerLowBits;
MemoryBuffer result = MemoryUtils.buffer(metaSize + 8);
result.writeInt64(header);
if (metaSize >= META_SIZE_MASKS) {
@@ -255,6 +253,18 @@ public class NativeTypeDefEncoder {
return result;
}
+ static long computeTypeDefHashBits(byte[] bytes, int offset, int size, long headerLowBits) {
+ byte[] hashInput = new byte[size + Short.BYTES];
+ System.arraycopy(bytes, offset, hashInput, 0, size);
+ hashInput[size] = (byte) headerLowBits;
+ hashInput[size + 1] = (byte) (headerLowBits >>> Byte.SIZE);
+ long hash = MurmurHash3.murmurhash3_x64_128(hashInput, 0, hashInput.length, 47)[0];
+ hash <<= (64 - NUM_HASH_BITS);
+ long hashMask = -1L << (Long.SIZE - NUM_HASH_BITS);
+ // this id will be part of generated codec, a negative number won't be allowed in class name.
+ return Math.abs(hash) & hashMask;
+ }
+
static int nativeKindCode(int typeId) {
switch (typeId) {
case Types.STRUCT:
diff --git a/java/fory-core/src/main/java/org/apache/fory/meta/TypeDef.java
b/java/fory-core/src/main/java/org/apache/fory/meta/TypeDef.java
index 6069472e9..838e7f36d 100644
--- a/java/fory-core/src/main/java/org/apache/fory/meta/TypeDef.java
+++ b/java/fory-core/src/main/java/org/apache/fory/meta/TypeDef.java
@@ -116,10 +116,8 @@ public class TypeDef implements Serializable {
public static void skipTypeDef(MemoryBuffer buffer, long id) {
// Header-cache hits intentionally treat the current body as opaque bytes and skip by the size
- // in
- // the current header. Parsed TypeDefs are published to the cache only after successful body
- // parse
- // and 52-bit body-hash validation; cache hits must not reparse or rehash that body.
+ // in the current header. Parsed TypeDefs are published to the cache only after successful body
+ // parse and 52-bit metadata-hash validation; cache hits must not reparse or rehash that body.
int size = (int) (id & META_SIZE_MASKS);
if (size == META_SIZE_MASKS) {
int extendedSize = buffer.readVarUInt32Small14();
diff --git
a/java/fory-core/src/main/java/org/apache/fory/resolver/TypeResolver.java
b/java/fory-core/src/main/java/org/apache/fory/resolver/TypeResolver.java
index c5856d6d2..4c690c9a0 100644
--- a/java/fory-core/src/main/java/org/apache/fory/resolver/TypeResolver.java
+++ b/java/fory-core/src/main/java/org/apache/fory/resolver/TypeResolver.java
@@ -761,7 +761,7 @@ public abstract class TypeResolver {
simpleClassNameBytes = metaStringReader.readMetaString(buffer,
typeNameBytesCache);
// MetaStringReader returns the provided cache object only when the wire
identity matches. For
- // big meta strings, body-hash validation happens before the entry is
first cached.
+ // big meta strings, metadata-hash validation happens before the entry
is first cached.
if (typeNameBytesCache == simpleClassNameBytes && packageNameBytesCache
== namespaceBytes) {
return typeInfoCache;
}
@@ -793,7 +793,7 @@ public abstract class TypeResolver {
} else {
// New type in stream, with optimized reuse by validated TypeDef header.
A header-cache
// hit intentionally skips the body without rehashing: entries are
published only after the
- // TypeDef body has parsed successfully and matched the 52-bit body hash.
+ // TypeDef body has parsed successfully and matched the 52-bit metadata
hash.
long id = buffer.readInt64();
typeInfo = extRegistry.typeInfoByTypeDefId.get(id);
if (typeInfo != null) {
diff --git
a/java/fory-core/src/test/java/org/apache/fory/meta/NativeTypeDefEncoderTest.java
b/java/fory-core/src/test/java/org/apache/fory/meta/NativeTypeDefEncoderTest.java
index 22d988dfc..0a6bd9467 100644
---
a/java/fory-core/src/test/java/org/apache/fory/meta/NativeTypeDefEncoderTest.java
+++
b/java/fory-core/src/test/java/org/apache/fory/meta/NativeTypeDefEncoderTest.java
@@ -37,6 +37,7 @@ import org.apache.fory.test.bean.BeanA;
import org.apache.fory.test.bean.MapFields;
import org.apache.fory.test.bean.Struct;
import org.apache.fory.type.Types;
+import org.apache.fory.util.MurmurHash3;
import org.testng.Assert;
import org.testng.annotations.Test;
@@ -258,6 +259,17 @@ public class NativeTypeDefEncoderTest {
() -> TypeDef.readTypeDef(fory.getTypeResolver(),
MemoryBuffer.fromByteArray(malformed)));
}
+ @Test
+ public void testDecodeRejectsBodyOnlyHeaderHash() {
+ Fory fory = Fory.builder().withMetaShare(true).build();
+ TypeDef typeDef = TypeDef.buildTypeDef(fory.getTypeResolver(), Foo1.class);
+ byte[] malformed = rewriteHeaderWithBodyOnlyHash(typeDef);
+
+ Assert.assertThrows(
+ DeserializationException.class,
+ () -> TypeDef.readTypeDef(fory.getTypeResolver(),
MemoryBuffer.fromByteArray(malformed)));
+ }
+
@Test
public void testDecodeRejectsHashConsistentMalformedTypeDefBody() {
Fory fory = Fory.builder().withMetaShare(true).build();
@@ -277,6 +289,35 @@ public class NativeTypeDefEncoderTest {
return malformed;
}
+ private static byte[] rewriteHeaderWithBodyOnlyHash(TypeDef typeDef) {
+ byte[] malformed = typeDef.getEncoded().clone();
+ MemoryBuffer buffer = MemoryBuffer.fromByteArray(malformed);
+ long header = buffer.readInt64();
+ int bodyOffset = typeDefBodyOffset(malformed);
+ int size = malformed.length - bodyOffset;
+ long hashMask = -1L << (Long.SIZE - TypeDef.NUM_HASH_BITS);
+ long bodyOnlyHash = bodyOnlyTypeDefHashBits(malformed, bodyOffset, size);
+ Assert.assertNotEquals(header & hashMask, bodyOnlyHash);
+ MemoryBuffer.fromByteArray(malformed).putInt64(0, bodyOnlyHash | (header &
~hashMask));
+ return malformed;
+ }
+
+ private static long bodyOnlyTypeDefHashBits(byte[] bytes, int offset, int
size) {
+ long hash = MurmurHash3.murmurhash3_x64_128(bytes, offset, size, 47)[0];
+ hash <<= (Long.SIZE - TypeDef.NUM_HASH_BITS);
+ long hashMask = -1L << (Long.SIZE - TypeDef.NUM_HASH_BITS);
+ return Math.abs(hash) & hashMask;
+ }
+
+ private static int typeDefBodyOffset(byte[] encoded) {
+ MemoryBuffer buffer = MemoryBuffer.fromByteArray(encoded);
+ long header = buffer.readInt64();
+ if ((header & TypeDef.META_SIZE_MASKS) == TypeDef.META_SIZE_MASKS) {
+ buffer.readVarUInt32Small14();
+ }
+ return buffer.readerIndex();
+ }
+
private static int indexOf(byte[] bytes, byte[] needle, int fromIndex) {
for (int i = fromIndex; i <= bytes.length - needle.length; i++) {
boolean match = true;
diff --git
a/java/fory-core/src/test/java/org/apache/fory/meta/TypeDefEncoderTest.java
b/java/fory-core/src/test/java/org/apache/fory/meta/TypeDefEncoderTest.java
index c62bbee1d..a4978d4c7 100644
--- a/java/fory-core/src/test/java/org/apache/fory/meta/TypeDefEncoderTest.java
+++ b/java/fory-core/src/test/java/org/apache/fory/meta/TypeDefEncoderTest.java
@@ -30,6 +30,7 @@ import org.apache.fory.exception.DeserializationException;
import org.apache.fory.memory.MemoryBuffer;
import org.apache.fory.resolver.TypeResolver;
import org.apache.fory.type.Types;
+import org.apache.fory.util.MurmurHash3;
import org.testng.Assert;
import org.testng.annotations.Test;
@@ -483,6 +484,18 @@ public class TypeDefEncoderTest {
() -> TypeDef.readTypeDef(fory.getTypeResolver(),
MemoryBuffer.fromByteArray(malformed)));
}
+ @Test
+ public void testDecodeRejectsBodyOnlyHeaderHash() {
+ Fory fory =
Fory.builder().withXlang(true).withCompatible(false).withMetaShare(true).build();
+ fory.register(ClassWithNoAnnotations.class);
+ TypeDef typeDef = TypeDef.buildTypeDef(fory.getTypeResolver(),
ClassWithNoAnnotations.class);
+ byte[] malformed = rewriteHeaderWithBodyOnlyHash(typeDef);
+
+ Assert.assertThrows(
+ DeserializationException.class,
+ () -> TypeDef.readTypeDef(fory.getTypeResolver(),
MemoryBuffer.fromByteArray(malformed)));
+ }
+
@Test
public void testDecodeRejectsHashConsistentMalformedTypeDefBody() {
Fory fory =
Fory.builder().withXlang(true).withCompatible(false).withMetaShare(true).build();
@@ -537,6 +550,26 @@ public class TypeDefEncoderTest {
return malformed;
}
+ private static byte[] rewriteHeaderWithBodyOnlyHash(TypeDef typeDef) {
+ byte[] malformed = typeDef.getEncoded().clone();
+ MemoryBuffer buffer = MemoryBuffer.fromByteArray(malformed);
+ long header = buffer.readInt64();
+ int bodyOffset = typeDefBodyOffset(malformed);
+ int size = malformed.length - bodyOffset;
+ long hashMask = -1L << (Long.SIZE - TypeDef.NUM_HASH_BITS);
+ long bodyOnlyHash = bodyOnlyTypeDefHashBits(malformed, bodyOffset, size);
+ Assert.assertNotEquals(header & hashMask, bodyOnlyHash);
+ MemoryBuffer.fromByteArray(malformed).putInt64(0, bodyOnlyHash | (header &
~hashMask));
+ return malformed;
+ }
+
+ private static long bodyOnlyTypeDefHashBits(byte[] bytes, int offset, int
size) {
+ long hash = MurmurHash3.murmurhash3_x64_128(bytes, offset, size, 47)[0];
+ hash <<= (Long.SIZE - TypeDef.NUM_HASH_BITS);
+ long hashMask = -1L << (Long.SIZE - TypeDef.NUM_HASH_BITS);
+ return Math.abs(hash) & hashMask;
+ }
+
private static int indexOf(byte[] bytes, byte[] needle, int fromIndex) {
for (int i = fromIndex; i <= bytes.length - needle.length; i++) {
boolean match = true;
diff --git a/javascript/packages/core/lib/context.ts
b/javascript/packages/core/lib/context.ts
index 956c90445..0f59afe69 100644
--- a/javascript/packages/core/lib/context.ts
+++ b/javascript/packages/core/lib/context.ts
@@ -716,7 +716,7 @@ export class ReadContext {
let typeMeta: TypeMeta;
if (cached) {
// Header-cache hits intentionally skip without rehashing. Entries reach
this cache only
- // after a successful TypeMeta parse and 52-bit body-hash validation.
The current body
+ // after a successful TypeMeta parse and 52-bit metadata-hash
validation. The current body
// size still comes from the current header bytes, not from the cached
TypeMeta.
TypeMeta.skipBodyByHeaderLow(this.reader, headerLow);
typeMeta = cached;
diff --git a/javascript/packages/core/lib/meta/TypeMeta.ts
b/javascript/packages/core/lib/meta/TypeMeta.ts
index 321b26ec2..3bec55168 100644
--- a/javascript/packages/core/lib/meta/TypeMeta.ts
+++ b/javascript/packages/core/lib/meta/TypeMeta.ts
@@ -552,7 +552,10 @@ export class TypeMeta {
}
private static validateParsedBodyHash(header: bigint, body: Uint8Array) {
- const expectedHeaderHash = TypeMeta.headerHashBits(body);
+ const expectedHeaderHash = TypeMeta.headerHashBits(
+ body,
+ header & ~HEADER_HASH_MASK,
+ );
const actualHeaderHash = header & HEADER_HASH_MASK;
if (expectedHeaderHash !== actualHeaderHash) {
throw new Error("TypeMeta metadata hash mismatch");
@@ -954,19 +957,24 @@ export class TypeMeta {
}
private static buildHeader(buffer: Uint8Array, isCompressed: boolean) {
- let header = TypeMeta.headerHashBits(buffer);
+ let headerLowBits = BigInt(Math.min(buffer.length, META_SIZE_MASKS));
if (isCompressed) {
- header |= COMPRESS_META_FLAG;
+ headerLowBits |= COMPRESS_META_FLAG;
}
- header |= BigInt(Math.min(buffer.length, META_SIZE_MASKS));
+ const header = TypeMeta.headerHashBits(buffer, headerLowBits)
+ | headerLowBits;
return {
header: BigInt.asUintN(64, header),
headerHash: Number(header >> HASH_SHIFT_BITS),
};
}
- private static headerHashBits(buffer: Uint8Array) {
- const hash = x64hash128(buffer, 47);
+ private static headerHashBits(buffer: Uint8Array, headerLowBits: bigint) {
+ const hashInput = new Uint8Array(buffer.length + 2);
+ hashInput.set(buffer);
+ hashInput[buffer.length] = Number(headerLowBits & 0xffn);
+ hashInput[buffer.length + 1] = Number((headerLowBits >> 8n) & 0xffn);
+ const hash = x64hash128(hashInput, 47);
// Read the high 64 bits of the 128-bit MurmurHash3 as a SIGNED
// int64 to match pyfory (`hash_buffer()[0]` unpacks `int64_t[0]`),
// java (`murmurhash3_x64_128(...)[0]` returns `long`), and rust
diff --git a/javascript/test/typemeta.test.ts b/javascript/test/typemeta.test.ts
index 29de8dda2..66fba3e69 100644
--- a/javascript/test/typemeta.test.ts
+++ b/javascript/test/typemeta.test.ts
@@ -25,6 +25,7 @@ import Fory, {
} from "../packages/core/index";
import { ReadContext } from "../packages/core/lib/context";
import { TypeMeta } from "../packages/core/lib/meta/TypeMeta";
+import { x64hash128 } from "../packages/core/lib/murmurHash3";
import { BinaryReader } from "../packages/core/lib/reader";
import { BinaryWriter } from "../packages/core/lib/writer";
import { describe, expect, test } from "@jest/globals";
@@ -33,6 +34,9 @@ const COMPRESS_META_FLAG = 1n << 8n;
const RESERVED_META_FLAGS = 0b111n << 9n;
const META_SIZE_MASK = 0xffn;
const HASH_SHIFT_BITS = 12n;
+const LOW_HEADER_BITS_MASK = (1n << HASH_SHIFT_BITS) - 1n;
+const UINT64_MASK = (1n << 64n) - 1n;
+const HEADER_HASH_MASK = UINT64_MASK ^ LOW_HEADER_BITS_MASK;
describe("typemeta", () => {
test("writes TypeMeta header bits in the xlang layout", () => {
@@ -108,6 +112,36 @@ describe("typemeta", () => {
expect(skipReader.readGetCursor()).toBe(bytes.length);
});
+ test("includes TypeMeta header low bits in the metadata hash", () => {
+ const bytes = TypeMeta.fromTypeInfo(
+ Type.struct(7007, {
+ value: Type.string().setId(1),
+ }),
+ ).toBytes();
+ const malformed = new Uint8Array(bytes);
+ const view = new DataView(
+ malformed.buffer,
+ malformed.byteOffset,
+ malformed.byteLength,
+ );
+ const header = view.getBigUint64(0, true);
+ const bodyOffset = typeMetaBodyOffset(bytes);
+ const bodyOnlyHash = bodyOnlyHeaderHashBits(bytes.subarray(bodyOffset));
+ expect(header & HEADER_HASH_MASK).not.toBe(bodyOnlyHash);
+
+ view.setBigUint64(
+ 0,
+ bodyOnlyHash | (header & LOW_HEADER_BITS_MASK),
+ true,
+ );
+ const reader = new BinaryReader({});
+ reader.reset(malformed);
+
+ expect(() => TypeMeta.fromBytes(reader)).toThrow(
+ "TypeMeta metadata hash mismatch",
+ );
+ });
+
test("TypeMeta header cache hit skips the current body size", () => {
const header = 0xffn;
const typeMeta = TypeMeta.fromTypeInfo(Type.struct(7010, {}));
@@ -643,3 +677,15 @@ function typeMetaBodyOffset(bytes: Uint8Array) {
}
return reader.readGetCursor();
}
+
+function bodyOnlyHeaderHashBits(buffer: Uint8Array) {
+ const hash = x64hash128(buffer, 47);
+ let header = BigInt.asIntN(
+ 64,
+ hash.getBigInt64(0, false) << HASH_SHIFT_BITS,
+ );
+ if (header < 0n) {
+ header = -header;
+ }
+ return BigInt.asUintN(64, header) & HEADER_HASH_MASK;
+}
diff --git a/python/pyfory/meta/typedef.py b/python/pyfory/meta/typedef.py
index 97551acc4..cb3057962 100644
--- a/python/pyfory/meta/typedef.py
+++ b/python/pyfory/meta/typedef.py
@@ -121,8 +121,9 @@ def xlang_non_struct_type_id(kind_code: int) -> int:
raise ValueError(f"Unsupported TypeDef kind code {kind_code}") from exc
-def _typedef_header_hash(encoded: bytes) -> int:
- hash_value = hash_buffer(encoded, 47)[0]
+def _typedef_header_hash(encoded: bytes, header_low_bits: int) -> int:
+ hash_input = encoded + bytes((header_low_bits & 0xFF, (header_low_bits >> 8) & 0xFF))
+ hash_value = hash_buffer(hash_input, 47)[0]
shifted = (hash_value << TYPEDEF_HASH_SHIFT) & _UINT64_MASK
if shifted >= (1 << 63):
shifted -= 1 << 64
diff --git a/python/pyfory/meta/typedef_decoder.py
b/python/pyfory/meta/typedef_decoder.py
index f221b4146..3f2b224c1 100644
--- a/python/pyfory/meta/typedef_decoder.py
+++ b/python/pyfory/meta/typedef_decoder.py
@@ -45,6 +45,7 @@ from pyfory.meta.typedef import (
is_named_typedef_kind,
xlang_non_struct_type_id,
_typedef_header_hash,
+ _UINT64_MASK,
)
from pyfory.types import TypeId
from pyfory._fory import NO_USER_TYPE_ID
@@ -204,7 +205,8 @@ def decode_typedef(buffer: Buffer, resolver, header=None) -> TypeDef:
def _validate_parsed_typedef_hash(header: int, encoded_meta_data: bytes) -> None:
- if _typedef_header_hash(encoded_meta_data) != (header & TYPEDEF_HASH_MASK):
+ header_bits = header & _UINT64_MASK
+ if _typedef_header_hash(encoded_meta_data, header_bits & ~TYPEDEF_HASH_MASK) != (header_bits & TYPEDEF_HASH_MASK):
raise ValueError("Invalid TypeDef metadata hash")
diff --git a/python/pyfory/meta/typedef_encoder.py
b/python/pyfory/meta/typedef_encoder.py
index 69d83ba61..17051f7c7 100644
--- a/python/pyfory/meta/typedef_encoder.py
+++ b/python/pyfory/meta/typedef_encoder.py
@@ -132,11 +132,11 @@ def encode_typedef(type_resolver, cls, include_fields:
bool = True):
def prepend_header(buffer: bytes, is_compressed: bool):
"""Prepend header to the buffer."""
meta_size = len(buffer)
- header = _typedef_header_hash(buffer)
+ header_low_bits = min(meta_size, META_SIZE_MASKS)
if is_compressed:
- header |= COMPRESS_META_FLAG
+ header_low_bits |= COMPRESS_META_FLAG
- header |= min(meta_size, META_SIZE_MASKS)
+ header = _typedef_header_hash(buffer, header_low_bits) | header_low_bits
if header >= (1 << 63):
header -= 1 << 64
result = Buffer.allocate(meta_size + 8)
diff --git a/python/pyfory/registry.py b/python/pyfory/registry.py
index 50a869319..8a4cb030f 100644
--- a/python/pyfory/registry.py
+++ b/python/pyfory/registry.py
@@ -1162,7 +1162,7 @@ class TypeResolver:
type_info = self._meta_shared_type_info.get(header)
if type_info is not None:
# Header-cache hits intentionally skip without rehashing. Entries
reach this cache only
- # after a successful TypeDef parse and 52-bit body-hash validation.
+ # after a successful TypeDef parse and 52-bit metadata-hash
validation.
skip_typedef(buffer, header)
return type_info
type_def = decode_typedef(buffer, self, header=header)
diff --git a/python/pyfory/serialization.pyx b/python/pyfory/serialization.pyx
index 004ba5b6e..733156cbc 100644
--- a/python/pyfory/serialization.pyx
+++ b/python/pyfory/serialization.pyx
@@ -561,7 +561,7 @@ cdef class TypeResolver:
cdef object type_def
if typeinfo is not None:
# Header-cache hits intentionally skip without rehashing. Entries
reach this cache only
- # after a successful TypeDef parse and 52-bit body-hash validation.
+ # after a successful TypeDef parse and 52-bit metadata-hash
validation.
_skip_typedef_fast(buffer, header)
return typeinfo
type_def = decode_typedef(buffer, self.resolver, header=header)
diff --git a/python/pyfory/tests/test_typedef_encoding.py
b/python/pyfory/tests/test_typedef_encoding.py
index ed93d3fea..e91a42893 100644
--- a/python/pyfory/tests/test_typedef_encoding.py
+++ b/python/pyfory/tests/test_typedef_encoding.py
@@ -36,6 +36,11 @@ from pyfory.meta.typedef import (
DynamicFieldType,
FIELD_NAME_ENCODINGS,
COMPRESS_META_FLAG,
+ META_SIZE_MASKS,
+ TYPEDEF_HASH_MASK,
+ TYPEDEF_HASH_SHIFT,
+ _INT64_MIN,
+ _UINT64_MASK,
)
from pyfory.meta.typedef_encoder import (
FIELD_NAME_ENCODER,
@@ -47,6 +52,7 @@ from pyfory.serializer import PyArraySerializer
from pyfory.types import TypeId
from pyfory import Fory
from pyfory.error import TypeNotCompatibleError
+from pyfory.lib.mmh3 import hash_buffer
try:
import numpy as np
@@ -252,6 +258,16 @@ def
test_decode_typedef_rejects_parsed_body_with_mismatched_hash():
decode_typedef(Buffer(malformed), fory.type_resolver)
+def test_decode_typedef_rejects_body_only_header_hash():
+ fory = Fory(xlang=True, compatible=False)
+ fory.register(SimpleTypeDef, namespace="example", typename="SimpleTypeDef")
+ typedef = encode_typedef(fory.type_resolver, SimpleTypeDef)
+ malformed = _rewrite_header_with_body_only_hash(typedef.encoded)
+
+ with pytest.raises(ValueError, match="Invalid TypeDef metadata hash"):
+ decode_typedef(Buffer(malformed), fory.type_resolver)
+
+
def test_decode_typedef_rejects_hash_consistent_malformed_body():
fory = Fory(xlang=True, compatible=False)
encoded = prepend_header(b"\x00", False)
@@ -319,11 +335,33 @@ def _corrupt_encoded_field_name(typedef, field_name):
def _typedef_body_offset(encoded):
buffer = Buffer(encoded)
header = buffer.read_int64()
- if header & 0xFF == 0xFF:
+ if header & META_SIZE_MASKS == META_SIZE_MASKS:
buffer.read_var_uint32()
return buffer.get_reader_index()
+def _rewrite_header_with_body_only_hash(encoded):
+ malformed = bytearray(encoded)
+ buffer = Buffer(encoded)
+ header = buffer.read_int64() & _UINT64_MASK
+ body_offset = _typedef_body_offset(encoded)
+ body_only_hash = _body_only_typedef_hash_bits(encoded[body_offset:])
+ assert header & TYPEDEF_HASH_MASK != body_only_hash
+ rewritten_header = body_only_hash | (header & ~TYPEDEF_HASH_MASK)
+ malformed[:8] = rewritten_header.to_bytes(8, "little", signed=False)
+ return bytes(malformed)
+
+
+def _body_only_typedef_hash_bits(encoded_body):
+ hash_value = hash_buffer(encoded_body, 47)[0]
+ shifted = (hash_value << TYPEDEF_HASH_SHIFT) & _UINT64_MASK
+ if shifted >= (1 << 63):
+ shifted -= 1 << 64
+ if shifted != _INT64_MIN and shifted < 0:
+ shifted = -shifted
+ return (shifted & _UINT64_MASK) & TYPEDEF_HASH_MASK
+
+
def test_nested_container_typedef_preserves_declared_encoding():
fory = Fory(xlang=True, compatible=False)
fory.register(NestedEncodingTypeDef, namespace="example",
typename="NestedEncodingTypeDef")
diff --git a/rust/fory-core/src/meta/type_meta.rs
b/rust/fory-core/src/meta/type_meta.rs
index 7b3b8ef37..02b96ba04 100644
--- a/rust/fory-core/src/meta/type_meta.rs
+++ b/rust/fory-core/src/meta/type_meta.rs
@@ -167,15 +167,20 @@ fn read_type_meta_body_size(reader: &mut Reader, header:
i64) -> Result<usize, E
}
#[inline(always)]
-fn type_meta_hash_bits(body: &[u8]) -> u64 {
- let hash_value = murmurhash3_x64_128(body, 47).0 as i64;
+fn type_meta_hash_bits(body: &[u8], header_low_bits: u64) -> u64 {
+ let mut hash_input = Vec::with_capacity(body.len() + 2);
+ hash_input.extend_from_slice(body);
+ hash_input.push(header_low_bits as u8);
+ hash_input.push((header_low_bits >> 8) as u8);
+ let hash_value = murmurhash3_x64_128(&hash_input, 47).0 as i64;
hash_value.wrapping_shl(TYPE_META_HASH_SHIFT).wrapping_abs() as u64
}
#[inline(always)]
fn validate_type_meta_body_hash(header: i64, body: &[u8]) -> Result<(), Error> {
let hash_mask = u64::MAX << TYPE_META_HASH_SHIFT;
- if ((header as u64) & hash_mask) != (type_meta_hash_bits(body) & hash_mask) {
+ let expected_hash = type_meta_hash_bits(body, (header as u64) & !hash_mask);
+ if ((header as u64) & hash_mask) != (expected_hash & hash_mask) {
return Err(Error::invalid_data("TypeMeta metadata hash mismatch"));
}
Ok(())
@@ -1136,7 +1141,8 @@ impl TypeMeta {
if is_compressed {
header |= COMPRESS_META_FLAG;
}
- let meta_hash_shifted = type_meta_hash_bits(meta_writer.dump().as_slice()) as i64;
+ let meta_hash_shifted =
+ type_meta_hash_bits(meta_writer.dump().as_slice(), header as u64) as i64;
let meta_hash = meta_hash_shifted >> TYPE_META_HASH_SHIFT;
header |= meta_hash_shifted;
result.write_i64(header);
@@ -1176,6 +1182,34 @@ mod tests {
assert!(message.contains("hash mismatch"));
}
+ #[test]
+ fn rejects_body_only_header_hash() {
+ let meta = TypeMeta::new(
+ STRUCT,
+ 1,
+ MetaString::get_empty().clone(),
+ MetaString::get_empty().clone(),
+ false,
+ vec![],
+ )
+ .unwrap();
+ let (mut bytes, _) = meta.to_bytes().unwrap();
+ let header = i64::from_le_bytes(bytes[0..8].try_into().unwrap()) as
u64;
+ let hash_mask = u64::MAX << TYPE_META_HASH_SHIFT;
+ let body_only_hash = body_only_type_meta_hash_bits(&bytes[8..]);
+ assert_ne!(header & hash_mask, body_only_hash);
+ let rewritten_header = body_only_hash | (header & !hash_mask);
+ bytes[0..8].copy_from_slice(&(rewritten_header as i64).to_le_bytes());
+
+ let mut reader = Reader::new(&bytes);
+ let result = TypeMeta::from_bytes(&mut reader,
&TypeResolver::default());
+ let message = result
+ .err()
+ .map(|error| error.to_string())
+ .unwrap_or_default();
+ assert!(message.contains("hash mismatch"));
+ }
+
#[test]
fn rejects_hash_consistent_trailing_body_bytes() {
let meta = TypeMeta::new(
@@ -1193,8 +1227,9 @@ mod tests {
let mut frame = vec![];
let mut writer = Writer::from_buffer(&mut frame);
let body_size = body.len() as i64;
- let mut header = type_meta_hash_bits(&body) as i64;
- header |= min(META_SIZE_MASK, body_size);
+ let header_low_bits = min(META_SIZE_MASK, body_size);
+ let mut header = type_meta_hash_bits(&body, header_low_bits as u64) as
i64;
+ header |= header_low_bits;
writer.write_i64(header);
if body_size >= META_SIZE_MASK {
writer.write_var_u32((body_size - META_SIZE_MASK) as u32);
@@ -1209,4 +1244,10 @@ mod tests {
.unwrap_or_default();
assert!(message.contains("metadata size"));
}
+
+ fn body_only_type_meta_hash_bits(body: &[u8]) -> u64 {
+ let hash_value = murmurhash3_x64_128(body, 47).0 as i64;
+ let shifted = hash_value << TYPE_META_HASH_SHIFT;
+ shifted.wrapping_abs() as u64 & (u64::MAX << TYPE_META_HASH_SHIFT)
+ }
}
diff --git a/rust/fory-core/src/resolver/meta_resolver.rs
b/rust/fory-core/src/resolver/meta_resolver.rs
index 260a65161..103c2fdb9 100644
--- a/rust/fory-core/src/resolver/meta_resolver.rs
+++ b/rust/fory-core/src/resolver/meta_resolver.rs
@@ -149,14 +149,14 @@ impl MetaReaderResolver {
.filter(|_| self.last_meta_header == meta_header)
{
// Header-cache hits intentionally skip without rehashing.
Entries reach this cache
- // only after a successful TypeMeta parse and 52-bit body-hash
validation.
+ // only after a successful TypeMeta parse and 52-bit
metadata-hash validation.
self.reading_type_infos.push(type_info.clone());
TypeMeta::skip_bytes_for_validated_header(reader,
meta_header)?;
return Ok(type_info.clone());
}
if let Some(type_info) = self.parsed_type_infos.get(&meta_header) {
// Header-cache hits intentionally skip without rehashing.
Entries reach this cache
- // only after a successful TypeMeta parse and 52-bit body-hash
validation.
+ // only after a successful TypeMeta parse and 52-bit
metadata-hash validation.
self.last_meta_header = meta_header;
self.last_type_info = Some(type_info.clone());
self.reading_type_infos.push(type_info.clone());
diff --git a/swift/Sources/Fory/ReadContext.swift
b/swift/Sources/Fory/ReadContext.swift
index 922c17019..6d1967d59 100644
--- a/swift/Sources/Fory/ReadContext.swift
+++ b/swift/Sources/Fory/ReadContext.swift
@@ -269,7 +269,7 @@ public final class ReadContext {
}
if header == localTypeDefHeader {
// Header-cache hits intentionally skip without rehashing.
Entries reach this
- // cache only after a successful TypeDef parse and 52-bit
body-hash validation.
+ // cache only after a successful TypeDef parse and 52-bit
metadata-hash validation.
compatibleTypeDefTypeInfos.push(localTypeInfo)
try buffer.skip(bodySize)
return nil
@@ -304,7 +304,7 @@ public final class ReadContext {
}
if let cached = typeResolver.getTypeInfo(forHeader: header) {
// Header-cache hits intentionally skip without rehashing. Entries
reach this cache only
- // after a successful TypeDef parse and 52-bit body-hash
validation.
+ // after a successful TypeDef parse and 52-bit metadata-hash
validation.
try buffer.skip(bodySize)
compatibleTypeDefTypeInfos.push(cached)
return cached
@@ -341,7 +341,7 @@ public final class ReadContext {
if header == localTypeDefHeader {
// Header-cache hits intentionally skip without rehashing.
Entries reach this
- // cache only after a successful TypeDef parse and 52-bit
body-hash validation.
+ // cache only after a successful TypeDef parse and 52-bit
metadata-hash validation.
compatibleTypeDefTypeInfos.push(localTypeInfo)
try buffer.skip(bodySize)
return localTypeInfo
diff --git a/swift/Sources/Fory/TypeMeta.swift
b/swift/Sources/Fory/TypeMeta.swift
index 4c123b2bb..ebb6d7850 100644
--- a/swift/Sources/Fory/TypeMeta.swift
+++ b/swift/Sources/Fory/TypeMeta.swift
@@ -310,11 +310,11 @@ public final class TypeMeta: Equatable, @unchecked Sendable {
}
let body = try encodeBody()
- var header = Self.typeMetaHeaderHash(body)
+ var headerLowBits = UInt64(min(body.count, Int(typeMetaSizeMask)))
if compressed {
- header |= typeMetaCompressedFlag
+ headerLowBits |= typeMetaCompressedFlag
}
- header |= UInt64(min(body.count, Int(typeMetaSizeMask)))
+ let header = Self.typeMetaHeaderHash(body, headerLowBits: headerLowBits) | headerLowBits
let buffer = ByteBuffer(capacity: body.count + 16)
buffer.writeUInt64(header)
@@ -407,7 +407,8 @@ public final class TypeMeta: Equatable, @unchecked Sendable {
if bodyReader.remaining != 0 {
throw ForyError.invalidData("unexpected trailing bytes in TypeMeta body")
}
- if (header & Self.hashMask()) != Self.typeMetaHeaderHash(encodedBody) {
+ if (header & Self.hashMask())
+ != Self.typeMetaHeaderHash(encodedBody, headerLowBits: header & ~Self.hashMask()) {
throw ForyError.invalidData("invalid TypeMeta metadata hash")
}
@@ -470,8 +471,11 @@ public final class TypeMeta: Equatable, @unchecked Sendable {
UInt64.max << (64 - typeMetaNumHashBits)
}
- private static func typeMetaHeaderHash(_ body: [UInt8]) -> UInt64 {
- let bodyHash = MurmurHash3.x64_128(body, seed: typeMetaHashSeed).0
+ private static func typeMetaHeaderHash(_ body: [UInt8], headerLowBits: UInt64) -> UInt64 {
+ var hashInput = body
+ hashInput.append(UInt8(truncatingIfNeeded: headerLowBits))
+ hashInput.append(UInt8(truncatingIfNeeded: headerLowBits >> 8))
+ let bodyHash = MurmurHash3.x64_128(hashInput, seed: typeMetaHashSeed).0
let shifted = bodyHash << (64 - typeMetaNumHashBits)
let signed = Int64(bitPattern: shifted)
let absSigned = signed == Int64.min ? signed : Swift.abs(signed)
diff --git a/swift/Tests/ForyTests/ForySwiftTests.swift
b/swift/Tests/ForyTests/ForySwiftTests.swift
index dbdf4ce4c..a6ff197d3 100644
--- a/swift/Tests/ForyTests/ForySwiftTests.swift
+++ b/swift/Tests/ForyTests/ForySwiftTests.swift
@@ -1280,3 +1280,39 @@ func typeMetaRoundTripByID() throws {
#expect(decoded.userTypeID == 101)
#expect(decoded.fields.isEmpty)
}
+
+@Test
+func typeMetaHeaderHashIncludesHeaderLowBits() throws {
+ let emptyNamespace = MetaString.empty(specialChar1: ".", specialChar2: "_")
+ let emptyTypeName = MetaString.empty(specialChar1: "$", specialChar2: "_")
+
+ let meta = try TypeMeta(
+ typeID: TypeId.structType.rawValue,
+ userTypeID: 102,
+ namespace: emptyNamespace,
+ typeName: emptyTypeName,
+ registerByName: false,
+ fields: []
+ )
+
+ var encoded = try meta.encode()
+ let header = try ByteBuffer(bytes: encoded).readUInt64()
+ let hashMask = UInt64.max << 12
+ let bodyOnlyHash = bodyOnlyTypeMetaHeaderHash(Array(encoded.dropFirst(8)))
+ #expect((header & hashMask) != bodyOnlyHash)
+ let rewrittenHeader = bodyOnlyHash | (header & ~hashMask)
+ for index in 0..<8 {
+ encoded[index] = UInt8(truncatingIfNeeded: rewrittenHeader >> (index * 8))
+ }
+
+ #expect(throws: ForyError.self) {
+ _ = try TypeMeta.decode(encoded)
+ }
+}
+
+private func bodyOnlyTypeMetaHeaderHash(_ body: [UInt8]) -> UInt64 {
+ let shifted = MurmurHash3.x64_128(body, seed: 47).0 << 12
+ let signed = Int64(bitPattern: shifted)
+ let absSigned = signed == Int64.min ? signed : Swift.abs(signed)
+ return UInt64(bitPattern: absSigned) & (UInt64.max << 12)
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]