This is an automated email from the ASF dual-hosted git repository. gkoszyk pushed a commit to branch partition_redesign in repository https://gitbox.apache.org/repos/asf/iggy.git
commit b9769a14840f0af335ca030dfba813a5e9a91d0f Author: numinex <[email protected]> AuthorDate: Wed Mar 18 19:57:22 2026 +0100 proposal --- core/buf/src/lib.rs | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 160 insertions(+), 1 deletion(-) diff --git a/core/buf/src/lib.rs b/core/buf/src/lib.rs index e6b4065d8..654aa02e1 100644 --- a/core/buf/src/lib.rs +++ b/core/buf/src/lib.rs @@ -1,5 +1,7 @@ +use std::alloc::{Layout, alloc, dealloc}; use std::mem::ManuallyDrop; -use std::ptr::NonNull; +use std::ops::{Deref, RangeBounds, Bound}; +use std::ptr::{self, NonNull, slice_from_raw_parts_mut}; use std::slice; use std::sync::atomic::{AtomicUsize, Ordering, fence}; @@ -98,6 +100,7 @@ impl Owned { impl Half { fn as_slice(&self) -> &[u8] { + self.ptr.as_ptr(); // SAFETY: `ptr,len` always describe a live allocation owned by `ctrlb`. unsafe { slice::from_raw_parts(self.ptr.as_ptr(), self.len) } } @@ -379,3 +382,159 @@ mod tests { assert!(!clone2.is_unique()); } } + +// ============================================================================= +// DESIGN: Packed Wide Pointer with Inline ControlBlock +// ============================================================================= +// +// GOAL: 16-byte sliceable refcounted buffer view, reusing ControlBlock for +// both Frozen and TwoHalves. +// +// ----------------------------------------------------------------------------- +// MEMORY LAYOUT +// ----------------------------------------------------------------------------- +// +// ┌─────────────────────────┬────────────────────────────────────────────┐ +// │ ControlBlock (16B) │ data bytes │ +// │ ref_count: AtomicUsize │ [0] [1] [2] [3] [4] [5] [6] [7] ... 
│ +// │ capacity: usize │ │ +// └─────────────────────────┴────────────────────────────────────────────┘ +// ^ ^ +// │ │ +// alloc_ptr data_start (offset = 0) +// +// ----------------------------------------------------------------------------- +// PACKED VIEW STRUCT (16 bytes) +// ----------------------------------------------------------------------------- +// +// struct PackedView { +// data: NonNull<[u8]>, // fat pointer: (ptr, packed_len) +// } +// +// The "length" portion of the fat pointer is PACKED: +// +// packed_len = (alloc_offset << 32) | actual_len +// +// ┌────────────────────────────────────────────────────────────────┐ +// │ 63 .............. 32 │ 31 ............................ 0 │ +// │ alloc_offset │ actual_len │ +// └────────────────────────────────────────────────────────────────┘ +// +// - actual_len: length of current view (max 4GB) +// - alloc_offset: distance from data_start to current ptr (max 4GB) +// +// ----------------------------------------------------------------------------- +// EXAMPLE: SLICING +// ----------------------------------------------------------------------------- +// +// Initial (full buffer, len=8): +// +// data.ptr ────────────────────────┐ +// ▼ +// ┌──────────────┬─────────────────────────────────────┐ +// │ ControlBlock │ [0] [1] [2] [3] [4] [5] [6] [7] │ +// └──────────────┴─────────────────────────────────────┘ +// +// packed_len = (0 << 32) | 8 // offset=0, len=8 +// +// +// After slice(3..6): +// +// data.ptr ──────────┐ +// ▼ +// ┌──────────────┬─────────────────────────────────────┐ +// │ ControlBlock │ [0] [1] [2] [3] [4] [5] [6] [7] │ +// └──────────────┴─────────────────────────────────────┘ +// +// packed_len = (3 << 32) | 3 // offset=3, len=3 +// +// ----------------------------------------------------------------------------- +// RECONSTRUCTING CONTROLBLOCK +// ----------------------------------------------------------------------------- +// +// fn ctrl_block(&self) -> &ControlBlock { +// let data_ptr 
= self.data.as_ptr() as *const u8; +// let offset = self.data.len() >> 32; +// let data_start = data_ptr.sub(offset); +// let ctrl_ptr = data_start.sub(size_of::<ControlBlock>()); +// &*ctrl_ptr.cast::<ControlBlock>() +// } +// +// ----------------------------------------------------------------------------- +// CONTROLBLOCK (shared by Frozen, TwoHalves, etc.) +// ----------------------------------------------------------------------------- +// +// #[repr(C)] +// struct ControlBlock { +// ref_count: AtomicUsize, // 8 bytes - atomic refcount +// capacity: usize, // 8 bytes - original alloc size for dealloc +// } +// +// - ref_count: shared by all views into this allocation +// - capacity: needed to reconstruct Layout for dealloc +// +// NOTE: No need for `base` or `len` - both reconstructable from PackedView! +// +// ----------------------------------------------------------------------------- +// USAGE IN FROZEN +// ----------------------------------------------------------------------------- +// +// pub struct Frozen(PackedView); // 16 bytes +// +// impl Frozen { +// fn slice(self, range: Range<usize>) -> Frozen { +// // Adjust ptr forward, update packed offset+len +// // No allocation, no refcount change (consumes self) +// } +// +// fn clone(&self) -> Frozen { +// self.ctrl_block().ref_count.fetch_add(1, Relaxed); +// Frozen(PackedView { data: self.0.data }) +// } +// +// fn drop(&mut self) { +// if self.ctrl_block().ref_count.fetch_sub(1, Release) == 1 { +// fence(Acquire); +// let layout = Layout::from_size_align( +// size_of::<ControlBlock>() + self.ctrl_block().capacity, +// align_of::<ControlBlock>(), +// ); +// dealloc(self.alloc_ptr(), layout); +// } +// } +// } +// +// ----------------------------------------------------------------------------- +// USAGE IN TWOHALVES +// ----------------------------------------------------------------------------- +// +// pub struct TwoHalves { +// head: PackedView, // 16 bytes - view into [0..split_at] +// tail: 
PackedView, // 16 bytes - view into [split_at..len] +// split_at: usize, // 8 bytes +// } +// +// Memory: +// +// head.ptr ───────────────┐ +// ▼ +// ┌──────────────┬───────────────┬───────────────────┐ +// │ ControlBlock │ [head bytes] │ [tail bytes] │ +// └──────────────┴───────────────┴───────────────────┘ +// ^ +// │ +// tail.ptr ───────────────────────┘ +// +// head.packed_len = (0 << 32) | split_at +// tail.packed_len = (split_at << 32) | (total_len - split_at) +// +// Both head and tail reconstruct the SAME ControlBlock. +// + +// The downside of this solution is that we only allow buffers of up to 4GB +// (if somebody sends a batch that is bigger than that, we'd have to reject it). +// The upside is that we can fit more of those in a single cache line, and the size is a power of two +// smaller than 64, so there is no wasted space. I think it's a worthwhile tradeoff, +// given that the access pattern (especially for Frozen) could involve iterating through +// a collection of those twice: once when searching for the right batch +// and once when submitting the buffer to the kernel, so cache locality is important. \ No newline at end of file
