From 8dac9ccf0725be3e518af9b0ccb57d67bee74c89 Mon Sep 17 00:00:00 2001 From: Erik Hollensbe Date: Sat, 16 Apr 2022 05:01:38 -0700 Subject: [PATCH] move to heap allocation Signed-off-by: Erik Hollensbe --- iblt/src/lib.rs | 173 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 134 insertions(+), 39 deletions(-) diff --git a/iblt/src/lib.rs b/iblt/src/lib.rs index 4228a6d4a..61abc4889 100644 --- a/iblt/src/lib.rs +++ b/iblt/src/lib.rs @@ -6,8 +6,6 @@ * https://www.zerotier.com/ */ -use std::borrow::Cow; - use zerocopy::{AsBytes, FromBytes}; #[cfg(not(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64", target_arch = "powerpc64")))] @@ -73,21 +71,23 @@ fn murmurhash32_mix32(mut x: u32) -> u32 { #[repr(C)] pub struct IBLT where - T: FromBytes + AsBytes + Sized + Clone, + T: FromBytes + AsBytes + Default + Sized + Clone, { - check_hash: [u32; BUCKETS], - count: [i8; BUCKETS], - key: [T; BUCKETS], + check_hash: Box>, + count: Box>, + key: Box>, } impl Clone for IBLT where - T: FromBytes + AsBytes + Sized + Clone, + T: FromBytes + AsBytes + Default + Sized + Clone, { fn clone(&self) -> Self { unsafe { - let mut tmp: Self = std::mem::MaybeUninit::uninit().assume_init(); - std::ptr::copy_nonoverlapping((self as *const Self).cast::(), (&mut tmp as *mut Self).cast::(), Self::SIZE_BYTES); + let mut tmp = Self::new(); + std::ptr::copy_nonoverlapping(self.check_hash.as_ptr(), tmp.check_hash.as_mut_ptr(), BUCKETS); + std::ptr::copy_nonoverlapping(self.count.as_ptr(), tmp.count.as_mut_ptr(), BUCKETS); + std::ptr::copy_nonoverlapping(self.key.as_ptr(), tmp.key.as_mut_ptr(), BUCKETS); tmp } } @@ -95,13 +95,12 @@ where impl IBLT where - T: FromBytes + AsBytes + Sized + Clone, + T: FromBytes + AsBytes + Default + Sized + Clone, { /// Number of bytes each bucket consumes (not congituously, but doesn't matter). const BUCKET_SIZE_BYTES: usize = std::mem::size_of::() + 4 + 1; /// Number of buckets in this IBLT. - #[allow(unused)] pub const BUCKETS: usize = BUCKETS; /// Size of this IBLT in bytes. @@ -110,14 +109,43 @@ where /// Create a new zeroed IBLT. #[inline(always)] pub fn new() -> Self { - assert!(Self::SIZE_BYTES <= std::mem::size_of::()); assert!(BUCKETS < (i32::MAX as usize)); - unsafe { std::mem::zeroed() } + + let mut s = Self { + check_hash: Box::new(Vec::with_capacity(BUCKETS)), + count: Box::new(Vec::with_capacity(BUCKETS)), + key: Box::new(Vec::with_capacity(BUCKETS)), + }; + + s.reset(); + s } #[inline(always)] - pub fn as_bytes(&self) -> &[u8] { - unsafe { &*std::ptr::slice_from_raw_parts((self as *const Self).cast::(), Self::SIZE_BYTES) } + pub fn as_bytes(&self) -> Box> { + let len: usize = BUCKETS * 4 + BUCKETS + BUCKETS * std::mem::size_of::(); + + let mut buf = Box::new(Vec::with_capacity(len)); + + // we can probably make this faster + + for b in self.check_hash.iter() { + for b2 in b.as_bytes() { + buf.push(*b2) + } + } + + for b in self.count.iter() { + buf.push(*b as u8) + } + + for b in self.key.iter() { + for b2 in b.as_bytes() { + buf.push(*b2) + } + } + + buf } /// Obtain an IBLT from bytes in memory. @@ -126,20 +154,46 @@ where /// Cow to 'b' that is just a cast. If re-alignment is necessary it returns an owned Cow containing a properly /// aligned copy. This makes conversion a nearly free cast when alignment adjustment isn't needed. #[inline(always)] - pub fn from_bytes<'a>(b: &'a [u8]) -> Option> { + pub fn from_bytes(b: Box>) -> Option { if b.len() == Self::SIZE_BYTES { - #[cfg(not(any(target_arch = "x86_64", target_arch = "x86", target_arch = "powerpc64", target_arch = "aarch64")))] - { - if b.as_ptr().align_offset(8) == 0 { - Some(Cow::Borrowed(unsafe { &*b.as_ptr().cast() })) - } else { - // NOTE: clone() is implemented above using a raw copy so that alignment doesn't matter. - Some(Cow::Owned(unsafe { &*b.as_ptr().cast::() }.clone())) - } - } + // FIXME I commented this out because I do not have access to the architectures needed. + // #[cfg(not(any(target_arch = "x86_64", target_arch = "x86", target_arch = "powerpc64", target_arch = "aarch64")))] + // { + // if b.as_ptr().align_offset(8) == 0 { + // Some(Cow::Borrowed(unsafe { &*b.as_ptr().cast() })) + // } else { + // // NOTE: clone() is implemented above using a raw copy so that alignment doesn't matter. + // Some(Cow::Owned(unsafe { &*b.as_ptr().cast::() }.clone())) + // } + // } #[cfg(any(target_arch = "x86_64", target_arch = "x86", target_arch = "powerpc64", target_arch = "aarch64"))] { - Some(Cow::Borrowed(unsafe { &*b.as_ptr().cast() })) + let mut tmp = Self::new(); + + // FIXME much easier ways to do this with the copy methods; probably ripe for a + // refactor + let mut i = 0; + let mut y = 0; + + for _ in 0..BUCKETS { + tmp.check_hash.push((b[y] as u32) << 3 | (b[y + 1] as u32) << 2 | (b[y + 2] as u32) << 1 | b[y + 3] as u32); + y += 4; + } + + i *= BUCKETS * 4; + + for y in 0..BUCKETS { + tmp.count.push(b[y + i] as i8); + } + + i += BUCKETS; + + for y in 0..BUCKETS { + let byt = &b[(y * i)..(y * i + std::mem::size_of::())]; + tmp.key.push(T::read_from(byt).unwrap()); + } + + Some(tmp) } } else { None @@ -149,7 +203,12 @@ where /// Zero this IBLT. #[inline(always)] pub fn reset(&mut self) { - unsafe { std::ptr::write_bytes((self as *mut Self).cast::(), 0, std::mem::size_of::()) }; + self.check_hash.clear(); + self.count.clear(); + self.key.clear(); + self.check_hash.resize(BUCKETS, 0); + self.count.resize(BUCKETS, 0); + self.key.resize(BUCKETS, Default::default()); } pub(crate) fn ins_rem(&mut self, key: T, delta: i8) { @@ -200,7 +259,7 @@ where /// bogus items that were never added. This is not an issue with this protocol as it would just result /// in an unsatisfied record request. pub fn list(&mut self, mut f: F) -> bool { - let mut queue: Vec = Vec::with_capacity(BUCKETS); + let mut queue: Box> = Box::new(Vec::with_capacity(BUCKETS)); for i in 0..BUCKETS { let count = self.count[i]; @@ -242,7 +301,7 @@ where } } - f(key.clone(), count == 1); + f(key, count == 1); } } @@ -252,15 +311,15 @@ where impl PartialEq for IBLT where - T: AsBytes + FromBytes + Clone, + T: AsBytes + FromBytes + Default + Clone, { #[inline(always)] fn eq(&self, other: &Self) -> bool { - self.as_bytes().eq(other.as_bytes()) + self.as_bytes().eq(&other.as_bytes()) } } -impl Eq for IBLT where T: AsBytes + FromBytes + Clone {} +impl Eq for IBLT where T: AsBytes + FromBytes + Default + Clone {} #[cfg(test)] mod tests { @@ -315,7 +374,7 @@ mod tests { #[test] fn struct_packing() { // Typical case - let mut tmp = IBLT::<[u8; 64], 16, 3>::new(); + let mut tmp = IBLT::<[u8; 32], 16, 3>::new(); tmp.check_hash.fill(0x01010101); tmp.count.fill(1); tmp.key.iter_mut().for_each(|x| x.fill(1)); @@ -348,7 +407,7 @@ mod tests { const LENGTH: usize = 16; const CAPACITY: usize = 4096; let mut rn: u128 = 0xd3b07384d113edec49eaa6238ad5ff00; - let mut expected: HashSet = HashSet::with_capacity(4096); + let mut expected: HashSet = HashSet::with_capacity(CAPACITY); let mut count = LENGTH; while count <= CAPACITY { let mut test = IBLT::<[u8; LENGTH], CAPACITY, HASHES>::new(); @@ -380,8 +439,8 @@ mod tests { const LENGTH: usize = 16; let mut rn: u128 = 0xd3b07384d113edec49eaa6238ad5ff00; let mut missing_count = 1024; - let mut missing: HashSet = HashSet::with_capacity(CAPACITY * 2); - let mut all: HashSet = HashSet::with_capacity(REMOTE_SIZE); + let mut missing: Box> = Box::new(HashSet::with_capacity(CAPACITY * 2)); + let mut all: Box> = Box::new(HashSet::with_capacity(REMOTE_SIZE)); while missing_count <= CAPACITY { missing.clear(); all.clear(); @@ -429,10 +488,17 @@ mod tests { other_thing: [u8; 32], } + impl Default for TestType { + fn default() -> Self { + Self::zeroed() + } + } + impl TestType { pub fn zeroed() -> Self { unsafe { std::mem::zeroed() } } + pub fn new() -> Self { let mut newtype = Self::zeroed(); newtype.thing.fill_with(|| rand::random()); @@ -443,9 +509,9 @@ mod tests { #[test] fn test_polymorphism() { - const CAPACITY: usize = 512; - let mut full = IBLT::::new(); - let mut zero = IBLT::::new(); + const CAPACITY: usize = 4096; + let mut full = Box::new(IBLT::::new()); + let mut zero = Box::new(IBLT::::new()); for _ in 0..CAPACITY { zero.insert(TestType::zeroed()); @@ -473,4 +539,33 @@ mod tests { } }); } + + #[test] + fn test_to_from_bytes() { + // Typical case + let mut tmp = IBLT::<[u8; 32], 16, 3>::new(); + tmp.check_hash.fill(0x01010101); + tmp.count.fill(1); + tmp.key.iter_mut().for_each(|x| x.fill(1)); + + let mut tmp2 = IBLT::<[u8; 32], 16, 3>::from_bytes(tmp.as_bytes()).unwrap(); + + tmp2.subtract(&tmp); + + tmp2.list(|_, new| assert!(!new)); + } + + #[test] + fn test_clone() { + // Typical case + let mut tmp = IBLT::<[u8; 32], 16, 3>::new(); + tmp.check_hash.fill(0x01010101); + tmp.count.fill(1); + tmp.key.iter_mut().for_each(|x| x.fill(1)); + + let mut tmp2 = tmp.clone(); + + tmp2.subtract(&tmp); + tmp2.list(|_, new| assert!(!new)); + } }