From 7223de6fbbd6afe5942f85186e89fdccdb53dafb Mon Sep 17 00:00:00 2001 From: Erik Hollensbe Date: Wed, 13 Apr 2022 02:44:55 -0700 Subject: [PATCH 1/5] Fix array size in insert/remove calls makes the library easier to consume, also removes the need for the length check. also required to build https://github.com/erikh/isync Signed-off-by: Erik Hollensbe --- iblt/src/lib.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/iblt/src/lib.rs b/iblt/src/lib.rs index 67065d115..ebdaec85a 100644 --- a/iblt/src/lib.rs +++ b/iblt/src/lib.rs @@ -151,16 +151,14 @@ impl IBLT= ITEM_BYTES); + pub fn insert(&mut self, key: &[u8; ITEM_BYTES]) { self.ins_rem(unsafe { &*key.as_ptr().cast() }, 1); } /// Insert a set item into this set. /// This will panic if the slice is smaller than ITEM_BYTES. #[inline(always)] - pub fn remove(&mut self, key: &[u8]) { - assert!(key.len() >= ITEM_BYTES); + pub fn remove(&mut self, key: &[u8; ITEM_BYTES]) { self.ins_rem(unsafe { &*key.as_ptr().cast() }, -1); } From 1b2485b27713df6f38bbb4bcfd1f681ef358b665 Mon Sep 17 00:00:00 2001 From: Erik Hollensbe Date: Fri, 15 Apr 2022 14:50:58 -0700 Subject: [PATCH 2/5] Leverage type parameters and traits to serialize data This utilizes two traits, AsBytes and FromBytes from the `zerocopy` crate to enable type parameters on the hashed value. --- iblt/Cargo.toml | 4 ++ iblt/src/lib.rs | 167 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 124 insertions(+), 47 deletions(-) diff --git a/iblt/Cargo.toml b/iblt/Cargo.toml index 82cce58ff..16c2d8481 100644 --- a/iblt/Cargo.toml +++ b/iblt/Cargo.toml @@ -13,5 +13,9 @@ panic = 'abort' [dependencies] crc32fast = "^1" +zerocopy = { version = "0.6.1", features = ["alloc"] } + +[dev-dependencies] +rand = ">=0" [lib] diff --git a/iblt/src/lib.rs b/iblt/src/lib.rs index ebdaec85a..4228a6d4a 100644 --- a/iblt/src/lib.rs +++ b/iblt/src/lib.rs @@ -8,28 +8,38 @@ use std::borrow::Cow; +use zerocopy::{AsBytes, FromBytes}; + #[cfg(not(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64", target_arch = "powerpc64")))] #[inline(always)] -fn xor_with(x: &mut [u8; L], y: &[u8; L]) { - x.iter_mut().zip(y.iter()).for_each(|(a, b)| *a ^= *b); +fn xor_with(x: &mut T, y: &T) +where + T: FromBytes + AsBytes + Sized, +{ + x.as_bytes_mut().iter_mut().zip(y.as_bytes().iter()).for_each(|(a, b)| *a ^= *b); } #[cfg(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64", target_arch = "powerpc64"))] #[inline(always)] -fn xor_with(x: &mut [u8; L], y: &[u8; L]) { - if L >= 16 { - for i in 0..(L / 16) { - unsafe { *x.as_mut_ptr().cast::().add(i) ^= *y.as_ptr().cast::().add(i) }; +fn xor_with(x: &mut T, y: &T) +where + T: FromBytes + AsBytes + Sized, +{ + let size = std::mem::size_of::(); + + if size >= 16 { + for i in 0..(size / 16) { + unsafe { *x.as_bytes_mut().as_mut_ptr().cast::().add(i) ^= *y.as_bytes().as_ptr().cast::().add(i) }; } - for i in (L - (L % 16))..L { - unsafe { *x.as_mut_ptr().add(i) ^= *y.as_ptr().add(i) }; + for i in (size - (size % 16))..size { + unsafe { *x.as_bytes_mut().as_mut_ptr().add(i) ^= *y.as_bytes().as_ptr().add(i) }; } } else { - for i in 0..(L / 8) { - unsafe { *x.as_mut_ptr().cast::().add(i) ^= *y.as_ptr().cast::().add(i) }; + for i in 0..(size / 8) { + unsafe { *x.as_bytes_mut().as_mut_ptr().cast::().add(i) ^= *y.as_bytes().as_ptr().cast::().add(i) }; } - for i in (L - (L % 8))..L { - unsafe { *x.as_mut_ptr().add(i) ^= *y.as_ptr().add(i) }; + for i in (size - (size % 8))..size { + unsafe { *x.as_bytes_mut().as_mut_ptr().add(i) ^= *y.as_bytes().as_ptr().add(i) }; } } } @@ -61,14 +71,19 @@ fn murmurhash32_mix32(mut x: u32) -> u32 { /// /// The best value for HASHES seems to be 3 for an optimal fill of 75%. #[repr(C)] -pub struct IBLT { +pub struct IBLT +where + T: FromBytes + AsBytes + Sized + Clone, +{ check_hash: [u32; BUCKETS], count: [i8; BUCKETS], - key: [[u8; ITEM_BYTES]; BUCKETS], + key: [T; BUCKETS], } -impl Clone for IBLT { - #[inline(always)] +impl Clone for IBLT +where + T: FromBytes + AsBytes + Sized + Clone, +{ fn clone(&self) -> Self { unsafe { let mut tmp: Self = std::mem::MaybeUninit::uninit().assume_init(); @@ -78,9 +93,12 @@ impl Clone f } } -impl IBLT { +impl IBLT +where + T: FromBytes + AsBytes + Sized + Clone, +{ /// Number of bytes each bucket consumes (not congituously, but doesn't matter). - const BUCKET_SIZE_BYTES: usize = ITEM_BYTES + 4 + 1; + const BUCKET_SIZE_BYTES: usize = std::mem::size_of::() + 4 + 1; /// Number of buckets in this IBLT. #[allow(unused)] @@ -97,8 +115,6 @@ impl IBLT &[u8] { unsafe { &*std::ptr::slice_from_raw_parts((self as *const Self).cast::(), Self::SIZE_BYTES) } @@ -136,37 +152,37 @@ impl IBLT(), 0, std::mem::size_of::()) }; } - pub(crate) fn ins_rem(&mut self, key: &[u8; ITEM_BYTES], delta: i8) { - let check_hash = crc32fast::hash(key); + pub(crate) fn ins_rem(&mut self, key: T, delta: i8) { + let check_hash = crc32fast::hash(key.as_bytes()); let mut iteration_index = u32::from_le(check_hash).wrapping_add(1); for _ in 0..(HASHES as u64) { iteration_index = murmurhash32_mix32(iteration_index); let i = (iteration_index as usize) % BUCKETS; self.check_hash[i] ^= check_hash; self.count[i] = self.count[i].wrapping_add(delta); - xor_with(&mut self.key[i], key); + xor_with(&mut self.key[i], &key); } } /// Insert a set item into this set. /// This will panic if the slice is smaller than ITEM_BYTES. #[inline(always)] - pub fn insert(&mut self, key: &[u8; ITEM_BYTES]) { - self.ins_rem(unsafe { &*key.as_ptr().cast() }, 1); + pub fn insert(&mut self, key: T) { + self.ins_rem(key, 1); } /// Insert a set item into this set. /// This will panic if the slice is smaller than ITEM_BYTES. #[inline(always)] - pub fn remove(&mut self, key: &[u8; ITEM_BYTES]) { - self.ins_rem(unsafe { &*key.as_ptr().cast() }, -1); + pub fn remove(&mut self, key: T) { + self.ins_rem(key, -1); } /// Subtract another IBLT from this one to get a set difference. pub fn subtract(&mut self, other: &Self) { self.check_hash.iter_mut().zip(other.check_hash.iter()).for_each(|(a, b)| *a ^= *b); self.count.iter_mut().zip(other.count.iter()).for_each(|(a, b)| *a = a.wrapping_sub(*b)); - self.key.iter_mut().zip(other.key.iter()).for_each(|(a, b)| xor_with(a, b)); + self.key.iter_mut().zip(other.key.iter()).for_each(|(a, b)| xor_with(a, &b)); } /// List as many entries in this IBLT as can be extracted. @@ -183,12 +199,12 @@ impl IBLT(mut self, mut f: F) -> bool { + pub fn list(&mut self, mut f: F) -> bool { let mut queue: Vec = Vec::with_capacity(BUCKETS); for i in 0..BUCKETS { let count = self.count[i]; - if (count == 1 || count == -1) && crc32fast::hash(&self.key[i]) == self.check_hash[i] { + if (count == 1 || count == -1) && crc32fast::hash(&self.key[i].as_bytes()) == self.check_hash[i] { queue.push(i as u32); } } @@ -204,7 +220,7 @@ impl IBLT IBLT BUCKETS { // sanity check, should be impossible break 'list_main; @@ -226,7 +242,7 @@ impl IBLT IBLT PartialEq for IBLT { +impl PartialEq for IBLT +where + T: AsBytes + FromBytes + Clone, +{ #[inline(always)] fn eq(&self, other: &Self) -> bool { self.as_bytes().eq(other.as_bytes()) } } -impl Eq for IBLT {} +impl Eq for IBLT where T: AsBytes + FromBytes + Clone {} #[cfg(test)] mod tests { @@ -296,28 +315,28 @@ mod tests { #[test] fn struct_packing() { // Typical case - let mut tmp = IBLT::<64, 16, 3>::new(); + let mut tmp = IBLT::<[u8; 64], 16, 3>::new(); tmp.check_hash.fill(0x01010101); tmp.count.fill(1); tmp.key.iter_mut().for_each(|x| x.fill(1)); assert!(tmp.as_bytes().iter().all(|x| *x == 1)); // Pathological alignment case #1 - let mut tmp = IBLT::<17, 13, 3>::new(); + let mut tmp = IBLT::<[u8; 17], 13, 3>::new(); tmp.check_hash.fill(0x01010101); tmp.count.fill(1); tmp.key.iter_mut().for_each(|x| x.fill(1)); assert!(tmp.as_bytes().iter().all(|x| *x == 1)); // Pathological alignment case #2 - let mut tmp = IBLT::<17, 8, 3>::new(); + let mut tmp = IBLT::<[u8; 17], 8, 3>::new(); tmp.check_hash.fill(0x01010101); tmp.count.fill(1); tmp.key.iter_mut().for_each(|x| x.fill(1)); assert!(tmp.as_bytes().iter().all(|x| *x == 1)); // Pathological alignment case #3 - let mut tmp = IBLT::<16, 7, 3>::new(); + let mut tmp = IBLT::<[u8; 16], 7, 3>::new(); tmp.check_hash.fill(0x01010101); tmp.count.fill(1); tmp.key.iter_mut().for_each(|x| x.fill(1)); @@ -326,18 +345,19 @@ mod tests { #[test] fn fill_list_performance() { + const LENGTH: usize = 16; const CAPACITY: usize = 4096; let mut rn: u128 = 0xd3b07384d113edec49eaa6238ad5ff00; let mut expected: HashSet = HashSet::with_capacity(4096); - let mut count = 64; + let mut count = LENGTH; while count <= CAPACITY { - let mut test = IBLT::::new(); + let mut test = IBLT::<[u8; LENGTH], CAPACITY, HASHES>::new(); expected.clear(); for _ in 0..count { rn = rn.wrapping_add(splitmix64(rn as u64) as u128); expected.insert(rn); - test.insert(&rn.to_le_bytes()); + test.insert(rn.to_le_bytes()); } let mut list_count = 0; @@ -348,7 +368,7 @@ mod tests { }); println!("inserted: {}\tlisted: {}\tcapacity: {}\tscore: {:.4}\tfill: {:.4}", count, list_count, CAPACITY, (list_count as f64) / (count as f64), (count as f64) / (CAPACITY as f64)); - count += 64; + count += LENGTH; } } @@ -357,6 +377,7 @@ mod tests { const CAPACITY: usize = 4096; // previously 16384; const REMOTE_SIZE: usize = 1024 * 1024 * 2; const STEP: usize = 1024; + const LENGTH: usize = 16; let mut rn: u128 = 0xd3b07384d113edec49eaa6238ad5ff00; let mut missing_count = 1024; let mut missing: HashSet = HashSet::with_capacity(CAPACITY * 2); @@ -364,19 +385,19 @@ mod tests { while missing_count <= CAPACITY { missing.clear(); all.clear(); - let mut local = IBLT::::new(); - let mut remote = IBLT::::new(); + let mut local = IBLT::<[u8; LENGTH], CAPACITY, HASHES>::new(); + let mut remote = IBLT::<[u8; LENGTH], CAPACITY, HASHES>::new(); let mut k = 0; while k < REMOTE_SIZE { rn = rn.wrapping_add(splitmix64(rn as u64) as u128); if all.insert(rn) { if k >= missing_count { - local.insert(&rn.to_le_bytes()); + local.insert(rn.to_le_bytes()); } else { missing.insert(rn); } - remote.insert(&rn.to_le_bytes()); + remote.insert(rn.to_le_bytes()); k += 1; } } @@ -400,4 +421,56 @@ mod tests { missing_count += STEP; } } + + #[derive(Eq, PartialEq, Clone, AsBytes, FromBytes, Debug)] + #[repr(C)] + struct TestType { + thing: [u8; 256], + other_thing: [u8; 32], + } + + impl TestType { + pub fn zeroed() -> Self { + unsafe { std::mem::zeroed() } + } + pub fn new() -> Self { + let mut newtype = Self::zeroed(); + newtype.thing.fill_with(|| rand::random()); + newtype.other_thing.fill_with(|| rand::random()); + newtype + } + } + + #[test] + fn test_polymorphism() { + const CAPACITY: usize = 512; + let mut full = IBLT::::new(); + let mut zero = IBLT::::new(); + + for _ in 0..CAPACITY { + zero.insert(TestType::zeroed()); + full.insert(TestType::new()); + } + + full.subtract(&zero); + + zero.list(|item, new| { + if !new { + assert_eq!(item, TestType::zeroed()); + } + }); + + zero.reset(); + + for _ in 0..CAPACITY { + zero.insert(TestType::zeroed()); + } + + zero.subtract(&full); + full.list(|item, new| { + if !new { + assert_ne!(item, TestType::zeroed()); + } + }); + } } From 8dac9ccf0725be3e518af9b0ccb57d67bee74c89 Mon Sep 17 00:00:00 2001 From: Erik Hollensbe Date: Sat, 16 Apr 2022 05:01:38 -0700 Subject: [PATCH 3/5] move to heap allocation Signed-off-by: Erik Hollensbe --- iblt/src/lib.rs | 173 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 134 insertions(+), 39 deletions(-) diff --git a/iblt/src/lib.rs b/iblt/src/lib.rs index 4228a6d4a..61abc4889 100644 --- a/iblt/src/lib.rs +++ b/iblt/src/lib.rs @@ -6,8 +6,6 @@ * https://www.zerotier.com/ */ -use std::borrow::Cow; - use zerocopy::{AsBytes, FromBytes}; #[cfg(not(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64", target_arch = "powerpc64")))] @@ -73,21 +71,23 @@ fn murmurhash32_mix32(mut x: u32) -> u32 { #[repr(C)] pub struct IBLT where - T: FromBytes + AsBytes + Sized + Clone, + T: FromBytes + AsBytes + Default + Sized + Clone, { - check_hash: [u32; BUCKETS], - count: [i8; BUCKETS], - key: [T; BUCKETS], + check_hash: Box>, + count: Box>, + key: Box>, } impl Clone for IBLT where - T: FromBytes + AsBytes + Sized + Clone, + T: FromBytes + AsBytes + Default + Sized + Clone, { fn clone(&self) -> Self { unsafe { - let mut tmp: Self = std::mem::MaybeUninit::uninit().assume_init(); - std::ptr::copy_nonoverlapping((self as *const Self).cast::(), (&mut tmp as *mut Self).cast::(), Self::SIZE_BYTES); + let mut tmp = Self::new(); + std::ptr::copy_nonoverlapping(self.check_hash.as_ptr(), tmp.check_hash.as_mut_ptr(), BUCKETS); + std::ptr::copy_nonoverlapping(self.count.as_ptr(), tmp.count.as_mut_ptr(), BUCKETS); + std::ptr::copy_nonoverlapping(self.key.as_ptr(), tmp.key.as_mut_ptr(), BUCKETS); tmp } } @@ -95,13 +95,12 @@ where impl IBLT where - T: FromBytes + AsBytes + Sized + Clone, + T: FromBytes + AsBytes + Default + Sized + Clone, { /// Number of bytes each bucket consumes (not congituously, but doesn't matter). const BUCKET_SIZE_BYTES: usize = std::mem::size_of::() + 4 + 1; /// Number of buckets in this IBLT. - #[allow(unused)] pub const BUCKETS: usize = BUCKETS; /// Size of this IBLT in bytes. @@ -110,14 +109,43 @@ where /// Create a new zeroed IBLT. #[inline(always)] pub fn new() -> Self { - assert!(Self::SIZE_BYTES <= std::mem::size_of::()); assert!(BUCKETS < (i32::MAX as usize)); - unsafe { std::mem::zeroed() } + + let mut s = Self { + check_hash: Box::new(Vec::with_capacity(BUCKETS)), + count: Box::new(Vec::with_capacity(BUCKETS)), + key: Box::new(Vec::with_capacity(BUCKETS)), + }; + + s.reset(); + s } #[inline(always)] - pub fn as_bytes(&self) -> &[u8] { - unsafe { &*std::ptr::slice_from_raw_parts((self as *const Self).cast::(), Self::SIZE_BYTES) } + pub fn as_bytes(&self) -> Box> { + let len: usize = BUCKETS * 4 + BUCKETS + BUCKETS * std::mem::size_of::(); + + let mut buf = Box::new(Vec::with_capacity(len)); + + // we can probably make this faster + + for b in self.check_hash.iter() { + for b2 in b.as_bytes() { + buf.push(*b2) + } + } + + for b in self.count.iter() { + buf.push(*b as u8) + } + + for b in self.key.iter() { + for b2 in b.as_bytes() { + buf.push(*b2) + } + } + + buf } /// Obtain an IBLT from bytes in memory. @@ -126,20 +154,46 @@ where /// Cow to 'b' that is just a cast. If re-alignment is necessary it returns an owned Cow containing a properly /// aligned copy. This makes conversion a nearly free cast when alignment adjustment isn't needed. #[inline(always)] - pub fn from_bytes<'a>(b: &'a [u8]) -> Option> { + pub fn from_bytes(b: Box>) -> Option { if b.len() == Self::SIZE_BYTES { - #[cfg(not(any(target_arch = "x86_64", target_arch = "x86", target_arch = "powerpc64", target_arch = "aarch64")))] - { - if b.as_ptr().align_offset(8) == 0 { - Some(Cow::Borrowed(unsafe { &*b.as_ptr().cast() })) - } else { - // NOTE: clone() is implemented above using a raw copy so that alignment doesn't matter. - Some(Cow::Owned(unsafe { &*b.as_ptr().cast::() }.clone())) - } - } + // FIXME I commented this out because I do not have access to the architectures needed. + // #[cfg(not(any(target_arch = "x86_64", target_arch = "x86", target_arch = "powerpc64", target_arch = "aarch64")))] + // { + // if b.as_ptr().align_offset(8) == 0 { + // Some(Cow::Borrowed(unsafe { &*b.as_ptr().cast() })) + // } else { + // // NOTE: clone() is implemented above using a raw copy so that alignment doesn't matter. + // Some(Cow::Owned(unsafe { &*b.as_ptr().cast::() }.clone())) + // } + // } #[cfg(any(target_arch = "x86_64", target_arch = "x86", target_arch = "powerpc64", target_arch = "aarch64"))] { - Some(Cow::Borrowed(unsafe { &*b.as_ptr().cast() })) + let mut tmp = Self::new(); + + // FIXME much easier ways to do this with the copy methods; probably ripe for a + // refactor + let mut i = 0; + let mut y = 0; + + for _ in 0..BUCKETS { + tmp.check_hash.push((b[y] as u32) << 3 | (b[y + 1] as u32) << 2 | (b[y + 2] as u32) << 1 | b[y + 3] as u32); + y += 4; + } + + i *= BUCKETS * 4; + + for y in 0..BUCKETS { + tmp.count.push(b[y + i] as i8); + } + + i += BUCKETS; + + for y in 0..BUCKETS { + let byt = &b[(y * i)..(y * i + std::mem::size_of::())]; + tmp.key.push(T::read_from(byt).unwrap()); + } + + Some(tmp) } } else { None @@ -149,7 +203,12 @@ where /// Zero this IBLT. #[inline(always)] pub fn reset(&mut self) { - unsafe { std::ptr::write_bytes((self as *mut Self).cast::(), 0, std::mem::size_of::()) }; + self.check_hash.clear(); + self.count.clear(); + self.key.clear(); + self.check_hash.resize(BUCKETS, 0); + self.count.resize(BUCKETS, 0); + self.key.resize(BUCKETS, Default::default()); } pub(crate) fn ins_rem(&mut self, key: T, delta: i8) { @@ -200,7 +259,7 @@ where /// bogus items that were never added. This is not an issue with this protocol as it would just result /// in an unsatisfied record request. pub fn list(&mut self, mut f: F) -> bool { - let mut queue: Vec = Vec::with_capacity(BUCKETS); + let mut queue: Box> = Box::new(Vec::with_capacity(BUCKETS)); for i in 0..BUCKETS { let count = self.count[i]; @@ -242,7 +301,7 @@ where } } - f(key.clone(), count == 1); + f(key, count == 1); } } @@ -252,15 +311,15 @@ where impl PartialEq for IBLT where - T: AsBytes + FromBytes + Clone, + T: AsBytes + FromBytes + Default + Clone, { #[inline(always)] fn eq(&self, other: &Self) -> bool { - self.as_bytes().eq(other.as_bytes()) + self.as_bytes().eq(&other.as_bytes()) } } -impl Eq for IBLT where T: AsBytes + FromBytes + Clone {} +impl Eq for IBLT where T: AsBytes + FromBytes + Default + Clone {} #[cfg(test)] mod tests { @@ -315,7 +374,7 @@ mod tests { #[test] fn struct_packing() { // Typical case - let mut tmp = IBLT::<[u8; 64], 16, 3>::new(); + let mut tmp = IBLT::<[u8; 32], 16, 3>::new(); tmp.check_hash.fill(0x01010101); tmp.count.fill(1); tmp.key.iter_mut().for_each(|x| x.fill(1)); @@ -348,7 +407,7 @@ mod tests { const LENGTH: usize = 16; const CAPACITY: usize = 4096; let mut rn: u128 = 0xd3b07384d113edec49eaa6238ad5ff00; - let mut expected: HashSet = HashSet::with_capacity(4096); + let mut expected: HashSet = HashSet::with_capacity(CAPACITY); let mut count = LENGTH; while count <= CAPACITY { let mut test = IBLT::<[u8; LENGTH], CAPACITY, HASHES>::new(); @@ -380,8 +439,8 @@ mod tests { const LENGTH: usize = 16; let mut rn: u128 = 0xd3b07384d113edec49eaa6238ad5ff00; let mut missing_count = 1024; - let mut missing: HashSet = HashSet::with_capacity(CAPACITY * 2); - let mut all: HashSet = HashSet::with_capacity(REMOTE_SIZE); + let mut missing: Box> = Box::new(HashSet::with_capacity(CAPACITY * 2)); + let mut all: Box> = Box::new(HashSet::with_capacity(REMOTE_SIZE)); while missing_count <= CAPACITY { missing.clear(); all.clear(); @@ -429,10 +488,17 @@ mod tests { other_thing: [u8; 32], } + impl Default for TestType { + fn default() -> Self { + Self::zeroed() + } + } + impl TestType { pub fn zeroed() -> Self { unsafe { std::mem::zeroed() } } + pub fn new() -> Self { let mut newtype = Self::zeroed(); newtype.thing.fill_with(|| rand::random()); @@ -443,9 +509,9 @@ mod tests { #[test] fn test_polymorphism() { - const CAPACITY: usize = 512; - let mut full = IBLT::::new(); - let mut zero = IBLT::::new(); + const CAPACITY: usize = 4096; + let mut full = Box::new(IBLT::::new()); + let mut zero = Box::new(IBLT::::new()); for _ in 0..CAPACITY { zero.insert(TestType::zeroed()); @@ -473,4 +539,33 @@ mod tests { } }); } + + #[test] + fn test_to_from_bytes() { + // Typical case + let mut tmp = IBLT::<[u8; 32], 16, 3>::new(); + tmp.check_hash.fill(0x01010101); + tmp.count.fill(1); + tmp.key.iter_mut().for_each(|x| x.fill(1)); + + let mut tmp2 = IBLT::<[u8; 32], 16, 3>::from_bytes(tmp.as_bytes()).unwrap(); + + tmp2.subtract(&tmp); + + tmp2.list(|_, new| assert!(!new)); + } + + #[test] + fn test_clone() { + // Typical case + let mut tmp = IBLT::<[u8; 32], 16, 3>::new(); + tmp.check_hash.fill(0x01010101); + tmp.count.fill(1); + tmp.key.iter_mut().for_each(|x| x.fill(1)); + + let mut tmp2 = tmp.clone(); + + tmp2.subtract(&tmp); + tmp2.list(|_, new| assert!(!new)); + } } From 544420695be37cd1f622bc139c7b7be8b0af39f5 Mon Sep 17 00:00:00 2001 From: Erik Hollensbe Date: Mon, 18 Apr 2022 00:44:25 -0700 Subject: [PATCH 4/5] refactors and improvements to tests Signed-off-by: Erik Hollensbe --- iblt/src/lib.rs | 112 +++++++++++++++++++----------------------------- 1 file changed, 45 insertions(+), 67 deletions(-) diff --git a/iblt/src/lib.rs b/iblt/src/lib.rs index 61abc4889..a420fd3b1 100644 --- a/iblt/src/lib.rs +++ b/iblt/src/lib.rs @@ -123,27 +123,18 @@ where #[inline(always)] pub fn as_bytes(&self) -> Box> { - let len: usize = BUCKETS * 4 + BUCKETS + BUCKETS * std::mem::size_of::(); + let check_hash_len = BUCKETS * 4; + let t_len = BUCKETS * std::mem::size_of::(); + let len = check_hash_len + BUCKETS + t_len; let mut buf = Box::new(Vec::with_capacity(len)); + buf.resize(len, 0); - // we can probably make this faster + let byt = buf.as_bytes_mut(); - for b in self.check_hash.iter() { - for b2 in b.as_bytes() { - buf.push(*b2) - } - } - - for b in self.count.iter() { - buf.push(*b as u8) - } - - for b in self.key.iter() { - for b2 in b.as_bytes() { - buf.push(*b2) - } - } + byt[0..check_hash_len].copy_from_slice(self.check_hash.as_bytes()); + byt[check_hash_len..BUCKETS + check_hash_len].copy_from_slice(self.count.as_bytes()); + byt[len - t_len..len].copy_from_slice(self.key.as_bytes()); buf } @@ -170,28 +161,15 @@ where { let mut tmp = Self::new(); - // FIXME much easier ways to do this with the copy methods; probably ripe for a - // refactor let mut i = 0; - let mut y = 0; - for _ in 0..BUCKETS { - tmp.check_hash.push((b[y] as u32) << 3 | (b[y + 1] as u32) << 2 | (b[y + 2] as u32) << 1 | b[y + 3] as u32); - y += 4; - } - - i *= BUCKETS * 4; - - for y in 0..BUCKETS { - tmp.count.push(b[y + i] as i8); - } + tmp.check_hash.as_bytes_mut().copy_from_slice(&b[0..BUCKETS * 4]); + i += BUCKETS * 4; + tmp.count.as_bytes_mut().copy_from_slice(&b[i..i + BUCKETS]); i += BUCKETS; - for y in 0..BUCKETS { - let byt = &b[(y * i)..(y * i + std::mem::size_of::())]; - tmp.key.push(T::read_from(byt).unwrap()); - } + tmp.key.as_bytes_mut().copy_from_slice(&b[i..i + std::mem::size_of::() * BUCKETS]); Some(tmp) } @@ -211,7 +189,7 @@ where self.key.resize(BUCKETS, Default::default()); } - pub(crate) fn ins_rem(&mut self, key: T, delta: i8) { + pub(crate) fn ins_rem(&mut self, key: &T, delta: i8) { let check_hash = crc32fast::hash(key.as_bytes()); let mut iteration_index = u32::from_le(check_hash).wrapping_add(1); for _ in 0..(HASHES as u64) { @@ -226,14 +204,14 @@ where /// Insert a set item into this set. /// This will panic if the slice is smaller than ITEM_BYTES. #[inline(always)] - pub fn insert(&mut self, key: T) { + pub fn insert(&mut self, key: &T) { self.ins_rem(key, 1); } /// Insert a set item into this set. /// This will panic if the slice is smaller than ITEM_BYTES. #[inline(always)] - pub fn remove(&mut self, key: T) { + pub fn remove(&mut self, key: &T) { self.ins_rem(key, -1); } @@ -348,6 +326,15 @@ mod tests { assert!(actual.eq(&expected)); } + fn typical_iblt() -> IBLT<[u8; 32], 16, 3> { + // Typical case + let mut tmp = IBLT::<[u8; 32], 16, 3>::new(); + tmp.check_hash.fill(0x01010101); + tmp.count.fill(1); + tmp.key.iter_mut().for_each(|x| x.fill(1)); + tmp + } + #[test] fn check_xor_with() { check_xor_with2::<128>(); @@ -373,11 +360,7 @@ mod tests { #[test] fn struct_packing() { - // Typical case - let mut tmp = IBLT::<[u8; 32], 16, 3>::new(); - tmp.check_hash.fill(0x01010101); - tmp.count.fill(1); - tmp.key.iter_mut().for_each(|x| x.fill(1)); + let tmp = typical_iblt(); assert!(tmp.as_bytes().iter().all(|x| *x == 1)); // Pathological alignment case #1 @@ -416,7 +399,7 @@ mod tests { for _ in 0..count { rn = rn.wrapping_add(splitmix64(rn as u64) as u128); expected.insert(rn); - test.insert(rn.to_le_bytes()); + test.insert(&rn.to_le_bytes()); } let mut list_count = 0; @@ -439,8 +422,8 @@ mod tests { const LENGTH: usize = 16; let mut rn: u128 = 0xd3b07384d113edec49eaa6238ad5ff00; let mut missing_count = 1024; - let mut missing: Box> = Box::new(HashSet::with_capacity(CAPACITY * 2)); - let mut all: Box> = Box::new(HashSet::with_capacity(REMOTE_SIZE)); + let mut missing: HashSet = HashSet::with_capacity(CAPACITY * 2); + let mut all: HashSet = HashSet::with_capacity(REMOTE_SIZE); while missing_count <= CAPACITY { missing.clear(); all.clear(); @@ -452,11 +435,11 @@ mod tests { rn = rn.wrapping_add(splitmix64(rn as u64) as u128); if all.insert(rn) { if k >= missing_count { - local.insert(rn.to_le_bytes()); + local.insert(&rn.to_le_bytes()); } else { missing.insert(rn); } - remote.insert(rn.to_le_bytes()); + remote.insert(&rn.to_le_bytes()); k += 1; } } @@ -514,55 +497,50 @@ mod tests { let mut zero = Box::new(IBLT::::new()); for _ in 0..CAPACITY { - zero.insert(TestType::zeroed()); - full.insert(TestType::new()); + zero.insert(&TestType::zeroed()); + full.insert(&TestType::new()); } - full.subtract(&zero); + zero.subtract(&full); zero.list(|item, new| { - if !new { + if new { assert_eq!(item, TestType::zeroed()); + } else { + assert_ne!(item, TestType::zeroed()); } }); zero.reset(); + full.reset(); for _ in 0..CAPACITY { - zero.insert(TestType::zeroed()); + zero.insert(&TestType::zeroed()); + full.insert(&TestType::new()); } - zero.subtract(&full); + full.subtract(&zero); full.list(|item, new| { - if !new { + if new { assert_ne!(item, TestType::zeroed()); + } else { + assert_eq!(item, TestType::zeroed()); } }); } #[test] fn test_to_from_bytes() { - // Typical case - let mut tmp = IBLT::<[u8; 32], 16, 3>::new(); - tmp.check_hash.fill(0x01010101); - tmp.count.fill(1); - tmp.key.iter_mut().for_each(|x| x.fill(1)); - + let tmp = typical_iblt(); let mut tmp2 = IBLT::<[u8; 32], 16, 3>::from_bytes(tmp.as_bytes()).unwrap(); tmp2.subtract(&tmp); - tmp2.list(|_, new| assert!(!new)); } #[test] fn test_clone() { - // Typical case - let mut tmp = IBLT::<[u8; 32], 16, 3>::new(); - tmp.check_hash.fill(0x01010101); - tmp.count.fill(1); - tmp.key.iter_mut().for_each(|x| x.fill(1)); - + let tmp = typical_iblt(); let mut tmp2 = tmp.clone(); tmp2.subtract(&tmp); From 95d28c0fb9bd6f755588cdc515364609e5b37f24 Mon Sep 17 00:00:00 2001 From: Erik Hollensbe Date: Mon, 18 Apr 2022 16:37:08 -0700 Subject: [PATCH 5/5] Benchmarks for as_bytes/from_bytes, clone, list Signed-off-by: Erik Hollensbe --- iblt/Cargo.toml | 17 ++++++++ iblt/benches/clone.rs | 19 +++++++++ iblt/benches/list.rs | 39 ++++++++++++++++++ iblt/benches/merge.rs | 78 +++++++++++++++++++++++++++++++++++ iblt/benches/to_from_bytes.rs | 19 +++++++++ 5 files changed, 172 insertions(+) create mode 100644 iblt/benches/clone.rs create mode 100644 iblt/benches/list.rs create mode 100644 iblt/benches/merge.rs create mode 100644 iblt/benches/to_from_bytes.rs diff --git a/iblt/Cargo.toml b/iblt/Cargo.toml index 16c2d8481..41232812d 100644 --- a/iblt/Cargo.toml +++ b/iblt/Cargo.toml @@ -17,5 +17,22 @@ zerocopy = { version = "0.6.1", features = ["alloc"] } [dev-dependencies] rand = ">=0" +criterion = ">=0" [lib] + +[[bench]] +name = "to_from_bytes" +harness = false + +[[bench]] +name = "clone" +harness = false + +[[bench]] +name = "list" +harness = false + +[[bench]] +name = "merge" +harness = false diff --git a/iblt/benches/clone.rs b/iblt/benches/clone.rs new file mode 100644 index 000000000..0199de2e2 --- /dev/null +++ b/iblt/benches/clone.rs @@ -0,0 +1,19 @@ +use criterion::{criterion_group, criterion_main, Criterion}; +use iblt::IBLT; + +const CAPACITY: usize = 4096; +type OurIBLT = IBLT<[u8; 32], CAPACITY, 3>; + +pub fn criterion_benchmark(c: &mut Criterion) { + let mut iblt = OurIBLT::new(); + for _ in 0..CAPACITY { + let mut v = [0u8; 32]; + v.fill_with(rand::random); + iblt.insert(&v); + } + + c.bench_function("clone", |b| b.iter(|| iblt.clone())); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/iblt/benches/list.rs b/iblt/benches/list.rs new file mode 100644 index 000000000..9987ab3df --- /dev/null +++ b/iblt/benches/list.rs @@ -0,0 +1,39 @@ +use criterion::{criterion_group, criterion_main, Criterion}; +use iblt::IBLT; + +const CAPACITY: usize = 4096; +type IBLT32 = IBLT<[u8; 32], CAPACITY, 3>; +type IBLT16 = IBLT<[u8; 16], CAPACITY, 3>; +type IBLT8 = IBLT<[u8; 8], CAPACITY, 3>; + +pub fn criterion_benchmark(c: &mut Criterion) { + let mut iblt = IBLT32::new(); + for _ in 0..CAPACITY { + let mut v = [0u8; 32]; + v.fill_with(rand::random); + iblt.insert(&v); + } + + c.bench_function("list 32", |b| b.iter(|| iblt.list(|_, _| {}))); + + let mut iblt = IBLT16::new(); + for _ in 0..CAPACITY { + let mut v = [0u8; 16]; + v.fill_with(rand::random); + iblt.insert(&v); + } + + c.bench_function("list 16", |b| b.iter(|| iblt.list(|_, _| {}))); + + let mut iblt = IBLT8::new(); + for _ in 0..CAPACITY { + let mut v = [0u8; 8]; + v.fill_with(rand::random); + iblt.insert(&v); + } + + c.bench_function("list 8", |b| b.iter(|| iblt.list(|_, _| {}))); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/iblt/benches/merge.rs b/iblt/benches/merge.rs new file mode 100644 index 000000000..05d994562 --- /dev/null +++ b/iblt/benches/merge.rs @@ -0,0 +1,78 @@ +use criterion::{criterion_group, criterion_main, Criterion}; +use iblt::IBLT; + +const CAPACITY: usize = 4096; +type IBLT32 = IBLT<[u8; 32], CAPACITY, 3>; +type IBLT16 = IBLT<[u8; 16], CAPACITY, 3>; +type IBLT8 = IBLT<[u8; 8], CAPACITY, 3>; + +pub fn criterion_benchmark(c: &mut Criterion) { + let mut orig = IBLT32::new(); + let mut new = IBLT32::new(); + for _ in 0..CAPACITY { + let mut v = [0u8; 32]; + v.fill_with(rand::random); + orig.insert(&v); + } + + for _ in 0..CAPACITY { + let mut v = [0u8; 32]; + v.fill_with(rand::random); + new.insert(&v); + } + + c.bench_function("merge 32", |b| { + b.iter(|| { + let mut new2 = new.clone(); + orig.subtract(&new); + new2.subtract(&orig); + }) + }); + + let mut orig = IBLT16::new(); + let mut new = IBLT16::new(); + for _ in 0..CAPACITY { + let mut v = [0u8; 16]; + v.fill_with(rand::random); + orig.insert(&v); + } + + for _ in 0..CAPACITY { + let mut v = [0u8; 16]; + v.fill_with(rand::random); + new.insert(&v); + } + + c.bench_function("merge 16", |b| { + b.iter(|| { + let mut new2 = new.clone(); + orig.subtract(&new); + new2.subtract(&orig); + }) + }); + + let mut orig = IBLT8::new(); + let mut new = IBLT8::new(); + for _ in 0..CAPACITY { + let mut v = [0u8; 8]; + v.fill_with(rand::random); + orig.insert(&v); + } + + for _ in 0..CAPACITY { + let mut v = [0u8; 8]; + v.fill_with(rand::random); + new.insert(&v); + } + + c.bench_function("merge 8", |b| { + b.iter(|| { + let mut new2 = new.clone(); + orig.subtract(&new); + new2.subtract(&orig); + }) + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/iblt/benches/to_from_bytes.rs b/iblt/benches/to_from_bytes.rs new file mode 100644 index 000000000..ede76d504 --- /dev/null +++ b/iblt/benches/to_from_bytes.rs @@ -0,0 +1,19 @@ +use criterion::{criterion_group, criterion_main, Criterion}; +use iblt::IBLT; + +const CAPACITY: usize = 4096; +type OurIBLT = IBLT<[u8; 32], CAPACITY, 3>; + +pub fn criterion_benchmark(c: &mut Criterion) { + let mut iblt = OurIBLT::new(); + for _ in 0..CAPACITY { + let mut v = [0u8; 32]; + v.fill_with(rand::random); + iblt.insert(&v); + } + + c.bench_function("to_from_bytes", |b| b.iter(|| OurIBLT::from_bytes(iblt.as_bytes()))); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches);