move to heap allocation

Signed-off-by: Erik Hollensbe <git@hollensbe.org>
Erik Hollensbe 2022-04-16 05:01:38 -07:00
parent 1b2485b277
commit 8dac9ccf07
GPG key ID: 4BB0E241A863B389

@@ -6,8 +6,6 @@
  * https://www.zerotier.com/
  */
-use std::borrow::Cow;
 use zerocopy::{AsBytes, FromBytes};
 #[cfg(not(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64", target_arch = "powerpc64")))]
@@ -73,21 +71,23 @@ fn murmurhash32_mix32(mut x: u32) -> u32 {
 #[repr(C)]
 pub struct IBLT<T, const BUCKETS: usize, const HASHES: usize>
 where
-    T: FromBytes + AsBytes + Sized + Clone,
+    T: FromBytes + AsBytes + Default + Sized + Clone,
 {
-    check_hash: [u32; BUCKETS],
-    count: [i8; BUCKETS],
-    key: [T; BUCKETS],
+    check_hash: Box<Vec<u32>>,
+    count: Box<Vec<i8>>,
+    key: Box<Vec<T>>,
 }
 
 impl<T, const BUCKETS: usize, const HASHES: usize> Clone for IBLT<T, BUCKETS, HASHES>
 where
-    T: FromBytes + AsBytes + Sized + Clone,
+    T: FromBytes + AsBytes + Default + Sized + Clone,
 {
     fn clone(&self) -> Self {
         unsafe {
-            let mut tmp: Self = std::mem::MaybeUninit::uninit().assume_init();
-            std::ptr::copy_nonoverlapping((self as *const Self).cast::<u8>(), (&mut tmp as *mut Self).cast::<u8>(), Self::SIZE_BYTES);
+            let mut tmp = Self::new();
+            std::ptr::copy_nonoverlapping(self.check_hash.as_ptr(), tmp.check_hash.as_mut_ptr(), BUCKETS);
+            std::ptr::copy_nonoverlapping(self.count.as_ptr(), tmp.count.as_mut_ptr(), BUCKETS);
+            std::ptr::copy_nonoverlapping(self.key.as_ptr(), tmp.key.as_mut_ptr(), BUCKETS);
             tmp
         }
     }
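
Side note, not part of this commit: since the buckets now live in pre-sized Box<Vec<..>> fields, Clone no longer strictly needs raw pointer copies. A minimal safe sketch, assuming the IBLT definition from this diff:

// Hypothetical alternative to the unsafe copy above: clone the heap vectors directly.
impl<T, const BUCKETS: usize, const HASHES: usize> Clone for IBLT<T, BUCKETS, HASHES>
where
    T: FromBytes + AsBytes + Default + Sized + Clone,
{
    fn clone(&self) -> Self {
        Self {
            check_hash: self.check_hash.clone(),
            count: self.count.clone(),
            key: self.key.clone(),
        }
    }
}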
@@ -95,13 +95,12 @@ where
 impl<T, const BUCKETS: usize, const HASHES: usize> IBLT<T, BUCKETS, HASHES>
 where
-    T: FromBytes + AsBytes + Sized + Clone,
+    T: FromBytes + AsBytes + Default + Sized + Clone,
 {
     /// Number of bytes each bucket consumes (not contiguously, but doesn't matter).
     const BUCKET_SIZE_BYTES: usize = std::mem::size_of::<T>() + 4 + 1;
 
     /// Number of buckets in this IBLT.
-    #[allow(unused)]
     pub const BUCKETS: usize = BUCKETS;
 
     /// Size of this IBLT in bytes.
@@ -110,14 +109,43 @@ where
     /// Create a new zeroed IBLT.
     #[inline(always)]
     pub fn new() -> Self {
-        assert!(Self::SIZE_BYTES <= std::mem::size_of::<Self>());
         assert!(BUCKETS < (i32::MAX as usize));
-        unsafe { std::mem::zeroed() }
+
+        let mut s = Self {
+            check_hash: Box::new(Vec::with_capacity(BUCKETS)),
+            count: Box::new(Vec::with_capacity(BUCKETS)),
+            key: Box::new(Vec::with_capacity(BUCKETS)),
+        };
+
+        s.reset();
+        s
     }
 
     #[inline(always)]
-    pub fn as_bytes(&self) -> &[u8] {
-        unsafe { &*std::ptr::slice_from_raw_parts((self as *const Self).cast::<u8>(), Self::SIZE_BYTES) }
+    pub fn as_bytes(&self) -> Box<Vec<u8>> {
+        let len: usize = BUCKETS * 4 + BUCKETS + BUCKETS * std::mem::size_of::<T>();
+        let mut buf = Box::new(Vec::with_capacity(len));
+
+        // we can probably make this faster
+        for b in self.check_hash.iter() {
+            for b2 in b.as_bytes() {
+                buf.push(*b2)
+            }
+        }
+
+        for b in self.count.iter() {
+            buf.push(*b as u8)
+        }
+
+        for b in self.key.iter() {
+            for b2 in b.as_bytes() {
+                buf.push(*b2)
+            }
+        }
+
+        buf
     }
 
     /// Obtain an IBLT from bytes in memory.
@@ -126,20 +154,46 @@
     /// Cow to 'b' that is just a cast. If re-alignment is necessary it returns an owned Cow containing a properly
     /// aligned copy. This makes conversion a nearly free cast when alignment adjustment isn't needed.
     #[inline(always)]
-    pub fn from_bytes<'a>(b: &'a [u8]) -> Option<Cow<'a, Self>> {
+    pub fn from_bytes(b: Box<Vec<u8>>) -> Option<Self> {
         if b.len() == Self::SIZE_BYTES {
-            #[cfg(not(any(target_arch = "x86_64", target_arch = "x86", target_arch = "powerpc64", target_arch = "aarch64")))]
-            {
-                if b.as_ptr().align_offset(8) == 0 {
-                    Some(Cow::Borrowed(unsafe { &*b.as_ptr().cast() }))
-                } else {
-                    // NOTE: clone() is implemented above using a raw copy so that alignment doesn't matter.
-                    Some(Cow::Owned(unsafe { &*b.as_ptr().cast::<Self>() }.clone()))
-                }
-            }
+            // FIXME I commented this out because I do not have access to the architectures needed.
+            // #[cfg(not(any(target_arch = "x86_64", target_arch = "x86", target_arch = "powerpc64", target_arch = "aarch64")))]
+            // {
+            //     if b.as_ptr().align_offset(8) == 0 {
+            //         Some(Cow::Borrowed(unsafe { &*b.as_ptr().cast() }))
+            //     } else {
+            //         // NOTE: clone() is implemented above using a raw copy so that alignment doesn't matter.
+            //         Some(Cow::Owned(unsafe { &*b.as_ptr().cast::<Self>() }.clone()))
+            //     }
+            // }
             #[cfg(any(target_arch = "x86_64", target_arch = "x86", target_arch = "powerpc64", target_arch = "aarch64"))]
             {
-                Some(Cow::Borrowed(unsafe { &*b.as_ptr().cast() }))
+                let mut tmp = Self::new();
+
+                // FIXME much easier ways to do this with the copy methods; probably ripe for a
+                // refactor
+                let mut i = 0;
+                let mut y = 0;
+
+                for _ in 0..BUCKETS {
+                    tmp.check_hash.push((b[y] as u32) << 3 | (b[y + 1] as u32) << 2 | (b[y + 2] as u32) << 1 | b[y + 3] as u32);
+                    y += 4;
+                }
+
+                i *= BUCKETS * 4;
+
+                for y in 0..BUCKETS {
+                    tmp.count.push(b[y + i] as i8);
+                }
+
+                i += BUCKETS;
+
+                for y in 0..BUCKETS {
+                    let byt = &b[(y * i)..(y * i + std::mem::size_of::<T>())];
+                    tmp.key.push(T::read_from(byt).unwrap());
+                }
+
+                Some(tmp)
             }
         } else {
             None
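
The FIXME above already notes that the byte shuffling is ripe for a refactor. For illustration only, a standalone sketch of the same pack/unpack layout (all check hashes, then all counts, then all keys). The names pack/unpack are hypothetical, [u8; 16] stands in for the generic key type T, and little-endian is an arbitrary choice made here for definiteness; the commit's own as_bytes() serializes via zerocopy's native-endian view.

// Illustration only: a possible pack/unpack for the bucket layout emitted by as_bytes().
fn pack(check_hash: &[u32], count: &[i8], key: &[[u8; 16]]) -> Vec<u8> {
    let mut buf = Vec::with_capacity(check_hash.len() * 4 + count.len() + key.len() * 16);
    for h in check_hash {
        buf.extend_from_slice(&h.to_le_bytes()); // fixed-width u32, no manual shifting
    }
    for c in count {
        buf.push(*c as u8);
    }
    for k in key {
        buf.extend_from_slice(k);
    }
    buf
}

fn unpack(b: &[u8], buckets: usize) -> Option<(Vec<u32>, Vec<i8>, Vec<[u8; 16]>)> {
    // Reject anything that is not exactly buckets * (4 + 1 + 16) bytes long.
    if b.len() != buckets * (4 + 1 + 16) {
        return None;
    }
    let (hashes, rest) = b.split_at(buckets * 4);
    let (counts, keys) = rest.split_at(buckets);
    let check_hash: Vec<u32> = hashes
        .chunks_exact(4)
        .map(|c| {
            let mut w = [0u8; 4];
            w.copy_from_slice(c);
            u32::from_le_bytes(w)
        })
        .collect();
    let count: Vec<i8> = counts.iter().map(|c| *c as i8).collect();
    let key: Vec<[u8; 16]> = keys
        .chunks_exact(16)
        .map(|c| {
            let mut k = [0u8; 16];
            k.copy_from_slice(c);
            k
        })
        .collect();
    Some((check_hash, count, key))
}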
@@ -149,7 +203,12 @@
     /// Zero this IBLT.
     #[inline(always)]
     pub fn reset(&mut self) {
-        unsafe { std::ptr::write_bytes((self as *mut Self).cast::<u8>(), 0, std::mem::size_of::<Self>()) };
+        self.check_hash.clear();
+        self.count.clear();
+        self.key.clear();
+        self.check_hash.resize(BUCKETS, 0);
+        self.count.resize(BUCKETS, 0);
+        self.key.resize(BUCKETS, Default::default());
     }
 
     pub(crate) fn ins_rem(&mut self, key: T, delta: i8) {
@@ -200,7 +259,7 @@
     /// bogus items that were never added. This is not an issue with this protocol as it would just result
     /// in an unsatisfied record request.
     pub fn list<F: FnMut(T, bool)>(&mut self, mut f: F) -> bool {
-        let mut queue: Vec<u32> = Vec::with_capacity(BUCKETS);
+        let mut queue: Box<Vec<u32>> = Box::new(Vec::with_capacity(BUCKETS));
 
         for i in 0..BUCKETS {
             let count = self.count[i];
@@ -242,7 +301,7 @@
                     }
                 }
 
-                f(key.clone(), count == 1);
+                f(key, count == 1);
             }
         }
@@ -252,15 +311,15 @@
 impl<T, const BUCKETS: usize, const HASHES: usize> PartialEq for IBLT<T, BUCKETS, HASHES>
 where
-    T: AsBytes + FromBytes + Clone,
+    T: AsBytes + FromBytes + Default + Clone,
 {
     #[inline(always)]
     fn eq(&self, other: &Self) -> bool {
-        self.as_bytes().eq(other.as_bytes())
+        self.as_bytes().eq(&other.as_bytes())
     }
 }
 
-impl<T, const BUCKETS: usize, const HASHES: usize> Eq for IBLT<T, BUCKETS, HASHES> where T: AsBytes + FromBytes + Clone {}
+impl<T, const BUCKETS: usize, const HASHES: usize> Eq for IBLT<T, BUCKETS, HASHES> where T: AsBytes + FromBytes + Default + Clone {}
 
 #[cfg(test)]
 mod tests {
@@ -315,7 +374,7 @@ mod tests {
     #[test]
     fn struct_packing() {
         // Typical case
-        let mut tmp = IBLT::<[u8; 64], 16, 3>::new();
+        let mut tmp = IBLT::<[u8; 32], 16, 3>::new();
         tmp.check_hash.fill(0x01010101);
         tmp.count.fill(1);
         tmp.key.iter_mut().for_each(|x| x.fill(1));
@@ -348,7 +407,7 @@ mod tests {
         const LENGTH: usize = 16;
         const CAPACITY: usize = 4096;
         let mut rn: u128 = 0xd3b07384d113edec49eaa6238ad5ff00;
-        let mut expected: HashSet<u128> = HashSet::with_capacity(4096);
+        let mut expected: HashSet<u128> = HashSet::with_capacity(CAPACITY);
         let mut count = LENGTH;
         while count <= CAPACITY {
             let mut test = IBLT::<[u8; LENGTH], CAPACITY, HASHES>::new();
@@ -380,8 +439,8 @@
         const LENGTH: usize = 16;
         let mut rn: u128 = 0xd3b07384d113edec49eaa6238ad5ff00;
         let mut missing_count = 1024;
-        let mut missing: HashSet<u128> = HashSet::with_capacity(CAPACITY * 2);
-        let mut all: HashSet<u128> = HashSet::with_capacity(REMOTE_SIZE);
+        let mut missing: Box<HashSet<u128>> = Box::new(HashSet::with_capacity(CAPACITY * 2));
+        let mut all: Box<HashSet<u128>> = Box::new(HashSet::with_capacity(REMOTE_SIZE));
         while missing_count <= CAPACITY {
             missing.clear();
             all.clear();
@@ -429,10 +488,17 @@
         other_thing: [u8; 32],
     }
 
+    impl Default for TestType {
+        fn default() -> Self {
+            Self::zeroed()
+        }
+    }
+
     impl TestType {
         pub fn zeroed() -> Self {
             unsafe { std::mem::zeroed() }
         }
 
         pub fn new() -> Self {
             let mut newtype = Self::zeroed();
             newtype.thing.fill_with(|| rand::random());
@@ -443,9 +509,9 @@
     #[test]
     fn test_polymorphism() {
-        const CAPACITY: usize = 512;
-        let mut full = IBLT::<TestType, CAPACITY, HASHES>::new();
-        let mut zero = IBLT::<TestType, CAPACITY, HASHES>::new();
+        const CAPACITY: usize = 4096;
+        let mut full = Box::new(IBLT::<TestType, CAPACITY, HASHES>::new());
+        let mut zero = Box::new(IBLT::<TestType, CAPACITY, HASHES>::new());
 
         for _ in 0..CAPACITY {
             zero.insert(TestType::zeroed());
@@ -473,4 +539,33 @@
             }
         });
     }
+
+    #[test]
+    fn test_to_from_bytes() {
+        // Typical case
+        let mut tmp = IBLT::<[u8; 32], 16, 3>::new();
+        tmp.check_hash.fill(0x01010101);
+        tmp.count.fill(1);
+        tmp.key.iter_mut().for_each(|x| x.fill(1));
+
+        let mut tmp2 = IBLT::<[u8; 32], 16, 3>::from_bytes(tmp.as_bytes()).unwrap();
+        tmp2.subtract(&tmp);
+        tmp2.list(|_, new| assert!(!new));
+    }
+
+    #[test]
+    fn test_clone() {
+        // Typical case
+        let mut tmp = IBLT::<[u8; 32], 16, 3>::new();
+        tmp.check_hash.fill(0x01010101);
+        tmp.count.fill(1);
+        tmp.key.iter_mut().for_each(|x| x.fill(1));
+
+        let mut tmp2 = tmp.clone();
+        tmp2.subtract(&tmp);
+        tmp2.list(|_, new| assert!(!new));
+    }
 }