mirror of
https://github.com/zerotier/ZeroTierOne.git
synced 2025-04-25 08:27:39 +02:00
commit changes before merge
This commit is contained in:
parent
e22afcd23b
commit
29beb39d94
7 changed files with 112 additions and 327 deletions
|
@ -1,3 +1,3 @@
|
|||
{
|
||||
"workspace.name" : "ZeroTier"
|
||||
"workspace.name" : "tetanus"
|
||||
}
|
||||
|
|
|
@ -11,12 +11,6 @@ use async_trait::async_trait;
|
|||
/// Size of keys, which is the size of a 512-bit hash. This is a protocol constant.
|
||||
pub const KEY_SIZE: usize = 64;
|
||||
|
||||
/// Minimum possible value in a key range (all zero).
|
||||
pub const MIN_KEY: [u8; KEY_SIZE] = [0; KEY_SIZE];
|
||||
|
||||
/// Maximum possible value in a key range (all 0xff).
|
||||
pub const MAX_KEY: [u8; KEY_SIZE] = [0xff; KEY_SIZE];
|
||||
|
||||
/// Result returned by DataStore::store().
|
||||
pub enum StoreResult {
|
||||
/// Entry was accepted.
|
||||
|
@ -32,6 +26,18 @@ pub enum StoreResult {
|
|||
Rejected,
|
||||
}
|
||||
|
||||
/// Convert a prefix into an inclusive range of keys.
|
||||
///
|
||||
/// This is a convenience function for implementing keys_under() with data stores that support
|
||||
/// straightforward range queries with full keys.
|
||||
pub fn prefix_to_range(prefix: u64, prefix_bits: u32) -> ([u8; KEY_SIZE], [u8; KEY_SIZE]) {
    // Only the most significant `prefix_bits` bits of `prefix` are meaningful. Values above 64
    // are treated as 64 (the whole 64-bit prefix is significant).
    let bits = prefix_bits.min(64);

    // Mask covering the low-order bits of the 64-bit prefix that are NOT significant.
    // `checked_shr` is used instead of `wrapping_shr` because the wrapping variant reduces the
    // shift amount modulo 64, which turns a shift by 64 into a shift by 0 and yields the wrong
    // mask at both extremes (prefix_bits == 0 and prefix_bits == 64).
    let free_bits_mask = u64::MAX.checked_shr(bits).unwrap_or(0);

    // Lower bound: significant prefix bits followed by all zero bits.
    let mut a = [0_u8; KEY_SIZE];
    a[0..8].copy_from_slice(&(prefix & !free_bits_mask).to_be_bytes());

    // Upper bound: significant prefix bits followed by all one bits.
    let mut b = [0xff_u8; KEY_SIZE];
    b[0..8].copy_from_slice(&(prefix | free_bits_mask).to_be_bytes());

    (a, b)
}
|
||||
|
||||
/// API to be implemented by the data set we want to replicate.
|
||||
///
|
||||
/// Keys as understood by syncwhole are SHA512 hashes of values. The user can of course
|
||||
|
@ -60,9 +66,6 @@ pub trait DataStore: Sync + Send {
|
|||
/// Maximum size of a value in bytes.
|
||||
const MAX_VALUE_SIZE: usize;
|
||||
|
||||
/// Get the current wall clock in milliseconds since Unix epoch.
|
||||
fn clock(&self) -> i64;
|
||||
|
||||
/// Get the domain of this data store.
|
||||
///
|
||||
/// This is an arbitrary unique identifier that must be the same for all nodes that
|
||||
|
@ -70,15 +73,13 @@ pub trait DataStore: Sync + Send {
|
|||
/// data across data sets if this is not desired.
|
||||
fn domain(&self) -> &str;
|
||||
|
||||
/// Get an item if it exists as of a given reference time.
|
||||
async fn load(&self, reference_time: i64, key: &[u8]) -> Option<Self::ValueRef>;
|
||||
|
||||
/// Check whether this data store contains a key.
|
||||
/// Get the reference time that should be used on this side to query remote peers.
|
||||
///
|
||||
/// The default implementation just calls load(). Override if a faster version is possible.
|
||||
async fn contains(&self, reference_time: i64, key: &[u8]) -> bool {
|
||||
self.load(reference_time, key).await.is_some()
|
||||
}
|
||||
/// This is typically the local "wall clock" time in milliseconds since Unix epoch.
|
||||
fn reference_time(&self) -> i64;
|
||||
|
||||
/// Get an item by identity hash key if it exists.
|
||||
async fn load(&self, key: &[u8; KEY_SIZE]) -> Option<Self::ValueRef>;
|
||||
|
||||
/// Store an item in the data store and return its status.
|
||||
///
|
||||
|
@ -103,22 +104,19 @@ pub trait DataStore: Sync + Send {
|
|||
/// Rejected should only be returned if the value actually fails a validity check, signature
|
||||
/// verification, proof of work check, or some other required criteria. Ignored must be
|
||||
/// returned if the value is valid but is too old or was rejected for some other normal reason.
|
||||
async fn store(&self, key: &[u8], value: &[u8]) -> StoreResult;
|
||||
async fn store(&self, key: &[u8; KEY_SIZE], value: &[u8]) -> StoreResult;
|
||||
|
||||
/// Get the number of items in a range.
|
||||
async fn count(&self, reference_time: i64, key_range_start: &[u8], key_range_end: &[u8]) -> u64;
|
||||
|
||||
/// Get the total number of records in this data store.
|
||||
async fn total_count(&self) -> u64;
|
||||
|
||||
/// Iterate through a series of keys in a range (inclusive), stopping when function returns false.
|
||||
/// Iterate through keys under a given key prefix.
|
||||
///
|
||||
/// The default implementation uses for_each() and just drops the value. Specialize if you can do it faster
|
||||
/// by only retrieving keys.
|
||||
async fn for_each_key<F: Send + FnMut(&[u8]) -> bool>(&self, reference_time: i64, key_range_start: &[u8], key_range_end: &[u8], mut f: F) {
|
||||
self.for_each(reference_time, key_range_start, key_range_end, |k, _| f(k)).await;
|
||||
}
|
||||
/// The prefix is a bit string up to 64 bits long. The implementation can technically interpret this
|
||||
/// any way it wants, but usually this would be the first 64 bits of the key as a big-endian bit string.
|
||||
///
|
||||
/// Keys MUST be output in ascending binary sort order.
|
||||
async fn keys_under<F: Send + FnMut(&[u8]) -> bool>(&self, reference_time: i64, prefix: u64, prefix_bits: u32, f: F);
|
||||
|
||||
/// Iterate through a series of entries in a range (inclusive), stopping when function returns false.
|
||||
async fn for_each<F: Send + FnMut(&[u8], &Self::ValueRef) -> bool>(&self, reference_time: i64, key_range_start: &[u8], key_range_end: &[u8], f: F);
|
||||
/// Load all record values under a given key prefix.
|
||||
///
|
||||
/// This should clear and fill the result, fetching up to the limit values under a given key prefix.
|
||||
/// Values may be pushed into the vector in any order.
|
||||
async fn values_under(&self, prefix: u64, prefix_bits: u32, result: &mut Vec<Option<Self::ValueRef>>, limit: usize);
|
||||
}
|
||||
|
|
|
@ -53,6 +53,7 @@ fn next_iteration_index(mut x: u64, hash_no: u64) -> u64 {
|
|||
/// The best value for HASHES seems to be 3 for an optimal fill of 80%.
|
||||
#[repr(C)]
|
||||
pub struct IBLT<const BUCKETS: usize, const HASHES: usize> {
|
||||
total_count: i64, // always stored little-endian in memory
|
||||
key: [u64; BUCKETS],
|
||||
check_hash: [u32; BUCKETS],
|
||||
count: [i8; BUCKETS],
|
||||
|
@ -76,7 +77,7 @@ impl<const BUCKETS: usize, const HASHES: usize> IBLT<BUCKETS, HASHES> {
|
|||
pub const BUCKETS: usize = BUCKETS;
|
||||
|
||||
/// Size of this IBLT in bytes.
|
||||
pub const SIZE_BYTES: usize = BUCKETS * BUCKET_SIZE_BYTES;
|
||||
pub const SIZE_BYTES: usize = 8 + (BUCKETS * BUCKET_SIZE_BYTES); // total_count + buckets
|
||||
|
||||
/// Create a new zeroed IBLT.
|
||||
#[inline(always)]
|
||||
|
@ -126,7 +127,13 @@ impl<const BUCKETS: usize, const HASHES: usize> IBLT<BUCKETS, HASHES> {
|
|||
}
|
||||
}
|
||||
|
||||
fn ins_rem(&mut self, key: u64, delta: i8) {
|
||||
/// Get the total number of set items that have been added to this IBLT.
|
||||
pub fn count(&self) -> u64 {
|
||||
i64::from_le(self.total_count).max(0) as u64
|
||||
}
|
||||
|
||||
pub(crate) fn ins_rem(&mut self, key: u64, delta: i8) {
|
||||
self.total_count = i64::from_le(self.total_count).wrapping_add(delta as i64).to_le();
|
||||
let check_hash = get_check_hash(key);
|
||||
let mut iteration_index = u64::from_le(key);
|
||||
for k in 0..(HASHES as u64) {
|
||||
|
@ -152,14 +159,20 @@ impl<const BUCKETS: usize, const HASHES: usize> IBLT<BUCKETS, HASHES> {
|
|||
|
||||
/// Subtract another IBLT from this one to get a set difference.
|
||||
pub fn subtract(&mut self, other: &Self) {
|
||||
self.total_count = i64::from_le(self.total_count).wrapping_sub(i64::from_le(other.total_count.max(0))).max(0).to_le();
|
||||
self.key.iter_mut().zip(other.key.iter()).for_each(|(a, b)| *a ^= *b);
|
||||
self.check_hash.iter_mut().zip(other.check_hash.iter()).for_each(|(a, b)| *a ^= *b);
|
||||
self.count.iter_mut().zip(other.count.iter()).for_each(|(a, b)| *a = a.wrapping_sub(*b));
|
||||
}
|
||||
|
||||
/// List as many entries in this IBLT as can be extracted.
|
||||
/// True is returned if extraction was 100% successful. False indicates that
|
||||
/// some entries were not extractable.
|
||||
///
|
||||
/// True is returned if the number of extracted items was exactly equal to the total number of items
|
||||
/// in this set summary. A return of false indicates incomplete extraction or an invalid IBLT.
|
||||
///
|
||||
/// Due to the small check hash sizes used in this IBLT there is a very small chance this will list
|
||||
/// bogus items that were never added. This is not an issue with this protocol as it would just result
|
||||
/// in an unsatisfied record request.
|
||||
pub fn list<F: FnMut(u64)>(mut self, mut f: F) -> bool {
|
||||
let mut queue: Vec<u32> = Vec::with_capacity(BUCKETS);
|
||||
|
||||
|
@ -170,7 +183,10 @@ impl<const BUCKETS: usize, const HASHES: usize> IBLT<BUCKETS, HASHES> {
|
|||
}
|
||||
}
|
||||
|
||||
'list_main: loop {
|
||||
let total_count = i64::from_le(self.total_count);
|
||||
let mut listed = 0;
|
||||
|
||||
'list_main: while listed < total_count {
|
||||
let i = queue.pop();
|
||||
let i = if i.is_some() {
|
||||
i.unwrap() as usize
|
||||
|
@ -182,6 +198,7 @@ impl<const BUCKETS: usize, const HASHES: usize> IBLT<BUCKETS, HASHES> {
|
|||
let check_hash = self.check_hash[i];
|
||||
let count = self.count[i];
|
||||
if (count == 1 || count == -1) && check_hash == get_check_hash(key) {
|
||||
listed += 1;
|
||||
f(key);
|
||||
|
||||
let mut iteration_index = u64::from_le(key);
|
||||
|
@ -205,7 +222,7 @@ impl<const BUCKETS: usize, const HASHES: usize> IBLT<BUCKETS, HASHES> {
|
|||
}
|
||||
}
|
||||
|
||||
self.count.iter().all(|x| *x == 0) && self.key.iter().all(|x| *x == 0)
|
||||
listed == total_count
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -8,11 +8,11 @@
|
|||
|
||||
pub(crate) mod iblt;
|
||||
pub(crate) mod protocol;
|
||||
pub(crate) mod utils;
|
||||
pub(crate) mod varint;
|
||||
|
||||
pub mod datastore;
|
||||
pub mod host;
|
||||
pub mod node;
|
||||
pub mod utils;
|
||||
|
||||
pub use async_trait;
|
||||
|
|
|
@ -48,7 +48,7 @@ fn get_random_bytes(mut buf: &mut [u8]) {
|
|||
pub struct TestNodeHost {
|
||||
pub name: String,
|
||||
pub config: Config,
|
||||
pub records: tokio::sync::Mutex<BTreeMap<[u8; 64], [u8; 64]>>,
|
||||
pub records: tokio::sync::RwLock<BTreeMap<[u8; 64], [u8; 64]>>,
|
||||
}
|
||||
|
||||
impl TestNodeHost {
|
||||
|
@ -63,7 +63,7 @@ impl TestNodeHost {
|
|||
Self {
|
||||
name: test_no.to_string(),
|
||||
config: Config::default(),
|
||||
records: tokio::sync::Mutex::new(s),
|
||||
records: tokio::sync::RwLock::new(s),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -105,54 +105,34 @@ impl DataStore for TestNodeHost {
|
|||
"test"
|
||||
}
|
||||
|
||||
async fn load(&self, _: i64, key: &[u8]) -> Option<Self::ValueRef> {
|
||||
let key = key.try_into();
|
||||
if key.is_ok() {
|
||||
let key: [u8; 64] = key.unwrap();
|
||||
let records = self.records.lock().await;
|
||||
let value = records.get(&key);
|
||||
if value.is_some() {
|
||||
return Some(value.unwrap().clone());
|
||||
}
|
||||
async fn load(&self, key: &[u8; 64]) -> Option<Self::ValueRef> {
|
||||
let records = self.records.read().await;
|
||||
let value = records.get(key);
|
||||
if value.is_some() {
|
||||
Some(value.unwrap().clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
return None;
|
||||
}
|
||||
|
||||
async fn store(&self, key: &[u8], value: &[u8]) -> StoreResult {
|
||||
let key = key.try_into();
|
||||
if key.is_ok() && value.len() == 64 {
|
||||
let key: [u8; 64] = key.unwrap();
|
||||
let value: [u8; 64] = value.try_into().unwrap();
|
||||
if key == Self::sha512(&[&value]) {
|
||||
if self.records.lock().await.insert(key, value).is_none() {
|
||||
StoreResult::Ok
|
||||
} else {
|
||||
StoreResult::Duplicate
|
||||
}
|
||||
async fn store(&self, key: &[u8; 64], value: &[u8]) -> StoreResult {
|
||||
let value: [u8; 64] = value.try_into();
|
||||
if value.is_ok() {
|
||||
if self.records.write().await.insert(key.clone(), value).is_none() {
|
||||
StoreResult::Ok
|
||||
} else {
|
||||
StoreResult::Rejected
|
||||
StoreResult::Duplicate
|
||||
}
|
||||
} else {
|
||||
StoreResult::Rejected
|
||||
}
|
||||
}
|
||||
|
||||
async fn count(&self, _: i64, key_range_start: &[u8], key_range_end: &[u8]) -> u64 {
|
||||
let start: [u8; 64] = key_range_start.try_into().unwrap();
|
||||
let end: [u8; 64] = key_range_end.try_into().unwrap();
|
||||
self.records.lock().await.range((Included(start), Included(end))).count() as u64
|
||||
}
|
||||
|
||||
async fn total_count(&self) -> u64 {
|
||||
self.records.lock().await.len() as u64
|
||||
}
|
||||
|
||||
async fn for_each<F: Send + FnMut(&[u8], &Self::ValueRef) -> bool>(&self, _reference_time: i64, key_range_start: &[u8], key_range_end: &[u8], mut f: F) {
|
||||
let start: [u8; 64] = key_range_start.try_into().unwrap();
|
||||
let end: [u8; 64] = key_range_end.try_into().unwrap();
|
||||
let records = self.records.lock().await;
|
||||
async fn keys_under<F: Send + FnMut(&[u8]) -> bool>(&self, reference_time: i64, prefix: u64, prefix_bits: u32, f: F) {
|
||||
let (start, end) = prefix_to_range(prefix, prefix_bits);
|
||||
let records = self.records.read().await;
|
||||
for (k, v) in records.range((Included(start), Included(end))) {
|
||||
if !f(k, v) {
|
||||
if !f(k) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -35,10 +35,6 @@ const HOUSEKEEPING_PERIOD: i64 = SYNC_STATUS_PERIOD;
|
|||
/// Inactivity timeout for connections in milliseconds.
|
||||
const CONNECTION_TIMEOUT: i64 = SYNC_STATUS_PERIOD * 4;
|
||||
|
||||
/// Announce when we get records from peers if sync status estimate is more than this threshold.
|
||||
/// This is used to stop us from spamming with HaveRecords while catching up.
|
||||
const ANNOUNCE_IF_SYNCED_MORE_THAN: f64 = 0.95;
|
||||
|
||||
/// Information about a remote node to which we are connected.
|
||||
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct RemoteNodeInfo {
|
||||
|
@ -97,7 +93,6 @@ impl<D: DataStore + 'static, H: Host + 'static> Node<D, H> {
|
|||
announce_queue: Mutex::new(HashMap::with_capacity(256)),
|
||||
bind_address,
|
||||
starting_instant: Instant::now(),
|
||||
sync_completeness_estimate: AtomicU64::new((0.0_f64).to_bits()),
|
||||
});
|
||||
|
||||
Ok(Self {
|
||||
|
@ -118,13 +113,6 @@ impl<D: DataStore + 'static, H: Host + 'static> Node<D, H> {
|
|||
&self.internal.host
|
||||
}
|
||||
|
||||
/// Broadcast a new record to the world.
|
||||
///
|
||||
/// This should be called when new records are added to the synchronized data store
|
||||
/// that are created locally. If this isn't called it may take a while for normal
|
||||
/// sync to pick up and propagate the record.
|
||||
pub async fn broadcast_new_record(&self, key: &[u8], value: &[u8]) {}
|
||||
|
||||
/// Attempt to connect to an explicitly specified TCP endpoint.
|
||||
pub async fn connect(&self, endpoint: &SocketAddr) -> std::io::Result<bool> {
|
||||
self.internal.clone().connect(endpoint, Instant::now().add(Duration::from_millis(CONNECTION_TIMEOUT as u64))).await
|
||||
|
@ -144,15 +132,6 @@ impl<D: DataStore + 'static, H: Host + 'static> Node<D, H> {
|
|||
pub async fn connection_count(&self) -> usize {
|
||||
self.internal.connections.lock().await.len()
|
||||
}
|
||||
|
||||
/// Get a value from 0.0 to 1.0 estimating how synchronized we are with the network.
|
||||
///
|
||||
/// This is an inexact estimate since it's based on record counts and it's possible for
|
||||
/// two nodes to have the same count but disjoint sets. It tends to be fairly good in
|
||||
/// practice though unless you have been disconnected for a very long time.
|
||||
pub async fn sync_completeness_estimate(&self) -> f64 {
|
||||
f64::from_bits(self.internal.sync_completeness_estimate.load(Ordering::Relaxed))
|
||||
}
|
||||
}
|
||||
|
||||
impl<D: DataStore + 'static, H: Host + 'static> Drop for Node<D, H> {
|
||||
|
@ -197,9 +176,6 @@ pub struct NodeInternal<D: DataStore + 'static, H: Host + 'static> {
|
|||
|
||||
// Instant this node started.
|
||||
starting_instant: Instant,
|
||||
|
||||
// Latest estimate of sync completeness.
|
||||
sync_completeness_estimate: AtomicU64,
|
||||
}
|
||||
|
||||
impl<D: DataStore + 'static, H: Host + 'static> NodeInternal<D, H> {
|
||||
|
@ -221,26 +197,11 @@ impl<D: DataStore + 'static, H: Host + 'static> NodeInternal<D, H> {
|
|||
connected_to_addresses.clear();
|
||||
let now = self.ms_monotonic();
|
||||
|
||||
// Drop dead connections, send SyncStatus, and populate counts for computing sync status.
|
||||
let sync_status = Arc::new(
|
||||
rmp_serde::encode::to_vec_named(&msg::SyncStatus {
|
||||
record_count: self.datastore.total_count().await,
|
||||
clock: self.datastore.clock() as u64,
|
||||
})
|
||||
.unwrap(),
|
||||
);
|
||||
self.connections.lock().await.retain(|sa, c| {
|
||||
if !c.closed.load(Ordering::Relaxed) {
|
||||
let cc = c.clone();
|
||||
if (now - c.last_receive_time.load(Ordering::Relaxed)) < CONNECTION_TIMEOUT {
|
||||
connected_to_addresses.insert(sa.clone());
|
||||
if c.info.lock().unwrap().initialized {
|
||||
counts.push(c.last_sync_status_record_count.load(Ordering::Relaxed));
|
||||
let ss2 = sync_status.clone();
|
||||
tasks.push(tokio::spawn(async move {
|
||||
let _ = tokio::time::timeout_at(sleep_until, cc.send_msg(MessageType::SyncStatus, ss2.as_slice(), now)).await;
|
||||
}));
|
||||
}
|
||||
true // keep connection
|
||||
} else {
|
||||
let _ = c.read_task.lock().unwrap().take().map(|j| j.abort());
|
||||
|
@ -271,19 +232,6 @@ impl<D: DataStore + 'static, H: Host + 'static> NodeInternal<D, H> {
|
|||
}
|
||||
});
|
||||
|
||||
let sync_completness_estimate = if !counts.is_empty() {
|
||||
counts.sort_unstable();
|
||||
let twothirds = if counts.len() > 3 { *counts.get((counts.len() / 3) * 2).unwrap() } else { *counts.last().unwrap() };
|
||||
if twothirds > 0 {
|
||||
((self.datastore.total_count().await as f64) / (twothirds as f64)).min(1.0)
|
||||
} else {
|
||||
1.0
|
||||
}
|
||||
} else {
|
||||
1.0
|
||||
};
|
||||
self.sync_completeness_estimate.store(sync_completness_estimate.to_bits(), Ordering::Relaxed);
|
||||
|
||||
let config = self.host.node_config();
|
||||
|
||||
// Always try to connect to anchor peers.
|
||||
|
@ -341,20 +289,22 @@ impl<D: DataStore + 'static, H: Host + 'static> NodeInternal<D, H> {
|
|||
|
||||
let now = self.ms_monotonic();
|
||||
for c in self.connections.lock().await.iter() {
|
||||
let mut have_records: Vec<u8> = Vec::with_capacity((to_announce.len() * ANNOUNCE_KEY_LEN) + 4);
|
||||
have_records.push(ANNOUNCE_KEY_LEN as u8);
|
||||
for (key, already_has) in to_announce.iter() {
|
||||
if !already_has.contains(c.0) {
|
||||
let _ = std::io::Write::write_all(&mut have_records, key);
|
||||
if c.1.announce_new_records.load(Ordering::Relaxed) {
|
||||
let mut have_records: Vec<u8> = Vec::with_capacity((to_announce.len() * ANNOUNCE_KEY_LEN) + 4);
|
||||
have_records.push(ANNOUNCE_KEY_LEN as u8);
|
||||
for (key, already_has) in to_announce.iter() {
|
||||
if !already_has.contains(c.0) {
|
||||
let _ = std::io::Write::write_all(&mut have_records, key);
|
||||
}
|
||||
}
|
||||
if have_records.len() > 1 {
|
||||
let c2 = c.1.clone();
|
||||
background_tasks.spawn(async move {
|
||||
// If the connection dies this will either fail or time out in 1s. Usually these execute instantly due to
|
||||
// write buffering but a short timeout prevents them from building up too much.
|
||||
let _ = tokio::time::timeout(announce_timeout, c2.send_msg(MessageType::HaveRecords, have_records.as_slice(), now));
|
||||
})
|
||||
}
|
||||
}
|
||||
if have_records.len() > 1 {
|
||||
let c2 = c.1.clone();
|
||||
background_tasks.spawn(async move {
|
||||
// If the connection dies this will either fail or time out in 1s. Usually these execute instantly due to
|
||||
// write buffering but a short timeout prevents them from building up too much.
|
||||
let _ = tokio::time::timeout(announce_timeout, c2.send_msg(MessageType::HaveRecords, have_records.as_slice(), now));
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -401,7 +351,6 @@ impl<D: DataStore + 'static, H: Host + 'static> NodeInternal<D, H> {
|
|||
writer: Mutex::new(writer),
|
||||
last_send_time: AtomicI64::new(now),
|
||||
last_receive_time: AtomicI64::new(now),
|
||||
last_sync_status_record_count: AtomicU64::new(0),
|
||||
info: std::sync::Mutex::new(RemoteNodeInfo {
|
||||
name: String::new(),
|
||||
contact: String::new(),
|
||||
|
@ -413,6 +362,7 @@ impl<D: DataStore + 'static, H: Host + 'static> NodeInternal<D, H> {
|
|||
initialized: false,
|
||||
}),
|
||||
read_task: std::sync::Mutex::new(None),
|
||||
announce_new_records: AtomicBool::new(false),
|
||||
closed: AtomicBool::new(false),
|
||||
});
|
||||
let self2 = self.clone();
|
||||
|
@ -431,7 +381,6 @@ impl<D: DataStore + 'static, H: Host + 'static> NodeInternal<D, H> {
|
|||
const BUF_CHUNK_SIZE: usize = 4096;
|
||||
const READ_BUF_INITIAL_SIZE: usize = 65536; // should be a multiple of BUF_CHUNK_SIZE
|
||||
|
||||
let background_tasks = AsyncTaskReaper::new();
|
||||
let mut write_buffer: Vec<u8> = Vec::with_capacity(BUF_CHUNK_SIZE);
|
||||
let mut read_buffer: Vec<u8> = Vec::new();
|
||||
read_buffer.resize(READ_BUF_INITIAL_SIZE, 0);
|
||||
|
@ -581,102 +530,19 @@ impl<D: DataStore + 'static, H: Host + 'static> NodeInternal<D, H> {
|
|||
}
|
||||
|
||||
match message_type {
|
||||
MessageType::HaveRecords => {
|
||||
if message.len() > 1 {
|
||||
let clock = self.datastore.clock();
|
||||
let mut announce_queue_key = [0_u8; ANNOUNCE_KEY_LEN];
|
||||
let mut start = [0_u8; KEY_SIZE];
|
||||
let mut end = [0xff_u8; KEY_SIZE];
|
||||
let key_prefix_len = message[0] as usize;
|
||||
message = &message[1..];
|
||||
if key_prefix_len > 0 && key_prefix_len <= KEY_SIZE {
|
||||
write_buffer.clear();
|
||||
write_buffer.push(key_prefix_len as u8);
|
||||
while message.len() >= key_prefix_len {
|
||||
let key_prefix = &message[..key_prefix_len];
|
||||
MessageType::HaveRecords => {}
|
||||
|
||||
if key_prefix_len >= ANNOUNCE_KEY_LEN {
|
||||
// If the key prefix is appropriately sized, look up and add this remote endpoint
|
||||
// to the list of endpoints that already have this record if it's in the announce
|
||||
// queue. We don't add a new entry to the announce queue if one doesn't already
|
||||
// exist because we did not just receive the actual record. This just avoids announcing
|
||||
// to peers that just told us they have it.
|
||||
announce_queue_key.copy_from_slice(&key_prefix[..ANNOUNCE_KEY_LEN]);
|
||||
self.announce_queue.lock().await.get_mut(&announce_queue_key).map(|already_has| {
|
||||
if !already_has.contains(&remote_address) {
|
||||
already_has.push(remote_address.clone());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if if key_prefix_len < KEY_SIZE {
|
||||
(&mut start[..key_prefix_len]).copy_from_slice(key_prefix);
|
||||
(&mut end[..key_prefix_len]).copy_from_slice(key_prefix);
|
||||
self.datastore.count(clock, &start, &end).await == 0
|
||||
} else {
|
||||
!self.datastore.contains(clock, key_prefix).await
|
||||
} {
|
||||
let _ = std::io::Write::write_all(&mut write_buffer, key_prefix);
|
||||
}
|
||||
|
||||
message = &message[key_prefix_len..];
|
||||
}
|
||||
if write_buffer.len() > 1 {
|
||||
let _ = connection.send_msg(MessageType::GetRecords, write_buffer.as_slice(), now).await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
MessageType::GetRecords => {
|
||||
if message.len() > 1 {
|
||||
let mut start = [0_u8; KEY_SIZE];
|
||||
let mut end = [0xff_u8; KEY_SIZE];
|
||||
let key_prefix_len = message[0] as usize;
|
||||
message = &message[1..];
|
||||
if key_prefix_len > 0 && key_prefix_len <= KEY_SIZE {
|
||||
while message.len() >= key_prefix_len {
|
||||
let key_prefix = &message[..key_prefix_len];
|
||||
|
||||
if key_prefix_len < KEY_SIZE {
|
||||
(&mut start[..key_prefix_len]).copy_from_slice(key_prefix);
|
||||
(&mut end[..key_prefix_len]).copy_from_slice(key_prefix);
|
||||
self.datastore
|
||||
.for_each(0, &start, &end, |_, v| {
|
||||
let v2 = v.clone();
|
||||
let c2 = connection.clone();
|
||||
background_tasks.spawn(async move {
|
||||
let _ = c2.send_msg(MessageType::Record, v2.as_ref(), now).await;
|
||||
});
|
||||
true
|
||||
})
|
||||
.await;
|
||||
} else {
|
||||
let record = self.datastore.load(0, key_prefix).await;
|
||||
if record.is_some() {
|
||||
let record = record.unwrap();
|
||||
let v: &[u8] = record.as_ref();
|
||||
let _ = connection.send_msg(MessageType::Record, v, now).await?;
|
||||
}
|
||||
}
|
||||
|
||||
message = &message[key_prefix_len..];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
MessageType::GetRecords => {}
|
||||
|
||||
MessageType::Record => {
|
||||
let key = H::sha512(&[message]);
|
||||
match self.datastore.store(&key, message).await {
|
||||
StoreResult::Ok => {
|
||||
if f64::from_bits(self.sync_completeness_estimate.load(Ordering::Relaxed)) >= ANNOUNCE_IF_SYNCED_MORE_THAN {
|
||||
let announce_key: [u8; ANNOUNCE_KEY_LEN] = (&key[..ANNOUNCE_KEY_LEN]).try_into().unwrap();
|
||||
let mut q = self.announce_queue.lock().await;
|
||||
let ql = q.entry(announce_key).or_insert_with(|| Vec::with_capacity(2));
|
||||
if !ql.contains(&remote_address) {
|
||||
ql.push(remote_address.clone());
|
||||
}
|
||||
let announce_key: [u8; ANNOUNCE_KEY_LEN] = (&key[..ANNOUNCE_KEY_LEN]).try_into().unwrap();
|
||||
let mut q = self.announce_queue.lock().await;
|
||||
let ql = q.entry(announce_key).or_insert_with(|| Vec::with_capacity(2));
|
||||
if !ql.contains(&remote_address) {
|
||||
ql.push(remote_address.clone());
|
||||
}
|
||||
}
|
||||
StoreResult::Rejected => {
|
||||
|
@ -686,17 +552,8 @@ impl<D: DataStore + 'static, H: Host + 'static> NodeInternal<D, H> {
|
|||
}
|
||||
}
|
||||
|
||||
MessageType::SyncStatus => {
|
||||
let msg: msg::SyncStatus = decode_msgpack(message)?;
|
||||
connection.last_sync_status_record_count.store(msg.record_count, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
MessageType::SyncRequest => {
|
||||
let msg: msg::SyncRequest = decode_msgpack(message)?;
|
||||
}
|
||||
|
||||
MessageType::SyncResponse => {
|
||||
let msg: msg::SyncResponse = decode_msgpack(message)?;
|
||||
MessageType::Sync => {
|
||||
let msg: msg::Sync = decode_msgpack(message)?;
|
||||
}
|
||||
|
||||
_ => {}
|
||||
|
@ -733,9 +590,9 @@ struct Connection {
|
|||
writer: Mutex<OwnedWriteHalf>,
|
||||
last_send_time: AtomicI64,
|
||||
last_receive_time: AtomicI64,
|
||||
last_sync_status_record_count: AtomicU64,
|
||||
info: std::sync::Mutex<RemoteNodeInfo>,
|
||||
read_task: std::sync::Mutex<Option<JoinHandle<std::io::Result<()>>>>,
|
||||
announce_new_records: AtomicBool,
|
||||
closed: AtomicBool,
|
||||
}
|
||||
|
||||
|
|
|
@ -27,7 +27,7 @@ pub enum MessageType {
|
|||
/// msg::InitResponse (msgpack)
|
||||
InitResponse = 2_u8,
|
||||
|
||||
/// <u8 length of each key in bytes>[<key>...]
|
||||
/// <full record key>[<full record key>...]
|
||||
HaveRecords = 3_u8,
|
||||
|
||||
/// <u8 length of each key in bytes>[<key>...]
|
||||
|
@ -36,14 +36,8 @@ pub enum MessageType {
|
|||
/// <record>
|
||||
Record = 5_u8,
|
||||
|
||||
/// msg::SyncStatus (msgpack)
|
||||
SyncStatus = 6_u8,
|
||||
|
||||
/// msg::SyncRequest (msgpack)
|
||||
SyncRequest = 7_u8,
|
||||
|
||||
/// msg::SyncResponse (msgpack)
|
||||
SyncResponse = 8_u8,
|
||||
/// msg::Sync (msgpack)
|
||||
Sync = 7_u8,
|
||||
}
|
||||
|
||||
impl From<u8> for MessageType {
|
||||
|
@ -68,9 +62,7 @@ impl MessageType {
|
|||
Self::HaveRecords => "HAVE_RECORDS",
|
||||
Self::GetRecords => "GET_RECORDS",
|
||||
Self::Record => "RECORD",
|
||||
Self::SyncStatus => "SYNC_STATUS",
|
||||
Self::SyncRequest => "SYNC_REQUEST",
|
||||
Self::SyncResponse => "SYNC_RESPONSE",
|
||||
Self::Sync => "SYNC",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -141,81 +133,22 @@ pub mod msg {
|
|||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct SyncStatus {
|
||||
/// Total number of records this node has in its data store.
|
||||
#[serde(rename = "c")]
|
||||
pub record_count: u64,
|
||||
pub struct Sync<'a> {
|
||||
/// 64-bit prefix of record keys for this request
|
||||
#[serde(rename = "p")]
|
||||
pub prefix: u64,
|
||||
|
||||
/// Sending node's system clock.
|
||||
#[serde(rename = "t")]
|
||||
pub clock: u64,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct SyncRequest<'a> {
|
||||
/// Key range start (length: KEY_SIZE)
|
||||
#[serde(with = "serde_bytes")]
|
||||
#[serde(rename = "s")]
|
||||
pub range_start: &'a [u8],
|
||||
|
||||
/// Key range end (length: KEY_SIZE)
|
||||
#[serde(with = "serde_bytes")]
|
||||
#[serde(rename = "e")]
|
||||
pub range_end: &'a [u8],
|
||||
|
||||
/// Number of records requesting node already has under key range
|
||||
#[serde(rename = "c")]
|
||||
pub record_count: u64,
|
||||
/// Number of bits in prefix that are meaningful
|
||||
#[serde(rename = "b")]
|
||||
pub prefix_bits: u8,
|
||||
|
||||
/// Reference time for query
|
||||
#[serde(rename = "t")]
|
||||
pub reference_time: u64,
|
||||
|
||||
/// Random salt
|
||||
#[serde(rename = "x")]
|
||||
pub salt: &'a [u8],
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct SyncResponse<'a> {
|
||||
/// Key range start (length: KEY_SIZE)
|
||||
#[serde(rename = "s")]
|
||||
pub range_start: &'a [u8],
|
||||
|
||||
/// Key range end (length: KEY_SIZE)
|
||||
#[serde(rename = "e")]
|
||||
pub range_end: &'a [u8],
|
||||
|
||||
/// Number of records responder has under key range
|
||||
#[serde(rename = "c")]
|
||||
pub record_count: u64,
|
||||
|
||||
/// Reference time for query
|
||||
#[serde(rename = "t")]
|
||||
pub reference_time: u64,
|
||||
|
||||
/// Random salt
|
||||
#[serde(rename = "x")]
|
||||
pub salt: &'a [u8],
|
||||
|
||||
/// IBLT set summary or empty if not included
|
||||
///
|
||||
/// If an IBLT is omitted it means the sender determined it was
|
||||
/// more efficient to just send keys. In that case keys[] should have
|
||||
/// an explicit list.
|
||||
/// Set summary for keys under prefix
|
||||
#[serde(with = "serde_bytes")]
|
||||
#[serde(rename = "i")]
|
||||
pub iblt: &'a [u8],
|
||||
|
||||
/// Explicit list of keys (full key length).
|
||||
///
|
||||
/// This may still contain keys if an IBLT is present. In that case
|
||||
/// keys included here will be any that have identical 64-bit prefixes
|
||||
/// to keys already added to the IBLT and thus would collide. These
|
||||
/// should be rare so it's most efficient to just explicitly name them.
|
||||
/// Otherwise keys with identical 64-bit prefixes may never be synced.
|
||||
#[serde(with = "serde_bytes")]
|
||||
#[serde(rename = "k")]
|
||||
pub keys: &'a [u8],
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue