Mirror of https://github.com/zerotier/ZeroTierOne.git

Commit 29beb39d94 ("commit changes before merge")
Parent: e22afcd23b
7 changed files with 112 additions and 327 deletions
@@ -1,3 +1,3 @@
 {
-"workspace.name" : "ZeroTier"
+"workspace.name" : "tetanus"
 }
@@ -11,12 +11,6 @@ use async_trait::async_trait;
 /// Size of keys, which is the size of a 512-bit hash. This is a protocol constant.
 pub const KEY_SIZE: usize = 64;
 
-/// Minimum possible value in a key range (all zero).
-pub const MIN_KEY: [u8; KEY_SIZE] = [0; KEY_SIZE];
-
-/// Maximum possible value in a key range (all 0xff).
-pub const MAX_KEY: [u8; KEY_SIZE] = [0xff; KEY_SIZE];
-
 /// Result returned by DataStore::store().
 pub enum StoreResult {
     /// Entry was accepted.
@@ -32,6 +26,18 @@ pub enum StoreResult {
     Rejected,
 }
 
+/// Convert a prefix into an inclusive range of keys.
+///
+/// This is a convenience function for implementing keys_under() with data stores that support
+/// straightforward range queries with full keys.
+pub fn prefix_to_range(prefix: u64, prefix_bits: u32) -> ([u8; KEY_SIZE], [u8; KEY_SIZE]) {
+    let mut a = [0_u8; KEY_SIZE];
+    a[0..8].copy_from_slice(&((prefix & 0xffffffffffffffff_u64.wrapping_shl(64 - prefix_bits)).to_be_bytes()));
+    let mut b = [0xff_u8; KEY_SIZE];
+    b[0..8].copy_from_slice(&((prefix | 0xffffffffffffffff_u64.wrapping_shr(prefix_bits)).to_be_bytes()));
+    (a, b)
+}
+
 /// API to be implemented by the data set we want to replicate.
 ///
 /// Keys as understood by syncwhole are SHA512 hashes of values. The user can of course
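[Note, not part of the commit: a small usage sketch of the prefix_to_range() helper added above. With an 8-bit prefix of 0xab it yields the inclusive key range 0xab00..00 through 0xabff..ff; KEY_SIZE and the function body are repeated only so the sketch stands alone.]

const KEY_SIZE: usize = 64;

fn prefix_to_range(prefix: u64, prefix_bits: u32) -> ([u8; KEY_SIZE], [u8; KEY_SIZE]) {
    let mut a = [0_u8; KEY_SIZE];
    a[0..8].copy_from_slice(&((prefix & 0xffffffffffffffff_u64.wrapping_shl(64 - prefix_bits)).to_be_bytes()));
    let mut b = [0xff_u8; KEY_SIZE];
    b[0..8].copy_from_slice(&((prefix | 0xffffffffffffffff_u64.wrapping_shr(prefix_bits)).to_be_bytes()));
    (a, b)
}

fn main() {
    // Prefixes live in the high bits of the u64, so an 8-bit prefix 0xab is 0xab00_0000_0000_0000.
    let (start, end) = prefix_to_range(0xab00_0000_0000_0000, 8);
    assert_eq!(start[0], 0xab);
    assert!(start[1..].iter().all(|x| *x == 0x00));
    assert_eq!(end[0], 0xab);
    assert!(end[1..].iter().all(|x| *x == 0xff));
}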
@@ -60,9 +66,6 @@ pub trait DataStore: Sync + Send {
     /// Maximum size of a value in bytes.
     const MAX_VALUE_SIZE: usize;
 
-    /// Get the current wall clock in milliseconds since Unix epoch.
-    fn clock(&self) -> i64;
-
     /// Get the domain of this data store.
     ///
     /// This is an arbitrary unique identifier that must be the same for all nodes that
@@ -70,15 +73,13 @@ pub trait DataStore: Sync + Send {
     /// data across data sets if this is not desired.
     fn domain(&self) -> &str;
 
-    /// Get an item if it exists as of a given reference time.
-    async fn load(&self, reference_time: i64, key: &[u8]) -> Option<Self::ValueRef>;
-
-    /// Check whether this data store contains a key.
-    ///
-    /// The default implementation just calls load(). Override if a faster version is possible.
-    async fn contains(&self, reference_time: i64, key: &[u8]) -> bool {
-        self.load(reference_time, key).await.is_some()
-    }
+    /// Get the reference time that should be used on this side to query remote peers.
+    ///
+    /// This is typically the local "wall clock" time in milliseconds since Unix epoch.
+    fn reference_time(&self) -> i64;
+
+    /// Get an item by identity hash key if it exists.
+    async fn load(&self, key: &[u8; KEY_SIZE]) -> Option<Self::ValueRef>;
 
     /// Store an item in the data store and return its status.
     ///
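[Note, not part of the commit: keys are the SHA-512 hash of the value (an "identity hash"). In syncwhole the hash comes from the Host trait (H::sha512); the sha2 crate below is an assumption used only to keep this sketch self-contained.]

use sha2::{Digest, Sha512};

fn identity_key(value: &[u8]) -> [u8; 64] {
    // Key = SHA-512(value), so a record can always be re-verified against its key.
    let mut key = [0_u8; 64];
    key.copy_from_slice(&Sha512::digest(value));
    key
}

fn main() {
    let value = b"some replicated record";
    let key = identity_key(value);
    // A node would call store(&key, value) and later load(&key) to fetch it again.
    assert_eq!(key.len(), 64);
}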
@@ -103,22 +104,19 @@ pub trait DataStore: Sync + Send {
     /// Rejected should only be returned if the value actually fails a validity check, signature
     /// verification, proof of work check, or some other required criteria. Ignored must be
     /// returned if the value is valid but is too old or was rejected for some other normal reason.
-    async fn store(&self, key: &[u8], value: &[u8]) -> StoreResult;
+    async fn store(&self, key: &[u8; KEY_SIZE], value: &[u8]) -> StoreResult;
 
-    /// Get the number of items in a range.
-    async fn count(&self, reference_time: i64, key_range_start: &[u8], key_range_end: &[u8]) -> u64;
-
-    /// Get the total number of records in this data store.
-    async fn total_count(&self) -> u64;
-
-    /// Iterate through a series of keys in a range (inclusive), stopping when function returns false.
-    ///
-    /// The default implementation uses for_each() and just drops the value. Specialize if you can do it faster
-    /// by only retrieving keys.
-    async fn for_each_key<F: Send + FnMut(&[u8]) -> bool>(&self, reference_time: i64, key_range_start: &[u8], key_range_end: &[u8], mut f: F) {
-        self.for_each(reference_time, key_range_start, key_range_end, |k, _| f(k)).await;
-    }
-
-    /// Iterate through a series of entries in a range (inclusive), stopping when function returns false.
-    async fn for_each<F: Send + FnMut(&[u8], &Self::ValueRef) -> bool>(&self, reference_time: i64, key_range_start: &[u8], key_range_end: &[u8], f: F);
+    /// Iterate through keys under a given key prefix.
+    ///
+    /// The prefix is a bit string up to 64 bits long. The implementation can technically interpret this
+    /// any way it wants, but usually this would be the first 64 bits of the key as a big-endian bit string.
+    ///
+    /// Keys MUST be output in ascending binary sort order.
+    async fn keys_under<F: Send + FnMut(&[u8]) -> bool>(&self, reference_time: i64, prefix: u64, prefix_bits: u32, f: F);
+
+    /// Load all record values under a given key prefix.
+    ///
+    /// This should clear and fill the result, fetching up to the limit values under a given key prefix.
+    /// Values may be pushed into the vector in any order.
+    async fn values_under(&self, prefix: u64, prefix_bits: u32, result: &mut Vec<Option<Self::ValueRef>>, limit: usize);
 }
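[Note, not part of the commit: a minimal sketch of how keys_under() can be implemented over any store with ordered range scans, which is essentially what the TestNodeHost DataStore later in this commit does with a BTreeMap. The 8-byte keys and shortened prefix_to_range() here are stand-ins for the real 64-byte keys and the helper added above.]

use std::collections::BTreeMap;
use std::ops::Bound::Included;

fn prefix_to_range(prefix: u64, prefix_bits: u32) -> ([u8; 8], [u8; 8]) {
    // Same idea as the crate helper, shortened to 8-byte keys for the sketch.
    let start = (prefix & u64::MAX.wrapping_shl(64 - prefix_bits)).to_be_bytes();
    let end = (prefix | u64::MAX.wrapping_shr(prefix_bits)).to_be_bytes();
    (start, end)
}

fn keys_under<F: FnMut(&[u8]) -> bool>(records: &BTreeMap<[u8; 8], u64>, prefix: u64, prefix_bits: u32, mut f: F) {
    let (start, end) = prefix_to_range(prefix, prefix_bits);
    for (k, _) in records.range((Included(start), Included(end))) {
        if !f(k) {
            break; // BTreeMap yields keys in ascending binary order, as the trait requires
        }
    }
}

fn main() {
    let mut records = BTreeMap::new();
    records.insert([0xab, 0, 0, 0, 0, 0, 0, 1], 1_u64);
    records.insert([0xab, 0xff, 0, 0, 0, 0, 0, 2], 2_u64);
    records.insert([0xcd, 0, 0, 0, 0, 0, 0, 3], 3_u64);
    let mut seen = 0;
    keys_under(&records, 0xab00_0000_0000_0000, 8, |_| { seen += 1; true });
    assert_eq!(seen, 2); // only keys whose first byte is 0xab
}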
@@ -53,6 +53,7 @@ fn next_iteration_index(mut x: u64, hash_no: u64) -> u64 {
 /// The best value for HASHES seems to be 3 for an optimal fill of 80%.
 #[repr(C)]
 pub struct IBLT<const BUCKETS: usize, const HASHES: usize> {
+    total_count: i64, // always stored little-endian in memory
     key: [u64; BUCKETS],
     check_hash: [u32; BUCKETS],
     count: [i8; BUCKETS],
@@ -76,7 +77,7 @@ impl<const BUCKETS: usize, const HASHES: usize> IBLT<BUCKETS, HASHES> {
     pub const BUCKETS: usize = BUCKETS;
 
     /// Size of this IBLT in bytes.
-    pub const SIZE_BYTES: usize = BUCKETS * BUCKET_SIZE_BYTES;
+    pub const SIZE_BYTES: usize = 8 + (BUCKETS * BUCKET_SIZE_BYTES); // total_count + buckets
 
     /// Create a new zeroed IBLT.
     #[inline(always)]
@@ -126,7 +127,13 @@ impl<const BUCKETS: usize, const HASHES: usize> IBLT<BUCKETS, HASHES> {
         }
     }
 
-    fn ins_rem(&mut self, key: u64, delta: i8) {
+    /// Get the total number of set items that have been added to this IBLT.
+    pub fn count(&self) -> u64 {
+        i64::from_le(self.total_count).max(0) as u64
+    }
+
+    pub(crate) fn ins_rem(&mut self, key: u64, delta: i8) {
+        self.total_count = i64::from_le(self.total_count).wrapping_add(delta as i64).to_le();
         let check_hash = get_check_hash(key);
         let mut iteration_index = u64::from_le(key);
         for k in 0..(HASHES as u64) {
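[Note, not part of the commit: the new total_count field is kept little-endian in memory (the struct is #[repr(C)] and serialized as raw bytes), so every read or update converts explicitly. A minimal stand-alone sketch of that pattern:]

struct Counter {
    total_count: i64, // always stored little-endian in memory
}

impl Counter {
    fn add(&mut self, delta: i64) {
        // convert to native order, adjust, convert back
        self.total_count = i64::from_le(self.total_count).wrapping_add(delta).to_le();
    }

    fn count(&self) -> u64 {
        // clamp at zero, the same way IBLT::count() above does
        i64::from_le(self.total_count).max(0) as u64
    }
}

fn main() {
    let mut c = Counter { total_count: 0_i64.to_le() };
    c.add(3);
    c.add(-1);
    assert_eq!(c.count(), 2);
}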
@@ -152,14 +159,20 @@ impl<const BUCKETS: usize, const HASHES: usize> IBLT<BUCKETS, HASHES> {
 
     /// Subtract another IBLT from this one to get a set difference.
     pub fn subtract(&mut self, other: &Self) {
+        self.total_count = i64::from_le(self.total_count).wrapping_sub(i64::from_le(other.total_count.max(0))).max(0).to_le();
         self.key.iter_mut().zip(other.key.iter()).for_each(|(a, b)| *a ^= *b);
         self.check_hash.iter_mut().zip(other.check_hash.iter()).for_each(|(a, b)| *a ^= *b);
         self.count.iter_mut().zip(other.count.iter()).for_each(|(a, b)| *a = a.wrapping_sub(*b));
     }
 
     /// List as many entries in this IBLT as can be extracted.
-    /// True is returned if extraction was 100% successful. False indicates that
-    /// some entries were not extractable.
+    ///
+    /// True is returned if the number of extracted items was exactly equal to the total number of items
+    /// in this set summary. A return of false indicates incomplete extraction or an invalid IBLT.
+    ///
+    /// Due to the small check hash sizes used in this IBLT there is a very small chance this will list
+    /// bogus items that were never added. This is not an issue with this protocol as it would just result
+    /// in an unsatisfied record request.
     pub fn list<F: FnMut(u64)>(mut self, mut f: F) -> bool {
         let mut queue: Vec<u32> = Vec::with_capacity(BUCKETS);
 
@@ -170,7 +183,10 @@ impl<const BUCKETS: usize, const HASHES: usize> IBLT<BUCKETS, HASHES> {
             }
         }
 
-        'list_main: loop {
+        let total_count = i64::from_le(self.total_count);
+        let mut listed = 0;
+
+        'list_main: while listed < total_count {
             let i = queue.pop();
             let i = if i.is_some() {
                 i.unwrap() as usize
@@ -182,6 +198,7 @@ impl<const BUCKETS: usize, const HASHES: usize> IBLT<BUCKETS, HASHES> {
             let check_hash = self.check_hash[i];
             let count = self.count[i];
             if (count == 1 || count == -1) && check_hash == get_check_hash(key) {
+                listed += 1;
                 f(key);
 
                 let mut iteration_index = u64::from_le(key);
@@ -205,7 +222,7 @@ impl<const BUCKETS: usize, const HASHES: usize> IBLT<BUCKETS, HASHES> {
             }
         }
 
-        self.count.iter().all(|x| *x == 0) && self.key.iter().all(|x| *x == 0)
+        listed == total_count
     }
 }
 
@@ -8,11 +8,11 @@
 
 pub(crate) mod iblt;
 pub(crate) mod protocol;
+pub(crate) mod utils;
 pub(crate) mod varint;
 
 pub mod datastore;
 pub mod host;
 pub mod node;
-pub mod utils;
 
 pub use async_trait;
@@ -48,7 +48,7 @@ fn get_random_bytes(mut buf: &mut [u8]) {
 pub struct TestNodeHost {
     pub name: String,
     pub config: Config,
-    pub records: tokio::sync::Mutex<BTreeMap<[u8; 64], [u8; 64]>>,
+    pub records: tokio::sync::RwLock<BTreeMap<[u8; 64], [u8; 64]>>,
 }
 
 impl TestNodeHost {
@@ -63,7 +63,7 @@ impl TestNodeHost {
         Self {
             name: test_no.to_string(),
             config: Config::default(),
-            records: tokio::sync::Mutex::new(s),
+            records: tokio::sync::RwLock::new(s),
         }
     }
 }
@@ -105,54 +105,34 @@ impl DataStore for TestNodeHost {
         "test"
     }
 
-    async fn load(&self, _: i64, key: &[u8]) -> Option<Self::ValueRef> {
-        let key = key.try_into();
-        if key.is_ok() {
-            let key: [u8; 64] = key.unwrap();
-            let records = self.records.lock().await;
-            let value = records.get(&key);
-            if value.is_some() {
-                return Some(value.unwrap().clone());
-            }
-        }
-        return None;
-    }
+    async fn load(&self, key: &[u8; 64]) -> Option<Self::ValueRef> {
+        let records = self.records.read().await;
+        let value = records.get(key);
+        if value.is_some() {
+            Some(value.unwrap().clone())
+        } else {
+            None
+        }
+    }
 
-    async fn store(&self, key: &[u8], value: &[u8]) -> StoreResult {
-        let key = key.try_into();
-        if key.is_ok() && value.len() == 64 {
-            let key: [u8; 64] = key.unwrap();
-            let value: [u8; 64] = value.try_into().unwrap();
-            if key == Self::sha512(&[&value]) {
-                if self.records.lock().await.insert(key, value).is_none() {
-                    StoreResult::Ok
-                } else {
-                    StoreResult::Duplicate
-                }
-            } else {
-                StoreResult::Rejected
-            }
-        } else {
-            StoreResult::Rejected
-        }
-    }
+    async fn store(&self, key: &[u8; 64], value: &[u8]) -> StoreResult {
+        let value: [u8; 64] = value.try_into();
+        if value.is_ok() {
+            if self.records.write().await.insert(key.clone(), value).is_none() {
+                StoreResult::Ok
+            } else {
+                StoreResult::Duplicate
+            }
+        } else {
+            StoreResult::Rejected
+        }
+    }
 
-    async fn count(&self, _: i64, key_range_start: &[u8], key_range_end: &[u8]) -> u64 {
-        let start: [u8; 64] = key_range_start.try_into().unwrap();
-        let end: [u8; 64] = key_range_end.try_into().unwrap();
-        self.records.lock().await.range((Included(start), Included(end))).count() as u64
-    }
-
-    async fn total_count(&self) -> u64 {
-        self.records.lock().await.len() as u64
-    }
-
-    async fn for_each<F: Send + FnMut(&[u8], &Self::ValueRef) -> bool>(&self, _reference_time: i64, key_range_start: &[u8], key_range_end: &[u8], mut f: F) {
-        let start: [u8; 64] = key_range_start.try_into().unwrap();
-        let end: [u8; 64] = key_range_end.try_into().unwrap();
-        let records = self.records.lock().await;
+    async fn keys_under<F: Send + FnMut(&[u8]) -> bool>(&self, reference_time: i64, prefix: u64, prefix_bits: u32, f: F) {
+        let (start, end) = prefix_to_range(prefix, prefix_bits);
+        let records = self.records.read().await;
         for (k, v) in records.range((Included(start), Included(end))) {
-            if !f(k, v) {
+            if !f(k) {
                 break;
             }
         }
@@ -35,10 +35,6 @@ const HOUSEKEEPING_PERIOD: i64 = SYNC_STATUS_PERIOD;
 /// Inactivity timeout for connections in milliseconds.
 const CONNECTION_TIMEOUT: i64 = SYNC_STATUS_PERIOD * 4;
 
-/// Announce when we get records from peers if sync status estimate is more than this threshold.
-/// This is used to stop us from spamming with HaveRecords while catching up.
-const ANNOUNCE_IF_SYNCED_MORE_THAN: f64 = 0.95;
-
 /// Information about a remote node to which we are connected.
 #[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]
 pub struct RemoteNodeInfo {
@@ -97,7 +93,6 @@ impl<D: DataStore + 'static, H: Host + 'static> Node<D, H> {
             announce_queue: Mutex::new(HashMap::with_capacity(256)),
             bind_address,
             starting_instant: Instant::now(),
-            sync_completeness_estimate: AtomicU64::new((0.0_f64).to_bits()),
         });
 
         Ok(Self {
@@ -118,13 +113,6 @@ impl<D: DataStore + 'static, H: Host + 'static> Node<D, H> {
         &self.internal.host
     }
 
-    /// Broadcast a new record to the world.
-    ///
-    /// This should be called when new records are added to the synchronized data store
-    /// that are created locally. If this isn't called it may take a while for normal
-    /// sync to pick up and propagate the record.
-    pub async fn broadcast_new_record(&self, key: &[u8], value: &[u8]) {}
-
     /// Attempt to connect to an explicitly specified TCP endpoint.
     pub async fn connect(&self, endpoint: &SocketAddr) -> std::io::Result<bool> {
         self.internal.clone().connect(endpoint, Instant::now().add(Duration::from_millis(CONNECTION_TIMEOUT as u64))).await
@@ -144,15 +132,6 @@ impl<D: DataStore + 'static, H: Host + 'static> Node<D, H> {
     pub async fn connection_count(&self) -> usize {
         self.internal.connections.lock().await.len()
     }
-
-    /// Get a value from 0.0 to 1.0 estimating how synchronized we are with the network.
-    ///
-    /// This is an inexact estimate since it's based on record counts and it's possible for
-    /// two nodes to have the same count but disjoint sets. It tends to be fairly good in
-    /// practice though unless you have been disconnected for a very long time.
-    pub async fn sync_completeness_estimate(&self) -> f64 {
-        f64::from_bits(self.internal.sync_completeness_estimate.load(Ordering::Relaxed))
-    }
 }
 
 impl<D: DataStore + 'static, H: Host + 'static> Drop for Node<D, H> {
@@ -197,9 +176,6 @@ pub struct NodeInternal<D: DataStore + 'static, H: Host + 'static> {
 
     // Instant this node started.
     starting_instant: Instant,
-
-    // Latest estimate of sync completeness.
-    sync_completeness_estimate: AtomicU64,
 }
 
 impl<D: DataStore + 'static, H: Host + 'static> NodeInternal<D, H> {
@@ -221,26 +197,11 @@ impl<D: DataStore + 'static, H: Host + 'static> NodeInternal<D, H> {
             connected_to_addresses.clear();
             let now = self.ms_monotonic();
 
-            // Drop dead connections, send SyncStatus, and populate counts for computing sync status.
-            let sync_status = Arc::new(
-                rmp_serde::encode::to_vec_named(&msg::SyncStatus {
-                    record_count: self.datastore.total_count().await,
-                    clock: self.datastore.clock() as u64,
-                })
-                .unwrap(),
-            );
             self.connections.lock().await.retain(|sa, c| {
                 if !c.closed.load(Ordering::Relaxed) {
                     let cc = c.clone();
                     if (now - c.last_receive_time.load(Ordering::Relaxed)) < CONNECTION_TIMEOUT {
                         connected_to_addresses.insert(sa.clone());
-                        if c.info.lock().unwrap().initialized {
-                            counts.push(c.last_sync_status_record_count.load(Ordering::Relaxed));
-                            let ss2 = sync_status.clone();
-                            tasks.push(tokio::spawn(async move {
-                                let _ = tokio::time::timeout_at(sleep_until, cc.send_msg(MessageType::SyncStatus, ss2.as_slice(), now)).await;
-                            }));
-                        }
                         true // keep connection
                     } else {
                         let _ = c.read_task.lock().unwrap().take().map(|j| j.abort());
@@ -271,19 +232,6 @@ impl<D: DataStore + 'static, H: Host + 'static> NodeInternal<D, H> {
                 }
             });
 
-            let sync_completness_estimate = if !counts.is_empty() {
-                counts.sort_unstable();
-                let twothirds = if counts.len() > 3 { *counts.get((counts.len() / 3) * 2).unwrap() } else { *counts.last().unwrap() };
-                if twothirds > 0 {
-                    ((self.datastore.total_count().await as f64) / (twothirds as f64)).min(1.0)
-                } else {
-                    1.0
-                }
-            } else {
-                1.0
-            };
-            self.sync_completeness_estimate.store(sync_completness_estimate.to_bits(), Ordering::Relaxed);
-
             let config = self.host.node_config();
 
             // Always try to connect to anchor peers.
@@ -341,20 +289,22 @@ impl<D: DataStore + 'static, H: Host + 'static> NodeInternal<D, H> {
 
         let now = self.ms_monotonic();
         for c in self.connections.lock().await.iter() {
-            let mut have_records: Vec<u8> = Vec::with_capacity((to_announce.len() * ANNOUNCE_KEY_LEN) + 4);
-            have_records.push(ANNOUNCE_KEY_LEN as u8);
-            for (key, already_has) in to_announce.iter() {
-                if !already_has.contains(c.0) {
-                    let _ = std::io::Write::write_all(&mut have_records, key);
-                }
-            }
-            if have_records.len() > 1 {
-                let c2 = c.1.clone();
-                background_tasks.spawn(async move {
-                    // If the connection dies this will either fail or time out in 1s. Usually these execute instantly due to
-                    // write buffering but a short timeout prevents them from building up too much.
-                    let _ = tokio::time::timeout(announce_timeout, c2.send_msg(MessageType::HaveRecords, have_records.as_slice(), now));
-                })
+            if c.1.announce_new_records.load(Ordering::Relaxed) {
+                let mut have_records: Vec<u8> = Vec::with_capacity((to_announce.len() * ANNOUNCE_KEY_LEN) + 4);
+                have_records.push(ANNOUNCE_KEY_LEN as u8);
+                for (key, already_has) in to_announce.iter() {
+                    if !already_has.contains(c.0) {
+                        let _ = std::io::Write::write_all(&mut have_records, key);
+                    }
+                }
+                if have_records.len() > 1 {
+                    let c2 = c.1.clone();
+                    background_tasks.spawn(async move {
+                        // If the connection dies this will either fail or time out in 1s. Usually these execute instantly due to
+                        // write buffering but a short timeout prevents them from building up too much.
+                        let _ = tokio::time::timeout(announce_timeout, c2.send_msg(MessageType::HaveRecords, have_records.as_slice(), now));
+                    })
+                }
             }
         }
 
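[Note, not part of the commit: the have_records buffer assembled above is one length byte followed by fixed-length key prefixes packed back to back. The sketch below shows that layout; ANNOUNCE_KEY_LEN's real value is not visible in this diff, so 16 is only a placeholder.]

const ANNOUNCE_KEY_LEN: usize = 16;

fn encode_have_records(prefixes: &[[u8; ANNOUNCE_KEY_LEN]]) -> Vec<u8> {
    let mut msg = Vec::with_capacity((prefixes.len() * ANNOUNCE_KEY_LEN) + 1);
    msg.push(ANNOUNCE_KEY_LEN as u8); // leading byte: length of each key prefix
    for p in prefixes {
        msg.extend_from_slice(p);
    }
    msg
}

fn decode_have_records(msg: &[u8]) -> Vec<&[u8]> {
    let key_len = msg[0] as usize;
    msg[1..].chunks_exact(key_len).collect()
}

fn main() {
    let msg = encode_have_records(&[[0xaa; ANNOUNCE_KEY_LEN], [0xbb; ANNOUNCE_KEY_LEN]]);
    let keys = decode_have_records(&msg);
    assert_eq!(keys.len(), 2);
    assert_eq!(keys[0][0], 0xaa);
    assert_eq!(keys[1][0], 0xbb);
}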
@@ -401,7 +351,6 @@ impl<D: DataStore + 'static, H: Host + 'static> NodeInternal<D, H> {
             writer: Mutex::new(writer),
             last_send_time: AtomicI64::new(now),
             last_receive_time: AtomicI64::new(now),
-            last_sync_status_record_count: AtomicU64::new(0),
             info: std::sync::Mutex::new(RemoteNodeInfo {
                 name: String::new(),
                 contact: String::new(),
@@ -413,6 +362,7 @@ impl<D: DataStore + 'static, H: Host + 'static> NodeInternal<D, H> {
                 initialized: false,
             }),
             read_task: std::sync::Mutex::new(None),
+            announce_new_records: AtomicBool::new(false),
             closed: AtomicBool::new(false),
         });
         let self2 = self.clone();
@@ -431,7 +381,6 @@ impl<D: DataStore + 'static, H: Host + 'static> NodeInternal<D, H> {
         const BUF_CHUNK_SIZE: usize = 4096;
         const READ_BUF_INITIAL_SIZE: usize = 65536; // should be a multiple of BUF_CHUNK_SIZE
 
-        let background_tasks = AsyncTaskReaper::new();
         let mut write_buffer: Vec<u8> = Vec::with_capacity(BUF_CHUNK_SIZE);
         let mut read_buffer: Vec<u8> = Vec::new();
         read_buffer.resize(READ_BUF_INITIAL_SIZE, 0);
@@ -581,102 +530,19 @@ impl<D: DataStore + 'static, H: Host + 'static> NodeInternal<D, H> {
             }
 
             match message_type {
-                MessageType::HaveRecords => {
-                    if message.len() > 1 {
-                        let clock = self.datastore.clock();
-                        let mut announce_queue_key = [0_u8; ANNOUNCE_KEY_LEN];
-                        let mut start = [0_u8; KEY_SIZE];
-                        let mut end = [0xff_u8; KEY_SIZE];
-                        let key_prefix_len = message[0] as usize;
-                        message = &message[1..];
-                        if key_prefix_len > 0 && key_prefix_len <= KEY_SIZE {
-                            write_buffer.clear();
-                            write_buffer.push(key_prefix_len as u8);
-                            while message.len() >= key_prefix_len {
-                                let key_prefix = &message[..key_prefix_len];
-
-                                if key_prefix_len >= ANNOUNCE_KEY_LEN {
-                                    // If the key prefix is appropriately sized, look up and add this remote endpoint
-                                    // to the list of endpoints that already have this record if it's in the announce
-                                    // queue. We don't add a new entry to the announce queue if one doesn't already
-                                    // exist because we did not just receive the actual record. This just avoids announcing
-                                    // to peers that just told us they have it.
-                                    announce_queue_key.copy_from_slice(&key_prefix[..ANNOUNCE_KEY_LEN]);
-                                    self.announce_queue.lock().await.get_mut(&announce_queue_key).map(|already_has| {
-                                        if !already_has.contains(&remote_address) {
-                                            already_has.push(remote_address.clone());
-                                        }
-                                    });
-                                }
-
-                                if if key_prefix_len < KEY_SIZE {
-                                    (&mut start[..key_prefix_len]).copy_from_slice(key_prefix);
-                                    (&mut end[..key_prefix_len]).copy_from_slice(key_prefix);
-                                    self.datastore.count(clock, &start, &end).await == 0
-                                } else {
-                                    !self.datastore.contains(clock, key_prefix).await
-                                } {
-                                    let _ = std::io::Write::write_all(&mut write_buffer, key_prefix);
-                                }
-
-                                message = &message[key_prefix_len..];
-                            }
-                            if write_buffer.len() > 1 {
-                                let _ = connection.send_msg(MessageType::GetRecords, write_buffer.as_slice(), now).await?;
-                            }
-                        }
-                    }
-                }
-
-                MessageType::GetRecords => {
-                    if message.len() > 1 {
-                        let mut start = [0_u8; KEY_SIZE];
-                        let mut end = [0xff_u8; KEY_SIZE];
-                        let key_prefix_len = message[0] as usize;
-                        message = &message[1..];
-                        if key_prefix_len > 0 && key_prefix_len <= KEY_SIZE {
-                            while message.len() >= key_prefix_len {
-                                let key_prefix = &message[..key_prefix_len];
-
-                                if key_prefix_len < KEY_SIZE {
-                                    (&mut start[..key_prefix_len]).copy_from_slice(key_prefix);
-                                    (&mut end[..key_prefix_len]).copy_from_slice(key_prefix);
-                                    self.datastore
-                                        .for_each(0, &start, &end, |_, v| {
-                                            let v2 = v.clone();
-                                            let c2 = connection.clone();
-                                            background_tasks.spawn(async move {
-                                                let _ = c2.send_msg(MessageType::Record, v2.as_ref(), now).await;
-                                            });
-                                            true
-                                        })
-                                        .await;
-                                } else {
-                                    let record = self.datastore.load(0, key_prefix).await;
-                                    if record.is_some() {
-                                        let record = record.unwrap();
-                                        let v: &[u8] = record.as_ref();
-                                        let _ = connection.send_msg(MessageType::Record, v, now).await?;
-                                    }
-                                }
-
-                                message = &message[key_prefix_len..];
-                            }
-                        }
-                    }
-                }
+                MessageType::HaveRecords => {}
+
+                MessageType::GetRecords => {}
 
                 MessageType::Record => {
                     let key = H::sha512(&[message]);
                     match self.datastore.store(&key, message).await {
                         StoreResult::Ok => {
-                            if f64::from_bits(self.sync_completeness_estimate.load(Ordering::Relaxed)) >= ANNOUNCE_IF_SYNCED_MORE_THAN {
-                                let announce_key: [u8; ANNOUNCE_KEY_LEN] = (&key[..ANNOUNCE_KEY_LEN]).try_into().unwrap();
-                                let mut q = self.announce_queue.lock().await;
-                                let ql = q.entry(announce_key).or_insert_with(|| Vec::with_capacity(2));
-                                if !ql.contains(&remote_address) {
-                                    ql.push(remote_address.clone());
-                                }
+                            let announce_key: [u8; ANNOUNCE_KEY_LEN] = (&key[..ANNOUNCE_KEY_LEN]).try_into().unwrap();
+                            let mut q = self.announce_queue.lock().await;
+                            let ql = q.entry(announce_key).or_insert_with(|| Vec::with_capacity(2));
+                            if !ql.contains(&remote_address) {
+                                ql.push(remote_address.clone());
                             }
                         }
                     }
                     StoreResult::Rejected => {
@@ -686,17 +552,8 @@ impl<D: DataStore + 'static, H: Host + 'static> NodeInternal<D, H> {
                         }
                     }
 
-                MessageType::SyncStatus => {
-                    let msg: msg::SyncStatus = decode_msgpack(message)?;
-                    connection.last_sync_status_record_count.store(msg.record_count, Ordering::Relaxed);
-                }
-
-                MessageType::SyncRequest => {
-                    let msg: msg::SyncRequest = decode_msgpack(message)?;
-                }
-
-                MessageType::SyncResponse => {
-                    let msg: msg::SyncResponse = decode_msgpack(message)?;
+                MessageType::Sync => {
+                    let msg: msg::Sync = decode_msgpack(message)?;
                 }
 
                 _ => {}
@@ -733,9 +590,9 @@ struct Connection {
     writer: Mutex<OwnedWriteHalf>,
     last_send_time: AtomicI64,
     last_receive_time: AtomicI64,
-    last_sync_status_record_count: AtomicU64,
     info: std::sync::Mutex<RemoteNodeInfo>,
     read_task: std::sync::Mutex<Option<JoinHandle<std::io::Result<()>>>>,
+    announce_new_records: AtomicBool,
     closed: AtomicBool,
 }
 
@@ -27,7 +27,7 @@ pub enum MessageType {
     /// msg::InitResponse (msgpack)
     InitResponse = 2_u8,
 
-    /// <u8 length of each key in bytes>[<key>...]
+    /// <full record key>[<full record key>...]
    HaveRecords = 3_u8,
 
     /// <u8 length of each key in bytes>[<key>...]
@@ -36,14 +36,8 @@ pub enum MessageType {
     /// <record>
     Record = 5_u8,
 
-    /// msg::SyncStatus (msgpack)
-    SyncStatus = 6_u8,
-
-    /// msg::SyncRequest (msgpack)
-    SyncRequest = 7_u8,
-
-    /// msg::SyncResponse (msgpack)
-    SyncResponse = 8_u8,
+    /// msg::Sync (msgpack)
+    Sync = 7_u8,
 }
 
 impl From<u8> for MessageType {
@@ -68,9 +62,7 @@ impl MessageType {
             Self::HaveRecords => "HAVE_RECORDS",
             Self::GetRecords => "GET_RECORDS",
             Self::Record => "RECORD",
-            Self::SyncStatus => "SYNC_STATUS",
-            Self::SyncRequest => "SYNC_REQUEST",
-            Self::SyncResponse => "SYNC_RESPONSE",
+            Self::Sync => "SYNC",
         }
     }
 }
@@ -141,81 +133,22 @@ pub mod msg {
     }
 
     #[derive(Serialize, Deserialize)]
-    pub struct SyncStatus {
-        /// Total number of records this node has in its data store.
-        #[serde(rename = "c")]
-        pub record_count: u64,
-
-        /// Sending node's system clock.
-        #[serde(rename = "t")]
-        pub clock: u64,
-    }
-
-    #[derive(Serialize, Deserialize)]
-    pub struct SyncRequest<'a> {
-        /// Key range start (length: KEY_SIZE)
-        #[serde(with = "serde_bytes")]
-        #[serde(rename = "s")]
-        pub range_start: &'a [u8],
-
-        /// Key range end (length: KEY_SIZE)
-        #[serde(with = "serde_bytes")]
-        #[serde(rename = "e")]
-        pub range_end: &'a [u8],
-
-        /// Number of records requesting node already has under key range
-        #[serde(rename = "c")]
-        pub record_count: u64,
-
+    pub struct Sync<'a> {
+        /// 64-bit prefix of reocrd keys for this request
+        #[serde(rename = "p")]
+        pub prefix: u64,
+
+        /// Number of bits in prefix that are meaningful
+        #[serde(rename = "b")]
+        pub prefix_bits: u8,
+
         /// Reference time for query
         #[serde(rename = "t")]
         pub reference_time: u64,
 
-        /// Random salt
-        #[serde(rename = "x")]
-        pub salt: &'a [u8],
-    }
-
-    #[derive(Serialize, Deserialize)]
-    pub struct SyncResponse<'a> {
-        /// Key range start (length: KEY_SIZE)
-        #[serde(rename = "s")]
-        pub range_start: &'a [u8],
-
-        /// Key range end (length: KEY_SIZE)
-        #[serde(rename = "e")]
-        pub range_end: &'a [u8],
-
-        /// Number of records responder has under key range
-        #[serde(rename = "c")]
-        pub record_count: u64,
-
-        /// Reference time for query
-        #[serde(rename = "t")]
-        pub reference_time: u64,
-
-        /// Random salt
-        #[serde(rename = "x")]
-        pub salt: &'a [u8],
-
-        /// IBLT set summary or empty if not included
-        ///
-        /// If an IBLT is omitted it means the sender determined it was
-        /// more efficient to just send keys. In that case keys[] should have
-        /// an explicit list.
+        /// Set summary for keys under prefix
         #[serde(with = "serde_bytes")]
         #[serde(rename = "i")]
        pub iblt: &'a [u8],
-
-        /// Explicit list of keys (full key length).
-        ///
-        /// This may still contain keys if an IBLT is present. In that case
-        /// keys included here will be any that have identical 64-bit prefixes
-        /// to keys already added to the IBLT and thus would collide. These
-        /// should be rare so it's most efficient to just explicitly name them.
-        /// Otherwise keys with identical 64-bit prefixes may never be synced.
-        #[serde(with = "serde_bytes")]
-        #[serde(rename = "k")]
-        pub keys: &'a [u8],
     }
 }