Merge pull request #1924 from zerotier/tetanus-defrag-2

Fix the defragmenter and implement DOS resistance for init packets
Monica Moniot 2023-03-23 09:40:24 -04:00 committed by GitHub
commit d042866687
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 81 additions and 98 deletions


@@ -71,7 +71,7 @@ pub trait ApplicationLayer: Sized {
///
/// A physical path could be an IP address or IP plus device in the case of UDP, a socket in the
/// case of TCP, etc.
type PhysicalPath: PartialEq + Eq + Hash + Clone;
type PhysicalPath: Hash;
/// Get a reference to this host's static public key blob.
///


@@ -3,6 +3,7 @@ use std::ptr::slice_from_raw_parts;
/// Fast packet defragmenter
pub struct Fragged<Fragment, const MAX_FRAGMENTS: usize> {
count: u32,
have: u64,
counter: u64,
frags: [MaybeUninit<Fragment>; MAX_FRAGMENTS],
@@ -35,15 +36,21 @@ impl<Fragment, const MAX_FRAGMENTS: usize> Fragged<Fragment, MAX_FRAGMENTS> {
// that the array of MaybeUninit<Fragment> can be freely cast into an array of
// Fragment. They also check that the maximum number of fragments is not too large
// for the fact that we use bits in a u64 to track which fragments are received.
assert!(MAX_FRAGMENTS <= 64);
assert_eq!(size_of::<MaybeUninit<Fragment>>(), size_of::<Fragment>());
assert_eq!(
debug_assert!(MAX_FRAGMENTS <= 64);
debug_assert_eq!(size_of::<MaybeUninit<Fragment>>(), size_of::<Fragment>());
debug_assert_eq!(
size_of::<[MaybeUninit<Fragment>; MAX_FRAGMENTS]>(),
size_of::<[Fragment; MAX_FRAGMENTS]>()
);
unsafe { zeroed() }
}
/// Returns the counter value associated with the packet currently being assembled.
/// If no packet is currently being assembled it returns 0.
#[inline(always)]
pub fn counter(&self) -> u64 {
self.counter
}
/// Add a fragment and return an assembled packet container if all fragments have been received.
///
/// When a fully assembled packet is returned the internal state is reset and this object can
@@ -51,12 +58,10 @@ impl<Fragment, const MAX_FRAGMENTS: usize> Fragged<Fragment, MAX_FRAGMENTS> {
#[inline(always)]
pub fn assemble(&mut self, counter: u64, fragment: Fragment, fragment_no: u8, fragment_count: u8) -> Option<Assembled<Fragment, MAX_FRAGMENTS>> {
if fragment_no < fragment_count && (fragment_count as usize) <= MAX_FRAGMENTS {
let mut have = self.have;
// If the counter has changed, reset the structure to receive a new packet.
if counter != self.counter {
self.counter = counter;
if needs_drop::<Fragment>() {
let mut have = self.have;
let mut i = 0;
while have != 0 {
if (have & 1) != 0 {
@@ -66,25 +71,27 @@ impl<Fragment, const MAX_FRAGMENTS: usize> Fragged<Fragment, MAX_FRAGMENTS> {
have = have.wrapping_shr(1);
i += 1;
}
} else {
have = 0;
}
}
unsafe {
self.frags.get_unchecked_mut(fragment_no as usize).write(fragment);
}
let want = 0xffffffffffffffffu64.wrapping_shr((64 - fragment_count) as u32);
have |= 1u64.wrapping_shl(fragment_no as u32);
if (have & want) == want {
self.have = 0;
// Setting 'have' to 0 resets the state of this object, and the fragments
// are effectively moved into the Assembled<> container and returned. That
// container will drop them when it is dropped.
return Some(Assembled(unsafe { std::mem::transmute_copy(&self.frags) }, fragment_count as usize));
} else {
self.have = have;
self.count = fragment_count as u32;
self.counter = counter;
}
let got = 1u64.wrapping_shl(fragment_no as u32);
if got & self.have == 0 && self.count as u8 == fragment_count {
self.have |= got;
unsafe {
self.frags.get_unchecked_mut(fragment_no as usize).write(fragment);
}
if self.have == 1u64.wrapping_shl(self.count) - 1 {
self.have = 0;
self.count = 0;
self.counter = 0;
// Setting 'have' to 0 resets the state of this object, and the fragments
// are effectively moved into the Assembled<> container and returned. That
// container will drop them when it is dropped.
return Some(Assembled(unsafe { std::mem::transmute_copy(&self.frags) }, fragment_count as usize));
}
}
}
return None;
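The rewritten `assemble` above tracks received fragments in a single `u64` bitmask: a `got` bit is set per fragment, duplicates are rejected by the bit test, and the packet is complete once all low `fragment_count` bits are set. Below is a minimal standalone sketch of just that bit arithmetic, not the library API; the loop and the sample fragment order are illustrative only.

```rust
fn main() {
    let fragment_count: u32 = 5;
    let mut have: u64 = 0;

    // Fragments may arrive in any order; a set bit means "already have this one".
    for fragment_no in [3u32, 0, 4, 1, 3, 2] {
        let got = 1u64.wrapping_shl(fragment_no);
        if got & have != 0 {
            println!("fragment {fragment_no} is a duplicate, ignored");
            continue;
        }
        have |= got;
        // Complete when the low `fragment_count` bits are all set.
        if have == 1u64.wrapping_shl(fragment_count) - 1 {
            println!("all {fragment_count} fragments received; packet can be assembled");
        }
    }
}
```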


@@ -46,7 +46,7 @@ fn alice_main(
alice_out: mpsc::SyncSender<Vec<u8>>,
alice_in: mpsc::Receiver<Vec<u8>>,
) {
let context = zssp::Context::<TestApplication>::new(16, TEST_MTU);
let context = zssp::Context::<TestApplication>::new(TEST_MTU);
let mut data_buf = [0u8; 65536];
let mut next_service = ms_monotonic() + 500;
let mut last_ratchet_count = 0;
@@ -157,7 +157,7 @@ fn bob_main(
bob_out: mpsc::SyncSender<Vec<u8>>,
bob_in: mpsc::Receiver<Vec<u8>>,
) {
let context = zssp::Context::<TestApplication>::new(16, TEST_MTU);
let context = zssp::Context::<TestApplication>::new(TEST_MTU);
let mut data_buf = [0u8; 65536];
let mut data_buf_2 = [0u8; TEST_MTU];
let mut last_ratchet_count = 0;


@@ -63,6 +63,7 @@ pub(crate) const KBKDF_KEY_USAGE_LABEL_AES_GCM_ALICE_TO_BOB: u8 = b'A'; // AES-G
pub(crate) const KBKDF_KEY_USAGE_LABEL_AES_GCM_BOB_TO_ALICE: u8 = b'B'; // AES-GCM in B->A direction
pub(crate) const KBKDF_KEY_USAGE_LABEL_RATCHET: u8 = b'R'; // Key used in derivation of next session key
pub(crate) const MAX_INCOMPLETE_SESSION_QUEUE_SIZE: usize = 32;
pub(crate) const MAX_FRAGMENTS: usize = 48; // hard protocol max: 63
pub(crate) const MAX_NOISE_HANDSHAKE_FRAGMENTS: usize = 16; // enough room for p384 + ZT identity + kyber1024 + tag/hmac/etc.
pub(crate) const MAX_NOISE_HANDSHAKE_SIZE: usize = MAX_NOISE_HANDSHAKE_FRAGMENTS * MIN_TRANSPORT_MTU;


@@ -9,7 +9,10 @@
// ZSSP: ZeroTier Secure Session Protocol
// FIPS compliant Noise_XK with Jedi powers (Kyber1024) and built-in attack-resistant large payload (fragmentation) support.
use std::collections::{HashMap, HashSet};
use std::collections::hash_map::RandomState;
//use std::collections::hash_map::DefaultHasher;
use std::collections::HashMap;
use std::hash::{BuildHasher, Hash, Hasher};
use std::num::NonZeroU64;
use std::sync::atomic::{AtomicI64, AtomicU64, AtomicUsize, Ordering};
use std::sync::{Arc, Mutex, MutexGuard, RwLock, Weak};
@@ -36,17 +39,9 @@ const GCM_CIPHER_POOL_SIZE: usize = 4;
/// Each application using ZSSP must create an instance of this to own sessions and
/// defragment incoming packets that are not yet associated with a session.
pub struct Context<Application: ApplicationLayer> {
max_incomplete_session_queue_size: usize,
default_physical_mtu: AtomicUsize,
defrag: Mutex<
HashMap<
(Application::PhysicalPath, u64),
Arc<(
Mutex<Fragged<Application::IncomingPacketBuffer, MAX_NOISE_HANDSHAKE_FRAGMENTS>>,
i64, // creation timestamp
)>,
>,
>,
defrag_salt: RandomState,
defrag: [Mutex<Fragged<Application::IncomingPacketBuffer, MAX_NOISE_HANDSHAKE_FRAGMENTS>>; MAX_INCOMPLETE_SESSION_QUEUE_SIZE],
sessions: RwLock<SessionsById<Application>>,
}
@@ -154,11 +149,11 @@ impl<Application: ApplicationLayer> Context<Application> {
/// Create a new session context.
///
/// * `max_incomplete_session_queue_size` - Maximum number of incomplete sessions in negotiation phase
pub fn new(max_incomplete_session_queue_size: usize, default_physical_mtu: usize) -> Self {
pub fn new(default_physical_mtu: usize) -> Self {
Self {
max_incomplete_session_queue_size,
default_physical_mtu: AtomicUsize::new(default_physical_mtu),
defrag: Mutex::new(HashMap::new()),
defrag_salt: RandomState::new(),
defrag: std::array::from_fn(|_| Mutex::new(Fragged::new())),
sessions: RwLock::new(SessionsById {
active: HashMap::with_capacity(64),
incoming: HashMap::with_capacity(64),
@@ -203,7 +198,7 @@ impl<Application: ApplicationLayer> Context<Application> {
PACKET_TYPE_ALICE_NOISE_XK_INIT,
None,
0,
1,
random::next_u64_secure(),
None,
);
}
@@ -262,9 +257,6 @@ impl<Application: ApplicationLayer> Context<Application> {
}
}
// Delete any expired defragmentation queue items not associated with a session.
self.defrag.lock().unwrap().retain(|_, fragged| fragged.1 > negotiation_timeout_cutoff);
Application::INCOMING_SESSION_NEGOTIATION_TIMEOUT_MS.min(Application::RETRY_INTERVAL)
}
@@ -386,7 +378,7 @@ impl<Application: ApplicationLayer> Context<Application> {
PACKET_TYPE_ALICE_NOISE_XK_INIT,
None,
0,
1,
random::next_u64_secure(),
None,
)?;
}
@@ -458,7 +450,7 @@ impl<Application: ApplicationLayer> Context<Application> {
let (key_index, packet_type, fragment_count, fragment_no, incoming_counter) = parse_packet_header(&incoming_physical_packet);
if session.check_receive_window(incoming_counter) {
let (assembled_packet, incoming_packet_buf_arr);
let assembled_packet;
let incoming_packet = if fragment_count > 1 {
assembled_packet = session.defrag[(incoming_counter as usize) % COUNTER_WINDOW_MAX_OOO]
.lock()
@@ -470,8 +462,7 @@ impl<Application: ApplicationLayer> Context<Application> {
return Ok(ReceiveResult::Ok(Some(session)));
}
} else {
incoming_packet_buf_arr = [incoming_physical_packet_buf];
&incoming_packet_buf_arr
std::array::from_ref(&incoming_physical_packet_buf)
};
return self.process_complete_incoming_packet(
@@ -499,7 +490,7 @@ impl<Application: ApplicationLayer> Context<Application> {
.decrypt_block_in_place(&mut incoming_physical_packet[HEADER_PROTECT_ENCRYPT_START..HEADER_PROTECT_ENCRYPT_END]);
let (key_index, packet_type, fragment_count, fragment_no, incoming_counter) = parse_packet_header(&incoming_physical_packet);
let (assembled_packet, incoming_packet_buf_arr);
let assembled_packet;
let incoming_packet = if fragment_count > 1 {
assembled_packet = incoming.defrag[(incoming_counter as usize) % COUNTER_WINDOW_MAX_OOO]
.lock()
@@ -511,8 +502,7 @@ impl<Application: ApplicationLayer> Context<Application> {
return Ok(ReceiveResult::Ok(None));
}
} else {
incoming_packet_buf_arr = [incoming_physical_packet_buf];
&incoming_packet_buf_arr
std::array::from_ref(&incoming_physical_packet_buf)
};
return self.process_complete_incoming_packet(
@@ -535,48 +525,44 @@ impl<Application: ApplicationLayer> Context<Application> {
} else {
let (key_index, packet_type, fragment_count, fragment_no, incoming_counter) = parse_packet_header(&incoming_physical_packet);
let (assembled_packet, incoming_packet_buf_arr);
let assembled;
let incoming_packet = if fragment_count > 1 {
assembled_packet = {
let mut defrag = self.defrag.lock().unwrap();
let f = defrag
.entry((source.clone(), incoming_counter))
.or_insert_with(|| Arc::new((Mutex::new(Fragged::new()), current_time)))
.clone();
// Using just incoming_counter unhashed would already provide decent DOS resistance,
// but why not make it harder by mixing in a random value and the physical path as well.
let mut hasher = self.defrag_salt.build_hasher();
source.hash(&mut hasher);
hasher.write_u64(incoming_counter);
let hashed_counter = hasher.finish();
let idx0 = (hashed_counter as usize) % MAX_INCOMPLETE_SESSION_QUEUE_SIZE;
let idx1 = (hashed_counter as usize) / MAX_INCOMPLETE_SESSION_QUEUE_SIZE % MAX_INCOMPLETE_SESSION_QUEUE_SIZE;
// Anti-DOS overflow purge of the incoming defragmentation queue for packets not associated with known sessions.
if defrag.len() >= self.max_incomplete_session_queue_size {
// First, drop all entries that are timed out or whose physical source duplicates another entry.
let mut sources = HashSet::with_capacity(defrag.len());
let negotiation_timeout_cutoff = current_time - Application::INCOMING_SESSION_NEGOTIATION_TIMEOUT_MS;
defrag
.retain(|k, fragged| (fragged.1 > negotiation_timeout_cutoff && sources.insert(k.0.clone())) || Arc::ptr_eq(fragged, &f));
// Then, if we are still at or over the limit, drop 10% of remaining entries at random.
if defrag.len() >= self.max_incomplete_session_queue_size {
let mut rn = random::next_u32_secure();
defrag.retain(|_, fragged| {
rn = prng32(rn);
rn > (u32::MAX / 10) || Arc::ptr_eq(fragged, &f)
});
}
// Open hash lookup of just 2 slots.
// By only checking 2 slots we avoid a full table lookup while also minimizing the chance that 2 offers collide.
// To DOS, an adversary would either need to volumetrically spam the defrag table to keep all slots full
// or replay Alice's packet header from a spoofed physical path before Alice's packet is fully assembled.
// Volumetric spam is quite difficult since without the `defrag_salt: RandomState` value an adversary
// cannot control which slots their fragments index to. And since Alice's packet header has a randomly
// generated counter value, replaying it in time requires extreme amounts of network control.
let mut slot0 = self.defrag[idx0].lock().unwrap();
if slot0.counter() == hashed_counter {
assembled = slot0.assemble(hashed_counter, incoming_physical_packet_buf, fragment_no, fragment_count);
} else {
let mut slot1 = self.defrag[idx1].lock().unwrap();
if slot1.counter() == hashed_counter || slot1.counter() == 0 {
assembled = slot1.assemble(hashed_counter, incoming_physical_packet_buf, fragment_no, fragment_count);
} else {
// slot1 is full so kick out whatever is in slot0 to make more room.
assembled = slot0.assemble(hashed_counter, incoming_physical_packet_buf, fragment_no, fragment_count);
}
f
}
.0
.lock()
.unwrap()
.assemble(incoming_counter, incoming_physical_packet_buf, fragment_no, fragment_count);
if let Some(assembled_packet) = assembled_packet.as_ref() {
self.defrag.lock().unwrap().remove(&(source.clone(), incoming_counter));
if let Some(assembled_packet) = &assembled {
assembled_packet.as_ref()
} else {
return Ok(ReceiveResult::Ok(None));
}
} else {
incoming_packet_buf_arr = [incoming_physical_packet_buf];
&incoming_packet_buf_arr
std::array::from_ref(&incoming_physical_packet_buf)
};
return self.process_complete_incoming_packet(
@@ -585,7 +571,7 @@ impl<Application: ApplicationLayer> Context<Application> {
&mut check_allow_incoming_session,
&mut check_accept_session,
data_buf,
incoming_counter,
1, // The incoming_counter on init packets is only meant for DOS-resistant defragmentation; we do not want to use it for anything Noise-related.
incoming_packet,
packet_type,
None,
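The comment in the hunk above describes the new DOS-resistant defragmentation path for unauthenticated init packets: the `(source, counter)` pair is hashed with a per-context `RandomState` salt and the result is probed against two slots of a fixed-size array. Below is a self-contained sketch of just the index derivation, assuming a 32-slot table; `SLOTS` stands in for `MAX_INCOMPLETE_SESSION_QUEUE_SIZE` and `candidate_slots` is an illustrative name, not a function in the repo.

```rust
use std::collections::hash_map::RandomState;
use std::hash::{BuildHasher, Hash, Hasher};

const SLOTS: usize = 32; // stands in for MAX_INCOMPLETE_SESSION_QUEUE_SIZE

/// Hash (physical path, packet counter) with a per-process random salt and derive
/// two candidate slots; without knowing the salt an attacker cannot predict where
/// their spoofed fragments will land.
fn candidate_slots<P: Hash>(salt: &RandomState, source: &P, counter: u64) -> (u64, usize, usize) {
    let mut hasher = salt.build_hasher();
    source.hash(&mut hasher);
    hasher.write_u64(counter);
    let hashed_counter = hasher.finish();
    let idx0 = (hashed_counter as usize) % SLOTS;
    let idx1 = (hashed_counter as usize) / SLOTS % SLOTS;
    (hashed_counter, idx0, idx1)
}

fn main() {
    let salt = RandomState::new();
    let (hashed, idx0, idx1) = candidate_slots(&salt, &"198.51.100.7:9993", 0x1234_5678_9abc_def0);
    println!("hashed counter {hashed:#x} -> slots {idx0} and {idx1}");
}
```

If both candidate slots are already busy with other in-progress packets, the diff simply kicks out whatever is in slot 0, which keeps memory bounded without the purge pass the old HashMap-based queue needed.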
@@ -724,7 +710,7 @@ impl<Application: ApplicationLayer> Context<Application> {
* identity, then responds with his ephemeral keys.
*/
if incoming_counter != 1 || session.is_some() || incoming.is_some() {
if session.is_some() || incoming.is_some() {
return Err(Error::OutOfSequence);
}
if pkt_assembled.len() != AliceNoiseXKInit::SIZE {
@@ -801,7 +787,7 @@ impl<Application: ApplicationLayer> Context<Application> {
// If this queue is too big, we remove the latest entry and replace it. The latest
// is used because under flood conditions this is most likely to be another bogus
// entry. If we find one that is actually timed out, that one is replaced instead.
if sessions.incoming.len() >= self.max_incomplete_session_queue_size {
if sessions.incoming.len() >= MAX_INCOMPLETE_SESSION_QUEUE_SIZE {
let mut newest = i64::MIN;
let mut replace_id = None;
let cutoff_time = current_time - Application::INCOMING_SESSION_NEGOTIATION_TIMEOUT_MS;
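The replacement policy described in the comment above prefers a genuinely timed-out entry and otherwise evicts the newest one, since under flood conditions the newest offer is the most likely to be bogus. A hedged sketch of that selection logic, with `entries` as illustrative `(id, creation_time)` pairs rather than the real incoming-session structures:

```rust
/// Pick which incomplete session to replace when the queue is full: any entry older
/// than the negotiation timeout wins immediately; otherwise take the newest entry.
fn pick_replacement(entries: &[(u64, i64)], current_time: i64, timeout_ms: i64) -> Option<u64> {
    let cutoff_time = current_time - timeout_ms;
    let mut newest = i64::MIN;
    let mut replace_id = None;
    for &(id, created_at) in entries {
        if created_at < cutoff_time {
            return Some(id); // an actually expired entry is always the first choice
        }
        if created_at > newest {
            newest = created_at;
            replace_id = Some(id);
        }
    }
    replace_id
}

fn main() {
    // With a 20 second timeout nothing has expired, so the newest entry (id 2) is evicted.
    let entries = [(1u64, 4_000i64), (2, 9_500), (3, 6_000)];
    println!("{:?}", pick_replacement(&entries, 10_000, 20_000));
}
```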
@@ -1710,14 +1696,3 @@ fn kbkdf512<const LABEL: u8>(key: &Secret<NOISE_HASHLEN>) -> Secret<NOISE_HASHLEN> {
fn kbkdf256<const LABEL: u8>(key: &Secret<NOISE_HASHLEN>) -> Secret<32> {
hmac_sha512_secret256(key.as_bytes(), &[1, b'Z', b'T', LABEL, 0x00, 0, 1u8, 0u8])
}
fn prng32(mut x: u32) -> u32 {
// based on lowbias32 from https://nullprogram.com/blog/2018/07/31/
x = x.wrapping_add(1); // don't get stuck on 0
x ^= x.wrapping_shr(16);
x = x.wrapping_mul(0x7feb352d);
x ^= x.wrapping_shr(15);
x = x.wrapping_mul(0x846ca68b);
x ^= x.wrapping_shr(16);
x
}