diff --git a/node/Metrics.cpp b/node/Metrics.cpp index 11232cb0e..366f4ab32 100644 --- a/node/Metrics.cpp +++ b/node/Metrics.cpp @@ -13,6 +13,7 @@ // clang-format off #include #include +#include "Metrics.hpp" // clang-format on namespace prometheus { @@ -162,5 +163,68 @@ prometheus::simpleapi::gauge_metric_t pool_avail { "controller_pgsql_available_c prometheus::simpleapi::gauge_metric_t pool_in_use { "controller_pgsql_in_use_conns", "number of postgres database connections in use" }; prometheus::simpleapi::counter_metric_t pool_errors { "controller_pgsql_connection_errors", "number of connection errors the connection pool has seen" }; #endif -} // namespace Metrics -} // namespace ZeroTier + + // Fragmentation Metrics + prometheus::simpleapi::counter_family_t packet_fragmentation + { "zt_packet_fragmentation", "ZeroTier packet fragmentation events" }; + + // VL2 Fragmentation Metrics + prometheus::simpleapi::counter_metric_t vl2_oversized_frame_tx + { packet_fragmentation.Add({{"layer", "VL2"}, {"direction", "tx"}, {"reason", "oversized_frame"}}) }; + prometheus::simpleapi::counter_metric_t vl2_would_fragment_or_drop_rx + { packet_fragmentation.Add({{"layer", "VL2"}, {"direction", "rx"}, {"reason", "would_fragment_or_drop"}}) }; + + // VL1 Fragmentation Metrics + prometheus::simpleapi::counter_metric_t vl1_fragmented_tx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "tx"}, {"reason", "mtu_exceeded"}}) }; + prometheus::simpleapi::counter_metric_t vl1_fragment_rx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "rx"}, {"reason", "fragment"}}) }; + prometheus::simpleapi::counter_metric_t vl1_reassembly_failed_rx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "rx"}, {"reason", "reassembly_failed"}}) }; + prometheus::simpleapi::counter_metric_t vl1_fragment_without_head_rx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "rx"}, {"reason", "fragment_without_head"}}) }; + prometheus::simpleapi::counter_metric_t 
vl1_fragment_before_head_rx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "rx"}, {"reason", "fragment_before_head"}}) }; + prometheus::simpleapi::counter_metric_t vl1_duplicate_fragment_rx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "rx"}, {"reason", "duplicate_fragment"}}) }; + prometheus::simpleapi::counter_metric_t vl1_duplicate_head_rx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "rx"}, {"reason", "duplicate_head"}}) }; + + // VL1 Fragmentation Histogram and Counters + prometheus::CustomFamily> &vl1_fragments_per_packet_histogram = + prometheus::Builder>() + .Name("zt_vl1_fragments_per_packet") + .Help("Histogram of fragments per packet at VL1") + .Register(prometheus::simpleapi::registry); + prometheus::simpleapi::counter_metric_t vl1_incomplete_reassembly_rx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "rx"}, {"reason", "incomplete_reassembly"}}) }; + prometheus::simpleapi::counter_metric_t vl1_vl2_double_fragmentation_tx + { packet_fragmentation.Add({{"layer", "VL1_VL2"}, {"direction", "tx"}, {"reason", "double_fragmentation"}}) }; + + prometheus::Histogram &vl1_fragments_per_packet_hist = + vl1_fragments_per_packet_histogram.Add( + {}, + std::vector(std::begin(ZeroTier::Metrics::VL1_FRAGMENTS_PER_PACKET_BUCKETS), std::end(ZeroTier::Metrics::VL1_FRAGMENTS_PER_PACKET_BUCKETS)) + ); + + // VL2 Frame Size Histogram + // Buckets: 512 (IoT/legacy), 576 (min IPv4), 1200 (QUIC/mobile), 1280 (min IPv6), + // 1332, 1380, 1400 (VPN/overlay), 1420 (cloud), 1460 (TCP MSS), 1472 (ICMP/MTU), + // 1480 (ICMP/MTU), 1492 (PPPoE), 1500 (Ethernet), 2800 (VL2 default), 9000 (jumbo) + prometheus::CustomFamily> &vl2_frame_size_histogram = + prometheus::Builder>() + .Name("zt_vl2_frame_size") + .Help("Histogram of frame sizes delivered to TAP (VL2)") + .Register(prometheus::simpleapi::registry); + prometheus::simpleapi::counter_metric_t vl2_incomplete_reassembly_rx + { packet_fragmentation.Add({{"layer", "VL2"}, 
{"direction", "rx"}, {"reason", "incomplete_reassembly"}}) }; + prometheus::simpleapi::counter_metric_t vl2_vl1_double_fragmentation_tx + { packet_fragmentation.Add({{"layer", "VL2_VL1"}, {"direction", "tx"}, {"reason", "double_fragmentation"}}) }; + + prometheus::Histogram &vl2_frame_size_hist = + vl2_frame_size_histogram.Add( + {}, + std::vector(std::begin(ZeroTier::Metrics::VL2_FRAME_SIZE_BUCKETS), std::end(ZeroTier::Metrics::VL2_FRAME_SIZE_BUCKETS)) + ); + } +} diff --git a/node/Metrics.hpp b/node/Metrics.hpp index 8c2c4290d..a8574225a 100644 --- a/node/Metrics.hpp +++ b/node/Metrics.hpp @@ -139,6 +139,38 @@ extern prometheus::simpleapi::counter_metric_t db_get_network_list; extern prometheus::simpleapi::counter_metric_t db_member_change; extern prometheus::simpleapi::counter_metric_t db_network_change; + // Fragmentation Metrics + extern prometheus::simpleapi::counter_family_t packet_fragmentation; + + // VL2 Fragmentation Metrics + extern prometheus::simpleapi::counter_metric_t vl2_oversized_frame_tx; + extern prometheus::simpleapi::counter_metric_t vl2_would_fragment_or_drop_rx; + + // VL1 Fragmentation Metrics + extern prometheus::simpleapi::counter_metric_t vl1_fragmented_tx; + extern prometheus::simpleapi::counter_metric_t vl1_fragment_rx; + extern prometheus::simpleapi::counter_metric_t vl1_reassembly_failed_rx; + extern prometheus::simpleapi::counter_metric_t vl1_fragment_without_head_rx; + extern prometheus::simpleapi::counter_metric_t vl1_fragment_before_head_rx; + extern prometheus::simpleapi::counter_metric_t vl1_duplicate_fragment_rx; + extern prometheus::simpleapi::counter_metric_t vl1_duplicate_head_rx; + + // VL1 Fragmentation Histogram and Counters + extern prometheus::CustomFamily> &vl1_fragments_per_packet_histogram; + extern prometheus::simpleapi::counter_metric_t vl1_incomplete_reassembly_rx; + extern prometheus::simpleapi::counter_metric_t vl1_vl2_double_fragmentation_tx; + + // VL2 Frame Size Histogram + // Buckets: 512 (IoT/legacy), 576 
(min IPv4), 1200 (QUIC/mobile), 1280 (min IPv6), + // 1332, 1380, 1400 (VPN/overlay), 1420 (cloud), 1460 (TCP MSS), 1472 (ICMP/MTU), + // 1480 (ICMP/MTU), 1492 (PPPoE), 1500 (Ethernet), 2800 (VL2 default), 9000 (jumbo) + extern prometheus::CustomFamily> &vl2_frame_size_histogram; + + // Histogram bucket boundaries for VL1 fragments per packet + inline constexpr uint64_t VL1_FRAGMENTS_PER_PACKET_BUCKETS[] = {1,2,3,4,5,6,7,8,9,10,12,16}; + // Histogram bucket boundaries for VL2 frame size + inline constexpr uint64_t VL2_FRAME_SIZE_BUCKETS[] = {512,576,1200,1280,1332,1380,1400,1420,1460,1472,1480,1492,1500,2800,9000}; + #ifdef ZT_CONTROLLER_USE_LIBPQ // Central Controller Metrics extern prometheus::simpleapi::counter_metric_t pgsql_mem_notification; @@ -159,7 +191,10 @@ extern prometheus::simpleapi::gauge_metric_t pool_avail; extern prometheus::simpleapi::gauge_metric_t pool_in_use; extern prometheus::simpleapi::counter_metric_t pool_errors; #endif -} // namespace Metrics -} // namespace ZeroTier + + extern prometheus::Histogram &vl1_fragments_per_packet_hist; + extern prometheus::Histogram &vl2_frame_size_hist; + } // namespace Metrics +}// namespace ZeroTier #endif // METRICS_H_ diff --git a/node/Switch.cpp b/node/Switch.cpp index 6b0e2b081..d4cb2bd87 100644 --- a/node/Switch.cpp +++ b/node/Switch.cpp @@ -121,7 +121,7 @@ void Switch::onRemotePacket(void* tPtr, const int64_t localSocket, const InetAdd Mutex::Lock rql(rq->lock); if (rq->packetId != fragmentPacketId) { // No packet found, so we received a fragment without its head. - + Metrics::vl1_fragment_without_head_rx++; rq->flowId = flowId; rq->timestamp = now; rq->packetId = fragmentPacketId; @@ -132,7 +132,7 @@ void Switch::onRemotePacket(void* tPtr, const int64_t localSocket, const InetAdd } else if (! 
(rq->haveFragments & (1 << fragmentNumber))) { // We have other fragments and maybe the head, so add this one and check - + Metrics::vl1_fragment_before_head_rx++; rq->frags[fragmentNumber - 1] = fragment; rq->totalFragments = totalFragments; @@ -143,14 +143,17 @@ void Switch::onRemotePacket(void* tPtr, const int64_t localSocket, const InetAdd rq->frag0.append(rq->frags[f - 1].payload(), rq->frags[f - 1].payloadLength()); } - if (rq->frag0.tryDecode(RR, tPtr, flowId)) { - rq->timestamp = 0; // packet decoded, free entry - } - else { - rq->complete = true; // set complete flag but leave entry since it probably needs WHOIS or something + if (rq->frag0.tryDecode(RR,tPtr,flowId)) { + rq->timestamp = 0; // packet decoded, free entry + } else { + rq->complete = true; // set complete flag but leave entry since it probably needs WHOIS or something + Metrics::vl1_reassembly_failed_rx++; } } - } // else this is a duplicate fragment, ignore + } else { + // This is a duplicate fragment, ignore + Metrics::vl1_duplicate_fragment_rx++; + } } } @@ -230,19 +233,21 @@ void Switch::onRemotePacket(void* tPtr, const int64_t localSocket, const InetAdd } if (rq->frag0.tryDecode(RR, tPtr, flowId)) { - rq->timestamp = 0; // packet decoded, free entry - } - else { - rq->complete = true; // set complete flag but leave entry since it probably needs WHOIS or something + rq->timestamp = 0; // packet decoded, free entry + } else { + rq->complete = true; // set complete flag but leave entry since it probably needs WHOIS or something + Metrics::vl1_reassembly_failed_rx++; } } else { // Still waiting on more fragments, but keep the head rq->frag0.init(data, len, path, now); } - } // else this is a duplicate head, ignore - } - else { + } else { + // This is a duplicate head, ignore + Metrics::vl1_duplicate_head_rx++; + } + } else { // Packet is unfragmented, so just process it IncomingPacket packet(data, len, path, now); if (! 
packet.tryDecode(RR, tPtr, flowId)) {
@@ -268,7 +273,16 @@ void Switch::onRemotePacket(void* tPtr, const int64_t localSocket, const InetAdd
 
 void Switch::onLocalEthernet(void* tPtr, const SharedPtr& network, const MAC& from, const MAC& to, unsigned int etherType, unsigned int vlanId, const void* data, unsigned int len)
 {
 	if (! network->hasConfig()) {
 		return;
 	}
+
+	// VL2 fragmentation metric: oversized frame from TAP device (TX).
+	// Measurement only: execution falls through to the rest of this function.
+	unsigned int tap_mtu = network->config().mtu;
+	bool was_fragmented_at_vl2 = (len > tap_mtu);
+	if (was_fragmented_at_vl2) {
+		// Just measure, do not drop or return
+		Metrics::vl2_oversized_frame_tx++;
+	}
 
@@ -960,9 +974,17 @@ void Switch::doAnythingWaitingForPeer(void* tPtr, const SharedPtr& peer)
 	for (unsigned int ptr = 0; ptr < ZT_RX_QUEUE_SIZE; ++ptr) {
 		RXQueueEntry* const rq = &(_rxQueue[ptr]);
 		Mutex::Lock rql(rq->lock);
 		if ((rq->timestamp) && (rq->complete)) {
-			if ((rq->frag0.tryDecode(RR, tPtr, rq->flowId)) || ((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT)) {
+			if (rq->frag0.tryDecode(RR, tPtr, rq->flowId)) {
+				rq->timestamp = 0;	// packet decoded, free entry
+			} else if ((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT) {
+				Metrics::vl1_incomplete_reassembly_rx++;	// age is tested before the timestamp is cleared on the next line
 				rq->timestamp = 0;
+			} else {
+				const Address src(rq->frag0.source());
+				if (!RR->topology->getPeer(tPtr,src)) {
+					requestWhois(tPtr,now,src);
+				}
 			}
 		}
 	}
 }
@@ -1019,9 +1041,12 @@ unsigned long Switch::doTimerTasks(void* tPtr, int64_t now)
 	for (unsigned int ptr = 0; ptr < ZT_RX_QUEUE_SIZE; ++ptr) {
 		RXQueueEntry* const rq = &(_rxQueue[ptr]);
 		Mutex::Lock rql(rq->lock);
 		if ((rq->timestamp) && (rq->complete)) {
-			if ((rq->frag0.tryDecode(RR, tPtr, rq->flowId)) || ((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT)) {
+			if (rq->frag0.tryDecode(RR, tPtr, rq->flowId)) {
+				rq->timestamp = 0;	// packet decoded, free entry
+			} else if ((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT) {
+				Metrics::vl1_incomplete_reassembly_rx++;	// age is tested before the timestamp is cleared on the next line
 				rq->timestamp = 0;
 			}
 			else {
 				const Address src(rq->frag0.source());
@@ -1084,7 +1109,7 @@ bool Switch::_trySend(void* tPtr, Packet& packet, bool encrypt, int32_t flowId)
 			for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) {
 				if (peer->_paths[i].p && peer->_paths[i].p->alive(now)) {
 					uint16_t userSpecifiedMtu = peer->_paths[i].p->mtu();
-					_sendViaSpecificPath(tPtr, peer, peer->_paths[i].p, userSpecifiedMtu, now, packet, encrypt, flowId);
+					_sendViaSpecificPath(tPtr, peer, peer->_paths[i].p, userSpecifiedMtu, now, packet, encrypt, flowId, false);
 				}
 			}
 			return true;
@@ -1102,7 +1127,7 @@ bool Switch::_trySend(void* tPtr, Packet& packet, bool encrypt, int32_t flowId)
 			}
 			if (viaPath) {
 				uint16_t userSpecifiedMtu = viaPath->mtu();
-				_sendViaSpecificPath(tPtr, peer, viaPath, userSpecifiedMtu, now, packet, encrypt, flowId);
+				_sendViaSpecificPath(tPtr, peer, viaPath, userSpecifiedMtu, now, packet, encrypt, flowId, false);
 				return true;
 			}
 		}
@@ -1110,7 +1135,7 @@ bool Switch::_trySend(void* tPtr, Packet& packet, bool encrypt, int32_t flowId)
 	return false;
 }
 
-void Switch::_sendViaSpecificPath(void* tPtr, SharedPtr peer, SharedPtr viaPath, uint16_t userSpecifiedMtu, int64_t now, Packet& packet, bool encrypt, int32_t flowId)
+void Switch::_sendViaSpecificPath(void* tPtr, SharedPtr peer, SharedPtr viaPath, uint16_t userSpecifiedMtu, int64_t now, Packet& packet, bool encrypt, int32_t flowId, bool was_fragmented_at_vl2)
 {
 	unsigned int mtu = ZT_DEFAULT_PHYSMTU;
 	uint64_t trustedPathId = 0;
@@ -1137,6 +1162,11 @@ void Switch::_sendViaSpecificPath(void* tPtr, SharedPtr peer, SharedPtrsend(RR, tPtr, packet.data(), chunkSize, now)) {
 		if (chunkSize < packet.size()) {
 			// Too big for one packet, fragment the rest
+			Metrics::vl1_fragments_per_packet_hist.Observe(2);	// NOTE(review): fixed value, recorded before the fragment count is computed below - confirm intended
+			if (was_fragmented_at_vl2) {
+				Metrics::vl1_vl2_double_fragmentation_tx++;
+			}
+
 			unsigned int fragStart = chunkSize;
 			unsigned int remaining = 
packet.size() - chunkSize; unsigned int fragsRemaining = (remaining / (mtu - ZT_PROTO_MIN_FRAGMENT_LENGTH)); @@ -1144,6 +1174,7 @@ void Switch::_sendViaSpecificPath(void* tPtr, SharedPtr peer, SharedPtr peer, SharedPtr viaPath, uint16_t userSpecifiedMtu, int64_t now, Packet& packet, bool encrypt, int32_t flowId); + void _sendViaSpecificPath(void* tPtr, SharedPtr peer, SharedPtr viaPath, uint16_t userSpecifiedMtu, int64_t now, Packet& packet, bool encrypt, int32_t flowId, bool was_fragmented_at_vl2); void _recordOutgoingPacketMetrics(const Packet& p); const RuntimeEnvironment* const RR; diff --git a/osdep/BSDEthernetTap.cpp b/osdep/BSDEthernetTap.cpp index e1e0d42e5..ac137a659 100644 --- a/osdep/BSDEthernetTap.cpp +++ b/osdep/BSDEthernetTap.cpp @@ -51,6 +51,13 @@ #include #include +#include "../node/Constants.hpp" +#include "../node/Utils.hpp" +#include "../node/Mutex.hpp" +#include "OSUtils.hpp" +#include "BSDEthernetTap.hpp" +#include "../node/Metrics.hpp" + #define ZT_BASE32_CHARS "0123456789abcdefghijklmnopqrstuv" #define ZT_TAP_BUF_SIZE (1024 * 16) @@ -353,6 +360,11 @@ std::vector BSDEthernetTap::ips() const void BSDEthernetTap::put(const MAC& from, const MAC& to, unsigned int etherType, const void* data, unsigned int len) { + // VL2 frame size histogram + Metrics::vl2_frame_size_hist.Observe(len); + if (len > this->_mtu) { + Metrics::vl2_would_fragment_or_drop_rx++; + } char putBuf[ZT_MAX_MTU + 64]; if ((_fd > 0) && (len <= _mtu) && (_enabled)) { to.copyTo(putBuf, 6); diff --git a/osdep/LinuxEthernetTap.cpp b/osdep/LinuxEthernetTap.cpp index 5fe37216d..cb08dfb52 100644 --- a/osdep/LinuxEthernetTap.cpp +++ b/osdep/LinuxEthernetTap.cpp @@ -16,6 +16,7 @@ #endif #include "../node/Constants.hpp" +#include "../node/Metrics.hpp" #ifdef __LINUX__ @@ -507,6 +508,11 @@ std::vector LinuxEthernetTap::ips() const void LinuxEthernetTap::put(const MAC& from, const MAC& to, unsigned int etherType, const void* data, unsigned int len) { + // VL2 frame size histogram + 
ZeroTier::Metrics::vl2_frame_size_hist.Observe(len); + if (len > this->_mtu) { + ZeroTier::Metrics::vl2_would_fragment_or_drop_rx++; + } char putBuf[ZT_MAX_MTU + 64]; if ((_fd > 0) && (len <= _mtu) && (_enabled)) { to.copyTo(putBuf, 6); diff --git a/osdep/MacEthernetTap.cpp b/osdep/MacEthernetTap.cpp index 09278855b..bb8397890 100644 --- a/osdep/MacEthernetTap.cpp +++ b/osdep/MacEthernetTap.cpp @@ -22,6 +22,7 @@ #include "MacEthernetTap.hpp" #include "MacEthernetTapAgent.h" #include "OSUtils.hpp" +#include "../node/Metrics.hpp" #include #include @@ -393,6 +394,11 @@ std::vector MacEthernetTap::ips() const void MacEthernetTap::put(const MAC& from, const MAC& to, unsigned int etherType, const void* data, unsigned int len) { + // VL2 frame size histogram + Metrics::vl2_frame_size_hist.Observe(len); + if (len > this->_mtu) { + Metrics::vl2_would_fragment_or_drop_rx++; + } struct iovec iov[3]; unsigned char hdr[15]; uint16_t l; diff --git a/osdep/NetBSDEthernetTap.cpp b/osdep/NetBSDEthernetTap.cpp index 0508246b0..03cb1d183 100644 --- a/osdep/NetBSDEthernetTap.cpp +++ b/osdep/NetBSDEthernetTap.cpp @@ -51,6 +51,15 @@ #include #include #include + +#include "../node/Constants.hpp" +#include "../node/Utils.hpp" +#include "../node/Mutex.hpp" +#include "OSUtils.hpp" +#include "NetBSDEthernetTap.hpp" +#include "../node/Metrics.hpp" + +#include using namespace std; #define ZT_BASE32_CHARS "0123456789abcdefghijklmnopqrstuv" @@ -328,6 +337,12 @@ std::vector NetBSDEthernetTap::ips() const void NetBSDEthernetTap::put(const MAC& from, const MAC& to, unsigned int etherType, const void* data, unsigned int len) { + // VL2 frame size histogram + Metrics::vl2_frame_size_hist.Observe(len); + + if (len > this->_mtu) { + Metrics::vl2_would_fragment_or_drop_rx++; + } char putBuf[4096]; if ((_fd > 0) && (len <= _mtu) && (_enabled)) { to.copyTo(putBuf, 6); diff --git a/osdep/WindowsEthernetTap.cpp b/osdep/WindowsEthernetTap.cpp index 3043868ae..5ebcf4904 100644 --- 
a/osdep/WindowsEthernetTap.cpp +++ b/osdep/WindowsEthernetTap.cpp @@ -16,9 +16,10 @@ #include "../node/Constants.hpp" #include "../node/Mutex.hpp" #include "../node/Utils.hpp" -#include "..\windows\TapDriver6\tap-windows.h" +#include "../windows/TapDriver6/tap-windows.h" #include "OSUtils.hpp" #include "WinDNSHelper.hpp" +#include "../node/Metrics.hpp" #include #include @@ -816,7 +817,14 @@ std::vector WindowsEthernetTap::ips() const void WindowsEthernetTap::put(const MAC& from, const MAC& to, unsigned int etherType, const void* data, unsigned int len) { - if ((! _initialized) || (! _enabled) || (_tap == INVALID_HANDLE_VALUE) || (len > _mtu)) + // Check MTU and add to histogram + ZeroTier::Metrics::vl2_frame_size_hist.Observe(len); + if (len > this->_mtu) { + ZeroTier::Metrics::vl2_would_fragment_or_drop_rx++; + return; + } + + if ((! _initialized) || (! _enabled) || (_tap == INVALID_HANDLE_VALUE)) return; Mutex::Lock _l(_injectPending_m);