ZeroTierOne/node/Metrics.hpp
Aaron Johnson 8285e0f45b Add Prometheus metrics for packet fragmentation monitoring in nodes
- Add comprehensive VL1 (ZeroTier protocol) fragmentation metrics:
  * Track fragmented packets, fragments, reassembly failures
  * Monitor fragment ordering issues and duplicates
  * Histogram for fragments per packet distribution

- Add VL2 (TAP/Ethernet) fragmentation metrics for virtual ethernet interfaces:
  * Track oversized frames from TAP devices
  * Monitor frames that would fragment or drop
  * Histogram for frame size distribution with common MTU buckets

- Integration across all TAP implementations (Linux, Mac, BSD, Windows)

This allows monitoring of fragmentation patterns for nodes participating
as members in ZeroTier networks, helping identify MTU mismatches and
optimize virtual ethernet performance.
2025-07-15 10:41:03 -07:00

200 lines
10 KiB
C++

/*
* Copyright (c)2013-2023 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2026-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
*/
#ifndef METRICS_H_
#define METRICS_H_
// clang-format off
#include <prometheus/simpleapi.h>
#include <prometheus/histogram.h>
// clang-format on
// Declaration of the shared registry handle that lives in
// prometheus::simpleapi. It is only declared here (extern); the definition
// is provided by another translation unit.
namespace prometheus::simpleapi {
extern std::shared_ptr<Registry> registry_ptr;
} // namespace prometheus::simpleapi
namespace ZeroTier {
namespace Metrics {
// Packet Type Counts
//
// `packets` is the counter family for per-packet-type counts. The individual
// counters below come in matched pairs: for every packet type there is one
// `pkt_<type>_in` counter and one `pkt_<type>_out` counter, and both lists
// enumerate the same 21 packet types in the same order.
// NOTE(review): presumably each pkt_* counter is a labelled member of the
// `packets` family; the definitions are not in this header -- confirm there.
extern prometheus::simpleapi::counter_family_t packets;
// incoming packets (suffix `_in`)
extern prometheus::simpleapi::counter_metric_t pkt_nop_in;
extern prometheus::simpleapi::counter_metric_t pkt_error_in;
extern prometheus::simpleapi::counter_metric_t pkt_ack_in;
extern prometheus::simpleapi::counter_metric_t pkt_qos_in;
extern prometheus::simpleapi::counter_metric_t pkt_hello_in;
extern prometheus::simpleapi::counter_metric_t pkt_ok_in;
extern prometheus::simpleapi::counter_metric_t pkt_whois_in;
extern prometheus::simpleapi::counter_metric_t pkt_rendezvous_in;
extern prometheus::simpleapi::counter_metric_t pkt_frame_in;
extern prometheus::simpleapi::counter_metric_t pkt_ext_frame_in;
extern prometheus::simpleapi::counter_metric_t pkt_echo_in;
extern prometheus::simpleapi::counter_metric_t pkt_multicast_like_in;
extern prometheus::simpleapi::counter_metric_t pkt_network_credentials_in;
extern prometheus::simpleapi::counter_metric_t pkt_network_config_request_in;
extern prometheus::simpleapi::counter_metric_t pkt_network_config_in;
extern prometheus::simpleapi::counter_metric_t pkt_multicast_gather_in;
extern prometheus::simpleapi::counter_metric_t pkt_multicast_frame_in;
extern prometheus::simpleapi::counter_metric_t pkt_push_direct_paths_in;
extern prometheus::simpleapi::counter_metric_t pkt_user_message_in;
extern prometheus::simpleapi::counter_metric_t pkt_remote_trace_in;
extern prometheus::simpleapi::counter_metric_t pkt_path_negotiation_request_in;
// outgoing packets (suffix `_out`; same packet types as the incoming list)
extern prometheus::simpleapi::counter_metric_t pkt_nop_out;
extern prometheus::simpleapi::counter_metric_t pkt_error_out;
extern prometheus::simpleapi::counter_metric_t pkt_ack_out;
extern prometheus::simpleapi::counter_metric_t pkt_qos_out;
extern prometheus::simpleapi::counter_metric_t pkt_hello_out;
extern prometheus::simpleapi::counter_metric_t pkt_ok_out;
extern prometheus::simpleapi::counter_metric_t pkt_whois_out;
extern prometheus::simpleapi::counter_metric_t pkt_rendezvous_out;
extern prometheus::simpleapi::counter_metric_t pkt_frame_out;
extern prometheus::simpleapi::counter_metric_t pkt_ext_frame_out;
extern prometheus::simpleapi::counter_metric_t pkt_echo_out;
extern prometheus::simpleapi::counter_metric_t pkt_multicast_like_out;
extern prometheus::simpleapi::counter_metric_t pkt_network_credentials_out;
extern prometheus::simpleapi::counter_metric_t pkt_network_config_request_out;
extern prometheus::simpleapi::counter_metric_t pkt_network_config_out;
extern prometheus::simpleapi::counter_metric_t pkt_multicast_gather_out;
extern prometheus::simpleapi::counter_metric_t pkt_multicast_frame_out;
extern prometheus::simpleapi::counter_metric_t pkt_push_direct_paths_out;
extern prometheus::simpleapi::counter_metric_t pkt_user_message_out;
extern prometheus::simpleapi::counter_metric_t pkt_remote_trace_out;
extern prometheus::simpleapi::counter_metric_t pkt_path_negotiation_request_out;
// Packet Error Counts
//
// `packet_errors` is the counter family for per-error-kind counts. As with
// the packet counters, each error kind has a `pkt_error_<kind>_in` and a
// `pkt_error_<kind>_out` counter; both lists cover the same 8 error kinds.
// NOTE(review): presumably each pkt_error_* counter is a labelled member of
// `packet_errors`; the definitions are not in this header -- confirm there.
extern prometheus::simpleapi::counter_family_t packet_errors;
// incoming errors (suffix `_in`)
extern prometheus::simpleapi::counter_metric_t pkt_error_obj_not_found_in;
extern prometheus::simpleapi::counter_metric_t pkt_error_unsupported_op_in;
extern prometheus::simpleapi::counter_metric_t pkt_error_identity_collision_in;
extern prometheus::simpleapi::counter_metric_t pkt_error_need_membership_cert_in;
extern prometheus::simpleapi::counter_metric_t pkt_error_network_access_denied_in;
extern prometheus::simpleapi::counter_metric_t pkt_error_unwanted_multicast_in;
extern prometheus::simpleapi::counter_metric_t pkt_error_authentication_required_in;
extern prometheus::simpleapi::counter_metric_t pkt_error_internal_server_error_in;
// outgoing errors (suffix `_out`; same error kinds as the incoming list)
extern prometheus::simpleapi::counter_metric_t pkt_error_obj_not_found_out;
extern prometheus::simpleapi::counter_metric_t pkt_error_unsupported_op_out;
extern prometheus::simpleapi::counter_metric_t pkt_error_identity_collision_out;
extern prometheus::simpleapi::counter_metric_t pkt_error_need_membership_cert_out;
extern prometheus::simpleapi::counter_metric_t pkt_error_network_access_denied_out;
extern prometheus::simpleapi::counter_metric_t pkt_error_unwanted_multicast_out;
extern prometheus::simpleapi::counter_metric_t pkt_error_authentication_required_out;
extern prometheus::simpleapi::counter_metric_t pkt_error_internal_server_error_out;
// Data Sent/Received Metrics
//
// `data` is the counter family; the four counters below split traffic by
// transport (UDP / TCP) and direction (send / recv).
// NOTE(review): the unit being counted (bytes vs. packets) is not visible in
// this header -- confirm at the increment sites.
extern prometheus::simpleapi::counter_family_t data;
extern prometheus::simpleapi::counter_metric_t udp_send;
extern prometheus::simpleapi::counter_metric_t udp_recv;
extern prometheus::simpleapi::counter_metric_t tcp_send;
extern prometheus::simpleapi::counter_metric_t tcp_recv;
// Network Metrics
//
// network_num_joined is a single gauge. The other two are metric *families*
// (a gauge family and a counter family), so concrete metrics are created from
// them elsewhere; the label sets used are not visible in this header.
extern prometheus::simpleapi::gauge_metric_t network_num_joined;
extern prometheus::simpleapi::gauge_family_t network_num_multicast_groups;
extern prometheus::simpleapi::counter_family_t network_packets;
// Everything in this section is compiled out when ZT_NO_PEER_METRICS is
// defined, so code that uses these names must be guarded the same way.
#ifndef ZT_NO_PEER_METRICS
// Peer Metrics
//
// All four are families rather than single metrics. peer_latency is declared
// as a reference to a histogram family over uint64_t samples; the other three
// are simpleapi gauge/counter families.
// NOTE(review): the latency unit is not visible in this header -- confirm.
extern prometheus::CustomFamily<prometheus::Histogram<uint64_t> >& peer_latency;
extern prometheus::simpleapi::gauge_family_t peer_path_count;
extern prometheus::simpleapi::counter_family_t peer_packets;
extern prometheus::simpleapi::counter_family_t peer_packet_errors;
#endif
// General Controller Metrics
//
// Declared unconditionally (no preprocessor guard in this header). Gauges are
// used for the count/size-style values (network_count, member_count,
// network_config_request_queue_size, network_config_request_threads);
// everything else is a counter.
extern prometheus::simpleapi::gauge_metric_t network_count;
extern prometheus::simpleapi::gauge_metric_t member_count;
extern prometheus::simpleapi::counter_metric_t network_changes;
extern prometheus::simpleapi::counter_metric_t member_changes;
extern prometheus::simpleapi::counter_metric_t member_auths;
extern prometheus::simpleapi::counter_metric_t member_deauths;
extern prometheus::simpleapi::gauge_metric_t network_config_request_queue_size;
extern prometheus::simpleapi::counter_metric_t sso_expiration_checks;
extern prometheus::simpleapi::counter_metric_t sso_member_deauth;
extern prometheus::simpleapi::counter_metric_t network_config_request;
extern prometheus::simpleapi::gauge_metric_t network_config_request_threads;
// Counters named after database operations (db_get_* / db_*_change).
// NOTE(review): presumably one increment per corresponding DB call -- confirm
// at the call sites.
extern prometheus::simpleapi::counter_metric_t db_get_network;
extern prometheus::simpleapi::counter_metric_t db_get_network_and_member;
extern prometheus::simpleapi::counter_metric_t db_get_network_and_member_and_summary;
extern prometheus::simpleapi::counter_metric_t db_get_member_list;
extern prometheus::simpleapi::counter_metric_t db_get_network_list;
extern prometheus::simpleapi::counter_metric_t db_member_change;
extern prometheus::simpleapi::counter_metric_t db_network_change;
// Fragmentation Metrics
//
// `packet_fragmentation` is the counter family for this section. Counter
// names carry a layer prefix (vl1_ / vl2_) and a direction suffix (_tx / _rx).
// NOTE(review): presumably each counter below is a labelled member of
// `packet_fragmentation`; the definitions are not in this header -- confirm.
extern prometheus::simpleapi::counter_family_t packet_fragmentation;
// VL2 Fragmentation Metrics
// (VL2 is the virtual Ethernet / TAP layer per the change description.)
extern prometheus::simpleapi::counter_metric_t vl2_oversized_frame_tx;
extern prometheus::simpleapi::counter_metric_t vl2_would_fragment_or_drop_rx;
// VL1 Fragmentation Metrics
// (VL1 is the ZeroTier protocol layer per the change description.)
// By name these cover fragmented sends, received fragments, reassembly
// failures, fragments seen without / before their head, and duplicates.
extern prometheus::simpleapi::counter_metric_t vl1_fragmented_tx;
extern prometheus::simpleapi::counter_metric_t vl1_fragment_rx;
extern prometheus::simpleapi::counter_metric_t vl1_reassembly_failed_rx;
extern prometheus::simpleapi::counter_metric_t vl1_fragment_without_head_rx;
extern prometheus::simpleapi::counter_metric_t vl1_fragment_before_head_rx;
extern prometheus::simpleapi::counter_metric_t vl1_duplicate_fragment_rx;
extern prometheus::simpleapi::counter_metric_t vl1_duplicate_head_rx;
// VL1 Fragmentation Histogram and Counters
//
// vl1_fragments_per_packet_histogram is a reference to a histogram *family*
// over uint64_t samples, not a single histogram. The two counters that follow
// are ordinary simpleapi counters.
extern prometheus::CustomFamily<prometheus::Histogram<uint64_t>> &vl1_fragments_per_packet_histogram;
extern prometheus::simpleapi::counter_metric_t vl1_incomplete_reassembly_rx;
extern prometheus::simpleapi::counter_metric_t vl1_vl2_double_fragmentation_tx;
// VL2 Frame Size Histogram
//
// Also a reference to a histogram family over uint64_t samples. The bucket
// values listed here are the same values, in the same order, as the
// VL2_FRAME_SIZE_BUCKETS array in this header; keep the two in sync.
// Buckets: 512 (IoT/legacy), 576 (min IPv4), 1200 (QUIC/mobile), 1280 (min IPv6),
// 1332, 1380, 1400 (VPN/overlay), 1420 (cloud), 1460 (TCP MSS), 1472 (ICMP/MTU),
// 1480 (ICMP/MTU), 1492 (PPPoE), 1500 (Ethernet), 2800 (VL2 default), 9000 (jumbo)
extern prometheus::CustomFamily<prometheus::Histogram<uint64_t>> &vl2_frame_size_histogram;
// Bucket boundaries for the VL1 fragments-per-packet histogram:
// every value from 1 through 10, followed by 12 and 16 (12 entries).
inline constexpr uint64_t VL1_FRAGMENTS_PER_PACKET_BUCKETS[] = {
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    12, 16,
};
// Bucket boundaries for the VL2 frame-size histogram (15 entries, ascending).
inline constexpr uint64_t VL2_FRAME_SIZE_BUCKETS[] = {
    512,  576,                     // IoT/legacy, min IPv4
    1200, 1280,                    // QUIC/mobile, min IPv6
    1332, 1380, 1400,              // VPN/overlay
    1420, 1460,                    // cloud, TCP MSS
    1472, 1480,                    // ICMP/MTU
    1492, 1500,                    // PPPoE, Ethernet
    2800, 9000,                    // VL2 default, jumbo
};
// Everything in this section exists only when ZT_CONTROLLER_USE_LIBPQ is
// defined; code that uses these names must be guarded the same way.
#ifdef ZT_CONTROLLER_USE_LIBPQ
// Central Controller Metrics
// Counters grouped by name prefix: pgsql_* and redis_* (notification and
// node check-in counts), plus db_get_sso_info.
extern prometheus::simpleapi::counter_metric_t pgsql_mem_notification;
extern prometheus::simpleapi::counter_metric_t pgsql_net_notification;
extern prometheus::simpleapi::counter_metric_t pgsql_node_checkin;
extern prometheus::simpleapi::counter_metric_t pgsql_commit_ticks;
extern prometheus::simpleapi::counter_metric_t db_get_sso_info;
extern prometheus::simpleapi::counter_metric_t redis_mem_notification;
extern prometheus::simpleapi::counter_metric_t redis_net_notification;
extern prometheus::simpleapi::counter_metric_t redis_node_checkin;
// Central DB Pool Metrics
// NOTE(review): max_pool_size and min_pool_size are declared as counters,
// while pool_avail and pool_in_use are gauges. Pool size limits look like
// gauge-type values -- confirm the counter type is intended before relying
// on it (changing it here would also require changing the definitions).
extern prometheus::simpleapi::counter_metric_t conn_counter;
extern prometheus::simpleapi::counter_metric_t max_pool_size;
extern prometheus::simpleapi::counter_metric_t min_pool_size;
extern prometheus::simpleapi::gauge_metric_t pool_avail;
extern prometheus::simpleapi::gauge_metric_t pool_in_use;
extern prometheus::simpleapi::counter_metric_t pool_errors;
#endif
// References to individual histograms over uint64_t samples, as opposed to
// the histogram *families* (vl1_fragments_per_packet_histogram and
// vl2_frame_size_histogram) declared earlier in this header.
// NOTE(review): presumably these are the concrete histograms created from
// those families and the ones call sites should observe into -- confirm in
// the defining source file.
extern prometheus::Histogram<uint64_t> &vl1_fragments_per_packet_hist;
extern prometheus::Histogram<uint64_t> &vl2_frame_size_hist;
} // namespace Metrics
}// namespace ZeroTier
#endif // METRICS_H_