diff --git a/include/ZeroTierOne.h b/include/ZeroTierOne.h index e5667acc0..890e56048 100644 --- a/include/ZeroTierOne.h +++ b/include/ZeroTierOne.h @@ -415,55 +415,128 @@ enum ZT_ResultCode */ #define ZT_ResultCode_isFatal(x) ((((int)(x)) >= 100)&&(((int)(x)) < 1000)) + /** - * The multipath algorithm in use by this node. + * Multipath bonding policy */ -enum ZT_MultipathMode +enum ZT_MultipathBondingPolicy { /** - * No fault tolerance or balancing. + * Normal operation. No fault tolerance, no load balancing */ - ZT_MULTIPATH_NONE = 0, + ZT_BONDING_POLICY_NONE = 0, /** - * Sends traffic out on all paths. + * Sends traffic out on only one path at a time. Configurable immediate + * fail-over. */ - ZT_MULTIPATH_BROADCAST = 1, + ZT_BONDING_POLICY_ACTIVE_BACKUP = 1, /** - * Sends traffic out on only one path at a time. Immediate fail-over. + * Sends traffic out on all paths */ - ZT_MULTIPATH_ACTIVE_BACKUP= 2, + ZT_BONDING_POLICY_BROADCAST = 2, /** - * Sends traffic out on all interfaces according to a uniform random distribution. + * Stripes packets across all paths */ - ZT_MULTIPATH_BALANCE_RANDOM = 3, + ZT_BONDING_POLICY_BALANCE_RR = 3, /** - * Stripes packets across all paths. + * Packets destined for specific peers will always be sent over the same + * path. */ - ZT_MULTIPATH_BALANCE_RR_OPAQUE = 4, + ZT_BONDING_POLICY_BALANCE_XOR = 4, /** - * Balances flows across all paths. + * Balances flows among all paths according to path performance */ - ZT_MULTIPATH_BALANCE_RR_FLOW = 5, + ZT_BONDING_POLICY_BALANCE_AWARE = 5 +}; + +/** + * Multipath active re-selection policy (slaveSelectMethod) + */ +enum ZT_MultipathSlaveSelectMethod +{ + /** + * Primary slave regains status as active slave whenever it comes back up + * (default when slaves are explicitly specified) + */ + ZT_MULTIPATH_RESELECTION_POLICY_ALWAYS = 0, /** - * Hashes flows across all paths. 
+ * Primary slave regains status as active slave when it comes back up and + * (if) it is better than the currently-active slave. */ - ZT_MULTIPATH_BALANCE_XOR_FLOW = 6, + ZT_MULTIPATH_RESELECTION_POLICY_BETTER = 1, /** - * Balances traffic across all paths according to observed performance. + * Primary slave regains status as active slave only if the currently-active + * slave fails. */ - ZT_MULTIPATH_BALANCE_DYNAMIC_OPAQUE = 7, + ZT_MULTIPATH_RESELECTION_POLICY_FAILURE = 2, /** - * Balances flows across all paths. + * The primary slave can change if a superior path is detected. + * (default if user provides no fail-over guidance) */ - ZT_MULTIPATH_BALANCE_DYNAMIC_FLOW = 8, + ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE = 3 +}; + +/** + * Mode of multipath slave interface + */ +enum ZT_MultipathSlaveMode +{ + ZT_MULTIPATH_SLAVE_MODE_PRIMARY = 0, + ZT_MULTIPATH_SLAVE_MODE_SPARE = 1 +}; + +/** + * Strategy for path monitoring + */ +enum ZT_MultipathMonitorStrategy +{ + /** + * Use bonding policy's default strategy + */ + ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DEFAULT = 0, + + /** + * Does not actively send probes to judge aliveness, will rely + * on conventional traffic and summary statistics. + */ + ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE = 1, + + /** + * Sends probes at a constant rate to judge aliveness. + */ + ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_ACTIVE = 2, + + /** + * Sends probes at varying rates which correlate to native + * traffic loads to judge aliveness. 
+ */ + ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC = 3 +}; + +/** + * Indices for the path quality weight vector + */ +enum ZT_MultipathQualityWeightIndex +{ + ZT_QOS_LAT_IDX, + ZT_QOS_LTM_IDX, + ZT_QOS_PDV_IDX, + ZT_QOS_PLR_IDX, + ZT_QOS_PER_IDX, + ZT_QOS_THR_IDX, + ZT_QOS_THM_IDX, + ZT_QOS_THV_IDX, + ZT_QOS_AGE_IDX, + ZT_QOS_SCP_IDX, + ZT_QOS_WEIGHT_SIZE }; /** @@ -1272,44 +1345,49 @@ typedef struct uint64_t trustedPathId; /** - * One-way latency + * Mean latency */ - float latency; + float latencyMean; /** - * How much latency varies over time + * Maximum observed latency */ - float packetDelayVariance; + float latencyMax; /** - * How much observed throughput varies over time + * Variance of latency */ - float throughputDisturbCoeff; + float latencyVariance; /** - * Packet Error Ratio (PER) - */ - float packetErrorRatio; - - /** - * Packet Loss Ratio (PLR) + * Packet loss ratio */ float packetLossRatio; /** - * Stability of the path + * Packet error ratio */ - float stability; + float packetErrorRatio; /** - * Current throughput (moving average) + * Mean throughput */ - uint64_t throughput; + uint64_t throughputMean; /** - * Maximum observed throughput for this path + * Maximum observed throughput */ - uint64_t maxThroughput; + float throughputMax; + + /** + * Throughput variance + */ + float throughputVariance; + + /** + * Address scope + */ + uint8_t scope; /** * Percentage of traffic allocated to this path @@ -1319,7 +1397,9 @@ typedef struct /** * Name of physical interface (for monitoring) */ - char *ifname; + char ifname[32]; + + uint64_t localSocket; /** * Is path expired? 
@@ -1373,9 +1453,11 @@ typedef struct unsigned int pathCount; /** - * Whether this peer was ever reachable via an aggregate link + * Whether multiple paths to this peer are bonded */ - bool hadAggregateLink; + bool isBonded; + + int bondingPolicy; /** * Known network paths to peer diff --git a/node/Bond.cpp b/node/Bond.cpp new file mode 100644 index 000000000..9a5ab1df8 --- /dev/null +++ b/node/Bond.cpp @@ -0,0 +1,1730 @@ +/* + * Copyright (c)2013-2020 ZeroTier, Inc. + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file in the project's root directory. + * + * Change Date: 2024-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2.0 of the Apache License. + */ +/****/ + +#include + +#include "Peer.hpp" +#include "Bond.hpp" +#include "Switch.hpp" +#include "Flow.hpp" +#include "Path.hpp" + +namespace ZeroTier { + +Bond::Bond(const RuntimeEnvironment *renv, int policy, const SharedPtr& peer) : + RR(renv), + _peer(peer) +{ + setReasonableDefaults(policy); + _policyAlias = BondController::getPolicyStrByCode(policy); +} + +Bond::Bond(std::string& basePolicy, std::string& policyAlias, const SharedPtr& peer) : + _policyAlias(policyAlias), + _peer(peer) +{ + setReasonableDefaults(BondController::getPolicyCodeByStr(basePolicy)); +} + +Bond::Bond(const RuntimeEnvironment *renv, const Bond &originalBond, const SharedPtr& peer) : + RR(renv), + _peer(peer) +{ + // First, set everything to sane defaults + setReasonableDefaults(originalBond._bondingPolicy); + _policyAlias = originalBond._policyAlias; + // Second, apply user specified values (only if they make sense) + _downDelay = originalBond._downDelay; + _upDelay = originalBond._upDelay; + if (originalBond._bondMonitorInterval > 0 && originalBond._bondMonitorInterval < 65535) { + _bondMonitorInterval = originalBond._bondMonitorInterval; + } + else { + fprintf(stderr, "warning: 
bondMonitorInterval (%d) is out of range, using default (%d)\n", originalBond._bondMonitorInterval, _bondMonitorInterval); + } + if (originalBond._slaveMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE + && originalBond._failoverInterval != 0) { + fprintf(stderr, "warning: passive path monitoring was specified, this will prevent failovers from happening in a timely manner.\n"); + } + _abSlaveSelectMethod = originalBond._abSlaveSelectMethod; + memcpy(_qualityWeights, originalBond._qualityWeights, ZT_QOS_WEIGHT_SIZE * sizeof(float)); +} + +void Bond::nominatePath(const SharedPtr& path, int64_t now) +{ + char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "nominatePath: %s %s\n", getSlave(path)->ifname().c_str(), pathStr); + Mutex::Lock _l(_paths_m); + if (!RR->bc->slaveAllowed(_policyAlias, getSlave(path))) { + return; + } + bool alreadyPresent = false; + for (int i=0; islave = RR->bc->getSlaveBySocket(_policyAlias, path->localSocket()); + _paths[i]->startTrial(now); + break; + } + } + } + curateBond(now, true); + estimatePathQuality(now); +} + +SharedPtr Bond::getAppropriatePath(int64_t now, int32_t flowId) +{ + Mutex::Lock _l(_paths_m); + /** + * active-backup + */ + if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP) { + if (_abPath) { + return _abPath; + } + } + /** + * broadcast + */ + if (_bondingPolicy== ZT_BONDING_POLICY_BROADCAST) { + return SharedPtr(); // Handled in Switch::_trySend() + } + if (!_numBondedPaths) { + return SharedPtr(); // No paths assigned to bond yet, cannot balance traffic + } + /** + * balance-rr + */ + if (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_RR) { + if (!_allowFlowHashing) { + //fprintf(stderr, "_rrPacketsSentOnCurrSlave=%d, _numBondedPaths=%d, _rrIdx=%d\n", _rrPacketsSentOnCurrSlave, _numBondedPaths, _rrIdx); + if (_packetsPerSlave == 0) { + // Randomly select a path + return _paths[_bondedIdx[_freeRandomByte % _numBondedPaths]]; // TODO: Optimize + } + if (_rrPacketsSentOnCurrSlave < 
_packetsPerSlave) { + // Continue to use this slave + ++_rrPacketsSentOnCurrSlave; + return _paths[_bondedIdx[_rrIdx]]; + } + // Reset striping counter + _rrPacketsSentOnCurrSlave = 0; + if (_numBondedPaths == 1) { + _rrIdx = 0; + } + else { + int _tempIdx = _rrIdx; + for (int searchCount = 0; searchCount < (_numBondedPaths-1); searchCount++) { + _tempIdx = (_tempIdx == (_numBondedPaths-1)) ? 0 : _tempIdx+1; + if (_paths[_bondedIdx[_tempIdx]] && _paths[_bondedIdx[_tempIdx]]->eligible(now,_ackSendInterval)) { + _rrIdx = _tempIdx; + break; + } + } + } + //fprintf(stderr, "resultant _rrIdx=%d\n", _rrIdx); + if (_paths[_bondedIdx[_rrIdx]]) { + return _paths[_bondedIdx[_rrIdx]]; + } + } + } + /** + * balance-xor + */ + if (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_XOR || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_AWARE) { + if (!_allowFlowHashing || flowId == -1) { + // No specific path required for unclassified traffic, send on anything + return _paths[_bondedIdx[_freeRandomByte % _numBondedPaths]]; // TODO: Optimize + } + else if (_allowFlowHashing) { + // TODO: Optimize + Mutex::Lock _l(_flows_m); + SharedPtr flow; + if (_flows.count(flowId)) { + flow = _flows[flowId]; + flow->updateActivity(now); + } + else { + unsigned char entropy; + Utils::getSecureRandom(&entropy, 1); + flow = createFlow(SharedPtr(), flowId, entropy, now); + } + if (flow) { + return flow->assignedPath(); + } + } + } + return SharedPtr(); +} + +void Bond::recordIncomingInvalidPacket(const SharedPtr& path) +{ + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "recordIncomingInvalidPacket() %s %s\n", getSlave(path)->ifname().c_str(), pathStr); + Mutex::Lock _l(_paths_m); + for (int i=0; ipacketValiditySamples.push(false); + } + } +} + +void Bond::recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, + uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now) +{ + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, 
"recordOutgoingPacket() %s %s, packetId=%llx, payloadLength=%d, verb=%x, flowId=%lx\n", getSlave(path)->ifname().c_str(), pathStr, packetId, payloadLength, verb, flowId); + _freeRandomByte += (unsigned char)(packetId >> 8); // Grab entropy to use in path selection logic + if (!_shouldCollectPathStatistics) { + return; + } + bool isFrame = (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME); + bool shouldRecord = (packetId & (ZT_QOS_ACK_DIVISOR - 1) + && (verb != Packet::VERB_ACK) + && (verb != Packet::VERB_QOS_MEASUREMENT)); + if (isFrame || shouldRecord) { + Mutex::Lock _l(_paths_m); + if (isFrame) { + ++(path->_packetsOut); + _lastFrame=now; + } + if (shouldRecord) { + path->_unackedBytes += payloadLength; + // Take note that we're expecting a VERB_ACK on this path as of a specific time + if (path->qosStatsOut.size() < ZT_QOS_MAX_OUTSTANDING_RECORDS) { + path->qosStatsOut[packetId] = now; + } + } + } + if (_allowFlowHashing) { + if (_allowFlowHashing && (flowId != ZT_QOS_NO_FLOW)) { + Mutex::Lock _l(_flows_m); + if (_flows.count(flowId)) { + _flows[flowId]->recordOutgoingBytes(payloadLength); + } + } + } +} + +void Bond::recordIncomingPacket(const SharedPtr& path, uint64_t packetId, uint16_t payloadLength, + Packet::Verb verb, int32_t flowId, int64_t now) +{ + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "recordIncomingPacket() %s %s, packetId=%llx, payloadLength=%d, verb=%x, flowId=%lx\n", getSlave(path)->ifname().c_str(), pathStr, packetId, payloadLength, verb, flowId); + bool isFrame = (verb == Packet::VERB_FRAME || verb == Packet::VERB_EXT_FRAME); + bool shouldRecord = (packetId & (ZT_QOS_ACK_DIVISOR - 1) + && (verb != Packet::VERB_ACK) + && (verb != Packet::VERB_QOS_MEASUREMENT)); + if (isFrame || shouldRecord) { + Mutex::Lock _l(_paths_m); + if (isFrame) { + ++(path->_packetsIn); + _lastFrame=now; + } + if (shouldRecord) { + path->ackStatsIn[packetId] = payloadLength; + ++(path->_packetsReceivedSinceLastAck); + 
path->qosStatsIn[packetId] = now; + ++(path->_packetsReceivedSinceLastQoS); + path->packetValiditySamples.push(true); + } + } + /** + * Learn new flows and pro-actively create entries for them in the bond so + * that the next time we send a packet out that is part of a flow we know + * which path to use. + */ + if ((flowId != ZT_QOS_NO_FLOW) + && (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_RR + || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_XOR + || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_AWARE)) { + Mutex::Lock _l(_flows_m); + SharedPtr flow; + if (!_flows.count(flowId)) { + flow = createFlow(path, flowId, 0, now); + } else { + flow = _flows[flowId]; + } + if (flow) { + flow->recordIncomingBytes(payloadLength); + } + } +} + +void Bond::receivedQoS(const SharedPtr& path, int64_t now, int count, uint64_t *rx_id, uint16_t *rx_ts) +{ + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "receivedQoS() %s %s\n", getSlave(path)->ifname().c_str(), pathStr); + Mutex::Lock _l(_paths_m); + // Look up egress times and compute latency values for each record + std::map::iterator it; + for (int j=0; jqosStatsOut.find(rx_id[j]); + if (it != path->qosStatsOut.end()) { + path->latencySamples.push(((uint16_t)(now - it->second) - rx_ts[j]) / 2); + path->qosStatsOut.erase(it); + } + } + path->qosRecordSize.push(count); + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "receivedQoS() on path %s %s, count=%d, successful=%d, qosStatsOut.size()=%d\n", getSlave(path)->ifname().c_str(), pathStr, count, path->aknowledgedQoSRecordCountSinceLastCheck, path->qosStatsOut.size()); +} + +void Bond::receivedAck(const SharedPtr& path, int64_t now, int32_t ackedBytes) +{ + Mutex::Lock _l(_paths_m); + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "receivedAck() %s %s, (ackedBytes=%d, lastAckReceived=%lld, ackAge=%lld)\n", getSlave(path)->ifname().c_str(), pathStr, ackedBytes, path->lastAckReceived, path->ackAge(now)); + 
path->_lastAckReceived = now; + path->_unackedBytes = (ackedBytes > path->_unackedBytes) ? 0 : path->_unackedBytes - ackedBytes; + int64_t timeSinceThroughputEstimate = (now - path->_lastThroughputEstimation); + if (timeSinceThroughputEstimate >= throughputMeasurementInterval) { + // TODO: See if this floating point math can be reduced + uint64_t throughput = (uint64_t)((float)(path->_bytesAckedSinceLastThroughputEstimation) / ((float)timeSinceThroughputEstimate / (float)1000)); + throughput /= 1000; + if (throughput > 0.0) { + path->throughputSamples.push(throughput); + path->_throughputMax = throughput > path->_throughputMax ? throughput : path->_throughputMax; + } + path->_lastThroughputEstimation = now; + path->_bytesAckedSinceLastThroughputEstimation = 0; + } else { + path->_bytesAckedSinceLastThroughputEstimation += ackedBytes; + } +} + +int32_t Bond::generateQoSPacket(const SharedPtr& path, int64_t now, char *qosBuffer) +{ + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "generateQoSPacket() %s %s\n", getSlave(path)->ifname().c_str(), pathStr); + int32_t len = 0; + std::map::iterator it = path->qosStatsIn.begin(); + int i=0; + int numRecords = std::min(path->_packetsReceivedSinceLastQoS,ZT_QOS_TABLE_SIZE); + while (iqosStatsIn.end()) { + uint64_t id = it->first; + memcpy(qosBuffer, &id, sizeof(uint64_t)); + qosBuffer+=sizeof(uint64_t); + uint16_t holdingTime = (uint16_t)(now - it->second); + memcpy(qosBuffer, &holdingTime, sizeof(uint16_t)); + qosBuffer+=sizeof(uint16_t); + len+=sizeof(uint64_t)+sizeof(uint16_t); + path->qosStatsIn.erase(it++); + ++i; + } + return len; +} + +bool Bond::assignFlowToBondedPath(SharedPtr &flow, int64_t now) +{ + //fprintf(stderr, "assignFlowToBondedPath\n"); + char curPathStr[128]; + unsigned int idx = ZT_MAX_PEER_NETWORK_PATHS; + if (_bondingPolicy == ZT_BONDING_POLICY_BALANCE_XOR) { + idx = abs((int)(flow->id() % (_numBondedPaths))); + flow->assignPath(_paths[_bondedIdx[idx]],now); + } + if 
(_bondingPolicy == ZT_BONDING_POLICY_BALANCE_AWARE) { + unsigned char entropy; + Utils::getSecureRandom(&entropy, 1); + if (_totalBondUnderload) { + entropy %= _totalBondUnderload; + } + if (!_numBondedPaths) { + fprintf(stderr, "no bonded paths for flow assignment\n"); + return false; + } + for(unsigned int i=0;ibonded()) { + SharedPtr slave = RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + _paths[i]->address().toString(curPathStr); + uint8_t probabilitySegment = (_totalBondUnderload > 0) ? _paths[i]->_affinity : _paths[i]->_allocation; + //fprintf(stderr, "i=%2d, entropy=%3d, alloc=%3d, byteload=%4d, segment=%3d, _totalBondUnderload=%3d, ifname=%s, path=%20s\n", i, entropy, _paths[i]->allocation, _paths[i]->relativeByteLoad, probabilitySegment, _totalBondUnderload, slave->ifname().c_str(), curPathStr); + if (entropy <= probabilitySegment) { + idx = i; + //fprintf(stderr, "\t is best path\n"); + break; + } + entropy -= probabilitySegment; + } + } + if (idx < ZT_MAX_PEER_NETWORK_PATHS) { + flow->assignPath(_paths[idx],now); + ++(_paths[idx]->_assignedFlowCount); + } + else { + fprintf(stderr, "could not assign flow?\n"); exit(0); // TODO: Remove + return false; + } + } + flow->assignedPath()->address().toString(curPathStr); + SharedPtr slave = RR->bc->getSlaveBySocket(_policyAlias, flow->assignedPath()->localSocket()); + fprintf(stderr, "assigned (tx) flow %x with peer %llx to path %s on %s (idx=%d)\n", flow->id(), _peer->_id.address().toInt(), curPathStr, slave->ifname().c_str(), idx); + return true; +} + +SharedPtr Bond::createFlow(const SharedPtr &path, int32_t flowId, unsigned char entropy, int64_t now) +{ + //fprintf(stderr, "createFlow\n"); + char curPathStr[128]; + // --- + if (!_numBondedPaths) { + fprintf(stderr, "there are no bonded paths, cannot assign flow\n"); + return SharedPtr(); + } + if (_flows.size() >= ZT_FLOW_MAX_COUNT) { + fprintf(stderr, "max number of flows reached (%d), forcibly forgetting oldest flow\n", 
ZT_FLOW_MAX_COUNT); + forgetFlowsWhenNecessary(0,true,now); + } + SharedPtr flow = new Flow(flowId, now); + _flows[flowId] = flow; + fprintf(stderr, "new flow %x detected with peer %llx, %lu active flow(s)\n", flowId, _peer->_id.address().toInt(), (_flows.size())); + /** + * Add a flow with a given Path already provided. This is the case when a packet + * is received on a path but no flow exists, in this case we simply assign the path + * that the remote peer chose for us. + */ + if (path) { + flow->assignPath(path,now); + path->address().toString(curPathStr); + SharedPtr slave = RR->bc->getSlaveBySocket(_policyAlias, flow->assignedPath()->localSocket()); + fprintf(stderr, "assigned (rx) flow %x with peer %llx to path %s on %s\n", flow->id(), _peer->_id.address().toInt(), curPathStr, slave->ifname().c_str()); + } + /** + * Add a flow when no path was provided. This means that it is an outgoing packet + * and that it is up to the local peer to decide how to load-balance its transmission. 
+ */ + else if (!path) { + assignFlowToBondedPath(flow, now); + } + return flow; +} + +void Bond::forgetFlowsWhenNecessary(uint64_t age, bool oldest, int64_t now) +{ + //fprintf(stderr, "forgetFlowsWhenNecessary\n"); + std::map >::iterator it = _flows.begin(); + std::map >::iterator oldestFlow = _flows.end(); + SharedPtr expiredFlow; + if (age) { // Remove by specific age + while (it != _flows.end()) { + if (it->second->age(now) > age) { + fprintf(stderr, "forgetting flow %x between this node and %llx, %lu active flow(s)\n", it->first, _peer->_id.address().toInt(), (_flows.size()-1)); + it = _flows.erase(it); + } else { + ++it; + } + } + } + else if (oldest) { // Remove single oldest by natural expiration + uint64_t maxAge = 0; + while (it != _flows.end()) { + if (it->second->age(now) > maxAge) { + maxAge = (now - it->second->age(now)); + oldestFlow = it; + } + ++it; + } + if (oldestFlow != _flows.end()) { + fprintf(stderr, "forgetting oldest flow %x (of age %llu) between this node and %llx, %lu active flow(s)\n", oldestFlow->first, oldestFlow->second->age(now), _peer->_id.address().toInt(), (_flows.size()-1)); + _flows.erase(oldestFlow); + } + } + fprintf(stderr, "000\n"); +} + +void Bond::processIncomingPathNegotiationRequest(uint64_t now, SharedPtr &path, int16_t remoteUtility) +{ + //fprintf(stderr, "processIncomingPathNegotiationRequest\n"); + if (_abSlaveSelectMethod != ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { + return; + } + Mutex::Lock _l(_paths_m); + char pathStr[128]; + path->address().toString(pathStr); + if (!_lastPathNegotiationCheck) { + return; + } + SharedPtr slave = RR->bc->getSlaveBySocket(_policyAlias, path->localSocket()); + if (remoteUtility > _localUtility) { + fprintf(stderr, "peer suggests path, its utility (%d) is greater than ours (%d), we will switch to %s on %s (ls=%llx)\n", remoteUtility, _localUtility, pathStr, slave->ifname().c_str(), path->localSocket()); + negotiatedPath = path; + } + if (remoteUtility < _localUtility) { + 
fprintf(stderr, "peer suggests path, its utility (%d) is less than ours (%d), we will NOT switch to %s on %s (ls=%llx)\n", remoteUtility, _localUtility, pathStr, slave->ifname().c_str(), path->localSocket()); + } + if (remoteUtility == _localUtility) { + fprintf(stderr, "peer suggest path, but utility is equal, picking choice made by peer with greater identity.\n"); + if (_peer->_id.address().toInt() > RR->node->identity().address().toInt()) { + fprintf(stderr, "peer identity was greater, going with their choice of %s on %s (ls=%llx)\n", pathStr, slave->ifname().c_str(), path->localSocket()); + negotiatedPath = path; + } else { + fprintf(stderr, "our identity was greater, no change\n"); + } + } +} + +void Bond::pathNegotiationCheck(void *tPtr, const int64_t now) +{ + //fprintf(stderr, "pathNegotiationCheck\n"); + char pathStr[128]; + int maxInPathIdx = ZT_MAX_PEER_NETWORK_PATHS; + int maxOutPathIdx = ZT_MAX_PEER_NETWORK_PATHS; + uint64_t maxInCount = 0; + uint64_t maxOutCount = 0; + for(unsigned int i=0;i_packetsIn > maxInCount) { + maxInCount = _paths[i]->_packetsIn; + maxInPathIdx = i; + } + if (_paths[i]->_packetsOut > maxOutCount) { + maxOutCount = _paths[i]->_packetsOut; + maxOutPathIdx = i; + } + _paths[i]->resetPacketCounts(); + } + bool _peerLinksSynchronized = ((maxInPathIdx != ZT_MAX_PEER_NETWORK_PATHS) + && (maxOutPathIdx != ZT_MAX_PEER_NETWORK_PATHS) + && (maxInPathIdx != maxOutPathIdx)) ? 
false : true; + /** + * Determine utility and attempt to petition remote peer to switch to our chosen path + */ + if (!_peerLinksSynchronized) { + _localUtility = _paths[maxOutPathIdx]->_failoverScore - _paths[maxInPathIdx]->_failoverScore; + if (_paths[maxOutPathIdx]->_negotiated) { + _localUtility -= ZT_MULTIPATH_FAILOVER_HANDICAP_NEGOTIATED; + } + if ((now - _lastSentPathNegotiationRequest) > ZT_PATH_NEGOTIATION_CUTOFF_TIME) { + fprintf(stderr, "BT: (sync) it's been long enough, sending more requests.\n"); + _numSentPathNegotiationRequests = 0; + } + if (_numSentPathNegotiationRequests < ZT_PATH_NEGOTIATION_TRY_COUNT) { + if (_localUtility >= 0) { + fprintf(stderr, "BT: (sync) paths appear to be out of sync (utility=%d)\n", _localUtility); + sendPATH_NEGOTIATION_REQUEST(tPtr, _paths[maxOutPathIdx]); + ++_numSentPathNegotiationRequests; + _lastSentPathNegotiationRequest = now; + _paths[maxOutPathIdx]->address().toString(pathStr); + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[maxOutPathIdx]->localSocket()); + fprintf(stderr, "sending request to use %s on %s, ls=%llx, utility=%d\n", pathStr, slave->ifname().c_str(), _paths[maxOutPathIdx]->localSocket(), _localUtility); + } + } + /** + * Give up negotiating and consider switching + */ + else if ((now - _lastSentPathNegotiationRequest) > (2 * ZT_PATH_NEGOTIATION_CHECK_INTERVAL)) { + if (_localUtility == 0) { + // There's no loss to us, just switch without sending a another request + fprintf(stderr, "BT: (sync) giving up, switching to remote peer's path.\n"); + negotiatedPath = _paths[maxInPathIdx]; + } + } + } +} + +void Bond::sendPATH_NEGOTIATION_REQUEST(void *tPtr, const SharedPtr &path) +{ + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "sendPATH_NEGOTIATION_REQUEST() %s %s\n", getSlave(path)->ifname().c_str(), pathStr); + if (_abSlaveSelectMethod != ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { + return; + } + Packet 
outp(_peer->_id.address(),RR->identity.address(),Packet::VERB_PATH_NEGOTIATION_REQUEST); + outp.append(_localUtility); + if (path->address()) { + outp.armor(_peer->key(),false); + RR->node->putPacket(tPtr,path->localSocket(),path->address(),outp.data(),outp.size()); + } +} + +void Bond::sendACK(void *tPtr,const SharedPtr &path,const int64_t localSocket, + const InetAddress &atAddress,int64_t now) +{ + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "sendACK() %s %s\n", getSlave(path)->ifname().c_str(), pathStr); + Packet outp(_peer->_id.address(),RR->identity.address(),Packet::VERB_ACK); + int32_t bytesToAck = 0; + std::map::iterator it = path->ackStatsIn.begin(); + while (it != path->ackStatsIn.end()) { + bytesToAck += it->second; + ++it; + } + outp.append(bytesToAck); + if (atAddress) { + outp.armor(_peer->key(),false); + RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size()); + } else { + RR->sw->send(tPtr,outp,false); + } + path->ackStatsIn.clear(); + path->_packetsReceivedSinceLastAck = 0; + path->_lastAckSent = now; +} + +void Bond::sendQOS_MEASUREMENT(void *tPtr,const SharedPtr &path,const int64_t localSocket, + const InetAddress &atAddress,int64_t now) +{ + //char pathStr[128];path->address().toString(pathStr);fprintf(stderr, "sendQOS() %s %s\n", getSlave(path)->ifname().c_str(), pathStr); + const int64_t _now = RR->node->now(); + Packet outp(_peer->_id.address(),RR->identity.address(),Packet::VERB_QOS_MEASUREMENT); + char qosData[ZT_QOS_MAX_PACKET_SIZE]; + int16_t len = generateQoSPacket(path, _now,qosData); + outp.append(qosData,len); + if (atAddress) { + outp.armor(_peer->key(),false); + RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size()); + } else { + RR->sw->send(tPtr,outp,false); + } + // Account for the fact that a VERB_QOS_MEASUREMENT was just sent. Reset timers. 
+ path->_packetsReceivedSinceLastQoS = 0; + path->_lastQoSMeasurement = now; +} + +void Bond::processBackgroundTasks(void *tPtr, const int64_t now) +{ + Mutex::Lock _l(_paths_m); + if (!_peer->_canUseMultipath || (now - _lastBackgroundTaskCheck) < ZT_BOND_BACKGROUND_TASK_MIN_INTERVAL) { + return; + } + _lastBackgroundTaskCheck = now; + + // Compute dynamic path monitor timer interval + if (_slaveMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC) { + int suggestedMonitorInterval = (now - _lastFrame) / 100; + _dynamicPathMonitorInterval = std::min(ZT_PATH_HEARTBEAT_PERIOD, ((suggestedMonitorInterval > _bondMonitorInterval) ? suggestedMonitorInterval : _bondMonitorInterval)); + //fprintf(stderr, "_lastFrame=%llu, suggestedMonitorInterval=%d, _dynamicPathMonitorInterval=%d\n", + // (now-_lastFrame), suggestedMonitorInterval, _dynamicPathMonitorInterval); + } + + if (_slaveMonitorStrategy == ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC) { + _shouldCollectPathStatistics = true; + } + + // Memoize oft-used properties in the packet ingress/egress logic path + if (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_AWARE) { + // Required for real-time balancing + _shouldCollectPathStatistics = true; + } + if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP) { + if (_abSlaveSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_BETTER) { + // Required for judging suitability of primary slave after recovery + _shouldCollectPathStatistics = true; + } + if (_abSlaveSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { + // Required for judging suitability of new candidate primary + _shouldCollectPathStatistics = true; + } + } + if ((now - _lastCheckUserPreferences) > 1000) { + _lastCheckUserPreferences = now; + applyUserPrefs(); + } + + curateBond(now,false); + if ((now - _lastQualityEstimation) > _qualityEstimationInterval) { + _lastQualityEstimation = now; + estimatePathQuality(now); + } + dumpInfo(now); + + // Send QOS/ACK packets as needed + if 
(_shouldCollectPathStatistics) { + for(unsigned int i=0;iallowed()) { + if (_paths[i]->needsToSendQoS(now,_qosSendInterval)) { + sendQOS_MEASUREMENT(tPtr, _paths[i], _paths[i]->localSocket(), _paths[i]->address(), now); + } + if (_paths[i]->needsToSendAck(now,_ackSendInterval)) { + sendACK(tPtr, _paths[i], _paths[i]->localSocket(), _paths[i]->address(), now); + } + } + } + } + // Perform periodic background tasks unique to each bonding policy + switch (_bondingPolicy) + { + case ZT_BONDING_POLICY_ACTIVE_BACKUP: + processActiveBackupTasks(now); + break; + case ZT_BONDING_POLICY_BROADCAST: + break; + case ZT_BONDING_POLICY_BALANCE_RR: + case ZT_BONDING_POLICY_BALANCE_XOR: + case ZT_BONDING_POLICY_BALANCE_AWARE: + processBalanceTasks(now); + break; + default: + break; + } + // Check whether or not a path negotiation needs to be performed + if (((now - _lastPathNegotiationCheck) > ZT_PATH_NEGOTIATION_CHECK_INTERVAL) && _allowPathNegotiation) { + _lastPathNegotiationCheck = now; + pathNegotiationCheck(tPtr, now); + } +} + +void Bond::applyUserPrefs() +{ + fprintf(stderr, "applyUserPrefs, _minReqPathMonitorInterval=%d\n", RR->bc->minReqPathMonitorInterval()); + for(unsigned int i=0;i sl = getSlave(_paths[i]); + if (sl) { + if (sl->monitorInterval() == 0) { // If no interval was specified for this slave, use more generic bond-wide interval + sl->setMonitorInterval(_bondMonitorInterval); + } + RR->bc->setMinReqPathMonitorInterval((sl->monitorInterval() < RR->bc->minReqPathMonitorInterval()) ? sl->monitorInterval() : RR->bc->minReqPathMonitorInterval()); + bool bFoundCommonSlave = false; + SharedPtr commonSlave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + for(unsigned int j=0;jbc->getSlaveBySocket(_policyAlias, _paths[j]->localSocket()) == commonSlave) { + bFoundCommonSlave = true; + } + } + } + _paths[i]->_monitorInterval = sl->monitorInterval(); + _paths[i]->_upDelay = sl->upDelay() ? 
sl->upDelay() : _upDelay; + _paths[i]->_downDelay = sl->downDelay() ? sl->downDelay() : _downDelay; + _paths[i]->_ipvPref = sl->ipvPref(); + _paths[i]->_mode = sl->mode(); + _paths[i]->_enabled = sl->enabled(); + _paths[i]->_onlyPathOnSlave = !bFoundCommonSlave; + } + } + if (_peer) { + _peer->_shouldCollectPathStatistics = _shouldCollectPathStatistics; + _peer->_bondingPolicy = _bondingPolicy; + } +} + +void Bond::curateBond(const int64_t now, bool rebuildBond) +{ + //fprintf(stderr, "%lu curateBond (rebuildBond=%d)\n", ((now - RR->bc->getBondStartTime())), rebuildBond); + char pathStr[128]; + /** + * Update path states + */ + for(unsigned int i=0;ieligible(now,_ackSendInterval); + if (currEligibility != _paths[i]->_lastEligibilityState) { + _paths[i]->address().toString(pathStr); + //fprintf(stderr, "\n\n%ld path eligibility (for %s, %s) has changed (from %d to %d)\n", (RR->node->now() - RR->bc->getBondStartTime()), getSlave(_paths[i])->ifname().c_str(), pathStr, _paths[i]->lastCheckedEligibility, _paths[i]->eligible(now,_ackSendInterval)); + if (currEligibility) { + rebuildBond = true; + } + if (!currEligibility) { + _paths[i]->adjustRefractoryPeriod(now, _defaultPathRefractoryPeriod, !currEligibility); + if (_paths[i]->bonded()) { + //fprintf(stderr, "the path was bonded, reallocation of its flows will occur soon\n"); + rebuildBond = true; + _paths[i]->_shouldReallocateFlows = _paths[i]->bonded(); + _paths[i]->setBonded(false); + } else { + //fprintf(stderr, "the path was not bonded, no consequences\n"); + } + } + } + if (currEligibility) { + _paths[i]->adjustRefractoryPeriod(now, _defaultPathRefractoryPeriod, false); + } + _paths[i]->_lastEligibilityState = currEligibility; + } + /** + * Curate the set of paths that are part of the bond proper. Selects a single path + * per logical slave according to eligibility and user-specified constraints. 
+ */ + if ((_bondingPolicy== ZT_BONDING_POLICY_BALANCE_RR) + || (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_XOR) + || (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_AWARE)) { + if (!_numBondedPaths) { + rebuildBond = true; + } + // TODO: Optimize + if (rebuildBond) { + int updatedBondedPathCount = 0; + std::map,int> slaveMap; + for (int i=0;iallowed() && (_paths[i]->eligible(now,_ackSendInterval) || !_numBondedPaths)) { + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + if (!slaveMap.count(slave)) { + slaveMap[slave] = i; + } + else { + bool overriden = false; + _paths[i]->address().toString(pathStr); + //fprintf(stderr, " slave representative path already exists! (%s %s)\n", getSlave(_paths[i])->ifname().c_str(), pathStr); + if (_paths[i]->preferred() && !_paths[slaveMap[slave]]->preferred()) { + // Override previous choice if preferred + //fprintf(stderr, "overriding since its preferred!\n"); + if (_paths[slaveMap[slave]]->_assignedFlowCount) { + _paths[slaveMap[slave]]->_deprecated = true; + } + else { + _paths[slaveMap[slave]]->_deprecated = true; + _paths[slaveMap[slave]]->setBonded(false); + } + slaveMap[slave] = i; + overriden = true; + } + if ((_paths[i]->preferred() && _paths[slaveMap[slave]]->preferred()) + || (!_paths[i]->preferred() && !_paths[slaveMap[slave]]->preferred())) { + if (_paths[i]->preferenceRank() > _paths[slaveMap[slave]]->preferenceRank()) { + // Override if higher preference + //fprintf(stderr, "overriding according to preference preferenceRank!\n"); + if (_paths[slaveMap[slave]]->_assignedFlowCount) { + _paths[slaveMap[slave]]->_deprecated = true; + } + else { + _paths[slaveMap[slave]]->_deprecated = true; + _paths[slaveMap[slave]]->setBonded(false); + } + slaveMap[slave] = i; + } + } + } + } + } + std::map,int>::iterator it = slaveMap.begin(); + for (int i=0; isecond; + _paths[_bondedIdx[i]]->setBonded(true); + ++it; + ++updatedBondedPathCount; + _paths[_bondedIdx[i]]->address().toString(pathStr); + 
fprintf(stderr, "setting i=%d, _bondedIdx[%d]=%d to bonded (%s %s)\n", i, i, _bondedIdx[i], getSlave(_paths[_bondedIdx[i]])->ifname().c_str(), pathStr); + } + } + _numBondedPaths = updatedBondedPathCount; + + if (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_RR) { + // Cause a RR reset since the currently used index might no longer be valid + _rrPacketsSentOnCurrSlave = _packetsPerSlave; + } + } + } +} + +void Bond::estimatePathQuality(const int64_t now) +{ + char pathStr[128]; + //--- + + uint32_t totUserSpecifiedSlaveSpeed = 0; + if (_numBondedPaths) { // Compute relative user-specified speeds of slaves + for(unsigned int i=0;i<_numBondedPaths;++i) { + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + if (_paths[i] && _paths[i]->allowed()) { + totUserSpecifiedSlaveSpeed += slave->speed(); + } + } + for(unsigned int i=0;i<_numBondedPaths;++i) { + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + if (_paths[i] && _paths[i]->allowed()) { + slave->setRelativeSpeed(round( ((float)slave->speed() / (float)totUserSpecifiedSlaveSpeed) * 255)); + } + } + } + + float lat[ZT_MAX_PEER_NETWORK_PATHS]; + float pdv[ZT_MAX_PEER_NETWORK_PATHS]; + float plr[ZT_MAX_PEER_NETWORK_PATHS]; + float per[ZT_MAX_PEER_NETWORK_PATHS]; + float thr[ZT_MAX_PEER_NETWORK_PATHS]; + float thm[ZT_MAX_PEER_NETWORK_PATHS]; + float thv[ZT_MAX_PEER_NETWORK_PATHS]; + + float maxLAT = 0; + float maxPDV = 0; + float maxPLR = 0; + float maxPER = 0; + float maxTHR = 0; + float maxTHM = 0; + float maxTHV = 0; + + float quality[ZT_MAX_PEER_NETWORK_PATHS]; + uint8_t alloc[ZT_MAX_PEER_NETWORK_PATHS]; + + float totQuality = 0.0f; + + memset(&lat, 0, sizeof(lat)); + memset(&pdv, 0, sizeof(pdv)); + memset(&plr, 0, sizeof(plr)); + memset(&per, 0, sizeof(per)); + memset(&thr, 0, sizeof(thr)); + memset(&thm, 0, sizeof(thm)); + memset(&thv, 0, sizeof(thv)); + memset(&quality, 0, sizeof(quality)); + memset(&alloc, 0, sizeof(alloc)); + + // Compute initial 
summary statistics + for(unsigned int i=0;iallowed()) { + continue; + } + // Compute/Smooth average of real-world observations + _paths[i]->_latencyMean = _paths[i]->latencySamples.mean(); + _paths[i]->_latencyVariance = _paths[i]->latencySamples.stddev(); + _paths[i]->_packetErrorRatio = 1.0 - (_paths[i]->packetValiditySamples.count() ? _paths[i]->packetValiditySamples.mean() : 1.0); + + if (userHasSpecifiedSlaveSpeeds()) { + // Use user-reported metrics + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + if (slave) { + _paths[i]->_throughputMean = slave->speed(); + _paths[i]->_throughputVariance = 0; + } + } + /* + else { + // Use estimated metrics + if (_paths[i]->throughputSamples.count()) { + // If we have samples, use them + _paths[i]->throughputMean = (uint64_t)_paths[i]->throughputSamples.mean(); + if (_paths[i]->throughputMean > 0) { + _paths[i]->throughputVarianceSamples.push((float)_paths[i]->throughputSamples.stddev() / (float)_paths[i]->throughputMean); + _paths[i]->throughputVariance = _paths[i]->throughputVarianceSamples.mean(); + } + } + else { + // No samples have been collected yet, assume best case scenario + _paths[i]->throughputMean = ZT_QOS_THR_NORM_MAX; + _paths[i]->throughputVariance = 0; + } + } + */ + // Drain unacknowledged QoS records + std::map::iterator it = _paths[i]->qosStatsOut.begin(); + uint64_t currentLostRecords = 0; + while (it != _paths[i]->qosStatsOut.end()) { + int qosRecordTimeout = 5000; //_paths[i]->monitorInterval() * ZT_MULTIPATH_QOS_ACK_INTERVAL_MULTIPLIER * 8; + if ((now - it->second) >= qosRecordTimeout) { + //fprintf(stderr, "packetId=%llx was lost\n", it->first); + it = _paths[i]->qosStatsOut.erase(it); + ++currentLostRecords; + } else { ++it; } + } + + quality[i]=0; + totQuality=0; + // Normalize raw observations according to sane limits and/or user specified values + lat[i] = 1.0 / expf(4*Utils::normalize(_paths[i]->_latencyMean, 0, _maxAcceptableLatency, 0, 1)); + pdv[i] = 1.0 
/ expf(4*Utils::normalize(_paths[i]->_latencyVariance, 0, _maxAcceptablePacketDelayVariance, 0, 1)); + plr[i] = 1.0 / expf(4*Utils::normalize(_paths[i]->_packetLossRatio, 0, _maxAcceptablePacketLossRatio, 0, 1)); + per[i] = 1.0 / expf(4*Utils::normalize(_paths[i]->_packetErrorRatio, 0, _maxAcceptablePacketErrorRatio, 0, 1)); + //thr[i] = 1.0; //Utils::normalize(_paths[i]->throughputMean, 0, ZT_QOS_THR_NORM_MAX, 0, 1); + //thm[i] = 1.0; //Utils::normalize(_paths[i]->throughputMax, 0, ZT_QOS_THM_NORM_MAX, 0, 1); + //thv[i] = 1.0; //1.0 / expf(4*Utils::normalize(_paths[i]->throughputVariance, 0, ZT_QOS_THV_NORM_MAX, 0, 1)); + //scp[i] = _paths[i]->ipvPref != 0 ? 1.0 : Utils::normalize(_paths[i]->ipScope(), InetAddress::IP_SCOPE_NONE, InetAddress::IP_SCOPE_PRIVATE, 0, 1); + // Record bond-wide maximums to determine relative values + maxLAT = lat[i] > maxLAT ? lat[i] : maxLAT; + maxPDV = pdv[i] > maxPDV ? pdv[i] : maxPDV; + maxPLR = plr[i] > maxPLR ? plr[i] : maxPLR; + maxPER = per[i] > maxPER ? per[i] : maxPER; + //maxTHR = thr[i] > maxTHR ? thr[i] : maxTHR; + //maxTHM = thm[i] > maxTHM ? thm[i] : maxTHM; + //maxTHV = thv[i] > maxTHV ? thv[i] : maxTHV; + + //fprintf(stdout, "EH %d: lat=%8.3f, ltm=%8.3f, pdv=%8.3f, plr=%5.3f, per=%5.3f, thr=%8f, thm=%5.3f, thv=%5.3f, avl=%5.3f, age=%8.2f, scp=%4d, q=%5.3f, qtot=%5.3f, ac=%d if=%s, path=%s\n", + // i, lat[i], ltm[i], pdv[i], plr[i], per[i], thr[i], thm[i], thv[i], avl[i], age[i], scp[i], quality[i], totQuality, alloc[i], getSlave(_paths[i])->ifname().c_str(), pathStr); + + } + // Convert metrics to relative quantities and apply contribution weights + for(unsigned int i=0;ibonded()) { + quality[i] += ((maxLAT > 0.0f ? lat[i] / maxLAT : 0.0f) * _qualityWeights[ZT_QOS_LAT_IDX]); + quality[i] += ((maxPDV > 0.0f ? pdv[i] / maxPDV : 0.0f) * _qualityWeights[ZT_QOS_PDV_IDX]); + quality[i] += ((maxPLR > 0.0f ? plr[i] / maxPLR : 0.0f) * _qualityWeights[ZT_QOS_PLR_IDX]); + quality[i] += ((maxPER > 0.0f ? 
per[i] / maxPER : 0.0f) * _qualityWeights[ZT_QOS_PER_IDX]); + //quality[i] += ((maxTHR > 0.0f ? thr[i] / maxTHR : 0.0f) * _qualityWeights[ZT_QOS_THR_IDX]); + //quality[i] += ((maxTHM > 0.0f ? thm[i] / maxTHM : 0.0f) * _qualityWeights[ZT_QOS_THM_IDX]); + //quality[i] += ((maxTHV > 0.0f ? thv[i] / maxTHV : 0.0f) * _qualityWeights[ZT_QOS_THV_IDX]); + //quality[i] += (scp[i] * _qualityWeights[ZT_QOS_SCP_IDX]); + totQuality += quality[i]; + } + } + // + for(unsigned int i=0;ibonded()) { + alloc[i] = std::ceil((quality[i] / totQuality) * (float)255); + _paths[i]->_allocation = alloc[i]; + } + } + /* + if ((now - _lastLogTS) > 500) { + if (!relevant()) {return;} + //fprintf(stderr, "\n"); + _lastPrintTS = now; + _lastLogTS = now; + int numPlottablePaths=0; + for(unsigned int i=0;iaddress().toString(pathStr); + //fprintf(stderr, "%lu FIN [%d/%d]: pmi=%5d, lat=%4.3f, ltm=%4.3f, pdv=%4.3f, plr=%4.3f, per=%4.3f, thr=%4.3f, thm=%4.3f, thv=%4.3f, age=%4.3f, scp=%4d, q=%4.3f, qtot=%4.3f, ac=%4d, asf=%3d, if=%s, path=%20s, bond=%d, qosout=%d, plrraw=%d\n", + // ((now - RR->bc->getBondStartTime())), i, _numBondedPaths, _paths[i]->monitorInterval, + // lat[i], ltm[i], pdv[i], plr[i], per[i], thr[i], thm[i], thv[i], age[i], scp[i], + // quality[i], totQuality, alloc[i], _paths[i]->assignedFlowCount, getSlave(_paths[i])->ifname().c_str(), pathStr, _paths[i]->bonded(), _paths[i]->qosStatsOut.size(), _paths[i]->packetLossRatio); + } + } + if (numPlottablePaths < 2) { + return; + } + if (!_header) { + fprintf(stdout, "now, bonded, relativeUnderload, flows, "); + for(unsigned int i=0;iaddress().toString(pathStr); + std::string label = std::string((pathStr)) + " " + getSlave(_paths[i])->ifname(); + for (int i=0; i<19; ++i) { + fprintf(stdout, "%s, ", label.c_str()); + } + } + } + _header=true; + } + fprintf(stdout, "%ld, %d, %d, %d, ",((now - RR->bc->getBondStartTime())),_numBondedPaths,_totalBondUnderload, _flows.size()); + for(unsigned int i=0;iaddress().toString(pathStr); + 
fprintf(stdout, "%s, %s, %8.3f, %8.3f, %8.3f, %5.3f, %5.3f, %5.3f, %8f, %5.3f, %5.3f, %d, %5.3f, %d, %d, %d, %d, %d, %d, ", + getSlave(_paths[i])->ifname().c_str(), pathStr, _paths[i]->latencyMean, lat[i],pdv[i], _paths[i]->packetLossRatio, plr[i],per[i],thr[i],thm[i],thv[i],(now - _paths[i]->lastIn()),quality[i],alloc[i], + _paths[i]->relativeByteLoad, _paths[i]->assignedFlowCount, _paths[i]->alive(now, true), _paths[i]->eligible(now,_ackSendInterval), _paths[i]->qosStatsOut.size()); + } + } + fprintf(stdout, "\n"); + } + */ +} + +void Bond::processBalanceTasks(const int64_t now) +{ + // Omitted +} + +void Bond::dequeueNextActiveBackupPath(const uint64_t now) +{ + //fprintf(stderr, "dequeueNextActiveBackupPath\n"); + if (_abFailoverQueue.empty()) { + return; + } + _abPath = _abFailoverQueue.front(); + _abFailoverQueue.pop_front(); + _lastActiveBackupPathChange = now; + for (int i=0; iresetPacketCounts(); + } + } +} + +void Bond::processActiveBackupTasks(const int64_t now) +{ + //fprintf(stderr, "%llu processActiveBackupTasks\n", (now - RR->bc->getBondStartTime())); + char pathStr[128]; char prevPathStr[128]; char curPathStr[128]; + + SharedPtr prevActiveBackupPath = _abPath; + SharedPtr nonPreferredPath; + bool bFoundPrimarySlave = false; + + /** + * Select initial "active" active-backup slave + */ + if (!_abPath) { + fprintf(stderr, "%llu no active backup path yet...\n", ((now - RR->bc->getBondStartTime()))); + /** + * [Automatic mode] + * The user has not explicitly specified slaves or their failover schedule, + * the bonding policy will now select the first eligible path and set it as + * its active backup path, if a substantially better path is detected the bonding + * policy will assign it as the new active backup path. If the path fails it will + * simply find the next eligible path. + */ + if (!userHasSpecifiedSlaves()) { + fprintf(stderr, "%llu AB: (auto) user did not specify any slaves. 
waiting until we know more\n", ((now - RR->bc->getBondStartTime()))); + for (int i=0; ieligible(now,_ackSendInterval)) { + _paths[i]->address().toString(curPathStr); + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + if (slave) { + fprintf(stderr, "%llu AB: (initial) [%d] found eligible path %s on: %s\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, slave->ifname().c_str()); + } + _abPath = _paths[i]; + break; + } + } + } + /** + * [Manual mode] + * The user has specified slaves or failover rules that the bonding policy should adhere to. + */ + else if (userHasSpecifiedSlaves()) { + fprintf(stderr, "%llu AB: (manual) no active backup slave, checking local.conf\n", ((now - RR->bc->getBondStartTime()))); + if (userHasSpecifiedPrimarySlave()) { + fprintf(stderr, "%llu AB: (manual) user has specified primary slave, looking for it.\n", ((now - RR->bc->getBondStartTime()))); + for (int i=0; i slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + if (_paths[i]->eligible(now,_ackSendInterval) && slave->primary()) { + if (!_paths[i]->preferred()) { + _paths[i]->address().toString(curPathStr); + fprintf(stderr, "%llu AB: (initial) [%d] found path on primary slave, taking note in case we don't find a preferred path\n", ((now - RR->bc->getBondStartTime())), i); + nonPreferredPath = _paths[i]; + bFoundPrimarySlave = true; + } + if (_paths[i]->preferred()) { + _abPath = _paths[i]; + _abPath->address().toString(curPathStr); + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + if (slave) { + fprintf(stderr, "%llu AB: (initial) [%d] found preferred path %s on primary slave: %s\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, slave->ifname().c_str()); + } + bFoundPrimarySlave = true; + break; + } + } + } + if (_abPath) { + _abPath->address().toString(curPathStr); + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _abPath->localSocket()); + if (slave) { + fprintf(stderr, 
"%llu AB: (initial) found preferred primary path: %s on %s\n", ((now - RR->bc->getBondStartTime())), curPathStr, slave->ifname().c_str()); + } + } + else { + if (bFoundPrimarySlave && nonPreferredPath) { + fprintf(stderr, "%llu AB: (initial) found a non-preferred primary path\n", ((now - RR->bc->getBondStartTime()))); + _abPath = nonPreferredPath; + } + } + if (!_abPath) { + fprintf(stderr, "%llu AB: (initial) designated primary slave is not yet ready\n", ((now - RR->bc->getBondStartTime()))); + // TODO: Should fail-over to specified backup or just wait? + } + } + else if (!userHasSpecifiedPrimarySlave()) { + int _abIdx = ZT_MAX_PEER_NETWORK_PATHS; + fprintf(stderr, "%llu AB: (initial) user did not specify primary slave, just picking something\n", ((now - RR->bc->getBondStartTime()))); + for (int i=0; ieligible(now,_ackSendInterval)) { + _abIdx = i; + break; + } + } + if (_abIdx == ZT_MAX_PEER_NETWORK_PATHS) { + fprintf(stderr, "%llu AB: (initial) unable to find a candidate next-best, no change\n", ((now - RR->bc->getBondStartTime()))); + } + else { + _abPath = _paths[_abIdx]; + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _abPath->localSocket()); + if (slave) { + fprintf(stderr, "%llu AB: (initial) selected non-primary slave idx=%d, %s on %s\n", ((now - RR->bc->getBondStartTime())), _abIdx, pathStr, slave->ifname().c_str()); + } + } + } + } + } + /** + * Update and maintain the active-backup failover queue + */ + if (_abPath) { + // Don't worry about the failover queue until we have an active slave + // Remove ineligible paths from the failover slave queue + for (std::list >::iterator it(_abFailoverQueue.begin()); it!=_abFailoverQueue.end();) { + if ((*it) && !(*it)->eligible(now,_ackSendInterval)) { + (*it)->address().toString(curPathStr); + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, (*it)->localSocket()); + if (slave) { + fprintf(stderr, "%llu AB: (fq) %s on %s is now ineligible, removing from failover queue\n", ((now - 
RR->bc->getBondStartTime())), curPathStr, slave->ifname().c_str()); + } + it = _abFailoverQueue.erase(it); + } else { + ++it; + } + } + /** + * Failover instructions were provided by user, build queue according those as well as IPv + * preference, disregarding performance. + */ + if (userHasSpecifiedFailoverInstructions()) { + /** + * Clear failover scores + */ + for (int i=0; i_failoverScore = 0; + } + } + //fprintf(stderr, "AB: (fq) user has specified specific failover instructions, will follow them.\n"); + for (int i=0; iallowed() || !_paths[i]->eligible(now,_ackSendInterval)) { + continue; + } + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + _paths[i]->address().toString(pathStr); + + int failoverScoreHandicap = _paths[i]->_failoverScore; + if (_paths[i]->preferred()) + { + failoverScoreHandicap += ZT_MULTIPATH_FAILOVER_HANDICAP_PREFERRED; + //fprintf(stderr, "%s on %s ----> %d for preferred\n", pathStr, _paths[i]->ifname().c_str(), failoverScoreHandicap); + } + if (slave->primary()) { + // If using "optimize" primary reselect mode, ignore user slave designations + failoverScoreHandicap += ZT_MULTIPATH_FAILOVER_HANDICAP_PRIMARY; + //fprintf(stderr, "%s on %s ----> %d for primary\n", pathStr, _paths[i]->ifname().c_str(), failoverScoreHandicap); + } + if (!_paths[i]->_failoverScore) { + // If we didn't inherit a failover score from a "parent" that wants to use this path as a failover + int newHandicap = failoverScoreHandicap ? 
failoverScoreHandicap : _paths[i]->_allocation; + _paths[i]->_failoverScore = newHandicap; + //fprintf(stderr, "%s on %s ----> %d for allocation\n", pathStr, _paths[i]->ifname().c_str(), newHandicap); + } + SharedPtr failoverSlave; + if (slave->failoverToSlave().length()) { + failoverSlave = RR->bc->getSlaveByName(_policyAlias, slave->failoverToSlave()); + } + if (failoverSlave) { + for (int j=0; jaddress().toString(pathStr); + int inheritedHandicap = failoverScoreHandicap - 10; + int newHandicap = _paths[j]->_failoverScore > inheritedHandicap ? _paths[j]->_failoverScore : inheritedHandicap; + //fprintf(stderr, "\thanding down %s on %s ----> %d\n", pathStr, getSlave(_paths[j])->ifname().c_str(), newHandicap); + if (!_paths[j]->preferred()) { + newHandicap--; + } + _paths[j]->_failoverScore = newHandicap; + } + } + } + if (_paths[i].ptr() != _abPath.ptr()) { + bool bFoundPathInQueue = false; + for (std::list >::iterator it(_abFailoverQueue.begin()); it!=_abFailoverQueue.end();++it) { + if (_paths[i].ptr() == (*it).ptr()) { + bFoundPathInQueue = true; + } + } + if (!bFoundPathInQueue) { + _paths[i]->address().toString(curPathStr); + fprintf(stderr, "%llu AB: (fq) [%d] added %s on %s to queue\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, getSlave(_paths[i])->ifname().c_str()); + _abFailoverQueue.push_front(_paths[i]); + } + } + } + } + /** + * No failover instructions provided by user, build queue according to performance + * and IPv preference. 
+ */ + else if (!userHasSpecifiedFailoverInstructions()) { + for (int i=0; iallowed() + || !_paths[i]->eligible(now,_ackSendInterval)) { + continue; + } + int failoverScoreHandicap = 0; + if (_paths[i]->preferred()) { + failoverScoreHandicap = ZT_MULTIPATH_FAILOVER_HANDICAP_PREFERRED; + } + bool includeRefractoryPeriod = true; + if (!_paths[i]->eligible(now,includeRefractoryPeriod)) { + failoverScoreHandicap = -10000; + } + if (getSlave(_paths[i])->primary() && _abSlaveSelectMethod != ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE) { + // If using "optimize" primary reselect mode, ignore user slave designations + failoverScoreHandicap = ZT_MULTIPATH_FAILOVER_HANDICAP_PRIMARY; + } + if (_paths[i].ptr() == negotiatedPath.ptr()) { + _paths[i]->_negotiated = true; + failoverScoreHandicap = ZT_MULTIPATH_FAILOVER_HANDICAP_NEGOTIATED; + } else { + _paths[i]->_negotiated = false; + } + _paths[i]->_failoverScore = _paths[i]->_allocation + failoverScoreHandicap; + if (_paths[i].ptr() != _abPath.ptr()) { + bool bFoundPathInQueue = false; + for (std::list >::iterator it(_abFailoverQueue.begin()); it!=_abFailoverQueue.end();++it) { + if (_paths[i].ptr() == (*it).ptr()) { + bFoundPathInQueue = true; + } + } + if (!bFoundPathInQueue) { + _paths[i]->address().toString(curPathStr); + fprintf(stderr, "%llu AB: (fq) [%d] added %s on %s to queue\n", ((now - RR->bc->getBondStartTime())), i, curPathStr, getSlave(_paths[i])->ifname().c_str()); + _abFailoverQueue.push_front(_paths[i]); + } + } + } + } + _abFailoverQueue.sort(PathQualityComparator()); + if (_abFailoverQueue.empty()) { + fprintf(stderr, "%llu AB: (fq) the failover queue is empty, the active-backup bond is no longer fault-tolerant\n", ((now - RR->bc->getBondStartTime()))); + } + } + /** + * Short-circuit if we have no queued paths + */ + if (_abFailoverQueue.empty()) { + return; + } + /** + * Fulfill primary reselect obligations + */ + if (_abPath && !_abPath->eligible(now,_ackSendInterval)) { // Implicit 
ZT_MULTIPATH_RESELECTION_POLICY_FAILURE + _abPath->address().toString(curPathStr); fprintf(stderr, "%llu AB: (failure) failover event!, active backup path (%s) is no-longer eligible\n", ((now - RR->bc->getBondStartTime())), curPathStr); + if (!_abFailoverQueue.empty()) { + fprintf(stderr, "%llu AB: (failure) there are (%lu) slaves in queue to choose from...\n", ((now - RR->bc->getBondStartTime())), _abFailoverQueue.size()); + dequeueNextActiveBackupPath(now); + _abPath->address().toString(curPathStr); fprintf(stderr, "%llu sAB: (failure) switched to %s on %s\n", ((now - RR->bc->getBondStartTime())), curPathStr, getSlave(_abPath)->ifname().c_str()); + } else { + fprintf(stderr, "%llu AB: (failure) nothing available in the slave queue, doing nothing.\n", ((now - RR->bc->getBondStartTime()))); + } + } + /** + * Detect change to prevent flopping during later optimization step. + */ + if (prevActiveBackupPath != _abPath) { + _lastActiveBackupPathChange = now; + } + if (_abSlaveSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_ALWAYS) { + if (_abPath && !getSlave(_abPath)->primary() + && getSlave(_abFailoverQueue.front())->primary()) { + fprintf(stderr, "%llu AB: (always) switching to available primary\n", ((now - RR->bc->getBondStartTime()))); + dequeueNextActiveBackupPath(now); + } + } + if (_abSlaveSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_BETTER) { + if (_abPath && !getSlave(_abPath)->primary()) { + fprintf(stderr, "%llu AB: (better) active backup has switched to \"better\" primary slave according to re-select policy.\n", ((now - RR->bc->getBondStartTime()))); + if (getSlave(_abFailoverQueue.front())->primary() + && (_abFailoverQueue.front()->_failoverScore > _abPath->_failoverScore)) { + dequeueNextActiveBackupPath(now); + fprintf(stderr, "%llu AB: (better) switched back to user-defined primary\n", ((now - RR->bc->getBondStartTime()))); + } + } + } + if (_abSlaveSelectMethod == ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE && !_abFailoverQueue.empty()) { + /** + * 
Implement link negotiation that was previously-decided + */ + if (_abFailoverQueue.front()->_negotiated) { + dequeueNextActiveBackupPath(now); + _abPath->address().toString(prevPathStr); + fprintf(stderr, "%llu AB: (optimize) switched to negotiated path %s on %s\n", ((now - RR->bc->getBondStartTime())), prevPathStr, getSlave(_abPath)->ifname().c_str()); + _lastPathNegotiationCheck = now; + } + else { + // Try to find a better path and automatically switch to it -- not too often, though. + if ((now - _lastActiveBackupPathChange) > ZT_MULTIPATH_MIN_ACTIVE_BACKUP_AUTOFLOP_INTERVAL) { + if (!_abFailoverQueue.empty()) { + //fprintf(stderr, "AB: (optimize) there are (%d) slaves in queue to choose from...\n", _abFailoverQueue.size()); + int newFScore = _abFailoverQueue.front()->_failoverScore; + int prevFScore = _abPath->_failoverScore; + // Establish a minimum switch threshold to prevent flapping + int failoverScoreDifference = _abFailoverQueue.front()->_failoverScore - _abPath->_failoverScore; + int thresholdQuantity = (ZT_MULTIPATH_ACTIVE_BACKUP_OPTIMIZE_MIN_THRESHOLD * (float)_abPath->_allocation); + if ((failoverScoreDifference > 0) && (failoverScoreDifference > thresholdQuantity)) { + SharedPtr oldPath = _abPath; + _abPath->address().toString(prevPathStr); + dequeueNextActiveBackupPath(now); + _abPath->address().toString(curPathStr); + fprintf(stderr, "%llu AB: (optimize) switched from %s on %s (fs=%d) to %s on %s (fs=%d)\n", ((now - RR->bc->getBondStartTime())), prevPathStr, getSlave(oldPath)->ifname().c_str(), prevFScore, curPathStr, getSlave(_abPath)->ifname().c_str(), newFScore); + } + } + } + } + } +} + +void Bond::setReasonableDefaults(int policy) +{ + // If invalid bonding policy, try default + int _defaultBondingPolicy = BondController::defaultBondingPolicy(); + if (policy <= ZT_BONDING_POLICY_NONE || policy > ZT_BONDING_POLICY_BALANCE_AWARE) { + // If no default set, use NONE (effectively disabling this bond) + if (_defaultBondingPolicy < 
ZT_BONDING_POLICY_NONE || _defaultBondingPolicy > ZT_BONDING_POLICY_BALANCE_AWARE) { + _bondingPolicy= ZT_BONDING_POLICY_NONE; + } + _bondingPolicy= _defaultBondingPolicy; + } else { + _bondingPolicy= policy; + } + + _downDelay = 0; + _upDelay = 0; + _allowFlowHashing=false; + _bondMonitorInterval=0; + _allowPathNegotiation=false; + _shouldCollectPathStatistics=false; + _lastPathNegotiationReceived=0; + _lastBackgroundTaskCheck=0; + _lastPathNegotiationCheck=0; + + _lastFlowStatReset=0; + _lastFlowExpirationCheck=0; + _localUtility=0; + _numBondedPaths=0; + _rrPacketsSentOnCurrSlave=0; + _rrIdx=0; + _lastPathNegotiationReceived=0; + _pathNegotiationCutoffCount=0; + _lastFlowRebalance=0; + _totalBondUnderload = 0; + + //_maxAcceptableLatency + _maxAcceptablePacketDelayVariance = 50; + _maxAcceptablePacketLossRatio = 0.10; + _maxAcceptablePacketErrorRatio = 0.10; + _userHasSpecifiedSlaveSpeeds=0; + + _lastFrame=0; + + // TODO: Remove + _header=false; + _lastLogTS = 0; + _lastPrintTS = 0; + + + + + /** + * Paths are actively monitored to provide a real-time quality/preference-ordered rapid failover queue. + */ + switch (policy) { + case ZT_BONDING_POLICY_ACTIVE_BACKUP: + _failoverInterval = 5000; + _abSlaveSelectMethod = ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE; + _slaveMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; + _qualityWeights[ZT_QOS_LAT_IDX] = 0.2f; + _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; + _qualityWeights[ZT_QOS_PDV_IDX] = 0.2f; + _qualityWeights[ZT_QOS_PLR_IDX] = 0.2f; + _qualityWeights[ZT_QOS_PER_IDX] = 0.2f; + _qualityWeights[ZT_QOS_THR_IDX] = 0.2f; + _qualityWeights[ZT_QOS_THM_IDX] = 0.0f; + _qualityWeights[ZT_QOS_THV_IDX] = 0.0f; + _qualityWeights[ZT_QOS_SCP_IDX] = 0.0f; + break; + /** + * All seemingly-alive paths are used. Paths are not actively monitored. 
+ */ + case ZT_BONDING_POLICY_BROADCAST: + _downDelay = 30000; + _upDelay = 0; + break; + /** + * Paths are monitored to determine when/if one needs to be added or removed from the rotation + */ + case ZT_BONDING_POLICY_BALANCE_RR: + _failoverInterval = 5000; + _allowFlowHashing = false; + _packetsPerSlave = 8; + _slaveMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; + _qualityWeights[ZT_QOS_LAT_IDX] = 0.4f; + _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; + _qualityWeights[ZT_QOS_PDV_IDX] = 0.2f; + _qualityWeights[ZT_QOS_PLR_IDX] = 0.1f; + _qualityWeights[ZT_QOS_PER_IDX] = 0.1f; + _qualityWeights[ZT_QOS_THR_IDX] = 0.1f; + _qualityWeights[ZT_QOS_THM_IDX] = 0.0f; + _qualityWeights[ZT_QOS_THV_IDX] = 0.0f; + _qualityWeights[ZT_QOS_SCP_IDX] = 0.0f; + break; + /** + * Path monitoring is used to determine the capacity of each + * path and where to place the next flow. + */ + case ZT_BONDING_POLICY_BALANCE_XOR: + _failoverInterval = 5000;; + _upDelay=_bondMonitorInterval*2; + _allowFlowHashing = true; + _slaveMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; + _qualityWeights[ZT_QOS_LAT_IDX] = 0.4f; + _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; + _qualityWeights[ZT_QOS_PDV_IDX] = 0.2f; + _qualityWeights[ZT_QOS_PLR_IDX] = 0.1f; + _qualityWeights[ZT_QOS_PER_IDX] = 0.1f; + _qualityWeights[ZT_QOS_THR_IDX] = 0.1f; + _qualityWeights[ZT_QOS_THM_IDX] = 0.0f; + _qualityWeights[ZT_QOS_THV_IDX] = 0.0f; + _qualityWeights[ZT_QOS_SCP_IDX] = 0.0f; + break; + /** + * Path monitoring is used to determine the capacity of each + * path and where to place the next flow. Additionally, re-shuffling + * of flows may take place. 
+ */ + case ZT_BONDING_POLICY_BALANCE_AWARE: + _failoverInterval = 3000; + _allowFlowHashing = true; + _slaveMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC; + _qualityWeights[ZT_QOS_LAT_IDX] = 0.3f; + _qualityWeights[ZT_QOS_LTM_IDX] = 0.0f; + _qualityWeights[ZT_QOS_PDV_IDX] = 0.1f; + _qualityWeights[ZT_QOS_PLR_IDX] = 0.1f; + _qualityWeights[ZT_QOS_PER_IDX] = 0.1f; + _qualityWeights[ZT_QOS_THR_IDX] = 0.0f; + _qualityWeights[ZT_QOS_THM_IDX] = 0.4f; + _qualityWeights[ZT_QOS_THV_IDX] = 0.0f; + _qualityWeights[ZT_QOS_SCP_IDX] = 0.0f; + break; + default: + break; + } + + /** + * Timer geometries and counters + */ + _bondMonitorInterval = _failoverInterval / 3; + _ackSendInterval = _failoverInterval; + _qualityEstimationInterval = _failoverInterval * 2; + + _dynamicPathMonitorInterval = 0; + + _downDelay=0; + _upDelay=0; + + _ackCutoffCount = 0; + _lastAckRateCheck = 0; + _qosSendInterval = _bondMonitorInterval * 4; + _qosCutoffCount = 0; + _lastQoSRateCheck = 0; + throughputMeasurementInterval = _ackSendInterval * 2; + BondController::setMinReqPathMonitorInterval(_bondMonitorInterval); + + _defaultPathRefractoryPeriod = 8000; + + fprintf(stderr, "TIMERS: strat=%d, fi= %d, bmi= %d, qos= %d, ack= %d, estimateInt= %d, refractory= %d, ud= %d, dd= %d\n", + _slaveMonitorStrategy, + _failoverInterval, + _bondMonitorInterval, + _qosSendInterval, + _ackSendInterval, + _qualityEstimationInterval, + _defaultPathRefractoryPeriod, + _upDelay, + _downDelay); + + _lastQualityEstimation=0; +} + +void Bond::setUserQualityWeights(float weights[], int len) +{ + if (len == ZT_QOS_WEIGHT_SIZE) { + float weightTotal = 0.0; + for (unsigned int i=0; i 0.99 && weightTotal < 1.01) { + memcpy(_qualityWeights, weights, len * sizeof(float)); + } + } +} + + +bool Bond::relevant() { + return _peer->identity().address().toInt() == 0x16a03a3d03 + || _peer->identity().address().toInt() == 0x4410300d03 + || _peer->identity().address().toInt() == 0x795cbf86fa; +} + +SharedPtr 
Bond::getSlave(const SharedPtr& path) +{ + return RR->bc->getSlaveBySocket(_policyAlias, path->localSocket()); +} + +void Bond::dumpInfo(const int64_t now) +{ + char pathStr[128]; + //char oldPathStr[128]; + char currPathStr[128]; + + if (!relevant()) { + return; + } + /* + fprintf(stderr, "---[ bp=%d, id=%llx, dd=%d, up=%d, pmi=%d, specifiedSlaves=%d, _specifiedPrimarySlave=%d, _specifiedFailInst=%d ]\n", + _policy, _peer->identity().address().toInt(), _downDelay, _upDelay, _monitorInterval, _userHasSpecifiedSlaves, _userHasSpecifiedPrimarySlave, _userHasSpecifiedFailoverInstructions); + + if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP) { + fprintf(stderr, "Paths (bp=%d, stats=%d, primaryReselect=%d) :\n", + _policy, _shouldCollectPathStatistics, _abSlaveSelectMethod); + } + if (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_RR + || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_XOR + || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_AWARE) { + fprintf(stderr, "Paths (bp=%d, stats=%d, fh=%d) :\n", + _policy, _shouldCollectPathStatistics, _allowFlowHashing); + }*/ + + if ((now - _lastLogTS) < 1000) { + return; + } + _lastPrintTS = now; + _lastLogTS = now; + + fprintf(stderr, "\n\n"); + + for(int i=0; i slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[i]->localSocket()); + _paths[i]->address().toString(pathStr); + fprintf(stderr, " %2d: lat=%8.3f, ac=%3d, fail%5s, fscore=%6d, in=%7d, out=%7d, age=%7ld, ack=%7ld, ref=%6d, ls=%llx", + i, + _paths[i]->_latencyMean, + _paths[i]->_allocation, + slave->failoverToSlave().c_str(), + _paths[i]->_failoverScore, + _paths[i]->_packetsIn, + _paths[i]->_packetsOut, + (long)_paths[i]->age(now), + (long)_paths[i]->ackAge(now), + _paths[i]->_refractoryPeriod, + _paths[i]->localSocket() + ); + if (slave->spare()) { + fprintf(stderr, " SPR."); + } else { + fprintf(stderr, " "); + } + if (slave->primary()) { + fprintf(stderr, " PRIM."); + } else { + fprintf(stderr, " "); + } + if (_paths[i]->allowed()) { + fprintf(stderr, " ALL."); + } 
else { + fprintf(stderr, " "); + } + if (_paths[i]->eligible(now,_ackSendInterval)) { + fprintf(stderr, " ELI."); + } else { + fprintf(stderr, " "); + } + if (_paths[i]->preferred()) { + fprintf(stderr, " PREF."); + } else { + fprintf(stderr, " "); + } + if (_paths[i]->_negotiated) { + fprintf(stderr, " NEG."); + } else { + fprintf(stderr, " "); + } + if (_paths[i]->bonded()) { + fprintf(stderr, " BOND "); + } else { + fprintf(stderr, " "); + } + if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP && _abPath && (_abPath == _paths[i].ptr())) { + fprintf(stderr, " ACTIVE "); + } else if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP) { + fprintf(stderr, " "); + } + if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP && _abFailoverQueue.size() && (_abFailoverQueue.front().ptr() == _paths[i].ptr())) { + fprintf(stderr, " NEXT "); + } else if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP) { + fprintf(stderr, " "); + } + fprintf(stderr, "%5s %s\n", slave->ifname().c_str(), pathStr); + } + } + + if (_bondingPolicy== ZT_BONDING_POLICY_ACTIVE_BACKUP) { + if (!_abFailoverQueue.empty()) { + fprintf(stderr, "\nFailover Queue:\n"); + for (std::list >::iterator it(_abFailoverQueue.begin()); it!=_abFailoverQueue.end();++it) { + (*it)->address().toString(currPathStr); + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, (*it)->localSocket()); + fprintf(stderr, "\t%8s\tspeed=%7d\trelSpeed=%3d\tipvPref=%3d\tfscore=%9d\t\t%s\n", + slave->ifname().c_str(), + slave->speed(), + slave->relativeSpeed(), + slave->ipvPref(), + (*it)->_failoverScore, + currPathStr); + } + } + else + { + fprintf(stderr, "\nFailover Queue size = %lu\n", _abFailoverQueue.size()); + } + } + + if (_bondingPolicy== ZT_BONDING_POLICY_BALANCE_RR + || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_XOR + || _bondingPolicy== ZT_BONDING_POLICY_BALANCE_AWARE) { + /* + if (_numBondedPaths) { + fprintf(stderr, "\nBonded Paths:\n"); + for (int i=0; i<_numBondedPaths; ++i) { + 
_paths[_bondedIdx[i]].p->address().toString(currPathStr); + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, _paths[_bondedIdx[i]].p->localSocket()); + fprintf(stderr, " [%d]\t%8s\tflows=%3d\tspeed=%7d\trelSpeed=%3d\tipvPref=%3d\tfscore=%9d\t\t%s\n", i, + //fprintf(stderr, " [%d]\t%8s\tspeed=%7d\trelSpeed=%3d\tflowCount=%2d\tipvPref=%3d\tfscore=%9d\t\t%s\n", i, + slave->ifname().c_str(), + numberOfAssignedFlows(_paths[_bondedIdx[i]].p), + slave->speed(), + slave->relativeSpeed(), + //_paths[_bondedIdx[i]].p->assignedFlows.size(), + slave->ipvPref(), + _paths[_bondedIdx[i]].p->failoverScore(), + currPathStr); + } + } + */ + /* + if (_allowFlowHashing) { + //Mutex::Lock _l(_flows_m); + if (_flows.size()) { + fprintf(stderr, "\nFlows:\n"); + std::map >::iterator it = _flows.begin(); + while (it != _flows.end()) { + it->second->assignedPath()->address().toString(currPathStr); + SharedPtr slave =RR->bc->getSlaveBySocket(_policyAlias, it->second->assignedPath()->localSocket()); + fprintf(stderr, " [%4x] in=%16llu, out=%16llu, bytes=%16llu, last=%16llu, if=%8s\t\t%s\n", + it->second->id(), + it->second->bytesInPerUnitTime(), + it->second->bytesOutPerUnitTime(), + it->second->totalBytes(), + it->second->age(now), + slave->ifname().c_str(), + currPathStr); + ++it; + } + } + } + */ + } + //fprintf(stderr, "\n\n\n\n\n"); +} + +} // namespace ZeroTier \ No newline at end of file diff --git a/node/Bond.hpp b/node/Bond.hpp new file mode 100644 index 000000000..6318f3936 --- /dev/null +++ b/node/Bond.hpp @@ -0,0 +1,689 @@ +/* + * Copyright (c)2013-2020 ZeroTier, Inc. + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file in the project's root directory. + * + * Change Date: 2024-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2.0 of the Apache License. 
+ */ +/****/ + +#ifndef ZT_BOND_HPP +#define ZT_BOND_HPP + +#include + +#include "Path.hpp" +#include "Peer.hpp" +#include "../osdep/Slave.hpp" +#include "Flow.hpp" + +namespace ZeroTier { + +class RuntimeEnvironment; +class Slave; + +class Bond +{ + friend class SharedPtr; + friend class Peer; + friend class BondController; + + struct PathQualityComparator + { + bool operator ()(const SharedPtr & a, const SharedPtr & b) + { + if(a->_failoverScore == b->_failoverScore) { + return a < b; + } + return a->_failoverScore > b->_failoverScore; + } + }; + +public: + + // TODO: Remove + bool _header; + int64_t _lastLogTS; + int64_t _lastPrintTS; + void dumpInfo(const int64_t now); + bool relevant(); + + SharedPtr getSlave(const SharedPtr& path); + + /** + * Constructor. For use only in first initialization in Node + * + * @param renv Runtime environment + */ + Bond(const RuntimeEnvironment *renv); + + /** + * Constructor. Creates a bond based off of ZT defaults + * + * @param renv Runtime environment + * @param policy Bonding policy + * @param peer + */ + Bond(const RuntimeEnvironment *renv, int policy, const SharedPtr& peer); + + /** + * Constructor. For use when user intends to manually specify parameters + * + * @param basePolicy + * @param policyAlias + * @param peer + */ + Bond(std::string& basePolicy, std::string& policyAlias, const SharedPtr& peer); + + /** + * Constructor. 
Creates a bond based off of a user-defined bond template + * + * @param renv Runtime environment + * @param original + * @param peer + */ + Bond(const RuntimeEnvironment *renv, const Bond &original, const SharedPtr& peer); + + /** + * + * @return + */ + std::string policyAlias() { return _policyAlias; } + + /** + * Inform the bond about the path that its peer just learned about + * + * @param path Newly-learned Path which should now be handled by the Bond + * @param now Current time + */ + void nominatePath(const SharedPtr& path, int64_t now); + + /** + * Propagate and memoize often-used bonding preferences for each path + */ + void applyUserPrefs(); + + /** + * Check path states and perform bond rebuilds if needed. + * + * @param now Current time + * @param rebuild Whether or not the bond should be reconstructed. + */ + void curateBond(const int64_t now, bool rebuild); + + /** + * Periodically perform statistical summaries of quality metrics for all paths. + * + * @param now Current time + */ + void estimatePathQuality(int64_t now); + + /** + * Record an invalid incoming packet. This packet failed + * MAC/compression/cipher checks and will now contribute to a + * Packet Error Ratio (PER). + * + * @param path Path over which packet was received + */ + void recordIncomingInvalidPacket(const SharedPtr& path); + + /** + * Record statistics on outbound an packet. + * + * @param path Path over which packet is being sent + * @param packetId Packet ID + * @param payloadLength Packet data length + * @param verb Packet verb + * @param flowId Flow ID + * @param now Current time + */ + void recordOutgoingPacket(const SharedPtr &path, uint64_t packetId, + uint16_t payloadLength, Packet::Verb verb, int32_t flowId, int64_t now); + + /** + * Process the contents of an inbound VERB_QOS_MEASUREMENT to gather path quality observations. 
+ * + * @param now Current time + * @param count Number of records + * @param rx_id table of packet IDs + * @param rx_ts table of holding times + */ + void receivedQoS(const SharedPtr& path, int64_t now, int count, uint64_t *rx_id, uint16_t *rx_ts); + + /** + * Process the contents of an inbound VERB_ACK to gather path quality observations. + * + * @param path Path over which packet was received + * @param now Current time + * @param ackedBytes Number of bytes ACKed by this VERB_ACK + */ + void receivedAck(const SharedPtr& path, int64_t now, int32_t ackedBytes); + + /** + * Generate the contents of a VERB_QOS_MEASUREMENT packet. + * + * @param now Current time + * @param qosBuffer destination buffer + * @return Size of payload + */ + int32_t generateQoSPacket(const SharedPtr& path, int64_t now, char *qosBuffer); + + /** + * Record statistics for an inbound packet. + * + * @param path Path over which packet was received + * @param packetId Packet ID + * @param payloadLength Packet data length + * @param verb Packet verb + * @param flowId Flow ID + * @param now Current time + */ + void recordIncomingPacket(const SharedPtr& path, uint64_t packetId, uint16_t payloadLength, + Packet::Verb verb, int32_t flowId, int64_t now); + + /** + * Determines the most appropriate path for packet and flow egress. This decision is made by + * the underlying bonding policy as well as QoS-related statistical observations of path quality. 
+ * + * @param now Current time + * @param flowId Flow ID + * @return Pointer to suggested Path + */ + SharedPtr getAppropriatePath(int64_t now, int32_t flowId); + + /** + * Creates a new flow record + * + * @param path Path over which flow shall be handled + * @param flowId Flow ID + * @param entropy A byte of entropy to be used by the bonding algorithm + * @param now Current time + * @return Pointer to newly-created Flow + */ + SharedPtr createFlow(const SharedPtr &path, int32_t flowId, unsigned char entropy, int64_t now); + + /** + * Removes flow records that are past a certain age limit. + * + * @param age Age threshold to be forgotten + * @param oldest Whether only the oldest shall be forgotten + * @param now Current time + */ + void forgetFlowsWhenNecessary(uint64_t age, bool oldest, int64_t now); + + /** + * Assigns a new flow to a bonded path + * + * @param flow Flow to be assigned + * @param now Current time + */ + bool assignFlowToBondedPath(SharedPtr &flow, int64_t now); + + /** + * Determine whether a path change should occur given the remote peer's reported utility and our + * local peer's known utility. This has the effect of assigning inbound and outbound traffic to + * the same path. + * + * @param now Current time + * @param path Path over which the negotiation request was received + * @param remoteUtility How much utility the remote peer claims to gain by using the declared path + */ + void processIncomingPathNegotiationRequest(uint64_t now, SharedPtr &path, int16_t remoteUtility); + + /** + * Determine state of path synchronization and whether a negotiation request + * shall be sent to the peer. + * + * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call + * @param now Current time + */ + void pathNegotiationCheck(void *tPtr, const int64_t now); + + /** + * Sends a VERB_ACK to the remote peer. 
+ * + * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call + * @param path Path over which packet should be sent + * @param localSocket Local source socket + * @param atAddress + * @param now Current time + */ + void sendACK(void *tPtr,const SharedPtr &path,int64_t localSocket, + const InetAddress &atAddress,int64_t now); + + /** + * Sends a VERB_QOS_MEASUREMENT to the remote peer. + * + * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call + * @param path Path over which packet should be sent + * @param localSocket Local source socket + * @param atAddress + * @param now Current time + */ + void sendQOS_MEASUREMENT(void *tPtr,const SharedPtr &path,int64_t localSocket, + const InetAddress &atAddress,int64_t now); + + /** + * Sends a VERB_PATH_NEGOTIATION_REQUEST to the remote peer. + * + * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call + * @param path Path over which packet should be sent + */ + void sendPATH_NEGOTIATION_REQUEST(void *tPtr, const SharedPtr &path); + + /** + * + * @param now Current time + */ + void processBalanceTasks(int64_t now); + + /** + * Perform periodic tasks unique to active-backup + * + * @param now Current time + */ + void processActiveBackupTasks(int64_t now); + + /** + * Switches the active slave in an active-backup scenario to the next best during + * a failover event. + * + * @param now Current time + */ + void dequeueNextActiveBackupPath(uint64_t now); + + /** + * Set bond parameters to reasonable defaults, these may later be overwritten by + * user-specified parameters. + * + * @param policy Bonding policy + */ + void setReasonableDefaults(int policy); + + /** + * Check and assign user-specified quality weights to this bond. 
+ * + * @param weights Set of user-specified weights + * @param len Length of weight vector + */ + void setUserQualityWeights(float weights[], int len); + + /** + * @param latencyInMilliseconds Maximum acceptable latency. + */ + void setMaxAcceptableLatency(int16_t latencyInMilliseconds) { + _maxAcceptableLatency = latencyInMilliseconds; + } + + /** + * @param latencyInMilliseconds Maximum acceptable (mean) latency. + */ + void setMaxAcceptableMeanLatency(int16_t latencyInMilliseconds) { + _maxAcceptableMeanLatency = latencyInMilliseconds; + } + + /** + * @param latencyVarianceInMilliseconds Maximum acceptable packet delay variance (jitter). + */ + void setMaxAcceptablePacketDelayVariance(int16_t latencyVarianceInMilliseconds) { + _maxAcceptablePacketDelayVariance = latencyVarianceInMilliseconds; + } + + /** + * @param lossRatio Maximum acceptable packet loss ratio (PLR). + */ + void setMaxAcceptablePacketLossRatio(float lossRatio) { + _maxAcceptablePacketLossRatio = lossRatio; + } + + /** + * @param errorRatio Maximum acceptable packet error ratio (PER). + */ + void setMaxAcceptablePacketErrorRatio(float errorRatio) { + _maxAcceptablePacketErrorRatio = errorRatio; + } + + /** + * @param errorRatio Maximum acceptable packet error ratio (PER). 
+ */ + void setMinAcceptableAllocation(float minAlloc) { + _minAcceptableAllocation = minAlloc * 255; + } + + /** + * @return Whether the user has defined slaves for use on this bond + */ + inline bool userHasSpecifiedSlaves() { return _userHasSpecifiedSlaves; } + + /** + * @return Whether the user has defined a set of failover slave(s) for this bond + */ + inline bool userHasSpecifiedFailoverInstructions() { return _userHasSpecifiedFailoverInstructions; }; + + /** + * @return Whether the user has specified a primary slave + */ + inline bool userHasSpecifiedPrimarySlave() { return _userHasSpecifiedPrimarySlave; } + + /** + * @return Whether the user has specified slave speeds + */ + inline bool userHasSpecifiedSlaveSpeeds() { return _userHasSpecifiedSlaveSpeeds; } + + /** + * Periodically perform maintenance tasks for each active bond. + * + * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call + * @param now Current time + */ + void processBackgroundTasks(void *tPtr, int64_t now); + + /** + * Rate limit gate for VERB_ACK + * + * @param now Current time + * @return Whether the incoming packet should be rate-gated + */ + inline bool rateGateACK(const int64_t now) + { + _ackCutoffCount++; + int numToDrain = _lastAckRateCheck ? 
(now - _lastAckRateCheck) / ZT_ACK_DRAINAGE_DIVISOR : _ackCutoffCount; + _lastAckRateCheck = now; + if (_ackCutoffCount > numToDrain) { + _ackCutoffCount-=numToDrain; + } else { + _ackCutoffCount = 0; + } + return (_ackCutoffCount < ZT_ACK_CUTOFF_LIMIT); + } + + /** + * Rate limit gate for VERB_QOS_MEASUREMENT + * + * @param now Current time + * @return Whether the incoming packet should be rate-gated + */ + inline bool rateGateQoS(const int64_t now) + { + _qosCutoffCount++; + int numToDrain = (now - _lastQoSRateCheck) / ZT_QOS_DRAINAGE_DIVISOR; + _lastQoSRateCheck = now; + if (_qosCutoffCount > numToDrain) { + _qosCutoffCount-=numToDrain; + } else { + _qosCutoffCount = 0; + } + return (_qosCutoffCount < ZT_QOS_CUTOFF_LIMIT); + } + + /** + * Rate limit gate for VERB_PATH_NEGOTIATION_REQUEST + * + * @param now Current time + * @return Whether the incoming packet should be rate-gated + */ + inline bool rateGatePathNegotiation(const int64_t now) + { + if ((now - _lastPathNegotiationReceived) <= ZT_PATH_NEGOTIATION_CUTOFF_TIME) + ++_pathNegotiationCutoffCount; + else _pathNegotiationCutoffCount = 0; + _lastPathNegotiationReceived = now; + return (_pathNegotiationCutoffCount < ZT_PATH_NEGOTIATION_CUTOFF_LIMIT); + } + + /** + * @param interval Maximum amount of time user expects a failover to take on this bond. 
+ */ + inline void setFailoverInterval(uint32_t interval) { _failoverInterval = interval; } + + /** + * @param strategy The strategy that the bond uses to prob for path aliveness and quality + */ + inline void setSlaveMonitorStrategy(uint8_t strategy) { _slaveMonitorStrategy = strategy; } + + /** + * @return the current up delay parameter + */ + inline uint16_t getUpDelay() { return _upDelay; } + + /** + * @param upDelay Length of time before a newly-discovered path is admitted to the bond + */ + inline void setUpDelay(int upDelay) { if (upDelay >= 0) { _upDelay = upDelay; } } + + /** + * @return Length of time before a newly-failed path is removed from the bond + */ + inline uint16_t getDownDelay() { return _downDelay; } + + /** + * @param downDelay Length of time before a newly-failed path is removed from the bond + */ + inline void setDownDelay(int downDelay) { if (downDelay >= 0) { _downDelay = downDelay; } } + + /** + * @return the current monitoring interval for the bond (can be overridden with intervals specific to certain slaves.) + */ + inline uint16_t getBondMonitorInterval() { return _bondMonitorInterval; } + + /** + * Set the current monitoring interval for the bond (can be overridden with intervals specific to certain slaves.) + * + * @param monitorInterval How often gratuitous VERB_HELLO(s) are sent to remote peer. + */ + inline void setBondMonitorInterval(uint16_t interval) { _bondMonitorInterval = interval; } + + /** + * @param policy Bonding policy for this bond + */ + inline void setPolicy(uint8_t policy) { _bondingPolicy = policy; } + + /** + * @return the current bonding policy + */ + inline uint8_t getPolicy() { return _bondingPolicy; } + + /** + * + * @param allowFlowHashing + */ + inline void setFlowHashing(bool allowFlowHashing) { _allowFlowHashing = allowFlowHashing; } + + /** + * @return Whether flow-hashing is currently enabled for this bond. 
+ */ + bool flowHashingEnabled() { return _allowFlowHashing; } + + /** + * + * @param packetsPerSlave + */ + inline void setPacketsPerSlave(int packetsPerSlave) { _packetsPerSlave = packetsPerSlave; } + + /** + * + * @param slaveSelectMethod + */ + inline void setSlaveSelectMethod(uint8_t method) { _abSlaveSelectMethod = method; } + + /** + * + * @return + */ + inline uint8_t getSlaveSelectMethod() { return _abSlaveSelectMethod; } + + /** + * + * @param allowPathNegotiation + */ + inline void setAllowPathNegotiation(bool allowPathNegotiation) { _allowPathNegotiation = allowPathNegotiation; } + + /** + * + * @return + */ + inline bool allowPathNegotiation() { return _allowPathNegotiation; } + +private: + + const RuntimeEnvironment *RR; + AtomicCounter __refCount; + + /** + * Custom name given by the user to this bond type. + */ + std::string _policyAlias; + + /** + * Paths that this bond has been made aware of but that are not necessarily + * part of the bond proper. + */ + SharedPtr _paths[ZT_MAX_PEER_NETWORK_PATHS]; + + /** + * Set of indices corresponding to paths currently included in the bond proper. This + * may only be updated during a call to curateBond(). The reason for this is so that + * we can simplify the high frequency packet egress logic. + */ + int _bondedIdx[ZT_MAX_PEER_NETWORK_PATHS]; + + /** + * Number of paths currently included in the _bondedIdx set. + */ + int _numBondedPaths; + + /** + * Flows hashed according to port and protocol + */ + std::map > _flows; + + float _qualityWeights[ZT_QOS_WEIGHT_SIZE]; // How much each factor contributes to the "quality" score of a path. 
+ + uint8_t _bondingPolicy; + uint32_t _upDelay; + uint32_t _downDelay; + + // active-backup + SharedPtr _abPath; // current active path + std::list > _abFailoverQueue; + uint8_t _abSlaveSelectMethod; // slave re-selection policy for the primary slave in active-backup + uint64_t _lastActiveBackupPathChange; + + // balance-rr + uint8_t _rrIdx; // index to path currently in use during Round Robin operation + uint16_t _rrPacketsSentOnCurrSlave; // number of packets sent on this slave since the most recent path switch. + /** + * How many packets will be sent on a path before moving to the next path + * in the round-robin sequence. A value of zero will cause a random path + * selection for each outgoing packet. + */ + int _packetsPerSlave; + + // balance-aware + uint64_t _totalBondUnderload; + + // dynamic slave monitoring + uint8_t _slaveMonitorStrategy; + uint64_t _lastFrame; + uint32_t _dynamicPathMonitorInterval; + + // path negotiation + int16_t _localUtility; + SharedPtr negotiatedPath; + uint8_t _numSentPathNegotiationRequests; + unsigned int _pathNegotiationCutoffCount; + bool _allowPathNegotiation; + uint64_t _lastPathNegotiationReceived; + uint64_t _lastSentPathNegotiationRequest; + + // timers + uint32_t _failoverInterval; + uint32_t _qosSendInterval; + uint32_t _ackSendInterval; + uint16_t _ackCutoffCount; + uint64_t _lastAckRateCheck; + uint16_t _qosCutoffCount; + uint64_t _lastQoSRateCheck; + uint32_t throughputMeasurementInterval; + uint32_t _qualityEstimationInterval; + + // timestamps + uint64_t _lastCheckUserPreferences; + uint64_t _lastQualityEstimation; + uint64_t _lastFlowStatReset; + uint64_t _lastFlowExpirationCheck; + uint64_t _lastFlowRebalance; + uint64_t _lastPathNegotiationCheck; + uint64_t _lastBackgroundTaskCheck; + + float _maxAcceptablePacketLossRatio; + float _maxAcceptablePacketErrorRatio; + uint16_t _maxAcceptableLatency; + uint16_t _maxAcceptableMeanLatency; + uint16_t _maxAcceptablePacketDelayVariance; + uint8_t 
_minAcceptableAllocation; + + /** + * Default initial punishment inflicted on misbehaving paths. Punishment slowly + * drains linearly. For each eligibility change the remaining punishment is doubled. + */ + uint32_t _defaultPathRefractoryPeriod; + + /** + * Whether the current bonding policy requires computation of path statistics + */ + bool _shouldCollectPathStatistics; + + /** + * Free byte of entropy that is updated on every packet egress event. + */ + unsigned char _freeRandomByte; + + /** + * Remote peer that this bond services + */ + SharedPtr _peer; + + Mutex _paths_m; + Mutex _flows_m; + + /** + * Whether the user has specified slaves for this bond. + */ + bool _userHasSpecifiedSlaves; + + /** + * Whether the user has specified a primary slave for this bond. + */ + bool _userHasSpecifiedPrimarySlave; + + /** + * Whether the user has specified failover instructions for this bond. + */ + bool _userHasSpecifiedFailoverInstructions; + + /** + * Whether the user has specified slaves speeds for this bond. + */ + bool _userHasSpecifiedSlaveSpeeds; + + /** + * How frequently (in ms) a VERB_ECHO is sent to a peer to verify that a + * path is still active. A value of zero (0) will disable active path + * monitoring; as result, all monitoring will be a function of traffic. + */ + uint16_t _bondMonitorInterval; + + /** + * Whether or not flow hashing is allowed. + */ + bool _allowFlowHashing; +}; + +} // namespace ZeroTier + +#endif \ No newline at end of file diff --git a/node/BondController.cpp b/node/BondController.cpp new file mode 100644 index 000000000..4bc8d2261 --- /dev/null +++ b/node/BondController.cpp @@ -0,0 +1,203 @@ +/* + * Copyright (c)2013-2020 ZeroTier, Inc. + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file in the project's root directory. 
+ * + * Change Date: 2024-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2.0 of the Apache License. + */ +/****/ + +#include "BondController.hpp" +#include "Peer.hpp" + +namespace ZeroTier { + +int BondController::_minReqPathMonitorInterval; +uint8_t BondController::_defaultBondingPolicy; + +BondController::BondController(const RuntimeEnvironment *renv) : + RR(renv) +{ + bondStartTime = RR->node->now(); +} + +bool BondController::slaveAllowed(std::string &policyAlias, SharedPtr slave) +{ + bool foundInDefinitions = false; + if (_slaveDefinitions.count(policyAlias)) { + auto it = _slaveDefinitions[policyAlias].begin(); + while (it != _slaveDefinitions[policyAlias].end()) { + if (slave->ifname() == (*it)->ifname()) { + foundInDefinitions = true; + break; + } + ++it; + } + } + return _slaveDefinitions[policyAlias].empty() || foundInDefinitions; +} + +void BondController::addCustomSlave(std::string& policyAlias, SharedPtr slave) +{ + Mutex::Lock _l(_slaves_m); + _slaveDefinitions[policyAlias].push_back(slave); + auto search = _interfaceToSlaveMap[policyAlias].find(slave->ifname()); + if (search == _interfaceToSlaveMap[policyAlias].end()) { + slave->setAsUserSpecified(true); + _interfaceToSlaveMap[policyAlias].insert(std::pair>(slave->ifname(), slave)); + } else { + fprintf(stderr, "slave already exists=%s\n", slave->ifname().c_str()); + // Slave is already defined, overlay user settings + } +} + +bool BondController::addCustomPolicy(const SharedPtr& newBond) +{ + Mutex::Lock _l(_bonds_m); + if (!_bondPolicyTemplates.count(newBond->policyAlias())) { + _bondPolicyTemplates[newBond->policyAlias()] = newBond; + return true; + } + return false; +} + +bool BondController::assignBondingPolicyToPeer(int64_t identity, const std::string& policyAlias) +{ + Mutex::Lock _l(_bonds_m); + if (!_policyTemplateAssignments.count(identity)) { + _policyTemplateAssignments[identity] = policyAlias; + 
return true; + } + return false; +} + +SharedPtr BondController::createTransportTriggeredBond(const RuntimeEnvironment *renv, const SharedPtr& peer) +{ + fprintf(stderr, "createTransportTriggeredBond\n"); + Mutex::Lock _l(_bonds_m); + int64_t identity = peer->identity().address().toInt(); + Bond *bond = nullptr; + if (!_bonds.count(identity)) { + std::string policyAlias; + int _defaultBondingPolicy = defaultBondingPolicy(); + fprintf(stderr, "new bond, registering for %llx\n", identity); + if (!_policyTemplateAssignments.count(identity)) { + if (defaultBondingPolicy()) { + fprintf(stderr, " no assignment, using default (%d)\n", _defaultBondingPolicy); + bond = new Bond(renv, _defaultBondingPolicy, peer); + } + if (!_defaultBondingPolicy && _defaultBondingPolicyStr.length()) { + fprintf(stderr, " no assignment, using default custom (%s)\n", _defaultBondingPolicyStr.c_str()); + bond = new Bond(renv, *(_bondPolicyTemplates[_defaultBondingPolicyStr].ptr()), peer); + } + } + else { + fprintf(stderr, " assignment found for %llx, using it as a template (%s)\n", identity,_policyTemplateAssignments[identity].c_str()); + if (!_bondPolicyTemplates[_policyTemplateAssignments[identity]]) { + fprintf(stderr, "unable to locate template (%s), ignoring assignment for (%llx), using defaults\n", _policyTemplateAssignments[identity].c_str(), identity); + bond = new Bond(renv, _defaultBondingPolicy, peer); + } + else { + bond = new Bond(renv, *(_bondPolicyTemplates[_policyTemplateAssignments[identity]].ptr()), peer); + } + } + } + else { + fprintf(stderr, "bond already exists for %llx, cannot re-register. 
exiting\n", identity); exit(0); // TODO: Remove + } + if (bond) { + _bonds[identity] = bond; + /** + * Determine if user has specified anything that could affect the bonding policy's decisions + */ + if (_interfaceToSlaveMap.count(bond->policyAlias())) { + std::map >::iterator it = _interfaceToSlaveMap[bond->policyAlias()].begin(); + while (it != _interfaceToSlaveMap[bond->policyAlias()].end()) { + if (it->second->isUserSpecified()) { + bond->_userHasSpecifiedSlaves = true; + } + if (it->second->isUserSpecified() && it->second->primary()) { + bond->_userHasSpecifiedPrimarySlave = true; + } + if (it->second->isUserSpecified() && it->second->userHasSpecifiedFailoverInstructions()) { + bond->_userHasSpecifiedFailoverInstructions = true; + } + if (it->second->isUserSpecified() && (it->second->speed() > 0)) { + bond->_userHasSpecifiedSlaveSpeeds = true; + } + ++it; + } + } + return bond; + } + return SharedPtr(); +} + +SharedPtr BondController::getSlaveBySocket(const std::string& policyAlias, uint64_t localSocket) +{ + Mutex::Lock _l(_slaves_m); + char ifname[16]; + _phy->getIfName((PhySocket *) ((uintptr_t)localSocket), ifname, 16); + std::string ifnameStr(ifname); + auto search = _interfaceToSlaveMap[policyAlias].find(ifnameStr); + if (search == _interfaceToSlaveMap[policyAlias].end()) { + SharedPtr s = new Slave(ifnameStr, 0, 0, 0, 0, 0, true, ZT_MULTIPATH_SLAVE_MODE_SPARE, "", 0.0); + _interfaceToSlaveMap[policyAlias].insert(std::pair >(ifnameStr, s)); + return s; + } + else { + return search->second; + } +} + +SharedPtr BondController::getSlaveByName(const std::string& policyAlias, const std::string& ifname) +{ + Mutex::Lock _l(_slaves_m); + auto search = _interfaceToSlaveMap[policyAlias].find(ifname); + if (search != _interfaceToSlaveMap[policyAlias].end()) { + return search->second; + } + return SharedPtr(); +} + +bool BondController::allowedToBind(const std::string& ifname) +{ + return true; + /* + if (!_defaultBondingPolicy) { + return true; // no restrictions 
+ } + Mutex::Lock _l(_slaves_m); + if (_interfaceToSlaveMap.empty()) { + return true; // no restrictions + } + std::map > >::iterator policyItr = _interfaceToSlaveMap.begin(); + while (policyItr != _interfaceToSlaveMap.end()) { + std::map >::iterator slaveItr = policyItr->second.begin(); + while (slaveItr != policyItr->second.end()) { + if (slaveItr->first == ifname) { + return true; + } + ++slaveItr; + } + ++policyItr; + } + return false; + */ +} + +void BondController::processBackgroundTasks(void *tPtr, const int64_t now) +{ + Mutex::Lock _l(_bonds_m); + std::map >::iterator bondItr = _bonds.begin(); + while (bondItr != _bonds.end()) { + bondItr->second->processBackgroundTasks(tPtr, now); + ++bondItr; + } +} + +} // namespace ZeroTier \ No newline at end of file diff --git a/node/BondController.hpp b/node/BondController.hpp new file mode 100644 index 000000000..c8fa660b0 --- /dev/null +++ b/node/BondController.hpp @@ -0,0 +1,231 @@ +/* + * Copyright (c)2013-2020 ZeroTier, Inc. + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file in the project's root directory. + * + * Change Date: 2024-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2.0 of the Apache License. + */ +/****/ + +#ifndef ZT_BONDCONTROLLER_HPP +#define ZT_BONDCONTROLLER_HPP + +#include +#include + +#include "SharedPtr.hpp" +#include "../osdep/Phy.hpp" +#include "../osdep/Slave.hpp" + +namespace ZeroTier { + +class RuntimeEnvironment; +class Bond; +class Peer; + +class BondController +{ + friend class Bond; + +public: + + BondController(const RuntimeEnvironment *renv); + + /** + * @return The minimum interval required to poll the active bonds to fulfill all active monitoring timing requirements. 
+ */ + bool slaveAllowed(std::string &policyAlias, SharedPtr slave); + + /** + * @return The minimum interval required to poll the active bonds to fulfill all active monitoring timing requirements. + */ + int minReqPathMonitorInterval() { return _minReqPathMonitorInterval; } + + /** + * @return The minimum interval required to poll the active bonds to fulfill all active monitoring timing requirements. + */ + static void setMinReqPathMonitorInterval(int minReqPathMonitorInterval) { _minReqPathMonitorInterval = minReqPathMonitorInterval; } + + /** + * @return Whether the bonding layer is currently set up to be used. + */ + bool inUse() { return !_bondPolicyTemplates.empty() || _defaultBondingPolicy; } + + /** + * @param basePolicyName Bonding policy name (See ZeroTierOne.h) + * @return The bonding policy code for a given human-readable bonding policy name + */ + static int getPolicyCodeByStr(const std::string& basePolicyName) + { + if (basePolicyName == "active-backup") { return 1; } + if (basePolicyName == "broadcast") { return 2; } + if (basePolicyName == "balance-rr") { return 3; } + if (basePolicyName == "balance-xor") { return 4; } + if (basePolicyName == "balance-aware") { return 5; } + return 0; // "none" + } + + /** + * @param policy Bonding policy code (See ZeroTierOne.h) + * @return The human-readable name for the given bonding policy code + */ + static std::string getPolicyStrByCode(int policy) + { + if (policy == 1) { return "active-backup"; } + if (policy == 2) { return "broadcast"; } + if (policy == 3) { return "balance-rr"; } + if (policy == 4) { return "balance-xor"; } + if (policy == 5) { return "balance-aware"; } + return "none"; + } + + /** + * Sets the default bonding policy for new or undefined bonds. + * + * @param bp Bonding policy + */ + void setBondingLayerDefaultPolicy(uint8_t bp) { _defaultBondingPolicy = bp; } + + /** + * Sets the default (custom) bonding policy for new or undefined bonds. 
+ * + * @param alias Human-readable string alias for bonding policy + */ + void setBondingLayerDefaultPolicyStr(std::string alias) { _defaultBondingPolicyStr = alias; } + + /** + * @return The default bonding policy + */ + static int defaultBondingPolicy() { return _defaultBondingPolicy; } + + /** + * Add a user-defined slave to a given bonding policy. + * + * @param policyAlias User-defined custom name for variant of bonding policy + * @param slave Pointer to new slave definition + */ + void addCustomSlave(std::string& policyAlias, SharedPtr slave); + + /** + * Add a user-defined bonding policy that is based on one of the standard types. + * + * @param newBond Pointer to custom Bond object + * @return Whether a uniquely-named custom policy was successfully added + */ + bool addCustomPolicy(const SharedPtr& newBond); + + /** + * Assigns a specific bonding policy + * + * @param identity + * @param policyAlias + * @return + */ + bool assignBondingPolicyToPeer(int64_t identity, const std::string& policyAlias); + + /** + * Add a new bond to the bond controller. + * + * @param renv Runtime environment + * @param peer Remote peer that this bond services + * @return A pointer to the newly created Bond + */ + SharedPtr createTransportTriggeredBond(const RuntimeEnvironment *renv, const SharedPtr& peer); + + /** + * Periodically perform maintenance tasks for the bonding layer. + * + * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call + * @param now Current time + */ + void processBackgroundTasks(void *tPtr, int64_t now); + + /** + * Gets a reference to a physical slave definition given a policy alias and a local socket. + * + * @param policyAlias Policy in use + * @param localSocket Local source socket + * @return Physical slave definition + */ + SharedPtr getSlaveBySocket(const std::string& policyAlias, uint64_t localSocket); + + /** + * Gets a reference to a physical slave definition given its human-readable system name. 
+ * + * @param policyAlias Policy in use + * @param ifname Alphanumeric human-readable name + * @return Physical slave definition + */ + SharedPtr getSlaveByName(const std::string& policyAlias, const std::string& ifname); + + /** + * @param ifname Name of interface that we want to know if we can bind to + */ + bool allowedToBind(const std::string& ifname); + + uint64_t getBondStartTime() { return bondStartTime; } + +private: + + Phy *_phy; + const RuntimeEnvironment *RR; + + Mutex _bonds_m; + Mutex _slaves_m; + + /** + * The last time that the bond controller updated the set of bonds. + */ + uint64_t _lastBackgroundBondControlTaskCheck; + + /** + * The minimum monitoring interval among all paths in this bond. + */ + static int _minReqPathMonitorInterval; + + /** + * The default bonding policy used for new bonds unless otherwise specified. + */ + static uint8_t _defaultBondingPolicy; + + /** + * The default bonding policy used for new bonds unless otherwise specified. + */ + std::string _defaultBondingPolicyStr; + + /** + * All currently active bonds. + */ + std::map > _bonds; + + /** + * Map of peers to custom bonding policies + */ + std::map _policyTemplateAssignments; + + /** + * User-defined bonding policies (can be assigned to a peer) + */ + std::map > _bondPolicyTemplates; + + /** + * Set of slaves defined for a given bonding policy + */ + std::map > > _slaveDefinitions; + + /** + * Set of slave objects mapped to their physical interfaces + */ + std::map > > _interfaceToSlaveMap; + + // TODO: Remove + uint64_t bondStartTime; +}; + +} // namespace ZeroTier + +#endif \ No newline at end of file diff --git a/node/Constants.hpp b/node/Constants.hpp index 4b88798df..c27e02319 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. 
* - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. @@ -192,7 +192,7 @@ /** * Minimum delay between timer task checks to prevent thrashing */ -#define ZT_CORE_TIMER_TASK_GRANULARITY 500 +#define ZT_CORE_TIMER_TASK_GRANULARITY 60 /** * How often Topology::clean() and Network::clean() and similar are called, in ms @@ -253,203 +253,6 @@ */ #define ZT_LOCAL_CONF_FILE_CHECK_INTERVAL 10000 -/** - * How long before we consider a flow to be dead and remove it from the balancing - * policy's list. - */ -#define ZT_MULTIPATH_FLOW_EXPIRATION 60000 - -/** - * How frequently to check for changes to the system's network interfaces. When - * the service decides to use this constant it's because we want to react more - * quickly to new interfaces that pop up or go down. - */ -#define ZT_MULTIPATH_BINDER_REFRESH_PERIOD 5000 - -/** - * Packets are only used for QoS/ACK statistical sampling if their packet ID is divisible by - * this integer. This is to provide a mechanism for both peers to agree on which packets need - * special treatment without having to exchange information. Changing this value would be - * a breaking change and would necessitate a protocol version upgrade. Since each incoming and - * outgoing packet ID is checked against this value its evaluation is of the form: - * (id & (divisor - 1)) == 0, thus the divisor must be a power of 2. - * - * This value is set at (16) so that given a normally-distributed RNG output we will sample - * 1/16th (or ~6.25%) of packets. - */ -#define ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR 0x10 - -/** - * Time horizon for VERB_QOS_MEASUREMENT and VERB_ACK packet processing cutoff - */ -#define ZT_PATH_QOS_ACK_CUTOFF_TIME 30000 - -/** - * Maximum number of VERB_QOS_MEASUREMENT and VERB_ACK packets allowed to be - * processed within cutoff time. 
Separate totals are kept for each type but - * the limit is the same for both. - * - * This limits how often this peer will compute statistical estimates - * of various QoS measures from a VERB_QOS_MEASUREMENT or VERB_ACK packets to - * CUTOFF_LIMIT times per CUTOFF_TIME milliseconds per peer to prevent - * this from being useful for DOS amplification attacks. - */ -#define ZT_PATH_QOS_ACK_CUTOFF_LIMIT 128 - -/** - * Path choice history window size. This is used to keep track of which paths were - * previously selected so that we can maintain a target allocation over time. - */ -#define ZT_MULTIPATH_PROPORTION_WIN_SZ 128 - -/** - * How often we will sample packet latency. Should be at least greater than ZT_PING_CHECK_INVERVAL - * since we will record a 0 bit/s measurement if no valid latency measurement was made within this - * window of time. - */ -#define ZT_PATH_LATENCY_SAMPLE_INTERVAL (ZT_MULTIPATH_PEER_PING_PERIOD * 2) - -/** - * Interval used for rate-limiting the computation of path quality estimates. - */ -#define ZT_PATH_QUALITY_COMPUTE_INTERVAL 1000 - -/** - * Number of samples to consider when computing real-time path statistics - */ -#define ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ 128 - -/** - * Number of samples to consider when computing performing long-term path quality analysis. - * By default this value is set to ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ but can - * be set to any value greater than that to observe longer-term path quality behavior. 
- */ -#define ZT_PATH_QUALITY_METRIC_WIN_SZ ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ - -/** - * Maximum acceptable Packet Delay Variance (PDV) over a path - */ -#define ZT_PATH_MAX_PDV 1000 - -/** - * Maximum acceptable time interval between expectation and receipt of at least one ACK over a path - */ -#define ZT_PATH_MAX_AGE 30000 - -/** - * Maximum acceptable mean latency over a path - */ -#define ZT_PATH_MAX_MEAN_LATENCY 1000 - -/** - * How much each factor contributes to the "stability" score of a path - */ - -#if 0 -#define ZT_PATH_CONTRIB_PDV (1.5 / 3.0) -#define ZT_PATH_CONTRIB_LATENCY (0.0 / 3.0) -#define ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE (1.5 / 3.0) -#else -#define ZT_PATH_CONTRIB_PDV (1.0 / 3.0) -#define ZT_PATH_CONTRIB_LATENCY (1.0 / 3.0) -#define ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE (1.0 / 3.0) -#endif - -/** - * How much each factor contributes to the "quality" score of a path - */ -#if 0 -#define ZT_PATH_CONTRIB_STABILITY (2.00 / 3.0) -#define ZT_PATH_CONTRIB_THROUGHPUT (0.50 / 3.0) -#define ZT_PATH_CONTRIB_SCOPE (0.50 / 3.0) -#else -#define ZT_PATH_CONTRIB_STABILITY (0.75 / 3.0) -#define ZT_PATH_CONTRIB_THROUGHPUT (1.50 / 3.0) -#define ZT_PATH_CONTRIB_SCOPE (0.75 / 3.0) -#endif - -/** - * How often a QoS packet is sent - */ -#define ZT_PATH_QOS_INTERVAL 3000 - -/** - * Min and max acceptable sizes for a VERB_QOS_MEASUREMENT packet - */ -#define ZT_PATH_MIN_QOS_PACKET_SZ 8 + 1 -#define ZT_PATH_MAX_QOS_PACKET_SZ 1400 - -/** - * How many ID:sojourn time pairs in a single QoS packet - */ -#define ZT_PATH_QOS_TABLE_SIZE ((ZT_PATH_MAX_QOS_PACKET_SZ * 8) / (64 + 16)) - -/** - * Maximum number of outgoing packets we monitor for QoS information - */ -#define ZT_PATH_MAX_OUTSTANDING_QOS_RECORDS 128 - -/** - * Timeout for QoS records - */ -#define ZT_PATH_QOS_TIMEOUT (ZT_PATH_QOS_INTERVAL * 2) - -/** - * How often the service tests the path throughput - */ -#define ZT_PATH_THROUGHPUT_MEASUREMENT_INTERVAL (ZT_PATH_ACK_INTERVAL * 8) - -/** - * 
Minimum amount of time between each ACK packet - */ -#define ZT_PATH_ACK_INTERVAL 1000 - -/** - * How often an aggregate link statistics report is emitted into this tracing system - */ -#define ZT_PATH_AGGREGATE_STATS_REPORT_INTERVAL 30000 - -/** - * How much an aggregate link's component paths can vary from their target allocation - * before the link is considered to be in a state of imbalance. - */ -#define ZT_PATH_IMBALANCE_THRESHOLD 0.20 - -/** - * Max allowable time spent in any queue - */ -#define ZT_QOS_TARGET 5 // ms - -/** - * Time period where the time spent in the queue by a packet should fall below - * target at least once - */ -#define ZT_QOS_INTERVAL 100 // ms - -/** - * The number of bytes that each queue is allowed to send during each DRR cycle. - * This approximates a single-byte-based fairness queuing scheme - */ -#define ZT_QOS_QUANTUM ZT_DEFAULT_MTU - -/** - * The maximum total number of packets that can be queued among all - * active/inactive, old/new queues - */ -#define ZT_QOS_MAX_ENQUEUED_PACKETS 1024 - -/** - * Number of QoS queues (buckets) - */ -#define ZT_QOS_NUM_BUCKETS 9 - -/** - * All unspecified traffic is put in this bucket. Anything in a bucket with a smaller - * value is de-prioritized. Anything in a bucket with a higher value is prioritized over - * other traffic. - */ -#define ZT_QOS_DEFAULT_BUCKET 0 - /** * How frequently to send heartbeats over in-use paths */ @@ -465,21 +268,6 @@ */ #define ZT_PEER_PING_PERIOD 60000 -/** - * Delay between full-fledge pings of directly connected peers. - * - * With multipath bonding enabled ping peers more often to measure - * packet loss and latency. This uses more bandwidth so is disabled - * by default to avoid increasing idle bandwidth use for regular - * links. 
- */ -#define ZT_MULTIPATH_PEER_PING_PERIOD (ZT_PEER_PING_PERIOD / 10) - -/** - * How long before we consider a path to be dead in rapid fail-over scenarios - */ -#define ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD 250 - /** * Paths are considered expired if they have not sent us a real packet in this long */ @@ -490,6 +278,210 @@ */ #define ZT_PEER_EXPIRED_PATH_TRIAL_PERIOD (ZT_PEER_PING_PERIOD * 10) +/** + * Outgoing packets are only used for QoS/ACK statistical sampling if their + * packet ID is divisible by this integer. This is to provide a mechanism for + * both peers to agree on which packets need special treatment without having + * to exchange information. Changing this value would be a breaking change and + * would necessitate a protocol version upgrade. Since each incoming and + * outgoing packet ID is checked against this value its evaluation is of the + * form: + * + * (id & (divisor - 1)) == 0, thus the divisor must be a power of 2. + * + * This value is set at (16) so that given a normally-distributed RNG output + * we will sample 1/16th (or ~6.25%) of packets. + */ +#define ZT_QOS_ACK_DIVISOR 0x2 + +/** + * Time horizon for VERB_QOS_MEASUREMENT and VERB_ACK packet processing cutoff + */ +#define ZT_QOS_ACK_CUTOFF_TIME 30000 + +/** + * Maximum number of VERB_QOS_MEASUREMENT and VERB_ACK packets allowed to be + * processed within cutoff time. Separate totals are kept for each type but + * the limit is the same for both. + * + * This limits how often this peer will compute statistical estimates + * of various QoS measures from a VERB_QOS_MEASUREMENT or VERB_ACK packets to + * CUTOFF_LIMIT times per CUTOFF_TIME milliseconds per peer to prevent + * this from being useful for DOS amplification attacks. 
+ */ +#define ZT_QOS_ACK_CUTOFF_LIMIT 128 + +/** + * Minimum acceptable size for a VERB_QOS_MEASUREMENT packet + */ +#define ZT_QOS_MIN_PACKET_SIZE (8 + 1) + +/** + * Maximum acceptable size for a VERB_QOS_MEASUREMENT packet + */ +#define ZT_QOS_MAX_PACKET_SIZE 1400 + +/** + * How many ID:sojourn time pairs are in a single QoS packet + */ +#define ZT_QOS_TABLE_SIZE ((ZT_QOS_MAX_PACKET_SIZE * 8) / (64 + 16)) + +/** + * Maximum number of outgoing packets we monitor for QoS information + */ +#define ZT_QOS_MAX_OUTSTANDING_RECORDS (1024*16) + +/** + * Interval used for rate-limiting the computation of path quality estimates. + */ +#define ZT_QOS_COMPUTE_INTERVAL 1000 + +/** + * Number of samples to consider when processing real-time path statistics + */ +#define ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE 32 + +/** + * Number of samples to consider when processing long-term trends + */ +#define ZT_QOS_LONGTERM_SAMPLE_WIN_SIZE (ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE * 4) + +/** + * Max allowable time spent in any queue (in ms) + */ +#define ZT_AQM_TARGET 5 + +/** + * Time period where the time spent in the queue by a packet should fall below. + * target at least once. (in ms) + */ +#define ZT_AQM_INTERVAL 100 + +/** + * The number of bytes that each queue is allowed to send during each DRR cycle. + * This approximates a single-byte-based fairness queuing scheme. + */ +#define ZT_AQM_QUANTUM ZT_DEFAULT_MTU + +/** + * The maximum total number of packets that can be queued among all + * active/inactive, old/new queues. + */ +#define ZT_AQM_MAX_ENQUEUED_PACKETS 1024 + +/** + * Number of QoS queues (buckets) + */ +#define ZT_AQM_NUM_BUCKETS 9 + +/** + * All unspecified traffic is put in this bucket. Anything in a bucket with a + * smaller value is deprioritized. Anything in a bucket with a higher value is + prioritized over other traffic. + */ +#define ZT_AQM_DEFAULT_BUCKET 0 + +/** + * How long before we consider a path to be dead in the general sense. 
This is + * used while searching for default or alternative paths to try in the absence + * of direct guidance from the user or a selection policy. + */ +#define ZT_MULTIPATH_DEFAULT_FAILOVER_INTERVAL 10000 + +/** + * How often flows are evaluated + */ +#define ZT_MULTIPATH_FLOW_CHECK_INTERVAL 10000 + +/** + * How long before we consider a flow to be dead and remove it from the + * policy's list. + */ +#define ZT_MULTIPATH_FLOW_EXPIRATION_INTERVAL 30000 + +/** + * How often a flow's statistical counters are reset + */ +#define ZT_FLOW_STATS_RESET_INTERVAL ZT_MULTIPATH_FLOW_EXPIRATION_INTERVAL + +/** + * Maximum number of flows allowed before we start forcibly forgetting old ones + */ +#define ZT_FLOW_MAX_COUNT (1024*64) + +/** + * How often flows are rebalanced across slave interfaces (if at all) + */ +#define ZT_FLOW_MIN_REBALANCE_INTERVAL 5000 + +/** + * How often flows are rebalanced across slave interfaces (if at all) + */ +#define ZT_FLOW_REBALANCE_INTERVAL 5000 + +/** + * A defensive timer to prevent path quality metrics from being + * processed too often. + */ +#define ZT_BOND_BACKGROUND_TASK_MIN_INTERVAL ZT_CORE_TIMER_TASK_GRANULARITY + +/** + * How often a bonding policy's background tasks are processed, + * some need more frequent attention than others. + */ +#define ZT_MULTIPATH_ACTIVE_BACKUP_CHECK_INTERVAL ZT_CORE_TIMER_TASK_GRANULARITY + +/** + * Minimum amount of time (since a previous transition) before the active-backup bonding + * policy is allowed to transition to a different slave. Only valid for active-backup. + */ +#define ZT_MULTIPATH_MIN_ACTIVE_BACKUP_AUTOFLOP_INTERVAL 10000 + +/** + * How often a peer checks that incoming (and outgoing) traffic on a bonded link is + * appropriately paired. 
+ */ +#define ZT_PATH_NEGOTIATION_CHECK_INTERVAL 15000 + +/** + * Time horizon for path negotiation paths cutoff + */ +#define ZT_PATH_NEGOTIATION_CUTOFF_TIME 60000 + +/** + * Maximum number of path negotiations within cutoff time + * + * This limits response to PATH_NEGOTIATION to CUTOFF_LIMIT responses + * per CUTOFF_TIME milliseconds per peer to prevent this from being + * useful for DOS amplification attacks. + */ +#define ZT_PATH_NEGOTIATION_CUTOFF_LIMIT 8 + +/** + * How many times a peer will attempt to petition another peer to synchronize its + * traffic to the same path before giving up and surrendering to the other peer's preference. + */ +#define ZT_PATH_NEGOTIATION_TRY_COUNT 3 + +/** + * How much greater the quality of a path should be before an + * optimization procedure triggers a switch. + */ +#define ZT_MULTIPATH_ACTIVE_BACKUP_OPTIMIZE_MIN_THRESHOLD 0.10 + +/** + * Artificially inflates the failover score for paths which meet + * certain non-performance-related policy ranking criteria. + */ +#define ZT_MULTIPATH_FAILOVER_HANDICAP_PREFERRED 500 +#define ZT_MULTIPATH_FAILOVER_HANDICAP_PRIMARY 1000 +#define ZT_MULTIPATH_FAILOVER_HANDICAP_NEGOTIATED 5000 + +/** + * An indicator that no flow is to be associated with the given packet + */ +#define ZT_QOS_NO_FLOW -1 + /** * Timeout for overall peer activity (measured from last receive) */ @@ -557,20 +549,32 @@ */ #define ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH 120000 -/** - * Interval between direct path pushes in milliseconds if we are currently in multipath - * mode. In this mode the distinction between ZT_DIRECT_PATH_PUSH_INTERVAL and - * ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH does not exist since we want to inform other - * peers of this peer's new link/address as soon as possible so that both peers can - * begin forming an aggregated link. 
- */ -#define ZT_DIRECT_PATH_PUSH_INTERVAL_MULTIPATH (ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH / 16) - /** * Time horizon for push direct paths cutoff */ #define ZT_PUSH_DIRECT_PATHS_CUTOFF_TIME 30000 +/** + * Drainage constants for VERB_ECHO rate-limiters + */ +#define ZT_ECHO_CUTOFF_LIMIT ((1000 / ZT_CORE_TIMER_TASK_GRANULARITY) * ZT_MAX_PEER_NETWORK_PATHS) +#define ZT_ECHO_DRAINAGE_DIVISOR (1000 / ZT_ECHO_CUTOFF_LIMIT) + +/** + * Drainage constants for VERB_QOS rate-limiters + */ +#define ZT_QOS_CUTOFF_LIMIT ((1000 / ZT_CORE_TIMER_TASK_GRANULARITY) * ZT_MAX_PEER_NETWORK_PATHS) +#define ZT_QOS_DRAINAGE_DIVISOR (1000 / ZT_QOS_CUTOFF_LIMIT) + +/** + * Drainage constants for VERB_ACK rate-limiters + */ +#define ZT_ACK_CUTOFF_LIMIT 128 +#define ZT_ACK_DRAINAGE_DIVISOR (1000 / ZT_ACK_CUTOFF_LIMIT) + +#define ZT_MULTIPATH_DEFAULT_REFRCTORY_PERIOD 8000 +#define ZT_MULTIPATH_MAX_REFRACTORY_PERIOD 600000 + /** * Maximum number of direct path pushes within cutoff time * diff --git a/node/Flow.hpp b/node/Flow.hpp new file mode 100644 index 000000000..cb8c3e4aa --- /dev/null +++ b/node/Flow.hpp @@ -0,0 +1,123 @@ +/* + * Copyright (c)2013-2020 ZeroTier, Inc. + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file in the project's root directory. + * + * Change Date: 2024-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2.0 of the Apache License. + */ +/****/ + +#ifndef ZT_FLOW_HPP +#define ZT_FLOW_HPP + +#include "Path.hpp" +#include "SharedPtr.hpp" + +namespace ZeroTier { + +/** + * A protocol flow that is identified by the origin and destination port. 
+ */ +struct Flow +{ + /** + * @param flowId Given flow ID + * @param now Current time + */ + Flow(int32_t flowId, int64_t now) : + _flowId(flowId), + _bytesInPerUnitTime(0), + _bytesOutPerUnitTime(0), + _lastActivity(now), + _lastPathReassignment(0), + _assignedPath(SharedPtr()) + {} + + /** + * Reset flow statistics + */ + void resetByteCounts() + { + _bytesInPerUnitTime = 0; + _bytesOutPerUnitTime = 0; + } + + /** + * @return The Flow's ID + */ + int32_t id() { return _flowId; } + + /** + * @return Number of incoming bytes processed on this flow per unit time + */ + int64_t bytesInPerUnitTime() { return _bytesInPerUnitTime; } + + /** + * Record number of incoming bytes on this flow + * + * @param bytes Number of incoming bytes + */ + void recordIncomingBytes(uint64_t bytes) { _bytesInPerUnitTime += bytes; } + + /** + * @return Number of outgoing bytes processed on this flow per unit time + */ + int64_t bytesOutPerUnitTime() { return _bytesOutPerUnitTime; } + + /** + * Record number of outgoing bytes on this flow + * + * @param bytes + */ + void recordOutgoingBytes(uint64_t bytes) { _bytesOutPerUnitTime += bytes; } + + /** + * @return The total number of bytes processed on this flow + */ + uint64_t totalBytes() { return _bytesInPerUnitTime + _bytesOutPerUnitTime; } + + /** + * How long since a packet was sent or received in this flow + * + * @param now Current time + * @return The age of the flow in terms of last recorded activity + */ + int64_t age(int64_t now) { return now - _lastActivity; } + + /** + * Record that traffic was processed on this flow at the given time. 
+ * + * @param now Current time + */ + void updateActivity(int64_t now) { _lastActivity = now; } + + /** + * @return Path assigned to this flow + */ + SharedPtr assignedPath() { return _assignedPath; } + + /** + * @param path Assigned path over which this flow should be handled + */ + void assignPath(const SharedPtr &path, int64_t now) { + _assignedPath = path; + _lastPathReassignment = now; + } + + AtomicCounter __refCount; + + int32_t _flowId; + uint64_t _bytesInPerUnitTime; + uint64_t _bytesOutPerUnitTime; + int64_t _lastActivity; + int64_t _lastPathReassignment; + SharedPtr _assignedPath; +}; + +} // namespace ZeroTier + +#endif \ No newline at end of file diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp index 331446ced..702c08090 100644 --- a/node/IncomingPacket.cpp +++ b/node/IncomingPacket.cpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. 
@@ -35,10 +35,12 @@ #include "Tag.hpp" #include "Revocation.hpp" #include "Trace.hpp" +#include "Path.hpp" +#include "Bond.hpp" namespace ZeroTier { -bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) +bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr,int32_t flowId) { const Address sourceAddress(source()); @@ -67,7 +69,7 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) if (!trusted) { if (!dearmor(peer->key())) { RR->t->incomingPacketMessageAuthenticationFailure(tPtr,_path,packetId(),sourceAddress,hops(),"invalid MAC"); - _path->recordInvalidPacket(); + peer->recordIncomingInvalidPacket(_path); return true; } } @@ -78,11 +80,12 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) } const Packet::Verb v = verb(); + bool r = true; switch(v) { //case Packet::VERB_NOP: default: // ignore unknown verbs, but if they pass auth check they are "received" - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),v,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),v,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW); break; case Packet::VERB_HELLO: r = _doHELLO(RR,tPtr,true); break; case Packet::VERB_ACK: r = _doACK(RR,tPtr,peer); break; @@ -91,8 +94,8 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) case Packet::VERB_OK: r = _doOK(RR,tPtr,peer); break; case Packet::VERB_WHOIS: r = _doWHOIS(RR,tPtr,peer); break; case Packet::VERB_RENDEZVOUS: r = _doRENDEZVOUS(RR,tPtr,peer); break; - case Packet::VERB_FRAME: r = _doFRAME(RR,tPtr,peer); break; - case Packet::VERB_EXT_FRAME: r = _doEXT_FRAME(RR,tPtr,peer); break; + case Packet::VERB_FRAME: r = _doFRAME(RR,tPtr,peer,flowId); break; + case Packet::VERB_EXT_FRAME: r = _doEXT_FRAME(RR,tPtr,peer,flowId); break; case Packet::VERB_ECHO: r = _doECHO(RR,tPtr,peer); break; case Packet::VERB_MULTICAST_LIKE: r = _doMULTICAST_LIKE(RR,tPtr,peer); break; case Packet::VERB_NETWORK_CREDENTIALS: 
r = _doNETWORK_CREDENTIALS(RR,tPtr,peer); break; @@ -103,6 +106,7 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) case Packet::VERB_PUSH_DIRECT_PATHS: r = _doPUSH_DIRECT_PATHS(RR,tPtr,peer); break; case Packet::VERB_USER_MESSAGE: r = _doUSER_MESSAGE(RR,tPtr,peer); break; case Packet::VERB_REMOTE_TRACE: r = _doREMOTE_TRACE(RR,tPtr,peer); break; + case Packet::VERB_PATH_NEGOTIATION_REQUEST: r = _doPATH_NEGOTIATION_REQUEST(RR,tPtr,peer); break; } if (r) { RR->node->statsLogVerb((unsigned int)v,(unsigned int)size()); @@ -113,9 +117,6 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr) RR->sw->requestWhois(tPtr,RR->node->now(),sourceAddress); return false; } - } catch (int ztExcCode) { - RR->t->incomingPacketInvalid(tPtr,_path,packetId(),sourceAddress,hops(),verb(),"unexpected exception in tryDecode()"); - return true; } catch ( ... ) { RR->t->incomingPacketInvalid(tPtr,_path,packetId(),sourceAddress,hops(),verb(),"unexpected exception in tryDecode()"); return true; @@ -193,59 +194,59 @@ bool IncomingPacket::_doERROR(const RuntimeEnvironment *RR,void *tPtr,const Shar default: break; } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_ERROR,inRePacketId,inReVerb,false,networkId); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_ERROR,inRePacketId,inReVerb,false,networkId,ZT_QOS_NO_FLOW); return true; } bool IncomingPacket::_doACK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) { - if (!peer->rateGateACK(RR->node->now())) + SharedPtr bond = peer->bond(); + if (!bond || !bond->rateGateACK(RR->node->now())) { return true; + } /* Dissect incoming ACK packet. From this we can estimate current throughput of the path, establish known * maximums and detect packet loss. 
*/ - if (peer->localMultipathSupport()) { - int32_t ackedBytes; - if (payloadLength() != sizeof(ackedBytes)) { - return true; // ignore - } - memcpy(&ackedBytes, payload(), sizeof(ackedBytes)); - _path->receivedAck(RR->node->now(), Utils::ntoh(ackedBytes)); - peer->inferRemoteMultipathEnabled(); + int32_t ackedBytes; + if (payloadLength() != sizeof(ackedBytes)) { + return true; // ignore + } + memcpy(&ackedBytes, payload(), sizeof(ackedBytes)); + if (bond) { + bond->receivedAck(_path, RR->node->now(), Utils::ntoh(ackedBytes)); } - return true; } bool IncomingPacket::_doQOS_MEASUREMENT(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) { - if (!peer->rateGateQoS(RR->node->now())) + SharedPtr bond = peer->bond(); + if (!bond || !bond->rateGateQoS(RR->node->now())) { return true; + } /* Dissect incoming QoS packet. From this we can compute latency values and their variance. * The latency variance is used as a measure of "jitter". */ - if (peer->localMultipathSupport()) { - if (payloadLength() > ZT_PATH_MAX_QOS_PACKET_SZ || payloadLength() < ZT_PATH_MIN_QOS_PACKET_SZ) { - return true; // ignore - } - const int64_t now = RR->node->now(); - uint64_t rx_id[ZT_PATH_QOS_TABLE_SIZE]; - uint16_t rx_ts[ZT_PATH_QOS_TABLE_SIZE]; - char *begin = (char *)payload(); - char *ptr = begin; - int count = 0; - int len = payloadLength(); - // Read packet IDs and latency compensation intervals for each packet tracked by this QoS packet - while (ptr < (begin + len) && (count < ZT_PATH_QOS_TABLE_SIZE)) { - memcpy((void*)&rx_id[count], ptr, sizeof(uint64_t)); - ptr+=sizeof(uint64_t); - memcpy((void*)&rx_ts[count], ptr, sizeof(uint16_t)); - ptr+=sizeof(uint16_t); - count++; - } - _path->receivedQoS(now, count, rx_id, rx_ts); - peer->inferRemoteMultipathEnabled(); + if (payloadLength() > ZT_QOS_MAX_PACKET_SIZE || payloadLength() < ZT_QOS_MIN_PACKET_SIZE) { + return true; // ignore + } + const int64_t now = RR->node->now(); + uint64_t rx_id[ZT_QOS_TABLE_SIZE]; + uint16_t 
rx_ts[ZT_QOS_TABLE_SIZE]; + char *begin = (char *)payload(); + char *ptr = begin; + int count = 0; + unsigned int len = payloadLength(); + // Read packet IDs and latency compensation intervals for each packet tracked by this QoS packet + while (ptr < (begin + len) && (count < ZT_QOS_TABLE_SIZE)) { + memcpy((void*)&rx_id[count], ptr, sizeof(uint64_t)); + ptr+=sizeof(uint64_t); + memcpy((void*)&rx_ts[count], ptr, sizeof(uint16_t)); + ptr+=sizeof(uint16_t); + count++; + } + if (bond) { + bond->receivedQoS(_path, now, count, rx_id, rx_ts); } - return true; } @@ -441,11 +442,12 @@ bool IncomingPacket::_doHELLO(const RuntimeEnvironment *RR,void *tPtr,const bool } outp.setAt(worldUpdateSizeAt,(uint16_t)(outp.size() - (worldUpdateSizeAt + 2))); + peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now); outp.armor(peer->key(),true); _path->send(RR,tPtr,outp.data(),outp.size(),now); peer->setRemoteVersion(protoVersion,vMajor,vMinor,vRevision); // important for this to go first so received() knows the version - peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_HELLO,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_HELLO,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW); return true; } @@ -493,7 +495,10 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP } if (!hops()) { - _path->updateLatency((unsigned int)latency,RR->node->now()); + SharedPtr bond = peer->bond(); + if (!bond) { + _path->updateLatency((unsigned int)latency,RR->node->now()); + } } peer->setRemoteVersion(vProto,vMajor,vMinor,vRevision); @@ -522,8 +527,7 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP if (network) { const MulticastGroup mg(MAC(field(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_MAC,6),6),at(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_ADI)); const unsigned int count = at(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS + 4); - if 
(((ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS + 6) + (count * 5)) <= size()) - RR->mc->addMultiple(tPtr,RR->node->now(),networkId,mg,field(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS + 6,count * 5),count,at(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS)); + RR->mc->addMultiple(tPtr,RR->node->now(),networkId,mg,field(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS + 6,count * 5),count,at(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS)); } } break; @@ -556,7 +560,7 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP default: break; } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_OK,inRePacketId,inReVerb,false,networkId); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_OK,inRePacketId,inReVerb,false,networkId,ZT_QOS_NO_FLOW); return true; } @@ -591,7 +595,7 @@ bool IncomingPacket::_doWHOIS(const RuntimeEnvironment *RR,void *tPtr,const Shar _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_WHOIS,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_WHOIS,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW); return true; } @@ -615,13 +619,108 @@ bool IncomingPacket::_doRENDEZVOUS(const RuntimeEnvironment *RR,void *tPtr,const } } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_RENDEZVOUS,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_RENDEZVOUS,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW); return true; } -bool IncomingPacket::_doFRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) +// Returns true if packet appears valid; pos and proto will be set +static bool _ipv6GetPayload(const uint8_t *frameData,unsigned int frameLen,unsigned int &pos,unsigned int &proto) { + if (frameLen < 40) + return false; + pos = 40; + 
proto = frameData[6]; + while (pos <= frameLen) { + switch(proto) { + case 0: // hop-by-hop options + case 43: // routing + case 60: // destination options + case 135: // mobility options + if ((pos + 8) > frameLen) + return false; // invalid! + proto = frameData[pos]; + pos += ((unsigned int)frameData[pos + 1] * 8) + 8; + break; + + //case 44: // fragment -- we currently can't parse these and they are deprecated in IPv6 anyway + //case 50: + //case 51: // IPSec ESP and AH -- we have to stop here since this is encrypted stuff + default: + return true; + } + } + return false; // overflow == invalid +} + +bool IncomingPacket::_doFRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer,int32_t flowId) +{ + int32_t _flowId = ZT_QOS_NO_FLOW; + SharedPtr bond = peer->bond(); + if (bond && bond->flowHashingEnabled()) { + if (size() > ZT_PROTO_VERB_EXT_FRAME_IDX_PAYLOAD) { + const unsigned int etherType = at(ZT_PROTO_VERB_FRAME_IDX_ETHERTYPE); + const unsigned int frameLen = size() - ZT_PROTO_VERB_FRAME_IDX_PAYLOAD; + const uint8_t *const frameData = reinterpret_cast(data()) + ZT_PROTO_VERB_FRAME_IDX_PAYLOAD; + + if (etherType == ZT_ETHERTYPE_IPV4 && (frameLen >= 20)) { + uint16_t srcPort = 0; + uint16_t dstPort = 0; + uint8_t proto = (reinterpret_cast(frameData)[9]); + const unsigned int headerLen = 4 * (reinterpret_cast(frameData)[0] & 0xf); + switch(proto) { + case 0x01: // ICMP + //flowId = 0x01; + break; + // All these start with 16-bit source and destination port in that order + case 0x06: // TCP + case 0x11: // UDP + case 0x84: // SCTP + case 0x88: // UDPLite + if (frameLen > (headerLen + 4)) { + unsigned int pos = headerLen + 0; + srcPort = (reinterpret_cast(frameData)[pos++]) << 8; + srcPort |= (reinterpret_cast(frameData)[pos]); + pos++; + dstPort = (reinterpret_cast(frameData)[pos++]) << 8; + dstPort |= (reinterpret_cast(frameData)[pos]); + _flowId = dstPort ^ srcPort ^ proto; + } + break; + } + } + + if (etherType == ZT_ETHERTYPE_IPV6 && (frameLen >= 
40)) { + uint16_t srcPort = 0; + uint16_t dstPort = 0; + unsigned int pos; + unsigned int proto; + _ipv6GetPayload((const uint8_t *)frameData, frameLen, pos, proto); + switch(proto) { + case 0x3A: // ICMPv6 + //flowId = 0x3A; + break; + // All these start with 16-bit source and destination port in that order + case 0x06: // TCP + case 0x11: // UDP + case 0x84: // SCTP + case 0x88: // UDPLite + if (frameLen > (pos + 4)) { + srcPort = (reinterpret_cast(frameData)[pos++]) << 8; + srcPort |= (reinterpret_cast(frameData)[pos]); + pos++; + dstPort = (reinterpret_cast(frameData)[pos++]) << 8; + dstPort |= (reinterpret_cast(frameData)[pos]); + _flowId = dstPort ^ srcPort ^ proto; + } + break; + default: + break; + } + } + } + } + const uint64_t nwid = at(ZT_PROTO_VERB_FRAME_IDX_NETWORK_ID); const SharedPtr network(RR->node->network(nwid)); bool trustEstablished = false; @@ -641,13 +740,12 @@ bool IncomingPacket::_doFRAME(const RuntimeEnvironment *RR,void *tPtr,const Shar return false; } } - - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_FRAME,0,Packet::VERB_NOP,trustEstablished,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_FRAME,0,Packet::VERB_NOP,trustEstablished,nwid,_flowId); return true; } -bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) +bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer,int32_t flowId) { const uint64_t nwid = at(ZT_PROTO_VERB_EXT_FRAME_IDX_NETWORK_ID); const SharedPtr network(RR->node->network(nwid)); @@ -676,7 +774,7 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const const uint8_t *const frameData = (const uint8_t *)field(comLen + ZT_PROTO_VERB_EXT_FRAME_IDX_PAYLOAD,frameLen); if ((!from)||(from == network->mac())) { - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM 
is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId); // trustEstablished because COM is okay return true; } @@ -687,19 +785,19 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const network->learnBridgeRoute(from,peer->address()); } else { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"bridging not allowed (remote)"); - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId); // trustEstablished because COM is okay return true; } } else if (to != network->mac()) { if (to.isMulticast()) { if (network->config().multicastLimit == 0) { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"multicast disabled"); - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId); // trustEstablished because COM is okay return true; } } else if (!network->config().permitsBridging(RR->identity.address())) { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"bridging not allowed (local)"); - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId); // trustEstablished because COM is okay return true; } } @@ -715,13 +813,15 @@ 
bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const outp.append((uint8_t)Packet::VERB_EXT_FRAME); outp.append((uint64_t)packetId()); outp.append((uint64_t)nwid); + const int64_t now = RR->node->now(); + peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now); outp.armor(peer->key(),true); _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId); } else { - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,false,nwid,flowId); } return true; @@ -729,8 +829,10 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const bool IncomingPacket::_doECHO(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) { - if (!peer->rateGateEchoRequest(RR->node->now())) + uint64_t now = RR->node->now(); + if (!peer->rateGateEchoRequest(now)) { return true; + } const uint64_t pid = packetId(); Packet outp(peer->address(),RR->identity.address(),Packet::VERB_OK); @@ -738,10 +840,11 @@ bool IncomingPacket::_doECHO(const RuntimeEnvironment *RR,void *tPtr,const Share outp.append((uint64_t)pid); if (size() > ZT_PACKET_IDX_PAYLOAD) outp.append(reinterpret_cast(data()) + ZT_PACKET_IDX_PAYLOAD,size() - ZT_PACKET_IDX_PAYLOAD); + peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now); outp.armor(peer->key(),true); _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); - peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_ECHO,0,Packet::VERB_NOP,false,0); + 
peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_ECHO,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW); return true; } @@ -767,7 +870,7 @@ bool IncomingPacket::_doMULTICAST_LIKE(const RuntimeEnvironment *RR,void *tPtr,c RR->mc->add(tPtr,now,nwid,MulticastGroup(MAC(field(ptr + 8,6),6),at(ptr + 14)),peer->address()); } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_LIKE,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_LIKE,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW); return true; } @@ -889,7 +992,7 @@ bool IncomingPacket::_doNETWORK_CREDENTIALS(const RuntimeEnvironment *RR,void *t } } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CREDENTIALS,0,Packet::VERB_NOP,trustEstablished,(network) ? network->id() : 0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CREDENTIALS,0,Packet::VERB_NOP,trustEstablished,(network) ? 
network->id() : 0,ZT_QOS_NO_FLOW); return true; } @@ -915,7 +1018,7 @@ bool IncomingPacket::_doNETWORK_CONFIG_REQUEST(const RuntimeEnvironment *RR,void _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); } - peer->received(tPtr,_path,hopCount,requestPacketId,payloadLength(),Packet::VERB_NETWORK_CONFIG_REQUEST,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hopCount,requestPacketId,payloadLength(),Packet::VERB_NETWORK_CONFIG_REQUEST,0,Packet::VERB_NOP,false,nwid,ZT_QOS_NO_FLOW); return true; } @@ -931,12 +1034,14 @@ bool IncomingPacket::_doNETWORK_CONFIG(const RuntimeEnvironment *RR,void *tPtr,c outp.append((uint64_t)packetId()); outp.append((uint64_t)network->id()); outp.append((uint64_t)configUpdateId); + const int64_t now = RR->node->now(); + peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now); outp.armor(peer->key(),true); _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); } } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CONFIG,0,Packet::VERB_NOP,false,(network) ? network->id() : 0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CONFIG,0,Packet::VERB_NOP,false,(network) ? 
network->id() : 0,ZT_QOS_NO_FLOW); return true; } @@ -979,12 +1084,13 @@ bool IncomingPacket::_doMULTICAST_GATHER(const RuntimeEnvironment *RR,void *tPtr outp.append((uint32_t)mg.adi()); const unsigned int gatheredLocally = RR->mc->gather(peer->address(),nwid,mg,outp,gatherLimit); if (gatheredLocally > 0) { + peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now); outp.armor(peer->key(),true); _path->send(RR,tPtr,outp.data(),outp.size(),now); } } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_GATHER,0,Packet::VERB_NOP,trustEstablished,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_GATHER,0,Packet::VERB_NOP,trustEstablished,nwid,ZT_QOS_NO_FLOW); return true; } @@ -1032,19 +1138,19 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr, if (network->config().multicastLimit == 0) { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_MULTICAST_FRAME,from,to.mac(),"multicast disabled"); - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid,ZT_QOS_NO_FLOW); return true; } if ((frameLen > 0)&&(frameLen <= ZT_MAX_MTU)) { if (!to.mac().isMulticast()) { RR->t->incomingPacketInvalid(tPtr,_path,packetId(),source(),hops(),Packet::VERB_MULTICAST_FRAME,"destination not multicast"); - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid,ZT_QOS_NO_FLOW); // trustEstablished because COM is okay return true; } if ((!from)||(from.isMulticast())||(from == network->mac())) { 
RR->t->incomingPacketInvalid(tPtr,_path,packetId(),source(),hops(),Packet::VERB_MULTICAST_FRAME,"invalid source MAC"); - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid,ZT_QOS_NO_FLOW); // trustEstablished because COM is okay return true; } @@ -1058,7 +1164,7 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr, network->learnBridgeRoute(from,peer->address()); } else { RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_MULTICAST_FRAME,from,to.mac(),"bridging not allowed (remote)"); - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid,ZT_QOS_NO_FLOW); // trustEstablished because COM is okay return true; } } @@ -1076,12 +1182,14 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr, outp.append((uint32_t)to.adi()); outp.append((unsigned char)0x02); // flag 0x02 = contains gather results if (RR->mc->gather(peer->address(),nwid,to,outp,gatherLimit)) { + const int64_t now = RR->node->now(); + peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now); outp.armor(peer->key(),true); _path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now()); } } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid,ZT_QOS_NO_FLOW); } else { _sendErrorNeedCredentials(RR,tPtr,peer,nwid); return false; 
@@ -1094,9 +1202,8 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt { const int64_t now = RR->node->now(); - // First, subject this to a rate limit if (!peer->rateGatePushDirectPaths(now)) { - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW); return true; } @@ -1108,8 +1215,6 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt unsigned int ptr = ZT_PACKET_IDX_PAYLOAD + 2; while (count--) { // if ptr overflows Buffer will throw - // TODO: some flags are not yet implemented - unsigned int flags = (*this)[ptr++]; unsigned int extLen = at(ptr); ptr += 2; ptr += extLen; // unused right now @@ -1132,6 +1237,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt } } break; case 6: { + const InetAddress a(field(ptr,16),16,at(ptr + 16)); if ( ((flags & ZT_PUSH_DIRECT_PATHS_FLAG_FORGET_PATH) == 0) && // not being told to forget @@ -1149,7 +1255,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt ptr += addrLen; } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW); return true; } @@ -1165,7 +1271,7 @@ bool IncomingPacket::_doUSER_MESSAGE(const RuntimeEnvironment *RR,void *tPtr,con RR->node->postEvent(tPtr,ZT_EVENT_USER_MESSAGE,reinterpret_cast(&um)); } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_USER_MESSAGE,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_USER_MESSAGE,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW); return true; } @@ -1189,11 +1295,29 @@ bool 
IncomingPacket::_doREMOTE_TRACE(const RuntimeEnvironment *RR,void *tPtr,con } } - peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_REMOTE_TRACE,0,Packet::VERB_NOP,false,0); + peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_REMOTE_TRACE,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW); return true; } +bool IncomingPacket::_doPATH_NEGOTIATION_REQUEST(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer) +{ + uint64_t now = RR->node->now(); + SharedPtr bond = peer->bond(); + if (!bond || !bond->rateGatePathNegotiation(now)) { + return true; + } + if (payloadLength() != sizeof(int16_t)) { + return true; + } + int16_t remoteUtility = 0; + memcpy(&remoteUtility, payload(), sizeof(int16_t)); + if (peer->bond()) { + peer->bond()->processIncomingPathNegotiationRequest(now, _path, Utils::ntoh(remoteUtility)); + } + return true; +} + void IncomingPacket::_sendErrorNeedCredentials(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer,const uint64_t nwid) { Packet outp(source(),RR->identity.address(),Packet::VERB_ERROR); diff --git a/node/IncomingPacket.hpp b/node/IncomingPacket.hpp index cf9a6474f..b1032d99d 100644 --- a/node/IncomingPacket.hpp +++ b/node/IncomingPacket.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. 
@@ -100,7 +100,7 @@ public: * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call * @return True if decoding and processing is complete, false if caller should try again */ - bool tryDecode(const RuntimeEnvironment *RR,void *tPtr); + bool tryDecode(const RuntimeEnvironment *RR,void *tPtr,int32_t flowId); /** * @return Time of packet receipt / start of decode @@ -117,8 +117,8 @@ private: bool _doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doWHOIS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doRENDEZVOUS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); - bool _doFRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); - bool _doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); + bool _doFRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer,int32_t flowId); + bool _doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer,int32_t flowId); bool _doECHO(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doMULTICAST_LIKE(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doNETWORK_CREDENTIALS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); @@ -129,6 +129,7 @@ private: bool _doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doUSER_MESSAGE(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); bool _doREMOTE_TRACE(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); + bool _doPATH_NEGOTIATION_REQUEST(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer); void _sendErrorNeedCredentials(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr &peer,const uint64_t nwid); diff --git a/node/Node.cpp b/node/Node.cpp index 5330b74c2..e71c1424c 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. 
+ * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. @@ -48,6 +48,7 @@ Node::Node(void *uptr,void *tptr,const struct ZT_Node_Callbacks *callbacks,int64 _networks(8), _now(now), _lastPingCheck(0), + _lastGratuitousPingCheck(0), _lastHousekeepingRun(0), _lastMemoizedTraceSettings(0) { @@ -102,8 +103,9 @@ Node::Node(void *uptr,void *tptr,const struct ZT_Node_Callbacks *callbacks,int64 const unsigned long mcs = sizeof(Multicaster) + (((sizeof(Multicaster) & 0xf) != 0) ? (16 - (sizeof(Multicaster) & 0xf)) : 0); const unsigned long topologys = sizeof(Topology) + (((sizeof(Topology) & 0xf) != 0) ? (16 - (sizeof(Topology) & 0xf)) : 0); const unsigned long sas = sizeof(SelfAwareness) + (((sizeof(SelfAwareness) & 0xf) != 0) ? (16 - (sizeof(SelfAwareness) & 0xf)) : 0); + const unsigned long bc = sizeof(BondController) + (((sizeof(BondController) & 0xf) != 0) ? (16 - (sizeof(BondController) & 0xf)) : 0); - m = reinterpret_cast(::malloc(16 + ts + sws + mcs + topologys + sas)); + m = reinterpret_cast(::malloc(16 + ts + sws + mcs + topologys + sas + bc)); if (!m) throw std::bad_alloc(); RR->rtmem = m; @@ -118,12 +120,15 @@ Node::Node(void *uptr,void *tptr,const struct ZT_Node_Callbacks *callbacks,int64 RR->topology = new (m) Topology(RR,tptr); m += topologys; RR->sa = new (m) SelfAwareness(RR); + m += sas; + RR->bc = new (m) BondController(RR); } catch ( ... 
) { if (RR->sa) RR->sa->~SelfAwareness(); if (RR->topology) RR->topology->~Topology(); if (RR->mc) RR->mc->~Multicaster(); if (RR->sw) RR->sw->~Switch(); if (RR->t) RR->t->~Trace(); + if (RR->bc) RR->bc->~BondController(); ::free(m); throw; } @@ -142,6 +147,7 @@ Node::~Node() if (RR->mc) RR->mc->~Multicaster(); if (RR->sw) RR->sw->~Switch(); if (RR->t) RR->t->~Trace(); + if (RR->bc) RR->bc->~BondController(); ::free(RR->rtmem); } @@ -246,9 +252,23 @@ ZT_ResultCode Node::processBackgroundTasks(void *tptr,int64_t now,volatile int64 _now = now; Mutex::Lock bl(_backgroundTasksLock); + + unsigned long bondCheckInterval = ZT_CORE_TIMER_TASK_GRANULARITY; + if (RR->bc->inUse()) { + // Gratuitously ping active peers so that QoS metrics have enough data to work with (if active path monitoring is enabled) + bondCheckInterval = std::min(std::max(RR->bc->minReqPathMonitorInterval(), ZT_CORE_TIMER_TASK_GRANULARITY), ZT_PING_CHECK_INVERVAL); + if ((now - _lastGratuitousPingCheck) >= bondCheckInterval) { + Hashtable< Address,std::vector > alwaysContact; + _PingPeersThatNeedPing pfunc(RR,tptr,alwaysContact,now); + RR->topology->eachPeer<_PingPeersThatNeedPing &>(pfunc); + _lastGratuitousPingCheck = now; + } + RR->bc->processBackgroundTasks(tptr, now); + } + unsigned long timeUntilNextPingCheck = ZT_PING_CHECK_INVERVAL; const int64_t timeSinceLastPingCheck = now - _lastPingCheck; - if (timeSinceLastPingCheck >= ZT_PING_CHECK_INVERVAL) { + if (timeSinceLastPingCheck >= timeUntilNextPingCheck) { try { _lastPingCheck = now; @@ -354,7 +374,7 @@ ZT_ResultCode Node::processBackgroundTasks(void *tptr,int64_t now,volatile int64 } try { - *nextBackgroundTaskDeadline = now + (int64_t)std::max(std::min(timeUntilNextPingCheck,RR->sw->doTimerTasks(tptr,now)),(unsigned long)ZT_CORE_TIMER_TASK_GRANULARITY); + *nextBackgroundTaskDeadline = now + (int64_t)std::max(std::min(bondCheckInterval,std::min(timeUntilNextPingCheck,RR->sw->doTimerTasks(tptr,now))),(unsigned 
long)ZT_CORE_TIMER_TASK_GRANULARITY); } catch ( ... ) { return ZT_RESULT_FATAL_ERROR_INTERNAL; } @@ -461,7 +481,7 @@ ZT_PeerList *Node::peers() const for(std::vector< std::pair< Address,SharedPtr > >::iterator pi(peers.begin());pi!=peers.end();++pi) { ZT_Peer *p = &(pl->peers[pl->peerCount++]); p->address = pi->second->address().toInt(); - p->hadAggregateLink = 0; + p->isBonded = 0; if (pi->second->remoteVersionKnown()) { p->versionMajor = pi->second->remoteVersionMajor(); p->versionMinor = pi->second->remoteVersionMinor(); @@ -478,28 +498,24 @@ ZT_PeerList *Node::peers() const std::vector< SharedPtr > paths(pi->second->paths(_now)); SharedPtr bestp(pi->second->getAppropriatePath(_now,false)); - p->hadAggregateLink |= pi->second->hasAggregateLink(); p->pathCount = 0; for(std::vector< SharedPtr >::iterator path(paths.begin());path!=paths.end();++path) { memcpy(&(p->paths[p->pathCount].address),&((*path)->address()),sizeof(struct sockaddr_storage)); + //memcpy(&(p->paths[p->pathCount].ifname,&((*path)->slave()),32);) + p->paths[p->pathCount].localSocket = (*path)->localSocket(); p->paths[p->pathCount].lastSend = (*path)->lastOut(); p->paths[p->pathCount].lastReceive = (*path)->lastIn(); p->paths[p->pathCount].trustedPathId = RR->topology->getOutboundPathTrust((*path)->address()); p->paths[p->pathCount].expired = 0; p->paths[p->pathCount].preferred = ((*path) == bestp) ? 
1 : 0; - p->paths[p->pathCount].latency = (float)(*path)->latency(); - p->paths[p->pathCount].packetDelayVariance = (*path)->packetDelayVariance(); - p->paths[p->pathCount].throughputDisturbCoeff = (*path)->throughputDisturbanceCoefficient(); - p->paths[p->pathCount].packetErrorRatio = (*path)->packetErrorRatio(); - p->paths[p->pathCount].packetLossRatio = (*path)->packetLossRatio(); - p->paths[p->pathCount].stability = (*path)->lastComputedStability(); - p->paths[p->pathCount].throughput = (*path)->meanThroughput(); - p->paths[p->pathCount].maxThroughput = (*path)->maxLifetimeThroughput(); - p->paths[p->pathCount].allocation = (float)(*path)->allocation() / (float)255; - p->paths[p->pathCount].ifname = (*path)->getName(); - + //p->paths[p->pathCount].age = (*path)->age(_now); + p->paths[p->pathCount].scope = (*path)->ipScope(); ++p->pathCount; } + if (pi->second->bond()) { + p->isBonded = pi->second->bond(); + p->bondingPolicy = pi->second->bond()->getPolicy(); + } } return pl; diff --git a/node/Node.hpp b/node/Node.hpp index 21d49f515..6461e4cd6 100644 --- a/node/Node.hpp +++ b/node/Node.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. 
@@ -34,6 +34,7 @@ #include "Salsa20.hpp" #include "NetworkController.hpp" #include "Hashtable.hpp" +#include "BondController.hpp" // Bit mask for "expecting reply" hash #define ZT_EXPECTING_REPLIES_BUCKET_MASK1 255 @@ -186,6 +187,8 @@ public: inline const Identity &identity() const { return _RR.identity; } + inline BondController *bondController() const { return _RR.bc; } + /** * Register that we are expecting a reply to a packet ID * @@ -247,9 +250,6 @@ public: inline const Address &remoteTraceTarget() const { return _remoteTraceTarget; } inline Trace::Level remoteTraceLevel() const { return _remoteTraceLevel; } - inline void setMultipathMode(uint8_t mode) { _multipathMode = mode; } - inline uint8_t getMultipathMode() { return _multipathMode; } - inline bool localControllerHasAuthorized(const int64_t now,const uint64_t nwid,const Address &addr) const { _localControllerAuthorizations_m.lock(); @@ -306,10 +306,9 @@ private: Address _remoteTraceTarget; enum Trace::Level _remoteTraceLevel; - uint8_t _multipathMode; - volatile int64_t _now; int64_t _lastPingCheck; + int64_t _lastGratuitousPingCheck; int64_t _lastHousekeepingRun; int64_t _lastMemoizedTraceSettings; volatile int64_t _prngState[2]; diff --git a/node/Packet.cpp b/node/Packet.cpp index 25006416a..381864a45 100644 --- a/node/Packet.cpp +++ b/node/Packet.cpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. diff --git a/node/Packet.hpp b/node/Packet.hpp index 53a1883ce..ca789db81 100644 --- a/node/Packet.hpp +++ b/node/Packet.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. 
+ * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. @@ -931,13 +931,13 @@ public: * * Upon receipt of this packet, the local peer will verify that the correct * number of bytes were received by the remote peer. If these values do - * not agree that could be an indicator of packet loss. + * not agree that could be an indication of packet loss. * * Additionally, the local peer knows the interval of time that has * elapsed since the last received ACK. With this information it can compute * a rough estimate of the current throughput. * - * This is sent at a maximum rate of once per every ZT_PATH_ACK_INTERVAL + * This is sent at a maximum rate of once per every ZT_QOS_ACK_INTERVAL */ VERB_ACK = 0x12, @@ -963,7 +963,8 @@ public: * measure of the amount of time between when a packet was received and the * egress time of its tracking QoS packet. * - * This is sent at a maximum rate of once per every ZT_PATH_QOS_INTERVAL + * This is sent at a maximum rate of once per every + * ZT_QOS_MEASUREMENT_INTERVAL */ VERB_QOS_MEASUREMENT = 0x13, @@ -996,7 +997,34 @@ public: * node on startup. This is helpful in identifying traces from different * members of a cluster. */ - VERB_REMOTE_TRACE = 0x15 + VERB_REMOTE_TRACE = 0x15, + + /** + * A request to a peer to use a specific path in a multi-path scenario: + * <[2] 16-bit unsigned integer that encodes a path choice utility> + * + * This is sent when a node operating in multipath mode observes that + * its inbound and outbound traffic aren't going over the same path. 
The + * node will compute its perceived utility for using its chosen outbound + * path and send this to a peer in an attempt to petition it to send + * its traffic over this same path. + * + * Scenarios: + * + * (1) Remote peer utility is GREATER than ours: + * - Remote peer will refuse the petition and continue using current path + * (2) Remote peer utility is LESS than than ours: + * - Remote peer will accept the petition and switch to our chosen path + * (3) Remote peer utility is EQUAL to our own: + * - To prevent confusion and flapping, both side will agree to use the + * numerical values of their identities to determine which path to use. + * The peer with the greatest identity will win. + * + * If a node petitions a peer repeatedly with no effect it will regard + * that as a refusal by the remote peer, in this case if the utility is + * negligible it will voluntarily switch to the remote peer's chosen path. + */ + VERB_PATH_NEGOTIATION_REQUEST = 0x16 }; /** diff --git a/node/Path.hpp b/node/Path.hpp index fc5dbff16..9c54f718f 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. 
@@ -26,12 +26,11 @@ #include "SharedPtr.hpp" #include "AtomicCounter.hpp" #include "Utils.hpp" -#include "RingBuffer.hpp" #include "Packet.hpp" +#include "RingBuffer.hpp" +//#include "Bond.hpp" -#include "../osdep/Phy.hpp" - -#include "../include/ZeroTierDebug.h" +#include "../osdep/Slave.hpp" /** * Maximum return value of preferenceRank() @@ -48,7 +47,8 @@ class RuntimeEnvironment; class Path { friend class SharedPtr; - Phy *_phy; + friend class Bond; + //friend class SharedPtr; public: /** @@ -87,77 +87,113 @@ public: _lastOut(0), _lastIn(0), _lastTrustEstablishedPacketReceived(0), - _lastPathQualityComputeTime(0), _localSocket(-1), _latency(0xffff), _addr(), _ipScope(InetAddress::IP_SCOPE_NONE), - _lastAck(0), - _lastThroughputEstimation(0), + _lastAckReceived(0), + _lastAckSent(0), _lastQoSMeasurement(0), - _lastQoSRecordPurge(0), + _lastThroughputEstimation(0), + _lastRefractoryUpdate(0), + _lastAliveToggle(0), + _lastEligibilityState(false), + _lastTrialBegin(0), + _refractoryPeriod(0), + _monitorInterval(0), + _upDelay(0), + _downDelay(0), + _ipvPref(0), + _mode(0), + _onlyPathOnSlave(false), + _enabled(false), + _bonded(false), + _negotiated(false), + _deprecated(false), + _shouldReallocateFlows(false), + _assignedFlowCount(0), + _latencyMean(0), + _latencyVariance(0), + _packetLossRatio(0), + _packetErrorRatio(0), + _throughputMean(0), + _throughputMax(0), + _throughputVariance(0), + _allocation(0), + _byteLoad(0), + _relativeByteLoad(0), + _affinity(0), + _failoverScore(0), _unackedBytes(0), - _expectingAckAsOf(0), _packetsReceivedSinceLastAck(0), _packetsReceivedSinceLastQoS(0), - _maxLifetimeThroughput(0), - _lastComputedMeanThroughput(0), _bytesAckedSinceLastThroughputEstimation(0), - _lastComputedMeanLatency(0.0), - _lastComputedPacketDelayVariance(0.0), - _lastComputedPacketErrorRatio(0.0), - _lastComputedPacketLossRatio(0), - _lastComputedStability(0.0), - _lastComputedRelativeQuality(0), - _lastComputedThroughputDistCoeff(0.0), - _lastAllocation(0) 
- { - memset(_ifname, 0, 16); - memset(_addrString, 0, sizeof(_addrString)); - } + _packetsIn(0), + _packetsOut(0), + _prevEligibility(false) + {} Path(const int64_t localSocket,const InetAddress &addr) : _lastOut(0), _lastIn(0), _lastTrustEstablishedPacketReceived(0), - _lastPathQualityComputeTime(0), _localSocket(localSocket), _latency(0xffff), _addr(addr), _ipScope(addr.ipScope()), - _lastAck(0), - _lastThroughputEstimation(0), + _lastAckReceived(0), + _lastAckSent(0), _lastQoSMeasurement(0), - _lastQoSRecordPurge(0), + _lastThroughputEstimation(0), + _lastRefractoryUpdate(0), + _lastAliveToggle(0), + _lastEligibilityState(false), + _lastTrialBegin(0), + _refractoryPeriod(0), + _monitorInterval(0), + _upDelay(0), + _downDelay(0), + _ipvPref(0), + _mode(0), + _onlyPathOnSlave(false), + _enabled(false), + _bonded(false), + _negotiated(false), + _deprecated(false), + _shouldReallocateFlows(false), + _assignedFlowCount(0), + _latencyMean(0), + _latencyVariance(0), + _packetLossRatio(0), + _packetErrorRatio(0), + _throughputMean(0), + _throughputMax(0), + _throughputVariance(0), + _allocation(0), + _byteLoad(0), + _relativeByteLoad(0), + _affinity(0), + _failoverScore(0), _unackedBytes(0), - _expectingAckAsOf(0), _packetsReceivedSinceLastAck(0), _packetsReceivedSinceLastQoS(0), - _maxLifetimeThroughput(0), - _lastComputedMeanThroughput(0), _bytesAckedSinceLastThroughputEstimation(0), - _lastComputedMeanLatency(0.0), - _lastComputedPacketDelayVariance(0.0), - _lastComputedPacketErrorRatio(0.0), - _lastComputedPacketLossRatio(0), - _lastComputedStability(0.0), - _lastComputedRelativeQuality(0), - _lastComputedThroughputDistCoeff(0.0), - _lastAllocation(0) - { - memset(_ifname, 0, 16); - memset(_addrString, 0, sizeof(_addrString)); - if (_localSocket != -1) { - _phy->getIfName((PhySocket *) ((uintptr_t) _localSocket), _ifname, 16); - } - } + _packetsIn(0), + _packetsOut(0), + _prevEligibility(false) + {} /** * Called when a packet is received from this remote path, 
regardless of content * * @param t Time of receive */ - inline void received(const uint64_t t) { _lastIn = t; } + inline void received(const uint64_t t) { + _lastIn = t; + if (!_prevEligibility) { + _lastAliveToggle = _lastIn; + } + } /** * Set time last trusted packet was received (done in Peer::received()) @@ -197,7 +233,6 @@ public: else { _latency = l; } - _latencySamples.push(l); } /** @@ -286,341 +321,32 @@ public: } /** - * Record statistics on outgoing packets. Used later to estimate QoS metrics. - * - * @param now Current time - * @param packetId ID of packet - * @param payloadLength Length of payload - * @param verb Packet verb + * @param bonded Whether this path is part of a bond. */ - inline void recordOutgoingPacket(int64_t now, int64_t packetId, uint16_t payloadLength, Packet::Verb verb) - { - Mutex::Lock _l(_statistics_m); - if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) { - if ((packetId & (ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR - 1)) == 0) { - _unackedBytes += payloadLength; - // Take note that we're expecting a VERB_ACK on this path as of a specific time - _expectingAckAsOf = ackAge(now) > ZT_PATH_ACK_INTERVAL ? _expectingAckAsOf : now; - if (_outQoSRecords.size() < ZT_PATH_MAX_OUTSTANDING_QOS_RECORDS) { - _outQoSRecords[packetId] = now; - } - } - } - } + inline void setBonded(bool bonded) { _bonded = bonded; } /** - * Record statistics on incoming packets. Used later to estimate QoS metrics. - * - * @param now Current time - * @param packetId ID of packet - * @param payloadLength Length of payload - * @param verb Packet verb + * @return True if this path is currently part of a bond. 
*/ - inline void recordIncomingPacket(int64_t now, int64_t packetId, uint16_t payloadLength, Packet::Verb verb) - { - Mutex::Lock _l(_statistics_m); - if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) { - if ((packetId & (ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR - 1)) == 0) { - _inACKRecords[packetId] = payloadLength; - _packetsReceivedSinceLastAck++; - _inQoSRecords[packetId] = now; - _packetsReceivedSinceLastQoS++; - } - _packetValiditySamples.push(true); - } - } - - /** - * Record that we've received a VERB_ACK on this path, also compute throughput if required. - * - * @param now Current time - * @param ackedBytes Number of bytes acknowledged by other peer - */ - inline void receivedAck(int64_t now, int32_t ackedBytes) - { - _expectingAckAsOf = 0; - _unackedBytes = (ackedBytes > _unackedBytes) ? 0 : _unackedBytes - ackedBytes; - int64_t timeSinceThroughputEstimate = (now - _lastThroughputEstimation); - if (timeSinceThroughputEstimate >= ZT_PATH_THROUGHPUT_MEASUREMENT_INTERVAL) { - uint64_t throughput = (uint64_t)((float)(_bytesAckedSinceLastThroughputEstimation * 8) / ((float)timeSinceThroughputEstimate / (float)1000)); - _throughputSamples.push(throughput); - _maxLifetimeThroughput = throughput > _maxLifetimeThroughput ? 
throughput : _maxLifetimeThroughput; - _lastThroughputEstimation = now; - _bytesAckedSinceLastThroughputEstimation = 0; - } else { - _bytesAckedSinceLastThroughputEstimation += ackedBytes; - } - } - - /** - * @return Number of bytes this peer is responsible for ACKing since last ACK - */ - inline int32_t bytesToAck() - { - Mutex::Lock _l(_statistics_m); - int32_t bytesToAck = 0; - std::map::iterator it = _inACKRecords.begin(); - while (it != _inACKRecords.end()) { - bytesToAck += it->second; - it++; - } - return bytesToAck; - } - - /** - * @return Number of bytes thus far sent that have not been acknowledged by the remote peer - */ - inline int64_t unackedSentBytes() - { - return _unackedBytes; - } - - /** - * Account for the fact that an ACK was just sent. Reset counters, timers, and clear statistics buffers - * - * @param Current time - */ - inline void sentAck(int64_t now) - { - Mutex::Lock _l(_statistics_m); - _inACKRecords.clear(); - _packetsReceivedSinceLastAck = 0; - _lastAck = now; - } - - /** - * Receive QoS data, match with recorded egress times from this peer, compute latency - * estimates. - * - * @param now Current time - * @param count Number of records - * @param rx_id table of packet IDs - * @param rx_ts table of holding times - */ - inline void receivedQoS(int64_t now, int count, uint64_t *rx_id, uint16_t *rx_ts) - { - Mutex::Lock _l(_statistics_m); - // Look up egress times and compute latency values for each record - std::map::iterator it; - for (int j=0; jsecond); - uint16_t rtt_compensated = rtt - rx_ts[j]; - uint16_t latency = rtt_compensated / 2; - updateLatency(latency, now); - _outQoSRecords.erase(it); - } - } - } - - /** - * Generate the contents of a VERB_QOS_MEASUREMENT packet. 
- * - * @param now Current time - * @param qosBuffer destination buffer - * @return Size of payload - */ - inline int32_t generateQoSPacket(int64_t now, char *qosBuffer) - { - Mutex::Lock _l(_statistics_m); - int32_t len = 0; - std::map::iterator it = _inQoSRecords.begin(); - int i=0; - while (i<_packetsReceivedSinceLastQoS && it != _inQoSRecords.end()) { - uint64_t id = it->first; - memcpy(qosBuffer, &id, sizeof(uint64_t)); - qosBuffer+=sizeof(uint64_t); - uint16_t holdingTime = (uint16_t)(now - it->second); - memcpy(qosBuffer, &holdingTime, sizeof(uint16_t)); - qosBuffer+=sizeof(uint16_t); - len+=sizeof(uint64_t)+sizeof(uint16_t); - _inQoSRecords.erase(it++); - i++; - } - return len; - } - - /** - * Account for the fact that a VERB_QOS_MEASUREMENT was just sent. Reset timers. - * - * @param Current time - */ - inline void sentQoS(int64_t now) { - _packetsReceivedSinceLastQoS = 0; - _lastQoSMeasurement = now; - } - - /** - * @param now Current time - * @return Whether an ACK (VERB_ACK) packet needs to be emitted at this time - */ - inline bool needsToSendAck(int64_t now) { - return ((now - _lastAck) >= ZT_PATH_ACK_INTERVAL || - (_packetsReceivedSinceLastAck == ZT_PATH_QOS_TABLE_SIZE)) && _packetsReceivedSinceLastAck; - } - - /** - * @param now Current time - * @return Whether a QoS (VERB_QOS_MEASUREMENT) packet needs to be emitted at this time - */ - inline bool needsToSendQoS(int64_t now) { - return ((_packetsReceivedSinceLastQoS >= ZT_PATH_QOS_TABLE_SIZE) || - ((now - _lastQoSMeasurement) > ZT_PATH_QOS_INTERVAL)) && _packetsReceivedSinceLastQoS; - } - - /** - * How much time has elapsed since we've been expecting a VERB_ACK on this path. This value - * is used to determine a more relevant path "age". This lets us penalize paths which are no - * longer ACKing, but not those that simple aren't being used to carry traffic at the - * current time. - */ - inline int64_t ackAge(int64_t now) { return _expectingAckAsOf ? 
now - _expectingAckAsOf : 0; } - - /** - * The maximum observed throughput (in bits/s) for this path - */ - inline uint64_t maxLifetimeThroughput() { return _maxLifetimeThroughput; } - - /** - * @return The mean throughput (in bits/s) of this link - */ - inline uint64_t meanThroughput() { return _lastComputedMeanThroughput; } - - /** - * Assign a new relative quality value for this path in the aggregate link - * - * @param rq Quality of this path in comparison to other paths available to this peer - */ - inline void updateRelativeQuality(float rq) { _lastComputedRelativeQuality = rq; } - - /** - * @return Quality of this path compared to others in the aggregate link - */ - inline float relativeQuality() { return _lastComputedRelativeQuality; } - - /** - * Assign a new allocation value for this path in the aggregate link - * - * @param allocation Percentage of traffic to be sent over this path to a peer - */ - inline void updateComponentAllocationOfAggregateLink(unsigned char allocation) { _lastAllocation = allocation; } - - /** - * @return Percentage of traffic allocated to this path in the aggregate link - */ - inline unsigned char allocation() { return _lastAllocation; } - - /** - * @return Stability estimates can become expensive to compute, we cache the most recent result. 
- */ - inline float lastComputedStability() { return _lastComputedStability; } - - /** - * @return A pointer to a cached copy of the human-readable name of the interface this Path's localSocket is bound to - */ - inline char *getName() { return _ifname; } - - /** - * @return Packet delay variance - */ - inline float packetDelayVariance() { return _lastComputedPacketDelayVariance; } - - /** - * @return Previously-computed mean latency - */ - inline float meanLatency() { return _lastComputedMeanLatency; } - - /** - * @return Packet loss rate (PLR) - */ - inline float packetLossRatio() { return _lastComputedPacketLossRatio; } - - /** - * @return Packet error ratio (PER) - */ - inline float packetErrorRatio() { return _lastComputedPacketErrorRatio; } - - /** - * Record an invalid incoming packet. This packet failed MAC/compression/cipher checks and will now - * contribute to a Packet Error Ratio (PER). - */ - inline void recordInvalidPacket() { _packetValiditySamples.push(false); } - - /** - * @return A pointer to a cached copy of the address string for this Path (For debugging only) - */ - inline char *getAddressString() { return _addrString; } - - /** - * @return The current throughput disturbance coefficient - */ - inline float throughputDisturbanceCoefficient() { return _lastComputedThroughputDistCoeff; } - - /** - * Compute and cache stability and performance metrics. The resultant stability coefficient is a measure of how "well behaved" - * this path is. This figure is substantially different from (but required for the estimation of the path's overall "quality". 
- * - * @param now Current time - */ - inline void processBackgroundPathMeasurements(const int64_t now) - { - if (now - _lastPathQualityComputeTime > ZT_PATH_QUALITY_COMPUTE_INTERVAL) { - Mutex::Lock _l(_statistics_m); - _lastPathQualityComputeTime = now; - address().toString(_addrString); - _lastComputedMeanLatency = _latencySamples.mean(); - _lastComputedPacketDelayVariance = _latencySamples.stddev(); // Similar to "jitter" (SEE: RFC 3393, RFC 4689) - _lastComputedMeanThroughput = (uint64_t)_throughputSamples.mean(); - - // If no packet validity samples, assume PER==0 - _lastComputedPacketErrorRatio = 1 - (_packetValiditySamples.count() ? _packetValiditySamples.mean() : 1); - - // Compute path stability - // Normalize measurements with wildly different ranges into a reasonable range - float normalized_pdv = Utils::normalize(_lastComputedPacketDelayVariance, 0, ZT_PATH_MAX_PDV, 0, 10); - float normalized_la = Utils::normalize(_lastComputedMeanLatency, 0, ZT_PATH_MAX_MEAN_LATENCY, 0, 10); - float throughput_cv = _throughputSamples.mean() > 0 ? _throughputSamples.stddev() / _throughputSamples.mean() : 1; - - // Form an exponential cutoff and apply contribution weights - float pdv_contrib = expf((-1.0f)*normalized_pdv) * (float)ZT_PATH_CONTRIB_PDV; - float latency_contrib = expf((-1.0f)*normalized_la) * (float)ZT_PATH_CONTRIB_LATENCY; - - // Throughput Disturbance Coefficient - float throughput_disturbance_contrib = expf((-1.0f)*throughput_cv) * (float)ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE; - _throughputDisturbanceSamples.push(throughput_cv); - _lastComputedThroughputDistCoeff = _throughputDisturbanceSamples.mean(); - - // Obey user-defined ignored contributions - pdv_contrib = ZT_PATH_CONTRIB_PDV > 0.0 ? pdv_contrib : 1; - latency_contrib = ZT_PATH_CONTRIB_LATENCY > 0.0 ? latency_contrib : 1; - throughput_disturbance_contrib = ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE > 0.0 ? 
throughput_disturbance_contrib : 1; - - // Stability - _lastComputedStability = pdv_contrib + latency_contrib + throughput_disturbance_contrib; - _lastComputedStability *= 1 - _lastComputedPacketErrorRatio; - - // Prevent QoS records from sticking around for too long - std::map::iterator it = _outQoSRecords.begin(); - while (it != _outQoSRecords.end()) { - // Time since egress of tracked packet - if ((now - it->second) >= ZT_PATH_QOS_TIMEOUT) { - _outQoSRecords.erase(it++); - } else { it++; } - } - } - } + inline bool bonded() { return _bonded; } /** * @return True if this path is alive (receiving heartbeats) */ - inline bool alive(const int64_t now) const { return ((now - _lastIn) < (ZT_PATH_HEARTBEAT_PERIOD + 5000)); } + inline bool alive(const int64_t now, bool bondingEnabled = false) const { + return (bondingEnabled && _monitorInterval) ? ((now - _lastIn) < (_monitorInterval * 3)) : ((now - _lastIn) < (ZT_PATH_HEARTBEAT_PERIOD + 5000)); + } /** * @return True if this path needs a heartbeat */ inline bool needsHeartbeat(const int64_t now) const { return ((now - _lastOut) >= ZT_PATH_HEARTBEAT_PERIOD); } + /** + * @return True if this path needs a heartbeat in accordance to the user-specified path monitor frequency + */ + inline bool needsGratuitousHeartbeat(const int64_t now) { return allowed() && (_monitorInterval > 0) && ((now - _lastOut) >= _monitorInterval); } + /** * @return Last time we sent something */ @@ -631,62 +357,339 @@ public: */ inline int64_t lastIn() const { return _lastIn; } + /** + * @return the age of the path in terms of receiving packets + */ + inline int64_t age(int64_t now) { return (now - _lastIn); } + /** * @return Time last trust-established packet was received */ inline int64_t lastTrustEstablishedPacketReceived() const { return _lastTrustEstablishedPacketReceived; } + /** + * @return Time since last VERB_ACK was received + */ + inline int64_t ackAge(int64_t now) { return _lastAckReceived ? 
now - _lastAckReceived : 0; } + + /** + * Set or update a refractory period for the path. + * + * @param punishment How much a path should be punished + * @param pathFailure Whether this call is the result of a recent path failure + */ + inline void adjustRefractoryPeriod(int64_t now, uint32_t punishment, bool pathFailure) { + if (pathFailure) { + unsigned int suggestedRefractoryPeriod = _refractoryPeriod ? punishment + (_refractoryPeriod * 2) : punishment; + _refractoryPeriod = std::min(suggestedRefractoryPeriod, (unsigned int)ZT_MULTIPATH_MAX_REFRACTORY_PERIOD); + _lastRefractoryUpdate = 0; + } else { + uint32_t drainRefractory = 0; + if (_lastRefractoryUpdate) { + drainRefractory = (now - _lastRefractoryUpdate); + } else { + drainRefractory = (now - _lastAliveToggle); + } + _lastRefractoryUpdate = now; + if (_refractoryPeriod > drainRefractory) { + _refractoryPeriod -= drainRefractory; + } else { + _refractoryPeriod = 0; + _lastRefractoryUpdate = 0; + } + } + } + + /** + * Determine the current state of eligibility of the path. 
+ * + * @param includeRefractoryPeriod Whether current punishment should be taken into consideration + * @return True if this path can be used in a bond at the current time + */ + inline bool eligible(uint64_t now, int ackSendInterval, bool includeRefractoryPeriod = false) { + if (includeRefractoryPeriod && _refractoryPeriod) { + return false; + } + bool acceptableAge = age(now) < ((_monitorInterval * 4) + _downDelay); // Simple RX age (driven by packets of any type and gratuitous VERB_HELLOs) + bool acceptableAckAge = ackAge(now) < (ackSendInterval); // Whether the remote peer is actually responding to our outgoing traffic or simply sending stuff to us + bool notTooEarly = (now - _lastAliveToggle) >= _upDelay; // Whether we've waited long enough since the link last came online + bool inTrial = (now - _lastTrialBegin) < _upDelay; // Whether this path is still in its trial period + bool currEligibility = allowed() && (((acceptableAge || acceptableAckAge) && notTooEarly) || inTrial); + return currEligibility; + } + + /** + * Record when this path first entered the bond. Each path is given a trial period where it is admitted + * to the bond without requiring observations to prove its performance or reliability. + */ + inline void startTrial(uint64_t now) { _lastTrialBegin = now; } + + /** + * @return True if a path is permitted to be used in a bond (according to user pref.) + */ + inline bool allowed() { + return _enabled + && (!_ipvPref + || ((_addr.isV4() && (_ipvPref == 4 || _ipvPref == 46 || _ipvPref == 64)) + || ((_addr.isV6() && (_ipvPref == 6 || _ipvPref == 46 || _ipvPref == 64))))); + } + + /** + * @return True if a path is preferred over another on the same physical slave (according to user pref.) 
+ */ + inline bool preferred() { + return _onlyPathOnSlave + || (_addr.isV4() && (_ipvPref == 4 || _ipvPref == 46)) + || (_addr.isV6() && (_ipvPref == 6 || _ipvPref == 64)); + } + + /** + * @param now Current time + * @return Whether an ACK (VERB_ACK) packet needs to be emitted at this time + */ + inline bool needsToSendAck(int64_t now, int ackSendInterval) { + return ((now - _lastAckSent) >= ackSendInterval || + (_packetsReceivedSinceLastAck == ZT_QOS_TABLE_SIZE)) && _packetsReceivedSinceLastAck; + } + + /** + * @param now Current time + * @return Whether a QoS (VERB_QOS_MEASUREMENT) packet needs to be emitted at this time + */ + inline bool needsToSendQoS(int64_t now, int qosSendInterval) { + return ((_packetsReceivedSinceLastQoS >= ZT_QOS_TABLE_SIZE) || + ((now - _lastQoSMeasurement) > qosSendInterval)) && _packetsReceivedSinceLastQoS; + } + + /** + * Reset packet counters + */ + inline void resetPacketCounts() + { + _packetsIn = 0; + _packetsOut = 0; + } + private: - Mutex _statistics_m; volatile int64_t _lastOut; volatile int64_t _lastIn; volatile int64_t _lastTrustEstablishedPacketReceived; - volatile int64_t _lastPathQualityComputeTime; int64_t _localSocket; volatile unsigned int _latency; InetAddress _addr; InetAddress::IpScope _ipScope; // memoize this since it's a computed value checked often AtomicCounter __refCount; - std::map _outQoSRecords; // id:egress_time - std::map _inQoSRecords; // id:now - std::map _inACKRecords; // id:len + std::map qosStatsOut; // id:egress_time + std::map qosStatsIn; // id:now + std::map ackStatsIn; // id:len - int64_t _lastAck; - int64_t _lastThroughputEstimation; - int64_t _lastQoSMeasurement; - int64_t _lastQoSRecordPurge; + RingBuffer qosRecordSize; + RingBuffer qosRecordLossSamples; + RingBuffer throughputSamples; + RingBuffer packetValiditySamples; + RingBuffer _throughputVarianceSamples; + RingBuffer latencySamples; + /** + * Last time that a VERB_ACK was received on this path. 
+ */
+ uint64_t _lastAckReceived;
+
+ /**
+ * Last time that a VERB_ACK was sent out on this path.
+ */
+ uint64_t _lastAckSent;
+
+ /**
+ * Last time that a VERB_QOS_MEASUREMENT was sent out on this path.
+ */
+ uint64_t _lastQoSMeasurement;
+
+ /**
+ * Last time that the path's throughput was estimated.
+ */
+ uint64_t _lastThroughputEstimation;
+
+ /**
+ * The last time that the refractory period was updated.
+ */
+ uint64_t _lastRefractoryUpdate;
+
+ /**
+ * The last time that the path was marked as "alive".
+ */
+ uint64_t _lastAliveToggle;
+
+ /**
+ * State of eligibility at last check. Used for determining state changes.
+ */
+ bool _lastEligibilityState;
+
+ /**
+ * Timestamp indicating when this path's trial period began.
+ */
+ uint64_t _lastTrialBegin;
+
+ /**
+ * Amount of time that this path is prevented from becoming a member of a bond.
+ */
+ uint32_t _refractoryPeriod;
+
+ /**
+ * Monitor interval specific to this path or that was inherited from the bond controller.
+ */
+ int32_t _monitorInterval;
+
+ /**
+ * Up delay interval specific to this path or that was inherited from the bond controller.
+ */
+ uint32_t _upDelay;
+
+ /**
+ * Down delay interval specific to this path or that was inherited from the bond controller.
+ */
+ uint32_t _downDelay;
+
+ /**
+ * IP version preference inherited from the physical slave.
+ */
+ uint8_t _ipvPref;
+
+ /**
+ * Mode inherited from the physical slave.
+ */
+ uint8_t _mode;
+
+ /**
+ * Whether this is the only path on its physical slave (inherited from the slave).
+ */
+ bool _onlyPathOnSlave;
+
+ /**
+ * Enabled state inherited from the physical slave.
+ */
+ bool _enabled;
+
+ /**
+ * Whether this path is currently part of a bond.
+ */
+ bool _bonded;
+
+ /**
+ * Whether this path was intentionally negotiated by either peer.
+ */
+ bool _negotiated;
+
+ /**
+ * Whether this path has been deprecated due to performance issues.
Current traffic flows + * will be re-allocated to other paths in the most non-disruptive manner (if possible), + * and new traffic will not be allocated to this path. + */ + bool _deprecated; + + /** + * Whether flows should be moved from this path. Current traffic flows will be re-allocated + * immediately. + */ + bool _shouldReallocateFlows; + + /** + * The number of flows currently assigned to this path. + */ + uint16_t _assignedFlowCount; + + /** + * The mean latency (computed from a sliding window.) + */ + float _latencyMean; + + /** + * Packet delay variance (computed from a sliding window.) + */ + float _latencyVariance; + + /** + * The ratio of lost packets to received packets. + */ + float _packetLossRatio; + + /** + * The ratio of packets that failed their MAC/CRC checks to those that did not. + */ + float _packetErrorRatio; + + /** + * The estimated mean throughput of this path. + */ + uint64_t _throughputMean; + + /** + * The maximum observed throughput of this path. + */ + uint64_t _throughputMax; + + /** + * The variance in the estimated throughput of this path. + */ + float _throughputVariance; + + /** + * The relative quality of this path to all others in the bond, [0-255]. + */ + uint8_t _allocation; + + /** + * How much load this path is under. + */ + uint64_t _byteLoad; + + /** + * How much load this path is under (relative to other paths in the bond.) + */ + uint8_t _relativeByteLoad; + + /** + * Relative value expressing how "deserving" this path is of new traffic. + */ + uint8_t _affinity; + + /** + * Score that indicates to what degree this path is preferred over others that + * are available to the bonding policy. (specifically for active-backup) + */ + uint32_t _failoverScore; + + /** + * Number of bytes thus far sent that have not been acknowledged by the remote peer. 
+ */ int64_t _unackedBytes; - int64_t _expectingAckAsOf; - int16_t _packetsReceivedSinceLastAck; - int16_t _packetsReceivedSinceLastQoS; - uint64_t _maxLifetimeThroughput; - uint64_t _lastComputedMeanThroughput; + /** + * Number of packets received since the last VERB_ACK was sent to the remote peer. + */ + int32_t _packetsReceivedSinceLastAck; + + /** + * Number of packets received since the last VERB_QOS_MEASUREMENT was sent to the remote peer. + */ + int32_t _packetsReceivedSinceLastQoS; + + /** + * Bytes acknowledged via incoming VERB_ACK since the last estimation of throughput. + */ uint64_t _bytesAckedSinceLastThroughputEstimation; - float _lastComputedMeanLatency; - float _lastComputedPacketDelayVariance; + /** + * Counters used for tracking path load. + */ + int _packetsIn; + int _packetsOut; - float _lastComputedPacketErrorRatio; - float _lastComputedPacketLossRatio; + // TODO: Remove - // cached estimates - float _lastComputedStability; - float _lastComputedRelativeQuality; - float _lastComputedThroughputDistCoeff; - unsigned char _lastAllocation; - - // cached human-readable strings for tracing purposes - char _ifname[16]; - char _addrString[256]; - - RingBuffer _throughputSamples; - RingBuffer _latencySamples; - RingBuffer _packetValiditySamples; - RingBuffer _throughputDisturbanceSamples; + bool _prevEligibility; }; } // namespace ZeroTier diff --git a/node/Peer.cpp b/node/Peer.cpp index 3c45d53fb..1ee0c1240 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. 
@@ -14,7 +14,6 @@ #include "../version.h" #include "Constants.hpp" #include "Peer.hpp" -#include "Node.hpp" #include "Switch.hpp" #include "Network.hpp" #include "SelfAwareness.hpp" @@ -24,8 +23,6 @@ #include "RingBuffer.hpp" #include "Utils.hpp" -#include "../include/ZeroTierDebug.h" - namespace ZeroTier { static unsigned char s_freeRandomByteCounter = 0; @@ -37,20 +34,14 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident _lastTriedMemorizedPath(0), _lastDirectPathPushSent(0), _lastDirectPathPushReceive(0), + _lastEchoRequestReceived(0), _lastCredentialRequestSent(0), _lastWhoisRequestReceived(0), - _lastEchoRequestReceived(0), _lastCredentialsReceived(0), _lastTrustEstablishedPacketReceived(0), _lastSentFullHello(0), - _lastACKWindowReset(0), - _lastQoSWindowReset(0), - _lastMultipathCompatibilityCheck(0), + _lastEchoCheck(0), _freeRandomByte((unsigned char)((uintptr_t)this >> 4) ^ ++s_freeRandomByteCounter), - _uniqueAlivePathCount(0), - _localMultipathSupported(false), - _remoteMultipathSupported(false), - _canUseMultipath(false), _vProto(0), _vMajor(0), _vMinor(0), @@ -58,17 +49,17 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident _id(peerIdentity), _directPathPushCutoffCount(0), _credentialsCutoffCount(0), - _linkIsBalanced(false), - _linkIsRedundant(false), - _remotePeerMultipathEnabled(false), - _lastAggregateStatsReport(0), - _lastAggregateAllocation(0), - _virtualPathCount(0), - _roundRobinPathAssignmentIdx(0), - _pathAssignmentIdx(0) + _echoRequestCutoffCount(0), + _uniqueAlivePathCount(0), + _localMultipathSupported(false), + _remoteMultipathSupported(false), + _canUseMultipath(false), + _shouldCollectPathStatistics(0), + _lastComputedAggregateMeanLatency(0) { - if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH)) + if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH)) { throw ZT_EXCEPTION_INVALID_ARGUMENT; + } } void Peer::received( @@ -81,7 +72,8 @@ void 
Peer::received( const uint64_t inRePacketId, const Packet::Verb inReVerb, const bool trustEstablished, - const uint64_t networkId) + const uint64_t networkId, + const int32_t flowId) { const int64_t now = RR->node->now(); @@ -98,28 +90,13 @@ void Peer::received( break; } + recordIncomingPacket(tPtr, path, packetId, payloadLength, verb, flowId, now); + if (trustEstablished) { _lastTrustEstablishedPacketReceived = now; path->trustedPacketReceived(now); } - { - Mutex::Lock _l(_paths_m); - - recordIncomingPacket(tPtr, path, packetId, payloadLength, verb, now); - - if (_canUseMultipath) { - if (path->needsToSendQoS(now)) { - sendQOS_MEASUREMENT(tPtr, path, path->localSocket(), path->address(), now); - } - for(unsigned int i=0;iprocessBackgroundPathMeasurements(now); - } - } - } - } - if (hops == 0) { // If this is a direct packet (no hops), update existing paths or learn new ones bool havePath = false; @@ -137,60 +114,45 @@ void Peer::received( } bool attemptToContact = false; + + int replaceIdx = ZT_MAX_PEER_NETWORK_PATHS; if ((!havePath)&&(RR->node->shouldUsePathForZeroTierTraffic(tPtr,_id.address(),path->localSocket(),path->address()))) { Mutex::Lock _l(_paths_m); - - // Paths are redundant if they duplicate an alive path to the same IP or - // with the same local socket and address family. - bool redundant = false; - unsigned int replacePath = ZT_MAX_PEER_NETWORK_PATHS; for(unsigned int i=0;ialive(now)) && ( ((_paths[i].p->localSocket() == path->localSocket())&&(_paths[i].p->address().ss_family == path->address().ss_family)) || (_paths[i].p->address().ipsEqual2(path->address())) ) ) { - redundant = true; - break; - } - // If the path is the same address and port, simply assume this is a replacement - if ( (_paths[i].p->address().ipsEqual2(path->address()))) { - replacePath = i; - break; - } - } else break; - } - - // If the path isn't a duplicate of the same localSocket AND we haven't already determined a replacePath, - // then find the worst path and replace it. 
- if (!redundant && replacePath == ZT_MAX_PEER_NETWORK_PATHS) { - int replacePathQuality = 0; - for(unsigned int i=0;iquality(now); - if (q > replacePathQuality) { - replacePathQuality = q; - replacePath = i; + // match addr + if ( (_paths[i].p->alive(now)) && ( ((_paths[i].p->localSocket() == path->localSocket())&&(_paths[i].p->address().ss_family == path->address().ss_family)) && (_paths[i].p->address().ipsEqual2(path->address())) ) ) { + // port + if (_paths[i].p->address().port() == path->address().port()) { + replaceIdx = i; + break; } - } else { - replacePath = i; + } + } + } + if (replaceIdx == ZT_MAX_PEER_NETWORK_PATHS) { + for(unsigned int i=0;it->peerLearnedNewPath(tPtr,networkId,*this,path,packetId); - _paths[replacePath].lr = now; - _paths[replacePath].p = path; - _paths[replacePath].priority = 1; + performMultipathStateCheck(now); + if (_bondToPeer) { + _bondToPeer->nominatePath(path, now); + } + _paths[replaceIdx].lr = now; + _paths[replaceIdx].p = path; + _paths[replaceIdx].priority = 1; } else { attemptToContact = true; } - - // Every time we learn of new path, rebuild set of virtual paths - constructSetOfVirtualPaths(); } } - if (attemptToContact) { attemptToContactAt(tPtr,path->localSocket(),path->address(),now,true); path->sent(now); @@ -203,8 +165,7 @@ void Peer::received( // is done less frequently. if (this->trustEstablished(now)) { const int64_t sinceLastPush = now - _lastDirectPathPushSent; - if (sinceLastPush >= ((hops == 0) ? ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH : ZT_DIRECT_PATH_PUSH_INTERVAL) - || (_localMultipathSupported && (sinceLastPush >= (ZT_DIRECT_PATH_PUSH_INTERVAL_MULTIPATH)))) { + if (sinceLastPush >= ((hops == 0) ? 
ZT_DIRECT_PATH_PUSH_INTERVAL_HAVEPATH : ZT_DIRECT_PATH_PUSH_INTERVAL)) { _lastDirectPathPushSent = now; std::vector pathsToPush(RR->node->directPaths()); if (pathsToPush.size() > 0) { @@ -249,189 +210,15 @@ void Peer::received( } } -void Peer::constructSetOfVirtualPaths() +SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired, int32_t flowId) { - if (!_remoteMultipathSupported) { - return; - } - Mutex::Lock _l(_virtual_paths_m); - - int64_t now = RR->node->now(); - _virtualPathCount = 0; - for(unsigned int i=0;ialive(now)) { - for(unsigned int j=0;jalive(now)) { - int64_t localSocket = _paths[j].p->localSocket(); - bool foundVirtualPath = false; - for (int k=0; k<_virtualPaths.size(); k++) { - if (_virtualPaths[k]->localSocket == localSocket && _virtualPaths[k]->p == _paths[i].p) { - foundVirtualPath = true; - } - } - if (!foundVirtualPath) - { - VirtualPath *np = new VirtualPath; - np->p = _paths[i].p; - np->localSocket = localSocket; - _virtualPaths.push_back(np); - } - } - } - } - } -} - -void Peer::recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, - uint16_t payloadLength, const Packet::Verb verb, int64_t now) -{ - _freeRandomByte += (unsigned char)(packetId >> 8); // grab entropy to use in path selection logic for multipath - if (_canUseMultipath) { - path->recordOutgoingPacket(now, packetId, payloadLength, verb); - } -} - -void Peer::recordIncomingPacket(void *tPtr, const SharedPtr &path, const uint64_t packetId, - uint16_t payloadLength, const Packet::Verb verb, int64_t now) -{ - if (_canUseMultipath) { - if (path->needsToSendAck(now)) { - sendACK(tPtr, path, path->localSocket(), path->address(), now); - } - path->recordIncomingPacket(now, packetId, payloadLength, verb); - } -} - -void Peer::computeAggregateAllocation(int64_t now) -{ - float maxStability = 0; - float totalRelativeQuality = 0; - float maxThroughput = 1; - float maxScope = 0; - float relStability[ZT_MAX_PEER_NETWORK_PATHS]; - float 
relThroughput[ZT_MAX_PEER_NETWORK_PATHS]; - memset(&relStability, 0, sizeof(relStability)); - memset(&relThroughput, 0, sizeof(relThroughput)); - // Survey all paths - for(unsigned int i=0;ilastComputedStability(); - relThroughput[i] = (float)_paths[i].p->maxLifetimeThroughput(); - maxStability = relStability[i] > maxStability ? relStability[i] : maxStability; - maxThroughput = relThroughput[i] > maxThroughput ? relThroughput[i] : maxThroughput; - maxScope = _paths[i].p->ipScope() > maxScope ? _paths[i].p->ipScope() : maxScope; - } - } - // Convert to relative values - for(unsigned int i=0;iackAge(now), 0, ZT_PATH_MAX_AGE, 0, 10); - float age_contrib = exp((-1)*normalized_ma); - float relScope = ((float)(_paths[i].p->ipScope()+1) / (maxScope + 1)); - float relQuality = - (relStability[i] * (float)ZT_PATH_CONTRIB_STABILITY) - + (fmaxf(1.0f, relThroughput[i]) * (float)ZT_PATH_CONTRIB_THROUGHPUT) - + relScope * (float)ZT_PATH_CONTRIB_SCOPE; - relQuality *= age_contrib; - // Clamp values - relQuality = relQuality > (1.00f / 100.0f) ? relQuality : 0.0f; - relQuality = relQuality < (99.0f / 100.0f) ? 
relQuality : 1.0f; - totalRelativeQuality += relQuality; - _paths[i].p->updateRelativeQuality(relQuality); - } - } - // Convert set of relative performances into an allocation set - for(uint16_t i=0;inode->getMultipathMode() == ZT_MULTIPATH_BALANCE_RANDOM) { - _paths[i].p->updateComponentAllocationOfAggregateLink(((float)_pathChoiceHist.countValue(i) / (float)_pathChoiceHist.count()) * 255); - } - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_DYNAMIC_OPAQUE) { - _paths[i].p->updateComponentAllocationOfAggregateLink((unsigned char)((_paths[i].p->relativeQuality() / totalRelativeQuality) * 255)); - } - } - } -} - -int Peer::computeAggregateLinkPacketDelayVariance() -{ - float pdv = 0.0; - for(unsigned int i=0;irelativeQuality() * _paths[i].p->packetDelayVariance(); - } - } - return (int)pdv; -} - -int Peer::computeAggregateLinkMeanLatency() -{ - int ml = 0; - int pathCount = 0; - for(unsigned int i=0;irelativeQuality() * _paths[i].p->meanLatency()); - } - } - return ml / pathCount; -} - -int Peer::aggregateLinkPhysicalPathCount() -{ - std::map ifnamemap; - int pathCount = 0; - int64_t now = RR->node->now(); - for(unsigned int i=0;ialive(now)) { - if (!ifnamemap[_paths[i].p->getName()]) { - ifnamemap[_paths[i].p->getName()] = true; - pathCount++; - } - } - } - return pathCount; -} - -int Peer::aggregateLinkLogicalPathCount() -{ - int pathCount = 0; - int64_t now = RR->node->now(); - for(unsigned int i=0;ialive(now)) { - pathCount++; - } - } - return pathCount; -} - -std::vector > Peer::getAllPaths(int64_t now) -{ - Mutex::Lock _l(_virtual_paths_m); // FIXME: TX can now lock RX - std::vector > paths; - for (int i=0; i<_virtualPaths.size(); i++) { - if (_virtualPaths[i]->p) { - paths.push_back(_virtualPaths[i]->p); - } - } - return paths; -} - -SharedPtr Peer::getAppropriatePath(int64_t now, bool includeExpired, int64_t flowId) -{ - Mutex::Lock _l(_paths_m); - SharedPtr selectedPath; - char curPathStr[128]; - char newPathStr[128]; - unsigned int bestPath = 
ZT_MAX_PEER_NETWORK_PATHS; - - /** - * Send traffic across the highest quality path only. This algorithm will still - * use the old path quality metric from protocol version 9. - */ - if (!_canUseMultipath) { + if (!_bondToPeer) { + Mutex::Lock _l(_paths_m); + unsigned int bestPath = ZT_MAX_PEER_NETWORK_PATHS; + /** + * Send traffic across the highest quality path only. This algorithm will still + * use the old path quality metric from protocol version 9. + */ long bestPathQuality = 2147483647; for(unsigned int i=0;i Peer::getAppropriatePath(int64_t now, bool includeExpired, int64 } return SharedPtr(); } - - // Update path measurements - for(unsigned int i=0;iprocessBackgroundPathMeasurements(now); - } - } - if (RR->sw->isFlowAware()) { - // Detect new flows and update existing records - if (_flows.count(flowId)) { - _flows[flowId]->lastSend = now; - } - else { - fprintf(stderr, "new flow %llx detected between this node and %llx (%lu active flow(s))\n", - flowId, this->_id.address().toInt(), (_flows.size()+1)); - struct Flow *newFlow = new Flow(flowId, now); - _flows[flowId] = newFlow; - newFlow->assignedPath = nullptr; - } - } - // Construct set of virtual paths if needed - if (!_virtualPaths.size()) { - constructSetOfVirtualPaths(); - } - if (!_virtualPaths.size()) { - fprintf(stderr, "no paths to send packet out on\n"); - return SharedPtr(); - } - - /** - * All traffic is sent on all paths. - */ - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BROADCAST) { - // Not handled here. Handled in Switch::_trySend() - } - - /** - * Only one link is active. Fail-over is immediate. - */ - if (RR->node->getMultipathMode() == ZT_MULTIPATH_ACTIVE_BACKUP) { - bool bFoundHotPath = false; - if (!_activeBackupPath) { - /* Select the fist path that appears to still be active. 
- * This will eventually be user-configurable */ - for (int i=0; ilastIn()) < ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) { - bFoundHotPath = true; - _activeBackupPath = _paths[i].p; - _pathAssignmentIdx = i; - _activeBackupPath->address().toString(curPathStr); - fprintf(stderr, "selected %s as the primary active-backup path to %llx (idx=%d)\n", - curPathStr, this->_id.address().toInt(), _pathAssignmentIdx); - break; - } - } - } - } - else { - char what[128]; - if ((now - _activeBackupPath->lastIn()) > ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) { - _activeBackupPath->address().toString(curPathStr); // Record path string for later debug trace - int16_t previousIdx = _pathAssignmentIdx; - SharedPtr nextAlternativePath; - // Search for a hot path, at the same time find the next path in - // a RR sequence that seems viable to use as an alternative - int searchCount = 0; - while (searchCount < ZT_MAX_PEER_NETWORK_PATHS) { - _pathAssignmentIdx++; - if (_pathAssignmentIdx == ZT_MAX_PEER_NETWORK_PATHS) { - _pathAssignmentIdx = 0; - } - searchCount++; - if (_paths[_pathAssignmentIdx].p) { - _paths[_pathAssignmentIdx].p->address().toString(what); - if (_activeBackupPath.ptr() == _paths[_pathAssignmentIdx].p.ptr()) { - continue; - } - if (!nextAlternativePath) { // Record the first viable alternative in the RR sequence - nextAlternativePath = _paths[_pathAssignmentIdx].p; - } - if ((now - _paths[_pathAssignmentIdx].p->lastIn()) < ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) { - bFoundHotPath = true; - _activeBackupPath = _paths[_pathAssignmentIdx].p; - _activeBackupPath->address().toString(newPathStr); - fprintf(stderr, "primary active-backup path %s to %llx appears to be dead, switched to %s\n", - curPathStr, this->_id.address().toInt(), newPathStr); - break; - } - } - } - if (!bFoundHotPath) { - if (nextAlternativePath) { - _activeBackupPath = nextAlternativePath; - _activeBackupPath->address().toString(curPathStr); - //fprintf(stderr, "no hot paths found 
to use as active-backup primary to %llx, using next best: %s\n", - // this->_id.address().toInt(), curPathStr); - } - else { - // No change - } - } - } - } - if (!_activeBackupPath) { - return SharedPtr(); - } - return _activeBackupPath; - } - - /** - * Traffic is randomly distributed among all active paths. - */ - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_RANDOM) { - int sz = _virtualPaths.size(); - if (sz) { - int idx = _freeRandomByte % sz; - _pathChoiceHist.push(idx); - _virtualPaths[idx]->p->address().toString(curPathStr); - fprintf(stderr, "sending out: (%llx), idx=%d: path=%s, localSocket=%lld\n", - this->_id.address().toInt(), idx, curPathStr, _virtualPaths[idx]->localSocket); - return _virtualPaths[idx]->p; - } - // This call is algorithmically inert but gives us a value to show in the status output - computeAggregateAllocation(now); - } - - /** - * Packets are striped across all available paths. - */ - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_RR_OPAQUE) { - int16_t previousIdx = _roundRobinPathAssignmentIdx; - int cycleCount = 0; - int minLastIn = 0; - int bestAlternativeIdx = -1; - while (cycleCount < ZT_MAX_PEER_NETWORK_PATHS) { - if (_roundRobinPathAssignmentIdx < (_virtualPaths.size()-1)) { - _roundRobinPathAssignmentIdx++; - } - else { - _roundRobinPathAssignmentIdx = 0; - } - cycleCount++; - if (_virtualPaths[_roundRobinPathAssignmentIdx]->p) { - uint64_t lastIn = _virtualPaths[_roundRobinPathAssignmentIdx]->p->lastIn(); - if (bestAlternativeIdx == -1) { - minLastIn = lastIn; // Initialization - bestAlternativeIdx = 0; - } - if (lastIn < minLastIn) { - minLastIn = lastIn; - bestAlternativeIdx = _roundRobinPathAssignmentIdx; - } - if ((now - lastIn) < 5000) { - selectedPath = _virtualPaths[_roundRobinPathAssignmentIdx]->p; - } - } - } - // If we can't find an appropriate path, try the most recently active one - if (!selectedPath) { - _roundRobinPathAssignmentIdx = bestAlternativeIdx; - selectedPath = 
_virtualPaths[bestAlternativeIdx]->p; - selectedPath->address().toString(curPathStr); - fprintf(stderr, "could not find good path, settling for next best %s\n",curPathStr); - } - selectedPath->address().toString(curPathStr); - fprintf(stderr, "sending packet out on path %s at index %d\n", - curPathStr, _roundRobinPathAssignmentIdx); - return selectedPath; - } - - /** - * Flows are striped across all available paths. - */ - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_RR_FLOW) { - // fprintf(stderr, "ZT_MULTIPATH_BALANCE_RR_FLOW\n"); - } - - /** - * Flows are hashed across all available paths. - */ - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_XOR_FLOW) { - // fprintf(stderr, "ZT_MULTIPATH_BALANCE_XOR_FLOW (%llx) \n", flowId); - struct Flow *currFlow = NULL; - if (_flows.count(flowId)) { - currFlow = _flows[flowId]; - if (!currFlow->assignedPath) { - int idx = abs((int)(currFlow->flowId % (_virtualPaths.size()-1))); - currFlow->assignedPath = _virtualPaths[idx]; - _virtualPaths[idx]->p->address().toString(curPathStr); - fprintf(stderr, "assigning flow %llx between this node and peer %llx to path %s at index %d\n", - currFlow->flowId, this->_id.address().toInt(), curPathStr, idx); - } - else { - if (!currFlow->assignedPath->p->alive(now)) { - currFlow->assignedPath->p->address().toString(curPathStr); - // Re-assign - int idx = abs((int)(currFlow->flowId % (_virtualPaths.size()-1))); - currFlow->assignedPath = _virtualPaths[idx]; - _virtualPaths[idx]->p->address().toString(newPathStr); - fprintf(stderr, "path %s assigned to flow %llx between this node and %llx appears to be dead, reassigning to path %s\n", - curPathStr, currFlow->flowId, this->_id.address().toInt(), newPathStr); - } - } - return currFlow->assignedPath->p; - } - } - - /** - * Proportionally allocate traffic according to dynamic path quality measurements. 
- */ - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_DYNAMIC_OPAQUE) { - if ((now - _lastAggregateAllocation) >= ZT_PATH_QUALITY_COMPUTE_INTERVAL) { - _lastAggregateAllocation = now; - computeAggregateAllocation(now); - } - // Randomly choose path according to their allocations - float rf = _freeRandomByte; - for(int i=0;iallocation()) { - bestPath = i; - _pathChoiceHist.push(bestPath); // Record which path we chose - break; - } - rf -= _paths[i].p->allocation(); - } - } - if (bestPath < ZT_MAX_PEER_NETWORK_PATHS) { - return _paths[bestPath].p; - } - } - - /** - * Flows are dynamically allocated across paths in proportion to link strength and load. - */ - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_DYNAMIC_FLOW) { - } - - return SharedPtr(); -} - -char *Peer::interfaceListStr() -{ - std::map ifnamemap; - char tmp[32]; - const int64_t now = RR->node->now(); - char *ptr = _interfaceListStr; - bool imbalanced = false; - memset(_interfaceListStr, 0, sizeof(_interfaceListStr)); - int alivePathCount = aggregateLinkLogicalPathCount(); - for(unsigned int i=0;ialive(now)) { - int ipv = _paths[i].p->address().isV4(); - // If this is acting as an aggregate link, check allocations - float targetAllocation = 1.0f / (float)alivePathCount; - float currentAllocation = 1.0f; - if (alivePathCount > 1) { - currentAllocation = (float)_pathChoiceHist.countValue(i) / (float)_pathChoiceHist.count(); - if (fabs(targetAllocation - currentAllocation) > ZT_PATH_IMBALANCE_THRESHOLD) { - imbalanced = true; - } - } - char *ipvStr = ipv ? 
(char*)"ipv4" : (char*)"ipv6"; - sprintf(tmp, "(%s, %s, %.3f)", _paths[i].p->getName(), ipvStr, currentAllocation); - // Prevent duplicates - if(ifnamemap[_paths[i].p->getName()] != ipv) { - memcpy(ptr, tmp, strlen(tmp)); - ptr += strlen(tmp); - *ptr = ' '; - ptr++; - ifnamemap[_paths[i].p->getName()] = ipv; - } - } - } - ptr--; // Overwrite trailing space - if (imbalanced) { - sprintf(tmp, ", is asymmetrical"); - memcpy(ptr, tmp, sizeof(tmp)); - } else { - *ptr = '\0'; - } - return _interfaceListStr; + return _bondToPeer->getAppropriatePath(now, flowId); } void Peer::introduce(void *const tPtr,const int64_t now,const SharedPtr &other) const @@ -859,87 +360,6 @@ void Peer::introduce(void *const tPtr,const int64_t now,const SharedPtr &o } } -inline void Peer::processBackgroundPeerTasks(const int64_t now) -{ - // Determine current multipath compatibility with other peer - if ((now - _lastMultipathCompatibilityCheck) >= ZT_PATH_QUALITY_COMPUTE_INTERVAL) { - // - // Cache number of available paths so that we can short-circuit multipath logic elsewhere - // - // We also take notice of duplicate paths (same IP only) because we may have - // recently received a direct path push from a peer and our list might contain - // a dead path which hasn't been fully recognized as such. In this case we - // don't want the duplicate to trigger execution of multipath code prematurely. - // - // This is done to support the behavior of auto multipath enable/disable - // without user intervention. 
- // - int currAlivePathCount = 0; - int duplicatePathsFound = 0; - for (unsigned int i=0;iaddress().ipsEqual2(_paths[j].p->address()) && i != j) { - duplicatePathsFound+=1; - break; - } - } - } - } - _uniqueAlivePathCount = (currAlivePathCount - (duplicatePathsFound / 2)); - _lastMultipathCompatibilityCheck = now; - _localMultipathSupported = ((RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) && (ZT_PROTO_VERSION > 9)); - _remoteMultipathSupported = _vProto > 9; - // If both peers support multipath and more than one path exist, we can use multipath logic - _canUseMultipath = _localMultipathSupported && _remoteMultipathSupported && (_uniqueAlivePathCount > 1); - } - - // Remove old flows - if (RR->sw->isFlowAware()) { - std::map::iterator it = _flows.begin(); - while (it != _flows.end()) { - if ((now - it->second->lastSend) > ZT_MULTIPATH_FLOW_EXPIRATION) { - fprintf(stderr, "forgetting flow %llx between this node and %llx (%lu active flow(s))\n", - it->first, this->_id.address().toInt(), _flows.size()); - it = _flows.erase(it); - } else { - it++; - } - } - } -} - -void Peer::sendACK(void *tPtr,const SharedPtr &path,const int64_t localSocket,const InetAddress &atAddress,int64_t now) -{ - Packet outp(_id.address(),RR->identity.address(),Packet::VERB_ACK); - uint32_t bytesToAck = path->bytesToAck(); - outp.append(bytesToAck); - if (atAddress) { - outp.armor(_key,false); - RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size()); - } else { - RR->sw->send(tPtr,outp,false); - } - path->sentAck(now); -} - -void Peer::sendQOS_MEASUREMENT(void *tPtr,const SharedPtr &path,const int64_t localSocket,const InetAddress &atAddress,int64_t now) -{ - const int64_t _now = RR->node->now(); - Packet outp(_id.address(),RR->identity.address(),Packet::VERB_QOS_MEASUREMENT); - char qosData[ZT_PATH_MAX_QOS_PACKET_SZ]; - int16_t len = path->generateQoSPacket(_now,qosData); - outp.append(qosData,len); - if (atAddress) { - outp.armor(_key,false); - 
RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size()); - } else { - RR->sw->send(tPtr,outp,false); - } - path->sentQoS(now); -} - void Peer::sendHELLO(void *tPtr,const int64_t localSocket,const InetAddress &atAddress,int64_t now) { Packet outp(_id.address(),RR->identity.address(),Packet::VERB_HELLO); @@ -1005,29 +425,57 @@ void Peer::tryMemorizedPath(void *tPtr,int64_t now) } } +void Peer::performMultipathStateCheck(int64_t now) +{ + /** + * Check for conditions required for multipath bonding and create a bond + * if allowed. + */ + _localMultipathSupported = ((RR->bc->inUse()) && (ZT_PROTO_VERSION > 9)); + if (_localMultipathSupported) { + int currAlivePathCount = 0; + int duplicatePathsFound = 0; + for (unsigned int i=0;iaddress().ipsEqual2(_paths[j].p->address()) && i != j) { + duplicatePathsFound+=1; + break; + } + } + } + } + _uniqueAlivePathCount = (currAlivePathCount - (duplicatePathsFound / 2)); + _remoteMultipathSupported = _vProto > 9; + _canUseMultipath = _localMultipathSupported && _remoteMultipathSupported && (_uniqueAlivePathCount > 1); + } + if (_canUseMultipath && !_bondToPeer) { + if (RR->bc) { + _bondToPeer = RR->bc->createTransportTriggeredBond(RR, this); + /** + * Allow new bond to retroactively learn all paths known to this peer + */ + if (_bondToPeer) { + for (unsigned int i=0;inominatePath(_paths[i].p, now); + } + } + } + } + } +} + unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) { unsigned int sent = 0; Mutex::Lock _l(_paths_m); - processBackgroundPeerTasks(now); + performMultipathStateCheck(now); - // Emit traces regarding aggregate link status - if (_canUseMultipath) { - int alivePathCount = aggregateLinkPhysicalPathCount(); - if ((now - _lastAggregateStatsReport) > ZT_PATH_AGGREGATE_STATS_REPORT_INTERVAL) { - _lastAggregateStatsReport = now; - if (alivePathCount) { - RR->t->peerLinkAggregateStatistics(NULL,*this); - } - } if (alivePathCount < 2 && _linkIsRedundant) { - _linkIsRedundant = !_linkIsRedundant; 
- RR->t->peerLinkNoLongerAggregate(NULL,*this); - } if (alivePathCount > 1 && !_linkIsRedundant) { - _linkIsRedundant = !_linkIsRedundant; - RR->t->peerLinkNoLongerAggregate(NULL,*this); - } - } + const bool sendFullHello = ((now - _lastSentFullHello) >= ZT_PEER_PING_PERIOD); + _lastSentFullHello = now; // Right now we only keep pinging links that have the maximum priority. The // priority is used to track cluster redirections, meaning that when a cluster @@ -1040,15 +488,13 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) else break; } - const bool sendFullHello = ((now - _lastSentFullHello) >= ZT_PEER_PING_PERIOD); - _lastSentFullHello = now; - unsigned int j = 0; for(unsigned int i=0;ineedsHeartbeat(now))) { + if ((sendFullHello)||(_paths[i].p->needsHeartbeat(now)) + || (_canUseMultipath && _paths[i].p->needsGratuitousHeartbeat(now))) { attemptToContactAt(tPtr,_paths[i].p->localSocket(),_paths[i].p->address(),now,sendFullHello); _paths[i].p->sent(now); sent |= (_paths[i].p->address().ss_family == AF_INET) ? 
0x1 : 0x2; @@ -1059,14 +505,6 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now) } } else break; } - if (canUseMultipath()) { - while(j < ZT_MAX_PEER_NETWORK_PATHS) { - _paths[j].lr = 0; - _paths[j].p.zero(); - _paths[j].priority = 1; - ++j; - } - } return sent; } @@ -1133,4 +571,30 @@ void Peer::resetWithinScope(void *tPtr,InetAddress::IpScope scope,int inetAddres } } +void Peer::recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, + uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now) +{ + if (!_shouldCollectPathStatistics || !_bondToPeer) { + return; + } + _bondToPeer->recordOutgoingPacket(path, packetId, payloadLength, verb, flowId, now); +} + +void Peer::recordIncomingInvalidPacket(const SharedPtr& path) +{ + if (!_shouldCollectPathStatistics || !_bondToPeer) { + return; + } + _bondToPeer->recordIncomingInvalidPacket(path); +} + +void Peer::recordIncomingPacket(void *tPtr, const SharedPtr &path, const uint64_t packetId, + uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now) +{ + if (!_shouldCollectPathStatistics || !_bondToPeer) { + return; + } + _bondToPeer->recordIncomingPacket(path, packetId, payloadLength, verb, flowId, now); +} + } // namespace ZeroTier diff --git a/node/Peer.hpp b/node/Peer.hpp index ef4645e9a..1a2b6abc1 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. 
@@ -15,8 +15,6 @@ #define ZT_PEER_HPP #include -#include -#include #include "../include/ZeroTierOne.h" @@ -33,6 +31,8 @@ #include "AtomicCounter.hpp" #include "Hashtable.hpp" #include "Mutex.hpp" +#include "Bond.hpp" +#include "BondController.hpp" #define ZT_PEER_MAX_SERIALIZED_STATE_SIZE (sizeof(Peer) + 32 + (sizeof(Path) * 2)) @@ -44,6 +44,9 @@ namespace ZeroTier { class Peer { friend class SharedPtr; + friend class SharedPtr; + friend class Switch; + friend class Bond; private: Peer() {} // disabled to prevent bugs -- should not be constructed uninitialized @@ -97,7 +100,8 @@ public: const uint64_t inRePacketId, const Packet::Verb inReVerb, const bool trustEstablished, - const uint64_t networkId); + const uint64_t networkId, + const int32_t flowId); /** * Check whether we have an active path to this peer via the given address @@ -136,94 +140,49 @@ public: return false; } - void constructSetOfVirtualPaths(); - /** - * Record statistics on outgoing packets - * - * @param path Path over which packet was sent - * @param id Packet ID - * @param len Length of packet payload - * @param verb Packet verb - * @param now Current time - */ - void recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now); - - /** - * Record statistics on incoming packets - * - * @param path Path over which packet was sent - * @param id Packet ID - * @param len Length of packet payload - * @param verb Packet verb - * @param now Current time - */ - void recordIncomingPacket(void *tPtr, const SharedPtr &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now); - - /** - * Send an ACK to peer for the most recent packets received + * Record incoming packets to * * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call - * @param localSocket Raw socket the ACK packet will be sent over - * @param atAddress Destination for the ACK packet + * @param 
path Path over which packet was received + * @param packetId Packet ID + * @param payloadLength Length of packet data payload + * @param verb Packet verb + * @param flowId Flow ID * @param now Current time */ - void sendACK(void *tPtr, const SharedPtr &path, const int64_t localSocket,const InetAddress &atAddress,int64_t now); + void recordIncomingPacket(void *tPtr, const SharedPtr &path, const uint64_t packetId, + uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now); /** - * Send a QoS packet to peer so that it can evaluate the quality of this link * - * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call - * @param localSocket Raw socket the QoS packet will be sent over - * @param atAddress Destination for the QoS packet + * @param path Path over which packet is being sent + * @param packetId Packet ID + * @param payloadLength Length of packet data payload + * @param verb Packet verb + * @param flowId Flow ID * @param now Current time */ - void sendQOS_MEASUREMENT(void *tPtr, const SharedPtr &path, const int64_t localSocket,const InetAddress &atAddress,int64_t now); + void recordOutgoingPacket(const SharedPtr &path, const uint64_t packetId, + uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now); /** - * Compute relative quality values and allocations for the components of the aggregate link + * Record an invalid incoming packet. This packet failed + * MAC/compression/cipher checks and will now contribute to a + * Packet Error Ratio (PER). 
* - * @param now Current time + * @param path Path over which packet was received */ - void computeAggregateAllocation(int64_t now); - - /** - * @return The aggregate link Packet Delay Variance (PDV) - */ - int computeAggregateLinkPacketDelayVariance(); - - /** - * @return The aggregate link mean latency - */ - int computeAggregateLinkMeanLatency(); - - /** - * @return The number of currently alive "physical" paths in the aggregate link - */ - int aggregateLinkPhysicalPathCount(); - - /** - * @return The number of currently alive "logical" paths in the aggregate link - */ - int aggregateLinkLogicalPathCount(); - - std::vector> getAllPaths(int64_t now); + void recordIncomingInvalidPacket(const SharedPtr& path); /** * Get the most appropriate direct path based on current multipath and QoS configuration * * @param now Current time - * @param flowId Session-specific protocol flow identifier used for path allocation * @param includeExpired If true, include even expired paths * @return Best current path or NULL if none */ - SharedPtr getAppropriatePath(int64_t now, bool includeExpired, int64_t flowId = -1); - - /** - * Generate a human-readable string of interface names making up the aggregate link, also include - * moving allocation and IP version number for each (for tracing) - */ - char *interfaceListStr(); + SharedPtr getAppropriatePath(int64_t now, bool includeExpired, int32_t flowId = -1); /** * Send VERB_RENDEZVOUS to this and another peer via the best common IP scope and path @@ -265,6 +224,13 @@ public: */ void tryMemorizedPath(void *tPtr,int64_t now); + /** + * A check to be performed periodically which determines whether multipath communication is + * possible with this peer. This check should be performed early in the life-cycle of the peer + * as well as during the process of learning new paths. 
+ */ + void performMultipathStateCheck(int64_t now); + /** * Send pings or keepalives depending on configured timeouts * @@ -277,16 +243,6 @@ public: */ unsigned int doPingAndKeepalive(void *tPtr,int64_t now); - /** - * Clear paths whose localSocket(s) are in a CLOSED state or have an otherwise INVALID state. - * This should be called frequently so that we can detect and remove unproductive or invalid paths. - * - * Under the hood this is done periodically based on ZT_CLOSED_PATH_PRUNING_INTERVAL. - * - * @return Number of paths that were pruned this round - */ - unsigned int prunePaths(); - /** * Process a cluster redirect sent by this peer * @@ -348,7 +304,7 @@ public: inline unsigned int latency(const int64_t now) { if (_canUseMultipath) { - return (int)computeAggregateLinkMeanLatency(); + return (int)_lastComputedAggregateMeanLatency; } else { SharedPtr bp(getAppropriatePath(now,false)); if (bp) @@ -407,37 +363,6 @@ public: inline bool remoteVersionKnown() const { return ((_vMajor > 0)||(_vMinor > 0)||(_vRevision > 0)); } - /** - * Periodically update known multipath activation constraints. This is done so that we know when and when - * not to use multipath logic. Doing this once every few seconds is sufficient. - * - * @param now Current time - */ - inline void processBackgroundPeerTasks(const int64_t now); - - /** - * Record that the remote peer does have multipath enabled. As is evident by the receipt of a VERB_ACK - * or a VERB_QOS_MEASUREMENT packet at some point in the past. Until this flag is set, the local client - * shall assume that multipath is not enabled and should only use classical Protocol 9 logic. 
- */ - inline void inferRemoteMultipathEnabled() { _remotePeerMultipathEnabled = true; } - - /** - * @return Whether the local client supports and is configured to use multipath - */ - inline bool localMultipathSupport() { return _localMultipathSupported; } - - /** - * @return Whether the remote peer supports and is configured to use multipath - */ - inline bool remoteMultipathSupport() { return _remoteMultipathSupported; } - - /** - * @return Whether this client can use multipath to communicate with this peer. True if both peers are using - * the correct protocol and if both peers have multipath enabled. False if otherwise. - */ - inline bool canUseMultipath() { return _canUseMultipath; } - /** * @return True if peer has received a trust established packet (e.g. common network membership) in the past ZT_TRUST_EXPIRATION ms */ @@ -492,50 +417,35 @@ public: } /** - * Rate limit gate for inbound ECHO requests + * Rate limit gate for inbound ECHO requests. This rate limiter works + * by draining a certain number of requests per unit time. Each peer may + * theoretically receive up to ZT_ECHO_CUTOFF_LIMIT requests per second. 
*/ inline bool rateGateEchoRequest(const int64_t now) { - if ((now - _lastEchoRequestReceived) >= ZT_PEER_GENERAL_RATE_LIMIT) { - _lastEchoRequestReceived = now; - return true; - } - return false; - } - - /** - * Rate limit gate for VERB_ACK - */ - inline bool rateGateACK(const int64_t now) - { - if ((now - _lastACKWindowReset) >= ZT_PATH_QOS_ACK_CUTOFF_TIME) { - _lastACKWindowReset = now; - _ACKCutoffCount = 0; + /* + // TODO: Rethink this + if (_canUseMultipath) { + _echoRequestCutoffCount++; + int numToDrain = (now - _lastEchoCheck) / ZT_ECHO_DRAINAGE_DIVISOR; + _lastEchoCheck = now; + fprintf(stderr, "ZT_ECHO_CUTOFF_LIMIT=%d, (now - _lastEchoCheck)=%d, numToDrain=%d, ZT_ECHO_DRAINAGE_DIVISOR=%d\n", ZT_ECHO_CUTOFF_LIMIT, (now - _lastEchoCheck), numToDrain, ZT_ECHO_DRAINAGE_DIVISOR); + if (_echoRequestCutoffCount > numToDrain) { + _echoRequestCutoffCount-=numToDrain; + } + else { + _echoRequestCutoffCount = 0; + } + return (_echoRequestCutoffCount < ZT_ECHO_CUTOFF_LIMIT); } else { - ++_ACKCutoffCount; + if ((now - _lastEchoRequestReceived) >= (ZT_PEER_GENERAL_RATE_LIMIT)) { + _lastEchoRequestReceived = now; + return true; + } + return false; } - return (_ACKCutoffCount < ZT_PATH_QOS_ACK_CUTOFF_LIMIT); - } - - /** - * Rate limit gate for VERB_QOS_MEASUREMENT - */ - inline bool rateGateQoS(const int64_t now) - { - if ((now - _lastQoSWindowReset) >= ZT_PATH_QOS_ACK_CUTOFF_TIME) { - _lastQoSWindowReset = now; - _QoSCutoffCount = 0; - } else { - ++_QoSCutoffCount; - } - return (_QoSCutoffCount < ZT_PATH_QOS_ACK_CUTOFF_LIMIT); - } - - /** - * @return Whether this peer is reachable via an aggregate link - */ - inline bool hasAggregateLink() { - return _localMultipathSupported && _remoteMultipathSupported && _remotePeerMultipathEnabled; + */ + return true; } /** @@ -610,6 +520,18 @@ public: } } + /** + * + * @return + */ + SharedPtr bond() { return _bondToPeer; } + + /** + * + * @return + */ + inline int8_t bondingPolicy() { return _bondingPolicy; } + private: struct 
_PeerPath { @@ -628,25 +550,16 @@ private: int64_t _lastTriedMemorizedPath; int64_t _lastDirectPathPushSent; int64_t _lastDirectPathPushReceive; + int64_t _lastEchoRequestReceived; int64_t _lastCredentialRequestSent; int64_t _lastWhoisRequestReceived; - int64_t _lastEchoRequestReceived; int64_t _lastCredentialsReceived; int64_t _lastTrustEstablishedPacketReceived; int64_t _lastSentFullHello; - int64_t _lastPathPrune; - int64_t _lastACKWindowReset; - int64_t _lastQoSWindowReset; - int64_t _lastMultipathCompatibilityCheck; + int64_t _lastEchoCheck; unsigned char _freeRandomByte; - int _uniqueAlivePathCount; - - bool _localMultipathSupported; - bool _remoteMultipathSupported; - bool _canUseMultipath; - uint16_t _vProto; uint16_t _vMajor; uint16_t _vMinor; @@ -659,62 +572,22 @@ private: unsigned int _directPathPushCutoffCount; unsigned int _credentialsCutoffCount; - unsigned int _QoSCutoffCount; - unsigned int _ACKCutoffCount; + unsigned int _echoRequestCutoffCount; AtomicCounter __refCount; - RingBuffer _pathChoiceHist; - - bool _linkIsBalanced; - bool _linkIsRedundant; bool _remotePeerMultipathEnabled; + int _uniqueAlivePathCount; + bool _localMultipathSupported; + bool _remoteMultipathSupported; + bool _canUseMultipath; - int64_t _lastAggregateStatsReport; - int64_t _lastAggregateAllocation; + volatile bool _shouldCollectPathStatistics; + volatile int8_t _bondingPolicy; - char _interfaceListStr[256]; // 16 characters * 16 paths in a link + int32_t _lastComputedAggregateMeanLatency; - // - struct LinkPerformanceEntry - { - int64_t packetId; - struct VirtualPath *egressVirtualPath; - struct VirtualPath *ingressVirtualPath; - }; - - // Virtual paths - int _virtualPathCount; - Mutex _virtual_paths_m; - struct VirtualPath - { - SharedPtr p; - int64_t localSocket; - std::queue performanceEntries; - }; - std::vector _virtualPaths; - - // Flows - struct Flow - { - Flow(int64_t fid, int64_t ls) : - flowId(fid), - lastSend(ls), - assignedPath(NULL) - {} - - int64_t flowId; - 
int64_t bytesPerSecond; - int64_t lastSend; - struct VirtualPath *assignedPath; - }; - - std::map _flows; - - int16_t _roundRobinPathAssignmentIdx; - - SharedPtr _activeBackupPath; - int16_t _pathAssignmentIdx; + SharedPtr _bondToPeer; }; } // namespace ZeroTier diff --git a/node/RingBuffer.hpp b/node/RingBuffer.hpp index 2d6cd1949..42047a873 100644 --- a/node/RingBuffer.hpp +++ b/node/RingBuffer.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. @@ -238,6 +238,21 @@ public: return curr_cnt ? subtotal / (float)curr_cnt : 0; } + /** + * @return The sum of the contents of the buffer + */ + inline float sum() + { + size_t iterator = begin; + float total = 0; + size_t curr_cnt = count(); + for (size_t i=0; i frameLen) + return false; // invalid! + proto = frameData[pos]; + pos += ((unsigned int)frameData[pos + 1] * 8) + 8; + break; + + //case 44: // fragment -- we currently can't parse these and they are deprecated in IPv6 anyway + //case 50: + //case 51: // IPSec ESP and AH -- we have to stop here since this is encrypted stuff + default: + return true; + } + } + return false; // overflow == invalid +} + void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddress &fromAddr,const void *data,unsigned int len) { + int32_t flowId = ZT_QOS_NO_FLOW; try { const int64_t now = RR->node->now(); @@ -112,6 +142,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre if (rq->packetId != fragmentPacketId) { // No packet found, so we received a fragment without its head. 
+ rq->flowId = flowId; rq->timestamp = now; rq->packetId = fragmentPacketId; rq->frags[fragmentNumber - 1] = fragment; @@ -130,7 +161,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre for(unsigned int f=1;ffrag0.append(rq->frags[f - 1].payload(),rq->frags[f - 1].payloadLength()); - if (rq->frag0.tryDecode(RR,tPtr)) { + if (rq->frag0.tryDecode(RR,tPtr,flowId)) { rq->timestamp = 0; // packet decoded, free entry } else { rq->complete = true; // set complete flag but leave entry since it probably needs WHOIS or something @@ -195,6 +226,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre if (rq->packetId != packetId) { // If we have no other fragments yet, create an entry and save the head + rq->flowId = flowId; rq->timestamp = now; rq->packetId = packetId; rq->frag0.init(data,len,path,now); @@ -211,7 +243,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre for(unsigned int f=1;ftotalFragments;++f) rq->frag0.append(rq->frags[f - 1].payload(),rq->frags[f - 1].payloadLength()); - if (rq->frag0.tryDecode(RR,tPtr)) { + if (rq->frag0.tryDecode(RR,tPtr,flowId)) { rq->timestamp = 0; // packet decoded, free entry } else { rq->complete = true; // set complete flag but leave entry since it probably needs WHOIS or something @@ -224,9 +256,10 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre } else { // Packet is unfragmented, so just process it IncomingPacket packet(data,len,path,now); - if (!packet.tryDecode(RR,tPtr)) { + if (!packet.tryDecode(RR,tPtr,flowId)) { RXQueueEntry *const rq = _nextRXQueueEntry(); Mutex::Lock rql(rq->lock); + rq->flowId = flowId; rq->timestamp = now; rq->packetId = packet.packetId(); rq->frag0 = packet; @@ -242,43 +275,6 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre } catch ( ... 
) {} // sanity check, should be caught elsewhere } -// Returns true if packet appears valid; pos and proto will be set -static bool _ipv6GetPayload(const uint8_t *frameData,unsigned int frameLen,unsigned int &pos,unsigned int &proto) -{ - if (frameLen < 40) - return false; - pos = 40; - proto = frameData[6]; - while (pos <= frameLen) { - switch(proto) { - case 0: // hop-by-hop options - case 43: // routing - case 60: // destination options - case 135: // mobility options - if ((pos + 8) > frameLen) - return false; // invalid! - proto = frameData[pos]; - pos += ((unsigned int)frameData[pos + 1] * 8) + 8; - break; - - //case 44: // fragment -- we currently can't parse these and they are deprecated in IPv6 anyway - //case 50: - //case 51: // IPSec ESP and AH -- we have to stop here since this is encrypted stuff - default: - return true; - } - } - return false; // overflow == invalid -} - -bool Switch::isFlowAware() -{ - int mode = RR->node->getMultipathMode(); - return (( mode == ZT_MULTIPATH_BALANCE_RR_FLOW) - || (mode == ZT_MULTIPATH_BALANCE_XOR_FLOW) - || (mode == ZT_MULTIPATH_BALANCE_DYNAMIC_FLOW)); -} - void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const MAC &from,const MAC &to,unsigned int etherType,unsigned int vlanId,const void *data,unsigned int len) { if (!network->hasConfig()) @@ -293,75 +289,73 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const } } - uint8_t qosBucket = ZT_QOS_DEFAULT_BUCKET; + uint8_t qosBucket = ZT_AQM_DEFAULT_BUCKET; - /* A pseudo-unique identifier used by the balancing and bonding policies to associate properties - * of a specific protocol flow over time and to determine which virtual path this packet - * shall be sent out on. This identifier consists of the source port and destination port - * of the encapsulated frame. + /** + * A pseudo-unique identifier used by balancing and bonding policies to + * categorize individual flows/conversations for assignment to a specific + * physical path. 
This identifier consists of the source port and + * destination port of the encapsulated frame. * - * A flowId of -1 will indicate that whatever packet we are about transmit has no - * preferred virtual path and will be sent out according to what the multipath logic - * deems appropriate. An example of this would be an ICMP packet. + * A flowId of -1 will indicate that there is no preference for how this + * packet shall be sent. An example of this would be an ICMP packet. */ - int64_t flowId = -1; + int32_t flowId = ZT_QOS_NO_FLOW; - if (isFlowAware()) { - if (etherType == ZT_ETHERTYPE_IPV4 && (len >= 20)) { - uint16_t srcPort = 0; - uint16_t dstPort = 0; - int8_t proto = (reinterpret_cast(data)[9]); - const unsigned int headerLen = 4 * (reinterpret_cast(data)[0] & 0xf); - switch(proto) { - case 0x01: // ICMP - flowId = 0x01; - break; - // All these start with 16-bit source and destination port in that order - case 0x06: // TCP - case 0x11: // UDP - case 0x84: // SCTP - case 0x88: // UDPLite - if (len > (headerLen + 4)) { - unsigned int pos = headerLen + 0; - srcPort = (reinterpret_cast(data)[pos++]) << 8; - srcPort |= (reinterpret_cast(data)[pos]); - pos++; - dstPort = (reinterpret_cast(data)[pos++]) << 8; - dstPort |= (reinterpret_cast(data)[pos]); - flowId = ((int64_t)srcPort << 48) | ((int64_t)dstPort << 32) | proto; - } - break; - } + if (etherType == ZT_ETHERTYPE_IPV4 && (len >= 20)) { + uint16_t srcPort = 0; + uint16_t dstPort = 0; + uint8_t proto = (reinterpret_cast(data)[9]); + const unsigned int headerLen = 4 * (reinterpret_cast(data)[0] & 0xf); + switch(proto) { + case 0x01: // ICMP + //flowId = 0x01; + break; + // All these start with 16-bit source and destination port in that order + case 0x06: // TCP + case 0x11: // UDP + case 0x84: // SCTP + case 0x88: // UDPLite + if (len > (headerLen + 4)) { + unsigned int pos = headerLen + 0; + srcPort = (reinterpret_cast(data)[pos++]) << 8; + srcPort |= (reinterpret_cast(data)[pos]); + pos++; + dstPort = 
(reinterpret_cast(data)[pos++]) << 8; + dstPort |= (reinterpret_cast(data)[pos]); + flowId = dstPort ^ srcPort ^ proto; + } + break; } + } - if (etherType == ZT_ETHERTYPE_IPV6 && (len >= 40)) { - uint16_t srcPort = 0; - uint16_t dstPort = 0; - unsigned int pos; - unsigned int proto; - _ipv6GetPayload((const uint8_t *)data, len, pos, proto); - switch(proto) { - case 0x3A: // ICMPv6 - flowId = 0x3A; - break; - // All these start with 16-bit source and destination port in that order - case 0x06: // TCP - case 0x11: // UDP - case 0x84: // SCTP - case 0x88: // UDPLite - if (len > (pos + 4)) { - srcPort = (reinterpret_cast(data)[pos++]) << 8; - srcPort |= (reinterpret_cast(data)[pos]); - pos++; - dstPort = (reinterpret_cast(data)[pos++]) << 8; - dstPort |= (reinterpret_cast(data)[pos]); - flowId = ((int64_t)srcPort << 48) | ((int64_t)dstPort << 32) | proto; - } - break; - default: - break; - } + if (etherType == ZT_ETHERTYPE_IPV6 && (len >= 40)) { + uint16_t srcPort = 0; + uint16_t dstPort = 0; + unsigned int pos; + unsigned int proto; + _ipv6GetPayload((const uint8_t *)data, len, pos, proto); + switch(proto) { + case 0x3A: // ICMPv6 + //flowId = 0x3A; + break; + // All these start with 16-bit source and destination port in that order + case 0x06: // TCP + case 0x11: // UDP + case 0x84: // SCTP + case 0x88: // UDPLite + if (len > (pos + 4)) { + srcPort = (reinterpret_cast(data)[pos++]) << 8; + srcPort |= (reinterpret_cast(data)[pos]); + pos++; + dstPort = (reinterpret_cast(data)[pos++]) << 8; + dstPort |= (reinterpret_cast(data)[pos]); + flowId = dstPort ^ srcPort ^ proto; + } + break; + default: + break; } } @@ -595,7 +589,7 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const } } -void Switch::aqm_enqueue(void *tPtr, const SharedPtr &network, Packet &packet,bool encrypt,int qosBucket,int64_t flowId) +void Switch::aqm_enqueue(void *tPtr, const SharedPtr &network, Packet &packet,bool encrypt,int qosBucket,int32_t flowId) { if(!network->qosEnabled()) 
{ send(tPtr, packet, encrypt, flowId); @@ -603,18 +597,16 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr &network, Packet & } NetworkQoSControlBlock *nqcb = _netQueueControlBlock[network->id()]; if (!nqcb) { - // DEBUG_INFO("creating network QoS control block (NQCB) for network %llx", network->id()); nqcb = new NetworkQoSControlBlock(); _netQueueControlBlock[network->id()] = nqcb; // Initialize ZT_QOS_NUM_BUCKETS queues and place them in the INACTIVE list // These queues will be shuffled between the new/old/inactive lists by the enqueue/dequeue algorithm - for (int i=0; iinactiveQueues.push_back(new ManagedQueue(i)); } } // Don't apply QoS scheduling to ZT protocol traffic if (packet.verb() != Packet::VERB_FRAME && packet.verb() != Packet::VERB_EXT_FRAME) { - // just send packet normally, no QoS for ZT protocol traffic send(tPtr, packet, encrypt, flowId); } @@ -624,8 +616,9 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr &network, Packet & const Address dest(packet.destination()); TXQueueEntry *txEntry = new TXQueueEntry(dest,RR->node->now(),packet,encrypt,flowId); + ManagedQueue *selectedQueue = nullptr; - for (size_t i=0; ioldQueues.size()) { // search old queues first (I think this is best since old would imply most recent usage of the queue) if (nqcb->oldQueues[i]->id == qosBucket) { selectedQueue = nqcb->oldQueues[i]; @@ -638,7 +631,7 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr &network, Packet & if (nqcb->inactiveQueues[i]->id == qosBucket) { selectedQueue = nqcb->inactiveQueues[i]; // move queue to end of NEW queue list - selectedQueue->byteCredit = ZT_QOS_QUANTUM; + selectedQueue->byteCredit = ZT_AQM_QUANTUM; // DEBUG_INFO("moving q=%p from INACTIVE to NEW list", selectedQueue); nqcb->newQueues.push_back(selectedQueue); nqcb->inactiveQueues.erase(nqcb->inactiveQueues.begin() + i); @@ -657,11 +650,11 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr &network, Packet & // Drop a packet if necessary ManagedQueue 
*selectedQueueToDropFrom = nullptr; - if (nqcb->_currEnqueuedPackets > ZT_QOS_MAX_ENQUEUED_PACKETS) + if (nqcb->_currEnqueuedPackets > ZT_AQM_MAX_ENQUEUED_PACKETS) { // DEBUG_INFO("too many enqueued packets (%d), finding packet to drop", nqcb->_currEnqueuedPackets); int maxQueueLength = 0; - for (size_t i=0; ioldQueues.size()) { if (nqcb->oldQueues[i]->byteLength > maxQueueLength) { maxQueueLength = nqcb->oldQueues[i]->byteLength; @@ -694,7 +687,7 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr &network, Packet & uint64_t Switch::control_law(uint64_t t, int count) { - return (uint64_t)(t + ZT_QOS_INTERVAL / sqrt(count)); + return (uint64_t)(t + ZT_AQM_INTERVAL / sqrt(count)); } Switch::dqr Switch::dodequeue(ManagedQueue *q, uint64_t now) @@ -708,14 +701,14 @@ Switch::dqr Switch::dodequeue(ManagedQueue *q, uint64_t now) return r; } uint64_t sojourn_time = now - r.p->creationTime; - if (sojourn_time < ZT_QOS_TARGET || q->byteLength <= ZT_DEFAULT_MTU) { + if (sojourn_time < ZT_AQM_TARGET || q->byteLength <= ZT_DEFAULT_MTU) { // went below - stay below for at least interval q->first_above_time = 0; } else { if (q->first_above_time == 0) { // just went above from below. if still above at // first_above_time, will say it's ok to drop. - q->first_above_time = now + ZT_QOS_INTERVAL; + q->first_above_time = now + ZT_AQM_INTERVAL; } else if (now >= q->first_above_time) { r.ok_to_drop = true; } @@ -747,7 +740,7 @@ Switch::TXQueueEntry * Switch::CoDelDequeue(ManagedQueue *q, bool isNew, uint64_ q->q.pop_front(); // drop r = dodequeue(q, now); q->dropping = true; - q->count = (q->count > 2 && now - q->drop_next < 8*ZT_QOS_INTERVAL)? + q->count = (q->count > 2 && now - q->drop_next < 8*ZT_AQM_INTERVAL)? 
q->count - 2 : 1; q->drop_next = control_law(now, q->count); } @@ -775,7 +768,7 @@ void Switch::aqm_dequeue(void *tPtr) while (currQueues->size()) { ManagedQueue *queueAtFrontOfList = currQueues->front(); if (queueAtFrontOfList->byteCredit < 0) { - queueAtFrontOfList->byteCredit += ZT_QOS_QUANTUM; + queueAtFrontOfList->byteCredit += ZT_AQM_QUANTUM; // Move to list of OLD queues // DEBUG_INFO("moving q=%p from NEW to OLD list", queueAtFrontOfList); oldQueues->push_back(queueAtFrontOfList); @@ -810,7 +803,7 @@ void Switch::aqm_dequeue(void *tPtr) while (currQueues->size()) { ManagedQueue *queueAtFrontOfList = currQueues->front(); if (queueAtFrontOfList->byteCredit < 0) { - queueAtFrontOfList->byteCredit += ZT_QOS_QUANTUM; + queueAtFrontOfList->byteCredit += ZT_AQM_QUANTUM; oldQueues->push_back(queueAtFrontOfList); currQueues->erase(currQueues->begin()); } else { @@ -850,7 +843,7 @@ void Switch::removeNetworkQoSControlBlock(uint64_t nwid) } } -void Switch::send(void *tPtr,Packet &packet,bool encrypt,int64_t flowId) +void Switch::send(void *tPtr,Packet &packet,bool encrypt,int32_t flowId) { const Address dest(packet.destination()); if (dest == RR->identity.address()) @@ -883,7 +876,7 @@ void Switch::requestWhois(void *tPtr,const int64_t now,const Address &addr) const SharedPtr upstream(RR->topology->getUpstreamPeer()); if (upstream) { - int64_t flowId = -1; + int32_t flowId = ZT_QOS_NO_FLOW; Packet outp(upstream->address(),RR->identity.address(),Packet::VERB_WHOIS); addr.appendTo(outp); RR->node->expectReplyTo(outp.packetId()); @@ -903,7 +896,7 @@ void Switch::doAnythingWaitingForPeer(void *tPtr,const SharedPtr &peer) RXQueueEntry *const rq = &(_rxQueue[ptr]); Mutex::Lock rql(rq->lock); if ((rq->timestamp)&&(rq->complete)) { - if ((rq->frag0.tryDecode(RR,tPtr))||((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT)) + if ((rq->frag0.tryDecode(RR,tPtr,rq->flowId))||((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT)) rq->timestamp = 0; } } @@ -954,7 +947,7 @@ unsigned long 
Switch::doTimerTasks(void *tPtr,int64_t now) RXQueueEntry *const rq = &(_rxQueue[ptr]); Mutex::Lock rql(rq->lock); if ((rq->timestamp)&&(rq->complete)) { - if ((rq->frag0.tryDecode(RR,tPtr))||((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT)) { + if ((rq->frag0.tryDecode(RR,tPtr,rq->flowId))||((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT)) { rq->timestamp = 0; } else { const Address src(rq->frag0.source()); @@ -1000,7 +993,7 @@ bool Switch::_shouldUnite(const int64_t now,const Address &source,const Address return false; } -bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt,int64_t flowId) +bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt,int32_t flowId) { SharedPtr viaPath; const int64_t now = RR->node->now(); @@ -1008,8 +1001,18 @@ bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt,int64_t flowId) const SharedPtr peer(RR->topology->getPeer(tPtr,destination)); if (peer) { - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BROADCAST) { - // Nothing here, we'll grab an entire set of paths to send out on below + if ((peer->bondingPolicy() == ZT_BONDING_POLICY_BROADCAST) + && (packet.verb() == Packet::VERB_FRAME || packet.verb() == Packet::VERB_EXT_FRAME)) { + const SharedPtr relay(RR->topology->getUpstreamPeer()); + Mutex::Lock _l(peer->_paths_m); + for(int i=0;i_paths[i].p && peer->_paths[i].p->alive(now)) { + char pathStr[128]; + peer->_paths[i].p->address().toString(pathStr); + _sendViaSpecificPath(tPtr,peer,peer->_paths[i].p,now,packet,encrypt,flowId); + } + } + return true; } else { viaPath = peer->getAppropriatePath(now,false,flowId); @@ -1021,61 +1024,51 @@ bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt,int64_t flowId) return false; } } + if (viaPath) { + _sendViaSpecificPath(tPtr,peer,viaPath,now,packet,encrypt,flowId); + return true; + } } + } + return false; +} + +void Switch::_sendViaSpecificPath(void *tPtr,SharedPtr peer,SharedPtr viaPath,int64_t now,Packet &packet,bool encrypt,int32_t flowId) +{ + 
unsigned int mtu = ZT_DEFAULT_PHYSMTU; + uint64_t trustedPathId = 0; + RR->topology->getOutboundPathInfo(viaPath->address(),mtu,trustedPathId); + + unsigned int chunkSize = std::min(packet.size(),mtu); + packet.setFragmented(chunkSize < packet.size()); + + peer->recordOutgoingPacket(viaPath, packet.packetId(), packet.payloadLength(), packet.verb(), flowId, now); + + if (trustedPathId) { + packet.setTrusted(trustedPathId); } else { - return false; + packet.armor(peer->key(),encrypt); } - // If sending on all paths, set viaPath to first path - int nextPathIdx = 0; - std::vector> paths = peer->getAllPaths(now); - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BROADCAST) { - if (paths.size()) { - viaPath = paths[nextPathIdx++]; - } - } + if (viaPath->send(RR,tPtr,packet.data(),chunkSize,now)) { + if (chunkSize < packet.size()) { + // Too big for one packet, fragment the rest + unsigned int fragStart = chunkSize; + unsigned int remaining = packet.size() - chunkSize; + unsigned int fragsRemaining = (remaining / (mtu - ZT_PROTO_MIN_FRAGMENT_LENGTH)); + if ((fragsRemaining * (mtu - ZT_PROTO_MIN_FRAGMENT_LENGTH)) < remaining) + ++fragsRemaining; + const unsigned int totalFragments = fragsRemaining + 1; - while (viaPath) { - unsigned int mtu = ZT_DEFAULT_PHYSMTU; - uint64_t trustedPathId = 0; - RR->topology->getOutboundPathInfo(viaPath->address(),mtu,trustedPathId); - unsigned int chunkSize = std::min(packet.size(),mtu); - packet.setFragmented(chunkSize < packet.size()); - peer->recordOutgoingPacket(viaPath, packet.packetId(), packet.payloadLength(), packet.verb(), now); - - if (trustedPathId) { - packet.setTrusted(trustedPathId); - } else { - packet.armor(peer->key(),encrypt); - } - - if (viaPath->send(RR,tPtr,packet.data(),chunkSize,now)) { - if (chunkSize < packet.size()) { - // Too big for one packet, fragment the rest - unsigned int fragStart = chunkSize; - unsigned int remaining = packet.size() - chunkSize; - unsigned int fragsRemaining = (remaining / (mtu - 
ZT_PROTO_MIN_FRAGMENT_LENGTH)); - if ((fragsRemaining * (mtu - ZT_PROTO_MIN_FRAGMENT_LENGTH)) < remaining) - ++fragsRemaining; - const unsigned int totalFragments = fragsRemaining + 1; - - for(unsigned int fno=1;fnosend(RR,tPtr,frag.data(),frag.size(),now); - fragStart += chunkSize; - remaining -= chunkSize; - } - } - } - viaPath.zero(); - if (RR->node->getMultipathMode() == ZT_MULTIPATH_BROADCAST) { - if (paths.size() > nextPathIdx) { - viaPath = paths[nextPathIdx++]; + for(unsigned int fno=1;fnosend(RR,tPtr,frag.data(),frag.size(),now); + fragStart += chunkSize; + remaining -= chunkSize; } } } - return true; } } // namespace ZeroTier diff --git a/node/Switch.hpp b/node/Switch.hpp index f535cb8eb..f1436c7cf 100644 --- a/node/Switch.hpp +++ b/node/Switch.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. @@ -59,6 +59,8 @@ class Switch struct ManagedQueue; struct TXQueueEntry; + friend class SharedPtr; + typedef struct { TXQueueEntry *p; bool ok_to_drop; @@ -123,7 +125,7 @@ public: * @param encrypt Encrypt packet payload? 
(always true except for HELLO) * @param qosBucket Which bucket the rule-system determined this packet should fall into */ - void aqm_enqueue(void *tPtr, const SharedPtr &network, Packet &packet,bool encrypt,int qosBucket,int64_t flowId = -1); + void aqm_enqueue(void *tPtr, const SharedPtr &network, Packet &packet,bool encrypt,int qosBucket,int32_t flowId = ZT_QOS_NO_FLOW); /** * Performs a single AQM cycle and dequeues and transmits all eligible packets on all networks @@ -169,7 +171,7 @@ public: * @param packet Packet to send (buffer may be modified) * @param encrypt Encrypt packet payload? (always true except for HELLO) */ - void send(void *tPtr,Packet &packet,bool encrypt,int64_t flowId = -1); + void send(void *tPtr,Packet &packet,bool encrypt,int32_t flowId = ZT_QOS_NO_FLOW); /** * Request WHOIS on a given address @@ -204,7 +206,8 @@ public: private: bool _shouldUnite(const int64_t now,const Address &source,const Address &destination); - bool _trySend(void *tPtr,Packet &packet,bool encrypt,int64_t flowId = -1); // packet is modified if return is true + bool _trySend(void *tPtr,Packet &packet,bool encrypt,int32_t flowId = ZT_QOS_NO_FLOW); // packet is modified if return is true + void _sendViaSpecificPath(void *tPtr,SharedPtr peer,SharedPtr viaPath,int64_t now,Packet &packet,bool encrypt,int32_t flowId); const RuntimeEnvironment *const RR; int64_t _lastBeaconResponse; @@ -225,6 +228,7 @@ private: unsigned int totalFragments; // 0 if only frag0 received, waiting for frags uint32_t haveFragments; // bit mask, LSB to MSB volatile bool complete; // if true, packet is complete + volatile int32_t flowId; Mutex lock; }; RXQueueEntry _rxQueue[ZT_RX_QUEUE_SIZE]; @@ -253,7 +257,7 @@ private: struct TXQueueEntry { TXQueueEntry() {} - TXQueueEntry(Address d,uint64_t ct,const Packet &p,bool enc,int64_t fid) : + TXQueueEntry(Address d,uint64_t ct,const Packet &p,bool enc,int32_t fid) : dest(d), creationTime(ct), packet(p), @@ -264,7 +268,7 @@ private: uint64_t creationTime; 
Packet packet; // unencrypted/unMAC'd packet -- this is done at send time bool encrypt; - int64_t flowId; + int32_t flowId; }; std::list< TXQueueEntry > _txQueue; Mutex _txQueue_m; @@ -296,7 +300,7 @@ private: { ManagedQueue(int id) : id(id), - byteCredit(ZT_QOS_QUANTUM), + byteCredit(ZT_AQM_QUANTUM), byteLength(0), dropping(false) {} diff --git a/node/Trace.cpp b/node/Trace.cpp index 96abf5c72..f7175c4c0 100644 --- a/node/Trace.cpp +++ b/node/Trace.cpp @@ -94,29 +94,26 @@ void Trace::peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId, } } -void Trace::peerLinkNowAggregate(void *const tPtr,Peer &peer) +void Trace::peerLinkNowRedundant(void *const tPtr,Peer &peer) { - if ((RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_RANDOM)) { - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is now a randomly-distributed aggregate link",peer.address().toInt()); - } - if ((RR->node->getMultipathMode() == ZT_MULTIPATH_BALANCE_DYNAMIC_OPAQUE)) { - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is now a proportionally-balanced aggregate link",peer.address().toInt()); - } + //ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is fully redundant",peer.address().toInt()); } -void Trace::peerLinkNoLongerAggregate(void *const tPtr,Peer &peer) +void Trace::peerLinkNoLongerRedundant(void *const tPtr,Peer &peer) { - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx has degraded and is no longer an aggregate link",peer.address().toInt()); + //ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is no longer redundant",peer.address().toInt()); } void Trace::peerLinkAggregateStatistics(void *const tPtr,Peer &peer) { - ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is composed of (%d) physical paths %s, has PDV (%.0f ms), mean latency (%.0f ms)", + /* + ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is composed of (%d) physical paths %s, has packet delay variance (%.0f ms), mean latency (%.0f ms)", peer.address().toInt(), peer.aggregateLinkPhysicalPathCount(), peer.interfaceListStr(), 
peer.computeAggregateLinkPacketDelayVariance(), peer.computeAggregateLinkMeanLatency()); + */ } void Trace::peerLearnedNewPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &newPath,const uint64_t packetId) diff --git a/node/Trace.hpp b/node/Trace.hpp index b2a77161f..71169ebbb 100644 --- a/node/Trace.hpp +++ b/node/Trace.hpp @@ -109,8 +109,8 @@ public: void peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr &path,const uint64_t packetId,const Packet::Verb verb); - void peerLinkNowAggregate(void *const tPtr,Peer &peer); - void peerLinkNoLongerAggregate(void *const tPtr,Peer &peer); + void peerLinkNowRedundant(void *const tPtr,Peer &peer); + void peerLinkNoLongerRedundant(void *const tPtr,Peer &peer); void peerLinkAggregateStatistics(void *const tPtr,Peer &peer); diff --git a/node/Utils.hpp b/node/Utils.hpp index 5ba5b035f..b80a7528d 100644 --- a/node/Utils.hpp +++ b/node/Utils.hpp @@ -214,12 +214,12 @@ public: return l; } - static inline float normalize(float value, int64_t bigMin, int64_t bigMax, int32_t targetMin, int32_t targetMax) + static inline float normalize(float value, float bigMin, float bigMax, float targetMin, float targetMax) { - int64_t bigSpan = bigMax - bigMin; - int64_t smallSpan = targetMax - targetMin; - float valueScaled = (value - (float)bigMin) / (float)bigSpan; - return (float)targetMin + valueScaled * (float)smallSpan; + float bigSpan = bigMax - bigMin; + float smallSpan = targetMax - targetMin; + float valueScaled = (value - bigMin) / bigSpan; + return targetMin + valueScaled * smallSpan; } /** @@ -253,6 +253,7 @@ public: static inline int strToInt(const char *s) { return (int)strtol(s,(char **)0,10); } static inline unsigned long strToULong(const char *s) { return strtoul(s,(char **)0,10); } static inline long strToLong(const char *s) { return strtol(s,(char **)0,10); } + static inline double strToDouble(const char *s) { return strtod(s,NULL); } static inline unsigned long 
long strToU64(const char *s) { #ifdef __WINDOWS__ diff --git a/objects.mk b/objects.mk index efa2f3c0f..b55ba3044 100644 --- a/objects.mk +++ b/objects.mk @@ -24,7 +24,9 @@ CORE_OBJS=\ node/Tag.o \ node/Topology.o \ node/Trace.o \ - node/Utils.o + node/Utils.o \ + node/Bond.o \ + node/BondController.o ONE_OBJS=\ controller/EmbeddedNetworkController.o \ diff --git a/osdep/Binder.hpp b/osdep/Binder.hpp index 660e6f0c3..0fde33452 100644 --- a/osdep/Binder.hpp +++ b/osdep/Binder.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. @@ -347,6 +347,23 @@ public: } } + // Generate set of unique interface names (used for formation of logical slave set in multipath code) + for(std::map::const_iterator ii(localIfAddrs.begin());ii!=localIfAddrs.end();++ii) { + slaveIfNames.insert(ii->second); + } + for (std::set::iterator si(slaveIfNames.begin());si!=slaveIfNames.end();si++) { + bool bFoundMatch = false; + for(std::map::const_iterator ii(localIfAddrs.begin());ii!=localIfAddrs.end();++ii) { + if (ii->second == *si) { + bFoundMatch = true; + break; + } + } + if (!bFoundMatch) { + slaveIfNames.erase(si); + } + } + // Create new bindings for those not already bound for(std::map::const_iterator ii(localIfAddrs.begin());ii!=localIfAddrs.end();++ii) { unsigned int bi = 0; @@ -444,7 +461,15 @@ public: return false; } + inline std::set getSlaveInterfaceNames() + { + Mutex::Lock _l(_lock); + return slaveIfNames; + } + private: + + std::set slaveIfNames; _Binding _bindings[ZT_BINDER_MAX_BINDINGS]; std::atomic _bindingCount; Mutex _lock; diff --git a/osdep/LinuxNetLink.cpp b/osdep/LinuxNetLink.cpp 
index 8d4ce2482..13e7176e4 100644 --- a/osdep/LinuxNetLink.cpp +++ b/osdep/LinuxNetLink.cpp @@ -55,8 +55,6 @@ LinuxNetLink::LinuxNetLink() { // set socket timeout to 1 sec so we're not permablocking recv() calls _setSocketTimeout(_fd, 1); - int yes=1; - setsockopt(_fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); _la.nl_family = AF_NETLINK; _la.nl_pid = 0; //getpid()+1; @@ -430,8 +428,6 @@ void LinuxNetLink::_linkDeleted(struct nlmsghdr *nlp) void LinuxNetLink::_requestIPv4Routes() { int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - int yes=1; - setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); if (fd == -1) { fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); return; @@ -485,8 +481,6 @@ void LinuxNetLink::_requestIPv4Routes() void LinuxNetLink::_requestIPv6Routes() { int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - int yes=1; - setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); if (fd == -1) { fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); return; @@ -540,8 +534,6 @@ void LinuxNetLink::_requestIPv6Routes() void LinuxNetLink::_requestInterfaceList() { int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - int yes=1; - setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); if (fd == -1) { fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); return; @@ -595,8 +587,6 @@ void LinuxNetLink::addRoute(const InetAddress &target, const InetAddress &via, c if (!target) return; int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - int yes=1; - setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); if (fd == -1) { fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); return; @@ -713,8 +703,6 @@ void LinuxNetLink::delRoute(const InetAddress &target, const InetAddress &via, c if (!target) return; int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - int yes=1; - 
setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); if (fd == -1) { fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); return; @@ -828,8 +816,6 @@ void LinuxNetLink::delRoute(const InetAddress &target, const InetAddress &via, c void LinuxNetLink::addAddress(const InetAddress &addr, const char *iface) { int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - int yes=1; - setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); if (fd == -1) { fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); return; @@ -948,8 +934,6 @@ void LinuxNetLink::addAddress(const InetAddress &addr, const char *iface) void LinuxNetLink::removeAddress(const InetAddress &addr, const char *iface) { int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - int yes=1; - setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); if (fd == -1) { fprintf(stderr, "Error opening RTNETLINK socket: %s\n", strerror(errno)); return; diff --git a/osdep/OSUtils.cpp b/osdep/OSUtils.cpp index 3770f0217..537e14966 100644 --- a/osdep/OSUtils.cpp +++ b/osdep/OSUtils.cpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. @@ -459,6 +459,22 @@ uint64_t OSUtils::jsonInt(const nlohmann::json &jv,const uint64_t dfl) return dfl; } +double OSUtils::jsonDouble(const nlohmann::json &jv,const double dfl) +{ + try { + if (jv.is_number()) { + return (double)jv; + } + else if (jv.is_string()) { + std::string s = jv; + return Utils::strToDouble(s.c_str()); + } else if (jv.is_boolean()) { + return (double)jv; + } + } catch ( ... 
) {} + return dfl; +} + uint64_t OSUtils::jsonIntHex(const nlohmann::json &jv,const uint64_t dfl) { try { diff --git a/osdep/OSUtils.hpp b/osdep/OSUtils.hpp index 172575a09..70a5daccc 100644 --- a/osdep/OSUtils.hpp +++ b/osdep/OSUtils.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. @@ -277,6 +277,7 @@ public: static nlohmann::json jsonParse(const std::string &buf); static std::string jsonDump(const nlohmann::json &j,int indentation = 1); static uint64_t jsonInt(const nlohmann::json &jv,const uint64_t dfl); + static double jsonDouble(const nlohmann::json &jv,const double dfl); static uint64_t jsonIntHex(const nlohmann::json &jv,const uint64_t dfl); static bool jsonBool(const nlohmann::json &jv,const bool dfl); static std::string jsonString(const nlohmann::json &jv,const char *dfl); diff --git a/osdep/Phy.hpp b/osdep/Phy.hpp index b65a520eb..30da8b395 100644 --- a/osdep/Phy.hpp +++ b/osdep/Phy.hpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. 
@@ -261,46 +261,6 @@ public: } } - /** - * Whether or not the socket object is in a closed state - * - * @param s Socket object - * @return true if socket is closed, false if otherwise - */ - inline bool isClosed(PhySocket *s) - { - PhySocketImpl *sws = (reinterpret_cast(s)); - return sws->type == ZT_PHY_SOCKET_CLOSED; - } - - /** - * Get state of socket object - * - * @param s Socket object - * @return State of socket - */ - inline int getState(PhySocket *s) - { - PhySocketImpl *sws = (reinterpret_cast(s)); - return sws->type; - } - - /** - * In the event that this socket is erased, we need a way to convey to the multipath logic - * that this path is no longer valid. - * - * @param s Socket object - * @return Whether the state of this socket is within an acceptable range of values - */ - inline bool isValidState(PhySocket *s) - { - if (s) { - PhySocketImpl *sws = (reinterpret_cast(s)); - return sws->type >= ZT_PHY_SOCKET_CLOSED && sws->type <= ZT_PHY_SOCKET_UNIX_LISTEN; - } - return false; - } - /** * Cause poll() to stop waiting immediately * diff --git a/osdep/Slave.hpp b/osdep/Slave.hpp new file mode 100644 index 000000000..b1ae326ea --- /dev/null +++ b/osdep/Slave.hpp @@ -0,0 +1,238 @@ +/* + * Copyright (c)2013-2020 ZeroTier, Inc. + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file in the project's root directory. + * + * Change Date: 2024-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2.0 of the Apache License. 
+ */ +/****/ + +#ifndef ZT_SLAVE_HPP +#define ZT_SLAVE_HPP + +#include + +#include "../node/AtomicCounter.hpp" + +namespace ZeroTier { + +class Slave +{ + friend class SharedPtr; + +public: + + Slave() {} + + /** + * + * @param ifnameStr + * @param ipvPref + * @param speed + * @param enabled + * @param mode + * @param failoverToSlaveStr + * @param userSpecifiedAlloc + */ + Slave(std::string& ifnameStr, + uint8_t ipvPref, + uint32_t speed, + uint32_t slaveMonitorInterval, + uint32_t upDelay, + uint32_t downDelay, + bool enabled, + uint8_t mode, + std::string failoverToSlaveStr, + float userSpecifiedAlloc) : + _ifnameStr(ifnameStr), + _ipvPref(ipvPref), + _speed(speed), + _relativeSpeed(0), + _slaveMonitorInterval(slaveMonitorInterval), + _upDelay(upDelay), + _downDelay(downDelay), + _enabled(enabled), + _mode(mode), + _failoverToSlaveStr(failoverToSlaveStr), + _userSpecifiedAlloc(userSpecifiedAlloc), + _isUserSpecified(false) + {} + + /** + * @return The string representation of this slave's underlying interface's system name. + */ + inline std::string ifname() { return _ifnameStr; } + + /** + * @return Whether this slave is designated as a primary. + */ + inline bool primary() { return _mode == ZT_MULTIPATH_SLAVE_MODE_PRIMARY; } + + /** + * @return Whether this slave is designated as a spare. + */ + inline bool spare() { return _mode == ZT_MULTIPATH_SLAVE_MODE_SPARE; } + + /** + * @return The name of the slave interface that should be used in the event of a failure. + */ + inline std::string failoverToSlave() { return _failoverToSlaveStr; } + + /** + * @return Whether this slave interface was specified by the user or auto-detected. + */ + inline bool isUserSpecified() { return _isUserSpecified; } + + /** + * Signify that this slave was specified by the user and not the result of auto-detection. 
+ * + * @param isUserSpecified + */ + inline void setAsUserSpecified(bool isUserSpecified) { _isUserSpecified = isUserSpecified; } + + /** + * @return Whether or not the user has specified failover instructions. + */ + inline bool userHasSpecifiedFailoverInstructions() { return _failoverToSlaveStr.length(); } + + /** + * @return The speed of the slave relative to others in the bond. + */ + inline uint8_t relativeSpeed() { return _relativeSpeed; } + + /** + * Sets the speed of the slave relative to others in the bond. + * + * @param relativeSpeed The speed relative to the rest of the slave interfaces. + */ + inline void setRelativeSpeed(uint8_t relativeSpeed) { _relativeSpeed = relativeSpeed; } + + /** + * Sets the speed of the slave relative to others in the bond. + * + * @param relativeSpeed + */ + inline void setMonitorInterval(uint32_t interval) { _slaveMonitorInterval = interval; } + + /** + * @return The absolute speed of the slave interface (as specified by the user.) + */ + inline uint32_t monitorInterval() { return _slaveMonitorInterval; } + + /** + * @return The absolute speed of the slave interface (as specified by the user.) + */ + inline uint32_t speed() { return _speed; } + + /** + * @return The address preference for this slave interface (as specified by the user.) + */ + inline uint8_t ipvPref() { return _ipvPref; } + + /** + * @return The mode (e.g. primary/spare) for this slave interface (as specified by the user.) + */ + inline uint8_t mode() { return _mode; } + + /** + * @return The upDelay parameter for all paths on this slave interface. + */ + inline uint32_t upDelay() { return _upDelay; } + + /** + * @return The downDelay parameter for all paths on this slave interface. 
+ */ + inline uint32_t downDelay() { return _downDelay; } + + /** + * @return Whether this slave is enabled or disabled + */ + inline uint8_t enabled() { return _enabled; } + +private: + + /** + * String representation of underlying interface's system name + */ + std::string _ifnameStr; + + /** + * What preference (if any) a user has for IP protocol version used in + * path aggregations. Preference is expressed in the order of the digits: + * + * 0: no preference + * 4: IPv4 only + * 6: IPv6 only + * 46: IPv4 over IPv6 + * 64: IPv6 over IPv4 + */ + uint8_t _ipvPref; + + /** + * User-specified speed of this slave/link + */ + uint32_t _speed; + + /** + * Speed relative to other specified slaves/links (computed by Bond) + */ + uint8_t _relativeSpeed; + + /** + * User-specified interval for monitoring paths on this specific slave + * instead of using the more generic interval specified for the entire + * bond. + */ + uint32_t _slaveMonitorInterval; + + /** + * How long before a path is considered to be usable after coming online. (when using policies that + * support fail-over events). + */ + uint32_t _upDelay; + + /** + * How long before a path is considered to be dead (when using policies that + * support fail-over events). + */ + uint32_t _downDelay; + + /** + * Whether this slave is enabled, or (disabled (possibly bad config)) + */ + uint8_t _enabled; + + /** + * Whether this slave is designated as a primary, a spare, or no preference. + */ + uint8_t _mode; + + /** + * The specific name of the interface to be used in the event that this + * slave fails. + */ + std::string _failoverToSlaveStr; + + /** + * User-specified allocation + */ + float _userSpecifiedAlloc; + + /** + * Whether or not this slave was created as a result of manual user specification. This is + * important to know because certain policy decisions are dependent on whether the user + * intents to use a specific set of interfaces. 
+ */ + bool _isUserSpecified; + + AtomicCounter __refCount; + +}; + +} // namespace ZeroTier + +#endif \ No newline at end of file diff --git a/service/OneService.cpp b/service/OneService.cpp index 22c4f82e9..2b1cb631f 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -1,10 +1,10 @@ /* - * Copyright (c)2019 ZeroTier, Inc. + * Copyright (c)2013-2020 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * - * Change Date: 2023-01-01 + * Change Date: 2024-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. @@ -39,6 +39,8 @@ #include "../node/Salsa20.hpp" #include "../node/Poly1305.hpp" #include "../node/SHA512.hpp" +#include "../node/Bond.hpp" +#include "../node/Peer.hpp" #include "../osdep/Phy.hpp" #include "../osdep/Thread.hpp" @@ -48,6 +50,7 @@ #include "../osdep/Binder.hpp" #include "../osdep/ManagedRoute.hpp" #include "../osdep/BlockingQueue.hpp" +#include "../osdep/Slave.hpp" #include "OneService.hpp" #include "SoftwareUpdater.hpp" @@ -266,37 +269,43 @@ static void _peerToJson(nlohmann::json &pj,const ZT_Peer *peer) pj["paths"] = pa; } -static void _peerAggregateLinkToJson(nlohmann::json &pj,const ZT_Peer *peer) +static void _peerBondToJson(nlohmann::json &pj,const ZT_Peer *peer) { char tmp[256]; OSUtils::ztsnprintf(tmp,sizeof(tmp),"%.10llx",peer->address); - pj["aggregateLinkLatency"] = peer->latency; + //pj["aggregateLinkLatency"] = peer->latency; + std::string policyStr = BondController::getPolicyStrByCode(peer->bondingPolicy); + pj["policy"] = policyStr; nlohmann::json pa = nlohmann::json::array(); for(unsigned int i=0;ipathCount;++i) { int64_t lastSend = peer->paths[i].lastSend; int64_t lastReceive = peer->paths[i].lastReceive; nlohmann::json j; - j["address"] = reinterpret_cast(&(peer->paths[i].address))->toString(tmp); - j["lastSend"] = 
(lastSend < 0) ? 0 : lastSend; - j["lastReceive"] = (lastReceive < 0) ? 0 : lastReceive; + j["ifname"] = std::string(peer->paths[i].ifname); + j["path"] = reinterpret_cast(&(peer->paths[i].address))->toString(tmp); + j["lastTX"] = (lastSend < 0) ? 0 : lastSend; + j["lastRX"] = (lastReceive < 0) ? 0 : lastReceive; + j["lat"] = peer->paths[i].latencyMean; + j["pdv"] = peer->paths[i].latencyVariance; + //j["trustedPathId"] = peer->paths[i].trustedPathId; //j["active"] = (bool)(peer->paths[i].expired == 0); //j["expired"] = (bool)(peer->paths[i].expired != 0); //j["preferred"] = (bool)(peer->paths[i].preferred != 0); - j["latency"] = peer->paths[i].latency; - j["pdv"] = peer->paths[i].packetDelayVariance; - //j["throughputDisturbCoeff"] = peer->paths[i].throughputDisturbCoeff; - //j["packetErrorRatio"] = peer->paths[i].packetErrorRatio; - //j["packetLossRatio"] = peer->paths[i].packetLossRatio; - j["stability"] = peer->paths[i].stability; - j["throughput"] = peer->paths[i].throughput; - //j["maxThroughput"] = peer->paths[i].maxThroughput; - j["allocation"] = peer->paths[i].allocation; - j["ifname"] = peer->paths[i].ifname; + //j["ltm"] = peer->paths[i].latencyMax; + //j["plr"] = peer->paths[i].packetLossRatio; + //j["per"] = peer->paths[i].packetErrorRatio; + //j["thr"] = peer->paths[i].throughputMean; + //j["thm"] = peer->paths[i].throughputMax; + //j["thv"] = peer->paths[i].throughputVariance; + //j["avl"] = peer->paths[i].availability; + //j["age"] = peer->paths[i].age; + //j["alloc"] = peer->paths[i].allocation; + //j["ifname"] = peer->paths[i].ifname; pa.push_back(j); } - pj["paths"] = pa; + pj["slaves"] = pa; } static void _moonToJson(nlohmann::json &mj,const World &world) @@ -429,7 +438,7 @@ public: bool _updateAutoApply; bool _allowTcpFallbackRelay; bool _allowSecondaryPort; - unsigned int _multipathMode; + unsigned int _primaryPort; unsigned int _secondaryPort; unsigned int _tertiaryPort; @@ -718,6 +727,7 @@ public: } } #endif + // Delete legacy iddb.d if 
present (cleanup) OSUtils::rmDashRf((_homePath + ZT_PATH_SEPARATOR_S "iddb.d").c_str()); @@ -752,7 +762,6 @@ public: int64_t lastTapMulticastGroupCheck = 0; int64_t lastBindRefresh = 0; int64_t lastUpdateCheck = clockShouldBe; - int64_t lastMultipathModeUpdate = 0; int64_t lastCleanedPeersDb = 0; int64_t lastLocalInterfaceAddressCheck = (clockShouldBe - ZT_LOCAL_INTERFACE_CHECK_INTERVAL) + 15000; // do this in 15s to give portmapper time to configure and other things time to settle int64_t lastLocalConfFileCheck = OSUtils::now(); @@ -798,7 +807,7 @@ public: } // Refresh bindings in case device's interfaces have changed, and also sync routes to update any shadow routes (e.g. shadow default) - if (((now - lastBindRefresh) >= (_multipathMode ? ZT_BINDER_REFRESH_PERIOD / 8 : ZT_BINDER_REFRESH_PERIOD))||(restarted)) { + if (((now - lastBindRefresh) >= (_node->bondController()->inUse() ? ZT_BINDER_REFRESH_PERIOD / 4 : ZT_BINDER_REFRESH_PERIOD))||(restarted)) { lastBindRefresh = now; unsigned int p[3]; unsigned int pc = 0; @@ -815,11 +824,6 @@ public: } } } - // Update multipath mode (if needed) - if (((now - lastMultipathModeUpdate) >= ZT_BINDER_REFRESH_PERIOD / 8)||(restarted)) { - lastMultipathModeUpdate = now; - _node->setMultipathMode(_multipathMode); - } // Run background task processor in core if it's time to do so int64_t dl = _nextBackgroundTaskDeadline; @@ -855,7 +859,7 @@ public: } // Sync information about physical network interfaces - if ((now - lastLocalInterfaceAddressCheck) >= (_multipathMode ? ZT_LOCAL_INTERFACE_CHECK_INTERVAL / 8 : ZT_LOCAL_INTERFACE_CHECK_INTERVAL)) { + if ((now - lastLocalInterfaceAddressCheck) >= (_node->bondController()->inUse() ? 
ZT_LOCAL_INTERFACE_CHECK_INTERVAL / 8 : ZT_LOCAL_INTERFACE_CHECK_INTERVAL)) { lastLocalInterfaceAddressCheck = now; _node->clearLocalInterfaceAddresses(); @@ -869,8 +873,9 @@ public: #endif std::vector boundAddrs(_binder.allBoundLocalInterfaceAddresses()); - for(std::vector::const_iterator i(boundAddrs.begin());i!=boundAddrs.end();++i) + for(std::vector::const_iterator i(boundAddrs.begin());i!=boundAddrs.end();++i) { _node->addLocalInterfaceAddress(reinterpret_cast(&(*i))); + } } // Clean peers.d periodically @@ -1209,15 +1214,15 @@ public: settings["primaryPort"] = OSUtils::jsonInt(settings["primaryPort"],(uint64_t)_primaryPort) & 0xffff; settings["allowTcpFallbackRelay"] = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],_allowTcpFallbackRelay); - if (_multipathMode) { - json &multipathConfig = res["multipath"]; + if (_node->bondController()->inUse()) { + json &multipathConfig = res["bonds"]; ZT_PeerList *pl = _node->peers(); char peerAddrStr[256]; if (pl) { for(unsigned long i=0;ipeerCount;++i) { - if (pl->peers[i].hadAggregateLink) { + if (pl->peers[i].isBonded) { nlohmann::json pj; - _peerAggregateLinkToJson(pj,&(pl->peers[i])); + _peerBondToJson(pj,&(pl->peers[i])); OSUtils::ztsnprintf(peerAddrStr,sizeof(peerAddrStr),"%.10llx",pl->peers[i].address); multipathConfig[peerAddrStr] = (pj); } @@ -1346,8 +1351,8 @@ public: if (j.is_object()) { seed = Utils::hexStrToU64(OSUtils::jsonString(j["seed"],"0").c_str()); } - } catch (std::exception &exc) { } catch ( ... ) { + // discard invalid JSON } std::vector moons(_node->moons()); @@ -1396,8 +1401,8 @@ public: json &allowDefault = j["allowDefault"]; if (allowDefault.is_boolean()) localSettings.allowDefault = (bool)allowDefault; } - } catch (std::exception &exc) { } catch ( ... 
) { + // discard invalid JSON } setNetworkSettings(nws->networks[i].nwid,localSettings); @@ -1551,10 +1556,133 @@ public: json &settings = lc["settings"]; + if (!_node->bondController()->inUse()) { + // defaultBondingPolicy + std::string defaultBondingPolicyStr(OSUtils::jsonString(settings["defaultBondingPolicy"],"")); + int defaultBondingPolicy = _node->bondController()->getPolicyCodeByStr(defaultBondingPolicyStr); + _node->bondController()->setBondingLayerDefaultPolicy(defaultBondingPolicy); + _node->bondController()->setBondingLayerDefaultPolicyStr(defaultBondingPolicyStr); // Used if custom policy + // Custom Policies + json &customBondingPolicies = settings["policies"]; + for (json::iterator policyItr = customBondingPolicies.begin(); policyItr != customBondingPolicies.end();++policyItr) { + fprintf(stderr, "\n\n--- (%s)\n", policyItr.key().c_str()); + // Custom Policy + std::string customPolicyStr(policyItr.key()); + json &customPolicy = policyItr.value(); + std::string basePolicyStr(OSUtils::jsonString(customPolicy["basePolicy"],"")); + if (_node->bondController()->getPolicyCodeByStr(basePolicyStr) == ZT_BONDING_POLICY_NONE) { + fprintf(stderr, "error: custom policy (%s) is invalid, unknown base policy (%s).\n", + customPolicyStr.c_str(), basePolicyStr.c_str()); + continue; + } if (_node->bondController()->getPolicyCodeByStr(customPolicyStr) != ZT_BONDING_POLICY_NONE) { + fprintf(stderr, "error: custom policy (%s) will be ignored, cannot use standard policy names for custom policies.\n", + customPolicyStr.c_str()); + continue; + } + // New bond, used as a copy template for new instances + SharedPtr newTemplateBond = new Bond(basePolicyStr, customPolicyStr, SharedPtr()); + // Acceptable ranges + newTemplateBond->setMaxAcceptableLatency(OSUtils::jsonInt(customPolicy["maxAcceptableLatency"],-1)); + newTemplateBond->setMaxAcceptableMeanLatency(OSUtils::jsonInt(customPolicy["maxAcceptableMeanLatency"],-1)); + 
newTemplateBond->setMaxAcceptablePacketDelayVariance(OSUtils::jsonInt(customPolicy["maxAcceptablePacketDelayVariance"],-1)); + newTemplateBond->setMaxAcceptablePacketLossRatio((float)OSUtils::jsonDouble(customPolicy["maxAcceptablePacketLossRatio"],-1)); + newTemplateBond->setMaxAcceptablePacketErrorRatio((float)OSUtils::jsonDouble(customPolicy["maxAcceptablePacketErrorRatio"],-1)); + newTemplateBond->setMinAcceptableAllocation((float)OSUtils::jsonDouble(customPolicy["minAcceptableAllocation"],0)); + // Quality weights + json &qualityWeights = customPolicy["qualityWeights"]; + if (qualityWeights.size() == ZT_QOS_WEIGHT_SIZE) { // TODO: Generalize this + float weights[ZT_QOS_WEIGHT_SIZE]; + weights[ZT_QOS_LAT_IDX] = (float)OSUtils::jsonDouble(qualityWeights["lat"],0.0); + weights[ZT_QOS_LTM_IDX] = (float)OSUtils::jsonDouble(qualityWeights["ltm"],0.0); + weights[ZT_QOS_PDV_IDX] = (float)OSUtils::jsonDouble(qualityWeights["pdv"],0.0); + weights[ZT_QOS_PLR_IDX] = (float)OSUtils::jsonDouble(qualityWeights["plr"],0.0); + weights[ZT_QOS_PER_IDX] = (float)OSUtils::jsonDouble(qualityWeights["per"],0.0); + weights[ZT_QOS_THR_IDX] = (float)OSUtils::jsonDouble(qualityWeights["thr"],0.0); + weights[ZT_QOS_THM_IDX] = (float)OSUtils::jsonDouble(qualityWeights["thm"],0.0); + weights[ZT_QOS_THV_IDX] = (float)OSUtils::jsonDouble(qualityWeights["thv"],0.0); + newTemplateBond->setUserQualityWeights(weights,ZT_QOS_WEIGHT_SIZE); + } + // Bond-specific properties + newTemplateBond->setUpDelay(OSUtils::jsonInt(customPolicy["upDelay"],-1)); + newTemplateBond->setDownDelay(OSUtils::jsonInt(customPolicy["downDelay"],-1)); + newTemplateBond->setFailoverInterval(OSUtils::jsonInt(customPolicy["failoverInterval"],(uint64_t)0)); + newTemplateBond->setPacketsPerSlave(OSUtils::jsonInt(customPolicy["packetsPerSlave"],-1)); + std::string slaveMonitorStrategyStr(OSUtils::jsonString(customPolicy["slaveMonitorStrategy"],"")); + uint8_t slaveMonitorStrategy = ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DEFAULT; + 
if (slaveMonitorStrategyStr == "passive") { newTemplateBond->setSlaveMonitorStrategy(ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_PASSIVE); } + if (slaveMonitorStrategyStr == "active") { newTemplateBond->setSlaveMonitorStrategy(ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_ACTIVE); } + if (slaveMonitorStrategyStr == "dynamic") { newTemplateBond->setSlaveMonitorStrategy(ZT_MULTIPATH_SLAVE_MONITOR_STRATEGY_DYNAMIC); } + // Policy-Specific slave set + json &slaves = customPolicy["slaves"]; + for (json::iterator slaveItr = slaves.begin(); slaveItr != slaves.end();++slaveItr) { + fprintf(stderr, "\t--- slave (%s)\n", slaveItr.key().c_str()); + std::string slaveNameStr(slaveItr.key()); + json &slave = slaveItr.value(); + + bool enabled = OSUtils::jsonInt(slave["enabled"],true); + uint32_t speed = OSUtils::jsonInt(slave["speed"],0); + float alloc = (float)OSUtils::jsonDouble(slave["alloc"],0); + + if (speed && alloc) { + fprintf(stderr, "error: cannot specify both speed (%d) and alloc (%f) for slave (%s), pick one, slave disabled.\n", + speed, alloc, slaveNameStr.c_str()); + enabled = false; + } + uint32_t upDelay = OSUtils::jsonInt(slave["upDelay"],-1); + uint32_t downDelay = OSUtils::jsonInt(slave["downDelay"],-1); + uint8_t ipvPref = OSUtils::jsonInt(slave["ipvPref"],0); + uint32_t slaveMonitorInterval = OSUtils::jsonInt(slave["monitorInterval"],(uint64_t)0); + std::string failoverToStr(OSUtils::jsonString(slave["failoverTo"],"")); + // Mode + std::string slaveModeStr(OSUtils::jsonString(slave["mode"],"spare")); + uint8_t slaveMode = ZT_MULTIPATH_SLAVE_MODE_SPARE; + if (slaveModeStr == "primary") { slaveMode = ZT_MULTIPATH_SLAVE_MODE_PRIMARY; } + if (slaveModeStr == "spare") { slaveMode = ZT_MULTIPATH_SLAVE_MODE_SPARE; } + // ipvPref + if ((ipvPref != 0) && (ipvPref != 4) && (ipvPref != 6) && (ipvPref != 46) && (ipvPref != 64)) { + fprintf(stderr, "error: invalid ipvPref value (%d), slave disabled.\n", ipvPref); + enabled = false; + } + if (slaveMode == ZT_MULTIPATH_SLAVE_MODE_SPARE && 
failoverToStr.length()) { + fprintf(stderr, "error: cannot specify failover slaves for spares, slave disabled.\n"); + failoverToStr = ""; + enabled = false; + } + _node->bondController()->addCustomSlave(customPolicyStr, new Slave(slaveNameStr,ipvPref,speed,slaveMonitorInterval,upDelay,downDelay,enabled,slaveMode,failoverToStr,alloc)); + } + // TODO: This is dumb + std::string slaveSelectMethodStr(OSUtils::jsonString(customPolicy["activeReselect"],"optimize")); + if (slaveSelectMethodStr == "always") { newTemplateBond->setSlaveSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_ALWAYS); } + if (slaveSelectMethodStr == "better") { newTemplateBond->setSlaveSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_BETTER); } + if (slaveSelectMethodStr == "failure") { newTemplateBond->setSlaveSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_FAILURE); } + if (slaveSelectMethodStr == "optimize") { newTemplateBond->setSlaveSelectMethod(ZT_MULTIPATH_RESELECTION_POLICY_OPTIMIZE); } + if (newTemplateBond->getSlaveSelectMethod() < 0 || newTemplateBond->getSlaveSelectMethod() > 3) { + fprintf(stderr, "warning: invalid value (%s) for slaveSelectMethod, assuming mode: always\n", slaveSelectMethodStr.c_str()); + } + /* + newBond->setPolicy(_node->bondController()->getPolicyCodeByStr(basePolicyStr)); + newBond->setFlowHashing((bool)OSUtils::jsonInt(userSpecifiedBondingPolicies[i]["allowFlowHashing"],(bool)allowFlowHashing)); + newBond->setBondMonitorInterval((unsigned int)OSUtils::jsonInt(userSpecifiedBondingPolicies[i]["monitorInterval"],(uint64_t)0)); + newBond->setAllowPathNegotiation((bool)OSUtils::jsonInt(userSpecifiedBondingPolicies[i]["allowPathNegotiation"],(bool)false)); + */ + if (!_node->bondController()->addCustomPolicy(newTemplateBond)) { + fprintf(stderr, "error: a custom policy of this name (%s) already exists.\n", customPolicyStr.c_str()); + } + } + // Peer-specific bonding + json &peerSpecificBonds = settings["peerSpecificBonds"]; + for (json::iterator peerItr = peerSpecificBonds.begin(); 
peerItr != peerSpecificBonds.end();++peerItr) { + _node->bondController()->assignBondingPolicyToPeer(std::stoull(peerItr.key(),0,16), peerItr.value()); + } + // Check settings + if (defaultBondingPolicyStr.length() && !defaultBondingPolicy && !_node->bondController()->inUse()) { + fprintf(stderr, "error: unknown policy (%s) specified by defaultBondingPolicy, slave disabled.\n", defaultBondingPolicyStr.c_str()); + } + } + + // bondingPolicy cannot be used with allowTcpFallbackRelay + _allowTcpFallbackRelay = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],true) && !(_node->bondController()->inUse()); _primaryPort = (unsigned int)OSUtils::jsonInt(settings["primaryPort"],(uint64_t)_primaryPort) & 0xffff; - _multipathMode = (unsigned int)OSUtils::jsonInt(settings["multipathMode"],0); - // multipathMode cannot be used with allowTcpFallbackRelay - _allowTcpFallbackRelay = OSUtils::jsonBool(settings["allowTcpFallbackRelay"],true) && !_multipathMode; _allowSecondaryPort = OSUtils::jsonBool(settings["allowSecondaryPort"],true); _secondaryPort = (unsigned int)OSUtils::jsonInt(settings["secondaryPort"],0); _tertiaryPort = (unsigned int)OSUtils::jsonInt(settings["tertiaryPort"],0); @@ -1705,9 +1833,8 @@ public: } } #ifdef __SYNOLOGY__ - if (!n.tap->addIps(newManagedIps)) { + if (!n.tap->addIpSyn(newManagedIps)) fprintf(stderr,"ERROR: unable to add ip addresses to ifcfg" ZT_EOL_S); - } #else for(std::vector::iterator ip(newManagedIps.begin());ip!=newManagedIps.end();++ip) { if (std::find(n.managedIps.begin(),n.managedIps.end(),*ip) == n.managedIps.end()) { @@ -2025,8 +2152,6 @@ public: return; } - } catch (std::exception &exc) { - _phy.close(sock); } catch ( ... ) { _phy.close(sock); } @@ -2135,8 +2260,6 @@ public: #endif _nets.erase(nwid); return -999; - } catch (int exc) { - return -999; } catch ( ... 
) { return -999; // tap init failed } @@ -2743,6 +2866,7 @@ public: if (!strncmp(p->c_str(),ifname,p->length())) return false; } + return _node->bondController()->allowedToBind(std::string(ifname)); } { // Check global blacklists