Added flow-awareness check for policies, more work on ZT_MULTIPATH_ACTIVE_BACKUP

Joseph Henry 2019-08-20 18:50:38 -07:00
parent b0a91c0187
commit 5453cab22b
5 changed files with 134 additions and 93 deletions

View file

@@ -491,7 +491,7 @@
 /**
  * How long before we consider a path to be dead in rapid fail-over scenarios
  */
-#define ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD 1000
+#define ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD 250

 /**
  * Paths are considered expired if they have not sent us a real packet in this long
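
Note: with this change a path only counts as "hot" for active-backup selection if it has received a real packet within the last 250 ms instead of 1000 ms. The test applied in the Peer::getAppropriatePath() changes below is essentially the following stand-alone sketch (illustrative only, not part of the diff):

	// A path qualifies for active-backup selection and rapid fail-over only if
	// it has received something within the fail-over window (now 250 ms).
	static inline bool isHotPath(int64_t now, int64_t lastIn)
	{
		return (now - lastIn) < ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD;
	}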

View file

@@ -77,7 +77,8 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident
 	_lastAggregateStatsReport(0),
 	_lastAggregateAllocation(0),
 	_virtualPathCount(0),
-	_roundRobinPathAssignmentIdx(0)
+	_roundRobinPathAssignmentIdx(0),
+	_pathAssignmentIdx(0)
 {
 	if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH))
 		throw ZT_EXCEPTION_INVALID_ARGUMENT;
@@ -468,16 +469,18 @@ SharedPtr<Path> Peer::getAppropriatePath(int64_t now, bool includeExpired, int64
 			_paths[i].p->processBackgroundPathMeasurements(now);
 		}
 	}
-	// Detect new flows and update existing records
-	if (_flows.count(flowId)) {
-		_flows[flowId]->lastSend = now;
-	}
-	else {
-		fprintf(stderr, "new flow %llx detected between this node and %llx (%lu active flow(s))\n",
-			flowId, this->_id.address().toInt(), (_flows.size()+1));
-		struct Flow *newFlow = new Flow(flowId, now);
-		_flows[flowId] = newFlow;
-		newFlow->assignedPath = nullptr;
-	}
+	if (RR->sw->isFlowAware()) {
+		// Detect new flows and update existing records
+		if (_flows.count(flowId)) {
+			_flows[flowId]->lastSend = now;
+		}
+		else {
+			fprintf(stderr, "new flow %llx detected between this node and %llx (%lu active flow(s))\n",
+				flowId, this->_id.address().toInt(), (_flows.size()+1));
+			struct Flow *newFlow = new Flow(flowId, now);
+			_flows[flowId] = newFlow;
+			newFlow->assignedPath = nullptr;
+		}
+	}
 	// Construct set of virtual paths if needed
 	if (!_virtualPaths.size()) {
@@ -532,45 +535,64 @@ SharedPtr<Path> Peer::getAppropriatePath(int64_t now, bool includeExpired, int64
 					if ((now - _paths[i].p->lastIn()) < ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) {
 						bFoundHotPath = true;
 						_activeBackupPath = _paths[i].p;
+						_pathAssignmentIdx = i;
 						_activeBackupPath->address().toString(curPathStr);
-						fprintf(stderr, "selected %s as the primary active-backup path to %llx\n",
-							curPathStr, this->_id.address().toInt());
+						fprintf(stderr, "selected %s as the primary active-backup path to %llx (idx=%d)\n",
+							curPathStr, this->_id.address().toInt(), _pathAssignmentIdx);
+						break;
 					}
 				}
 			}
-			if (!_activeBackupPath) {
-				return SharedPtr<Path>();
-			}
-			if (!bFoundHotPath) {
-				_activeBackupPath->address().toString(curPathStr);
-				fprintf(stderr, "no hot paths available to use as active-backup primary to %llx, selected %s anyway\n",
-					this->_id.address().toInt(), curPathStr);
-			}
 		}
 		else {
+			char what[128];
 			if ((now - _activeBackupPath->lastIn()) > ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) {
-				_activeBackupPath->address().toString(curPathStr);
-				/* Fail-over to the first path that appears to still be active.
-				 * This will eventually be user-configurable */
-				for (int i=0; i<ZT_MAX_PEER_NETWORK_PATHS; i++) {
-					if (_paths[i].p) {
-						if (_activeBackupPath.ptr() == _paths[i].p.ptr()) {
+				_activeBackupPath->address().toString(curPathStr); // Record path string for later debug trace
+				int16_t previousIdx = _pathAssignmentIdx;
+				SharedPtr<Path> nextAlternativePath;
+				// Search for a hot path, at the same time find the next path in
+				// a RR sequence that seems viable to use as an alternative
+				int searchCount = 0;
+				while (searchCount < ZT_MAX_PEER_NETWORK_PATHS) {
+					_pathAssignmentIdx++;
+					if (_pathAssignmentIdx == ZT_MAX_PEER_NETWORK_PATHS) {
+						_pathAssignmentIdx = 0;
+					}
+					searchCount++;
+					if (_paths[_pathAssignmentIdx].p) {
+						_paths[_pathAssignmentIdx].p->address().toString(what);
+						if (_activeBackupPath.ptr() == _paths[_pathAssignmentIdx].p.ptr()) {
 							continue;
 						}
-						if ((now - _paths[i].p->lastIn()) < ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) {
+						if (!nextAlternativePath) { // Record the first viable alternative in the RR sequence
+							nextAlternativePath = _paths[_pathAssignmentIdx].p;
+						}
+						if ((now - _paths[_pathAssignmentIdx].p->lastIn()) < ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD) {
 							bFoundHotPath = true;
-							_activeBackupPath->address().toString(curPathStr); // Record path string for later debug trace
-							_activeBackupPath = _paths[i].p;
+							_activeBackupPath = _paths[_pathAssignmentIdx].p;
 							_activeBackupPath->address().toString(newPathStr);
+							fprintf(stderr, "primary active-backup path %s to %llx appears to be dead, switched to %s\n",
+								curPathStr, this->_id.address().toInt(), newPathStr);
+							break;
 						}
 					}
 				}
-				if (bFoundHotPath) {
-					fprintf(stderr, "primary active-backup path %s to %llx appears to be dead, switched to path %s\n",
-						curPathStr, this->_id.address().toInt(), newPathStr);
+				if (!bFoundHotPath) {
+					if (nextAlternativePath) {
+						_activeBackupPath = nextAlternativePath;
+						_activeBackupPath->address().toString(curPathStr);
+						//fprintf(stderr, "no hot paths found to use as active-backup primary to %llx, using next best: %s\n",
+						//	this->_id.address().toInt(), curPathStr);
+					}
+					else {
+						// No change
+					}
 				}
 			}
 		}
+		if (!_activeBackupPath) {
+			return SharedPtr<Path>();
+		}
 		return _activeBackupPath;
 	}
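
Note: in short, the fail-over search added above advances a round-robin index with wrap-around, remembers the first viable slot it passes as a fallback, and stops at the first "hot" slot; only when no hot path exists does it fall back to that first alternative. A stand-alone sketch of the same search (illustrative only; Slot and the constants below are stand-ins for the peer's _paths[] array and the ZT_* macros):

	#include <cstdint>

	struct Slot {
		bool    present;  // stand-in for _paths[i].p being non-null
		int64_t lastIn;   // last time a real packet was received on this path
	};

	static const int     kMaxPaths       = 16;   // stand-in for ZT_MAX_PEER_NETWORK_PATHS
	static const int64_t kFailoverWindow = 250;  // stand-in for ZT_MULTIPATH_ACTIVE_BACKUP_RAPID_FAILOVER_PERIOD

	// Mirror of the round-robin search above: step the index with wrap-around,
	// remember the first viable alternative, and stop early at the first hot slot.
	// Returns the chosen slot index, or -1 if nothing viable was found.
	static int nextActiveBackupIdx(const Slot paths[], int currentIdx, int64_t now)
	{
		int firstAlternative = -1;
		int idx = currentIdx;
		for (int searched = 0; searched < kMaxPaths; ++searched) {
			idx = (idx + 1) % kMaxPaths;
			if (!paths[idx].present || idx == currentIdx)
				continue;                    // skip empty slots and the (dead) current primary
			if (firstAlternative < 0)
				firstAlternative = idx;      // first viable fallback in RR order
			if ((now - paths[idx].lastIn) < kFailoverWindow)
				return idx;                  // hot path found, switch to it immediately
		}
		return firstAlternative;             // no hot path: best-effort fallback (may be -1)
	}
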
@@ -866,14 +888,16 @@ inline void Peer::processBackgroundPeerTasks(const int64_t now)
 	}
 	// Remove old flows
-	std::map<int64_t, struct Flow *>::iterator it = _flows.begin();
-	while (it != _flows.end()) {
-		if ((now - it->second->lastSend) > ZT_MULTIPATH_FLOW_EXPIRATION) {
-			fprintf(stderr, "forgetting flow %llx between this node and %llx (%lu active flow(s))\n",
-				it->first, this->_id.address().toInt(), _flows.size());
-			it = _flows.erase(it);
-		} else {
-			it++;
-		}
-	}
+	if (RR->sw->isFlowAware()) {
+		std::map<int64_t, struct Flow *>::iterator it = _flows.begin();
+		while (it != _flows.end()) {
+			if ((now - it->second->lastSend) > ZT_MULTIPATH_FLOW_EXPIRATION) {
+				fprintf(stderr, "forgetting flow %llx between this node and %llx (%lu active flow(s))\n",
+					it->first, this->_id.address().toInt(), _flows.size());
+				it = _flows.erase(it);
+			} else {
+				it++;
+			}
+		}
+	}
 }

View file

@@ -727,6 +727,7 @@ private:
 	int16_t _roundRobinPathAssignmentIdx;
 	SharedPtr<Path> _activeBackupPath;
+	int16_t _pathAssignmentIdx;
 };

 } // namespace ZeroTier

View file

@@ -284,6 +284,14 @@ static bool _ipv6GetPayload(const uint8_t *frameData,unsigned int frameLen,unsig
 	return false; // overflow == invalid
 }

+bool Switch::isFlowAware()
+{
+	int mode = RR->node->getMultipathMode();
+	return (( mode == ZT_MULTIPATH_BALANCE_RR_FLOW)
+		|| (mode == ZT_MULTIPATH_BALANCE_XOR_FLOW)
+		|| (mode == ZT_MULTIPATH_BALANCE_DYNAMIC_FLOW));
+}
+
 void Switch::onLocalEthernet(void *tPtr,const SharedPtr<Network> &network,const MAC &from,const MAC &to,unsigned int etherType,unsigned int vlanId,const void *data,unsigned int len)
 {
 	if (!network->hasConfig())
@@ -309,61 +317,64 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr<Network> &network,const
 	 * preferred virtual path and will be sent out according to what the multipath logic
 	 * deems appropriate. An example of this would be an ICMP packet.
 	 */
 	int64_t flowId = -1;
+	if (isFlowAware()) {
 		if (etherType == ZT_ETHERTYPE_IPV4 && (len >= 20)) {
 			uint16_t srcPort = 0;
 			uint16_t dstPort = 0;
 			int8_t proto = (reinterpret_cast<const uint8_t *>(data)[9]);
 			const unsigned int headerLen = 4 * (reinterpret_cast<const uint8_t *>(data)[0] & 0xf);
 			switch(proto) {
 				case 0x01: // ICMP
 					flowId = 0x01;
 					break;
 				// All these start with 16-bit source and destination port in that order
 				case 0x06: // TCP
 				case 0x11: // UDP
 				case 0x84: // SCTP
 				case 0x88: // UDPLite
 					if (len > (headerLen + 4)) {
 						unsigned int pos = headerLen + 0;
 						srcPort = (reinterpret_cast<const uint8_t *>(data)[pos++]) << 8;
 						srcPort |= (reinterpret_cast<const uint8_t *>(data)[pos]);
 						pos++;
 						dstPort = (reinterpret_cast<const uint8_t *>(data)[pos++]) << 8;
 						dstPort |= (reinterpret_cast<const uint8_t *>(data)[pos]);
 						flowId = ((int64_t)srcPort << 48) | ((int64_t)dstPort << 32) | proto;
 					}
 					break;
 			}
 		}
 		if (etherType == ZT_ETHERTYPE_IPV6 && (len >= 40)) {
 			uint16_t srcPort = 0;
 			uint16_t dstPort = 0;
 			unsigned int pos;
 			unsigned int proto;
 			_ipv6GetPayload((const uint8_t *)data, len, pos, proto);
 			switch(proto) {
 				case 0x3A: // ICMPv6
 					flowId = 0x3A;
 					break;
 				// All these start with 16-bit source and destination port in that order
 				case 0x06: // TCP
 				case 0x11: // UDP
 				case 0x84: // SCTP
 				case 0x88: // UDPLite
 					if (len > (pos + 4)) {
 						srcPort = (reinterpret_cast<const uint8_t *>(data)[pos++]) << 8;
 						srcPort |= (reinterpret_cast<const uint8_t *>(data)[pos]);
 						pos++;
 						dstPort = (reinterpret_cast<const uint8_t *>(data)[pos++]) << 8;
 						dstPort |= (reinterpret_cast<const uint8_t *>(data)[pos]);
 						flowId = ((int64_t)srcPort << 48) | ((int64_t)dstPort << 32) | proto;
 					}
 					break;
 				default:
 					break;
 			}
 		}
+	}

(The IPv4/IPv6 flow-classification logic itself is unchanged; this hunk only wraps it in the new isFlowAware() guard and re-indents it accordingly.)
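
Note: the flow ID computed above packs the transport-layer source port, destination port, and IP protocol number into a single 64-bit key (ICMP/ICMPv6 traffic collapses to just the protocol number). A stand-alone illustration of the same packing (not part of the diff):

	#include <cstdint>
	#include <cstdio>

	// Same packing as above: source port in bits 48-63, destination port in
	// bits 32-47, IP protocol number in the low bits.
	static int64_t makeFlowId(uint16_t srcPort, uint16_t dstPort, uint8_t proto)
	{
		return ((int64_t)srcPort << 48) | ((int64_t)dstPort << 32) | (int64_t)proto;
	}

	int main()
	{
		// Example: a TCP (0x06) flow from source port 52000 to destination port 443.
		const int64_t flowId = makeFlowId(52000, 443, 0x06);
		printf("flow %llx\n", (unsigned long long)flowId); // prints "flow cb2001bb00000006"
		return 0;
	}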

View file

@@ -91,6 +91,11 @@ public:
 	 */
 	void onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddress &fromAddr,const void *data,unsigned int len);

+	/**
+	 * Returns whether our bonding or balancing policy is aware of flows.
+	 */
+	bool isFlowAware();
+
 	/**
 	 * Called when a packet comes from a local Ethernet tap
 	 *