Some more bridging work... wiring up in Switch - GitHub issue #68

This commit is contained in:
Adam Ierymenko 2014-06-13 17:49:33 -07:00
parent 08b7bb3c7a
commit 5682f0b772
4 changed files with 180 additions and 94 deletions

View file

@ -166,6 +166,22 @@ public:
return _a; return _a;
} }
/**
* Test whether this address is within a multicast propagation prefix
*
* Multicast propagation prefixes are (right-to-left a.k.a. LSB to MSB)
* bit pattern prefixes of prefixBits bits that restrict which peers are
* visited along a given multicast graph traversal path.
*
* @param prefix Prefix bit pattern (LSB to MSB)
* @param prefixBits Number of bits in prefix bit pattern
* @return True if address is within prefix
*/
inline bool withinMulticastPropagationPrefix(uint64_t prefix,unsigned int prefixBits)
{
return ((_a & (0xffffffffffffffffULL >> (64 - prefixBits))) == prefix);
}
/** /**
* @return Hexadecimal string * @return Hexadecimal string
*/ */

View file

@ -407,4 +407,9 @@ error_no_byte_order_defined;
*/ */
#define ZT_MAX_BRIDGE_ROUTES 67108864 #define ZT_MAX_BRIDGE_ROUTES 67108864
/**
 * Maximum number of active bridges to send a unicast frame to when there is
 * no known bridge route to the frame's destination MAC
 *
 * This bounds the size of the fallback list of bridges that is built when a
 * route lookup yields no usable bridge.
 */
#define ZT_MAX_BRIDGE_SPAM 4
#endif #endif

View file

@ -152,15 +152,15 @@ public:
* @param prefixBits Number of bits in propagation restriction prefix * @param prefixBits Number of bits in propagation restriction prefix
* @param prefix Propagation restriction prefix * @param prefix Propagation restriction prefix
*/ */
AddToPropagationQueue(unsigned char **ptr,unsigned char *end,unsigned char *bloom,uint16_t bloomNonce,const Address &origin,unsigned int prefixBits,unsigned int prefix) AddToPropagationQueue(unsigned char **ptr,unsigned char *end,unsigned char *bloom,uint16_t bloomNonce,const Address &origin,unsigned int prefixBits,uint64_t prefix)
throw() : throw() :
_origin(origin), _origin(origin),
_bloomNonce((uint64_t)bloomNonce), _bloomNonce((uint64_t)bloomNonce),
_prefixMask(0xffffffffffffffffULL >> (64 - prefixBits)), _prefix(prefix),
_prefix((uint64_t)prefix & _prefixMask),
_ptr(ptr), _ptr(ptr),
_end(end), _end(end),
_bloom(bloom) {} _bloom(bloom),
_prefixBits(prefixBits) {}
inline bool operator()(const Address &a) inline bool operator()(const Address &a)
throw() throw()
@ -170,7 +170,7 @@ public:
return true; return true;
// Exclude addresses not in this prefix domain // Exclude addresses not in this prefix domain
if ((a.toInt() & _prefixMask) != _prefix) if (!a.withinMulticastPropagationPrefix(_prefix,_prefixBits))
return true; return true;
// Exclude addresses remembered in bloom filter -- or else remember them // Exclude addresses remembered in bloom filter -- or else remember them
@ -189,11 +189,11 @@ public:
private: private:
const Address _origin; const Address _origin;
const uint64_t _bloomNonce; const uint64_t _bloomNonce;
const uint64_t _prefixMask;
const uint64_t _prefix; const uint64_t _prefix;
unsigned char **const _ptr; unsigned char **const _ptr;
unsigned char *const _end; unsigned char *const _end;
unsigned char *const _bloom; unsigned char *const _bloom;
unsigned int _prefixBits;
}; };
private: private:

View file

@ -89,117 +89,182 @@ void Switch::onLocalEthernet(const SharedPtr<Network> &network,const MAC &from,c
if (!nconf) if (!nconf)
return; return;
if (!_r->antiRec->checkEthernetFrame(data.data(),data.size())) { // This should not happen
TRACE("%s: rejected recursively addressed ZeroTier packet by tail match (type %s, length: %u)",network->tapDeviceName().c_str(),etherTypeName(etherType),data.size());
return;
}
if (to == network->mac()) { if (to == network->mac()) {
LOG("%s: frame received from self, ignoring (bridge loop? OS bug?)",network->tapDeviceName().c_str()); LOG("%s: frame received from self, ignoring (bridge loop? OS bug?)",network->tapDeviceName().c_str());
return; return;
} }
// Check anti-recursion module to ensure that this is not ZeroTier talking over its own links
if (!_r->antiRec->checkEthernetFrame(data.data(),data.size())) {
TRACE("%s: rejected recursively addressed ZeroTier packet by tail match (type %s, length: %u)",network->tapDeviceName().c_str(),etherTypeName(etherType),data.size());
return;
}
// Check to make sure this protocol is allowed on this network
if (!nconf->permitsEtherType(etherType)) { if (!nconf->permitsEtherType(etherType)) {
LOG("%s: ignored tap: %s -> %s: ethertype %s not allowed on network %.16llx",network->tapDeviceName().c_str(),from.toString().c_str(),to.toString().c_str(),etherTypeName(etherType),(unsigned long long)network->id()); LOG("%s: ignored tap: %s -> %s: ethertype %s not allowed on network %.16llx",network->tapDeviceName().c_str(),from.toString().c_str(),to.toString().c_str(),etherTypeName(etherType),(unsigned long long)network->id());
return; return;
} }
if (from == network->mac()) { // Check if this packet is from someone other than the tap -- i.e. bridged in
if (to.isMulticast()) { bool fromBridged = false;
MulticastGroup mg(to,0); if (from != network->mac()) {
if (!network->permitsBridging(_r->identity.address())) {
LOG("%s: UNICAST %s -> %s %s dropped, bridging disabled on network %.16llx",network->tapDeviceName().c_str(),from.toString().c_str(),to.toString().c_str(),etherTypeName(etherType),network->id());
return;
}
fromBridged = true;
}
if (to.isBroadcast()) { if (to.isMulticast()) {
if ((etherType == ZT_ETHERTYPE_ARP)&&(data.size() == 28)&&(data[2] == 0x08)&&(data[3] == 0x00)&&(data[4] == 6)&&(data[5] == 4)&&(data[7] == 0x01)) { MulticastGroup mg(to,0);
// Cram IPv4 IP into ADI field to make IPv4 ARP broadcast channel specific and scalable
mg = MulticastGroup::deriveMulticastGroupForAddressResolution(InetAddress(data.field(24,4),4,0)); if (to.isBroadcast()) {
} else if (!nconf->enableBroadcast()) { if ((etherType == ZT_ETHERTYPE_ARP)&&(data.size() == 28)&&(data[2] == 0x08)&&(data[3] == 0x00)&&(data[4] == 6)&&(data[5] == 4)&&(data[7] == 0x01)) {
// Don't transmit broadcasts if this network doesn't want them // Cram IPv4 IP into ADI field to make IPv4 ARP broadcast channel specific and scalable
TRACE("%s: dropped broadcast since ff:ff:ff:ff:ff:ff is not enabled on network %.16llx",network->tapDeviceName().c_str(),network->id()); // Also: enableBroadcast() does not apply to ARP since it's required for IPv4
return; mg = MulticastGroup::deriveMulticastGroupForAddressResolution(InetAddress(data.field(24,4),4,0));
} else if (!nconf->enableBroadcast()) {
// Don't transmit broadcasts if this network doesn't want them
TRACE("%s: dropped broadcast since ff:ff:ff:ff:ff:ff is not enabled on network %.16llx",network->tapDeviceName().c_str(),network->id());
return;
}
}
// Learn multicast groups for bridged-in hosts
if (fromBridged)
network->learnBridgedMulticastGroup(mg);
// Check multicast/broadcast bandwidth quotas
if (!network->updateAndCheckMulticastBalance(_r->identity.address(),mg,data.size())) {
TRACE("%s: didn't multicast %d bytes, quota exceeded for multicast group %s",network->tapDeviceName().c_str(),(int)data.size(),mg.toString().c_str());
return;
}
const unsigned int mcid = ++_multicastIdCounter & 0xffffff;
const uint16_t bloomNonce = (uint16_t)(_r->prng->next32() & 0xffff); // doesn't need to be cryptographically strong
unsigned char bloom[ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PROPAGATION_BLOOM];
unsigned char fifo[ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PROPAGATION_FIFO + ZT_ADDRESS_LENGTH]; // extra ZT_ADDRESS_LENGTH is for first hop, not put in packet but serves as destination for packet
unsigned char *const fifoEnd = fifo + sizeof(fifo);
const unsigned int signedPartLen = (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_FRAME - ZT_PROTO_VERB_MULTICAST_FRAME_IDX__START_OF_SIGNED_PORTION) + data.size();
const SharedPtr<Peer> supernode(_r->topology->getBestSupernode());
// For each bit prefix send a packet to a list of destinations within it
for(unsigned int prefix=0,np=((unsigned int)2 << (nconf->multicastPrefixBits() - 1));prefix<np;++prefix) {
memset(bloom,0,sizeof(bloom));
unsigned char *fifoPtr = fifo;
// All multicasts visit all active bridges first -- this is one of the two active/passive bridge differences
for(std::set<Address>::const_iterator ab(nconf->activeBridges().begin());ab!=nconf->activeBridges().end();++ab) {
if ((*ab != _r->identity.address())&&(ab->withinMulticastPropagationPrefix(prefix,nconf->multicastPrefixBits()))) {
ab->copyTo(fifoPtr,ZT_ADDRESS_LENGTH);
if ((fifoPtr += ZT_ADDRESS_LENGTH) == fifoEnd)
break;
} }
} }
if (!network->updateAndCheckMulticastBalance(_r->identity.address(),mg,data.size())) { // Then visit next hops according to multicaster (if there's room... almost certainly will be)
TRACE("%s: didn't multicast %d bytes, quota exceeded for multicast group %s",network->tapDeviceName().c_str(),(int)data.size(),mg.toString().c_str()); if (fifoPtr != fifoEnd) {
return;
}
const unsigned int mcid = ++_multicastIdCounter & 0xffffff;
const uint16_t bloomNonce = (uint16_t)(_r->prng->next32() & 0xffff); // doesn't need to be cryptographically strong
unsigned char bloom[ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PROPAGATION_BLOOM];
unsigned char fifo[ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PROPAGATION_FIFO + ZT_ADDRESS_LENGTH];
unsigned char *const fifoEnd = fifo + sizeof(fifo);
const unsigned int signedPartLen = (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_FRAME - ZT_PROTO_VERB_MULTICAST_FRAME_IDX__START_OF_SIGNED_PORTION) + data.size();
const SharedPtr<Peer> supernode(_r->topology->getBestSupernode());
for(unsigned int prefix=0,np=((unsigned int)2 << (nconf->multicastPrefixBits() - 1));prefix<np;++prefix) {
memset(bloom,0,sizeof(bloom));
unsigned char *fifoPtr = fifo;
_r->mc->getNextHops(network->id(),mg,Multicaster::AddToPropagationQueue(&fifoPtr,fifoEnd,bloom,bloomNonce,_r->identity.address(),nconf->multicastPrefixBits(),prefix)); _r->mc->getNextHops(network->id(),mg,Multicaster::AddToPropagationQueue(&fifoPtr,fifoEnd,bloom,bloomNonce,_r->identity.address(),nconf->multicastPrefixBits(),prefix));
while (fifoPtr != fifoEnd) while (fifoPtr != fifoEnd)
*(fifoPtr++) = (unsigned char)0; *(fifoPtr++) = (unsigned char)0;
Address firstHop(fifo,ZT_ADDRESS_LENGTH); // fifo is +1 in size, with first element being used here
if (!firstHop) {
if (supernode)
firstHop = supernode->address();
else continue;
}
Packet outp(firstHop,_r->identity.address(),Packet::VERB_MULTICAST_FRAME);
outp.append((uint16_t)0);
outp.append(fifo + ZT_ADDRESS_LENGTH,ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PROPAGATION_FIFO); // remainder of fifo is loaded into packet
outp.append(bloom,ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PROPAGATION_BLOOM);
outp.append((nconf->com()) ? (unsigned char)ZT_PROTO_VERB_MULTICAST_FRAME_FLAGS_HAS_MEMBERSHIP_CERTIFICATE : (unsigned char)0);
outp.append(network->id());
outp.append(bloomNonce);
outp.append((unsigned char)nconf->multicastPrefixBits());
outp.append((unsigned char)prefix);
_r->identity.address().appendTo(outp);
outp.append((unsigned char)((mcid >> 16) & 0xff));
outp.append((unsigned char)((mcid >> 8) & 0xff));
outp.append((unsigned char)(mcid & 0xff));
from.appendTo(outp);
mg.mac().appendTo(outp);
outp.append(mg.adi());
outp.append((uint16_t)etherType);
outp.append((uint16_t)data.size());
outp.append(data);
C25519::Signature sig(_r->identity.sign(outp.field(ZT_PROTO_VERB_MULTICAST_FRAME_IDX__START_OF_SIGNED_PORTION,signedPartLen),signedPartLen));
outp.append((uint16_t)sig.size());
outp.append(sig.data,(unsigned int)sig.size());
// FIXME: now we send the netconf cert with every single multicast,
// which pretty much ensures everyone has it ahead of time but adds
// some redundant payload. Maybe think about this in the future.
if (nconf->com())
nconf->com().serialize(outp);
outp.compress();
send(outp,true);
} }
} else if (to[0] == MAC::firstOctetForNetwork(network->id())) {
// Simple unicast frame from us to another node on the same virtual network
Address toZT(to.toAddress(network->id()));
if (network->isAllowed(toZT)) {
network->pushMembershipCertificate(toZT,false,Utils::now());
Packet outp(toZT,_r->identity.address(),Packet::VERB_FRAME); // First element in FIFO is first hop, rest of FIFO is sent in packet *to* first hop
outp.append(network->id()); Address firstHop(fifo,ZT_ADDRESS_LENGTH);
outp.append((uint16_t)etherType); if (!firstHop) {
outp.append(data); if (supernode)
outp.compress(); firstHop = supernode->address();
send(outp,true); else continue; // no first hop = nowhere to go, try next bit prefix
} else {
TRACE("%s: UNICAST: %s -> %s %s dropped, destination not a member of closed network %.16llx",network->tapDeviceName().c_str(),from.toString().c_str(),to.toString().c_str(),etherTypeName(etherType),network->id());
} }
Packet outp(firstHop,_r->identity.address(),Packet::VERB_MULTICAST_FRAME);
outp.append((uint16_t)0);
outp.append(fifo + ZT_ADDRESS_LENGTH,ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PROPAGATION_FIFO); // remainder of fifo is loaded into packet
outp.append(bloom,ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PROPAGATION_BLOOM);
outp.append((nconf->com()) ? (unsigned char)ZT_PROTO_VERB_MULTICAST_FRAME_FLAGS_HAS_MEMBERSHIP_CERTIFICATE : (unsigned char)0);
outp.append(network->id());
outp.append(bloomNonce);
outp.append((unsigned char)nconf->multicastPrefixBits());
outp.append((unsigned char)prefix);
_r->identity.address().appendTo(outp);
outp.append((unsigned char)((mcid >> 16) & 0xff));
outp.append((unsigned char)((mcid >> 8) & 0xff));
outp.append((unsigned char)(mcid & 0xff));
from.appendTo(outp);
to.appendTo(outp);
outp.append(mg.adi());
outp.append((uint16_t)etherType);
outp.append((uint16_t)data.size());
outp.append(data);
C25519::Signature sig(_r->identity.sign(outp.field(ZT_PROTO_VERB_MULTICAST_FRAME_IDX__START_OF_SIGNED_PORTION,signedPartLen),signedPartLen));
outp.append((uint16_t)sig.size());
outp.append(sig.data,(unsigned int)sig.size());
// FIXME: now we send the netconf cert with every single multicast,
// which pretty much ensures everyone has it ahead of time but adds
// some redundant payload. Maybe think about this in the future.
if (nconf->com())
nconf->com().serialize(outp);
outp.compress();
send(outp,true);
}
} else if (to[0] == MAC::firstOctetForNetwork(network->id())) {
// Simple unicast frame from us to another node on the same virtual network
Address toZT(to.toAddress(network->id()));
if (network->isAllowed(toZT)) {
network->pushMembershipCertificate(toZT,false,Utils::now());
Packet outp(toZT,_r->identity.address(),Packet::VERB_FRAME);
outp.append(network->id());
outp.append((uint16_t)etherType);
outp.append(data);
outp.compress();
send(outp,true);
} else { } else {
LOG("%s: UNICAST %s -> %s %s dropped, bridging disabled, unicast destination not on network %.16llx",network->tapDeviceName().c_str(),from.toString().c_str(),to.toString().c_str(),etherTypeName(etherType),network->id()); TRACE("%s: UNICAST: %s -> %s %s dropped, destination not a member of closed network %.16llx",network->tapDeviceName().c_str(),from.toString().c_str(),to.toString().c_str(),etherTypeName(etherType),network->id());
} }
} else { } else {
LOG("%s: UNICAST %s -> %s %s dropped, bridging disabled, unicast source not on network %.16llx",network->tapDeviceName().c_str(),from.toString().c_str(),to.toString().c_str(),etherTypeName(etherType),network->id()); // Simple unicast from us to another node behind another bridge
Address bridges[ZT_MAX_BRIDGE_SPAM];
unsigned int numBridges = 0;
bridges[0] = network->findBridgeTo(to));
if ((bridges[0])&&(bridges[0] != _r->identity.address())&&(network->isAllowed(bridges[0]))&&(network->permitsBridging(bridges[0]))) {
++numBridges;
} else if (!nconf->activeBridges().empty()) {
// If there is no known route, spam to up to ZT_MAX_BRIDGE_SPAM active bridges
std::set<Address>::const_iterator ab(nconf->activeBridges().begin());
if (nconf->activeBridges().size() <= ZT_MAX_BRIDGE_SPAM) {
// If there are <= ZT_MAX_BRIDGE_SPAM active bridges, just take them all
while (numBridges < nconf->activeBridges().size())
bridges[numBridges++] = *(ab++);
} else {
// Otherwise do this less efficient multipass thing to pick randomly from an ordered set until we have enough
while (numBridges < ZT_MAX_BRIDGE_SPAM) {
if (ab == nconf->activeBridges().end())
ab = nconf->activeBridges().begin();
if (((unsigned long)_r->prng->next32() % (unsigned long)nconf->activeBridges().size()) == 0)
bridges[numBridges++] = *(ab++);
else ++ab;
}
}
}
for(unsigned int b=0;b<numBridges;++b) {
Packet outp(bridges[b],_r->identity.address(),Packet::VERB_EXT_FRAME);
outp.append(network->id());
outp.append((unsigned char)0);
to.appendTo(outp);
from.appendTo(outp);
outp.append((uint16_t)etherType);
outp.append(data);
outp.compress();
send(outp,true);
}
} }
} }