Stability fix + introduction of connection probation

This commit is contained in:
Joseph Henry 2016-01-18 23:41:28 -08:00
parent 31ed86740c
commit 357cb92f2e
2 changed files with 23 additions and 36 deletions

View file

@ -292,9 +292,6 @@ void NetconEthernetTap::threadMain()
{ {
uint64_t prev_tcp_time = 0, prev_status_time = 0, prev_etharp_time = 0; uint64_t prev_tcp_time = 0, prev_status_time = 0, prev_etharp_time = 0;
Mutex::Lock _l(_tcpconns_m);
_tcpconns_m.unlock();
// Main timer loop // Main timer loop
while (_run) { while (_run) {
uint64_t now = OSUtils::now(); uint64_t now = OSUtils::now();
@ -303,23 +300,17 @@ void NetconEthernetTap::threadMain()
uint64_t since_status = now - prev_status_time; uint64_t since_status = now - prev_status_time;
uint64_t tcp_remaining = ZT_LWIP_TCP_TIMER_INTERVAL; uint64_t tcp_remaining = ZT_LWIP_TCP_TIMER_INTERVAL;
uint64_t etharp_remaining = ARP_TMR_INTERVAL; uint64_t etharp_remaining = ARP_TMR_INTERVAL;
uint64_t status_remaining = STATUS_TMR_INTERVAL;
// Connection pruning // Connection pruning
if (since_status >= STATUS_TMR_INTERVAL) { if (since_status >= STATUS_TMR_INTERVAL) {
prev_status_time = now; prev_status_time = now;
status_remaining = STATUS_TMR_INTERVAL - since_status;
_tcpconns_m.lock();
for(size_t i=0;i<_TcpConnections.size();++i) { for(size_t i=0;i<_TcpConnections.size();++i) {
if(!_TcpConnections[i]->sock) if(!_TcpConnections[i]->sock)
continue; // Skip, this is a pending connection continue;
int fd = _phy.getDescriptor(_TcpConnections[i]->sock); int fd = _phy.getDescriptor(_TcpConnections[i]->sock);
dwr(MSG_DEBUG," tap_thread(): tcp\\jobs = {%d, %d}\n", _TcpConnections.size(), jobmap.size()); dwr(MSG_DEBUG," tap_thread(): tcp\\jobs = {%d, %d}\n", _TcpConnections.size(), jobmap.size());
dwr(MSG_DEBUG," tap_thread(): sock=%x, pcb->state=%d\n", _TcpConnections[i]->sock, _TcpConnections[i]->pcb->state);
fcntl(fd, F_SETFL, O_NONBLOCK); fcntl(fd, F_SETFL, O_NONBLOCK);
unsigned char tmpbuf[BUF_SZ]; unsigned char tmpbuf[BUF_SZ];
@ -332,17 +323,15 @@ void NetconEthernetTap::threadMain()
closeConnection(_TcpConnections[i]->sock); closeConnection(_TcpConnections[i]->sock);
} else if (n > 0) { } else if (n > 0) {
dwr(MSG_DEBUG," tap_thread(): data read during connection check (%d bytes)\n", n); dwr(MSG_DEBUG," tap_thread(): data read during connection check (%d bytes)\n", n);
phyOnUnixData(_TcpConnections[i]->sock,_phy.getuptr(_TcpConnections[i]->sock),&tmpbuf,BUF_SZ); phyOnUnixData(_TcpConnections[i]->sock,_phy.getuptr(_TcpConnections[i]->sock),&tmpbuf,n);
} }
} }
_tcpconns_m.unlock();
} }
// Main TCP/ETHARP timer section // Main TCP/ETHARP timer section
if (since_tcp >= ZT_LWIP_TCP_TIMER_INTERVAL) { if (since_tcp >= ZT_LWIP_TCP_TIMER_INTERVAL) {
prev_tcp_time = now; prev_tcp_time = now;
lwipstack->tcp_tmr(); lwipstack->tcp_tmr();
// Makeshift poll // Makeshift poll
_tcpconns_m.lock();
for(size_t i=0;i<_TcpConnections.size();++i) { for(size_t i=0;i<_TcpConnections.size();++i) {
if(_TcpConnections[i]->txsz > 0){ if(_TcpConnections[i]->txsz > 0){
lwipstack->_lock.lock(); lwipstack->_lock.lock();
@ -350,7 +339,6 @@ void NetconEthernetTap::threadMain()
lwipstack->_lock.unlock(); lwipstack->_lock.unlock();
} }
} }
_tcpconns_m.unlock();
} else { } else {
tcp_remaining = ZT_LWIP_TCP_TIMER_INTERVAL - since_tcp; tcp_remaining = ZT_LWIP_TCP_TIMER_INTERVAL - since_tcp;
} }
@ -386,8 +374,6 @@ TcpConnection *NetconEthernetTap::getConnection(PhySocket *sock)
void NetconEthernetTap::closeConnection(PhySocket *sock) void NetconEthernetTap::closeConnection(PhySocket *sock)
{ {
// Here we assume _tcpconns_m is already locked by caller // Here we assume _tcpconns_m is already locked by caller
dwr(MSG_DEBUG,"closeConnection(%x)\n",sock);
if(!sock) { if(!sock) {
dwr(MSG_DEBUG," closeConnection(): invalid PhySocket\n"); dwr(MSG_DEBUG," closeConnection(): invalid PhySocket\n");
return; return;
@ -395,10 +381,10 @@ void NetconEthernetTap::closeConnection(PhySocket *sock)
TcpConnection *conn = getConnection(sock); TcpConnection *conn = getConnection(sock);
if(!conn) if(!conn)
return; return;
if(conn->pcb) { if(conn->pcb && conn->pcb->state != CLOSED) {
dwr(MSG_DEBUG," closeConnection(): PCB->state = %d\n", conn->pcb->state); dwr(MSG_DEBUG," closeConnection(%x): PCB->state = %d\n", sock, conn->pcb->state);
if(conn->pcb->state == SYN_SENT) { if(conn->pcb->state == SYN_SENT) {
dwr(MSG_DEBUG," closeConnection(): invalid PCB state for this operation. ignoring.\n"); dwr(MSG_DEBUG," closeConnection(%x): invalid PCB state for this operation. ignoring.\n", sock);
return; return;
} }
if(lwipstack->_tcp_close(conn->pcb) == ERR_OK) { if(lwipstack->_tcp_close(conn->pcb) == ERR_OK) {
@ -410,7 +396,7 @@ void NetconEthernetTap::closeConnection(PhySocket *sock)
lwipstack->_tcp_poll(conn->pcb, NULL, 1); lwipstack->_tcp_poll(conn->pcb, NULL, 1);
} }
else { else {
dwr(MSG_ERROR," closeConnection(): error while calling tcp_close()\n"); dwr(MSG_ERROR," closeConnection(%x): error while calling tcp_close()\n", sock);
} }
} }
for(size_t i=0;i<_TcpConnections.size();++i) { for(size_t i=0;i<_TcpConnections.size();++i) {
@ -446,8 +432,8 @@ void NetconEthernetTap::phyOnUnixWritable(PhySocket *sock,void **uptr)
memcpy(conn->rxbuf, conn->rxbuf+n, len-n); memcpy(conn->rxbuf, conn->rxbuf+n, len-n);
conn->rxsz -= n; conn->rxsz -= n;
float max = (float)DEFAULT_BUF_SZ; float max = (float)DEFAULT_BUF_SZ;
dwr(MSG_TRANSFER," <--- RX :: { TX: %.3f%% | RX: %.3f%% } :: %d bytes\n", dwr(MSG_TRANSFER," <--- RX :: {TX: %.3f%%, RX: %.3f%%, sock=%x} :: %d bytes\n",
(float)conn->txsz / max, (float)conn->rxsz / max, n); (float)conn->txsz / max, (float)conn->rxsz / max, sock, n);
lwipstack->_tcp_recved(conn->pcb, n); lwipstack->_tcp_recved(conn->pcb, n);
if(conn->rxsz == 0){ if(conn->rxsz == 0){
_phy.setNotifyWritable(conn->sock, false); // Nothing more to be notified about _phy.setNotifyWritable(conn->sock, false); // Nothing more to be notified about
@ -567,7 +553,6 @@ void NetconEthernetTap::phyOnUnixData(PhySocket *sock,void **uptr,void *data,uns
rpcSock = sockdata.first; rpcSock = sockdata.first;
buf = (unsigned char*)sockdata.second; buf = (unsigned char*)sockdata.second;
} }
// Process RPC if we have a corresponding jobmap entry // Process RPC if we have a corresponding jobmap entry
if(foundJob) { if(foundJob) {
unloadRPC(buf, pid, tid, rpcCount, timestamp, CANARY, cmd, payload); unloadRPC(buf, pid, tid, rpcCount, timestamp, CANARY, cmd, payload);
@ -576,25 +561,21 @@ void NetconEthernetTap::phyOnUnixData(PhySocket *sock,void **uptr,void *data,uns
switch(cmd) { switch(cmd) {
case RPC_BIND: case RPC_BIND:
dwr(MSG_DEBUG," <%x> RPC_BIND\n", sock);
struct bind_st bind_rpc; struct bind_st bind_rpc;
memcpy(&bind_rpc, &buf[IDX_PAYLOAD+STRUCT_IDX], sizeof(struct bind_st)); memcpy(&bind_rpc, &buf[IDX_PAYLOAD+STRUCT_IDX], sizeof(struct bind_st));
handleBind(sock, rpcSock, uptr, &bind_rpc); handleBind(sock, rpcSock, uptr, &bind_rpc);
break; break;
case RPC_LISTEN: case RPC_LISTEN:
dwr(MSG_DEBUG," <%x> RPC_LISTEN\n", sock);
struct listen_st listen_rpc; struct listen_st listen_rpc;
memcpy(&listen_rpc, &buf[IDX_PAYLOAD+STRUCT_IDX], sizeof(struct listen_st)); memcpy(&listen_rpc, &buf[IDX_PAYLOAD+STRUCT_IDX], sizeof(struct listen_st));
handleListen(sock, rpcSock, uptr, &listen_rpc); handleListen(sock, rpcSock, uptr, &listen_rpc);
break; break;
case RPC_GETSOCKNAME: case RPC_GETSOCKNAME:
dwr(MSG_DEBUG," <%x> RPC_GETSOCKNAME\n", sock);
struct getsockname_st getsockname_rpc; struct getsockname_st getsockname_rpc;
memcpy(&getsockname_rpc, &buf[IDX_PAYLOAD+STRUCT_IDX], sizeof(struct getsockname_st)); memcpy(&getsockname_rpc, &buf[IDX_PAYLOAD+STRUCT_IDX], sizeof(struct getsockname_st));
handleGetsockname(sock, rpcSock, uptr, &getsockname_rpc); handleGetsockname(sock, rpcSock, uptr, &getsockname_rpc);
break; break;
case RPC_CONNECT: case RPC_CONNECT:
dwr(MSG_DEBUG," <%x> RPC_CONNECT\n", sock);
struct connect_st connect_rpc; struct connect_st connect_rpc;
memcpy(&connect_rpc, &buf[IDX_PAYLOAD+STRUCT_IDX], sizeof(struct connect_st)); memcpy(&connect_rpc, &buf[IDX_PAYLOAD+STRUCT_IDX], sizeof(struct connect_st));
handleConnect(sock, rpcSock, conn, &connect_rpc); handleConnect(sock, rpcSock, conn, &connect_rpc);
@ -731,9 +712,11 @@ err_t NetconEthernetTap::nc_sent(void* arg, struct tcp_pcb *PCB, u16_t len)
{ {
Larg *l = (Larg*)arg; Larg *l = (Larg*)arg;
Mutex::Lock _l(l->tap->_tcpconns_m); Mutex::Lock _l(l->tap->_tcpconns_m);
if(l && l->conn && len) { if(l->conn->probation && l->conn->txsz == 0){
float max = (float)DEFAULT_BUF_SZ; l->conn->probation = false; // TX buffer now empty, removing from probation
if(l->conn->txsz < max / 2) { }
if(l && l->conn && len && !l->conn->probation) {
if(l->conn->txsz < (float)DEFAULT_BUF_SOFTMAX) {
l->tap->_phy.setNotifyReadable(l->conn->sock, true); l->tap->_phy.setNotifyReadable(l->conn->sock, true);
l->tap->_phy.whack(); l->tap->_phy.whack();
} }
@ -1033,8 +1016,11 @@ void NetconEthernetTap::handleWrite(TcpConnection *conn)
/* PCB send buffer is full, turn off readability notifications for the /* PCB send buffer is full, turn off readability notifications for the
corresponding PhySocket until nc_sent() is called and confirms that there is corresponding PhySocket until nc_sent() is called and confirms that there is
now space on the buffer */ now space on the buffer */
dwr(MSG_DEBUG," handleWrite(): sndbuf == 0, LWIP stack is full\n"); if(!conn->probation) {
_phy.setNotifyReadable(conn->sock, false); dwr(MSG_DEBUG," handleWrite(): sndbuf == 0, LWIP stack is full\n");
_phy.setNotifyReadable(conn->sock, false);
conn->probation = true;
}
return; return;
} }
if(conn->txsz <= 0) if(conn->txsz <= 0)
@ -1061,8 +1047,8 @@ void NetconEthernetTap::handleWrite(TcpConnection *conn)
conn->txsz -= r; conn->txsz -= r;
float max = (float)DEFAULT_BUF_SZ; float max = (float)DEFAULT_BUF_SZ;
dwr(MSG_TRANSFER," TX ---> :: { TX: %.3f%% | RX: %.3f%% } :: %d bytes\n", dwr(MSG_TRANSFER," TX ---> :: {TX: %.3f%%, RX: %.3f%%, sock=%x} :: %d bytes\n",
(float)conn->txsz / max, (float)conn->rxsz / max, r); (float)conn->txsz / max, (float)conn->rxsz / max, conn->sock, r);
return; return;
} }
} }

View file

@ -60,6 +60,7 @@ struct accept_st;
#define ZT_LWIP_TCP_TIMER_INTERVAL 5 #define ZT_LWIP_TCP_TIMER_INTERVAL 5
#define STATUS_TMR_INTERVAL 500 // How often we check connection statuses (in ms) #define STATUS_TMR_INTERVAL 500 // How often we check connection statuses (in ms)
#define DEFAULT_BUF_SZ 1024 * 1024 * 2 #define DEFAULT_BUF_SZ 1024 * 1024 * 2
#define DEFAULT_BUF_SOFTMAX DEFAULT_BUF_SZ / 2
namespace ZeroTier { namespace ZeroTier {
@ -71,7 +72,7 @@ class LWIPStack;
*/ */
struct TcpConnection struct TcpConnection
{ {
bool listening, closing; bool listening, probation;
int pid, txsz, rxsz; int pid, txsz, rxsz;
PhySocket *rpcSock, *sock; PhySocket *rpcSock, *sock;
struct tcp_pcb *pcb; struct tcp_pcb *pcb;