From 83eacdfadab38a43f6d91966b039f954ad9c5e83 Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Fri, 10 Jan 2020 15:28:27 -0800 Subject: [PATCH] New Buffer implementation (not used yet) and cleanup --- node/AES.cpp | 64 +++--- node/AES.hpp | 83 ++------ node/Address.hpp | 8 +- node/AtomicCounter.hpp | 15 +- node/Buf.cpp | 116 ++++++++++ node/Buf.hpp | 467 +++++++++++++++++++++++++++++++++++++++++ node/CMakeLists.txt | 2 + node/Utils.cpp | 24 ++- node/Utils.hpp | 17 ++ 9 files changed, 684 insertions(+), 112 deletions(-) create mode 100644 node/Buf.cpp create mode 100644 node/Buf.hpp diff --git a/node/AES.cpp b/node/AES.cpp index ecd75cafa..fc129f39c 100644 --- a/node/AES.cpp +++ b/node/AES.cpp @@ -14,10 +14,6 @@ #include "Constants.hpp" #include "AES.hpp" -// This file contains the software implementations of AES and GHASH. They're -// only used if your CPU lacks hardware acceleration as the hardware -// accelerated code is 10-20X as fast in most cases. - #ifdef __WINDOWS__ #include #endif @@ -48,28 +44,6 @@ static inline void writeuint32_t(void *out,const uint32_t v) #define writeuint32_t(o,v) (*((uint32_t *)(o)) = Utils::hton(v)) #endif -#if (defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || defined(__AMD64) || defined(__AMD64__) || defined(_M_X64)) -static bool _zt_aesni_supported() -{ -#ifdef __WINDOWS__ - int regs[4]; - __cpuid(regs,1); - return ( (((regs[2] >> 25) & 1) != 0) && (((regs[2] >> 19) & 1) != 0) && (((regs[2] >> 1) & 1) != 0) ); // AES-NI, SSE4.1, PCLMUL -#else - uint32_t eax,ebx,ecx,edx; - __asm__ __volatile__ ( - "cpuid" - : "=a"(eax),"=b"(ebx),"=c"(ecx),"=d"(edx) - : "a"(1),"c"(0) - ); - return ( ((ecx & (1 << 25)) != 0) && ((ecx & (1 << 19)) != 0) && ((ecx & (1 << 1)) != 0) ); // AES-NI, SSE4.1, PCLMUL -#endif -} -const bool AES::HW_ACCEL = _zt_aesni_supported(); -#else -const bool AES::HW_ACCEL = false; -#endif - const uint32_t AES::Te0[256] = { 
0xc66363a5,0xf87c7c84,0xee777799,0xf67b7b8d,0xfff2f20d,0xd66b6bbd,0xde6f6fb1,0x91c5c554,0x60303050,0x02010103,0xce6767a9,0x562b2b7d,0xe7fefe19,0xb5d7d762,0x4dababe6,0xec76769a,0x8fcaca45,0x1f82829d,0x89c9c940,0xfa7d7d87,0xeffafa15,0xb25959eb,0x8e4747c9,0xfbf0f00b,0x41adadec,0xb3d4d467,0x5fa2a2fd,0x45afafea,0x239c9cbf,0x53a4a4f7,0xe4727296,0x9bc0c05b,0x75b7b7c2,0xe1fdfd1c,0x3d9393ae,0x4c26266a,0x6c36365a,0x7e3f3f41,0xf5f7f702,0x83cccc4f,0x6834345c,0x51a5a5f4,0xd1e5e534,0xf9f1f108,0xe2717193,0xabd8d873,0x62313153,0x2a15153f,0x0804040c,0x95c7c752,0x46232365,0x9dc3c35e,0x30181828,0x379696a1,0x0a05050f,0x2f9a9ab5,0x0e070709,0x24121236,0x1b80809b,0xdfe2e23d,0xcdebeb26,0x4e272769,0x7fb2b2cd,0xea75759f,0x1209091b,0x1d83839e,0x582c2c74,0x341a1a2e,0x361b1b2d,0xdc6e6eb2,0xb45a5aee,0x5ba0a0fb,0xa45252f6,0x763b3b4d,0xb7d6d661,0x7db3b3ce,0x5229297b,0xdde3e33e,0x5e2f2f71,0x13848497,0xa65353f5,0xb9d1d168,0x00000000,0xc1eded2c,0x40202060,0xe3fcfc1f,0x79b1b1c8,0xb65b5bed,0xd46a6abe,0x8dcbcb46,0x67bebed9,0x7239394b,0x944a4ade,0x984c4cd4,0xb05858e8,0x85cfcf4a,0xbbd0d06b,0xc5efef2a,0x4faaaae5,0xedfbfb16,0x864343c5,0x9a4d4dd7,0x66333355,0x11858594,0x8a4545cf,0xe9f9f910,0x04020206,0xfe7f7f81,0xa05050f0,0x783c3c44,0x259f9fba,0x4ba8a8e3,0xa25151f3,0x5da3a3fe,0x804040c0,0x058f8f8a,0x3f9292ad,0x219d9dbc,0x70383848,0xf1f5f504,0x63bcbcdf,0x77b6b6c1,0xafdada75,0x42212163,0x20101030,0xe5ffff1a,0xfdf3f30e,0xbfd2d26d,0x81cdcd4c,0x180c0c14,0x26131335,0xc3ecec2f,0xbe5f5fe1,0x359797a2,0x884444cc,0x2e171739,0x93c4c457,0x55a7a7f2,0xfc7e7e82,0x7a3d3d47,0xc86464ac,0xba5d5de7,0x3219192b,0xe6737395,0xc06060a0,0x19818198,0x9e4f4fd1,0xa3dcdc7f,0x44222266,0x542a2a7e,0x3b9090ab,0x0b888883,0x8c4646ca,0xc7eeee29,0x6bb8b8d3,0x2814143c,0xa7dede79,0xbc5e5ee2,0x160b0b1d,0xaddbdb76,0xdbe0e03b,0x64323256,0x743a3a4e,0x140a0a1e,0x924949db,0x0c06060a,0x4824246c,0xb85c5ce4,0x9fc2c25d,0xbdd3d36e,0x43acacef,0xc46262a6,0x399191a8,0x319595a4,0xd3e4e437,0xf279798b,0xd5e7e732,0x8bc8c843,0x6e373759,0xda6d6db7,0x018d8d8c,0xb1d5d56
4,0x9c4e4ed2,0x49a9a9e0,0xd86c6cb4,0xac5656fa,0xf3f4f407,0xcfeaea25,0xca6565af,0xf47a7a8e,0x47aeaee9,0x10080818,0x6fbabad5,0xf0787888,0x4a25256f,0x5c2e2e72,0x381c1c24,0x57a6a6f1,0x73b4b4c7,0x97c6c651,0xcbe8e823,0xa1dddd7c,0xe874749c,0x3e1f1f21,0x964b4bdd,0x61bdbddc,0x0d8b8b86,0x0f8a8a85,0xe0707090,0x7c3e3e42,0x71b5b5c4,0xcc6666aa,0x904848d8,0x06030305,0xf7f6f601,0x1c0e0e12,0xc26161a3,0x6a35355f,0xae5757f9,0x69b9b9d0,0x17868691,0x99c1c158,0x3a1d1d27,0x279e9eb9,0xd9e1e138,0xebf8f813,0x2b9898b3,0x22111133,0xd26969bb,0xa9d9d970,0x078e8e89,0x339494a7,0x2d9b9bb6,0x3c1e1e22,0x15878792,0xc9e9e920,0x87cece49,0xaa5555ff,0x50282878,0xa5dfdf7a,0x038c8c8f,0x59a1a1f8,0x09898980,0x1a0d0d17,0x65bfbfda,0xd7e6e631,0x844242c6,0xd06868b8,0x824141c3,0x299999b0,0x5a2d2d77,0x1e0f0f11,0x7bb0b0cb,0xa85454fc,0x6dbbbbd6,0x2c16163a }; const uint32_t AES::Te1[256] = { 0xa5c66363,0x84f87c7c,0x99ee7777,0x8df67b7b,0x0dfff2f2,0xbdd66b6b,0xb1de6f6f,0x5491c5c5,0x50603030,0x03020101,0xa9ce6767,0x7d562b2b,0x19e7fefe,0x62b5d7d7,0xe64dabab,0x9aec7676,0x458fcaca,0x9d1f8282,0x4089c9c9,0x87fa7d7d,0x15effafa,0xebb25959,0xc98e4747,0x0bfbf0f0,0xec41adad,0x67b3d4d4,0xfd5fa2a2,0xea45afaf,0xbf239c9c,0xf753a4a4,0x96e47272,0x5b9bc0c0,0xc275b7b7,0x1ce1fdfd,0xae3d9393,0x6a4c2626,0x5a6c3636,0x417e3f3f,0x02f5f7f7,0x4f83cccc,0x5c683434,0xf451a5a5,0x34d1e5e5,0x08f9f1f1,0x93e27171,0x73abd8d8,0x53623131,0x3f2a1515,0x0c080404,0x5295c7c7,0x65462323,0x5e9dc3c3,0x28301818,0xa1379696,0x0f0a0505,0xb52f9a9a,0x090e0707,0x36241212,0x9b1b8080,0x3ddfe2e2,0x26cdebeb,0x694e2727,0xcd7fb2b2,0x9fea7575,0x1b120909,0x9e1d8383,0x74582c2c,0x2e341a1a,0x2d361b1b,0xb2dc6e6e,0xeeb45a5a,0xfb5ba0a0,0xf6a45252,0x4d763b3b,0x61b7d6d6,0xce7db3b3,0x7b522929,0x3edde3e3,0x715e2f2f,0x97138484,0xf5a65353,0x68b9d1d1,0x00000000,0x2cc1eded,0x60402020,0x1fe3fcfc,0xc879b1b1,0xedb65b5b,0xbed46a6a,0x468dcbcb,0xd967bebe,0x4b723939,0xde944a4a,0xd4984c4c,0xe8b05858,0x4a85cfcf,0x6bbbd0d0,0x2ac5efef,0xe54faaaa,0x16edfbfb,0xc5864343,0xd79a4d4d,0x55663333,0x94118585,0xcf
8a4545,0x10e9f9f9,0x06040202,0x81fe7f7f,0xf0a05050,0x44783c3c,0xba259f9f,0xe34ba8a8,0xf3a25151,0xfe5da3a3,0xc0804040,0x8a058f8f,0xad3f9292,0xbc219d9d,0x48703838,0x04f1f5f5,0xdf63bcbc,0xc177b6b6,0x75afdada,0x63422121,0x30201010,0x1ae5ffff,0x0efdf3f3,0x6dbfd2d2,0x4c81cdcd,0x14180c0c,0x35261313,0x2fc3ecec,0xe1be5f5f,0xa2359797,0xcc884444,0x392e1717,0x5793c4c4,0xf255a7a7,0x82fc7e7e,0x477a3d3d,0xacc86464,0xe7ba5d5d,0x2b321919,0x95e67373,0xa0c06060,0x98198181,0xd19e4f4f,0x7fa3dcdc,0x66442222,0x7e542a2a,0xab3b9090,0x830b8888,0xca8c4646,0x29c7eeee,0xd36bb8b8,0x3c281414,0x79a7dede,0xe2bc5e5e,0x1d160b0b,0x76addbdb,0x3bdbe0e0,0x56643232,0x4e743a3a,0x1e140a0a,0xdb924949,0x0a0c0606,0x6c482424,0xe4b85c5c,0x5d9fc2c2,0x6ebdd3d3,0xef43acac,0xa6c46262,0xa8399191,0xa4319595,0x37d3e4e4,0x8bf27979,0x32d5e7e7,0x438bc8c8,0x596e3737,0xb7da6d6d,0x8c018d8d,0x64b1d5d5,0xd29c4e4e,0xe049a9a9,0xb4d86c6c,0xfaac5656,0x07f3f4f4,0x25cfeaea,0xafca6565,0x8ef47a7a,0xe947aeae,0x18100808,0xd56fbaba,0x88f07878,0x6f4a2525,0x725c2e2e,0x24381c1c,0xf157a6a6,0xc773b4b4,0x5197c6c6,0x23cbe8e8,0x7ca1dddd,0x9ce87474,0x213e1f1f,0xdd964b4b,0xdc61bdbd,0x860d8b8b,0x850f8a8a,0x90e07070,0x427c3e3e,0xc471b5b5,0xaacc6666,0xd8904848,0x05060303,0x01f7f6f6,0x121c0e0e,0xa3c26161,0x5f6a3535,0xf9ae5757,0xd069b9b9,0x91178686,0x5899c1c1,0x273a1d1d,0xb9279e9e,0x38d9e1e1,0x13ebf8f8,0xb32b9898,0x33221111,0xbbd26969,0x70a9d9d9,0x89078e8e,0xa7339494,0xb62d9b9b,0x223c1e1e,0x92158787,0x20c9e9e9,0x4987cece,0xffaa5555,0x78502828,0x7aa5dfdf,0x8f038c8c,0xf859a1a1,0x80098989,0x171a0d0d,0xda65bfbf,0x31d7e6e6,0xc6844242,0xb8d06868,0xc3824141,0xb0299999,0x775a2d2d,0x111e0f0f,0xcb7bb0b0,0xfca85454,0xd66dbbbb,0x3a2c1616 }; const uint32_t AES::Te2[256] = { 
0x63a5c663,0x7c84f87c,0x7799ee77,0x7b8df67b,0xf20dfff2,0x6bbdd66b,0x6fb1de6f,0xc55491c5,0x30506030,0x01030201,0x67a9ce67,0x2b7d562b,0xfe19e7fe,0xd762b5d7,0xabe64dab,0x769aec76,0xca458fca,0x829d1f82,0xc94089c9,0x7d87fa7d,0xfa15effa,0x59ebb259,0x47c98e47,0xf00bfbf0,0xadec41ad,0xd467b3d4,0xa2fd5fa2,0xafea45af,0x9cbf239c,0xa4f753a4,0x7296e472,0xc05b9bc0,0xb7c275b7,0xfd1ce1fd,0x93ae3d93,0x266a4c26,0x365a6c36,0x3f417e3f,0xf702f5f7,0xcc4f83cc,0x345c6834,0xa5f451a5,0xe534d1e5,0xf108f9f1,0x7193e271,0xd873abd8,0x31536231,0x153f2a15,0x040c0804,0xc75295c7,0x23654623,0xc35e9dc3,0x18283018,0x96a13796,0x050f0a05,0x9ab52f9a,0x07090e07,0x12362412,0x809b1b80,0xe23ddfe2,0xeb26cdeb,0x27694e27,0xb2cd7fb2,0x759fea75,0x091b1209,0x839e1d83,0x2c74582c,0x1a2e341a,0x1b2d361b,0x6eb2dc6e,0x5aeeb45a,0xa0fb5ba0,0x52f6a452,0x3b4d763b,0xd661b7d6,0xb3ce7db3,0x297b5229,0xe33edde3,0x2f715e2f,0x84971384,0x53f5a653,0xd168b9d1,0x00000000,0xed2cc1ed,0x20604020,0xfc1fe3fc,0xb1c879b1,0x5bedb65b,0x6abed46a,0xcb468dcb,0xbed967be,0x394b7239,0x4ade944a,0x4cd4984c,0x58e8b058,0xcf4a85cf,0xd06bbbd0,0xef2ac5ef,0xaae54faa,0xfb16edfb,0x43c58643,0x4dd79a4d,0x33556633,0x85941185,0x45cf8a45,0xf910e9f9,0x02060402,0x7f81fe7f,0x50f0a050,0x3c44783c,0x9fba259f,0xa8e34ba8,0x51f3a251,0xa3fe5da3,0x40c08040,0x8f8a058f,0x92ad3f92,0x9dbc219d,0x38487038,0xf504f1f5,0xbcdf63bc,0xb6c177b6,0xda75afda,0x21634221,0x10302010,0xff1ae5ff,0xf30efdf3,0xd26dbfd2,0xcd4c81cd,0x0c14180c,0x13352613,0xec2fc3ec,0x5fe1be5f,0x97a23597,0x44cc8844,0x17392e17,0xc45793c4,0xa7f255a7,0x7e82fc7e,0x3d477a3d,0x64acc864,0x5de7ba5d,0x192b3219,0x7395e673,0x60a0c060,0x81981981,0x4fd19e4f,0xdc7fa3dc,0x22664422,0x2a7e542a,0x90ab3b90,0x88830b88,0x46ca8c46,0xee29c7ee,0xb8d36bb8,0x143c2814,0xde79a7de,0x5ee2bc5e,0x0b1d160b,0xdb76addb,0xe03bdbe0,0x32566432,0x3a4e743a,0x0a1e140a,0x49db9249,0x060a0c06,0x246c4824,0x5ce4b85c,0xc25d9fc2,0xd36ebdd3,0xacef43ac,0x62a6c462,0x91a83991,0x95a43195,0xe437d3e4,0x798bf279,0xe732d5e7,0xc8438bc8,0x37596e37,0x6db7da6d,0x8d8c018d,0xd564b1d
5,0x4ed29c4e,0xa9e049a9,0x6cb4d86c,0x56faac56,0xf407f3f4,0xea25cfea,0x65afca65,0x7a8ef47a,0xaee947ae,0x08181008,0xbad56fba,0x7888f078,0x256f4a25,0x2e725c2e,0x1c24381c,0xa6f157a6,0xb4c773b4,0xc65197c6,0xe823cbe8,0xdd7ca1dd,0x749ce874,0x1f213e1f,0x4bdd964b,0xbddc61bd,0x8b860d8b,0x8a850f8a,0x7090e070,0x3e427c3e,0xb5c471b5,0x66aacc66,0x48d89048,0x03050603,0xf601f7f6,0x0e121c0e,0x61a3c261,0x355f6a35,0x57f9ae57,0xb9d069b9,0x86911786,0xc15899c1,0x1d273a1d,0x9eb9279e,0xe138d9e1,0xf813ebf8,0x98b32b98,0x11332211,0x69bbd269,0xd970a9d9,0x8e89078e,0x94a73394,0x9bb62d9b,0x1e223c1e,0x87921587,0xe920c9e9,0xce4987ce,0x55ffaa55,0x28785028,0xdf7aa5df,0x8c8f038c,0xa1f859a1,0x89800989,0x0d171a0d,0xbfda65bf,0xe631d7e6,0x42c68442,0x68b8d068,0x41c38241,0x99b02999,0x2d775a2d,0x0f111e0f,0xb0cb7bb0,0x54fca854,0xbbd66dbb,0x163a2c16 }; @@ -175,6 +149,39 @@ void AES::_encryptSW(const uint8_t in[16],uint8_t out[16]) const writeuint32_t(out + 12,(Te2[(t3 >> 24)] & 0xff000000) ^ (Te3[(t0 >> 16) & 0xff] & 0x00ff0000) ^ (Te0[(t1 >> 8) & 0xff] & 0x0000ff00) ^ (Te1[(t2) & 0xff] & 0x000000ff) ^ rk[59]); } +void AES::_ctrSW(const uint8_t iv[16],const void *in,unsigned int len,void *out) const +{ + uint64_t ctr[2],cenc[2]; + memcpy(ctr,iv,16); + uint64_t bctr = Utils::ntoh(ctr[1]); + + const uint8_t *i = (const uint8_t *)in; + uint8_t *o = (uint8_t *)out; + + while (len >= 16) { + _encryptSW((const uint8_t *)ctr,(uint8_t *)cenc); + ctr[1] = Utils::hton(++bctr); +#ifdef ZT_NO_TYPE_PUNNING + for(unsigned int k=0;k<16;++k) + *(o++) = *(i++) ^ ((uint8_t *)cenc)[k]; +#else + *((uint64_t *)o) = *((const uint64_t *)i) ^ cenc[0]; + o += 8; + i += 8; + *((uint64_t *)o) = *((const uint64_t *)i) ^ cenc[1]; + o += 8; + i += 8; +#endif + len -= 16; + } + + if (len) { + _encryptSW((const uint8_t *)ctr,(uint8_t *)cenc); + for(unsigned int k=0;k #if (defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || defined(__AMD64) || defined(__AMD64__) || defined(_M_X64)) - #include #include 
#include #include - #define ZT_AES_AESNI 1 - -#endif // x64 +#endif namespace ZeroTier { /** - * AES-256 and pals + * AES-256 and pals including GMAC, CTR, etc. */ class AES { public: - /** - * This will be true if your platform's type of AES acceleration is supported on this machine - */ - static const bool HW_ACCEL; - - inline AES() {} - inline AES(const uint8_t key[32]) { this->init(key); } - inline ~AES() { Utils::burn(&_k,sizeof(_k)); } + ZT_ALWAYS_INLINE AES() {} + ZT_ALWAYS_INLINE AES(const uint8_t key[32]) { this->init(key); } + ZT_ALWAYS_INLINE ~AES() { Utils::burn(&_k,sizeof(_k)); } /** * Set (or re-set) this AES256 cipher's key */ - inline void init(const uint8_t key[32]) + ZT_ALWAYS_INLINE void init(const uint8_t key[32]) { #ifdef ZT_AES_AESNI - if (likely(HW_ACCEL)) { + if (likely(Utils::CPUID.aes)) { _init_aesni(key); return; } #endif - _initSW(key); } @@ -69,15 +60,14 @@ public: * @param in Input block * @param out Output block (can be same as input) */ - inline void encrypt(const uint8_t in[16],uint8_t out[16]) const + ZT_ALWAYS_INLINE void encrypt(const uint8_t in[16],uint8_t out[16]) const { #ifdef ZT_AES_AESNI - if (likely(HW_ACCEL)) { + if (likely(Utils::CPUID.aes)) { _encrypt_aesni(in,out); return; } #endif - _encryptSW(in,out); } @@ -89,15 +79,14 @@ public: * @param len Length of input * @param out 128-bit authorization tag from GMAC */ - inline void gmac(const uint8_t iv[12],const void *in,const unsigned int len,uint8_t out[16]) const + ZT_ALWAYS_INLINE void gmac(const uint8_t iv[12],const void *in,const unsigned int len,uint8_t out[16]) const { #ifdef ZT_AES_AESNI - if (likely(HW_ACCEL)) { + if (likely(Utils::CPUID.aes)) { _gmac_aesni(iv,(const uint8_t *)in,len,out); return; } #endif - _gmacSW(iv,(const uint8_t *)in,len,out); } @@ -113,44 +102,15 @@ public: * @param len Length of input * @param out Output plaintext or ciphertext */ - inline void ctr(const uint8_t iv[16],const void *in,unsigned int len,void *out) const + ZT_ALWAYS_INLINE void 
ctr(const uint8_t iv[16],const void *in,unsigned int len,void *out) const { #ifdef ZT_AES_AESNI - if (likely(HW_ACCEL)) { + if (likely(Utils::CPUID.aes)) { _ctr_aesni(_k.ni.k,iv,(const uint8_t *)in,len,(uint8_t *)out); return; } #endif - - uint64_t ctr[2],cenc[2]; - memcpy(ctr,iv,16); - uint64_t bctr = Utils::ntoh(ctr[1]); - - const uint8_t *i = (const uint8_t *)in; - uint8_t *o = (uint8_t *)out; - - while (len >= 16) { - _encryptSW((const uint8_t *)ctr,(uint8_t *)cenc); - ctr[1] = Utils::hton(++bctr); -#ifdef ZT_NO_TYPE_PUNNING - for(unsigned int k=0;k<16;++k) - *(o++) = *(i++) ^ ((uint8_t *)cenc)[k]; -#else - *((uint64_t *)o) = *((const uint64_t *)i) ^ cenc[0]; - o += 8; - i += 8; - *((uint64_t *)o) = *((const uint64_t *)i) ^ cenc[1]; - o += 8; - i += 8; -#endif - len -= 16; - } - - if (len) { - _encryptSW((const uint8_t *)ctr,(uint8_t *)cenc); - for(unsigned int k=0;k> 32) == ZT_ADDRESS_RESERVED_PREFIX)); } + ZT_ALWAYS_INLINE bool isReserved() const { return ((!_a)||((_a >> 32U) == ZT_ADDRESS_RESERVED_PREFIX)); } /** * @param i Value from 0 to 4 (inclusive) @@ -135,6 +130,7 @@ public: */ ZT_ALWAYS_INLINE uint8_t operator[](unsigned int i) const { return (uint8_t)(_a >> (32 - (i * 8))); } + explicit ZT_ALWAYS_INLINE operator bool() const { return (_a != 0); } explicit ZT_ALWAYS_INLINE operator unsigned int() const { return (unsigned int)_a; } explicit ZT_ALWAYS_INLINE operator unsigned long() const { return (unsigned long)_a; } explicit ZT_ALWAYS_INLINE operator unsigned long long() const { return (unsigned long long)_a; } diff --git a/node/AtomicCounter.hpp b/node/AtomicCounter.hpp index 4e6b95315..6bfc7025d 100644 --- a/node/AtomicCounter.hpp +++ b/node/AtomicCounter.hpp @@ -24,25 +24,30 @@ namespace ZeroTier { /** * Simple atomic counter supporting increment and decrement + * + * This is used as the reference counter in reference counted objects that + * work with SharedPtr<>. 
*/ class AtomicCounter { public: - ZT_ALWAYS_INLINE AtomicCounter() { _v = 0; } + ZT_ALWAYS_INLINE AtomicCounter() : _v(0) {} ZT_ALWAYS_INLINE int load() const { #ifdef __GNUC__ - return __sync_or_and_fetch(const_cast(&_v),0); + return _v; #else return _v.load(); #endif } + ZT_ALWAYS_INLINE void zero() { _v = 0; } + ZT_ALWAYS_INLINE int operator++() { #ifdef __GNUC__ - return __sync_add_and_fetch(&_v,1); + return __sync_add_and_fetch((int *)&_v,1); #else return ++_v; #endif @@ -51,7 +56,7 @@ public: ZT_ALWAYS_INLINE int operator--() { #ifdef __GNUC__ - return __sync_sub_and_fetch(&_v,1); + return __sync_sub_and_fetch((int *)&_v,1); #else return --_v; #endif @@ -62,7 +67,7 @@ private: ZT_ALWAYS_INLINE const AtomicCounter &operator=(const AtomicCounter &) { return *this; } #ifdef __GNUC__ - int _v; + volatile int _v; #else std::atomic_int _v; #endif diff --git a/node/Buf.cpp b/node/Buf.cpp new file mode 100644 index 000000000..80df63c73 --- /dev/null +++ b/node/Buf.cpp @@ -0,0 +1,116 @@ +/* + * Copyright (c)2019 ZeroTier, Inc. + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file in the project's root directory. + * + * Change Date: 2023-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2.0 of the Apache License. 
+ */ +/****/ + +#include "Buf.hpp" + +#ifndef __GNUC__ +#include +#endif + +namespace ZeroTier { + +#ifdef __GNUC__ +static uintptr_t s_pool = 0; +#else +static std::atomic s_pool(0); +#endif + +void Buf::operator delete(void *ptr,std::size_t sz) +{ + if (ptr) { + uintptr_t bb; + const uintptr_t locked = ~((uintptr_t)0); + for (;;) { +#ifdef __GNUC__ + bb = __sync_fetch_and_or(&s_pool,locked); // get value of s_pool and "lock" by filling with all 1's +#else + bb = s_pool.fetch_or(locked); +#endif + if (bb != locked) + break; + } + + ((Buf *)ptr)->__nextInPool = bb; +#ifdef __GNUC__ + __sync_fetch_and_and(&s_pool,(uintptr_t)ptr); +#else + s_pool.store((uintptr_t)ptr); +#endif + } +} + +SharedPtr Buf::get() +{ + uintptr_t bb; + const uintptr_t locked = ~((uintptr_t)0); + for (;;) { +#ifdef __GNUC__ + bb = __sync_fetch_and_or(&s_pool,locked); // get value of s_pool and "lock" by filling with all 1's +#else + bb = s_pool.fetch_or(locked); +#endif + if (bb != locked) + break; + } + + Buf *b; + if (bb == 0) { +#ifdef __GNUC__ + __sync_fetch_and_and(&s_pool,bb); +#else + s_pool.store(bb); +#endif + b = (Buf *)malloc(sizeof(Buf)); + if (!b) + return SharedPtr(); + } else { + b = (Buf *)bb; +#ifdef __GNUC__ + __sync_fetch_and_and(&s_pool,b->__nextInPool); +#else + s_pool.store(b->__nextInPool); +#endif + } + + b->__refCount.zero(); + return SharedPtr(b); +} + +void Buf::freePool() +{ + uintptr_t bb; + const uintptr_t locked = ~((uintptr_t)0); + for (;;) { +#ifdef __GNUC__ + bb = __sync_fetch_and_or(&s_pool,locked); // get value of s_pool and "lock" by filling with all 1's +#else + bb = s_pool.fetch_or(locked); +#endif + if (bb != locked) + break; + } + +#ifdef __GNUC__ + __sync_fetch_and_and(&s_pool,(uintptr_t)0); +#else + s_pool.store((uintptr_t)0); +#endif + + while (bb != 0) { + uintptr_t next = ((Buf *)bb)->__nextInPool; + free((void *)bb); + bb = next; + } +} + +} // namespace ZeroTier diff --git a/node/Buf.hpp b/node/Buf.hpp new file mode 100644 index 
000000000..8c0346de6 --- /dev/null +++ b/node/Buf.hpp @@ -0,0 +1,467 @@ +/* + * Copyright (c)2019 ZeroTier, Inc. + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file in the project's root directory. + * + * Change Date: 2023-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2.0 of the Apache License. + */ +/****/ + +#ifndef ZT_BUF_HPP +#define ZT_BUF_HPP + +#include "Constants.hpp" +#include "AtomicCounter.hpp" +#include "Utils.hpp" +#include "SharedPtr.hpp" +#include "Mutex.hpp" + +#include +#include +#include + +// Buffers are 16384 bytes in size because this is the smallest size that can hold any packet +// and is a power of two. It needs to be a power of two because masking is significantly faster +// than integer division modulus. +#define ZT_BUF_MEM_SIZE 0x00004000 +#define ZT_BUF_MEM_MASK 0x00003fffU + +namespace ZeroTier { + +/** + * Buffer and methods for branch-free bounds-checked data assembly and parsing + * + * This implements an extremely fast buffer for packet assembly and parsing that avoids + * branching whenever possible. To be safe it must be used correctly! + * + * The read methods are prefixed by 'r', and write methods with 'w'. All methods take + * an iterator, which is just an int that should be initialized to 0 (or whatever starting + * position is desired). All read methods will advance the iterator regardless of outcome. + * + * Read and write methods fail silently in the event of overflow. They do not corrupt or + * access memory outside the bounds of Buf, but will otherwise produce undefined results. + * + * IT IS THE RESPONSIBILITY OF THE USER of this class to use the readOverflow() and + * writeOverflow() static methods to check the iterator for overflow after each series + * of reads and writes and BEFORE ANY PARSING or other decisions are made on the basis + * of the data obtained from a buffer. 
Failure to do so can result in bugs due + * to parsing and branching on undefined or corrupt data. + * + * ^^ THIS IS VERY IMPORTANT ^^ + * + * A typical packet assembly consists of repeated calls to the write methods followed by + * a check to writeOverflow() before final packet armoring and transport. A typical packet + * disassembly and parsing consists of a series of read calls to obtain the packet's + * fields followed by a call to readOverflow() to check that these fields are valid. The + * packet is discarded if readOverflow() returns true. Some packet parsers may make + * additional reads and in this case readOverflow() must be checked after each set of + * reads to ensure that overflow did not occur. + * + * Buf uses a lock-free pool for extremely fast allocation and deallocation. + */ +class Buf +{ + friend class SharedPtr; + +private: + // Direct construction isn't allowed; use get(). + ZT_ALWAYS_INLINE Buf() {} + ZT_ALWAYS_INLINE Buf(const Buf &b) {} + +public: + static void operator delete(void *ptr,std::size_t sz); + + /** + * Get obtains a buffer from the pool or allocates a new buffer if the pool is empty + * + * @return Buffer + */ + static SharedPtr get(); + + /** + * Free buffers in the pool + * + * New buffers will be created and the pool repopulated if get() is called + * and outstanding buffers will still be returned to the pool. This just + * frees buffers currently held in reserve. + */ + static void freePool(); + + /** + * Check for overflow beyond the size of the buffer + * + * This is used to check for overflow when writing. It returns true if the iterator + * has passed beyond the capacity of the buffer. 
+ * + * @param ii Iterator to check + * @return True if iterator has read past the size of the buffer + */ + static ZT_ALWAYS_INLINE bool writeOverflow(const int &ii) { return ((ii - ZT_BUF_MEM_SIZE) > 0); } + + /** + * Check for overflow beyond the size of the data that should be in the buffer + * + * This is used to check for overflow when reading, with the second argument being the + * size of the meaningful data actually present in the buffer. + * + * @param ii Iterator to check + * @param size Size of data that should be in buffer + * @return True if iterator has read past the size of the data + */ + static ZT_ALWAYS_INLINE bool readOverflow(const int &ii,const unsigned int size) { return ((ii - (int)size) > 0); } + + //////////////////////////////////////////////////////////////////////////// + // Read methods + //////////////////////////////////////////////////////////////////////////// + + /** + * Read a byte + * + * The index is masked with ZT_BUF_MEM_MASK, so the access stays inside data[] even + * when the iterator has already run past the end of the buffer. + * + * @param ii Iterator (advanced by 1) + * @return Byte (undefined on overflow) + */ + ZT_ALWAYS_INLINE uint8_t rI8(int &ii) const + { + const unsigned int s = (unsigned int)ii++; + return data[s & ZT_BUF_MEM_MASK]; + } + + /** + * Read a 16-bit integer stored in big-endian byte order + * + * @param ii Iterator (advanced by 2) + * @return Integer (undefined on overflow) + */ + ZT_ALWAYS_INLINE uint16_t rI16(int &ii) const + { + const unsigned int s = (unsigned int)ii & ZT_BUF_MEM_MASK; + ii += 2; +#ifdef ZT_NO_TYPE_PUNNING + return ( + ((uint16_t)data[s] << 8U) | + (uint16_t)data[s + 1]); +#else + return Utils::ntoh(*reinterpret_cast(data + s)); +#endif + } + + /** + * Read a 32-bit integer stored in big-endian byte order + * + * @param ii Iterator (advanced by 4) + * @return Integer (undefined on overflow) + */ + ZT_ALWAYS_INLINE uint32_t rI32(int &ii) const + { + const unsigned int s = (unsigned int)ii & ZT_BUF_MEM_MASK; + ii += 4; +#ifdef ZT_NO_TYPE_PUNNING + return ( + ((uint32_t)data[s] << 24U) | + ((uint32_t)data[s + 1] << 16U) | + ((uint32_t)data[s + 2] << 8U) | + (uint32_t)data[s + 3]); +#else + return Utils::ntoh(*reinterpret_cast(data + s)); +#endif
+ } + + /** + * Read a 64-bit integer stored in big-endian byte order + * + * @param ii Iterator (advanced by 8) + * @return Integer (undefined on overflow) + */ + ZT_ALWAYS_INLINE uint64_t rI64(int &ii) const + { + const unsigned int s = (unsigned int)ii & ZT_BUF_MEM_MASK; + ii += 8; +#ifdef ZT_NO_TYPE_PUNNING + return ( + ((uint64_t)data[s] << 56U) | + ((uint64_t)data[s + 1] << 48U) | + ((uint64_t)data[s + 2] << 40U) | + ((uint64_t)data[s + 3] << 32U) | + ((uint64_t)data[s + 4] << 24U) | + ((uint64_t)data[s + 5] << 16U) | + ((uint64_t)data[s + 6] << 8U) | + (uint64_t)data[s + 7]); +#else + return Utils::ntoh(*reinterpret_cast(data + s)); +#endif + } + + /** + * Read an object supporting the marshal/unmarshal interface + * + * If the return value is negative the object's state is undefined. A return value of + * zero typically also indicates a problem, though this may depend on the object type. + * + * Since objects may be invalid even if there is no overflow, it's important to check + * the return value of this function in all cases and discard invalid packets as it + * indicates. + * + * Unlike the integer reads, the iterator is advanced only when unmarshal() returns a + * positive byte count. When the iterator is already at or past the end of the buffer + * nothing is read, the iterator is left unchanged and -1 is returned. + * + * @tparam T Object type + * @param ii Iterator + * @param obj Object to read + * @return Bytes read or a negative value on unmarshal error (passed from object) or overflow + */ + template + ZT_ALWAYS_INLINE int rO(int &ii,T &obj) const + { + if (ii < ZT_BUF_MEM_SIZE) { + int ms = obj.unmarshal(data + ii, ZT_BUF_MEM_SIZE - ii); + if (ms > 0) + ii += ms; + return ms; + } + return -1; + } + + /** + * Read a C-style string from the buffer, making a copy and advancing the iterator + * + * Use this if the buffer's memory may get changed between reading and processing + * what is read.
+ * + * @param ii Iterator + * @param buf Buffer to receive string + * @param bufSize Capacity of buffer in bytes + * @return Pointer to buf, or NULL on overflow, on error, or if the string (including its terminating zero) does not fit in bufSize bytes + */ + ZT_ALWAYS_INLINE char *rS(int &ii,char *const buf,const unsigned int bufSize) const + { + const char *const s = (const char *)(data + ii); + const int sii = ii; + while (ii < ZT_BUF_MEM_SIZE) { + if (data[ii++] == 0) { + const unsigned int n = (unsigned int)(ii - sii); /* string length including the terminating zero */ + if (n > bufSize) + return nullptr; /* copying would overrun the caller's buffer; iterator is still advanced */ + memcpy(buf,s,n); + return buf; + } + } + return nullptr; + } + + /** + * Obtain a pointer to a C-style string in the buffer without copying and advance the iterator + * + * The iterator is advanced even if this fails and returns NULL so that readOverflow() + * will indicate that an overflow occurred. As with other reads the string's contents are + * undefined if readOverflow() returns true. + * + * This version avoids a copy and so is faster if the buffer won't be modified between + * reading and processing. + * + * @param ii Iterator + * @return Pointer to null-terminated C-style string or NULL on overflow or error + */ + ZT_ALWAYS_INLINE const char *rSnc(int &ii) const + { + const char *const s = (const char *)(data + ii); + while (ii < ZT_BUF_MEM_SIZE) { + if (data[ii++] == 0) + return s; + } + return nullptr; + } + + /** + * Read a byte array from the buffer, making a copy and advancing the iterator + * + * Use this if the buffer's memory may get changed between reading and processing + * what is read.
+ * + * @param ii Iterator + * @param bytes Buffer to contain data to read + * @param len Length of buffer + * @return Pointer to data or NULL on overflow or error (the iterator is then set just past the end of the buffer so the overflow checks report it) + */ + ZT_ALWAYS_INLINE void *rB(int &ii,void *bytes,unsigned int len) const + { + const int s = ii; + /* Compare len against the space actually left rather than adding it to the iterator first: (int)len can turn negative for very large values and a negative iterator also passes a plain upper-bound test, either of which would let memcpy run outside data[]. */ + if ((s >= 0)&&(s <= ZT_BUF_MEM_SIZE)&&(len <= (unsigned int)(ZT_BUF_MEM_SIZE - s))) { + ii = s + (int)len; + memcpy(bytes, data + s, len); + return bytes; + } + ii = ZT_BUF_MEM_SIZE + 1; /* pin past the end so readOverflow()/writeOverflow() report the failure */ + return nullptr; + } + + /** + * Obtain a pointer to a field in the buffer without copying and advance the iterator + * + * The iterator is advanced even if this fails and returns NULL so that readOverflow() + * will indicate that an overflow occurred. + * + * This version avoids a copy and so is faster if the buffer won't be modified between + * reading and processing. + * + * @param ii Iterator + * @param len Length of data field to obtain a pointer to + * @return Pointer to field or NULL on overflow (the iterator is then set just past the end of the buffer) + */ + ZT_ALWAYS_INLINE const void *rBnc(int &ii,unsigned int len) const + { + const int s = ii; + /* Same range check as rB(): never hand out a pointer unless [s, s + len) lies inside the buffer. */ + if ((s >= 0)&&(s <= ZT_BUF_MEM_SIZE)&&(len <= (unsigned int)(ZT_BUF_MEM_SIZE - s))) { + ii = s + (int)len; + return (const void *)(data + s); + } + ii = ZT_BUF_MEM_SIZE + 1; + return nullptr; + }
+ + //////////////////////////////////////////////////////////////////////////// + // Write methods + //////////////////////////////////////////////////////////////////////////// + + /** + * Write a byte + * + * @param ii Iterator + * @param n Byte + */ + ZT_ALWAYS_INLINE void wI(int &ii,uint8_t n) + { + const unsigned int s = (unsigned int)ii++; + data[s & ZT_BUF_MEM_MASK] = n; + } + + /** + * Write a 16-bit integer in big-endian byte order + * + * @param ii Iterator + * @param n Integer + */ + ZT_ALWAYS_INLINE void wI(int &ii,uint16_t n) + { + const unsigned int s = ((unsigned int)ii) & ZT_BUF_MEM_MASK; + ii += 2; +#ifdef ZT_NO_TYPE_PUNNING + data[s] = (uint8_t)(n >> 8U); + data[s + 1] = (uint8_t)n; +#else + *reinterpret_cast(data + s) = Utils::hton(n); +#endif + } + + /** + * Write a 32-bit integer in big-endian byte order + * + * @param ii Iterator + * @param n Integer + */ + ZT_ALWAYS_INLINE void wI(int &ii,uint32_t n) + { + const unsigned int s = ((unsigned int)ii) & ZT_BUF_MEM_MASK; + ii += 4; +#ifdef ZT_NO_TYPE_PUNNING + data[s] = (uint8_t)(n >> 24U); + data[s + 1] = (uint8_t)(n >> 16U); + data[s + 2] = (uint8_t)(n >> 8U); + data[s + 3] = (uint8_t)n; +#else + *reinterpret_cast(data + s) = Utils::hton(n); +#endif + } + + /** + * Write a 64-bit integer in big-endian byte order + * + * @param ii Iterator + * @param n Integer + */ + ZT_ALWAYS_INLINE void wI(int &ii,uint64_t n) + { + const unsigned int s = ((unsigned int)ii) & ZT_BUF_MEM_MASK; + ii += 8; +#ifdef ZT_NO_TYPE_PUNNING + data[s] = (uint8_t)(n >> 56U); + data[s + 1] = (uint8_t)(n >> 48U); + data[s + 2] = (uint8_t)(n >> 40U); + data[s + 3] = (uint8_t)(n >> 32U); + data[s + 4] = (uint8_t)(n >> 24U); + data[s + 5] = (uint8_t)(n >> 16U); + data[s + 6] = (uint8_t)(n >> 8U); + data[s + 7] = (uint8_t)n; +#else + *reinterpret_cast(data + s) = Utils::hton(n); +#endif + } + + /** + * Write an object implementing the marshal interface + * + * @tparam T Object type + * @param ii Iterator + * 
@param t Object to write + */ + template + ZT_ALWAYS_INLINE void wO(int &ii,T &t) + { + const unsigned int s = (unsigned int)ii; + if ((s + T::marshalSizeMax()) <= ZT_BUF_MEM_SIZE) { + int ms = t.marshal(data + s); + if (ms > 0) + ii += ms; + } else { + ii += T::marshalSizeMax(); // mark as overflowed even if we didn't do anything + } + } + + /** + * Write a C-style null-terminated string (including the trailing zero) + * + * Each byte goes through the single-byte wI(), so the iterator advances by the + * string length plus one; check writeOverflow() afterwards as with other writes. + * + * @param ii Iterator + * @param s String to write (writes an empty string if this is NULL) + */ + ZT_ALWAYS_INLINE void wS(int &ii,const char *s) + { + if (s) { + char c; + do { + c = *(s++); + wI(ii,(uint8_t)c); + } while (c); + } else { + wI(ii,(uint8_t)0); + } + } + + /** + * Write a byte array + * + * Nothing is written unless the whole array fits between the iterator and the end of + * the buffer. On failure the iterator is set just past the end of the buffer so that + * writeOverflow() reports it. + * + * @param ii Iterator + * @param bytes Bytes to write + * @param len Size of data in bytes + */ + ZT_ALWAYS_INLINE void wB(int &ii,const void *const bytes,const unsigned int len) + { + const int s = ii; + /* Compare len against the space actually left rather than adding it to the iterator first: (int)len can turn negative for very large values and a negative iterator also passes a plain upper-bound test, either of which would let memcpy write outside data[]. */ + if ((s >= 0)&&(s <= ZT_BUF_MEM_SIZE)&&(len <= (unsigned int)(ZT_BUF_MEM_SIZE - s))) { + ii = s + (int)len; + memcpy(data + s, bytes, len); + } else { + ii = ZT_BUF_MEM_SIZE + 1; + } + } + + //////////////////////////////////////////////////////////////////////////// + + ZT_ALWAYS_INLINE Buf &operator=(const Buf &b) + { + if (&b != this) + memcpy(data,b.data,ZT_BUF_MEM_SIZE); + return *this; + } + + /** + * Raw buffer + * + * The extra eight bytes permit silent overflow of integer types without reading or writing + * beyond Buf's memory and without branching or extra masks. They can be ignored otherwise. 
+ */ + uint8_t data[ZT_BUF_MEM_SIZE + 8]; + +private: + volatile uintptr_t __nextInPool; + AtomicCounter __refCount; +}; + +} // namespace ZeroTier + +#endif diff --git a/node/CMakeLists.txt b/node/CMakeLists.txt index 2002cc660..e5ee34e95 100644 --- a/node/CMakeLists.txt +++ b/node/CMakeLists.txt @@ -8,6 +8,7 @@ endif(WIN32) set(core_headers Address.hpp AtomicCounter.hpp + Buf.hpp Buffer.hpp C25519.hpp Capability.hpp @@ -50,6 +51,7 @@ set(core_headers set(core_src AES.cpp + Buf.cpp C25519.cpp Credential.cpp ECC384.cpp diff --git a/node/Utils.cpp b/node/Utils.cpp index 094d57cf1..77790767a 100644 --- a/node/Utils.cpp +++ b/node/Utils.cpp @@ -32,31 +32,33 @@ #include "AES.hpp" #include "SHA512.hpp" +namespace ZeroTier { + +namespace Utils { + #if (defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || defined(__AMD64) || defined(__AMD64__) || defined(_M_X64)) -#include -static bool _zt_rdrand_supported() +CPUIDRegisters::CPUIDRegisters() { #ifdef __WINDOWS__ int regs[4]; __cpuid(regs,1); - return (((regs[2] >> 30) & 1) != 0); + eax = (uint32_t)regs[0]; + ebx = (uint32_t)regs[1]; + ecx = (uint32_t)regs[2]; + edx = (uint32_t)regs[3]; #else - uint32_t eax,ebx,ecx,edx; __asm__ __volatile__ ( "cpuid" : "=a"(eax),"=b"(ebx),"=c"(ecx),"=d"(edx) : "a"(1),"c"(0) ); - return ((ecx & (1 << 30)) != 0); #endif + rdrand = ((ecx & (1U << 30U)) != 0); + aes = ( ((ecx & (1U << 25U)) != 0) && ((ecx & (1U << 19U)) != 0) && ((ecx & (1U << 1U)) != 0) ); // AES, PCLMUL, SSE4.1 } -static const bool _rdrandSupported = _zt_rdrand_supported(); +CPUIDRegisters CPUID; #endif -namespace ZeroTier { - -namespace Utils { - const char HEXCHARS[16] = { '0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f' }; bool secureEq(const void *a,const void *b,unsigned int len) @@ -213,7 +215,7 @@ void getSecureRandom(void *buf,unsigned int bytes) randomState[0] ^= (uint64_t)time(nullptr); randomState[1] ^= (uint64_t)((uintptr_t)buf); #if (defined(__amd64) || 
defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || defined(__AMD64) || defined(__AMD64__) || defined(_M_X64)) - if (_rdrandSupported) { + if (CPUID.rdrand) { uint64_t tmp = 0; _rdrand64_step((unsigned long long *)&tmp); randomState[2] ^= tmp; diff --git a/node/Utils.hpp b/node/Utils.hpp index 1336d6a37..606eac638 100644 --- a/node/Utils.hpp +++ b/node/Utils.hpp @@ -20,6 +20,12 @@ #include #include +#if (defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || defined(__AMD64) || defined(__AMD64__) || defined(_M_X64)) +#include +#include +#include +#endif + #include #include #include @@ -31,6 +37,17 @@ namespace ZeroTier { namespace Utils { +#if (defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || defined(__AMD64) || defined(__AMD64__) || defined(_M_X64)) +struct CPUIDRegisters +{ + uint32_t eax,ebx,ecx,edx; + bool rdrand; + bool aes; + CPUIDRegisters(); +}; +extern CPUIDRegisters CPUID; +#endif + /** * Hexadecimal characters 0-f */