diff --git a/make-mac.mk b/make-mac.mk index 7776e6ac1..5ced0617d 100644 --- a/make-mac.mk +++ b/make-mac.mk @@ -63,7 +63,7 @@ ifeq ($(ZT_DEBUG),1) node/Salsa20.o node/SHA512.o node/C25519.o node/Poly1305.o node/AES.o: CFLAGS = -Wall -O2 -g -maes -mpclmul $(INCLUDES) $(DEFS) else CFLAGS?=-Ofast -fstack-protector-strong - CFLAGS+=$(ARCH_FLAGS) -Wall -flto -fPIE -maes -msse -msse2 -mpclmul -mmacosx-version-min=10.9 -DNDEBUG -Wno-unused-private-field $(INCLUDES) $(DEFS) + CFLAGS+=$(ARCH_FLAGS) -Wall -flto -fPIE -maes -msse -msse2 -msse3 -mpclmul -mmacosx-version-min=10.9 -DNDEBUG -Wno-unused-private-field $(INCLUDES) $(DEFS) STRIP=strip endif diff --git a/node/AES.hpp b/node/AES.hpp index 3e9ce4a94..b6c13e2ce 100644 --- a/node/AES.hpp +++ b/node/AES.hpp @@ -196,17 +196,6 @@ private: _k.ni.hh = _swap128_aesni(hh); _k.ni.hhh = _swap128_aesni(hhh); _k.ni.hhhh = _swap128_aesni(hhhh); - /* - this->h = h; - h = swap128(h); - this->hh = mult_block(h, this->h); - this->hhh = mult_block(h, this->hh); - this->hhhh = mult_block(h, this->hhh); - this->h = swap128(this->h); - this->hh = swap128(this->hh); - this->hhh = swap128(this->hhh); - this->hhhh = swap128(this->hhhh); - */ } inline void _encrypt_aesni(const void *in,void *out) const @@ -354,7 +343,7 @@ private: static inline __m128i _increment_be_aesni(__m128i x) { x = _swap128_aesni(x); - x = _mm_add_epi64(x, _mm_set_epi32(0, 0, 0, 1)); + x = _mm_add_epi64(x,_mm_set_epi32(0,0,0,1)); x = _swap128_aesni(x); return x; } @@ -460,88 +449,101 @@ private: } inline void _encrypt_gcm256_aesni(unsigned int len,const uint8_t *in,uint8_t *out,const uint8_t *iv,unsigned int alen,const uint8_t *assoc,uint8_t *icv,unsigned int icvsize) const { - __m128i d1,d2,d3,d4,t1,t2,t3,t4,k; - __m128i y,j,cb,*bi,*bo; - - j = _create_j_aesni(iv); - cb = _increment_be_aesni(j); - y = _icv_header_aesni(assoc,alen); + __m128i j = _create_j_aesni(iv); + __m128i cb = _increment_be_aesni(j); + __m128i y = _icv_header_aesni(assoc,alen); unsigned int blocks = len / 16; unsigned int pblocks = blocks - (blocks % 4); unsigned int rem = len % 16; - bi = (__m128i *)in; - bo = (__m128i *)out; + __m128i *bi = (__m128i *)in; + __m128i *bo = (__m128i *)out; + + __m128i k0 = _k.ni.k[0]; + __m128i k1 = _k.ni.k[1]; + __m128i k2 = _k.ni.k[2]; + __m128i k3 = _k.ni.k[3]; + __m128i k4 = _k.ni.k[4]; + __m128i k5 = _k.ni.k[5]; + __m128i k6 = _k.ni.k[6]; + __m128i k7 = _k.ni.k[7]; + __m128i k8 = _k.ni.k[8]; + __m128i k9 = _k.ni.k[9]; + __m128i k10 = _k.ni.k[10]; + __m128i k11 = _k.ni.k[11]; + __m128i k12 = _k.ni.k[12]; + __m128i k13 = _k.ni.k[13]; + __m128i k14 = _k.ni.k[14]; unsigned int i; for (i=0;i