mirror of
https://github.com/zerotier/ZeroTierOne.git
synced 2025-06-05 20:13:44 +02:00
More AES stuff
This commit is contained in:
parent
f5e71f64ed
commit
f7bc9f01c9
1 changed file with 302 additions and 295 deletions
597
node/AES.hpp
597
node/AES.hpp
|
@ -97,6 +97,13 @@ public:
|
|||
|
||||
/**
 * Decrypt a GCM message and verify its authentication tag
 *
 * Only the AES-NI path is implemented. If ZT_AES_AESNI is not defined or
 * HW_ACCEL is false this calls abort() (software fallback is still a TODO).
 *
 * The plaintext is written to 'out' before the tag comparison is made, so
 * the caller must discard the contents of 'out' when this returns false.
 *
 * @param iv 12-byte initialization vector
 * @param in Ciphertext to decrypt
 * @param inlen Length of 'in' (presumably bytes; 'out' is presumably the same size -- confirm against callers)
 * @param assoc Associated data that is authenticated but not decrypted
 * @param assoclen Length of 'assoc'
 * @param out Buffer receiving the decrypted output
 * @param tag Expected authentication tag
 * @param taglen Length of 'tag' in bytes; must not exceed 16
 * @return True if the computed tag equals 'tag'; false on mismatch or if taglen is larger than 16
 */
inline bool gcmDecrypt(const uint8_t iv[12],const void *in,unsigned int inlen,const void *assoc,unsigned int assoclen,void *out,const uint8_t *tag,unsigned int taglen)
{
#ifdef ZT_AES_AESNI
	if (HW_ACCEL) {
		uint8_t tagbuf[16];
		// The computed tag lands in a 16-byte stack buffer and taglen bytes of it
		// are then compared, so a larger taglen would run past the end of tagbuf.
		// Refuse such requests before touching any buffer.
		if (taglen > sizeof(tagbuf))
			return false;
		// NOTE(review): taglen == 0 is passed through unchanged; whether secureEq
		// treats a zero-length compare as a match is not visible here -- confirm.
		_decrypt_gcm256_aesni(inlen,(const uint8_t *)in,(uint8_t *)out,iv,assoclen,(const uint8_t *)assoc,tagbuf,taglen);
		return Utils::secureEq(tagbuf,tag,taglen);
	}
#endif
	abort(); // TODO: software
	return false;
}
|
||||
|
@ -217,41 +224,41 @@ private:
|
|||
{
|
||||
__m128i t1,t2,t3,t4,t5,t6;
|
||||
y = _swap128_aesni(y);
|
||||
t1 = _mm_clmulepi64_si128(h, y, 0x00);
|
||||
t2 = _mm_clmulepi64_si128(h, y, 0x01);
|
||||
t3 = _mm_clmulepi64_si128(h, y, 0x10);
|
||||
t4 = _mm_clmulepi64_si128(h, y, 0x11);
|
||||
t2 = _mm_xor_si128(t2, t3);
|
||||
t3 = _mm_slli_si128(t2, 8);
|
||||
t2 = _mm_srli_si128(t2, 8);
|
||||
t1 = _mm_xor_si128(t1, t3);
|
||||
t4 = _mm_xor_si128(t4, t2);
|
||||
t5 = _mm_srli_epi32(t1, 31);
|
||||
t1 = _mm_slli_epi32(t1, 1);
|
||||
t6 = _mm_srli_epi32(t4, 31);
|
||||
t4 = _mm_slli_epi32(t4, 1);
|
||||
t3 = _mm_srli_si128(t5, 12);
|
||||
t6 = _mm_slli_si128(t6, 4);
|
||||
t5 = _mm_slli_si128(t5, 4);
|
||||
t1 = _mm_or_si128(t1, t5);
|
||||
t4 = _mm_or_si128(t4, t6);
|
||||
t4 = _mm_or_si128(t4, t3);
|
||||
t5 = _mm_slli_epi32(t1, 31);
|
||||
t6 = _mm_slli_epi32(t1, 30);
|
||||
t3 = _mm_slli_epi32(t1, 25);
|
||||
t5 = _mm_xor_si128(t5, t6);
|
||||
t5 = _mm_xor_si128(t5, t3);
|
||||
t6 = _mm_srli_si128(t5, 4);
|
||||
t4 = _mm_xor_si128(t4, t6);
|
||||
t5 = _mm_slli_si128(t5, 12);
|
||||
t1 = _mm_xor_si128(t1, t5);
|
||||
t4 = _mm_xor_si128(t4, t1);
|
||||
t5 = _mm_srli_epi32(t1, 1);
|
||||
t2 = _mm_srli_epi32(t1, 2);
|
||||
t3 = _mm_srli_epi32(t1, 7);
|
||||
t4 = _mm_xor_si128(t4, t2);
|
||||
t4 = _mm_xor_si128(t4, t3);
|
||||
t4 = _mm_xor_si128(t4, t5);
|
||||
t1 = _mm_clmulepi64_si128(h,y,0x00);
|
||||
t2 = _mm_clmulepi64_si128(h,y,0x01);
|
||||
t3 = _mm_clmulepi64_si128(h,y,0x10);
|
||||
t4 = _mm_clmulepi64_si128(h,y,0x11);
|
||||
t2 = _mm_xor_si128(t2,t3);
|
||||
t3 = _mm_slli_si128(t2,8);
|
||||
t2 = _mm_srli_si128(t2,8);
|
||||
t1 = _mm_xor_si128(t1,t3);
|
||||
t4 = _mm_xor_si128(t4,t2);
|
||||
t5 = _mm_srli_epi32(t1,31);
|
||||
t1 = _mm_slli_epi32(t1,1);
|
||||
t6 = _mm_srli_epi32(t4,31);
|
||||
t4 = _mm_slli_epi32(t4,1);
|
||||
t3 = _mm_srli_si128(t5,12);
|
||||
t6 = _mm_slli_si128(t6,4);
|
||||
t5 = _mm_slli_si128(t5,4);
|
||||
t1 = _mm_or_si128(t1,t5);
|
||||
t4 = _mm_or_si128(t4,t6);
|
||||
t4 = _mm_or_si128(t4,t3);
|
||||
t5 = _mm_slli_epi32(t1,31);
|
||||
t6 = _mm_slli_epi32(t1,30);
|
||||
t3 = _mm_slli_epi32(t1,25);
|
||||
t5 = _mm_xor_si128(t5,t6);
|
||||
t5 = _mm_xor_si128(t5,t3);
|
||||
t6 = _mm_srli_si128(t5,4);
|
||||
t4 = _mm_xor_si128(t4,t6);
|
||||
t5 = _mm_slli_si128(t5,12);
|
||||
t1 = _mm_xor_si128(t1,t5);
|
||||
t4 = _mm_xor_si128(t4,t1);
|
||||
t5 = _mm_srli_epi32(t1,1);
|
||||
t2 = _mm_srli_epi32(t1,2);
|
||||
t3 = _mm_srli_epi32(t1,7);
|
||||
t4 = _mm_xor_si128(t4,t2);
|
||||
t4 = _mm_xor_si128(t4,t3);
|
||||
t4 = _mm_xor_si128(t4,t5);
|
||||
return _swap128_aesni(t4);
|
||||
}
|
||||
static inline __m128i _mult4xor_aesni(__m128i h1,__m128i h2,__m128i h3,__m128i h4,__m128i d1,__m128i d2,__m128i d3,__m128i d4)
|
||||
|
@ -261,75 +268,75 @@ private:
|
|||
d2 = _swap128_aesni(d2);
|
||||
d3 = _swap128_aesni(d3);
|
||||
d4 = _swap128_aesni(d4);
|
||||
t0 = _mm_clmulepi64_si128(h1, d1, 0x00);
|
||||
t1 = _mm_clmulepi64_si128(h2, d2, 0x00);
|
||||
t2 = _mm_clmulepi64_si128(h3, d3, 0x00);
|
||||
t3 = _mm_clmulepi64_si128(h4, d4, 0x00);
|
||||
t8 = _mm_xor_si128(t0, t1);
|
||||
t8 = _mm_xor_si128(t8, t2);
|
||||
t8 = _mm_xor_si128(t8, t3);
|
||||
t4 = _mm_clmulepi64_si128(h1, d1, 0x11);
|
||||
t5 = _mm_clmulepi64_si128(h2, d2, 0x11);
|
||||
t6 = _mm_clmulepi64_si128(h3, d3, 0x11);
|
||||
t7 = _mm_clmulepi64_si128(h4, d4, 0x11);
|
||||
t9 = _mm_xor_si128(t4, t5);
|
||||
t9 = _mm_xor_si128(t9, t6);
|
||||
t9 = _mm_xor_si128(t9, t7);
|
||||
t0 = _mm_shuffle_epi32(h1, 78);
|
||||
t4 = _mm_shuffle_epi32(d1, 78);
|
||||
t0 = _mm_xor_si128(t0, h1);
|
||||
t4 = _mm_xor_si128(t4, d1);
|
||||
t1 = _mm_shuffle_epi32(h2, 78);
|
||||
t5 = _mm_shuffle_epi32(d2, 78);
|
||||
t1 = _mm_xor_si128(t1, h2);
|
||||
t5 = _mm_xor_si128(t5, d2);
|
||||
t2 = _mm_shuffle_epi32(h3, 78);
|
||||
t6 = _mm_shuffle_epi32(d3, 78);
|
||||
t2 = _mm_xor_si128(t2, h3);
|
||||
t6 = _mm_xor_si128(t6, d3);
|
||||
t3 = _mm_shuffle_epi32(h4, 78);
|
||||
t7 = _mm_shuffle_epi32(d4, 78);
|
||||
t3 = _mm_xor_si128(t3, h4);
|
||||
t7 = _mm_xor_si128(t7, d4);
|
||||
t0 = _mm_clmulepi64_si128(t0, t4, 0x00);
|
||||
t1 = _mm_clmulepi64_si128(t1, t5, 0x00);
|
||||
t2 = _mm_clmulepi64_si128(t2, t6, 0x00);
|
||||
t3 = _mm_clmulepi64_si128(t3, t7, 0x00);
|
||||
t0 = _mm_xor_si128(t0, t8);
|
||||
t0 = _mm_xor_si128(t0, t9);
|
||||
t0 = _mm_xor_si128(t1, t0);
|
||||
t0 = _mm_xor_si128(t2, t0);
|
||||
t0 = _mm_xor_si128(t3, t0);
|
||||
t4 = _mm_slli_si128(t0, 8);
|
||||
t0 = _mm_srli_si128(t0, 8);
|
||||
t3 = _mm_xor_si128(t4, t8);
|
||||
t6 = _mm_xor_si128(t0, t9);
|
||||
t7 = _mm_srli_epi32(t3, 31);
|
||||
t8 = _mm_srli_epi32(t6, 31);
|
||||
t3 = _mm_slli_epi32(t3, 1);
|
||||
t6 = _mm_slli_epi32(t6, 1);
|
||||
t9 = _mm_srli_si128(t7, 12);
|
||||
t8 = _mm_slli_si128(t8, 4);
|
||||
t7 = _mm_slli_si128(t7, 4);
|
||||
t3 = _mm_or_si128(t3, t7);
|
||||
t6 = _mm_or_si128(t6, t8);
|
||||
t6 = _mm_or_si128(t6, t9);
|
||||
t7 = _mm_slli_epi32(t3, 31);
|
||||
t8 = _mm_slli_epi32(t3, 30);
|
||||
t9 = _mm_slli_epi32(t3, 25);
|
||||
t7 = _mm_xor_si128(t7, t8);
|
||||
t7 = _mm_xor_si128(t7, t9);
|
||||
t8 = _mm_srli_si128(t7, 4);
|
||||
t7 = _mm_slli_si128(t7, 12);
|
||||
t3 = _mm_xor_si128(t3, t7);
|
||||
t2 = _mm_srli_epi32(t3, 1);
|
||||
t4 = _mm_srli_epi32(t3, 2);
|
||||
t5 = _mm_srli_epi32(t3, 7);
|
||||
t2 = _mm_xor_si128(t2, t4);
|
||||
t2 = _mm_xor_si128(t2, t5);
|
||||
t2 = _mm_xor_si128(t2, t8);
|
||||
t3 = _mm_xor_si128(t3, t2);
|
||||
t6 = _mm_xor_si128(t6, t3);
|
||||
t0 = _mm_clmulepi64_si128(h1,d1,0x00);
|
||||
t1 = _mm_clmulepi64_si128(h2,d2,0x00);
|
||||
t2 = _mm_clmulepi64_si128(h3,d3,0x00);
|
||||
t3 = _mm_clmulepi64_si128(h4,d4,0x00);
|
||||
t8 = _mm_xor_si128(t0,t1);
|
||||
t8 = _mm_xor_si128(t8,t2);
|
||||
t8 = _mm_xor_si128(t8,t3);
|
||||
t4 = _mm_clmulepi64_si128(h1,d1,0x11);
|
||||
t5 = _mm_clmulepi64_si128(h2,d2,0x11);
|
||||
t6 = _mm_clmulepi64_si128(h3,d3,0x11);
|
||||
t7 = _mm_clmulepi64_si128(h4,d4,0x11);
|
||||
t9 = _mm_xor_si128(t4,t5);
|
||||
t9 = _mm_xor_si128(t9,t6);
|
||||
t9 = _mm_xor_si128(t9,t7);
|
||||
t0 = _mm_shuffle_epi32(h1,78);
|
||||
t4 = _mm_shuffle_epi32(d1,78);
|
||||
t0 = _mm_xor_si128(t0,h1);
|
||||
t4 = _mm_xor_si128(t4,d1);
|
||||
t1 = _mm_shuffle_epi32(h2,78);
|
||||
t5 = _mm_shuffle_epi32(d2,78);
|
||||
t1 = _mm_xor_si128(t1,h2);
|
||||
t5 = _mm_xor_si128(t5,d2);
|
||||
t2 = _mm_shuffle_epi32(h3,78);
|
||||
t6 = _mm_shuffle_epi32(d3,78);
|
||||
t2 = _mm_xor_si128(t2,h3);
|
||||
t6 = _mm_xor_si128(t6,d3);
|
||||
t3 = _mm_shuffle_epi32(h4,78);
|
||||
t7 = _mm_shuffle_epi32(d4,78);
|
||||
t3 = _mm_xor_si128(t3,h4);
|
||||
t7 = _mm_xor_si128(t7,d4);
|
||||
t0 = _mm_clmulepi64_si128(t0,t4,0x00);
|
||||
t1 = _mm_clmulepi64_si128(t1,t5,0x00);
|
||||
t2 = _mm_clmulepi64_si128(t2,t6,0x00);
|
||||
t3 = _mm_clmulepi64_si128(t3,t7,0x00);
|
||||
t0 = _mm_xor_si128(t0,t8);
|
||||
t0 = _mm_xor_si128(t0,t9);
|
||||
t0 = _mm_xor_si128(t1,t0);
|
||||
t0 = _mm_xor_si128(t2,t0);
|
||||
t0 = _mm_xor_si128(t3,t0);
|
||||
t4 = _mm_slli_si128(t0,8);
|
||||
t0 = _mm_srli_si128(t0,8);
|
||||
t3 = _mm_xor_si128(t4,t8);
|
||||
t6 = _mm_xor_si128(t0,t9);
|
||||
t7 = _mm_srli_epi32(t3,31);
|
||||
t8 = _mm_srli_epi32(t6,31);
|
||||
t3 = _mm_slli_epi32(t3,1);
|
||||
t6 = _mm_slli_epi32(t6,1);
|
||||
t9 = _mm_srli_si128(t7,12);
|
||||
t8 = _mm_slli_si128(t8,4);
|
||||
t7 = _mm_slli_si128(t7,4);
|
||||
t3 = _mm_or_si128(t3,t7);
|
||||
t6 = _mm_or_si128(t6,t8);
|
||||
t6 = _mm_or_si128(t6,t9);
|
||||
t7 = _mm_slli_epi32(t3,31);
|
||||
t8 = _mm_slli_epi32(t3,30);
|
||||
t9 = _mm_slli_epi32(t3,25);
|
||||
t7 = _mm_xor_si128(t7,t8);
|
||||
t7 = _mm_xor_si128(t7,t9);
|
||||
t8 = _mm_srli_si128(t7,4);
|
||||
t7 = _mm_slli_si128(t7,12);
|
||||
t3 = _mm_xor_si128(t3,t7);
|
||||
t2 = _mm_srli_epi32(t3,1);
|
||||
t4 = _mm_srli_epi32(t3,2);
|
||||
t5 = _mm_srli_epi32(t3,7);
|
||||
t2 = _mm_xor_si128(t2,t4);
|
||||
t2 = _mm_xor_si128(t2,t5);
|
||||
t2 = _mm_xor_si128(t2,t8);
|
||||
t3 = _mm_xor_si128(t3,t2);
|
||||
t6 = _mm_xor_si128(t6,t3);
|
||||
return _swap128_aesni(t6);
|
||||
}
|
||||
/**
 * Fold one block into a running GHASH value
 *
 * XORs block x into accumulator y and hands the result, together with h,
 * to _mult_block_aesni().
 *
 * @param h First operand forwarded to _mult_block_aesni() (callers in this file pass _k.ni.h)
 * @param y Current accumulator
 * @param x Block to absorb
 * @return Value returned by _mult_block_aesni(h, y XOR x)
 */
static inline __m128i _ghash_aesni(__m128i h,__m128i y,__m128i x)
{
	const __m128i mixed = _mm_xor_si128(y,x);
	return _mult_block_aesni(h,mixed);
}
|
||||
|
@ -375,13 +382,13 @@ private:
|
|||
d3 = _mm_loadu_si128(ab + i + 2);
|
||||
d4 = _mm_loadu_si128(ab + i + 3);
|
||||
y = _mm_xor_si128(y, d1);
|
||||
y = _mult4xor_aesni(h1, h2, h3, h4, y, d2, d3, d4);
|
||||
y = _mult4xor_aesni(h1,h2,h3,h4,y,d2,d3,d4);
|
||||
}
|
||||
for (i = pblocks; i < blocks; i++)
|
||||
y = _ghash_aesni(_k.ni.h,y,_mm_loadu_si128(ab + i));
|
||||
if (rem) {
|
||||
last = _mm_setzero_si128();
|
||||
memcpy(&last, ab + blocks, rem);
|
||||
memcpy(&last,ab + blocks,rem);
|
||||
y = _ghash_aesni(_k.ni.h,y,last);
|
||||
}
|
||||
return y;
|
||||
|
@ -395,7 +402,7 @@ private:
|
|||
}
|
||||
inline void _icv_crypt_aesni(__m128i y,__m128i j,uint8_t *icv,unsigned int icvsize) const
|
||||
{
|
||||
__m128i *ks,t,b;
|
||||
__m128i t,b;
|
||||
t = _mm_xor_si128(j,_k.ni.k[0]);
|
||||
t = _mm_aesenc_si128(t,_k.ni.k[1]);
|
||||
t = _mm_aesenc_si128(t,_k.ni.k[2]);
|
||||
|
@ -418,7 +425,7 @@ private:
|
|||
|
||||
inline __m128i _encrypt_gcm_rem_aesni(unsigned int rem,const void *in,void *out,__m128i cb,__m128i y) const
|
||||
{
|
||||
__m128i *ks,t,b;
|
||||
__m128i t,b;
|
||||
memset(&b,0,sizeof(b));
|
||||
memcpy(&b,in,rem);
|
||||
t = _mm_xor_si128(cb,_k.ni.k[0]);
|
||||
|
@ -436,15 +443,15 @@ private:
|
|||
t = _mm_aesenc_si128(t,_k.ni.k[12]);
|
||||
t = _mm_aesenc_si128(t,_k.ni.k[13]);
|
||||
t = _mm_aesenclast_si128(t,_k.ni.k[14]);
|
||||
b = _mm_xor_si128(t, b);
|
||||
b = _mm_xor_si128(t,b);
|
||||
memcpy(out,&b,rem);
|
||||
memset((u_char*)&b + rem,0,16 - rem);
|
||||
return _ghash_aesni(_k.ni.h,y,b);
|
||||
}
|
||||
inline void _encrypt_gcm256_aesni(unsigned int len,const uint8_t *in,uint8_t *out,const uint8_t *iv,unsigned int alen,const uint8_t *assoc,uint8_t *icv,unsigned int icvsize) const
|
||||
{
|
||||
__m128i d1,d2,d3,d4,t1,t2,t3,t4;
|
||||
__m128i *ks,y,j,cb,*bi,*bo;
|
||||
__m128i d1,d2,d3,d4,t1,t2,t3,t4,k;
|
||||
__m128i y,j,cb,*bi,*bo;
|
||||
|
||||
j = _create_j_aesni(iv);
|
||||
cb = _increment_be_aesni(j);
|
||||
|
@ -461,102 +468,102 @@ private:
|
|||
d2 = _mm_loadu_si128(bi + i + 1);
|
||||
d3 = _mm_loadu_si128(bi + i + 2);
|
||||
d4 = _mm_loadu_si128(bi + i + 3);
|
||||
t1 = _mm_xor_si128(cb, _k.ni.k[0]);
|
||||
t1 = _mm_xor_si128(cb,k = _k.ni.k[0]);
|
||||
cb = _increment_be_aesni(cb);
|
||||
t2 = _mm_xor_si128(cb, _k.ni.k[0]);
|
||||
t2 = _mm_xor_si128(cb,k);
|
||||
cb = _increment_be_aesni(cb);
|
||||
t3 = _mm_xor_si128(cb, _k.ni.k[0]);
|
||||
t3 = _mm_xor_si128(cb,k);
|
||||
cb = _increment_be_aesni(cb);
|
||||
t4 = _mm_xor_si128(cb, _k.ni.k[0]);
|
||||
t4 = _mm_xor_si128(cb,k);
|
||||
cb = _increment_be_aesni(cb);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[1]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[1]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[1]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[1]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[2]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[2]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[2]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[2]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[3]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[3]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[3]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[3]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[4]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[4]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[4]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[4]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[5]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[5]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[5]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[5]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[6]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[6]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[6]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[6]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[7]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[7]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[7]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[7]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[8]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[8]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[8]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[8]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[9]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[9]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[9]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[9]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[10]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[10]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[10]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[10]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[11]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[11]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[11]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[11]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[12]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[12]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[12]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[12]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[13]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[13]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[13]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[13]);
|
||||
t1 = _mm_aesenclast_si128(t1, _k.ni.k[14]);
|
||||
t2 = _mm_aesenclast_si128(t2, _k.ni.k[14]);
|
||||
t3 = _mm_aesenclast_si128(t3, _k.ni.k[14]);
|
||||
t4 = _mm_aesenclast_si128(t4, _k.ni.k[14]);
|
||||
t1 = _mm_xor_si128(t1, d1);
|
||||
t2 = _mm_xor_si128(t2, d2);
|
||||
t3 = _mm_xor_si128(t3, d3);
|
||||
t4 = _mm_xor_si128(t4, d4);
|
||||
y = _mm_xor_si128(y, t1);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[1]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[2]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[3]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[4]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[5]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[6]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[7]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[8]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[9]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[10]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[11]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[12]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[13]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenclast_si128(t1,k = _k.ni.k[14]);
|
||||
t2 = _mm_aesenclast_si128(t2,k);
|
||||
t3 = _mm_aesenclast_si128(t3,k);
|
||||
t4 = _mm_aesenclast_si128(t4,k);
|
||||
t1 = _mm_xor_si128(t1,d1);
|
||||
t2 = _mm_xor_si128(t2,d2);
|
||||
t3 = _mm_xor_si128(t3,d3);
|
||||
t4 = _mm_xor_si128(t4,d4);
|
||||
y = _mm_xor_si128(y,t1);
|
||||
y = _mult4xor_aesni(_k.ni.hhhh,_k.ni.hhh,_k.ni.hh,_k.ni.h,y,t2,t3,t4);
|
||||
_mm_storeu_si128(bo + i + 0, t1);
|
||||
_mm_storeu_si128(bo + i + 1, t2);
|
||||
_mm_storeu_si128(bo + i + 2, t3);
|
||||
_mm_storeu_si128(bo + i + 3, t4);
|
||||
_mm_storeu_si128(bo + i + 0,t1);
|
||||
_mm_storeu_si128(bo + i + 1,t2);
|
||||
_mm_storeu_si128(bo + i + 2,t3);
|
||||
_mm_storeu_si128(bo + i + 3,t4);
|
||||
}
|
||||
|
||||
for (i=pblocks;i<blocks;++i) {
|
||||
d1 = _mm_loadu_si128(bi + i);
|
||||
t1 = _mm_xor_si128(cb, _k.ni.k[0]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[1]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[2]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[3]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[4]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[5]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[6]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[7]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[8]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[9]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[10]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[11]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[12]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[13]);
|
||||
t1 = _mm_aesenclast_si128(t1, _k.ni.k[14]);
|
||||
t1 = _mm_xor_si128(t1, d1);
|
||||
_mm_storeu_si128(bo + i, t1);
|
||||
y = _ghash_aesni(_k.ni.h, y, t1);
|
||||
t1 = _mm_xor_si128(cb,_k.ni.k[0]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[1]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[2]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[3]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[4]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[5]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[6]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[7]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[8]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[9]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[10]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[11]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[12]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[13]);
|
||||
t1 = _mm_aesenclast_si128(t1,_k.ni.k[14]);
|
||||
t1 = _mm_xor_si128(t1,d1);
|
||||
_mm_storeu_si128(bo + i,t1);
|
||||
y = _ghash_aesni(_k.ni.h,y,t1);
|
||||
cb = _increment_be_aesni(cb);
|
||||
}
|
||||
|
||||
|
@ -567,10 +574,10 @@ private:
|
|||
}
|
||||
inline __m128i _decrypt_gcm_rem_aesni(unsigned int rem,const void *in,void *out,__m128i cb,__m128i y)
|
||||
{
|
||||
__m128i *ks, t, b;
|
||||
memset(&b, 0, sizeof(b));
|
||||
memcpy(&b, in, rem);
|
||||
y = _ghash_aesni(_k.ni.h, y, b);
|
||||
__m128i t,b;
|
||||
memset(&b,0,sizeof(b));
|
||||
memcpy(&b,in,rem);
|
||||
y = _ghash_aesni(_k.ni.h,y,b);
|
||||
t = _mm_xor_si128(cb,_k.ni.k[0]);
|
||||
t = _mm_aesenc_si128(t,_k.ni.k[1]);
|
||||
t = _mm_aesenc_si128(t,_k.ni.k[2]);
|
||||
|
@ -585,16 +592,16 @@ private:
|
|||
t = _mm_aesenc_si128(t,_k.ni.k[11]);
|
||||
t = _mm_aesenc_si128(t,_k.ni.k[12]);
|
||||
t = _mm_aesenc_si128(t,_k.ni.k[13]);
|
||||
t = _mm_aesenclast_si128(t, _k.ni.k[14]);
|
||||
b = _mm_xor_si128(t, b);
|
||||
memcpy(out, &b, rem);
|
||||
t = _mm_aesenclast_si128(t,_k.ni.k[14]);
|
||||
b = _mm_xor_si128(t,b);
|
||||
memcpy(out,&b,rem);
|
||||
return y;
|
||||
}
|
||||
inline void decrypt_gcm256(unsigned int len,const uint8_t *in,uint8_t *out,const uint8_t *iv,unsigned int alen,const uint8_t *assoc,uint8_t *icv,unsigned int icvsize)
|
||||
inline void _decrypt_gcm256_aesni(unsigned int len,const uint8_t *in,uint8_t *out,const uint8_t *iv,unsigned int alen,const uint8_t *assoc,uint8_t *icv,unsigned int icvsize)
|
||||
{
|
||||
__m128i d1, d2, d3, d4, t1, t2, t3, t4;
|
||||
__m128i *ks, y, j, cb, *bi, *bo;
|
||||
unsigned int blocks, pblocks, rem;
|
||||
__m128i d1,d2,d3,d4,t1,t2,t3,t4,k;
|
||||
__m128i y,j,cb,*bi,*bo;
|
||||
unsigned int blocks,pblocks,rem;
|
||||
|
||||
j = _create_j_aesni(iv);
|
||||
cb = _increment_be_aesni(j);
|
||||
|
@ -611,102 +618,102 @@ private:
|
|||
d2 = _mm_loadu_si128(bi + i + 1);
|
||||
d3 = _mm_loadu_si128(bi + i + 2);
|
||||
d4 = _mm_loadu_si128(bi + i + 3);
|
||||
y = _mm_xor_si128(y, d1);
|
||||
y = _mm_xor_si128(y,d1);
|
||||
y = _mult4xor_aesni(_k.ni.hhhh,_k.ni.hhh,_k.ni.hh,_k.ni.h,y,d2,d3,d4);
|
||||
t1 = _mm_xor_si128(cb, _k.ni.k[0]);
|
||||
t1 = _mm_xor_si128(cb,k = _k.ni.k[0]);
|
||||
cb = _increment_be_aesni(cb);
|
||||
t2 = _mm_xor_si128(cb, _k.ni.k[0]);
|
||||
t2 = _mm_xor_si128(cb,k);
|
||||
cb = _increment_be_aesni(cb);
|
||||
t3 = _mm_xor_si128(cb, _k.ni.k[0]);
|
||||
t3 = _mm_xor_si128(cb,k);
|
||||
cb = _increment_be_aesni(cb);
|
||||
t4 = _mm_xor_si128(cb, _k.ni.k[0]);
|
||||
t4 = _mm_xor_si128(cb,k);
|
||||
cb = _increment_be_aesni(cb);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[1]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[1]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[1]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[1]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[2]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[2]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[2]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[2]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[3]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[3]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[3]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[3]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[4]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[4]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[4]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[4]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[5]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[5]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[5]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[5]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[6]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[6]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[6]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[6]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[7]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[7]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[7]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[7]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[8]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[8]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[8]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[8]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[9]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[9]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[9]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[9]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[10]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[10]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[10]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[10]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[11]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[11]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[11]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[11]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[12]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[12]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[12]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[12]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[13]);
|
||||
t2 = _mm_aesenc_si128(t2, _k.ni.k[13]);
|
||||
t3 = _mm_aesenc_si128(t3, _k.ni.k[13]);
|
||||
t4 = _mm_aesenc_si128(t4, _k.ni.k[13]);
|
||||
t1 = _mm_aesenclast_si128(t1, _k.ni.k[14]);
|
||||
t2 = _mm_aesenclast_si128(t2, _k.ni.k[14]);
|
||||
t3 = _mm_aesenclast_si128(t3, _k.ni.k[14]);
|
||||
t4 = _mm_aesenclast_si128(t4, _k.ni.k[14]);
|
||||
t1 = _mm_xor_si128(t1, d1);
|
||||
t2 = _mm_xor_si128(t2, d2);
|
||||
t3 = _mm_xor_si128(t3, d3);
|
||||
t4 = _mm_xor_si128(t4, d4);
|
||||
_mm_storeu_si128(bo + i + 0, t1);
|
||||
_mm_storeu_si128(bo + i + 1, t2);
|
||||
_mm_storeu_si128(bo + i + 2, t3);
|
||||
_mm_storeu_si128(bo + i + 3, t4);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[1]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[2]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[3]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[4]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[5]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[6]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[7]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[8]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[9]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[10]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[11]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[12]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[13]);
|
||||
t2 = _mm_aesenc_si128(t2,k);
|
||||
t3 = _mm_aesenc_si128(t3,k);
|
||||
t4 = _mm_aesenc_si128(t4,k);
|
||||
t1 = _mm_aesenclast_si128(t1,k = _k.ni.k[14]);
|
||||
t2 = _mm_aesenclast_si128(t2,k);
|
||||
t3 = _mm_aesenclast_si128(t3,k);
|
||||
t4 = _mm_aesenclast_si128(t4,k);
|
||||
t1 = _mm_xor_si128(t1,d1);
|
||||
t2 = _mm_xor_si128(t2,d2);
|
||||
t3 = _mm_xor_si128(t3,d3);
|
||||
t4 = _mm_xor_si128(t4,d4);
|
||||
_mm_storeu_si128(bo + i + 0,t1);
|
||||
_mm_storeu_si128(bo + i + 1,t2);
|
||||
_mm_storeu_si128(bo + i + 2,t3);
|
||||
_mm_storeu_si128(bo + i + 3,t4);
|
||||
}
|
||||
|
||||
for (i=pblocks;i<blocks;i++) {
|
||||
d1 = _mm_loadu_si128(bi + i);
|
||||
y = _ghash_aesni(_k.ni.h,y,d1);
|
||||
t1 = _mm_xor_si128(cb, _k.ni.k[0]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[1]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[2]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[3]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[4]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[5]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[6]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[7]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[8]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[9]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[10]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[11]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[12]);
|
||||
t1 = _mm_aesenc_si128(t1, _k.ni.k[13]);
|
||||
t1 = _mm_aesenclast_si128(t1, _k.ni.k[14]);
|
||||
t1 = _mm_xor_si128(t1, d1);
|
||||
_mm_storeu_si128(bo + i, t1);
|
||||
t1 = _mm_xor_si128(cb,_k.ni.k[0]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[1]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[2]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[3]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[4]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[5]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[6]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[7]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[8]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[9]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[10]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[11]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[12]);
|
||||
t1 = _mm_aesenc_si128(t1,_k.ni.k[13]);
|
||||
t1 = _mm_aesenclast_si128(t1,_k.ni.k[14]);
|
||||
t1 = _mm_xor_si128(t1,d1);
|
||||
_mm_storeu_si128(bo + i,t1);
|
||||
cb = _increment_be_aesni(cb);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue