More AES stuff

This commit is contained in:
Adam Ierymenko 2019-08-12 15:37:50 -07:00
parent f5e71f64ed
commit f7bc9f01c9
No known key found for this signature in database
GPG key ID: 1657198823E52A61

View file

@ -97,6 +97,13 @@ public:
/**
 * Decrypt a message in GCM mode and check its authentication tag
 *
 * When ZT_AES_AESNI is compiled in and HW_ACCEL is set, the work is handed to
 * _decrypt_gcm256_aesni() and the tag it produces is compared against the
 * caller-supplied tag with Utils::secureEq(). There is no software fallback
 * yet: without hardware acceleration this function calls abort().
 *
 * The output buffer is passed to the decrypt routine before the tag is
 * compared, so callers should not rely on the contents of out when this
 * returns false.
 *
 * @param iv 12-byte initialization vector
 * @param in Input to decrypt
 * @param inlen Length of input in bytes
 * @param assoc Associated data passed through to the GCM routine
 * @param assoclen Length of associated data in bytes
 * @param out Output buffer handed to the decrypt routine
 * @param tag Tag to verify against
 * @param taglen Number of tag bytes to verify; values above 16 are rejected
 * @return True if the computed tag matches the supplied tag over taglen bytes
 */
inline bool gcmDecrypt(const uint8_t iv[12],const void *in,unsigned int inlen,const void *assoc,unsigned int assoclen,void *out,const uint8_t *tag,unsigned int taglen)
{
#ifdef ZT_AES_AESNI
	if (HW_ACCEL) {
		uint8_t tagbuf[16];

		// tagbuf holds only 16 bytes. A larger taglen would be forwarded as the
		// tag size to the decrypt routine and would make the comparison below
		// read past the end of tagbuf, so such a request can never verify.
		if (taglen > sizeof(tagbuf))
			return false;

		_decrypt_gcm256_aesni(inlen,(const uint8_t *)in,(uint8_t *)out,iv,assoclen,(const uint8_t *)assoc,tagbuf,taglen);
		return Utils::secureEq(tagbuf,tag,taglen);
	}
#endif
	abort(); // TODO: software
	return false;
}
@ -395,7 +402,7 @@ private:
}
inline void _icv_crypt_aesni(__m128i y,__m128i j,uint8_t *icv,unsigned int icvsize) const
{
__m128i *ks,t,b;
__m128i t,b;
t = _mm_xor_si128(j,_k.ni.k[0]);
t = _mm_aesenc_si128(t,_k.ni.k[1]);
t = _mm_aesenc_si128(t,_k.ni.k[2]);
@ -418,7 +425,7 @@ private:
inline __m128i _encrypt_gcm_rem_aesni(unsigned int rem,const void *in,void *out,__m128i cb,__m128i y) const
{
__m128i *ks,t,b;
__m128i t,b;
memset(&b,0,sizeof(b));
memcpy(&b,in,rem);
t = _mm_xor_si128(cb,_k.ni.k[0]);
@ -443,8 +450,8 @@ private:
}
inline void _encrypt_gcm256_aesni(unsigned int len,const uint8_t *in,uint8_t *out,const uint8_t *iv,unsigned int alen,const uint8_t *assoc,uint8_t *icv,unsigned int icvsize) const
{
__m128i d1,d2,d3,d4,t1,t2,t3,t4;
__m128i *ks,y,j,cb,*bi,*bo;
__m128i d1,d2,d3,d4,t1,t2,t3,t4,k;
__m128i y,j,cb,*bi,*bo;
j = _create_j_aesni(iv);
cb = _increment_be_aesni(j);
@ -461,70 +468,70 @@ private:
d2 = _mm_loadu_si128(bi + i + 1);
d3 = _mm_loadu_si128(bi + i + 2);
d4 = _mm_loadu_si128(bi + i + 3);
t1 = _mm_xor_si128(cb, _k.ni.k[0]);
t1 = _mm_xor_si128(cb,k = _k.ni.k[0]);
cb = _increment_be_aesni(cb);
t2 = _mm_xor_si128(cb, _k.ni.k[0]);
t2 = _mm_xor_si128(cb,k);
cb = _increment_be_aesni(cb);
t3 = _mm_xor_si128(cb, _k.ni.k[0]);
t3 = _mm_xor_si128(cb,k);
cb = _increment_be_aesni(cb);
t4 = _mm_xor_si128(cb, _k.ni.k[0]);
t4 = _mm_xor_si128(cb,k);
cb = _increment_be_aesni(cb);
t1 = _mm_aesenc_si128(t1, _k.ni.k[1]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[1]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[1]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[1]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[2]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[2]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[2]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[2]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[3]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[3]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[3]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[3]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[4]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[4]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[4]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[4]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[5]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[5]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[5]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[5]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[6]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[6]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[6]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[6]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[7]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[7]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[7]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[7]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[8]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[8]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[8]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[8]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[9]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[9]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[9]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[9]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[10]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[10]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[10]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[10]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[11]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[11]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[11]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[11]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[12]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[12]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[12]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[12]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[13]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[13]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[13]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[13]);
t1 = _mm_aesenclast_si128(t1, _k.ni.k[14]);
t2 = _mm_aesenclast_si128(t2, _k.ni.k[14]);
t3 = _mm_aesenclast_si128(t3, _k.ni.k[14]);
t4 = _mm_aesenclast_si128(t4, _k.ni.k[14]);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[1]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[2]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[3]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[4]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[5]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[6]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[7]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[8]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[9]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[10]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[11]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[12]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[13]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenclast_si128(t1,k = _k.ni.k[14]);
t2 = _mm_aesenclast_si128(t2,k);
t3 = _mm_aesenclast_si128(t3,k);
t4 = _mm_aesenclast_si128(t4,k);
t1 = _mm_xor_si128(t1,d1);
t2 = _mm_xor_si128(t2,d2);
t3 = _mm_xor_si128(t3,d3);
@ -567,7 +574,7 @@ private:
}
inline __m128i _decrypt_gcm_rem_aesni(unsigned int rem,const void *in,void *out,__m128i cb,__m128i y)
{
__m128i *ks, t, b;
__m128i t,b;
memset(&b,0,sizeof(b));
memcpy(&b,in,rem);
y = _ghash_aesni(_k.ni.h,y,b);
@ -590,10 +597,10 @@ private:
memcpy(out,&b,rem);
return y;
}
inline void decrypt_gcm256(unsigned int len,const uint8_t *in,uint8_t *out,const uint8_t *iv,unsigned int alen,const uint8_t *assoc,uint8_t *icv,unsigned int icvsize)
inline void _decrypt_gcm256_aesni(unsigned int len,const uint8_t *in,uint8_t *out,const uint8_t *iv,unsigned int alen,const uint8_t *assoc,uint8_t *icv,unsigned int icvsize)
{
__m128i d1, d2, d3, d4, t1, t2, t3, t4;
__m128i *ks, y, j, cb, *bi, *bo;
__m128i d1,d2,d3,d4,t1,t2,t3,t4,k;
__m128i y,j,cb,*bi,*bo;
unsigned int blocks,pblocks,rem;
j = _create_j_aesni(iv);
@ -613,70 +620,70 @@ private:
d4 = _mm_loadu_si128(bi + i + 3);
y = _mm_xor_si128(y,d1);
y = _mult4xor_aesni(_k.ni.hhhh,_k.ni.hhh,_k.ni.hh,_k.ni.h,y,d2,d3,d4);
t1 = _mm_xor_si128(cb, _k.ni.k[0]);
t1 = _mm_xor_si128(cb,k = _k.ni.k[0]);
cb = _increment_be_aesni(cb);
t2 = _mm_xor_si128(cb, _k.ni.k[0]);
t2 = _mm_xor_si128(cb,k);
cb = _increment_be_aesni(cb);
t3 = _mm_xor_si128(cb, _k.ni.k[0]);
t3 = _mm_xor_si128(cb,k);
cb = _increment_be_aesni(cb);
t4 = _mm_xor_si128(cb, _k.ni.k[0]);
t4 = _mm_xor_si128(cb,k);
cb = _increment_be_aesni(cb);
t1 = _mm_aesenc_si128(t1, _k.ni.k[1]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[1]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[1]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[1]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[2]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[2]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[2]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[2]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[3]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[3]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[3]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[3]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[4]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[4]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[4]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[4]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[5]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[5]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[5]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[5]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[6]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[6]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[6]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[6]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[7]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[7]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[7]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[7]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[8]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[8]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[8]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[8]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[9]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[9]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[9]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[9]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[10]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[10]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[10]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[10]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[11]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[11]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[11]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[11]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[12]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[12]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[12]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[12]);
t1 = _mm_aesenc_si128(t1, _k.ni.k[13]);
t2 = _mm_aesenc_si128(t2, _k.ni.k[13]);
t3 = _mm_aesenc_si128(t3, _k.ni.k[13]);
t4 = _mm_aesenc_si128(t4, _k.ni.k[13]);
t1 = _mm_aesenclast_si128(t1, _k.ni.k[14]);
t2 = _mm_aesenclast_si128(t2, _k.ni.k[14]);
t3 = _mm_aesenclast_si128(t3, _k.ni.k[14]);
t4 = _mm_aesenclast_si128(t4, _k.ni.k[14]);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[1]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[2]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[3]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[4]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[5]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[6]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[7]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[8]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[9]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[10]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[11]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[12]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenc_si128(t1,k = _k.ni.k[13]);
t2 = _mm_aesenc_si128(t2,k);
t3 = _mm_aesenc_si128(t3,k);
t4 = _mm_aesenc_si128(t4,k);
t1 = _mm_aesenclast_si128(t1,k = _k.ni.k[14]);
t2 = _mm_aesenclast_si128(t2,k);
t3 = _mm_aesenclast_si128(t3,k);
t4 = _mm_aesenclast_si128(t4,k);
t1 = _mm_xor_si128(t1,d1);
t2 = _mm_xor_si128(t2,d2);
t3 = _mm_xor_si128(t3,d3);