Mirror over a few more AES fixes.

This commit is contained in:
Adam Ierymenko 2020-12-03 22:29:38 -05:00
parent c943d0e807
commit ec76f6e1d2
No known key found for this signature in database
GPG key ID: C8877CF2D7A5D7F3
3 changed files with 36 additions and 25 deletions

View file

@ -149,22 +149,12 @@ void AES::GMAC::update(const void *const data, unsigned int len) noexcept
}
}
if (likely(((uintptr_t)in & 7U) == 0U)) {
while (len >= 16) {
y0 ^= *reinterpret_cast<const uint64_t *>(in);
y1 ^= *reinterpret_cast<const uint64_t *>(in + 8);
in += 16;
s_gfmul(h0, h1, y0, y1);
len -= 16;
}
} else {
while (len >= 16) {
y0 ^= Utils::loadMachineEndian< uint64_t >(in);
y1 ^= Utils::loadMachineEndian< uint64_t >(in + 8);
in += 16;
s_gfmul(h0, h1, y0, y1);
len -= 16;
}
while (len >= 16) {
y0 ^= Utils::loadMachineEndian< uint64_t >(in);
y1 ^= Utils::loadMachineEndian< uint64_t >(in + 8);
in += 16;
s_gfmul(h0, h1, y0, y1);
len -= 16;
}
_y[0] = y0;

View file

@ -26,7 +26,9 @@ namespace {
const __m128i s_sseSwapBytes = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
#ifdef __GNUC__
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,pclmul")))
#endif
__m128i p_gmacPCLMUL128(const __m128i h, __m128i y) noexcept
{
y = _mm_shuffle_epi8(y, s_sseSwapBytes);
@ -57,7 +59,9 @@ __m128i p_gmacPCLMUL128(const __m128i h, __m128i y) noexcept
#define ZT_AES_VAES512 1
#ifdef __GNUC__
__attribute__((__target__("sse4,aes,avx,avx2,vaes,avx512f,avx512bw")))
#endif
void p_aesCtrInnerVAES512(unsigned int &len, const uint64_t c0, uint64_t &c1, const uint8_t *&in, uint8_t *&out, const __m128i *const k) noexcept
{
const __m512i kk0 = _mm512_broadcast_i32x4(k[0]);
@ -107,7 +111,9 @@ void p_aesCtrInnerVAES512(unsigned int &len, const uint64_t c0, uint64_t &c1, co
#define ZT_AES_VAES256 1
#ifdef __GNUC__
__attribute__((__target__("sse4,aes,avx,avx2,vaes")))
#endif
void p_aesCtrInnerVAES256(unsigned int &len, const uint64_t c0, uint64_t &c1, const uint8_t *&in, uint8_t *&out, const __m128i *const k) noexcept
{
const __m256i kk0 = _mm256_broadcastsi128_si256(k[0]);
@ -175,7 +181,9 @@ void p_aesCtrInnerVAES256(unsigned int &len, const uint64_t c0, uint64_t &c1, co
#endif // does compiler support AVX2 and AVX512 AES intrinsics?
#ifdef __GNUC__
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes,pclmul")))
#endif
__m128i p_init256_1_aesni(__m128i a, __m128i b) noexcept
{
__m128i x, y;
@ -190,7 +198,9 @@ __m128i p_init256_1_aesni(__m128i a, __m128i b) noexcept
return x;
}
#ifdef __GNUC__
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes,pclmul")))
#endif
__m128i p_init256_2_aesni(__m128i a, __m128i b) noexcept
{
__m128i x, y, z;
@ -208,7 +218,9 @@ __m128i p_init256_2_aesni(__m128i a, __m128i b) noexcept
} // anonymous namespace
#ifdef __GNUC__
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,pclmul")))
#endif
void AES::GMAC::p_aesNIUpdate(const uint8_t *in, unsigned int len) noexcept
{
__m128i y = _mm_loadu_si128(reinterpret_cast<const __m128i *>(_y));
@ -274,7 +286,9 @@ void AES::GMAC::p_aesNIUpdate(const uint8_t *in, unsigned int len) noexcept
_rp = len; // len is always less than 16 here
}
#ifdef __GNUC__
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,pclmul,aes")))
#endif
void AES::GMAC::p_aesNIFinish(uint8_t tag[16]) noexcept
{
__m128i y = _mm_loadu_si128(reinterpret_cast<const __m128i *>(_y));
@ -345,7 +359,9 @@ void AES::GMAC::p_aesNIFinish(uint8_t tag[16]) noexcept
_mm_storeu_si128(reinterpret_cast<__m128i *>(tag), _mm_xor_si128(_mm_shuffle_epi8(t4, s_sseSwapBytes), encIV));
}
#ifdef __GNUC__
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes")))
#endif
void AES::CTR::p_aesNICrypt(const uint8_t *in, uint8_t *out, unsigned int len) noexcept
{
const __m128i dd = _mm_set_epi64x(0, (long long)_ctr[0]);
@ -542,7 +558,9 @@ void AES::CTR::p_aesNICrypt(const uint8_t *in, uint8_t *out, unsigned int len) n
_ctr[1] = Utils::hton(c1);
}
#ifdef __GNUC__
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes,pclmul")))
#endif
void AES::p_init_aesni(const uint8_t *key) noexcept
{
__m128i t1, t2, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11, k12, k13;
@ -604,7 +622,9 @@ void AES::p_init_aesni(const uint8_t *key) noexcept
p_k.ni.h2[3] = _mm_xor_si128(_mm_shuffle_epi32(hhhh, 78), hhhh);
}
#ifdef __GNUC__
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes,pclmul")))
#endif
void AES::p_encrypt_aesni(const void *const in, void *const out) const noexcept
{
__m128i tmp = _mm_loadu_si128((const __m128i *)in);
@ -625,7 +645,9 @@ void AES::p_encrypt_aesni(const void *const in, void *const out) const noexcept
_mm_storeu_si128((__m128i *)out, _mm_aesenclast_si128(tmp, p_k.ni.k[14]));
}
#ifdef __GNUC__
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes,pclmul")))
#endif
void AES::p_decrypt_aesni(const void *in, void *out) const noexcept
{
__m128i tmp = _mm_loadu_si128((const __m128i *)in);

View file

@ -131,7 +131,7 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe
uint8x16_t k14 = _aes.p_k.neon.ek[14];
unsigned int totalLen = _len;
if ((totalLen & 15U)) {
if ((totalLen & 15U) != 0) {
for (;;) {
if (unlikely(!len)) {
vst1q_u8(reinterpret_cast<uint8_t *>(_ctr), vrev32q_u8(dd));
@ -140,7 +140,7 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe
}
--len;
out[totalLen++] = *(in++);
if (!(totalLen & 15U)) {
if ((totalLen & 15U) == 0) {
uint8_t *const otmp = out + (totalLen - 16);
uint8x16_t d0 = vrev32q_u8(dd);
uint8x16_t pt = vld1q_u8(otmp);
@ -180,7 +180,10 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe
uint8x16_t d2 = vrev32q_u8(dd2);
uint8x16_t d3 = vrev32q_u8(dd3);
uint8x16_t pt0 = vld1q_u8(in);
in += 16;
uint8x16_t pt1 = vld1q_u8(in + 16);
uint8x16_t pt2 = vld1q_u8(in + 32);
uint8x16_t pt3 = vld1q_u8(in + 48);
d0 = vaesmcq_u8(vaeseq_u8(d0, k0));
d1 = vaesmcq_u8(vaeseq_u8(d1, k0));
d2 = vaesmcq_u8(vaeseq_u8(d2, k0));
@ -193,8 +196,6 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe
d1 = vaesmcq_u8(vaeseq_u8(d1, k2));
d2 = vaesmcq_u8(vaeseq_u8(d2, k2));
d3 = vaesmcq_u8(vaeseq_u8(d3, k2));
uint8x16_t pt1 = vld1q_u8(in);
in += 16;
d0 = vaesmcq_u8(vaeseq_u8(d0, k3));
d1 = vaesmcq_u8(vaeseq_u8(d1, k3));
d2 = vaesmcq_u8(vaeseq_u8(d2, k3));
@ -207,8 +208,6 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe
d1 = vaesmcq_u8(vaeseq_u8(d1, k5));
d2 = vaesmcq_u8(vaeseq_u8(d2, k5));
d3 = vaesmcq_u8(vaeseq_u8(d3, k5));
uint8x16_t pt2 = vld1q_u8(in);
in += 16;
d0 = vaesmcq_u8(vaeseq_u8(d0, k6));
d1 = vaesmcq_u8(vaeseq_u8(d1, k6));
d2 = vaesmcq_u8(vaeseq_u8(d2, k6));
@ -221,8 +220,6 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe
d1 = vaesmcq_u8(vaeseq_u8(d1, k8));
d2 = vaesmcq_u8(vaeseq_u8(d2, k8));
d3 = vaesmcq_u8(vaeseq_u8(d3, k8));
uint8x16_t pt3 = vld1q_u8(in);
in += 16;
d0 = vaesmcq_u8(vaeseq_u8(d0, k9));
d1 = vaesmcq_u8(vaeseq_u8(d1, k9));
d2 = vaesmcq_u8(vaeseq_u8(d2, k9));
@ -253,7 +250,9 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe
vst1q_u8(out + 16, d1);
vst1q_u8(out + 32, d2);
vst1q_u8(out + 48, d3);
out += 64;
in += 64;
dd = (uint8x16_t)vaddq_u32((uint32x4_t)dd, four);
if (unlikely(len < 64))