Switch to vext and remove unnecessary casts
parent b129818c35
commit bbf13b77e7

gcm.cpp (21 changed lines)
@@ -368,8 +368,7 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
 {
 	const uint64x2_t r = s_clmulConstants[0];
 	const uint64x2_t t = vld1q_u64((const uint64_t *)hashKey);
-	const uint64x2_t h0 = vreinterpretq_u64_u8(vrev64q_u8(
-		vreinterpretq_u8_u64(vcombine_u64(vget_high_u64(t), vget_low_u64(t)))));
+	const uint64x2_t h0 = vrev64q_u8(vextq_u64(t, t, 1));

 	uint64x2_t h = h0;
 	for (i=0; i<tableSize-32; i+=32)
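The pattern removed above swaps the two 64-bit halves of t by splitting the vector and recombining it, with vreinterpret casts on either side to satisfy vrev64q_u8. vextq_u64(t, t, 1) reads a 128-bit window starting one lane into the concatenation of t with itself, which is the same half-swap in a single instruction. A minimal sketch of the equivalence (illustrative helpers, not part of the commit):

    #include <arm_neon.h>

    // Old form: split t into halves and recombine them swapped.
    uint64x2_t swap_halves_old(uint64x2_t t)
    {
        return vcombine_u64(vget_high_u64(t), vget_low_u64(t));
    }

    // New form: vext selects lanes 1..2 of the pair (t, t),
    // i.e. { t[1], t[0] } -- the same half-swap, one instruction.
    uint64x2_t swap_halves_new(uint64x2_t t)
    {
        return vextq_u64(t, t, 1);
    }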
@@ -505,8 +504,8 @@ inline void GCM_Base::ReverseHashBufferIfNeeded()
 {
 	if (GetNativeByteOrder() != BIG_ENDIAN_ORDER)
 	{
-		const uint64x2_t x = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(HashBuffer())));
-		vst1q_u8(HashBuffer(), vreinterpretq_u8_u64(vcombine_u64(vget_high_u64(x), vget_low_u64(x))));
+		const uint8x16_t x = vld1q_u8(HashBuffer());
+		vst1q_u8(HashBuffer(), vrev64q_u8(vextq_u8(x, x, 8)));
 	}
 }
 #endif
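Both versions of ReverseHashBufferIfNeeded reverse all 16 bytes of the hash buffer; they differ only in the order of the two steps. Reversing the bytes inside each 64-bit half and swapping the halves gives the full reversal in either order, so the new code does the swap first with vextq_u8 and drops the casts entirely. A sketch of why the composition is a full byte reversal (hypothetical helper, not part of the commit):

    #include <arm_neon.h>

    // vextq_u8(x, x, 8) rotates the vector by 8 bytes (swaps the halves);
    // vrev64q_u8 then reverses the bytes within each half. Byte i of the
    // input ends up at byte 15 - i: a full 16-byte reversal.
    uint8x16_t reverse16(uint8x16_t x)
    {
        return vrev64q_u8(vextq_u8(x, x, 8));
    }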
@@ -674,16 +673,17 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
 		if (++i == s)
 		{
 			const uint64x2_t t1 = vld1q_u64((const uint64_t *)data);
-			d1 = veorq_u64(vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(vcombine_u64(vget_high_u64(t1), vget_low_u64(t1))))), x);
+			d1 = veorq_u64(vrev64q_u8(vextq_u64(t1, t1, 1)), x);
 			c0 = veorq_u64(c0, PMULL_00(d1, h0));
 			c2 = veorq_u64(c2, PMULL_10(d1, h1));
-			d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)), vget_low_u32(vreinterpretq_u32_u64(d1))));
+			d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)),
+				vget_low_u32(vreinterpretq_u32_u64(d1))));
 			c1 = veorq_u64(c1, PMULL_00(d1, h2));

 			break;
 		}

-		d1 = vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(vld1q_u64((const uint64_t *)(data+(s-i)*16-8)))));
+		d1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(data+(s-i)*16-8)));
 		c0 = veorq_u64(c0, PMULL_10(d2, h0));
 		c2 = veorq_u64(c2, PMULL_10(d1, h1));
 		d2 = veorq_u64(d2, d1);
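The two-line d1 update kept above (only rewrapped here) is the cross-term fold: vcombine_u32(vget_high_u32(d1), vget_low_u32(d1)) is the same 64-bit half-swap expressed on u32 lanes, so XORing d1 with it leaves hi ^ lo in both halves. That is the operand the middle PMULL (the c1 accumulation) consumes in the Karatsuba-style three-multiply product. A sketch of what the fold computes (hypothetical helper name):

    #include <arm_neon.h>

    // After the fold, both 64-bit halves hold v[1] ^ v[0].
    uint64x2_t fold_halves(uint64x2_t v)
    {
        const uint32x4_t w = vreinterpretq_u32_u64(v);
        const uint64x2_t s = vreinterpretq_u64_u32(
            vcombine_u32(vget_high_u32(w), vget_low_u32(w)));
        return veorq_u64(v, s);
    }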
@@ -692,17 +692,18 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
 		if (++i == s)
 		{
 			const uint64x2_t t2 = vld1q_u64((const uint64_t *)data);
-			d1 = veorq_u64((uint64x2_t)vrev64q_u8(vreinterpretq_u8_u64(vcombine_u64(vget_high_u64(t2), vget_low_u64(t2)))), x);
+			d1 = veorq_u64(vrev64q_u8(vextq_u64(t2, t2, 1)), x);
 			c0 = veorq_u64(c0, PMULL_01(d1, h0));
 			c2 = veorq_u64(c2, PMULL_11(d1, h1));
-			d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)), vget_low_u32(vreinterpretq_u32_u64(d1))));
+			d1 = veorq_u64(d1, (uint64x2_t)vcombine_u32(vget_high_u32(vreinterpretq_u32_u64(d1)),
+				vget_low_u32(vreinterpretq_u32_u64(d1))));
 			c1 = veorq_u64(c1, PMULL_01(d1, h2));

 			break;
 		}

 		const uint64x2_t t3 = vld1q_u64((uint64_t *)(data+(s-i)*16-8));
-		d2 = vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(vcombine_u64(vget_high_u64(t3), vget_low_u64(t3)))));
+		d2 = vrev64q_u8(vextq_u64(t3, t3, 1));
 		c0 = veorq_u64(c0, PMULL_01(d1, h0));
 		c2 = veorq_u64(c2, PMULL_01(d2, h1));
 		d1 = veorq_u64(d1, d2);
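If you want to confirm the half-swap rewrite is bit-for-bit identical before taking the commit, a throwaway check along these lines (hypothetical test, not part of the commit; needs a NEON-capable target) is enough:

    #include <arm_neon.h>
    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main()
    {
        const uint64_t in[2] = { 0x0123456789abcdefULL, 0xfedcba9876543210ULL };
        const uint64x2_t t = vld1q_u64(in);

        uint64_t a[2], b[2];
        vst1q_u64(a, vcombine_u64(vget_high_u64(t), vget_low_u64(t)));  // old form
        vst1q_u64(b, vextq_u64(t, t, 1));                               // new form
        assert(std::memcmp(a, b, sizeof(a)) == 0);
        return 0;
    }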