Drop GCM to POWER7 on PowerPC

GCM can do some bulk XORs using the SIMD unit. However, we still need loads and stores to be fast. Fast loads and stores of unaligned data require the VSX unit.
pull/748/head
Jeffrey Walton 2018-11-17 00:41:49 -05:00
parent 0ac7b25221
commit bbc5c63d33
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
3 changed files with 14 additions and 13 deletions

View File

@ -668,6 +668,7 @@ ifeq ($(DETECT_FEATURES),1)
BLAKE2S_FLAG = $(POWER7_FLAG)
CHACHA_FLAG = $(POWER7_FLAG)
CHAM_FLAG = $(POWER7_FLAG)
GCM_FLAG = $(POWER7_FLAG)
LEA_FLAG = $(POWER7_FLAG)
SIMECK_FLAG = $(POWER7_FLAG)
SIMON64_FLAG = $(POWER7_FLAG)

22
gcm.cpp
View File

@ -75,8 +75,8 @@ extern void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c);
extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c);
#endif
#if CRYPTOPP_ALTIVEC_AVAILABLE
extern void GCM_Xor16_ALTIVEC(byte *a, const byte *b, const byte *c);
#if CRYPTOPP_POWER7_AVAILABLE
extern void GCM_Xor16_POWER7(byte *a, const byte *b, const byte *c);
#endif
#if CRYPTOPP_CLMUL_AVAILABLE
@ -213,11 +213,11 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
for (k=1; k<j; k++)
GCM_Xor16_NEON(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
else
#elif CRYPTOPP_ALTIVEC_AVAILABLE
if (HasAltivec())
#elif CRYPTOPP_POWER7_AVAILABLE
if (HasPower7())
for (j=2; j<=0x80; j*=2)
for (k=1; k<j; k++)
GCM_Xor16_ALTIVEC(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
GCM_Xor16_POWER7(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
else
#endif
for (j=2; j<=0x80; j*=2)
@ -277,13 +277,13 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
GCM_Xor16_NEON(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
}
else
#elif CRYPTOPP_ALTIVEC_AVAILABLE
if (HasAltivec())
#elif CRYPTOPP_POWER7_AVAILABLE
if (HasPower7())
for (j=2; j<=8; j*=2)
for (k=1; k<j; k++)
{
GCM_Xor16_ALTIVEC(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
GCM_Xor16_ALTIVEC(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
GCM_Xor16_POWER7(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
GCM_Xor16_POWER7(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
}
else
#endif
@ -369,8 +369,8 @@ unsigned int GCM_Base::OptimalDataAlignment() const
HasSSE2() ? 16 :
#elif CRYPTOPP_ARM_NEON_AVAILABLE
HasNEON() ? 4 :
#elif CRYPTOPP_ALTIVEC_AVAILABLE
HasAltivec() ? 16 :
#elif CRYPTOPP_POWER7_AVAILABLE
HasPower7() ? 16 :
#endif
GetBlockCipher().OptimalDataAlignment();
}

View File

@ -740,8 +740,8 @@ void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer)
// ***************************** POWER8 ***************************** //
#if CRYPTOPP_ALTIVEC_AVAILABLE
void GCM_Xor16_ALTIVEC(byte *a, const byte *b, const byte *c)
#if CRYPTOPP_POWER7_AVAILABLE
// XORs the vector loaded from b with the vector loaded from c and stores
// the result to a, using the VecLoad/VecXor/VecStore helpers.
// NOTE(review): presumably one 16-byte block per call (per the "Xor16" name),
// and the helpers are expected to tolerate unaligned pointers on POWER7 --
// confirm against the VecLoad/VecStore definitions.
void GCM_Xor16_POWER7(byte *a, const byte *b, const byte *c)
{
VecStore(VecXor(VecLoad(b), VecLoad(c)), a);
}