Drop GCM to POWER7 on PowerPC

GCM can do some bulk XORs using the SIMD unit. However, we still need loads and stores to be fast. Fast loads and stores of unaligned data require the VSX unit, which is first available on POWER7.
pull/748/head
Jeffrey Walton 2018-11-17 00:41:49 -05:00
parent 0ac7b25221
commit bbc5c63d33
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
3 changed files with 14 additions and 13 deletions

View File

@ -668,6 +668,7 @@ ifeq ($(DETECT_FEATURES),1)
BLAKE2S_FLAG = $(POWER7_FLAG) BLAKE2S_FLAG = $(POWER7_FLAG)
CHACHA_FLAG = $(POWER7_FLAG) CHACHA_FLAG = $(POWER7_FLAG)
CHAM_FLAG = $(POWER7_FLAG) CHAM_FLAG = $(POWER7_FLAG)
GCM_FLAG = $(POWER7_FLAG)
LEA_FLAG = $(POWER7_FLAG) LEA_FLAG = $(POWER7_FLAG)
SIMECK_FLAG = $(POWER7_FLAG) SIMECK_FLAG = $(POWER7_FLAG)
SIMON64_FLAG = $(POWER7_FLAG) SIMON64_FLAG = $(POWER7_FLAG)

22
gcm.cpp
View File

@ -75,8 +75,8 @@ extern void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c);
extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c); extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c);
#endif #endif
#if CRYPTOPP_ALTIVEC_AVAILABLE #if CRYPTOPP_POWER7_AVAILABLE
extern void GCM_Xor16_ALTIVEC(byte *a, const byte *b, const byte *c); extern void GCM_Xor16_POWER7(byte *a, const byte *b, const byte *c);
#endif #endif
#if CRYPTOPP_CLMUL_AVAILABLE #if CRYPTOPP_CLMUL_AVAILABLE
@ -213,11 +213,11 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
for (k=1; k<j; k++) for (k=1; k<j; k++)
GCM_Xor16_NEON(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16); GCM_Xor16_NEON(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
else else
#elif CRYPTOPP_ALTIVEC_AVAILABLE #elif CRYPTOPP_POWER7_AVAILABLE
if (HasAltivec()) if (HasPower7())
for (j=2; j<=0x80; j*=2) for (j=2; j<=0x80; j*=2)
for (k=1; k<j; k++) for (k=1; k<j; k++)
GCM_Xor16_ALTIVEC(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16); GCM_Xor16_POWER7(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
else else
#endif #endif
for (j=2; j<=0x80; j*=2) for (j=2; j<=0x80; j*=2)
@ -277,13 +277,13 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
GCM_Xor16_NEON(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16); GCM_Xor16_NEON(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
} }
else else
#elif CRYPTOPP_ALTIVEC_AVAILABLE #elif CRYPTOPP_POWER7_AVAILABLE
if (HasAltivec()) if (HasPower7())
for (j=2; j<=8; j*=2) for (j=2; j<=8; j*=2)
for (k=1; k<j; k++) for (k=1; k<j; k++)
{ {
GCM_Xor16_ALTIVEC(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16); GCM_Xor16_POWER7(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
GCM_Xor16_ALTIVEC(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16); GCM_Xor16_POWER7(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
} }
else else
#endif #endif
@ -369,8 +369,8 @@ unsigned int GCM_Base::OptimalDataAlignment() const
HasSSE2() ? 16 : HasSSE2() ? 16 :
#elif CRYPTOPP_ARM_NEON_AVAILABLE #elif CRYPTOPP_ARM_NEON_AVAILABLE
HasNEON() ? 4 : HasNEON() ? 4 :
#elif CRYPTOPP_ALTIVEC_AVAILABLE #elif CRYPTOPP_POWER7_AVAILABLE
HasAltivec() ? 16 : HasPower7() ? 16 :
#endif #endif
GetBlockCipher().OptimalDataAlignment(); GetBlockCipher().OptimalDataAlignment();
} }

View File

@ -740,8 +740,8 @@ void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer)
// ***************************** POWER8 ***************************** // // ***************************** POWER8 ***************************** //
#if CRYPTOPP_ALTIVEC_AVAILABLE #if CRYPTOPP_POWER7_AVAILABLE
void GCM_Xor16_ALTIVEC(byte *a, const byte *b, const byte *c) void GCM_Xor16_POWER7(byte *a, const byte *b, const byte *c)
{ {
VecStore(VecXor(VecLoad(b), VecLoad(c)), a); VecStore(VecXor(VecLoad(b), VecLoad(c)), a);
} }