diff --git a/blake2.cpp b/blake2.cpp index e80985d4..96dedfbc 100644 --- a/blake2.cpp +++ b/blake2.cpp @@ -1,6 +1,17 @@ -// blake2.cpp - written and placed in the public domain by Jeffrey Walton and Zooko -// Wilcox-O'Hearn. Based on Aumasson, Neves, Wilcox-O'Hearn and Winnerlein's -// reference BLAKE2 implementation at http://github.com/BLAKE2/BLAKE2. +// blake2.cpp - written and placed in the public domain by Jeffrey Walton +// and Zooko Wilcox-O'Hearn. Based on Aumasson, Neves, +// Wilcox-O'Hearn and Winnerlein's reference BLAKE2 +// implementation at http://github.com/BLAKE2/BLAKE2. +// +// The BLAKE2b and BLAKE2s performance numbers are consistent with the BLAKE2 team's +// numbers. However, we have an Altivec/POWER7 implementation of BLAKE2s, +// and a POWER8 implementation of BLAKE2b (the reference BLAKE2 implementation is missing them). The +// Altivec/POWER7 code is about 2x faster than C++ when using GCC 5.0 or +// above. The POWER8 code is about 2.5x faster than C++ when using GCC 5.0 +// or above. If you use GCC 4.0 (PowerMac) or GCC 4.8 (GCC Compile Farm) +// then the PowerPC code will be slower than C++. Be sure to use GCC 5.0 +// or above for PowerPC builds or disable Altivec for BLAKE2b and BLAKE2s +// if using the old compilers. #include "pch.h" #include "config.h" @@ -15,6 +26,7 @@ // #undef CRYPTOPP_SSE41_AVAILABLE // #undef CRYPTOPP_ARM_NEON_AVAILABLE // #undef CRYPTOPP_ALTIVEC_AVAILABLE +// #undef CRYPTOPP_POWER8_AVAILABLE // Disable NEON/ASIMD for Cortex-A53 and A57. The shifts are too slow and C/C++ is about // 3 cpb faster than NEON/ASIMD. Also see http://github.com/weidai11/cryptopp/issues/367. 
@@ -22,11 +34,6 @@ # undef CRYPTOPP_ARM_NEON_AVAILABLE #endif -#if !(CRYPTOPP_ALTIVEC_AVAILABLE) -# undef CRYPTOPP_POWER7_AVAILABLE -# undef CRYPTOPP_POWER8_AVAILABLE -#endif - NAMESPACE_BEGIN(CryptoPP) // Export the tables to the SIMD files @@ -155,7 +162,7 @@ extern void BLAKE2_Compress32_NEON(const byte* input, BLAKE2s_State& state); extern void BLAKE2_Compress64_NEON(const byte* input, BLAKE2b_State& state); #endif -#if CRYPTOPP_POWER7_AVAILABLE +#if CRYPTOPP_ALTIVEC_AVAILABLE extern void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state); #endif @@ -383,6 +390,10 @@ std::string BLAKE2s::AlgorithmProvider() const #if (CRYPTOPP_POWER7_AVAILABLE) if (HasPower7()) return "Power7"; +#endif +#if (CRYPTOPP_ALTIVEC_AVAILABLE) + if (HasAltivec()) + return "Altivec"; #endif return "C++"; } @@ -655,8 +666,8 @@ void BLAKE2s::Compress(const byte *input) return BLAKE2_Compress32_NEON(input, *m_state.data()); } #endif -#if CRYPTOPP_POWER7_AVAILABLE - if(HasPower7()) +#if CRYPTOPP_ALTIVEC_AVAILABLE + if(HasAltivec()) { return BLAKE2_Compress32_POWER7(input, *m_state.data()); } diff --git a/blake2.h b/blake2.h index 1a999036..a34d9285 100644 --- a/blake2.h +++ b/blake2.h @@ -1,6 +1,7 @@ -// blake2.h - written and placed in the public domain by Jeffrey Walton and Zooko -// Wilcox-O'Hearn. Based on Aumasson, Neves, Wilcox-O'Hearn and Winnerlein's -// reference BLAKE2 implementation at http://github.com/BLAKE2/BLAKE2. +// blake2.h - written and placed in the public domain by Jeffrey Walton +// and Zooko Wilcox-O'Hearn. Based on Aumasson, Neves, +// Wilcox-O'Hearn and Winnerlein's reference BLAKE2 +// implementation at http://github.com/BLAKE2/BLAKE2. 
/// \file blake2.h /// \brief Classes for BLAKE2b and BLAKE2s message digests and keyed message digests diff --git a/blake2s_simd.cpp b/blake2s_simd.cpp index 45e0043e..cc1ec137 100644 --- a/blake2s_simd.cpp +++ b/blake2s_simd.cpp @@ -7,6 +7,16 @@ // needed because additional CXXFLAGS are required to enable the // appropriate instruction sets in some build configurations. +// The BLAKE2b and BLAKE2s performance numbers are consistent with the BLAKE2 team's +// numbers. However, we have an Altivec/POWER7 implementation of BLAKE2s, +// and a POWER8 implementation of BLAKE2b (the reference BLAKE2 implementation is missing them). The +// Altivec/POWER7 code is about 2x faster than C++ when using GCC 5.0 or +// above. The POWER8 code is about 2.5x faster than C++ when using GCC 5.0 +// or above. If you use GCC 4.0 (PowerMac) or GCC 4.8 (GCC Compile Farm) +// then the PowerPC code will be slower than C++. Be sure to use GCC 5.0 +// or above for PowerPC builds or disable Altivec for BLAKE2b and BLAKE2s +// if using the old compilers. + #include "pch.h" #include "config.h" #include "misc.h" @@ -41,7 +51,7 @@ # include #endif -#if (CRYPTOPP_POWER7_AVAILABLE) +#if (CRYPTOPP_ALTIVEC_AVAILABLE) # include "ppc_simd.h" #endif @@ -671,7 +681,7 @@ void BLAKE2_Compress32_NEON(const byte* input, BLAKE2s_State& state) } #endif // CRYPTOPP_ARM_NEON_AVAILABLE -#if (CRYPTOPP_POWER7_AVAILABLE) +#if (CRYPTOPP_ALTIVEC_AVAILABLE) inline uint32x4_p VectorLoad32(const void* p) { @@ -984,6 +994,6 @@ void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state) VectorStore32LE(&state.h[0], vec_xor(ff0, vec_xor(row1, row3))); VectorStore32LE(&state.h[4], vec_xor(ff1, vec_xor(row2, row4))); } -#endif // CRYPTOPP_POWER7_AVAILABLE +#endif // CRYPTOPP_ALTIVEC_AVAILABLE NAMESPACE_END