diff --git a/GNUmakefile b/GNUmakefile index 4f4cb21d..7d7f4fb8 100755 --- a/GNUmakefile +++ b/GNUmakefile @@ -56,7 +56,13 @@ GCC_COMPILER := $(shell $(CXX) --version 2>/dev/null | $(GREP) -v -E '(llvm|clan XLC_COMPILER := $(shell $(CXX) $(CXX) -qversion 2>/dev/null |$(GREP) -i -c "IBM XL") CLANG_COMPILER := $(shell $(CXX) --version 2>/dev/null | $(GREP) -i -c -E '(llvm|clang)') INTEL_COMPILER := $(shell $(CXX) --version 2>/dev/null | $(GREP) -i -c '\(icc\)') + +# Various Port compilers on OS X MACPORTS_COMPILER := $(shell $(CXX) --version 2>/dev/null | $(GREP) -i -c "macports") +HOMEBREW_COMPILER := $(shell $(CXX) --version 2>/dev/null | $(GREP) -i -c "homebrew") +ifneq ($(MACPORTS_COMPILER)$(HOMEBREW_COMPILER),00) + OSXPORT_COMPILER := 1 +endif # Sun Studio 12.0 provides SunCC 0x0510; and Sun Studio 12.3 provides SunCC 0x0512 SUNCC_510_OR_LATER := $(shell $(CXX) -V 2>&1 | $(GREP) -i -c -E "CC: (Sun|Studio) .* (5\.1[0-9]|5\.[2-9]|6\.)") @@ -175,7 +181,7 @@ endif # IS_MINGW32 ifneq ($(IS_X86)$(IS_X32)$(IS_X64),000) # Fixup. Clang reports an error rather than "LLVM assembler" or similar. -ifneq ($(MACPORTS_COMPILER),1) +ifneq ($(OSXPORT_COMPILER),1) HAVE_GAS := $(shell $(CXX) -xc -c /dev/null -Wa,-v -o/dev/null 2>&1 | $(GREP) -c "GNU assembler") endif @@ -237,6 +243,7 @@ ifeq ($(findstring -DCRYPTOPP_DISABLE_SSSE3,$(CXXFLAGS)),) ifeq ($(HAVE_SSSE3),1) ARIA_FLAG = -mssse3 SSSE3_FLAG = -mssse3 + SPECK_FLAG = -mssse3 endif ifeq ($(findstring -DCRYPTOPP_DISABLE_SSE4,$(CXXFLAGS)),) HAVE_SSE4 = $(shell echo | $(CXX) -x c++ $(CXXFLAGS) -msse4.1 -dM -E - 2>/dev/null | $(GREP) -i -c __SSE4_1__) @@ -272,6 +279,7 @@ ifeq ($(SUN_COMPILER),1) ifeq ($(COUNT),0) SSSE3_FLAG = -xarch=ssse3 -D__SSSE3__=1 ARIA_FLAG = -xarch=ssse3 -D__SSSE3__=1 + SPECK_FLAG = -xarch=ssse3 -D__SSSE3__=1 LDFLAGS += -xarch=ssse3 endif COUNT := $(shell $(CXX) $(CXXFLAGS) -E -xarch=sse4_1 -xdumpmacros /dev/null 2>&1 | $(GREP) -i -c "illegal") @@ -307,9 +315,9 @@ ifneq ($(INTEL_COMPILER),0) endif endif -# Tell MacPorts GCC to use Clang integrated assembler +# Tell MacPorts and Homebrew GCC to use Clang integrated assembler # http://github.com/weidai11/cryptopp/issues/190 -ifeq ($(GCC_COMPILER)$(MACPORTS_COMPILER),11) +ifeq ($(GCC_COMPILER)$(OSXPORT_COMPILER),11) ifeq ($(findstring -Wa,-q,$(CXXFLAGS)),) CXXFLAGS += -Wa,-q endif @@ -1033,6 +1041,10 @@ sha-simd.o : sha-simd.cpp shacal2-simd.o : shacal2-simd.cpp $(CXX) $(strip $(CXXFLAGS) $(SHA_FLAG) -c) $< +# SSE4.1 or ARMv8a available +speck-simd.o : speck-simd.cpp + $(CXX) $(strip $(CXXFLAGS) $(SPECK_FLAG) -c) $< + # Don't build Threefish with UBsan on Travis CI. Timeouts cause the build to fail. # Also see http://stackoverflow.com/q/12983137/608639. ifeq ($(findstring true,$(CI)),true) diff --git a/speck.cpp b/speck.cpp index f52dcb78..956de7ed 100644 --- a/speck.cpp +++ b/speck.cpp @@ -1,10 +1,14 @@ -// speck.h - written and placed in the public domain by Jeffrey Walton +// speck.cpp - written and placed in the public domain by Jeffrey Walton #include "pch.h" #include "config.h" #include "speck.h" #include "misc.h" +#include "cpu.h" + +// Uncomment to benchmark C/C++, and to isolate SSE code. +// #undef CRYPTOPP_SSSE3_AVAILABLE ANONYMOUS_NAMESPACE_BEGIN @@ -21,7 +25,7 @@ using CryptoPP::rotrFixed; //! additional template parameters also made calling SPECK_Encrypt and SPECK_Decrypt //! kind of messy. template -inline void TF83(W& x, W& y, const W& k) +inline void TF83(W& x, W& y, const W k) { x = rotrFixed(x, 8); x += y; x ^= k; @@ -37,12 +41,12 @@ inline void TF83(W& x, W& y, const W& k) //! additional template parameters also made calling SPECK_Encrypt and SPECK_Decrypt //! kind of messy. template -inline void TR83(W& x, W& y, const W& k) +inline void TR83(W& x, W& y, const W k) { - y^=x; - y=rotrFixed(y,3); - x^=k; x-=y; - x=rotlFixed(x,8); + y ^= x; + y = rotrFixed(y,3); + x ^= k; x -= y; + x = rotlFixed(x,8); } //! \brief Forward transformation @@ -89,7 +93,7 @@ inline void SPECK_ExpandKey_2W(W key[R], const W k[2]) CRYPTOPP_ASSERT(R==32); W i=0, B=k[0], A=k[1]; - while(i(m_rkey, m_wspace); + m_rkeys.New(26); + m_rounds = 26; + SPECK_ExpandKey_3W(m_rkeys, m_wspace); break; case 4: - m_rkey.New(27); - SPECK_ExpandKey_4W(m_rkey, m_wspace); + m_rkeys.New(27); + m_rounds = 27; + SPECK_ExpandKey_4W(m_rkeys, m_wspace); break; default: CRYPTOPP_ASSERT(0);; @@ -195,13 +209,13 @@ void SPECK64::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, typedef GetBlock InBlock; InBlock iblk(inBlock); iblk(m_wspace[0])(m_wspace[1]); - switch (m_kwords) + switch (m_rounds) { - case 3: - SPECK_Encrypt(m_wspace+2, m_wspace+0, m_rkey); + case 26: + SPECK_Encrypt(m_wspace+2, m_wspace+0, m_rkeys); break; - case 4: - SPECK_Encrypt(m_wspace+2, m_wspace+0, m_rkey); + case 27: + SPECK_Encrypt(m_wspace+2, m_wspace+0, m_rkeys); break; default: CRYPTOPP_ASSERT(0);; @@ -218,13 +232,13 @@ void SPECK64::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, typedef GetBlock InBlock; InBlock iblk(inBlock); iblk(m_wspace[0])(m_wspace[1]); - switch (m_kwords) + switch (m_rounds) { - case 3: - SPECK_Decrypt(m_wspace+2, m_wspace+0, m_rkey); + case 26: + SPECK_Decrypt(m_wspace+2, m_wspace+0, m_rkeys); break; - case 4: - SPECK_Decrypt(m_wspace+2, m_wspace+0, m_rkey); + case 27: + SPECK_Decrypt(m_wspace+2, m_wspace+0, m_rkeys); break; default: CRYPTOPP_ASSERT(0);; @@ -251,16 +265,19 @@ void SPECK128::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLength switch (m_kwords) { case 2: - m_rkey.New(32); - SPECK_ExpandKey_2W(m_rkey, m_wspace); + m_rkeys.New(32); + m_rounds = 32; + SPECK_ExpandKey_2W(m_rkeys, m_wspace); break; case 3: - m_rkey.New(33); - SPECK_ExpandKey_3W(m_rkey, m_wspace); + m_rkeys.New(33); + m_rounds = 33; + SPECK_ExpandKey_3W(m_rkeys, m_wspace); break; case 4: - m_rkey.New(34); - SPECK_ExpandKey_4W(m_rkey, m_wspace); + m_rkeys.New(34); + m_rounds = 34; + SPECK_ExpandKey_4W(m_rkeys, m_wspace); break; default: CRYPTOPP_ASSERT(0);; @@ -273,16 +290,16 @@ void SPECK128::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock typedef GetBlock InBlock; InBlock iblk(inBlock); iblk(m_wspace[0])(m_wspace[1]); - switch (m_kwords) + switch (m_rounds) { - case 2: - SPECK_Encrypt(m_wspace+2, m_wspace+0, m_rkey); + case 32: + SPECK_Encrypt(m_wspace+2, m_wspace+0, m_rkeys); break; - case 3: - SPECK_Encrypt(m_wspace+2, m_wspace+0, m_rkey); + case 33: + SPECK_Encrypt(m_wspace+2, m_wspace+0, m_rkeys); break; - case 4: - SPECK_Encrypt(m_wspace+2, m_wspace+0, m_rkey); + case 34: + SPECK_Encrypt(m_wspace+2, m_wspace+0, m_rkeys); break; default: CRYPTOPP_ASSERT(0);; @@ -299,16 +316,16 @@ void SPECK128::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock typedef GetBlock InBlock; InBlock iblk(inBlock); iblk(m_wspace[0])(m_wspace[1]); - switch (m_kwords) + switch (m_rounds) { - case 2: - SPECK_Decrypt(m_wspace+2, m_wspace+0, m_rkey); + case 32: + SPECK_Decrypt(m_wspace+2, m_wspace+0, m_rkeys); break; - case 3: - SPECK_Decrypt(m_wspace+2, m_wspace+0, m_rkey); + case 33: + SPECK_Decrypt(m_wspace+2, m_wspace+0, m_rkeys); break; - case 4: - SPECK_Decrypt(m_wspace+2, m_wspace+0, m_rkey); + case 34: + SPECK_Decrypt(m_wspace+2, m_wspace+0, m_rkeys); break; default: CRYPTOPP_ASSERT(0);; @@ -319,4 +336,26 @@ void SPECK128::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock OutBlock oblk(xorBlock, outBlock); oblk(m_wspace[2])(m_wspace[3]); } +#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 +size_t SPECK128::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) const +{ +#if defined(CRYPTOPP_SSSE3_AVAILABLE) + if (HasSSSE3()) + return SPECK128_Enc_AdvancedProcessBlocks_SSSE3(m_rkeys, (size_t)m_rounds, + inBlocks, xorBlocks, outBlocks, length, flags); +#endif + return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags); +} + +size_t SPECK128::Dec::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) const +{ +#if defined(CRYPTOPP_SSSE3_AVAILABLE) + if (HasSSSE3()) + return SPECK128_Dec_AdvancedProcessBlocks_SSSE3(m_rkeys, (size_t)m_rounds, + inBlocks, xorBlocks, outBlocks, length, flags); +#endif + return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags); +} +#endif + NAMESPACE_END diff --git a/speck.h b/speck.h index 7f4acc38..037dab92 100644 --- a/speck.h +++ b/speck.h @@ -45,12 +45,13 @@ template struct SPECK_Base { virtual ~SPECK_Base() {} - SPECK_Base() : m_kwords(0) {} + SPECK_Base() : m_kwords(0), m_rounds(0) {} typedef SecBlock > AlignedSecBlock; mutable AlignedSecBlock m_wspace; // workspace - AlignedSecBlock m_rkey; // round keys + AlignedSecBlock m_rkeys; // round keys unsigned int m_kwords; // number of key words + unsigned int m_rounds; // number of rounds }; //! \class SPECK64 @@ -141,6 +142,9 @@ public: { protected: void ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const; +#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 + size_t AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) const; +#endif }; //! \brief Provides implementation for encryption transformation @@ -151,6 +155,9 @@ public: { protected: void ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const; +#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 + size_t AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) const; +#endif }; typedef BlockCipherFinal Encryption;