diff --git a/Readme.txt b/Readme.txt index b4e329ac..4af41c4e 100644 --- a/Readme.txt +++ b/Readme.txt @@ -78,10 +78,9 @@ The following compilers are supported for this release. Please visit http://www.cryptopp.com the most up to date build instructions and porting notes. * MSVC 6.0 - 2008 - * GCC 3.3 - 4.2 - * Borland C++Builder 2006 - 2007 - * Intel C++ Compiler 9.1 - 10.0 - * Sun Studio 11 - 12 (CC 5.8 - 5.9) + * GCC 3.3 - 4.3 + * Intel C++ Compiler 9 - 11 + * Sun Studio 12 (CC 5.9) *** Important Usage Notes *** @@ -415,7 +414,7 @@ the mailing list. 5.6 - added AuthenticatedSymmetricCipher interface class and Filter wrappers - added CCM, GCM (with SSE2 assembly), CMAC, and SEED - added support for variable length IVs - - improved AES speed on x86 and x64 + - improved AES and SHA-256 speed on x86 and x64 - fixed run-time validation error on x86-64 with GCC 4.3.2 -O2 - fixed HashFilter bug when putMessage=true - fixed warnings with GCC 4.3 diff --git a/TestVectors/all.txt b/TestVectors/all.txt index f26df0dc..45aa4fbf 100644 --- a/TestVectors/all.txt +++ b/TestVectors/all.txt @@ -1,27 +1,27 @@ AlgorithmType: FileList Name: all.txt collection -Test: tea.txt -Test: camellia.txt -Test: shacal2.txt -Test: ttmac.txt -Test: whrlpool.txt -Test: dlies.txt -Test: dsa.txt -Test: dsa_1363.txt -Test: esign.txt -Test: hmac.txt -Test: nr.txt -Test: rsa_oaep.txt -Test: rsa_pkcs1_1_5.txt -Test: rsa_pss.txt -Test: rw.txt -Test: seal.txt -Test: sha.txt -Test: panama.txt -Test: aes.txt -Test: salsa.txt -Test: vmac.txt -Test: sosemanuk.txt -Test: ccm.txt -Test: gcm.txt -Test: cmac.txt +Test: TestVectors/tea.txt +Test: TestVectors/camellia.txt +Test: TestVectors/shacal2.txt +Test: TestVectors/ttmac.txt +Test: TestVectors/whrlpool.txt +Test: TestVectors/dlies.txt +Test: TestVectors/dsa.txt +Test: TestVectors/dsa_1363.txt +Test: TestVectors/esign.txt +Test: TestVectors/hmac.txt +Test: TestVectors/nr.txt +Test: TestVectors/rsa_oaep.txt +Test: TestVectors/rsa_pkcs1_1_5.txt +Test: TestVectors/rsa_pss.txt +Test: TestVectors/rw.txt +Test: TestVectors/seal.txt +Test: TestVectors/sha.txt +Test: TestVectors/panama.txt +Test: TestVectors/aes.txt +Test: TestVectors/salsa.txt +Test: TestVectors/vmac.txt +Test: TestVectors/sosemanuk.txt +Test: TestVectors/ccm.txt +Test: TestVectors/gcm.txt +Test: TestVectors/cmac.txt diff --git a/asn.h b/asn.h index ab929918..c35126bc 100644 --- a/asn.h +++ b/asn.h @@ -138,8 +138,8 @@ public: byte PeekByte() const; void CheckByte(byte b); - size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true); - size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const; + size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true); + size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const; // call this to denote end of sequence void MessageEnd(); diff --git a/config.h b/config.h index 2805953c..838d5a94 100644 --- a/config.h +++ b/config.h @@ -311,7 +311,7 @@ NAMESPACE_END #endif // how to declare class constants -#if defined(_MSC_VER) && _MSC_VER <= 1300 +#if (defined(_MSC_VER) && _MSC_VER <= 1300) || defined(__INTEL_COMPILER) # define CRYPTOPP_CONSTANT(x) enum {x}; #else # define CRYPTOPP_CONSTANT(x) static const int x; diff --git a/cryptlib.cpp b/cryptlib.cpp index b84a085e..0e5bd248 100644 --- a/cryptlib.cpp +++ b/cryptlib.cpp @@ -26,7 +26,10 @@ CRYPTOPP_COMPILE_ASSERT(sizeof(word64) == 8); CRYPTOPP_COMPILE_ASSERT(sizeof(dword) == 2*sizeof(word)); #endif -const std::string BufferedTransformation::NULL_CHANNEL; +const std::string DEFAULT_CHANNEL; +const std::string AAD_CHANNEL = "AAD"; +const std::string &BufferedTransformation::NULL_CHANNEL = DEFAULT_CHANNEL; + const NullNameValuePairs g_nullNameValuePairs; BufferedTransformation & TheBitBucket() @@ -254,12 +257,12 @@ word32 RandomNumberGenerator::GenerateWord32(word32 min, word32 max) void RandomNumberGenerator::GenerateBlock(byte *output, size_t size) { ArraySink s(output, size); - GenerateIntoBufferedTransformation(s, BufferedTransformation::NULL_CHANNEL, size); + GenerateIntoBufferedTransformation(s, DEFAULT_CHANNEL, size); } void RandomNumberGenerator::DiscardBytes(size_t n) { - GenerateIntoBufferedTransformation(TheBitBucket(), BufferedTransformation::NULL_CHANNEL, n); + GenerateIntoBufferedTransformation(TheBitBucket(), DEFAULT_CHANNEL, n); } void RandomNumberGenerator::GenerateIntoBufferedTransformation(BufferedTransformation &target, const std::string &channel, lword length) @@ -593,12 +596,12 @@ size_t BufferedTransformation::ChannelPutWord32(const std::string &channel, word size_t BufferedTransformation::PutWord16(word16 value, ByteOrder order, bool blocking) { - return ChannelPutWord16(NULL_CHANNEL, value, order, blocking); + return ChannelPutWord16(DEFAULT_CHANNEL, value, order, blocking); } size_t BufferedTransformation::PutWord32(word32 value, ByteOrder order, bool blocking) { - return ChannelPutWord32(NULL_CHANNEL, value, order, blocking); + return ChannelPutWord32(DEFAULT_CHANNEL, value, order, blocking); } size_t BufferedTransformation::PeekWord16(word16 &value, ByteOrder order) const diff --git a/cryptlib.h b/cryptlib.h index a6b4aaa1..330ce2b8 100644 --- a/cryptlib.h +++ b/cryptlib.h @@ -746,6 +746,12 @@ public: bool Wait(unsigned long milliseconds, CallStack const& callStack); }; +//! the default channel for BufferedTransformation, equal to the empty string +extern const std::string DEFAULT_CHANNEL; + +//! channel for additional authenticated data, equal to "AAD" +extern const std::string AAD_CHANNEL; + //! interface for buffered transformations /*! BufferedTransformation is a generalization of BlockTransformation, @@ -776,7 +782,7 @@ class CRYPTOPP_DLL CRYPTOPP_NO_VTABLE BufferedTransformation : public Algorithm, { public: // placed up here for CW8 - static const std::string NULL_CHANNEL; // the empty string "" + static const std::string &NULL_CHANNEL; // same as DEFAULT_CHANNEL, for backwards compatibility BufferedTransformation() : Algorithm(false) {} @@ -903,18 +909,18 @@ public: size_t PeekWord32(word32 &value, ByteOrder order=BIG_ENDIAN_ORDER) const; //! move transferMax bytes of the buffered output to target as input - lword TransferTo(BufferedTransformation &target, lword transferMax=LWORD_MAX, const std::string &channel=NULL_CHANNEL) + lword TransferTo(BufferedTransformation &target, lword transferMax=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL) {TransferTo2(target, transferMax, channel); return transferMax;} //! discard skipMax bytes from the output buffer virtual lword Skip(lword skipMax=LWORD_MAX); //! copy copyMax bytes of the buffered output to target as input - lword CopyTo(BufferedTransformation &target, lword copyMax=LWORD_MAX, const std::string &channel=NULL_CHANNEL) const + lword CopyTo(BufferedTransformation &target, lword copyMax=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL) const {return CopyRangeTo(target, 0, copyMax, channel);} //! copy copyMax bytes of the buffered output, starting at position (relative to current position), to target as input - lword CopyRangeTo(BufferedTransformation &target, lword position, lword copyMax=LWORD_MAX, const std::string &channel=NULL_CHANNEL) const + lword CopyRangeTo(BufferedTransformation &target, lword position, lword copyMax=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL) const {lword i = position; CopyRangeTo2(target, i, i+copyMax, channel); return i-position;} #ifdef CRYPTOPP_MAINTAIN_BACKWARDS_COMPATIBILITY @@ -939,18 +945,18 @@ public: //! skip count number of messages virtual unsigned int SkipMessages(unsigned int count=UINT_MAX); //! - unsigned int TransferMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=NULL_CHANNEL) + unsigned int TransferMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=DEFAULT_CHANNEL) {TransferMessagesTo2(target, count, channel); return count;} //! - unsigned int CopyMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=NULL_CHANNEL) const; + unsigned int CopyMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=DEFAULT_CHANNEL) const; //! virtual void SkipAll(); //! - void TransferAllTo(BufferedTransformation &target, const std::string &channel=NULL_CHANNEL) + void TransferAllTo(BufferedTransformation &target, const std::string &channel=DEFAULT_CHANNEL) {TransferAllTo2(target, channel);} //! - void CopyAllTo(BufferedTransformation &target, const std::string &channel=NULL_CHANNEL) const; + void CopyAllTo(BufferedTransformation &target, const std::string &channel=DEFAULT_CHANNEL) const; virtual bool GetNextMessageSeries() {return false;} virtual unsigned int NumberOfMessagesInThisSeries() const {return NumberOfMessages();} @@ -960,13 +966,13 @@ public: //! \name NON-BLOCKING TRANSFER OF OUTPUT //@{ //! upon return, byteCount contains number of bytes that have finished being transfered, and returns the number of bytes left in the current transfer block - virtual size_t TransferTo2(BufferedTransformation &target, lword &byteCount, const std::string &channel=NULL_CHANNEL, bool blocking=true) =0; + virtual size_t TransferTo2(BufferedTransformation &target, lword &byteCount, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) =0; //! upon return, begin contains the start position of data yet to be finished copying, and returns the number of bytes left in the current transfer block - virtual size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const =0; + virtual size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const =0; //! upon return, messageCount contains number of messages that have finished being transfered, and returns the number of bytes left in the current transfer block - size_t TransferMessagesTo2(BufferedTransformation &target, unsigned int &messageCount, const std::string &channel=NULL_CHANNEL, bool blocking=true); + size_t TransferMessagesTo2(BufferedTransformation &target, unsigned int &messageCount, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true); //! returns the number of bytes left in the current transfer block - size_t TransferAllTo2(BufferedTransformation &target, const std::string &channel=NULL_CHANNEL, bool blocking=true); + size_t TransferAllTo2(BufferedTransformation &target, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true); //@} //! \name CHANNELS diff --git a/cryptlib.vcproj b/cryptlib.vcproj index a0458ff1..cb34a4cf 100755 --- a/cryptlib.vcproj +++ b/cryptlib.vcproj @@ -8528,7 +8528,7 @@ > @@ -8546,7 +8546,7 @@ > @@ -8563,7 +8563,7 @@ > @@ -8581,7 +8581,7 @@ > diff --git a/datatest.cpp b/datatest.cpp index a0282447..3e97a3e5 100644 --- a/datatest.cpp +++ b/datatest.cpp @@ -410,19 +410,19 @@ void TestAuthenticatedSymmetricCipher(TestData &v, const NameValuePairs &overrid if (macAtBegin) sm.TransferTo(df); - sh.CopyTo(df, LWORD_MAX, "AAD"); + sh.CopyTo(df, LWORD_MAX, AAD_CHANNEL); sc.TransferTo(df); - sf.CopyTo(df, LWORD_MAX, "AAD"); + sf.CopyTo(df, LWORD_MAX, AAD_CHANNEL); if (!macAtBegin) sm.TransferTo(df); df.MessageEnd(); - sh.TransferTo(ef, sh.MaxRetrievable()/2+1, "AAD"); - sh.TransferTo(ef, LWORD_MAX, "AAD"); + sh.TransferTo(ef, sh.MaxRetrievable()/2+1, AAD_CHANNEL); + sh.TransferTo(ef, LWORD_MAX, AAD_CHANNEL); sp.TransferTo(ef, sp.MaxRetrievable()/2+1); sp.TransferTo(ef); - sf.TransferTo(ef, sf.MaxRetrievable()/2+1, "AAD"); - sf.TransferTo(ef, LWORD_MAX, "AAD"); + sf.TransferTo(ef, sf.MaxRetrievable()/2+1, AAD_CHANNEL); + sf.TransferTo(ef, LWORD_MAX, AAD_CHANNEL); ef.MessageEnd(); if (test == "Encrypt" && encrypted != ciphertext+mac) diff --git a/files.h b/files.h index d98d4b6f..2c4e2b8e 100644 --- a/files.h +++ b/files.h @@ -31,8 +31,8 @@ public: std::istream* GetStream() {return m_stream;} lword MaxRetrievable() const; - size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true); - size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const; + size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true); + size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const; lword Skip(lword skipMax=ULONG_MAX); private: diff --git a/filters.cpp b/filters.cpp index 9a749f4e..083dfd36 100644 --- a/filters.cpp +++ b/filters.cpp @@ -596,7 +596,7 @@ void StreamTransformationFilter::NextPutMultiple(const byte *inString, size_t le do { size_t len = m_optimalBufferSize; - byte *space = HelpCreatePutSpace(*AttachedTransformation(), NULL_CHANNEL, s, length, len); + byte *space = HelpCreatePutSpace(*AttachedTransformation(), DEFAULT_CHANNEL, s, length, len); if (len < length) { if (len == m_optimalBufferSize) @@ -636,7 +636,7 @@ void StreamTransformationFilter::LastPut(const byte *inString, size_t length) { // do padding size_t blockSize = STDMAX(minLastBlockSize, (size_t)m_cipher.MandatoryBlockSize()); - space = HelpCreatePutSpace(*AttachedTransformation(), NULL_CHANNEL, blockSize); + space = HelpCreatePutSpace(*AttachedTransformation(), DEFAULT_CHANNEL, blockSize); memcpy(space, inString, length); memset(space + length, 0, blockSize - length); m_cipher.ProcessLastBlock(space, space, blockSize); @@ -652,7 +652,7 @@ void StreamTransformationFilter::LastPut(const byte *inString, size_t length) throw InvalidCiphertext("StreamTransformationFilter: ciphertext length is not a multiple of block size"); } - space = HelpCreatePutSpace(*AttachedTransformation(), NULL_CHANNEL, length, m_optimalBufferSize); + space = HelpCreatePutSpace(*AttachedTransformation(), DEFAULT_CHANNEL, length, m_optimalBufferSize); m_cipher.ProcessLastBlock(space, inString, length); AttachedTransformation()->Put(space, length); } @@ -664,7 +664,7 @@ void StreamTransformationFilter::LastPut(const byte *inString, size_t length) unsigned int s; s = m_cipher.MandatoryBlockSize(); assert(s > 1); - space = HelpCreatePutSpace(*AttachedTransformation(), NULL_CHANNEL, s, m_optimalBufferSize); + space = HelpCreatePutSpace(*AttachedTransformation(), DEFAULT_CHANNEL, s, m_optimalBufferSize); if (m_cipher.IsForwardTransformation()) { assert(length < s); @@ -807,9 +807,9 @@ void HashVerificationFilter::LastPut(const byte *inString, size_t length) // ************************************************************* AuthenticatedEncryptionFilter::AuthenticatedEncryptionFilter(AuthenticatedSymmetricCipher &c, BufferedTransformation *attachment, - bool putMessage, int truncatedDigestSize, const std::string &macChannel, BlockPaddingScheme padding) + bool putAAD, int truncatedDigestSize, const std::string &macChannel, BlockPaddingScheme padding) : StreamTransformationFilter(c, attachment, padding, true) - , m_hf(c, new OutputProxy(*this, false), putMessage, truncatedDigestSize, "AAD", macChannel) + , m_hf(c, new OutputProxy(*this, false), putAAD, truncatedDigestSize, AAD_CHANNEL, macChannel) { assert(c.IsForwardTransformation()); } @@ -825,7 +825,7 @@ byte * AuthenticatedEncryptionFilter::ChannelCreatePutSpace(const std::string &c if (channel.empty()) return StreamTransformationFilter::CreatePutSpace(size); - if (channel == "AAD") + if (channel == AAD_CHANNEL) return m_hf.CreatePutSpace(size); throw InvalidChannelName("AuthenticatedEncryptionFilter", channel); @@ -836,7 +836,7 @@ size_t AuthenticatedEncryptionFilter::ChannelPut2(const std::string &channel, co if (channel.empty()) return StreamTransformationFilter::Put2(begin, length, messageEnd, blocking); - if (channel == "AAD") + if (channel == AAD_CHANNEL) return m_hf.Put2(begin, length, 0, blocking); throw InvalidChannelName("AuthenticatedEncryptionFilter", channel); @@ -876,7 +876,7 @@ byte * AuthenticatedDecryptionFilter::ChannelCreatePutSpace(const std::string &c if (channel.empty()) return m_streamFilter.CreatePutSpace(size); - if (channel == "AAD") + if (channel == AAD_CHANNEL) return m_hashVerifier.CreatePutSpace(size); throw InvalidChannelName("AuthenticatedDecryptionFilter", channel); @@ -891,7 +891,7 @@ size_t AuthenticatedDecryptionFilter::ChannelPut2(const std::string &channel, co return FilterWithBufferedInput::Put2(begin, length, messageEnd, blocking); } - if (channel == "AAD") + if (channel == AAD_CHANNEL) return m_hashVerifier.Put2(begin, length, 0, blocking); throw InvalidChannelName("AuthenticatedDecryptionFilter", channel); diff --git a/filters.h b/filters.h index 0562ad5c..7355646a 100644 --- a/filters.h +++ b/filters.h @@ -22,8 +22,8 @@ public: const BufferedTransformation *AttachedTransformation() const; void Detach(BufferedTransformation *newAttachment = NULL); - size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true); - size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const; + size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true); + size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const; void Initialize(const NameValuePairs ¶meters=g_nullNameValuePairs, int propagation=-1); bool Flush(bool hardFlush, int propagation=-1, bool blocking=true); @@ -38,11 +38,11 @@ protected: void PropagateInitialize(const NameValuePairs ¶meters, int propagation); - size_t Output(int outputSite, const byte *inString, size_t length, int messageEnd, bool blocking, const std::string &channel=NULL_CHANNEL); - size_t OutputModifiable(int outputSite, byte *inString, size_t length, int messageEnd, bool blocking, const std::string &channel=NULL_CHANNEL); - bool OutputMessageEnd(int outputSite, int propagation, bool blocking, const std::string &channel=NULL_CHANNEL); - bool OutputFlush(int outputSite, bool hardFlush, int propagation, bool blocking, const std::string &channel=NULL_CHANNEL); - bool OutputMessageSeriesEnd(int outputSite, int propagation, bool blocking, const std::string &channel=NULL_CHANNEL); + size_t Output(int outputSite, const byte *inString, size_t length, int messageEnd, bool blocking, const std::string &channel=DEFAULT_CHANNEL); + size_t OutputModifiable(int outputSite, byte *inString, size_t length, int messageEnd, bool blocking, const std::string &channel=DEFAULT_CHANNEL); + bool OutputMessageEnd(int outputSite, int propagation, bool blocking, const std::string &channel=DEFAULT_CHANNEL); + bool OutputFlush(int outputSite, bool hardFlush, int propagation, bool blocking, const std::string &channel=DEFAULT_CHANNEL); + bool OutputMessageSeriesEnd(int outputSite, int propagation, bool blocking, const std::string &channel=DEFAULT_CHANNEL); private: member_ptr m_attachment; @@ -289,7 +289,7 @@ typedef StreamTransformationFilter StreamCipherFilter; class CRYPTOPP_DLL HashFilter : public Bufferless, private FilterPutSpaceHelper { public: - HashFilter(HashTransformation &hm, BufferedTransformation *attachment = NULL, bool putMessage=false, int truncatedDigestSize=-1, const std::string &messagePutChannel=NULL_CHANNEL, const std::string &hashPutChannel=NULL_CHANNEL); + HashFilter(HashTransformation &hm, BufferedTransformation *attachment = NULL, bool putMessage=false, int truncatedDigestSize=-1, const std::string &messagePutChannel=DEFAULT_CHANNEL, const std::string &hashPutChannel=DEFAULT_CHANNEL); std::string AlgorithmName() const {return m_hashModule.AlgorithmName();} void IsolatedInitialize(const NameValuePairs ¶meters); @@ -315,7 +315,7 @@ public: : Exception(DATA_INTEGRITY_CHECK_FAILED, "HashVerificationFilter: message hash or MAC not valid") {} }; - enum Flags {HASH_AT_BEGIN=1, PUT_MESSAGE=2, PUT_HASH=4, PUT_RESULT=8, THROW_EXCEPTION=16, DEFAULT_FLAGS = HASH_AT_BEGIN | PUT_RESULT}; + enum Flags {HASH_AT_END=0, HASH_AT_BEGIN=1, PUT_MESSAGE=2, PUT_HASH=4, PUT_RESULT=8, THROW_EXCEPTION=16, DEFAULT_FLAGS = HASH_AT_BEGIN | PUT_RESULT}; HashVerificationFilter(HashTransformation &hm, BufferedTransformation *attachment = NULL, word32 flags = DEFAULT_FLAGS, int truncatedDigestSize=-1); std::string AlgorithmName() const {return m_hashModule.AlgorithmName();} @@ -345,7 +345,7 @@ class CRYPTOPP_DLL AuthenticatedEncryptionFilter : public StreamTransformationFi { public: /*! See StreamTransformationFilter for documentation on BlockPaddingScheme */ - AuthenticatedEncryptionFilter(AuthenticatedSymmetricCipher &c, BufferedTransformation *attachment = NULL, bool putMessage=false, int truncatedDigestSize=-1, const std::string &macChannel=NULL_CHANNEL, BlockPaddingScheme padding = DEFAULT_PADDING); + AuthenticatedEncryptionFilter(AuthenticatedSymmetricCipher &c, BufferedTransformation *attachment = NULL, bool putAAD=false, int truncatedDigestSize=-1, const std::string &macChannel=DEFAULT_CHANNEL, BlockPaddingScheme padding = DEFAULT_PADDING); void IsolatedInitialize(const NameValuePairs ¶meters); byte * ChannelCreatePutSpace(const std::string &channel, size_t &size); @@ -361,7 +361,7 @@ protected: class CRYPTOPP_DLL AuthenticatedDecryptionFilter : public FilterWithBufferedInput, public BlockPaddingSchemeDef { public: - enum Flags {MAC_AT_BEGIN=1, THROW_EXCEPTION=16, DEFAULT_FLAGS = THROW_EXCEPTION}; + enum Flags {MAC_AT_END=0, MAC_AT_BEGIN=1, THROW_EXCEPTION=16, DEFAULT_FLAGS = THROW_EXCEPTION}; /*! See StreamTransformationFilter for documentation on BlockPaddingScheme */ AuthenticatedDecryptionFilter(AuthenticatedSymmetricCipher &c, BufferedTransformation *attachment = NULL, word32 flags = DEFAULT_FLAGS, int truncatedDigestSize=-1, BlockPaddingScheme padding = DEFAULT_PADDING); @@ -412,7 +412,7 @@ public: : Exception(DATA_INTEGRITY_CHECK_FAILED, "VerifierFilter: digital signature not valid") {} }; - enum Flags {SIGNATURE_AT_BEGIN=1, PUT_MESSAGE=2, PUT_SIGNATURE=4, PUT_RESULT=8, THROW_EXCEPTION=16, DEFAULT_FLAGS = SIGNATURE_AT_BEGIN | PUT_RESULT}; + enum Flags {SIGNATURE_AT_END=0, SIGNATURE_AT_BEGIN=1, PUT_MESSAGE=2, PUT_SIGNATURE=4, PUT_RESULT=8, THROW_EXCEPTION=16, DEFAULT_FLAGS = SIGNATURE_AT_BEGIN | PUT_RESULT}; SignatureVerificationFilter(const PK_Verifier &verifier, BufferedTransformation *attachment = NULL, word32 flags = DEFAULT_FLAGS); std::string AlgorithmName() const {return m_verifier.AlgorithmName();} @@ -517,6 +517,8 @@ public: bool MessageSeriesEnd(int propagation=-1, bool blocking=true) {return m_passSignal ? m_owner.AttachedTransformation()->MessageSeriesEnd(propagation, blocking) : false;} + byte * ChannelCreatePutSpace(const std::string &channel, size_t &size) + {return m_owner.AttachedTransformation()->ChannelCreatePutSpace(channel, size);} size_t ChannelPut2(const std::string &channel, const byte *begin, size_t length, int messageEnd, bool blocking) {return m_owner.AttachedTransformation()->ChannelPut2(channel, begin, length, m_passSignal ? messageEnd : 0, blocking);} size_t ChannelPutModifiable2(const std::string &channel, byte *begin, size_t length, int messageEnd, bool blocking) @@ -669,8 +671,8 @@ public: template StringStore(const T &string) {StoreInitialize(MakeParameters("InputBuffer", ConstByteArrayParameter(string)));} - CRYPTOPP_DLL size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true); - CRYPTOPP_DLL size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const; + CRYPTOPP_DLL size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true); + CRYPTOPP_DLL size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const; private: CRYPTOPP_DLL void StoreInitialize(const NameValuePairs ¶meters); @@ -692,8 +694,8 @@ public: bool AnyRetrievable() const {return MaxRetrievable() != 0;} lword MaxRetrievable() const {return m_length-m_count;} - size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true); - size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const + size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true); + size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const { throw NotImplemented("RandomNumberStore: CopyRangeTo2() is not supported by this store"); } @@ -712,8 +714,8 @@ public: NullStore(lword size = ULONG_MAX) : m_size(size) {} void StoreInitialize(const NameValuePairs ¶meters) {} lword MaxRetrievable() const {return m_size;} - size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true); - size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const; + size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true); + size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const; private: lword m_size; @@ -756,11 +758,11 @@ public: void IsolatedInitialize(const NameValuePairs ¶meters) {m_store.IsolatedInitialize(parameters);} size_t Pump2(lword &byteCount, bool blocking=true) - {return m_store.TransferTo2(*AttachedTransformation(), byteCount, NULL_CHANNEL, blocking);} + {return m_store.TransferTo2(*AttachedTransformation(), byteCount, DEFAULT_CHANNEL, blocking);} size_t PumpMessages2(unsigned int &messageCount, bool blocking=true) - {return m_store.TransferMessagesTo2(*AttachedTransformation(), messageCount, NULL_CHANNEL, blocking);} + {return m_store.TransferMessagesTo2(*AttachedTransformation(), messageCount, DEFAULT_CHANNEL, blocking);} size_t PumpAll2(bool blocking=true) - {return m_store.TransferAllTo2(*AttachedTransformation(), NULL_CHANNEL, blocking);} + {return m_store.TransferAllTo2(*AttachedTransformation(), DEFAULT_CHANNEL, blocking);} bool SourceExhausted() const {return !m_store.AnyRetrievable() && !m_store.AnyMessages();} void SetAutoSignalPropagation(int propagation) diff --git a/fltrimpl.h b/fltrimpl.h index 40d70ff1..4087d7d9 100644 --- a/fltrimpl.h +++ b/fltrimpl.h @@ -34,7 +34,7 @@ } #define FILTER_OUTPUT2(site, statement, output, length, messageEnd) \ - FILTER_OUTPUT3(site, statement, output, length, messageEnd, NULL_CHANNEL) + FILTER_OUTPUT3(site, statement, output, length, messageEnd, DEFAULT_CHANNEL) #define FILTER_OUTPUT(site, output, length, messageEnd) \ FILTER_OUTPUT2(site, 0, output, length, messageEnd) diff --git a/iterhash.cpp b/iterhash.cpp index 642a7ca2..478950c9 100644 --- a/iterhash.cpp +++ b/iterhash.cpp @@ -132,14 +132,18 @@ template void IteratedHashBase::TruncatedFinal(by ByteOrder order = this->GetByteOrder(); PadLastBlock(blockSize - 2*sizeof(HashWordType)); - ConditionalByteReverse(order, dataBuf, dataBuf, blockSize - 2*sizeof(HashWordType)); + dataBuf[blockSize/sizeof(T)-2+order] = ConditionalByteReverse(order, this->GetBitCountLo()); + dataBuf[blockSize/sizeof(T)-1-order] = ConditionalByteReverse(order, this->GetBitCountHi()); - dataBuf[blockSize/sizeof(T)-2] = order ? this->GetBitCountHi() : this->GetBitCountLo(); - dataBuf[blockSize/sizeof(T)-1] = order ? this->GetBitCountLo() : this->GetBitCountHi(); + HashBlock(dataBuf); - HashEndianCorrectedBlock(dataBuf); - ConditionalByteReverse(order, stateBuf, stateBuf, this->DigestSize()); - memcpy(digest, stateBuf, size); + if (IsAligned(digest) && size%sizeof(HashWordType)==0) + ConditionalByteReverse(order, (HashWordType *)digest, stateBuf, size); + else + { + ConditionalByteReverse(order, stateBuf, stateBuf, this->DigestSize()); + memcpy(digest, stateBuf, size); + } this->Restart(); // reinit for next use } diff --git a/iterhash.h b/iterhash.h index 8af3177b..cce9e821 100644 --- a/iterhash.h +++ b/iterhash.h @@ -76,7 +76,7 @@ protected: }; //! _ -template +template class CRYPTOPP_NO_VTABLE IteratedHashWithStaticTransform : public ClonableImpl, T_Transform> > { @@ -90,7 +90,7 @@ protected: void Init() {T_Transform::InitState(this->m_state);} T_HashWordType* StateBuf() {return this->m_state;} - FixedSizeSecBlock m_state; + FixedSizeAlignedSecBlock m_state; }; #ifndef __GNUC__ diff --git a/mqueue.h b/mqueue.h index a4ee117c..b46f67d1 100644 --- a/mqueue.h +++ b/mqueue.h @@ -35,8 +35,8 @@ public: bool AnyRetrievable() const {return m_lengths.front() > 0;} - size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true); - size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const; + size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true); + size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const; lword TotalBytesRetrievable() const {return m_queue.MaxRetrievable();} @@ -49,7 +49,7 @@ public: unsigned int NumberOfMessageSeries() const {return (unsigned int)m_messageCounts.size()-1;} - unsigned int CopyMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=NULL_CHANNEL) const; + unsigned int CopyMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=DEFAULT_CHANNEL) const; const byte * Spy(size_t &contiguousSize) const; diff --git a/queue.cpp b/queue.cpp index f123201e..ae05a783 100644 --- a/queue.cpp +++ b/queue.cpp @@ -64,14 +64,14 @@ public: return len; } - inline size_t CopyTo(BufferedTransformation &target, const std::string &channel=BufferedTransformation::NULL_CHANNEL) const + inline size_t CopyTo(BufferedTransformation &target, const std::string &channel=DEFAULT_CHANNEL) const { size_t len = m_tail-m_head; target.ChannelPut(channel, buf+m_head, len); return len; } - inline size_t CopyTo(BufferedTransformation &target, size_t copyMax, const std::string &channel=BufferedTransformation::NULL_CHANNEL) const + inline size_t CopyTo(BufferedTransformation &target, size_t copyMax, const std::string &channel=DEFAULT_CHANNEL) const { size_t len = STDMIN(copyMax, m_tail-m_head); target.ChannelPut(channel, buf+m_head, len); @@ -92,7 +92,7 @@ public: return len; } - inline size_t TransferTo(BufferedTransformation &target, const std::string &channel=BufferedTransformation::NULL_CHANNEL) + inline size_t TransferTo(BufferedTransformation &target, const std::string &channel=DEFAULT_CHANNEL) { size_t len = m_tail-m_head; target.ChannelPutModifiable(channel, buf+m_head, len); @@ -100,7 +100,7 @@ public: return len; } - inline size_t TransferTo(BufferedTransformation &target, lword transferMax, const std::string &channel=BufferedTransformation::NULL_CHANNEL) + inline size_t TransferTo(BufferedTransformation &target, lword transferMax, const std::string &channel=DEFAULT_CHANNEL) { size_t len = UnsignedMin(m_tail-m_head, transferMax); target.ChannelPutModifiable(channel, buf+m_head, len); diff --git a/queue.h b/queue.h index e9e195c5..7e172007 100644 --- a/queue.h +++ b/queue.h @@ -35,8 +35,8 @@ public: size_t Peek(byte &outByte) const; size_t Peek(byte *outString, size_t peekMax) const; - size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true); - size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const; + size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true); + size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const; // these member functions are not inherited void SetNodeSize(size_t nodeSize); @@ -80,8 +80,8 @@ public: size_t Peek(byte &outByte) const; size_t Peek(byte *outString, size_t peekMax) const; - size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true); - size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const; + size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true); + size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const; private: const ByteQueue &m_queue; diff --git a/rijndael.cpp b/rijndael.cpp index 589733e0..04e1f21e 100644 --- a/rijndael.cpp +++ b/rijndael.cpp @@ -534,8 +534,10 @@ CRYPTOPP_NAKED void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks(void *l #endif #if CRYPTOPP_BOOL_X86 - AS_PUSH_IF86( bx) - AS_PUSH_IF86( bp) +#if !defined(_MSC_VER) || (_MSC_VER < 1300) + AS_PUSH_IF86(bx) +#endif + AS_PUSH_IF86(bp) AS2( mov [ecx+16*12+16*4], esp) AS2( lea esp, [ecx-512]) #endif @@ -583,7 +585,7 @@ CRYPTOPP_NAKED void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks(void *l // counter mode one-time setup AS2( mov WORD_REG(bp), [L_INBLOCKS]) - AS2( movdqa xmm2, [WORD_REG(bp)]) // counter + AS2( movdqu xmm2, [WORD_REG(bp)]) // counter AS2( pxor xmm2, xmm1) AS2( psrldq xmm1, 14) AS2( movd eax, xmm1) @@ -843,11 +845,13 @@ CRYPTOPP_NAKED void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks(void *l #else AS2( mov rbp, [L_BP]) #endif - AS_POP_IF86( bp) - AS_POP_IF86( bx) + AS_POP_IF86(bp) +#if !defined(_MSC_VER) || (_MSC_VER < 1300) + AS_POP_IF86(bx) +#endif #ifndef __GNUC__ - AS_POP_IF86( di) - AS_POP_IF86( si) + AS_POP_IF86(di) + AS_POP_IF86(si) #endif #ifdef CRYPTOPP_GENERATE_X64_MASM pop r12 diff --git a/secblock.h b/secblock.h index c2e9c003..481533c2 100644 --- a/secblock.h +++ b/secblock.h @@ -459,7 +459,7 @@ public: }; template -class FixedSizeAlignedSecBlock : public FixedSizeSecBlock, T_Align16> > +class FixedSizeAlignedSecBlock : public FixedSizeSecBlock, T_Align16> > { }; diff --git a/sha.cpp b/sha.cpp index 7322543f..905d12de 100644 --- a/sha.cpp +++ b/sha.cpp @@ -3,14 +3,21 @@ // Steve Reid implemented SHA-1. Wei Dai implemented SHA-2. // Both are in the public domain. +// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sha.cpp" to generate MASM code + #include "pch.h" #ifndef CRYPTOPP_IMPORTS +#ifndef CRYPTOPP_GENERATE_X64_MASM #include "sha.h" #include "misc.h" #include "cpu.h" +#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE +#include +#endif + NAMESPACE_BEGIN(CryptoPP) // start of Steve Reid's code @@ -93,7 +100,7 @@ void SHA256::InitState(HashWordType *state) memcpy(state, s, sizeof(s)); } -static const word32 SHA256_K[64] = { +extern const word32 SHA256_K[64] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, @@ -112,10 +119,333 @@ static const word32 SHA256_K[64] = { 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 }; +#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM + +#if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM) + +#pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code + +static void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len) +{ + #define LOCALS_SIZE 8*4 + 16*4 + 4*WORD_SZ + #define H(i) [BASE+ASM_MOD(1024+7-(i),8)*4] + #define G(i) H(i+1) + #define F(i) H(i+2) + #define E(i) H(i+3) + #define D(i) H(i+4) + #define C(i) H(i+5) + #define B(i) H(i+6) + #define A(i) H(i+7) + #define Wt(i) BASE+8*4+ASM_MOD(1024+15-(i),16)*4 + #define Wt_2(i) Wt((i)-2) + #define Wt_15(i) Wt((i)-15) + #define Wt_7(i) Wt((i)-7) + #define K_END [BASE+8*4+16*4+0*WORD_SZ] + #define STATE_SAVE [BASE+8*4+16*4+1*WORD_SZ] + #define DATA_SAVE [BASE+8*4+16*4+2*WORD_SZ] + #define DATA_END [BASE+8*4+16*4+3*WORD_SZ] + #define Kt(i) WORD_REG(si)+(i)*4 +#if CRYPTOPP_BOOL_X86 + #define BASE esp+4 +#elif defined(__GNUC__) + #define BASE r8 +#else + #define BASE rsp +#endif + +#define RA0(i, edx, edi) \ + AS2( add edx, [Kt(i)] )\ + AS2( add edx, [Wt(i)] )\ + AS2( add edx, H(i) )\ + +#define RA1(i, edx, edi) + +#define RB0(i, edx, edi) + +#define RB1(i, edx, edi) \ + AS2( mov AS_REG_7d, [Wt_2(i)] )\ + AS2( mov edi, [Wt_15(i)])\ + AS2( mov ebx, AS_REG_7d )\ + AS2( shr AS_REG_7d, 10 )\ + AS2( ror ebx, 17 )\ + AS2( xor AS_REG_7d, ebx )\ + AS2( ror ebx, 2 )\ + AS2( xor ebx, AS_REG_7d )/* s1(W_t-2) */\ + AS2( add ebx, [Wt_7(i)])\ + AS2( mov AS_REG_7d, edi )\ + AS2( shr AS_REG_7d, 3 )\ + AS2( ror edi, 7 )\ + AS2( add ebx, [Wt(i)])/* s1(W_t-2) + W_t-7 + W_t-16 */\ + AS2( xor AS_REG_7d, edi )\ + AS2( add edx, [Kt(i)])\ + AS2( ror edi, 11 )\ + AS2( add edx, H(i) )\ + AS2( xor AS_REG_7d, edi )/* s0(W_t-15) */\ + AS2( add AS_REG_7d, ebx )/* W_t = s1(W_t-2) + W_t-7 + s0(W_t-15) W_t-16*/\ + AS2( mov [Wt(i)], AS_REG_7d)\ + AS2( add edx, AS_REG_7d )\ + +#define ROUND(i, r, eax, ecx, edi, edx)\ + /* in: edi = E */\ + /* unused: eax, ecx, temp: ebx, AS_REG_7d, out: edx = T1 */\ + AS2( mov edx, F(i) )\ + AS2( xor edx, G(i) )\ + AS2( and edx, edi )\ + AS2( xor edx, G(i) )/* Ch(E,F,G) = (G^(E&(F^G))) */\ + AS2( mov AS_REG_7d, edi )\ + AS2( ror edi, 6 )\ + AS2( ror AS_REG_7d, 25 )\ + RA##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\ + AS2( xor AS_REG_7d, edi )\ + AS2( ror edi, 5 )\ + AS2( xor AS_REG_7d, edi )/* S1(E) */\ + AS2( add edx, AS_REG_7d )/* T1 = S1(E) + Ch(E,F,G) + H + Wt + Kt */\ + RB##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\ + /* in: ecx = A, eax = B^C, edx = T1 */\ + /* unused: edx, temp: ebx, AS_REG_7d, out: eax = A, ecx = B^C, edx = E */\ + AS2( mov ebx, ecx )\ + AS2( xor ecx, B(i) )/* A^B */\ + AS2( and eax, ecx )\ + AS2( xor eax, B(i) )/* Maj(A,B,C) = B^((A^B)&(B^C) */\ + AS2( mov AS_REG_7d, ebx )\ + AS2( ror ebx, 2 )\ + AS2( add eax, edx )/* T1 + Maj(A,B,C) */\ + AS2( add edx, D(i) )\ + AS2( mov D(i), edx )\ + AS2( ror AS_REG_7d, 22 )\ + AS2( xor AS_REG_7d, ebx )\ + AS2( ror ebx, 11 )\ + AS2( xor AS_REG_7d, ebx )\ + AS2( add eax, AS_REG_7d )/* T1 + S0(A) + Maj(A,B,C) */\ + AS2( mov H(i), eax )\ + +#define SWAP_COPY(i) \ + AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\ + AS1( bswap WORD_REG(bx))\ + AS2( mov [Wt(i*(1+CRYPTOPP_BOOL_X64)+CRYPTOPP_BOOL_X64)], WORD_REG(bx)) + +#if defined(__GNUC__) + #if CRYPTOPP_BOOL_X64 + __m128i workspace[(LOCALS_SIZE+15)/16]; + #endif + __asm__ __volatile__ + ( + #if CRYPTOPP_BOOL_X64 + "movq %4, %%r8;" + #endif + ".intel_syntax noprefix;" +#elif defined(CRYPTOPP_GENERATE_X64_MASM) + ALIGN 8 + X86_SHA256_HashBlocks PROC FRAME + rex_push_reg rsi + push_reg rdi + push_reg rbx + push_reg rbp + alloc_stack(LOCALS_SIZE+8) + .endprolog + mov rdi, r8 + lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4] +#endif + +#if CRYPTOPP_BOOL_X86 + #ifndef __GNUC__ + AS2( mov edi, [len]) + AS2( lea WORD_REG(si), [SHA256_K+48*4]) + #endif + #if !defined(_MSC_VER) || (_MSC_VER < 1300) + AS_PUSH_IF86(bx) + #endif + + AS_PUSH_IF86(bp) + AS2( mov ebx, esp) + AS2( and esp, -16) + AS2( sub WORD_REG(sp), LOCALS_SIZE) + AS_PUSH_IF86(bx) +#endif + AS2( mov STATE_SAVE, WORD_REG(cx)) + AS2( mov DATA_SAVE, WORD_REG(dx)) + AS2( add WORD_REG(di), WORD_REG(dx)) + AS2( mov DATA_END, WORD_REG(di)) + AS2( mov K_END, WORD_REG(si)) + +#if CRYPTOPP_BOOL_X86 + AS2( test edi, 1) + ASJ( jnz, 2, f) +#endif + + AS2( movdqa xmm0, XMMWORD_PTR [WORD_REG(cx)+0*16]) + AS2( movdqa xmm1, XMMWORD_PTR [WORD_REG(cx)+1*16]) + +#if CRYPTOPP_BOOL_X86 + ASJ( jmp, 0, f) + ASL(2) // non-SSE2 + AS2( mov esi, ecx) + AS2( lea edi, A(0)) + AS2( mov ecx, 8) + AS1( rep movsd) + AS2( mov esi, K_END) + ASJ( jmp, 3, f) +#endif + + ASL(0) + AS2( movdqa E(0), xmm1) + AS2( movdqa A(0), xmm0) +#if CRYPTOPP_BOOL_X86 + ASL(3) +#endif + AS2( sub WORD_REG(si), 48*4) + SWAP_COPY(0) SWAP_COPY(1) SWAP_COPY(2) SWAP_COPY(3) + SWAP_COPY(4) SWAP_COPY(5) SWAP_COPY(6) SWAP_COPY(7) +#if CRYPTOPP_BOOL_X86 + SWAP_COPY(8) SWAP_COPY(9) SWAP_COPY(10) SWAP_COPY(11) + SWAP_COPY(12) SWAP_COPY(13) SWAP_COPY(14) SWAP_COPY(15) +#endif + AS2( mov edi, E(0)) // E + AS2( mov eax, B(0)) // B + AS2( xor eax, C(0)) // B^C + AS2( mov ecx, A(0)) // A + + ROUND(0, 0, eax, ecx, edi, edx) + ROUND(1, 0, ecx, eax, edx, edi) + ROUND(2, 0, eax, ecx, edi, edx) + ROUND(3, 0, ecx, eax, edx, edi) + ROUND(4, 0, eax, ecx, edi, edx) + ROUND(5, 0, ecx, eax, edx, edi) + ROUND(6, 0, eax, ecx, edi, edx) + ROUND(7, 0, ecx, eax, edx, edi) + ROUND(8, 0, eax, ecx, edi, edx) + ROUND(9, 0, ecx, eax, edx, edi) + ROUND(10, 0, eax, ecx, edi, edx) + ROUND(11, 0, ecx, eax, edx, edi) + ROUND(12, 0, eax, ecx, edi, edx) + ROUND(13, 0, ecx, eax, edx, edi) + ROUND(14, 0, eax, ecx, edi, edx) + ROUND(15, 0, ecx, eax, edx, edi) + + ASL(1) + AS2(add WORD_REG(si), 4*16) + ROUND(0, 1, eax, ecx, edi, edx) + ROUND(1, 1, ecx, eax, edx, edi) + ROUND(2, 1, eax, ecx, edi, edx) + ROUND(3, 1, ecx, eax, edx, edi) + ROUND(4, 1, eax, ecx, edi, edx) + ROUND(5, 1, ecx, eax, edx, edi) + ROUND(6, 1, eax, ecx, edi, edx) + ROUND(7, 1, ecx, eax, edx, edi) + ROUND(8, 1, eax, ecx, edi, edx) + ROUND(9, 1, ecx, eax, edx, edi) + ROUND(10, 1, eax, ecx, edi, edx) + ROUND(11, 1, ecx, eax, edx, edi) + ROUND(12, 1, eax, ecx, edi, edx) + ROUND(13, 1, ecx, eax, edx, edi) + ROUND(14, 1, eax, ecx, edi, edx) + ROUND(15, 1, ecx, eax, edx, edi) + AS2( cmp WORD_REG(si), K_END) + ASJ( jne, 1, b) + + AS2( mov WORD_REG(dx), DATA_SAVE) + AS2( add WORD_REG(dx), 64) + AS2( mov AS_REG_7, STATE_SAVE) + AS2( mov DATA_SAVE, WORD_REG(dx)) + +#if CRYPTOPP_BOOL_X86 + AS2( test DWORD PTR DATA_END, 1) + ASJ( jnz, 4, f) +#endif + + AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_7+1*16]) + AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_7+0*16]) + AS2( paddd xmm1, E(0)) + AS2( paddd xmm0, A(0)) + AS2( movdqa [AS_REG_7+1*16], xmm1) + AS2( movdqa [AS_REG_7+0*16], xmm0) + AS2( cmp WORD_REG(dx), DATA_END) + ASJ( jl, 0, b) + +#if CRYPTOPP_BOOL_X86 + ASJ( jmp, 5, f) + ASL(4) // non-SSE2 + AS2( add [AS_REG_7+0*4], ecx) // A + AS2( add [AS_REG_7+4*4], edi) // E + AS2( mov eax, B(0)) + AS2( mov ebx, C(0)) + AS2( mov ecx, D(0)) + AS2( add [AS_REG_7+1*4], eax) + AS2( add [AS_REG_7+2*4], ebx) + AS2( add [AS_REG_7+3*4], ecx) + AS2( mov eax, F(0)) + AS2( mov ebx, G(0)) + AS2( mov ecx, H(0)) + AS2( add [AS_REG_7+5*4], eax) + AS2( add [AS_REG_7+6*4], ebx) + AS2( add [AS_REG_7+7*4], ecx) + AS2( mov ecx, AS_REG_7d) + AS2( cmp WORD_REG(dx), DATA_END) + ASJ( jl, 2, b) + ASL(5) +#endif + + AS_POP_IF86(sp) + AS_POP_IF86(bp) + #if !defined(_MSC_VER) || (_MSC_VER < 1300) + AS_POP_IF86(bx) + #endif + +#ifdef CRYPTOPP_GENERATE_X64_MASM + add rsp, LOCALS_SIZE+8 + pop rbp + pop rbx + pop rdi + pop rsi + ret + X86_SHA256_HashBlocks ENDP +#endif + +#ifdef __GNUC__ + ".att_syntax prefix;" + : + : "c" (state), "d" (data), "S" (SHA256_K+48), "D" (len) + #if CRYPTOPP_BOOL_X64 + , "r" (workspace) + #endif + : "memory", "cc", "%eax" + #if CRYPTOPP_BOOL_X64 + , "%rbx", "%r8" + #endif + ); +#endif +} + +#endif // #if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM) + +#ifndef CRYPTOPP_GENERATE_X64_MASM + +#ifdef CRYPTOPP_X64_MASM_AVAILABLE +extern "C" { +void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len); +} +#endif + +#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE) + +size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length) +{ + X86_SHA256_HashBlocks(m_state, input, (length&(size_t(0)-BLOCKSIZE)) - !HasSSE2()); + return length % BLOCKSIZE; +} + +size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length) +{ + X86_SHA256_HashBlocks(m_state, input, (length&(size_t(0)-BLOCKSIZE)) - !HasSSE2()); + return length % BLOCKSIZE; +} + +#endif + #define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15])) #define Ch(x,y,z) (z^(x&(y^z))) -#define Maj(x,y,z) ((x&y)|(z&(x|y))) +#define Maj(x,y,z) (y^((x^y)&(y^z))) #define a(i) T[(0-i)&7] #define b(i) T[(1-i)&7] @@ -138,6 +468,11 @@ static const word32 SHA256_K[64] = { void SHA256::Transform(word32 *state, const word32 *data) { word32 W[16]; +#if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE) + // this byte reverse is a waste of time, but this function is only called by MDC + ByteReverse(W, data, BLOCKSIZE); + X86_SHA256_HashBlocks(state, W, BLOCKSIZE - !HasSSE2()); +#else word32 T[8]; /* Copy context->state[] to working vars */ memcpy(T, state, sizeof(T)); @@ -158,11 +493,12 @@ void SHA256::Transform(word32 *state, const word32 *data) state[5] += f(0); state[6] += g(0); state[7] += h(0); +#endif } /* // smaller but slower -void SHA256_Transform(word32 *state, const word32 *data) +void SHA256::Transform(word32 *state, const word32 *data) { word32 T[20]; word32 W[32]; @@ -176,7 +512,7 @@ void SHA256_Transform(word32 *state, const word32 *data) { word32 w = data[j]; W[j] = w; - w += K[j]; + w += SHA256_K[j]; w += t[7]; w += S1(e); w += Ch(e, t[5], t[6]); @@ -196,7 +532,7 @@ void SHA256_Transform(word32 *state, const word32 *data) i = j&0xf; word32 w = s1(W[i+16-2]) + s0(W[i+16-15]) + W[i] + W[i+16-7]; W[i+16] = W[i] = w; - w += K[j]; + w += SHA256_K[j]; w += t[7]; w += S1(e); w += Ch(e, t[5], t[6]); @@ -208,7 +544,7 @@ void SHA256_Transform(word32 *state, const word32 *data) w = s1(W[(i+1)+16-2]) + s0(W[(i+1)+16-15]) + W[(i+1)] + W[(i+1)+16-7]; W[(i+1)+16] = W[(i+1)] = w; - w += K[j+1]; + w += SHA256_K[j+1]; w += (t-1)[7]; w += S1(e); w += Ch(e, (t-1)[5], (t-1)[6]); @@ -335,22 +671,16 @@ CRYPTOPP_NAKED static void CRYPTOPP_FASTCALL SHA512_SSE2_Transform(word64 *state AS2( lea edi, [esp+4+8*8]) // start at middle of state buffer. will decrement pointer each round to avoid copying AS2( lea esi, [esp+4+20*8+8]) // 16-byte alignment, then add 8 - AS2( movq mm4, [ecx+0*8]) - AS2( movq [edi+0*8], mm4) - AS2( movq mm0, [ecx+1*8]) - AS2( movq [edi+1*8], mm0) - AS2( movq mm0, [ecx+2*8]) - AS2( movq [edi+2*8], mm0) - AS2( movq mm0, [ecx+3*8]) - AS2( movq [edi+3*8], mm0) - AS2( movq mm5, [ecx+4*8]) - AS2( movq [edi+4*8], mm5) - AS2( movq mm0, [ecx+5*8]) - AS2( movq [edi+5*8], mm0) - AS2( movq mm0, [ecx+6*8]) - AS2( movq [edi+6*8], mm0) - AS2( movq mm0, [ecx+7*8]) - AS2( movq [edi+7*8], mm0) + AS2( movdqa xmm0, [ecx+0*16]) + AS2( movdq2q mm4, xmm0) + AS2( movdqa [edi+0*16], xmm0) + AS2( movdqa xmm0, [ecx+1*16]) + AS2( movdqa [edi+1*16], xmm0) + AS2( movdqa xmm0, [ecx+2*16]) + AS2( movdq2q mm5, xmm0) + AS2( movdqa [edi+2*16], xmm0) + AS2( movdqa xmm0, [ecx+3*16]) + AS2( movdqa [edi+3*16], xmm0) ASJ( jmp, 0, f) #define SSE2_S0_S1(r, a, b, c) \ @@ -475,18 +805,14 @@ CRYPTOPP_NAKED static void CRYPTOPP_FASTCALL SHA512_SSE2_Transform(word64 *state ASJ( jne, 1, b) #define SSE2_CombineState(i) \ - AS2( movq mm0, [edi+i*8])\ - AS2( paddq mm0, [ecx+i*8])\ - AS2( movq [ecx+i*8], mm0) + AS2( movdqa xmm0, [edi+i*16])\ + AS2( paddq xmm0, [ecx+i*16])\ + AS2( movdqa [ecx+i*16], xmm0) SSE2_CombineState(0) SSE2_CombineState(1) SSE2_CombineState(2) SSE2_CombineState(3) - SSE2_CombineState(4) - SSE2_CombineState(5) - SSE2_CombineState(6) - SSE2_CombineState(7) AS1( pop esp) AS1( emms) @@ -550,4 +876,5 @@ void SHA512::Transform(word64 *state, const word64 *data) NAMESPACE_END +#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM #endif // #ifndef CRYPTOPP_IMPORTS diff --git a/sha.h b/sha.h index 09ef24ae..ff580f6a 100644 --- a/sha.h +++ b/sha.h @@ -17,25 +17,31 @@ public: typedef SHA1 SHA; // for backwards compatibility //! implements the SHA-256 standard -class CRYPTOPP_DLL SHA256 : public IteratedHashWithStaticTransform +class CRYPTOPP_DLL SHA256 : public IteratedHashWithStaticTransform { public: +#if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE) + size_t HashMultipleBlocks(const word32 *input, size_t length); +#endif static void CRYPTOPP_API InitState(HashWordType *state); static void CRYPTOPP_API Transform(word32 *digest, const word32 *data); static const char * CRYPTOPP_API StaticAlgorithmName() {return "SHA-256";} }; //! implements the SHA-224 standard -class CRYPTOPP_DLL SHA224 : public IteratedHashWithStaticTransform +class CRYPTOPP_DLL SHA224 : public IteratedHashWithStaticTransform { public: +#if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE) + size_t HashMultipleBlocks(const word32 *input, size_t length); +#endif static void CRYPTOPP_API InitState(HashWordType *state); static void CRYPTOPP_API Transform(word32 *digest, const word32 *data) {SHA256::Transform(digest, data);} static const char * CRYPTOPP_API StaticAlgorithmName() {return "SHA-224";} }; //! implements the SHA-512 standard -class CRYPTOPP_DLL SHA512 : public IteratedHashWithStaticTransform +class CRYPTOPP_DLL SHA512 : public IteratedHashWithStaticTransform { public: static void CRYPTOPP_API InitState(HashWordType *state); @@ -44,7 +50,7 @@ public: }; //! implements the SHA-384 standard -class CRYPTOPP_DLL SHA384 : public IteratedHashWithStaticTransform +class CRYPTOPP_DLL SHA384 : public IteratedHashWithStaticTransform { public: static void CRYPTOPP_API InitState(HashWordType *state); diff --git a/simple.h b/simple.h index d450876a..35fd65ae 100644 --- a/simple.h +++ b/simple.h @@ -58,7 +58,7 @@ class CRYPTOPP_NO_VTABLE Unflushable : public T { public: bool Flush(bool completeFlush, int propagation=-1, bool blocking=true) - {return ChannelFlush(this->NULL_CHANNEL, completeFlush, propagation, blocking);} + {return ChannelFlush(DEFAULT_CHANNEL, completeFlush, propagation, blocking);} bool IsolatedFlush(bool hardFlush, bool blocking) {assert(false); return false;} bool ChannelFlush(const std::string &channel, bool hardFlush, int propagation=-1, bool blocking=true) @@ -123,15 +123,15 @@ class CRYPTOPP_NO_VTABLE Multichannel : public CustomFlushPropagation { public: bool Flush(bool hardFlush, int propagation=-1, bool blocking=true) - {return ChannelFlush(this->NULL_CHANNEL, hardFlush, propagation, blocking);} + {return this->ChannelFlush(DEFAULT_CHANNEL, hardFlush, propagation, blocking);} bool MessageSeriesEnd(int propagation=-1, bool blocking=true) - {return ChannelMessageSeriesEnd(this->NULL_CHANNEL, propagation, blocking);} + {return this->ChannelMessageSeriesEnd(DEFAULT_CHANNEL, propagation, blocking);} byte * CreatePutSpace(size_t &size) - {return ChannelCreatePutSpace(this->NULL_CHANNEL, size);} + {return this->ChannelCreatePutSpace(DEFAULT_CHANNEL, size);} size_t Put2(const byte *begin, size_t length, int messageEnd, bool blocking) - {return ChannelPut2(this->NULL_CHANNEL, begin, length, messageEnd, blocking);} + {return this->ChannelPut2(DEFAULT_CHANNEL, begin, length, messageEnd, blocking);} size_t PutModifiable2(byte *inString, size_t length, int messageEnd, bool blocking) - {return ChannelPutModifiable2(this->NULL_CHANNEL, inString, length, messageEnd, blocking);} + {return this->ChannelPutModifiable2(DEFAULT_CHANNEL, inString, length, messageEnd, blocking);} // void ChannelMessageSeriesEnd(const std::string &channel, int propagation=-1) // {PropagateMessageSeriesEnd(propagation, channel);} @@ -177,7 +177,7 @@ public: unsigned int NumberOfMessages() const {return m_messageEnd ? 0 : 1;} bool GetNextMessage(); - unsigned int CopyMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=NULL_CHANNEL) const; + unsigned int CopyMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=DEFAULT_CHANNEL) const; protected: virtual void StoreInitialize(const NameValuePairs ¶meters) =0; @@ -189,9 +189,9 @@ protected: class CRYPTOPP_DLL CRYPTOPP_NO_VTABLE Sink : public BufferedTransformation { public: - size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true) + size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) {transferBytes = 0; return 0;} - size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const + size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const {return 0;} }; diff --git a/test.cpp b/test.cpp index de995fd1..683cfb5c 100644 --- a/test.cpp +++ b/test.cpp @@ -559,7 +559,7 @@ void SecretShareFile(int threshold, int nShares, const char *filename, const cha channel = WordToString(i); fileSinks[i]->Put((byte *)channel.data(), 4); - channelSwitch->AddRoute(channel, *fileSinks[i], BufferedTransformation::NULL_CHANNEL); + channelSwitch->AddRoute(channel, *fileSinks[i], DEFAULT_CHANNEL); } source.PumpAll(); @@ -609,7 +609,7 @@ void InformationDisperseFile(int threshold, int nShares, const char *filename) channel = WordToString(i); fileSinks[i]->Put((byte *)channel.data(), 4); - channelSwitch->AddRoute(channel, *fileSinks[i], BufferedTransformation::NULL_CHANNEL); + channelSwitch->AddRoute(channel, *fileSinks[i], DEFAULT_CHANNEL); } source.PumpAll(); diff --git a/vmac.cpp b/vmac.cpp index f71bafb5..6b490f90 100755 --- a/vmac.cpp +++ b/vmac.cpp @@ -57,12 +57,8 @@ void VMAC_Base::UncheckedSetKey(const byte *userKey, unsigned int keylength, con /* Fill nh key */ in[0] = 0x80; - for (i = 0; i < m_nhKeySize()*sizeof(word64); i += blockSize) - { - cipher.ProcessBlock(in, out.BytePtr()); - ConditionalByteReverse(BIG_ENDIAN_ORDER, m_nhKey()+i/sizeof(word64), out.begin(), blockSize); - in[15]++; - } + cipher.AdvancedProcessBlocks(in, NULL, (byte *)m_nhKey(), m_nhKeySize()*sizeof(word64), cipher.BT_InBlockIsCounter); + ConditionalByteReverse(BIG_ENDIAN_ORDER, m_nhKey(), m_nhKey(), m_nhKeySize()*sizeof(word64)); /* Fill poly key */ in[0] = 0xC0; @@ -137,6 +133,7 @@ void VMAC_Base::Resynchronize(const byte *nonce, int len) void VMAC_Base::HashEndianCorrectedBlock(const word64 *data) { assert(false); + throw 0; } #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86 diff --git a/x64dll.asm b/x64dll.asm index 643dbe42..6b94e1e1 100644 --- a/x64dll.asm +++ b/x64dll.asm @@ -1,665 +1,1955 @@ -include ksamd64.inc -EXTERNDEF ?Te@rdtable@CryptoPP@@3PA_KA:FAR -EXTERNDEF ?g_cacheLineSize@CryptoPP@@3IA:FAR -.CODE - - ALIGN 8 -Baseline_Add PROC - lea rdx, [rdx+8*rcx] - lea r8, [r8+8*rcx] - lea r9, [r9+8*rcx] - neg rcx ; rcx is negative index - jz $1@Baseline_Add - mov rax,[r8+8*rcx] - add rax,[r9+8*rcx] - mov [rdx+8*rcx],rax -$0@Baseline_Add: - mov rax,[r8+8*rcx+8] - adc rax,[r9+8*rcx+8] - mov [rdx+8*rcx+8],rax - lea rcx,[rcx+2] ; advance index, avoid inc which causes slowdown on Intel Core 2 - jrcxz $1@Baseline_Add ; loop until rcx overflows and becomes zero - mov rax,[r8+8*rcx] - adc rax,[r9+8*rcx] - mov [rdx+8*rcx],rax - jmp $0@Baseline_Add -$1@Baseline_Add: - mov rax, 0 - adc rax, rax ; store carry into rax (return result register) - ret -Baseline_Add ENDP - - ALIGN 8 -Baseline_Sub PROC - lea rdx, [rdx+8*rcx] - lea r8, [r8+8*rcx] - lea r9, [r9+8*rcx] - neg rcx ; rcx is negative index - jz $1@Baseline_Sub - mov rax,[r8+8*rcx] - sub rax,[r9+8*rcx] - mov [rdx+8*rcx],rax -$0@Baseline_Sub: - mov rax,[r8+8*rcx+8] - sbb rax,[r9+8*rcx+8] - mov [rdx+8*rcx+8],rax - lea rcx,[rcx+2] ; advance index, avoid inc which causes slowdown on Intel Core 2 - jrcxz $1@Baseline_Sub ; loop until rcx overflows and becomes zero - mov rax,[r8+8*rcx] - sbb rax,[r9+8*rcx] - mov [rdx+8*rcx],rax - jmp $0@Baseline_Sub -$1@Baseline_Sub: - mov rax, 0 - adc rax, rax ; store carry into rax (return result register) - - ret -Baseline_Sub ENDP - -ALIGN 8 -Rijndael_Enc_AdvancedProcessBlocks PROC FRAME -rex_push_reg rsi -push_reg rdi -push_reg rbx -push_reg rbp -push_reg r12 -.endprolog -mov r8, rcx -mov rsi, ?Te@rdtable@CryptoPP@@3PA_KA -mov rdi, QWORD PTR [?g_cacheLineSize@CryptoPP@@3IA] -mov rbp, [(r8+16*19)] -mov rax, 16 -and rax, rbp -movdqa xmm3, XMMWORD PTR [rdx+16+rax] -movdqa [(r8+16*12)], xmm3 -lea rax, [rdx+rax+2*16] -sub rax, rbp -label0: -movdqa xmm0, [rax+rbp] -movdqa XMMWORD PTR [(r8+0)+rbp], xmm0 -add rbp, 16 -cmp rbp, 16*12 -jl label0 -movdqa xmm4, [rax+rbp] -movdqa xmm1, [rdx] -mov r11d, [rdx+4*4] -mov ebx, [rdx+5*4] -mov ecx, [rdx+6*4] -mov edx, [rdx+7*4] -xor rax, rax -label9: -mov ebp, [rsi+rax] -add rax, rdi -mov ebp, [rsi+rax] -add rax, rdi -mov ebp, [rsi+rax] -add rax, rdi -mov ebp, [rsi+rax] -add rax, rdi -cmp rax, 2048 -jl label9 -lfence -test DWORD PTR [(r8+16*18+8)], 1 -jz label8 -mov rbp, [(r8+16*14)] -movdqa xmm2, [rbp] -pxor xmm2, xmm1 -psrldq xmm1, 14 -movd eax, xmm1 -mov al, BYTE PTR [rbp+15] -mov r12d, eax -movd eax, xmm2 -psrldq xmm2, 4 -movd edi, xmm2 -psrldq xmm2, 4 -movzx ebp, al -xor r11d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, ah -xor edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -shr eax, 16 -movzx ebp, al -xor ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -movzx ebp, ah -xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -mov eax, edi -movd edi, xmm2 -psrldq xmm2, 4 -movzx ebp, al -xor ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, ah -xor r11d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -shr eax, 16 -movzx ebp, al -xor edx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -movzx ebp, ah -xor ecx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -mov eax, edi -movd edi, xmm2 -movzx ebp, al -xor ecx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, ah -xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -shr eax, 16 -movzx ebp, al -xor r11d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -movzx ebp, ah -xor edx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -mov eax, edi -movzx ebp, al -xor edx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, ah -xor ecx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -shr eax, 16 -movzx ebp, al -xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -psrldq xmm2, 3 -mov eax, [(r8+16*12)+0*4] -mov edi, [(r8+16*12)+2*4] -mov r10d, [(r8+16*12)+3*4] -movzx ebp, cl -xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -movzx ebp, bl -xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -movzx ebp, bh -xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -shr ebx, 16 -movzx ebp, bl -xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -movzx ebp, bh -mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -xor ebx, [(r8+16*12)+1*4] -movzx ebp, ch -xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -shr ecx, 16 -movzx ebp, dl -xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -movzx ebp, dh -xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -shr edx, 16 -movzx ebp, ch -xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, cl -xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -movzx ebp, dl -xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -movzx ebp, dh -xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movd ecx, xmm2 -mov edx, r11d -mov [(r8+0)+3*4], r10d -mov [(r8+0)+0*4], eax -mov [(r8+0)+1*4], ebx -mov [(r8+0)+2*4], edi -jmp label5 -label3: -mov r11d, [(r8+16*12)+0*4] -mov ebx, [(r8+16*12)+1*4] -mov ecx, [(r8+16*12)+2*4] -mov edx, [(r8+16*12)+3*4] -label8: -mov rax, [(r8+16*14)] -movdqu xmm2, [rax] -mov rbp, [(r8+16*14)+8] -movdqu xmm5, [rbp] -pxor xmm2, xmm1 -pxor xmm2, xmm5 -movd eax, xmm2 -psrldq xmm2, 4 -movd edi, xmm2 -psrldq xmm2, 4 -movzx ebp, al -xor r11d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, ah -xor edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -shr eax, 16 -movzx ebp, al -xor ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -movzx ebp, ah -xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -mov eax, edi -movd edi, xmm2 -psrldq xmm2, 4 -movzx ebp, al -xor ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, ah -xor r11d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -shr eax, 16 -movzx ebp, al -xor edx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -movzx ebp, ah -xor ecx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -mov eax, edi -movd edi, xmm2 -movzx ebp, al -xor ecx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, ah -xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -shr eax, 16 -movzx ebp, al -xor r11d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -movzx ebp, ah -xor edx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -mov eax, edi -movzx ebp, al -xor edx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, ah -xor ecx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -shr eax, 16 -movzx ebp, al -xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -movzx ebp, ah -xor r11d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -mov eax, r11d -add r8, [(r8+16*19)] -add r8, 4*16 -jmp label2 -label1: -mov ecx, r12d -mov edx, r11d -mov eax, [(r8+0)+0*4] -mov ebx, [(r8+0)+1*4] -xor cl, ch -and rcx, 255 -label5: -add r12d, 1 -xor edx, DWORD PTR [rsi+rcx*8+3] -movzx ebp, dl -xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -movzx ebp, dh -mov ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -shr edx, 16 -xor ecx, [(r8+0)+2*4] -movzx ebp, dh -xor eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, dl -mov edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -xor edx, [(r8+0)+3*4] -add r8, [(r8+16*19)] -add r8, 3*16 -jmp label4 -label2: -mov r10d, [(r8+0)-4*16+3*4] -mov edi, [(r8+0)-4*16+2*4] -movzx ebp, cl -xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -mov cl, al -movzx ebp, ah -xor edi, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -shr eax, 16 -movzx ebp, bl -xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -movzx ebp, bh -xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -shr ebx, 16 -movzx ebp, al -xor r10d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -movzx ebp, ah -mov eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, bl -xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -movzx ebp, bh -mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, ch -xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -movzx ebp, cl -xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -shr ecx, 16 -movzx ebp, dl -xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -movzx ebp, dh -xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -shr edx, 16 -movzx ebp, ch -xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, cl -xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -movzx ebp, dl -xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -movzx ebp, dh -xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -mov ecx, edi -xor eax, [(r8+0)-4*16+0*4] -xor ebx, [(r8+0)-4*16+1*4] -mov edx, r10d -label4: -mov r10d, [(r8+0)-4*16+7*4] -mov edi, [(r8+0)-4*16+6*4] -movzx ebp, cl -xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -mov cl, al -movzx ebp, ah -xor edi, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -shr eax, 16 -movzx ebp, bl -xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -movzx ebp, bh -xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -shr ebx, 16 -movzx ebp, al -xor r10d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -movzx ebp, ah -mov eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, bl -xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -movzx ebp, bh -mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, ch -xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -movzx ebp, cl -xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -shr ecx, 16 -movzx ebp, dl -xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] -movzx ebp, dh -xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] -shr edx, 16 -movzx ebp, ch -xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -movzx ebp, cl -xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -movzx ebp, dl -xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] -movzx ebp, dh -xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] -mov ecx, edi -xor eax, [(r8+0)-4*16+4*4] -xor ebx, [(r8+0)-4*16+5*4] -mov edx, r10d -add r8, 32 -test r8, 255 -jnz label2 -sub r8, 16*16 -movzx ebp, ch -movzx edi, BYTE PTR [rsi+rbp*8+1] -movzx ebp, dl -xor edi, DWORD PTR [rsi+rbp*8+0] -mov WORD PTR [(r8+16*13)+2], di -movzx ebp, dh -movzx edi, BYTE PTR [rsi+rbp*8+1] -movzx ebp, al -xor edi, DWORD PTR [rsi+rbp*8+0] -mov WORD PTR [(r8+16*13)+6], di -shr edx, 16 -movzx ebp, ah -movzx edi, BYTE PTR [rsi+rbp*8+1] -movzx ebp, bl -xor edi, DWORD PTR [rsi+rbp*8+0] -mov WORD PTR [(r8+16*13)+10], di -shr eax, 16 -movzx ebp, bh -movzx edi, BYTE PTR [rsi+rbp*8+1] -movzx ebp, cl -xor edi, DWORD PTR [rsi+rbp*8+0] -mov WORD PTR [(r8+16*13)+14], di -shr ebx, 16 -movzx ebp, dh -movzx edi, BYTE PTR [rsi+rbp*8+1] -movzx ebp, al -xor edi, DWORD PTR [rsi+rbp*8+0] -mov WORD PTR [(r8+16*13)+12], di -shr ecx, 16 -movzx ebp, ah -movzx edi, BYTE PTR [rsi+rbp*8+1] -movzx ebp, bl -xor edi, DWORD PTR [rsi+rbp*8+0] -mov WORD PTR [(r8+16*13)+0], di -movzx ebp, bh -movzx edi, BYTE PTR [rsi+rbp*8+1] -movzx ebp, cl -xor edi, DWORD PTR [rsi+rbp*8+0] -mov WORD PTR [(r8+16*13)+4], di -movzx ebp, ch -movzx edi, BYTE PTR [rsi+rbp*8+1] -movzx ebp, dl -xor edi, DWORD PTR [rsi+rbp*8+0] -mov WORD PTR [(r8+16*13)+8], di -mov rax, [(r8+16*14)+16] -mov rbx, [(r8+16*14)+24] -mov rcx, [(r8+16*18+8)] -sub rcx, 16 -movdqu xmm2, [rax] -pxor xmm2, xmm4 -movdqa xmm0, [(r8+16*16)+16] -paddq xmm0, [(r8+16*14)+16] -movdqa [(r8+16*14)+16], xmm0 -pxor xmm2, [(r8+16*13)] -movdqu [rbx], xmm2 -jle label7 -mov [(r8+16*18+8)], rcx -test rcx, 1 -jnz label1 -movdqa xmm0, [(r8+16*16)] -paddd xmm0, [(r8+16*14)] -movdqa [(r8+16*14)], xmm0 -jmp label3 -label7: -mov rbp, [(r8+16*18)] -pop r12 -pop rbp -pop rbx -pop rdi -pop rsi -ret -Rijndael_Enc_AdvancedProcessBlocks ENDP - -ALIGN 8 -GCM_AuthenticateBlocks_2K PROC FRAME -rex_push_reg rsi -push_reg rdi -push_reg rbx -.endprolog -mov rsi, r8 -mov r11, r9 -movdqa xmm0, [rsi] -label0: -movdqu xmm4, [rcx] -pxor xmm0, xmm4 -movd ebx, xmm0 -mov eax, 0f0f0f0f0h -and eax, ebx -shl ebx, 4 -and ebx, 0f0f0f0f0h -movzx edi, ah -movdqa xmm5, XMMWORD PTR [rsi + 32 + 1024 + rdi] -movzx edi, al -movdqa xmm4, XMMWORD PTR [rsi + 32 + 1024 + rdi] -shr eax, 16 -movzx edi, ah -movdqa xmm3, XMMWORD PTR [rsi + 32 + 1024 + rdi] -movzx edi, al -movdqa xmm2, XMMWORD PTR [rsi + 32 + 1024 + rdi] -psrldq xmm0, 4 -movd eax, xmm0 -and eax, 0f0f0f0f0h -movzx edi, bh -pxor xmm5, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi] -movzx edi, bl -pxor xmm4, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi] -shr ebx, 16 -movzx edi, bh -pxor xmm3, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi] -movzx edi, bl -pxor xmm2, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi] -movd ebx, xmm0 -shl ebx, 4 -and ebx, 0f0f0f0f0h -movzx edi, ah -pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi] -movzx edi, al -pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi] -shr eax, 16 -movzx edi, ah -pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi] -movzx edi, al -pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi] -psrldq xmm0, 4 -movd eax, xmm0 -and eax, 0f0f0f0f0h -movzx edi, bh -pxor xmm5, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi] -movzx edi, bl -pxor xmm4, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi] -shr ebx, 16 -movzx edi, bh -pxor xmm3, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi] -movzx edi, bl -pxor xmm2, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi] -movd ebx, xmm0 -shl ebx, 4 -and ebx, 0f0f0f0f0h -movzx edi, ah -pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi] -movzx edi, al -pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi] -shr eax, 16 -movzx edi, ah -pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi] -movzx edi, al -pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi] -psrldq xmm0, 4 -movd eax, xmm0 -and eax, 0f0f0f0f0h -movzx edi, bh -pxor xmm5, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi] -movzx edi, bl -pxor xmm4, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi] -shr ebx, 16 -movzx edi, bh -pxor xmm3, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi] -movzx edi, bl -pxor xmm2, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi] -movd ebx, xmm0 -shl ebx, 4 -and ebx, 0f0f0f0f0h -movzx edi, ah -pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi] -movzx edi, al -pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi] -shr eax, 16 -movzx edi, ah -pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi] -movzx edi, al -pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi] -movzx edi, bh -pxor xmm5, XMMWORD PTR [rsi + 32 + 3*256 + rdi] -movzx edi, bl -pxor xmm4, XMMWORD PTR [rsi + 32 + 3*256 + rdi] -shr ebx, 16 -movzx edi, bh -pxor xmm3, XMMWORD PTR [rsi + 32 + 3*256 + rdi] -movzx edi, bl -pxor xmm2, XMMWORD PTR [rsi + 32 + 3*256 + rdi] -movdqa xmm0, xmm3 -pslldq xmm3, 1 -pxor xmm2, xmm3 -movdqa xmm1, xmm2 -pslldq xmm2, 1 -pxor xmm5, xmm2 -psrldq xmm0, 15 -movd rdi, xmm0 -movzx eax, WORD PTR [r11 + rdi*2] -shl eax, 8 -movdqa xmm0, xmm5 -pslldq xmm5, 1 -pxor xmm4, xmm5 -psrldq xmm1, 15 -movd rdi, xmm1 -xor ax, WORD PTR [r11 + rdi*2] -shl eax, 8 -psrldq xmm0, 15 -movd rdi, xmm0 -xor ax, WORD PTR [r11 + rdi*2] -movd xmm0, eax -pxor xmm0, xmm4 -add rcx, 16 -sub rdx, 1 -jnz label0 -movdqa [rsi], xmm0 -pop rbx -pop rdi -pop rsi -ret -GCM_AuthenticateBlocks_2K ENDP - -ALIGN 8 -GCM_AuthenticateBlocks_64K PROC FRAME -rex_push_reg rsi -push_reg rdi -.endprolog -mov rsi, r8 -movdqa xmm0, [rsi] -label1: -movdqu xmm1, [rcx] -pxor xmm1, xmm0 -pxor xmm0, xmm0 -movd eax, xmm1 -psrldq xmm1, 4 -movzx edi, al -add rdi, rdi -pxor xmm0, [rsi + 32 + (0*4+0)*256*16 + rdi*8] -movzx edi, ah -add rdi, rdi -pxor xmm0, [rsi + 32 + (0*4+1)*256*16 + rdi*8] -shr eax, 16 -movzx edi, al -add rdi, rdi -pxor xmm0, [rsi + 32 + (0*4+2)*256*16 + rdi*8] -movzx edi, ah -add rdi, rdi -pxor xmm0, [rsi + 32 + (0*4+3)*256*16 + rdi*8] -movd eax, xmm1 -psrldq xmm1, 4 -movzx edi, al -add rdi, rdi -pxor xmm0, [rsi + 32 + (1*4+0)*256*16 + rdi*8] -movzx edi, ah -add rdi, rdi -pxor xmm0, [rsi + 32 + (1*4+1)*256*16 + rdi*8] -shr eax, 16 -movzx edi, al -add rdi, rdi -pxor xmm0, [rsi + 32 + (1*4+2)*256*16 + rdi*8] -movzx edi, ah -add rdi, rdi -pxor xmm0, [rsi + 32 + (1*4+3)*256*16 + rdi*8] -movd eax, xmm1 -psrldq xmm1, 4 -movzx edi, al -add rdi, rdi -pxor xmm0, [rsi + 32 + (2*4+0)*256*16 + rdi*8] -movzx edi, ah -add rdi, rdi -pxor xmm0, [rsi + 32 + (2*4+1)*256*16 + rdi*8] -shr eax, 16 -movzx edi, al -add rdi, rdi -pxor xmm0, [rsi + 32 + (2*4+2)*256*16 + rdi*8] -movzx edi, ah -add rdi, rdi -pxor xmm0, [rsi + 32 + (2*4+3)*256*16 + rdi*8] -movd eax, xmm1 -psrldq xmm1, 4 -movzx edi, al -add rdi, rdi -pxor xmm0, [rsi + 32 + (3*4+0)*256*16 + rdi*8] -movzx edi, ah -add rdi, rdi -pxor xmm0, [rsi + 32 + (3*4+1)*256*16 + rdi*8] -shr eax, 16 -movzx edi, al -add rdi, rdi -pxor xmm0, [rsi + 32 + (3*4+2)*256*16 + rdi*8] -movzx edi, ah -add rdi, rdi -pxor xmm0, [rsi + 32 + (3*4+3)*256*16 + rdi*8] -add rcx, 16 -sub rdx, 1 -jnz label1 -movdqa [rsi], xmm0 -pop rdi -pop rsi -ret -GCM_AuthenticateBlocks_64K ENDP - -_TEXT ENDS -END +include ksamd64.inc +EXTERNDEF ?Te@rdtable@CryptoPP@@3PA_KA:FAR +EXTERNDEF ?g_cacheLineSize@CryptoPP@@3IA:FAR +EXTERNDEF ?SHA256_K@CryptoPP@@3QBIB:FAR +.CODE + + ALIGN 8 +Baseline_Add PROC + lea rdx, [rdx+8*rcx] + lea r8, [r8+8*rcx] + lea r9, [r9+8*rcx] + neg rcx ; rcx is negative index + jz $1@Baseline_Add + mov rax,[r8+8*rcx] + add rax,[r9+8*rcx] + mov [rdx+8*rcx],rax +$0@Baseline_Add: + mov rax,[r8+8*rcx+8] + adc rax,[r9+8*rcx+8] + mov [rdx+8*rcx+8],rax + lea rcx,[rcx+2] ; advance index, avoid inc which causes slowdown on Intel Core 2 + jrcxz $1@Baseline_Add ; loop until rcx overflows and becomes zero + mov rax,[r8+8*rcx] + adc rax,[r9+8*rcx] + mov [rdx+8*rcx],rax + jmp $0@Baseline_Add +$1@Baseline_Add: + mov rax, 0 + adc rax, rax ; store carry into rax (return result register) + ret +Baseline_Add ENDP + + ALIGN 8 +Baseline_Sub PROC + lea rdx, [rdx+8*rcx] + lea r8, [r8+8*rcx] + lea r9, [r9+8*rcx] + neg rcx ; rcx is negative index + jz $1@Baseline_Sub + mov rax,[r8+8*rcx] + sub rax,[r9+8*rcx] + mov [rdx+8*rcx],rax +$0@Baseline_Sub: + mov rax,[r8+8*rcx+8] + sbb rax,[r9+8*rcx+8] + mov [rdx+8*rcx+8],rax + lea rcx,[rcx+2] ; advance index, avoid inc which causes slowdown on Intel Core 2 + jrcxz $1@Baseline_Sub ; loop until rcx overflows and becomes zero + mov rax,[r8+8*rcx] + sbb rax,[r9+8*rcx] + mov [rdx+8*rcx],rax + jmp $0@Baseline_Sub +$1@Baseline_Sub: + mov rax, 0 + adc rax, rax ; store carry into rax (return result register) + + ret +Baseline_Sub ENDP + +ALIGN 8 +Rijndael_Enc_AdvancedProcessBlocks PROC FRAME +rex_push_reg rsi +push_reg rdi +push_reg rbx +push_reg rbp +push_reg r12 +.endprolog +mov r8, rcx +mov rsi, ?Te@rdtable@CryptoPP@@3PA_KA +mov rdi, QWORD PTR [?g_cacheLineSize@CryptoPP@@3IA] +mov rbp, [(r8+16*19)] +mov rax, 16 +and rax, rbp +movdqa xmm3, XMMWORD PTR [rdx+16+rax] +movdqa [(r8+16*12)], xmm3 +lea rax, [rdx+rax+2*16] +sub rax, rbp +label0: +movdqa xmm0, [rax+rbp] +movdqa XMMWORD PTR [(r8+0)+rbp], xmm0 +add rbp, 16 +cmp rbp, 16*12 +jl label0 +movdqa xmm4, [rax+rbp] +movdqa xmm1, [rdx] +mov r11d, [rdx+4*4] +mov ebx, [rdx+5*4] +mov ecx, [rdx+6*4] +mov edx, [rdx+7*4] +xor rax, rax +label9: +mov ebp, [rsi+rax] +add rax, rdi +mov ebp, [rsi+rax] +add rax, rdi +mov ebp, [rsi+rax] +add rax, rdi +mov ebp, [rsi+rax] +add rax, rdi +cmp rax, 2048 +jl label9 +lfence +test DWORD PTR [(r8+16*18+8)], 1 +jz label8 +mov rbp, [(r8+16*14)] +movdqa xmm2, [rbp] +pxor xmm2, xmm1 +psrldq xmm1, 14 +movd eax, xmm1 +mov al, BYTE PTR [rbp+15] +mov r12d, eax +movd eax, xmm2 +psrldq xmm2, 4 +movd edi, xmm2 +psrldq xmm2, 4 +movzx ebp, al +xor r11d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, ah +xor edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +shr eax, 16 +movzx ebp, al +xor ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx ebp, ah +xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +mov eax, edi +movd edi, xmm2 +psrldq xmm2, 4 +movzx ebp, al +xor ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, ah +xor r11d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +shr eax, 16 +movzx ebp, al +xor edx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx ebp, ah +xor ecx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +mov eax, edi +movd edi, xmm2 +movzx ebp, al +xor ecx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, ah +xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +shr eax, 16 +movzx ebp, al +xor r11d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx ebp, ah +xor edx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +mov eax, edi +movzx ebp, al +xor edx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, ah +xor ecx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +shr eax, 16 +movzx ebp, al +xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +psrldq xmm2, 3 +mov eax, [(r8+16*12)+0*4] +mov edi, [(r8+16*12)+2*4] +mov r10d, [(r8+16*12)+3*4] +movzx ebp, cl +xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx ebp, bl +xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx ebp, bh +xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +shr ebx, 16 +movzx ebp, bl +xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx ebp, bh +mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +xor ebx, [(r8+16*12)+1*4] +movzx ebp, ch +xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +shr ecx, 16 +movzx ebp, dl +xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx ebp, dh +xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +shr edx, 16 +movzx ebp, ch +xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, cl +xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx ebp, dl +xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx ebp, dh +xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movd ecx, xmm2 +mov edx, r11d +mov [(r8+0)+3*4], r10d +mov [(r8+0)+0*4], eax +mov [(r8+0)+1*4], ebx +mov [(r8+0)+2*4], edi +jmp label5 +label3: +mov r11d, [(r8+16*12)+0*4] +mov ebx, [(r8+16*12)+1*4] +mov ecx, [(r8+16*12)+2*4] +mov edx, [(r8+16*12)+3*4] +label8: +mov rax, [(r8+16*14)] +movdqu xmm2, [rax] +mov rbp, [(r8+16*14)+8] +movdqu xmm5, [rbp] +pxor xmm2, xmm1 +pxor xmm2, xmm5 +movd eax, xmm2 +psrldq xmm2, 4 +movd edi, xmm2 +psrldq xmm2, 4 +movzx ebp, al +xor r11d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, ah +xor edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +shr eax, 16 +movzx ebp, al +xor ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx ebp, ah +xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +mov eax, edi +movd edi, xmm2 +psrldq xmm2, 4 +movzx ebp, al +xor ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, ah +xor r11d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +shr eax, 16 +movzx ebp, al +xor edx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx ebp, ah +xor ecx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +mov eax, edi +movd edi, xmm2 +movzx ebp, al +xor ecx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, ah +xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +shr eax, 16 +movzx ebp, al +xor r11d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx ebp, ah +xor edx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +mov eax, edi +movzx ebp, al +xor edx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, ah +xor ecx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +shr eax, 16 +movzx ebp, al +xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx ebp, ah +xor r11d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +mov eax, r11d +add r8, [(r8+16*19)] +add r8, 4*16 +jmp label2 +label1: +mov ecx, r12d +mov edx, r11d +mov eax, [(r8+0)+0*4] +mov ebx, [(r8+0)+1*4] +xor cl, ch +and rcx, 255 +label5: +add r12d, 1 +xor edx, DWORD PTR [rsi+rcx*8+3] +movzx ebp, dl +xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx ebp, dh +mov ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +shr edx, 16 +xor ecx, [(r8+0)+2*4] +movzx ebp, dh +xor eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, dl +mov edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +xor edx, [(r8+0)+3*4] +add r8, [(r8+16*19)] +add r8, 3*16 +jmp label4 +label2: +mov r10d, [(r8+0)-4*16+3*4] +mov edi, [(r8+0)-4*16+2*4] +movzx ebp, cl +xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +mov cl, al +movzx ebp, ah +xor edi, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +shr eax, 16 +movzx ebp, bl +xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx ebp, bh +xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +shr ebx, 16 +movzx ebp, al +xor r10d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx ebp, ah +mov eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, bl +xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx ebp, bh +mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, ch +xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx ebp, cl +xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +shr ecx, 16 +movzx ebp, dl +xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx ebp, dh +xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +shr edx, 16 +movzx ebp, ch +xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, cl +xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx ebp, dl +xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx ebp, dh +xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +mov ecx, edi +xor eax, [(r8+0)-4*16+0*4] +xor ebx, [(r8+0)-4*16+1*4] +mov edx, r10d +label4: +mov r10d, [(r8+0)-4*16+7*4] +mov edi, [(r8+0)-4*16+6*4] +movzx ebp, cl +xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +mov cl, al +movzx ebp, ah +xor edi, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +shr eax, 16 +movzx ebp, bl +xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx ebp, bh +xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +shr ebx, 16 +movzx ebp, al +xor r10d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx ebp, ah +mov eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, bl +xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx ebp, bh +mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, ch +xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx ebp, cl +xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +shr ecx, 16 +movzx ebp, dl +xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx ebp, dh +xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +shr edx, 16 +movzx ebp, ch +xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, cl +xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx ebp, dl +xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx ebp, dh +xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +mov ecx, edi +xor eax, [(r8+0)-4*16+4*4] +xor ebx, [(r8+0)-4*16+5*4] +mov edx, r10d +add r8, 32 +test r8, 255 +jnz label2 +sub r8, 16*16 +movzx ebp, ch +movzx edi, BYTE PTR [rsi+rbp*8+1] +movzx ebp, dl +xor edi, DWORD PTR [rsi+rbp*8+0] +mov WORD PTR [(r8+16*13)+2], di +movzx ebp, dh +movzx edi, BYTE PTR [rsi+rbp*8+1] +movzx ebp, al +xor edi, DWORD PTR [rsi+rbp*8+0] +mov WORD PTR [(r8+16*13)+6], di +shr edx, 16 +movzx ebp, ah +movzx edi, BYTE PTR [rsi+rbp*8+1] +movzx ebp, bl +xor edi, DWORD PTR [rsi+rbp*8+0] +mov WORD PTR [(r8+16*13)+10], di +shr eax, 16 +movzx ebp, bh +movzx edi, BYTE PTR [rsi+rbp*8+1] +movzx ebp, cl +xor edi, DWORD PTR [rsi+rbp*8+0] +mov WORD PTR [(r8+16*13)+14], di +shr ebx, 16 +movzx ebp, dh +movzx edi, BYTE PTR [rsi+rbp*8+1] +movzx ebp, al +xor edi, DWORD PTR [rsi+rbp*8+0] +mov WORD PTR [(r8+16*13)+12], di +shr ecx, 16 +movzx ebp, ah +movzx edi, BYTE PTR [rsi+rbp*8+1] +movzx ebp, bl +xor edi, DWORD PTR [rsi+rbp*8+0] +mov WORD PTR [(r8+16*13)+0], di +movzx ebp, bh +movzx edi, BYTE PTR [rsi+rbp*8+1] +movzx ebp, cl +xor edi, DWORD PTR [rsi+rbp*8+0] +mov WORD PTR [(r8+16*13)+4], di +movzx ebp, ch +movzx edi, BYTE PTR [rsi+rbp*8+1] +movzx ebp, dl +xor edi, DWORD PTR [rsi+rbp*8+0] +mov WORD PTR [(r8+16*13)+8], di +mov rax, [(r8+16*14)+16] +mov rbx, [(r8+16*14)+24] +mov rcx, [(r8+16*18+8)] +sub rcx, 16 +movdqu xmm2, [rax] +pxor xmm2, xmm4 +movdqa xmm0, [(r8+16*16)+16] +paddq xmm0, [(r8+16*14)+16] +movdqa [(r8+16*14)+16], xmm0 +pxor xmm2, [(r8+16*13)] +movdqu [rbx], xmm2 +jle label7 +mov [(r8+16*18+8)], rcx +test rcx, 1 +jnz label1 +movdqa xmm0, [(r8+16*16)] +paddd xmm0, [(r8+16*14)] +movdqa [(r8+16*14)], xmm0 +jmp label3 +label7: +mov rbp, [(r8+16*18)] +pop r12 +pop rbp +pop rbx +pop rdi +pop rsi +ret +Rijndael_Enc_AdvancedProcessBlocks ENDP + +ALIGN 8 +GCM_AuthenticateBlocks_2K PROC FRAME +rex_push_reg rsi +push_reg rdi +push_reg rbx +.endprolog +mov rsi, r8 +mov r11, r9 +movdqa xmm0, [rsi] +label0: +movdqu xmm4, [rcx] +pxor xmm0, xmm4 +movd ebx, xmm0 +mov eax, 0f0f0f0f0h +and eax, ebx +shl ebx, 4 +and ebx, 0f0f0f0f0h +movzx edi, ah +movdqa xmm5, XMMWORD PTR [rsi + 32 + 1024 + rdi] +movzx edi, al +movdqa xmm4, XMMWORD PTR [rsi + 32 + 1024 + rdi] +shr eax, 16 +movzx edi, ah +movdqa xmm3, XMMWORD PTR [rsi + 32 + 1024 + rdi] +movzx edi, al +movdqa xmm2, XMMWORD PTR [rsi + 32 + 1024 + rdi] +psrldq xmm0, 4 +movd eax, xmm0 +and eax, 0f0f0f0f0h +movzx edi, bh +pxor xmm5, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi] +movzx edi, bl +pxor xmm4, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi] +shr ebx, 16 +movzx edi, bh +pxor xmm3, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi] +movzx edi, bl +pxor xmm2, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi] +movd ebx, xmm0 +shl ebx, 4 +and ebx, 0f0f0f0f0h +movzx edi, ah +pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi] +movzx edi, al +pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi] +shr eax, 16 +movzx edi, ah +pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi] +movzx edi, al +pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi] +psrldq xmm0, 4 +movd eax, xmm0 +and eax, 0f0f0f0f0h +movzx edi, bh +pxor xmm5, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi] +movzx edi, bl +pxor xmm4, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi] +shr ebx, 16 +movzx edi, bh +pxor xmm3, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi] +movzx edi, bl +pxor xmm2, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi] +movd ebx, xmm0 +shl ebx, 4 +and ebx, 0f0f0f0f0h +movzx edi, ah +pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi] +movzx edi, al +pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi] +shr eax, 16 +movzx edi, ah +pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi] +movzx edi, al +pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi] +psrldq xmm0, 4 +movd eax, xmm0 +and eax, 0f0f0f0f0h +movzx edi, bh +pxor xmm5, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi] +movzx edi, bl +pxor xmm4, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi] +shr ebx, 16 +movzx edi, bh +pxor xmm3, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi] +movzx edi, bl +pxor xmm2, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi] +movd ebx, xmm0 +shl ebx, 4 +and ebx, 0f0f0f0f0h +movzx edi, ah +pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi] +movzx edi, al +pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi] +shr eax, 16 +movzx edi, ah +pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi] +movzx edi, al +pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi] +movzx edi, bh +pxor xmm5, XMMWORD PTR [rsi + 32 + 3*256 + rdi] +movzx edi, bl +pxor xmm4, XMMWORD PTR [rsi + 32 + 3*256 + rdi] +shr ebx, 16 +movzx edi, bh +pxor xmm3, XMMWORD PTR [rsi + 32 + 3*256 + rdi] +movzx edi, bl +pxor xmm2, XMMWORD PTR [rsi + 32 + 3*256 + rdi] +movdqa xmm0, xmm3 +pslldq xmm3, 1 +pxor xmm2, xmm3 +movdqa xmm1, xmm2 +pslldq xmm2, 1 +pxor xmm5, xmm2 +psrldq xmm0, 15 +movd rdi, xmm0 +movzx eax, WORD PTR [r11 + rdi*2] +shl eax, 8 +movdqa xmm0, xmm5 +pslldq xmm5, 1 +pxor xmm4, xmm5 +psrldq xmm1, 15 +movd rdi, xmm1 +xor ax, WORD PTR [r11 + rdi*2] +shl eax, 8 +psrldq xmm0, 15 +movd rdi, xmm0 +xor ax, WORD PTR [r11 + rdi*2] +movd xmm0, eax +pxor xmm0, xmm4 +add rcx, 16 +sub rdx, 1 +jnz label0 +movdqa [rsi], xmm0 +pop rbx +pop rdi +pop rsi +ret +GCM_AuthenticateBlocks_2K ENDP + +ALIGN 8 +GCM_AuthenticateBlocks_64K PROC FRAME +rex_push_reg rsi +push_reg rdi +.endprolog +mov rsi, r8 +movdqa xmm0, [rsi] +label1: +movdqu xmm1, [rcx] +pxor xmm1, xmm0 +pxor xmm0, xmm0 +movd eax, xmm1 +psrldq xmm1, 4 +movzx edi, al +add rdi, rdi +pxor xmm0, [rsi + 32 + (0*4+0)*256*16 + rdi*8] +movzx edi, ah +add rdi, rdi +pxor xmm0, [rsi + 32 + (0*4+1)*256*16 + rdi*8] +shr eax, 16 +movzx edi, al +add rdi, rdi +pxor xmm0, [rsi + 32 + (0*4+2)*256*16 + rdi*8] +movzx edi, ah +add rdi, rdi +pxor xmm0, [rsi + 32 + (0*4+3)*256*16 + rdi*8] +movd eax, xmm1 +psrldq xmm1, 4 +movzx edi, al +add rdi, rdi +pxor xmm0, [rsi + 32 + (1*4+0)*256*16 + rdi*8] +movzx edi, ah +add rdi, rdi +pxor xmm0, [rsi + 32 + (1*4+1)*256*16 + rdi*8] +shr eax, 16 +movzx edi, al +add rdi, rdi +pxor xmm0, [rsi + 32 + (1*4+2)*256*16 + rdi*8] +movzx edi, ah +add rdi, rdi +pxor xmm0, [rsi + 32 + (1*4+3)*256*16 + rdi*8] +movd eax, xmm1 +psrldq xmm1, 4 +movzx edi, al +add rdi, rdi +pxor xmm0, [rsi + 32 + (2*4+0)*256*16 + rdi*8] +movzx edi, ah +add rdi, rdi +pxor xmm0, [rsi + 32 + (2*4+1)*256*16 + rdi*8] +shr eax, 16 +movzx edi, al +add rdi, rdi +pxor xmm0, [rsi + 32 + (2*4+2)*256*16 + rdi*8] +movzx edi, ah +add rdi, rdi +pxor xmm0, [rsi + 32 + (2*4+3)*256*16 + rdi*8] +movd eax, xmm1 +psrldq xmm1, 4 +movzx edi, al +add rdi, rdi +pxor xmm0, [rsi + 32 + (3*4+0)*256*16 + rdi*8] +movzx edi, ah +add rdi, rdi +pxor xmm0, [rsi + 32 + (3*4+1)*256*16 + rdi*8] +shr eax, 16 +movzx edi, al +add rdi, rdi +pxor xmm0, [rsi + 32 + (3*4+2)*256*16 + rdi*8] +movzx edi, ah +add rdi, rdi +pxor xmm0, [rsi + 32 + (3*4+3)*256*16 + rdi*8] +add rcx, 16 +sub rdx, 1 +jnz label1 +movdqa [rsi], xmm0 +pop rdi +pop rsi +ret +GCM_AuthenticateBlocks_64K ENDP + +ALIGN 8 +X86_SHA256_HashBlocks PROC FRAME +rex_push_reg rsi +push_reg rdi +push_reg rbx +push_reg rbp +alloc_stack(8*4 + 16*4 + 4*8 + 8) +.endprolog +mov rdi, r8 +lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4] +mov [rsp+8*4+16*4+1*8], rcx +mov [rsp+8*4+16*4+2*8], rdx +add rdi, rdx +mov [rsp+8*4+16*4+3*8], rdi +movdqa xmm0, XMMWORD PTR [rcx+0*16] +movdqa xmm1, XMMWORD PTR [rcx+1*16] +mov [rsp+8*4+16*4+0*8], rsi +label0: +sub rsi, 48*4 +movdqa [rsp+((1024+7-(0+3)) MOD (8))*4], xmm1 +movdqa [rsp+((1024+7-(0+7)) MOD (8))*4], xmm0 +mov rbx, [rdx+0*8] +bswap rbx +mov [rsp+8*4+((1024+15-(0*(1+1)+1)) MOD (16))*4], rbx +mov rbx, [rdx+1*8] +bswap rbx +mov [rsp+8*4+((1024+15-(1*(1+1)+1)) MOD (16))*4], rbx +mov rbx, [rdx+2*8] +bswap rbx +mov [rsp+8*4+((1024+15-(2*(1+1)+1)) MOD (16))*4], rbx +mov rbx, [rdx+3*8] +bswap rbx +mov [rsp+8*4+((1024+15-(3*(1+1)+1)) MOD (16))*4], rbx +mov rbx, [rdx+4*8] +bswap rbx +mov [rsp+8*4+((1024+15-(4*(1+1)+1)) MOD (16))*4], rbx +mov rbx, [rdx+5*8] +bswap rbx +mov [rsp+8*4+((1024+15-(5*(1+1)+1)) MOD (16))*4], rbx +mov rbx, [rdx+6*8] +bswap rbx +mov [rsp+8*4+((1024+15-(6*(1+1)+1)) MOD (16))*4], rbx +mov rbx, [rdx+7*8] +bswap rbx +mov [rsp+8*4+((1024+15-(7*(1+1)+1)) MOD (16))*4], rbx +mov edi, [rsp+((1024+7-(0+3)) MOD (8))*4] +mov eax, [rsp+((1024+7-(0+6)) MOD (8))*4] +xor eax, [rsp+((1024+7-(0+5)) MOD (8))*4] +mov ecx, [rsp+((1024+7-(0+7)) MOD (8))*4] +mov edx, [rsp+((1024+7-(0+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(0+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(0+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +add edx, [rsi+(0)*4] +add edx, [rsp+8*4+((1024+15-(0)) MOD (16))*4] +add edx, [rsp+((1024+7-(0)) MOD (8))*4] +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(0+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(0+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(0+4)) MOD (8))*4] +mov [rsp+((1024+7-(0+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(0)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(1+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(1+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(1+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +add edi, [rsi+(1)*4] +add edi, [rsp+8*4+((1024+15-(1)) MOD (16))*4] +add edi, [rsp+((1024+7-(1)) MOD (8))*4] +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(1+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(1+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(1+4)) MOD (8))*4] +mov [rsp+((1024+7-(1+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(1)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(2+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(2+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(2+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +add edx, [rsi+(2)*4] +add edx, [rsp+8*4+((1024+15-(2)) MOD (16))*4] +add edx, [rsp+((1024+7-(2)) MOD (8))*4] +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(2+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(2+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(2+4)) MOD (8))*4] +mov [rsp+((1024+7-(2+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(2)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(3+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(3+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(3+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +add edi, [rsi+(3)*4] +add edi, [rsp+8*4+((1024+15-(3)) MOD (16))*4] +add edi, [rsp+((1024+7-(3)) MOD (8))*4] +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(3+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(3+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(3+4)) MOD (8))*4] +mov [rsp+((1024+7-(3+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(3)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(4+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(4+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(4+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +add edx, [rsi+(4)*4] +add edx, [rsp+8*4+((1024+15-(4)) MOD (16))*4] +add edx, [rsp+((1024+7-(4)) MOD (8))*4] +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(4+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(4+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(4+4)) MOD (8))*4] +mov [rsp+((1024+7-(4+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(4)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(5+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(5+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(5+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +add edi, [rsi+(5)*4] +add edi, [rsp+8*4+((1024+15-(5)) MOD (16))*4] +add edi, [rsp+((1024+7-(5)) MOD (8))*4] +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(5+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(5+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(5+4)) MOD (8))*4] +mov [rsp+((1024+7-(5+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(5)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(6+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(6+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(6+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +add edx, [rsi+(6)*4] +add edx, [rsp+8*4+((1024+15-(6)) MOD (16))*4] +add edx, [rsp+((1024+7-(6)) MOD (8))*4] +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(6+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(6+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(6+4)) MOD (8))*4] +mov [rsp+((1024+7-(6+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(6)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(7+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(7+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(7+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +add edi, [rsi+(7)*4] +add edi, [rsp+8*4+((1024+15-(7)) MOD (16))*4] +add edi, [rsp+((1024+7-(7)) MOD (8))*4] +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(7+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(7+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(7+4)) MOD (8))*4] +mov [rsp+((1024+7-(7+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(7)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(8+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(8+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(8+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +add edx, [rsi+(8)*4] +add edx, [rsp+8*4+((1024+15-(8)) MOD (16))*4] +add edx, [rsp+((1024+7-(8)) MOD (8))*4] +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(8+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(8+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(8+4)) MOD (8))*4] +mov [rsp+((1024+7-(8+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(8)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(9+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(9+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(9+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +add edi, [rsi+(9)*4] +add edi, [rsp+8*4+((1024+15-(9)) MOD (16))*4] +add edi, [rsp+((1024+7-(9)) MOD (8))*4] +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(9+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(9+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(9+4)) MOD (8))*4] +mov [rsp+((1024+7-(9+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(9)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(10+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(10+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(10+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +add edx, [rsi+(10)*4] +add edx, [rsp+8*4+((1024+15-(10)) MOD (16))*4] +add edx, [rsp+((1024+7-(10)) MOD (8))*4] +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(10+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(10+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(10+4)) MOD (8))*4] +mov [rsp+((1024+7-(10+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(10)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(11+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(11+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(11+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +add edi, [rsi+(11)*4] +add edi, [rsp+8*4+((1024+15-(11)) MOD (16))*4] +add edi, [rsp+((1024+7-(11)) MOD (8))*4] +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(11+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(11+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(11+4)) MOD (8))*4] +mov [rsp+((1024+7-(11+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(11)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(12+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(12+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(12+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +add edx, [rsi+(12)*4] +add edx, [rsp+8*4+((1024+15-(12)) MOD (16))*4] +add edx, [rsp+((1024+7-(12)) MOD (8))*4] +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(12+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(12+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(12+4)) MOD (8))*4] +mov [rsp+((1024+7-(12+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(12)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(13+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(13+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(13+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +add edi, [rsi+(13)*4] +add edi, [rsp+8*4+((1024+15-(13)) MOD (16))*4] +add edi, [rsp+((1024+7-(13)) MOD (8))*4] +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(13+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(13+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(13+4)) MOD (8))*4] +mov [rsp+((1024+7-(13+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(13)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(14+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(14+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(14+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +add edx, [rsi+(14)*4] +add edx, [rsp+8*4+((1024+15-(14)) MOD (16))*4] +add edx, [rsp+((1024+7-(14)) MOD (8))*4] +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(14+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(14+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(14+4)) MOD (8))*4] +mov [rsp+((1024+7-(14+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(14)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(15+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(15+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(15+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +add edi, [rsi+(15)*4] +add edi, [rsp+8*4+((1024+15-(15)) MOD (16))*4] +add edi, [rsp+((1024+7-(15)) MOD (8))*4] +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(15+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(15+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(15+4)) MOD (8))*4] +mov [rsp+((1024+7-(15+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(15)) MOD (8))*4], ecx +label1: +add rsi, 4*16 +mov edx, [rsp+((1024+7-(0+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(0+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(0+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebp, [rsp+8*4+((1024+15-((0)-2)) MOD (16))*4] +mov edi, [rsp+8*4+((1024+15-((0)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((0)-7)) MOD (16))*4] +mov ebp, edi +shr ebp, 3 +ror edi, 7 +add ebx, [rsp+8*4+((1024+15-(0)) MOD (16))*4] +xor ebp, edi +add edx, [rsi+(0)*4] +ror edi, 11 +add edx, [rsp+((1024+7-(0)) MOD (8))*4] +xor ebp, edi +add ebp, ebx +mov [rsp+8*4+((1024+15-(0)) MOD (16))*4], ebp +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(0+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(0+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(0+4)) MOD (8))*4] +mov [rsp+((1024+7-(0+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(0)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(1+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(1+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(1+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebp, [rsp+8*4+((1024+15-((1)-2)) MOD (16))*4] +mov edx, [rsp+8*4+((1024+15-((1)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((1)-7)) MOD (16))*4] +mov ebp, edx +shr ebp, 3 +ror edx, 7 +add ebx, [rsp+8*4+((1024+15-(1)) MOD (16))*4] +xor ebp, edx +add edi, [rsi+(1)*4] +ror edx, 11 +add edi, [rsp+((1024+7-(1)) MOD (8))*4] +xor ebp, edx +add ebp, ebx +mov [rsp+8*4+((1024+15-(1)) MOD (16))*4], ebp +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(1+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(1+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(1+4)) MOD (8))*4] +mov [rsp+((1024+7-(1+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(1)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(2+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(2+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(2+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebp, [rsp+8*4+((1024+15-((2)-2)) MOD (16))*4] +mov edi, [rsp+8*4+((1024+15-((2)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((2)-7)) MOD (16))*4] +mov ebp, edi +shr ebp, 3 +ror edi, 7 +add ebx, [rsp+8*4+((1024+15-(2)) MOD (16))*4] +xor ebp, edi +add edx, [rsi+(2)*4] +ror edi, 11 +add edx, [rsp+((1024+7-(2)) MOD (8))*4] +xor ebp, edi +add ebp, ebx +mov [rsp+8*4+((1024+15-(2)) MOD (16))*4], ebp +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(2+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(2+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(2+4)) MOD (8))*4] +mov [rsp+((1024+7-(2+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(2)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(3+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(3+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(3+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebp, [rsp+8*4+((1024+15-((3)-2)) MOD (16))*4] +mov edx, [rsp+8*4+((1024+15-((3)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((3)-7)) MOD (16))*4] +mov ebp, edx +shr ebp, 3 +ror edx, 7 +add ebx, [rsp+8*4+((1024+15-(3)) MOD (16))*4] +xor ebp, edx +add edi, [rsi+(3)*4] +ror edx, 11 +add edi, [rsp+((1024+7-(3)) MOD (8))*4] +xor ebp, edx +add ebp, ebx +mov [rsp+8*4+((1024+15-(3)) MOD (16))*4], ebp +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(3+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(3+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(3+4)) MOD (8))*4] +mov [rsp+((1024+7-(3+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(3)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(4+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(4+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(4+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebp, [rsp+8*4+((1024+15-((4)-2)) MOD (16))*4] +mov edi, [rsp+8*4+((1024+15-((4)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((4)-7)) MOD (16))*4] +mov ebp, edi +shr ebp, 3 +ror edi, 7 +add ebx, [rsp+8*4+((1024+15-(4)) MOD (16))*4] +xor ebp, edi +add edx, [rsi+(4)*4] +ror edi, 11 +add edx, [rsp+((1024+7-(4)) MOD (8))*4] +xor ebp, edi +add ebp, ebx +mov [rsp+8*4+((1024+15-(4)) MOD (16))*4], ebp +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(4+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(4+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(4+4)) MOD (8))*4] +mov [rsp+((1024+7-(4+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(4)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(5+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(5+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(5+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebp, [rsp+8*4+((1024+15-((5)-2)) MOD (16))*4] +mov edx, [rsp+8*4+((1024+15-((5)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((5)-7)) MOD (16))*4] +mov ebp, edx +shr ebp, 3 +ror edx, 7 +add ebx, [rsp+8*4+((1024+15-(5)) MOD (16))*4] +xor ebp, edx +add edi, [rsi+(5)*4] +ror edx, 11 +add edi, [rsp+((1024+7-(5)) MOD (8))*4] +xor ebp, edx +add ebp, ebx +mov [rsp+8*4+((1024+15-(5)) MOD (16))*4], ebp +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(5+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(5+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(5+4)) MOD (8))*4] +mov [rsp+((1024+7-(5+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(5)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(6+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(6+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(6+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebp, [rsp+8*4+((1024+15-((6)-2)) MOD (16))*4] +mov edi, [rsp+8*4+((1024+15-((6)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((6)-7)) MOD (16))*4] +mov ebp, edi +shr ebp, 3 +ror edi, 7 +add ebx, [rsp+8*4+((1024+15-(6)) MOD (16))*4] +xor ebp, edi +add edx, [rsi+(6)*4] +ror edi, 11 +add edx, [rsp+((1024+7-(6)) MOD (8))*4] +xor ebp, edi +add ebp, ebx +mov [rsp+8*4+((1024+15-(6)) MOD (16))*4], ebp +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(6+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(6+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(6+4)) MOD (8))*4] +mov [rsp+((1024+7-(6+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(6)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(7+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(7+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(7+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebp, [rsp+8*4+((1024+15-((7)-2)) MOD (16))*4] +mov edx, [rsp+8*4+((1024+15-((7)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((7)-7)) MOD (16))*4] +mov ebp, edx +shr ebp, 3 +ror edx, 7 +add ebx, [rsp+8*4+((1024+15-(7)) MOD (16))*4] +xor ebp, edx +add edi, [rsi+(7)*4] +ror edx, 11 +add edi, [rsp+((1024+7-(7)) MOD (8))*4] +xor ebp, edx +add ebp, ebx +mov [rsp+8*4+((1024+15-(7)) MOD (16))*4], ebp +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(7+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(7+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(7+4)) MOD (8))*4] +mov [rsp+((1024+7-(7+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(7)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(8+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(8+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(8+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebp, [rsp+8*4+((1024+15-((8)-2)) MOD (16))*4] +mov edi, [rsp+8*4+((1024+15-((8)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((8)-7)) MOD (16))*4] +mov ebp, edi +shr ebp, 3 +ror edi, 7 +add ebx, [rsp+8*4+((1024+15-(8)) MOD (16))*4] +xor ebp, edi +add edx, [rsi+(8)*4] +ror edi, 11 +add edx, [rsp+((1024+7-(8)) MOD (8))*4] +xor ebp, edi +add ebp, ebx +mov [rsp+8*4+((1024+15-(8)) MOD (16))*4], ebp +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(8+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(8+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(8+4)) MOD (8))*4] +mov [rsp+((1024+7-(8+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(8)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(9+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(9+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(9+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebp, [rsp+8*4+((1024+15-((9)-2)) MOD (16))*4] +mov edx, [rsp+8*4+((1024+15-((9)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((9)-7)) MOD (16))*4] +mov ebp, edx +shr ebp, 3 +ror edx, 7 +add ebx, [rsp+8*4+((1024+15-(9)) MOD (16))*4] +xor ebp, edx +add edi, [rsi+(9)*4] +ror edx, 11 +add edi, [rsp+((1024+7-(9)) MOD (8))*4] +xor ebp, edx +add ebp, ebx +mov [rsp+8*4+((1024+15-(9)) MOD (16))*4], ebp +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(9+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(9+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(9+4)) MOD (8))*4] +mov [rsp+((1024+7-(9+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(9)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(10+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(10+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(10+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebp, [rsp+8*4+((1024+15-((10)-2)) MOD (16))*4] +mov edi, [rsp+8*4+((1024+15-((10)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((10)-7)) MOD (16))*4] +mov ebp, edi +shr ebp, 3 +ror edi, 7 +add ebx, [rsp+8*4+((1024+15-(10)) MOD (16))*4] +xor ebp, edi +add edx, [rsi+(10)*4] +ror edi, 11 +add edx, [rsp+((1024+7-(10)) MOD (8))*4] +xor ebp, edi +add ebp, ebx +mov [rsp+8*4+((1024+15-(10)) MOD (16))*4], ebp +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(10+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(10+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(10+4)) MOD (8))*4] +mov [rsp+((1024+7-(10+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(10)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(11+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(11+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(11+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebp, [rsp+8*4+((1024+15-((11)-2)) MOD (16))*4] +mov edx, [rsp+8*4+((1024+15-((11)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((11)-7)) MOD (16))*4] +mov ebp, edx +shr ebp, 3 +ror edx, 7 +add ebx, [rsp+8*4+((1024+15-(11)) MOD (16))*4] +xor ebp, edx +add edi, [rsi+(11)*4] +ror edx, 11 +add edi, [rsp+((1024+7-(11)) MOD (8))*4] +xor ebp, edx +add ebp, ebx +mov [rsp+8*4+((1024+15-(11)) MOD (16))*4], ebp +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(11+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(11+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(11+4)) MOD (8))*4] +mov [rsp+((1024+7-(11+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(11)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(12+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(12+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(12+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebp, [rsp+8*4+((1024+15-((12)-2)) MOD (16))*4] +mov edi, [rsp+8*4+((1024+15-((12)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((12)-7)) MOD (16))*4] +mov ebp, edi +shr ebp, 3 +ror edi, 7 +add ebx, [rsp+8*4+((1024+15-(12)) MOD (16))*4] +xor ebp, edi +add edx, [rsi+(12)*4] +ror edi, 11 +add edx, [rsp+((1024+7-(12)) MOD (8))*4] +xor ebp, edi +add ebp, ebx +mov [rsp+8*4+((1024+15-(12)) MOD (16))*4], ebp +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(12+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(12+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(12+4)) MOD (8))*4] +mov [rsp+((1024+7-(12+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(12)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(13+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(13+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(13+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebp, [rsp+8*4+((1024+15-((13)-2)) MOD (16))*4] +mov edx, [rsp+8*4+((1024+15-((13)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((13)-7)) MOD (16))*4] +mov ebp, edx +shr ebp, 3 +ror edx, 7 +add ebx, [rsp+8*4+((1024+15-(13)) MOD (16))*4] +xor ebp, edx +add edi, [rsi+(13)*4] +ror edx, 11 +add edi, [rsp+((1024+7-(13)) MOD (8))*4] +xor ebp, edx +add ebp, ebx +mov [rsp+8*4+((1024+15-(13)) MOD (16))*4], ebp +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(13+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(13+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(13+4)) MOD (8))*4] +mov [rsp+((1024+7-(13+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(13)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(14+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(14+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(14+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebp, [rsp+8*4+((1024+15-((14)-2)) MOD (16))*4] +mov edi, [rsp+8*4+((1024+15-((14)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((14)-7)) MOD (16))*4] +mov ebp, edi +shr ebp, 3 +ror edi, 7 +add ebx, [rsp+8*4+((1024+15-(14)) MOD (16))*4] +xor ebp, edi +add edx, [rsi+(14)*4] +ror edi, 11 +add edx, [rsp+((1024+7-(14)) MOD (8))*4] +xor ebp, edi +add ebp, ebx +mov [rsp+8*4+((1024+15-(14)) MOD (16))*4], ebp +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(14+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(14+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(14+4)) MOD (8))*4] +mov [rsp+((1024+7-(14+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(14)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(15+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(15+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(15+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebp, [rsp+8*4+((1024+15-((15)-2)) MOD (16))*4] +mov edx, [rsp+8*4+((1024+15-((15)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((15)-7)) MOD (16))*4] +mov ebp, edx +shr ebp, 3 +ror edx, 7 +add ebx, [rsp+8*4+((1024+15-(15)) MOD (16))*4] +xor ebp, edx +add edi, [rsi+(15)*4] +ror edx, 11 +add edi, [rsp+((1024+7-(15)) MOD (8))*4] +xor ebp, edx +add ebp, ebx +mov [rsp+8*4+((1024+15-(15)) MOD (16))*4], ebp +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(15+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(15+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(15+4)) MOD (8))*4] +mov [rsp+((1024+7-(15+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(15)) MOD (8))*4], ecx +cmp rsi, [rsp+8*4+16*4+0*8] +jne label1 +mov rcx, [rsp+8*4+16*4+1*8] +movdqa xmm1, XMMWORD PTR [rcx+1*16] +movdqa xmm0, XMMWORD PTR [rcx+0*16] +paddd xmm1, [rsp+((1024+7-(0+3)) MOD (8))*4] +paddd xmm0, [rsp+((1024+7-(0+7)) MOD (8))*4] +movdqa [rcx+1*16], xmm1 +movdqa [rcx+0*16], xmm0 +mov rdx, [rsp+8*4+16*4+2*8] +add rdx, 64 +mov [rsp+8*4+16*4+2*8], rdx +cmp rdx, [rsp+8*4+16*4+3*8] +jne label0 +add rsp, 8*4 + 16*4 + 4*8 + 8 +pop rbp +pop rbx +pop rdi +pop rsi +ret +X86_SHA256_HashBlocks ENDP + +_TEXT ENDS +END