diff --git a/gzip.cpp b/gzip.cpp
index 04568386..4b4869b4 100644
--- a/gzip.cpp
+++ b/gzip.cpp
@@ -5,19 +5,39 @@
 
 NAMESPACE_BEGIN(CryptoPP)
 
+// Returns true if c is a printable ISO/IEC 8859-1 character (0x20-0x7E or 0xA0-0xFF), the encoding RFC 1952 requires for FNAME/FCOMMENT
+static inline bool Is8859Character(char c) {
+	const unsigned char cc = static_cast<unsigned char>(c);	// plain char may be signed
+	return (cc >= 32 && cc <= 126) || (cc >= 160);
+}
+
 void Gzip::WritePrestreamHeader()
 {
 	m_totalLen = 0;
 	m_crc.Restart();
 
+	int flags = 0;	// FLG byte: advertise only the optional fields that are actually written below
+	if(!m_filename.empty())
+		flags |= FILENAME;
+	if(!m_comment.empty())
+		flags |= COMMENTS;
+
 	AttachedTransformation()->Put(MAGIC1);
 	AttachedTransformation()->Put(MAGIC2);
 	AttachedTransformation()->Put(DEFLATED);
-	AttachedTransformation()->Put(0);		// general flag
-	AttachedTransformation()->PutWord32(0);	// time stamp
-	byte extra = byte((GetDeflateLevel() == 1) ? FAST : ((GetDeflateLevel() == 9) ? SLOW : 0));
+	AttachedTransformation()->Put((byte)flags);		// general flag
+	AttachedTransformation()->PutWord32(m_filetime, LITTLE_ENDIAN_ORDER);	// time stamp
+	byte extra = (GetDeflateLevel() == 1) ? FAST : ((GetDeflateLevel() == 9) ? SLOW : 0);
 	AttachedTransformation()->Put(extra);
 	AttachedTransformation()->Put(GZIP_OS_CODE);
+
+	// Filename is NUL terminated, hence the +1; c_str() (unlike pre-C++11 data()) guarantees the terminator
+	if(!m_filename.empty())
+		AttachedTransformation()->Put(reinterpret_cast<const byte*>(m_filename.c_str()), m_filename.size() + 1);
+
+	// Comment is NUL terminated, hence the +1; c_str() (unlike pre-C++11 data()) guarantees the terminator
+	if(!m_comment.empty())
+		AttachedTransformation()->Put(reinterpret_cast<const byte*>(m_comment.c_str()), m_comment.size() + 1);
 }
 
 void Gzip::ProcessUncompressedData(const byte *inString, size_t length)
@@ -32,12 +52,44 @@ void Gzip::WritePoststreamTail()
 	m_crc.Final(crc);
 	AttachedTransformation()->Put(crc, 4);
 	AttachedTransformation()->PutWord32(m_totalLen, LITTLE_ENDIAN_ORDER);
+
+	m_filetime = 0;	// header metadata applies to a single stream; reset it so the next stream starts clean
+	m_filename.clear();
+	m_comment.clear();
+}
+
+void Gzip::SetComment(const std::string& comment, bool throwOnEncodingError)
+{
+	if(throwOnEncodingError)
+	{
+		for(size_t i = 0; i < comment.length(); i++) {
+			const char c = comment[i];
+			if(!Is8859Character(c))
+				throw InvalidDataFormat("The comment is not ISO/IEC 8859-1 encoded");
+		}
+	}
+
+	m_comment = comment;	// stored only after validation, so a throw leaves the previous comment intact
+}
+
+void Gzip::SetFilename(const std::string& filename, bool throwOnEncodingError)
+{
+	if(throwOnEncodingError)
+	{
+		for(size_t i = 0; i < filename.length(); i++) {
+			const char c = filename[i];
+			if(!Is8859Character(c))
+				throw InvalidDataFormat("The filename is not ISO/IEC 8859-1 encoded");
+		}
+	}
+
+	m_filename = filename;	// stored only after validation, so a throw leaves the previous filename intact
 }
 
 // *************************************************************
 
 Gunzip::Gunzip(BufferedTransformation *attachment, bool repeat, int propagation)
-	: Inflator(attachment, repeat, propagation), m_length(0)
+	: Inflator(attachment, repeat, propagation), m_length(0), m_filetime(0)
 {
 }
 
@@ -46,15 +98,20 @@ void Gunzip::ProcessPrestreamHeader()
 	m_length = 0;
 	m_crc.Restart();
 
+	m_filetime = 0;	// discard metadata left over from any previously processed stream
+	m_filename.clear();
+	m_comment.clear();
+
 	byte buf[6];
 	byte b, flags;
 
 	if (m_inQueue.Get(buf, 2)!=2) throw HeaderErr();
 	if (buf[0] != MAGIC1 || buf[1] != MAGIC2) throw HeaderErr();
-	if (!m_inQueue.Skip(1)) throw HeaderErr();	 // skip extra flags
+	if (!m_inQueue.Get(b) || (b != DEFLATED)) throw HeaderErr();	 // compression method (CM) must be DEFLATED
 	if (!m_inQueue.Get(flags)) throw HeaderErr();
 	if (flags & (ENCRYPTED | CONTINUED)) throw HeaderErr();
-	if (m_inQueue.Skip(6)!=6) throw HeaderErr();    // Skip file time, extra flags and OS type
+	if (m_inQueue.GetWord32(m_filetime, LITTLE_ENDIAN_ORDER) != 4) throw HeaderErr();	// file time, little-endian
+	if (m_inQueue.Skip(2)!=2) throw HeaderErr();    // Skip extra flags and OS type
 
 	if (flags & EXTRA_FIELDS)	// skip extra fields
 	{
@@ -63,15 +120,25 @@ void Gunzip::ProcessPrestreamHeader()
 		if (m_inQueue.Skip(length)!=length) throw HeaderErr();
 	}
 
-	if (flags & FILENAME)	// skip filename
+	if (flags & FILENAME)	// extract filename (NUL-terminated; the terminator is consumed but not stored)
+	{
 		do
+		{
 			if(!m_inQueue.Get(b)) throw HeaderErr();
+			if(b) m_filename.append( 1, (char)b );
+		}
 		while (b);
+	}
 
-	if (flags & COMMENTS)	// skip comments
+	if (flags & COMMENTS)	// extract comment (NUL-terminated; the terminator is consumed but not stored)
+	{
 		do
+		{
 			if(!m_inQueue.Get(b)) throw HeaderErr();
+			if(b) m_comment.append( 1, (char)b );
+		}
 		while (b);
+	}
 }
 
 void Gunzip::ProcessDecompressedData(const byte *inString, size_t length)
@@ -96,4 +163,32 @@ void Gunzip::ProcessPoststreamTail()
 		throw LengthErr();
 }
 
+const std::string& Gunzip::GetComment(bool throwOnEncodingError) const
+{
+	if(throwOnEncodingError)
+	{
+		for(size_t i = 0; i < m_comment.length(); i++) {
+			const char c = m_comment[i];
+			if(!Is8859Character(c))
+				throw InvalidDataFormat("The comment is not ISO/IEC 8859-1 encoded");
+		}
+	}
+
+	return m_comment;
+}
+
+const std::string& Gunzip::GetFilename(bool throwOnEncodingError) const
+{
+	if(throwOnEncodingError)
+	{
+		for(size_t i = 0; i < m_filename.length(); i++) {
+			const char c = m_filename[i];
+			if(!Is8859Character(c))
+				throw InvalidDataFormat("The filename is not ISO/IEC 8859-1 encoded");
+		}
+	}
+
+	return m_filename;
+}
+
 NAMESPACE_END
diff --git a/gzip.h b/gzip.h
index a53a23e4..3f7b7cdc 100644
--- a/gzip.h
+++ b/gzip.h
@@ -35,9 +35,29 @@ public:
 	Gzip(const NameValuePairs &parameters, BufferedTransformation *attachment=NULLPTR)
-		: Deflator(parameters, attachment), m_totalLen(0) {}
+		: Deflator(parameters, attachment), m_totalLen(0), m_filetime(0) {}	// header time stamp is 0 unless SetFiletime() is called
 
+	//! \param filetime the filetime to set in the header. The application is responsible for setting it.
+	void SetFiletime(word32 filetime) { m_filetime = filetime; }
+
+	//! \param filename the original filename to set in the header. The application is responsible for setting it.
+	//!   RFC 1952 requires an ISO/IEC 8859-1 encoding.
+	//! \param throwOnEncodingError if throwOnEncodingError is true, then the filename is checked to ensure it is
+	//!   ISO/IEC 8859-1 encoded. If the filename does not adhere to ISO/IEC 8859-1, then an InvalidDataFormat
+	//!   is thrown. If throwOnEncodingError is false then the filename is not checked.
+	void SetFilename(const std::string& filename, bool throwOnEncodingError = false);
+
+	//! \param comment the comment to set in the header. The application is responsible for setting it.
+	//!   RFC 1952 requires an ISO/IEC 8859-1 encoding.
+	//! \param throwOnEncodingError if throwOnEncodingError is true, then the comment is checked to ensure it is
+	//!   ISO/IEC 8859-1 encoded. If the comment does not adhere to ISO/IEC 8859-1, then an InvalidDataFormat
+	//!   is thrown. If throwOnEncodingError is false then the comment is not checked.
+	void SetComment(const std::string& comment, bool throwOnEncodingError = false);
+
 protected:
 	enum {MAGIC1=0x1f, MAGIC2=0x8b,   // flags for the header
 		  DEFLATED=8, FAST=4, SLOW=2};
+
+	enum FLAG_MASKS {
+		FILENAME=8, COMMENTS=16};
 
 	void WritePrestreamHeader();
 	void ProcessUncompressedData(const byte *string, size_t length);
@@ -45,6 +65,10 @@ protected:
 
 	word32 m_totalLen;
 	CRC32 m_crc;
+
+	word32 m_filetime;
+	std::string m_filename;
+	std::string m_comment;
 };
 
 //! \class Gunzip
@@ -73,6 +97,21 @@ public:
 	//! \param autoSignalPropagation 0 to turn off MessageEnd signal
 	Gunzip(BufferedTransformation *attachment = NULLPTR, bool repeat = false, int autoSignalPropagation = -1);
 
+	//! \return the filetime of the stream as set in the header. The application is responsible for setting it on the decompressed file.
+	word32 GetFiletime() const { return m_filetime; }
+
+	//! \return the filename of the stream as set in the header. The application is responsible for setting it on the decompressed file.
+	//! \param throwOnEncodingError if throwOnEncodingError is true, then the filename is checked to ensure it is
+	//!   ISO/IEC 8859-1 encoded. If the filename does not adhere to ISO/IEC 8859-1, then an InvalidDataFormat is thrown.
+	//!   If throwOnEncodingError is false then the filename is not checked.
+	const std::string& GetFilename(bool throwOnEncodingError = false) const;
+
+	//! \return the comment of the stream as set in the header.
+	//! \param throwOnEncodingError if throwOnEncodingError is true, then the comment is checked to ensure it is
+	//!   ISO/IEC 8859-1 encoded. If the comment does not adhere to ISO/IEC 8859-1, then an InvalidDataFormat is thrown.
+	//!   If throwOnEncodingError is false then the comment is not checked.
+	const std::string& GetComment(bool throwOnEncodingError = false) const;
+
 protected:
 	enum {
 		//! \brief First header magic value
@@ -94,6 +133,10 @@ protected:
 
 	word32 m_length;
 	CRC32 m_crc;
+
+	word32 m_filetime;
+	std::string m_filename;
+	std::string m_comment;
 };
 
 NAMESPACE_END