add detection of uncompressibility

pull/2/head
weidai 2004-04-08 01:28:31 +00:00
parent 391a032791
commit 3b1e3952c9
4 changed files with 96 additions and 48 deletions

4
gzip.h
View File

@ -11,8 +11,8 @@ NAMESPACE_BEGIN(CryptoPP)
class Gzip : public Deflator class Gzip : public Deflator
{ {
public: public:
Gzip(BufferedTransformation *attachment=NULL, unsigned int deflateLevel=DEFAULT_DEFLATE_LEVEL, unsigned int log2WindowSize=DEFAULT_LOG2_WINDOW_SIZE) Gzip(BufferedTransformation *attachment=NULL, unsigned int deflateLevel=DEFAULT_DEFLATE_LEVEL, unsigned int log2WindowSize=DEFAULT_LOG2_WINDOW_SIZE, bool detectUncompressible=true)
: Deflator(attachment, deflateLevel, log2WindowSize) {} : Deflator(attachment, deflateLevel, log2WindowSize, detectUncompressible) {}
Gzip(const NameValuePairs &parameters, BufferedTransformation *attachment=NULL) Gzip(const NameValuePairs &parameters, BufferedTransformation *attachment=NULL)
: Deflator(parameters, attachment) {} : Deflator(parameters, attachment) {}

View File

@ -210,11 +210,11 @@ inline void HuffmanEncoder::Encode(LowFirstBitWriter &writer, value_t value) con
writer.PutBits(m_valueToCode[value].code, m_valueToCode[value].len); writer.PutBits(m_valueToCode[value].code, m_valueToCode[value].len);
} }
Deflator::Deflator(BufferedTransformation *attachment, int deflateLevel, int log2WindowSize) Deflator::Deflator(BufferedTransformation *attachment, int deflateLevel, int log2WindowSize, bool detectUncompressible)
: LowFirstBitWriter(attachment) : LowFirstBitWriter(attachment)
{ {
InitializeStaticEncoders(); InitializeStaticEncoders();
IsolatedInitialize(MakeParameters("DeflateLevel", deflateLevel)("Log2WindowSize", log2WindowSize)); IsolatedInitialize(MakeParameters("DeflateLevel", deflateLevel)("Log2WindowSize", log2WindowSize)("DetectUncompressible", detectUncompressible));
} }
Deflator::Deflator(const NameValuePairs &parameters, BufferedTransformation *attachment) Deflator::Deflator(const NameValuePairs &parameters, BufferedTransformation *attachment)
@ -239,7 +239,6 @@ void Deflator::InitializeStaticEncoders()
void Deflator::IsolatedInitialize(const NameValuePairs &parameters) void Deflator::IsolatedInitialize(const NameValuePairs &parameters)
{ {
int log2WindowSize = parameters.GetIntValueWithDefault("Log2WindowSize", DEFAULT_LOG2_WINDOW_SIZE); int log2WindowSize = parameters.GetIntValueWithDefault("Log2WindowSize", DEFAULT_LOG2_WINDOW_SIZE);
if (!(MIN_LOG2_WINDOW_SIZE <= log2WindowSize && log2WindowSize <= MAX_LOG2_WINDOW_SIZE)) if (!(MIN_LOG2_WINDOW_SIZE <= log2WindowSize && log2WindowSize <= MAX_LOG2_WINDOW_SIZE))
throw InvalidArgument("Deflator: " + IntToString(log2WindowSize) + " is an invalid window size"); throw InvalidArgument("Deflator: " + IntToString(log2WindowSize) + " is an invalid window size");
@ -252,9 +251,11 @@ void Deflator::IsolatedInitialize(const NameValuePairs &parameters)
m_head.New(HSIZE); m_head.New(HSIZE);
m_prev.New(DSIZE); m_prev.New(DSIZE);
m_matchBuffer.New(DSIZE/2); m_matchBuffer.New(DSIZE/2);
Reset(true);
SetDeflateLevel(parameters.GetIntValueWithDefault("DeflateLevel", DEFAULT_DEFLATE_LEVEL)); SetDeflateLevel(parameters.GetIntValueWithDefault("DeflateLevel", DEFAULT_DEFLATE_LEVEL));
Reset(true); bool detectUncompressible = parameters.GetValueWithDefault("DetectUncompressible", true);
m_compressibleDeflateLevel = detectUncompressible ? m_deflateLevel : 0;
} }
void Deflator::Reset(bool forceReset) void Deflator::Reset(bool forceReset)
@ -270,12 +271,13 @@ void Deflator::Reset(bool forceReset)
m_stringStart = 0; m_stringStart = 0;
m_lookahead = 0; m_lookahead = 0;
m_minLookahead = MAX_MATCH; m_minLookahead = MAX_MATCH;
m_previousMatch = 0;
m_previousLength = 0;
m_matchBufferEnd = 0; m_matchBufferEnd = 0;
m_blockStart = 0; m_blockStart = 0;
m_blockLength = 0; m_blockLength = 0;
m_detectCount = 1;
m_detectSkip = 0;
// m_prev will be initialized automatically in InsertString // m_prev will be initialized automatically in InsertString
fill(m_head.begin(), m_head.end(), 0); fill(m_head.begin(), m_head.end(), 0);
@ -288,7 +290,12 @@ void Deflator::SetDeflateLevel(int deflateLevel)
if (!(MIN_DEFLATE_LEVEL <= deflateLevel && deflateLevel <= MAX_DEFLATE_LEVEL)) if (!(MIN_DEFLATE_LEVEL <= deflateLevel && deflateLevel <= MAX_DEFLATE_LEVEL))
throw InvalidArgument("Deflator: " + IntToString(deflateLevel) + " is an invalid deflate level"); throw InvalidArgument("Deflator: " + IntToString(deflateLevel) + " is an invalid deflate level");
unsigned int configurationTable[10][4] = { if (deflateLevel == m_deflateLevel)
return;
EndBlock(false);
static const unsigned int configurationTable[10][4] = {
/* good lazy nice chain */ /* good lazy nice chain */
/* 0 */ {0, 0, 0, 0}, /* store only */ /* 0 */ {0, 0, 0, 0}, /* store only */
/* 1 */ {4, 3, 8, 4}, /* maximum speed, no lazy matches */ /* 1 */ {4, 3, 8, 4}, /* maximum speed, no lazy matches */
@ -310,9 +317,9 @@ void Deflator::SetDeflateLevel(int deflateLevel)
unsigned int Deflator::FillWindow(const byte *str, unsigned int length) unsigned int Deflator::FillWindow(const byte *str, unsigned int length)
{ {
unsigned int accepted = STDMIN(length, 2*DSIZE-(m_stringStart+m_lookahead)); unsigned int maxBlockSize = (unsigned int)STDMIN(2UL*DSIZE, 0xffffUL);
if (m_stringStart >= 2*DSIZE - MAX_MATCH) if (m_stringStart >= maxBlockSize - MAX_MATCH)
{ {
if (m_blockStart < DSIZE) if (m_blockStart < DSIZE)
EndBlock(false); EndBlock(false);
@ -322,7 +329,7 @@ unsigned int Deflator::FillWindow(const byte *str, unsigned int length)
m_dictionaryEnd = m_dictionaryEnd < DSIZE ? 0 : m_dictionaryEnd-DSIZE; m_dictionaryEnd = m_dictionaryEnd < DSIZE ? 0 : m_dictionaryEnd-DSIZE;
assert(m_stringStart >= DSIZE); assert(m_stringStart >= DSIZE);
m_stringStart -= DSIZE; m_stringStart -= DSIZE;
assert(m_previousMatch >= DSIZE || m_previousLength < MIN_MATCH); assert(!m_matchAvailable || m_previousMatch >= DSIZE);
m_previousMatch -= DSIZE; m_previousMatch -= DSIZE;
assert(m_blockStart >= DSIZE); assert(m_blockStart >= DSIZE);
m_blockStart -= DSIZE; m_blockStart -= DSIZE;
@ -334,11 +341,11 @@ unsigned int Deflator::FillWindow(const byte *str, unsigned int length)
for (i=0; i<DSIZE; i++) for (i=0; i<DSIZE; i++)
m_prev[i] = SaturatingSubtract(m_prev[i], DSIZE); m_prev[i] = SaturatingSubtract(m_prev[i], DSIZE);
accepted = STDMIN(accepted + DSIZE, length);
} }
assert(accepted > 0);
assert(maxBlockSize > m_stringStart+m_lookahead);
unsigned int accepted = STDMIN(length, maxBlockSize-(m_stringStart+m_lookahead));
assert(accepted > 0);
memcpy(m_byteBuffer + m_stringStart + m_lookahead, str, accepted); memcpy(m_byteBuffer + m_stringStart + m_lookahead, str, accepted);
m_lookahead += accepted; m_lookahead += accepted;
return accepted; return accepted;
@ -406,11 +413,10 @@ void Deflator::ProcessBuffer()
if (m_deflateLevel == 0) if (m_deflateLevel == 0)
{ {
while (m_lookahead > 0) m_stringStart += m_lookahead;
{ m_lookahead = 0;
LiteralByte(m_byteBuffer[m_stringStart++]); m_blockLength = m_stringStart - m_blockStart;
m_lookahead--; m_matchAvailable = false;
}
return; return;
} }
@ -428,7 +434,7 @@ void Deflator::ProcessBuffer()
else else
{ {
matchLength = LongestMatch(matchPosition); matchLength = LongestMatch(matchPosition);
usePreviousMatch = (m_previousLength > 0 && matchLength == 0); usePreviousMatch = (matchLength == 0);
} }
if (usePreviousMatch) if (usePreviousMatch)
{ {
@ -436,7 +442,6 @@ void Deflator::ProcessBuffer()
m_stringStart += m_previousLength-1; m_stringStart += m_previousLength-1;
m_lookahead -= m_previousLength-1; m_lookahead -= m_previousLength-1;
m_matchAvailable = false; m_matchAvailable = false;
m_previousLength = 0;
} }
else else
{ {
@ -449,13 +454,19 @@ void Deflator::ProcessBuffer()
} }
else else
{ {
m_previousLength = 0;
m_previousLength = LongestMatch(m_previousMatch); m_previousLength = LongestMatch(m_previousMatch);
m_matchAvailable = true; if (m_previousLength)
m_matchAvailable = true;
else
LiteralByte(m_byteBuffer[m_stringStart]);
m_stringStart++; m_stringStart++;
m_lookahead--; m_lookahead--;
} }
assert(m_stringStart - (m_blockStart+m_blockLength) == (unsigned int)m_matchAvailable);
} }
assert(m_stringStart - (m_blockStart+m_blockLength) <= 1);
if (m_minLookahead == 0 && m_matchAvailable) if (m_minLookahead == 0 && m_matchAvailable)
{ {
LiteralByte(m_byteBuffer[m_stringStart-1]); LiteralByte(m_byteBuffer[m_stringStart-1]);
@ -509,15 +520,19 @@ bool Deflator::IsolatedFlush(bool hardFlush, bool blocking)
void Deflator::LiteralByte(byte b) void Deflator::LiteralByte(byte b)
{ {
if (m_matchBufferEnd == m_matchBuffer.size())
EndBlock(false);
m_matchBuffer[m_matchBufferEnd++].literalCode = b; m_matchBuffer[m_matchBufferEnd++].literalCode = b;
m_literalCounts[b]++; m_literalCounts[b]++;
m_blockLength++;
if (m_blockStart+(++m_blockLength) == m_byteBuffer.size() || m_matchBufferEnd == m_matchBuffer.size())
EndBlock(false);
} }
void Deflator::MatchFound(unsigned int distance, unsigned int length) void Deflator::MatchFound(unsigned int distance, unsigned int length)
{ {
if (m_matchBufferEnd == m_matchBuffer.size())
EndBlock(false);
static const unsigned int lengthCodes[] = { static const unsigned int lengthCodes[] = {
257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268, 268, 257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268, 268,
269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272, 272, 272, 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272, 272, 272,
@ -540,6 +555,7 @@ void Deflator::MatchFound(unsigned int distance, unsigned int length)
{1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577}; {1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577};
EncodedMatch &m = m_matchBuffer[m_matchBufferEnd++]; EncodedMatch &m = m_matchBuffer[m_matchBufferEnd++];
assert(length >= 3);
unsigned int lengthCode = lengthCodes[length-3]; unsigned int lengthCode = lengthCodes[length-3];
m.literalCode = lengthCode; m.literalCode = lengthCode;
m.literalExtra = length - lengthBases[lengthCode-257]; m.literalExtra = length - lengthBases[lengthCode-257];
@ -549,9 +565,7 @@ void Deflator::MatchFound(unsigned int distance, unsigned int length)
m_literalCounts[lengthCode]++; m_literalCounts[lengthCode]++;
m_distanceCounts[distanceCode]++; m_distanceCounts[distanceCode]++;
m_blockLength += length;
if (m_blockStart+(m_blockLength+=length) == m_byteBuffer.size() || m_matchBufferEnd == m_matchBuffer.size())
EndBlock(false);
} }
inline unsigned int CodeLengthEncode(const unsigned int *begin, inline unsigned int CodeLengthEncode(const unsigned int *begin,
@ -604,6 +618,7 @@ void Deflator::EncodeBlock(bool eof, unsigned int blockType)
if (blockType == STORED) if (blockType == STORED)
{ {
assert(m_blockStart + m_blockLength <= m_byteBuffer.size()); assert(m_blockStart + m_blockLength <= m_byteBuffer.size());
assert(m_blockLength <= 0xffff);
FlushBitBuffer(); FlushBitBuffer();
AttachedTransformation()->PutWord16(m_blockLength, LITTLE_ENDIAN_ORDER); AttachedTransformation()->PutWord16(m_blockLength, LITTLE_ENDIAN_ORDER);
AttachedTransformation()->PutWord16(~m_blockLength, LITTLE_ENDIAN_ORDER); AttachedTransformation()->PutWord16(~m_blockLength, LITTLE_ENDIAN_ORDER);
@ -701,29 +716,58 @@ void Deflator::EncodeBlock(bool eof, unsigned int blockType)
void Deflator::EndBlock(bool eof) void Deflator::EndBlock(bool eof)
{ {
if (m_matchBufferEnd == 0 && !eof) if (m_blockLength == 0 && !eof)
return; return;
if (m_deflateLevel == 0) if (m_deflateLevel == 0)
{
EncodeBlock(eof, STORED); EncodeBlock(eof, STORED);
else if (m_blockLength < 128)
EncodeBlock(eof, STATIC); if (m_compressibleDeflateLevel > 0 && ++m_detectCount == m_detectSkip)
{
m_deflateLevel = m_compressibleDeflateLevel;
m_detectCount = 1;
}
}
else else
{ {
unsigned int storedLen = 8*(m_blockLength+4) + RoundUpToMultipleOf(m_bitsBuffered+3, 8U)-m_bitsBuffered; unsigned long storedLen = 8*((unsigned long)m_blockLength+4) + RoundUpToMultipleOf(m_bitsBuffered+3, 8U)-m_bitsBuffered;
StartCounting(); StartCounting();
EncodeBlock(eof, STATIC); EncodeBlock(eof, STATIC);
unsigned int staticLen = FinishCounting(); unsigned long staticLen = FinishCounting();
StartCounting();
EncodeBlock(eof, DYNAMIC); unsigned long dynamicLen;
unsigned int dynamicLen = FinishCounting(); if (m_blockLength < 128 && m_deflateLevel < 8)
dynamicLen = ULONG_MAX;
else
{
StartCounting();
EncodeBlock(eof, DYNAMIC);
dynamicLen = FinishCounting();
}
if (storedLen <= staticLen && storedLen <= dynamicLen) if (storedLen <= staticLen && storedLen <= dynamicLen)
{
EncodeBlock(eof, STORED); EncodeBlock(eof, STORED);
else if (staticLen <= dynamicLen)
EncodeBlock(eof, STATIC); if (m_compressibleDeflateLevel > 0)
{
if (m_detectSkip)
m_deflateLevel = 0;
m_detectSkip = m_detectSkip ? STDMIN(2*m_detectSkip, 128U) : 1;
}
}
else else
EncodeBlock(eof, DYNAMIC); {
if (staticLen <= dynamicLen)
EncodeBlock(eof, STATIC);
else
EncodeBlock(eof, DYNAMIC);
if (m_compressibleDeflateLevel > 0)
m_detectSkip = 0;
}
} }
m_matchBufferEnd = 0; m_matchBufferEnd = 0;

View File

@ -57,8 +57,11 @@ class Deflator : public LowFirstBitWriter
public: public:
enum {MIN_DEFLATE_LEVEL = 0, DEFAULT_DEFLATE_LEVEL = 6, MAX_DEFLATE_LEVEL = 9}; enum {MIN_DEFLATE_LEVEL = 0, DEFAULT_DEFLATE_LEVEL = 6, MAX_DEFLATE_LEVEL = 9};
enum {MIN_LOG2_WINDOW_SIZE = 9, DEFAULT_LOG2_WINDOW_SIZE = 15, MAX_LOG2_WINDOW_SIZE = 15}; enum {MIN_LOG2_WINDOW_SIZE = 9, DEFAULT_LOG2_WINDOW_SIZE = 15, MAX_LOG2_WINDOW_SIZE = 15};
Deflator(BufferedTransformation *attachment=NULL, int deflateLevel=DEFAULT_DEFLATE_LEVEL, int log2WindowSize=DEFAULT_LOG2_WINDOW_SIZE); /*! \note detectUncompressible makes it faster to process uncompressible files, but
//! possible parameter names: Log2WindowSize, DeflateLevel if a file has both compressible and uncompressible parts, it may fail to compress some of the
compressible parts. */
Deflator(BufferedTransformation *attachment=NULL, int deflateLevel=DEFAULT_DEFLATE_LEVEL, int log2WindowSize=DEFAULT_LOG2_WINDOW_SIZE, bool detectUncompressible=true);
//! possible parameter names: Log2WindowSize, DeflateLevel, DetectUncompressible
Deflator(const NameValuePairs &parameters, BufferedTransformation *attachment=NULL); Deflator(const NameValuePairs &parameters, BufferedTransformation *attachment=NULL);
//! this function can be used to set the deflate level in the middle of compression //! this function can be used to set the deflate level in the middle of compression
@ -70,7 +73,7 @@ public:
unsigned int Put2(const byte *inString, unsigned int length, int messageEnd, bool blocking); unsigned int Put2(const byte *inString, unsigned int length, int messageEnd, bool blocking);
bool IsolatedFlush(bool hardFlush, bool blocking); bool IsolatedFlush(bool hardFlush, bool blocking);
private: protected:
virtual void WritePrestreamHeader() {} virtual void WritePrestreamHeader() {}
virtual void ProcessUncompressedData(const byte *string, unsigned int length) {} virtual void ProcessUncompressedData(const byte *string, unsigned int length) {}
virtual void WritePoststreamTail() {} virtual void WritePoststreamTail() {}
@ -99,7 +102,8 @@ private:
unsigned distanceExtra : 13; unsigned distanceExtra : 13;
}; };
int m_deflateLevel, m_log2WindowSize; int m_deflateLevel, m_log2WindowSize, m_compressibleDeflateLevel;
unsigned int m_detectSkip, m_detectCount;
unsigned int DSIZE, DMASK, HSIZE, HMASK, GOOD_MATCH, MAX_LAZYLENGTH, MAX_CHAIN_LENGTH; unsigned int DSIZE, DMASK, HSIZE, HMASK, GOOD_MATCH, MAX_LAZYLENGTH, MAX_CHAIN_LENGTH;
bool m_headerWritten, m_matchAvailable; bool m_headerWritten, m_matchAvailable;
unsigned int m_dictionaryEnd, m_stringStart, m_lookahead, m_minLookahead, m_previousMatch, m_previousLength; unsigned int m_dictionaryEnd, m_stringStart, m_lookahead, m_minLookahead, m_previousMatch, m_previousLength;

6
zlib.h
View File

@ -11,14 +11,14 @@ NAMESPACE_BEGIN(CryptoPP)
class ZlibCompressor : public Deflator class ZlibCompressor : public Deflator
{ {
public: public:
ZlibCompressor(BufferedTransformation *attachment=NULL, unsigned int deflateLevel=DEFAULT_DEFLATE_LEVEL, unsigned int log2WindowSize=DEFAULT_LOG2_WINDOW_SIZE) ZlibCompressor(BufferedTransformation *attachment=NULL, unsigned int deflateLevel=DEFAULT_DEFLATE_LEVEL, unsigned int log2WindowSize=DEFAULT_LOG2_WINDOW_SIZE, bool detectUncompressible=true)
: Deflator(attachment, deflateLevel, log2WindowSize) {} : Deflator(attachment, deflateLevel, log2WindowSize, detectUncompressible) {}
ZlibCompressor(const NameValuePairs &parameters, BufferedTransformation *attachment=NULL) ZlibCompressor(const NameValuePairs &parameters, BufferedTransformation *attachment=NULL)
: Deflator(parameters, attachment) {} : Deflator(parameters, attachment) {}
unsigned int GetCompressionLevel() const; unsigned int GetCompressionLevel() const;
private: protected:
void WritePrestreamHeader(); void WritePrestreamHeader();
void ProcessUncompressedData(const byte *string, unsigned int length); void ProcessUncompressedData(const byte *string, unsigned int length);
void WritePoststreamTail(); void WritePoststreamTail();