|
// ==++==
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// zlib.h -- interface of the 'zlib' general purpose compression library
// version 1.2.1, November 17th, 2003
//
// Copyright (C) 1995-2003 Jean-loup Gailly and Mark Adler
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgment in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
//
//
// ==--==
namespace System.IO.Compression
{
using System;
using System.Diagnostics;
internal class Inflater : IInflater {
// const tables used in decoding:
// Extra bits for length code 257 - 285.
// Indexed by (code - 257). DecodeBlock only consults this table for indices 8..27;
// indices 0..7 and 28 are handled there as special cases with no extra bits.
private static readonly byte[] extraLengthBits = {
0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
// The base length for length code 257 - 285.
// The formula to get the real length for a length code is lengthBase[code - 257] + (value stored in extraBits)
private static readonly int[] lengthBase = {
3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258};
// The base distance for distance code 0 - 29
// The real distance for a distance code is distanceBasePosition[code] + (value stored in extraBits)
// The table has 32 entries; the last two (codes 30 and 31) are 0.
private static readonly int[] distanceBasePosition= {
1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};
// Code lengths for the code length alphabet are stored in the following order.
private static readonly byte[] codeOrder = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
// Lookup used by DecodeBlock for static blocks: the 5 bits read from the input index this
// table to obtain the distance code. Each entry is its own index with the five bits in
// reverse order (e.g. index 1 -> 0x10, index 2 -> 0x08).
private static readonly byte[] staticDistanceTreeTable = {
0x00,0x10,0x08,0x18,0x04,0x14,0x0c,0x1c,0x02,0x12,0x0a,0x1a,
0x06,0x16,0x0e,0x1e,0x01,0x11,0x09,0x19,0x05,0x15,0x0d,0x1d,
0x03,0x13,0x0b,0x1b,0x07,0x17,0x0f,0x1f,
};
// Decoded bytes are written here and later copied out to the caller by Inflate.
private OutputWindow output;
// Source of compressed bits/bytes; fed through SetInput.
private InputBuffer input;
// Huffman trees used by DecodeBlock for the current compressed block.
HuffmanTree literalLengthTree;
HuffmanTree distanceTree;
// Current position of the decoding state machine; lets decoding resume after running out of input.
InflaterState state;
// True when formatReader should be used to read a header/footer around the deflate data.
bool hasFormatReader;
// Value of the BFINAL bit of the block being decoded (non-zero means last block).
int bfinal;
// BTYPE of the block being decoded.
BlockType blockType;
// uncompressed block
// Holds the four LEN/NLEN bytes of an uncompressed block while they are being read.
byte[] blockLengthBuffer = new byte[4];
// Number of bytes of the uncompressed block that still have to be copied.
int blockLength;
// compressed block
// Length of the match being decoded; before the extra bits are read it temporarily
// holds the index into lengthBase (see DecodeBlock).
private int length;
// Distance code of the match being decoded.
private int distanceCode;
// Number of extra bits still to be read for the current length or distance.
private int extraBits;
// Resumable loop index used while reading the dynamic block header.
private int loopCounter;
// HLIT + 257: number of literal/length code lengths in the dynamic header.
private int literalLengthCodeCount;
// HDIST + 1: number of distance code lengths in the dynamic header.
private int distanceCodeCount;
// HCLEN + 4: number of code length codes in the dynamic header.
private int codeLengthCodeCount;
// literalLengthCodeCount + distanceCodeCount: total code lengths to read into codeList.
private int codeArraySize;
// Last symbol decoded from codeLengthTree; kept in a field so decoding can resume.
private int lengthCode;
private byte[] codeList; // temporary array to store the code length for literal/Length and distance
// Code lengths of the code length alphabet, indexed by symbol (filled via codeOrder).
private byte[] codeLengthTreeCodeLength;
// Huffman tree built from codeLengthTreeCodeLength, used to decode the entries of codeList.
HuffmanTree codeLengthTree;
IFileFormatReader formatReader; // class to decode header and footer (e.g. gzip)
// Creates an inflater with no file format reader: decoding starts directly
// at the first block's BFINAL bit.
public Inflater() : this(null) {
}

// Creates an inflater. When a non-null reader is supplied it is remembered and
// decoding starts by reading the header; otherwise no header/footer is processed.
internal Inflater(IFileFormatReader reader)
{
    // A null reader leaves both fields at the values they would have by default.
    formatReader = reader;
    hasFormatReader = (reader != null);

    output = new OutputWindow();
    input = new InputBuffer();

    // Scratch storage for the code lengths read from dynamic block headers.
    int combinedTreeSize = HuffmanTree.MaxLiteralTreeElements + HuffmanTree.MaxDistTreeElements;
    codeList = new byte[combinedTreeSize];
    codeLengthTreeCodeLength = new byte[HuffmanTree.NumberOfCodeLengthTreeElements];

    Reset();
}
// Installs the reader used for the header and footer, then calls Reset() so that
// decoding starts again from the ReadingHeader state.
// Note: unlike the constructor, this sets hasFormatReader to true without checking
// whether reader is null.
internal void SetFileFormatReader(IFileFormatReader reader) {
formatReader = reader;
hasFormatReader = true;
Reset();
}
// Puts the state machine at its starting point: the header when a format reader
// is in use, otherwise the BFINAL bit of the first block.
private void Reset() {
    state = hasFormatReader
        ? InflaterState.ReadingHeader
        : InflaterState.ReadingBFinal;
}
// Hands the given range of inputBytes (offset, length) to the input buffer.
public void SetInput(byte[] inputBytes, int offset, int length) {
input.SetInput(inputBytes, offset, length); // append the bytes
}
// Reports whether decoding of the compressed data is complete: true once the
// state is Done or VerifyingFooter, false in every other state.
public bool Finished() {
    switch (state) {
        case InflaterState.Done:
        case InflaterState.VerifyingFooter:
            return true;
        default:
            return false;
    }
}
// Number of bytes the output window reports as available (output.AvailableBytes).
public int AvailableOutput{
get {
return output.AvailableBytes;
}
}
// Forwards to input.NeedsInput().
// NOTE(review): presumably true when the input buffer has no unread bytes left - confirm in InputBuffer.
public bool NeedsInput(){
return input.NeedsInput();
}
// Copies decoded bytes into bytes[offset .. offset + length) and returns how many
// bytes were copied.
//
// Whatever is already available in the output window is copied first. While the
// destination is not full, more input is decoded and the copy is repeated. The loop
// ends when the destination is full, when decoding has finished, or when Decode()
// reports that more input is needed.
public int Inflate(byte[] bytes, int offset, int length) {
    int totalCopied = 0;

    while (true) {
        int chunk = output.CopyTo(bytes, offset, length);
        if (chunk > 0) {
            // Let the format reader see every byte that is handed to the caller.
            if (hasFormatReader) {
                formatReader.UpdateWithBytesRead(bytes, offset, chunk);
            }
            totalCopied += chunk;
            offset += chunk;
            length -= chunk;
        }

        if (length == 0) {
            // The destination range is completely filled.
            break;
        }
        if (Finished()) {
            break;
        }
        if (!Decode()) {
            // Decode returns false when more input is needed.
            break;
        }
    }

    // The footer has been read. Validation is deferred until the output window has
    // been drained, because some decoded data may not have been copied out yet.
    if (state == InflaterState.VerifyingFooter && output.AvailableBytes == 0) {
        formatReader.Validate();
    }

    return totalCopied;
}
//Each block of compressed data begins with 3 header bits
// containing the following data:
// first bit BFINAL
// next 2 bits BTYPE
// Note that the header bits do not necessarily begin on a byte
// boundary, since a block does not necessarily occupy an integral
// number of bytes.
// BFINAL is set if and only if this is the last block of the data
// set.
// BTYPE specifies how the data are compressed, as follows:
// 00 - no compression
// 01 - compressed with fixed Huffman codes
// 10 - compressed with dynamic Huffman codes
// 11 - reserved (error)
// The only difference between the two compressed cases is how the
// Huffman codes for the literal/length and distance alphabets are
// defined.
//
// This function returns true on success (the end of a block was reached or the output window is full),
// and false if we are short of input.
//
// Advances the state machine by one step: header/footer handling, the 3 block
// header bits, and then the decoder matching the current block type.
// Returns false when more input is required, otherwise the result of the step.
private bool Decode() {
    if (Finished()) {
        return true;
    }

    if (hasFormatReader) {
        switch (state) {
            case InflaterState.ReadingHeader:
                if (!formatReader.ReadHeader(input)) {
                    return false;
                }
                state = InflaterState.ReadingBFinal;
                break;

            case InflaterState.StartReadingFooter:
            case InflaterState.ReadingFooter:
                if (!formatReader.ReadFooter(input)) {
                    return false;
                }
                state = InflaterState.VerifyingFooter;
                return true;
        }
    }

    if (state == InflaterState.ReadingBFinal) {
        // BFINAL is a single bit.
        if (!input.EnsureBitsAvailable(1)) {
            return false;
        }
        bfinal = input.GetBits(1);
        state = InflaterState.ReadingBType;
    }

    if (state == InflaterState.ReadingBType) {
        // BTYPE is two bits; on shortage we stay in this state and retry later.
        if (!input.EnsureBitsAvailable(2)) {
            return false;
        }

        blockType = (BlockType)input.GetBits(2);
        switch (blockType) {
            case BlockType.Dynamic:
                Debug.WriteLineIf(CompressionTracingSwitch.Informational, "Decoding Dynamic Block", "Compression");
                state = InflaterState.ReadingNumLitCodes;
                break;

            case BlockType.Static:
                Debug.WriteLineIf(CompressionTracingSwitch.Informational, "Decoding Static Block", "Compression");
                literalLengthTree = HuffmanTree.StaticLiteralLengthTree;
                distanceTree = HuffmanTree.StaticDistanceTree;
                state = InflaterState.DecodeTop;
                break;

            case BlockType.Uncompressed:
                Debug.WriteLineIf(CompressionTracingSwitch.Informational, "Decoding UnCompressed Block", "Compression");
                state = InflaterState.UncompressedAligning;
                break;

            default:
                throw new InvalidDataException(SR.GetString(SR.UnknownBlockType));
        }
    }

    bool blockEnded = false;
    bool succeeded;
    switch (blockType) {
        case BlockType.Dynamic:
            if (state < InflaterState.DecodeTop) {
                // Still reading the dynamic block header.
                succeeded = DecodeDynamicBlockHeader();
            }
            else {
                // May also return true because the output window is full.
                succeeded = DecodeBlock(out blockEnded);
            }
            break;

        case BlockType.Static:
            succeeded = DecodeBlock(out blockEnded);
            break;

        case BlockType.Uncompressed:
            succeeded = DecodeUncompressedBlock(out blockEnded);
            break;

        default:
            throw new InvalidDataException(SR.GetString(SR.UnknownBlockType));
    }

    // The block just completed was flagged as the final one: move on to the
    // footer if a format reader is present, otherwise we are done.
    if (blockEnded && bfinal != 0) {
        state = hasFormatReader ? InflaterState.StartReadingFooter : InflaterState.Done;
    }

    return succeeded;
}
// Format of Non-compressed blocks (BTYPE=00):
//
// Any bits of input up to the next byte boundary are ignored.
// The rest of the block consists of the following information:
//
// 0 1 2 3 4...
// +---+---+---+---+================================+
// | LEN | NLEN |... LEN bytes of literal data...|
// +---+---+---+---+================================+
//
// LEN is the number of data bytes in the block. NLEN is the
// one's complement of LEN.
// Decodes a stored (BTYPE=00) block: skips to a byte boundary, reads the four
// LEN/NLEN bytes, checks that NLEN is the one's complement of LEN, then copies
// LEN bytes from the input to the output window.
//
// end_of_block is set to true once the whole block has been copied.
// Returns true when the block is complete or the output window has no free
// space left; returns false when more input is needed.
bool DecodeUncompressedBlock(out bool end_of_block) {
    end_of_block = false;

    for (;;) {
        switch (state) {
            case InflaterState.UncompressedAligning:
                // Entry state: the length bytes start on a byte boundary.
                input.SkipToByteBoundary();
                state = InflaterState.UncompressedByte1;
                goto case InflaterState.UncompressedByte1;

            case InflaterState.UncompressedByte1:
            case InflaterState.UncompressedByte2:
            case InflaterState.UncompressedByte3:
            case InflaterState.UncompressedByte4:
                int headerByte = input.GetBits(8);
                if (headerByte < 0) {
                    return false;
                }

                // The state itself tells us which of the four bytes this is.
                blockLengthBuffer[state - InflaterState.UncompressedByte1] = (byte)headerByte;

                if (state == InflaterState.UncompressedByte4) {
                    // Both values are stored least significant byte first.
                    blockLength = blockLengthBuffer[0] | (blockLengthBuffer[1] << 8);
                    int storedComplement = blockLengthBuffer[2] | (blockLengthBuffer[3] << 8);

                    if ((ushort)blockLength != (ushort)(~storedComplement)) {
                        throw new InvalidDataException(SR.GetString(SR.InvalidBlockLength));
                    }
                }

                // Advance to the next byte state (or to the copy state after byte 4).
                state++;
                break;

            case InflaterState.DecodingUncompressed:
                int copied = output.CopyFrom(input, blockLength);
                blockLength -= copied;

                if (blockLength == 0) {
                    // Whole block copied; the next thing to read is a block header.
                    state = InflaterState.ReadingBFinal;
                    end_of_block = true;
                    Debug.WriteLineIf(CompressionTracingSwitch.Informational, "End of Block", "Compression");
                    return true;
                }

                // An incomplete copy means either the output window is full
                // (success, caller must drain it) or the input ran dry (need more).
                return output.FreeBytes == 0;

            default:
                Debug.Assert(false, "check why we are here!");
                throw new InvalidDataException(SR.GetString(SR.UnknownState));
        }
    }
}
// Decodes literals and length/distance pairs of a compressed (static or dynamic)
// block into the output window, resuming from whatever sub-state was reached when
// input ran out previously.
//
// end_of_block_code_seen is set to true when the end-of-block symbol (256) is decoded.
// Returns true when the end of the block was reached or the output window is too
// full to safely decode another match; returns false when more input is needed.
// Throws InvalidDataException for an out-of-range length symbol, for a distance
// code above 29, or for an unexpected state.
bool DecodeBlock(out bool end_of_block_code_seen) {
    // Only distance codes 0-29 are defined for deflate. Codes 30 and 31 can be
    // produced by the 5-bit static code (or by a dynamic tree) but never occur in
    // valid data, and distanceBasePosition holds 0 for them, so they must be
    // rejected rather than turned into a bogus back-reference.
    const int MaxValidDistanceCode = 29;

    end_of_block_code_seen = false;
    int freeBytes = output.FreeBytes; // it is a little bit faster than frequently accessing the property
    while (freeBytes > 258) {
        // 258 means we can safely do decoding since maximum repeat length is 258
        int symbol;
        switch (state) {
            case InflaterState.DecodeTop:
                // decode an element from the literal tree
                symbol = literalLengthTree.GetNextSymbol(input);
                if (symbol < 0) { // running out of input
                    return false;
                }
                if (symbol < 256) { // literal
                    output.Write((byte)symbol);
                    --freeBytes;
                }
                else if (symbol == 256) { // end of block
                    end_of_block_code_seen = true;
                    Debug.WriteLineIf(CompressionTracingSwitch.Informational, "End of Block", "Compression");
                    // Reset state
                    state = InflaterState.ReadingBFinal;
                    return true;
                }
                else { // length/distance pair
                    symbol -= 257; // length code started at 257
                    if (symbol < 8) {
                        symbol += 3; // match length = 3,4,5,6,7,8,9,10
                        extraBits = 0;
                    }
                    else if (symbol == 28) { // extra bits for code 285 is 0
                        symbol = 258; // code 285 means length 258
                        extraBits = 0;
                    }
                    else {
                        if (symbol < 0 || symbol >= extraLengthBits.Length) {
                            throw new InvalidDataException(SR.GetString(SR.GenericInvalidData));
                        }
                        extraBits = extraLengthBits[symbol];
                        Debug.Assert(extraBits != 0, "We handle other cases seperately!");
                    }
                    // With extra bits pending this is still an index into lengthBase;
                    // otherwise it is already the final match length.
                    length = symbol;
                    goto case InflaterState.HaveInitialLength;
                }
                break;

            case InflaterState.HaveInitialLength:
                if (extraBits > 0) {
                    // Record the state first so that we resume here if input runs out.
                    state = InflaterState.HaveInitialLength;
                    int bits = input.GetBits(extraBits);
                    if (bits < 0) {
                        return false;
                    }
                    if (length < 0 || length >= lengthBase.Length) {
                        throw new InvalidDataException(SR.GetString(SR.GenericInvalidData));
                    }
                    length = lengthBase[length] + bits;
                }
                state = InflaterState.HaveFullLength;
                goto case InflaterState.HaveFullLength;

            case InflaterState.HaveFullLength:
                if (blockType == BlockType.Dynamic) {
                    distanceCode = distanceTree.GetNextSymbol(input);
                }
                else { // get distance code directly for static block
                    distanceCode = input.GetBits(5);
                    if (distanceCode >= 0) {
                        distanceCode = staticDistanceTreeTable[distanceCode];
                    }
                }
                if (distanceCode < 0) { // running out input
                    return false;
                }
                if (distanceCode > MaxValidDistanceCode) {
                    throw new InvalidDataException(SR.GetString(SR.GenericInvalidData));
                }
                state = InflaterState.HaveDistCode;
                goto case InflaterState.HaveDistCode;

            case InflaterState.HaveDistCode:
                // Distance codes 0-3 carry no extra bits. For larger codes a table
                // lookup is avoided by noting that extra_bits = (distanceCode-2) >> 1.
                int offset;
                if (distanceCode > 3) {
                    extraBits = (distanceCode - 2) >> 1;
                    int bits = input.GetBits(extraBits);
                    if (bits < 0) {
                        return false;
                    }
                    offset = distanceBasePosition[distanceCode] + bits;
                }
                else {
                    offset = distanceCode + 1;
                }
                Debug.Assert(freeBytes >= 258, "following operation is not safe!");
                output.WriteLengthDistance(length, offset);
                freeBytes -= length;
                state = InflaterState.DecodeTop;
                break;

            default:
                Debug.Assert(false, "check why we are here!");
                throw new InvalidDataException(SR.GetString(SR.UnknownState));
        }
    }
    // Not enough room left in the output window for a maximum-length match;
    // report success so the caller drains the window and calls again.
    return true;
}
// Format of the dynamic block header:
// 5 Bits: HLIT, # of Literal/Length codes - 257 (257 - 286)
// 5 Bits: HDIST, # of Distance codes - 1 (1 - 32)
// 4 Bits: HCLEN, # of Code Length codes - 4 (4 - 19)
//
// (HCLEN + 4) x 3 bits: code lengths for the code length
// alphabet given just above, in the order: 16, 17, 18,
// 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
//
// These code lengths are interpreted as 3-bit integers
// (0-7); as above, a code length of 0 means the
// corresponding symbol (literal/length or distance code
// length) is not used.
//
// HLIT + 257 code lengths for the literal/length alphabet,
// encoded using the code length Huffman code
//
// HDIST + 1 code lengths for the distance alphabet,
// encoded using the code length Huffman code
//
// The code length repeat codes can cross from HLIT + 257 to the
// HDIST + 1 code lengths. In other words, all code lengths form
// a single sequence of HLIT + HDIST + 258 values.
// Reads the header of a dynamic Huffman block (HLIT, HDIST, HCLEN, the code length
// code lengths and then the literal/length and distance code lengths) and builds
// literalLengthTree and distanceTree from it.
//
// The method is resumable: progress is kept in state, loopCounter and lengthCode.
// Returns false when more input is needed, and true once both trees have been built
// and state has been set to DecodeTop.
// Throws InvalidDataException when a repeat code has no previous length to repeat,
// when a repeat would run past the declared number of code lengths, when the
// end-of-block symbol has no code, or when called in an unexpected state.
bool DecodeDynamicBlockHeader() {
    switch (state) {
        case InflaterState.ReadingNumLitCodes:
            literalLengthCodeCount = input.GetBits(5);
            if (literalLengthCodeCount < 0) {
                return false;
            }
            literalLengthCodeCount += 257;
            state = InflaterState.ReadingNumDistCodes;
            goto case InflaterState.ReadingNumDistCodes;

        case InflaterState.ReadingNumDistCodes:
            distanceCodeCount = input.GetBits(5);
            if (distanceCodeCount < 0) {
                return false;
            }
            distanceCodeCount += 1;
            state = InflaterState.ReadingNumCodeLengthCodes;
            goto case InflaterState.ReadingNumCodeLengthCodes;

        case InflaterState.ReadingNumCodeLengthCodes:
            codeLengthCodeCount = input.GetBits(4);
            if (codeLengthCodeCount < 0) {
                return false;
            }
            codeLengthCodeCount += 4;
            loopCounter = 0;
            state = InflaterState.ReadingCodeLengthCodes;
            goto case InflaterState.ReadingCodeLengthCodes;

        case InflaterState.ReadingCodeLengthCodes:
            while (loopCounter < codeLengthCodeCount) {
                int bits = input.GetBits(3);
                if (bits < 0) {
                    return false;
                }
                codeLengthTreeCodeLength[codeOrder[loopCounter]] = (byte)bits;
                ++loopCounter;
            }
            // Symbols whose length was not transmitted are unused (length 0).
            for (int i = codeLengthCodeCount; i < codeOrder.Length; i++) {
                codeLengthTreeCodeLength[codeOrder[i]] = 0;
            }
            // create huffman tree for code length
            codeLengthTree = new HuffmanTree(codeLengthTreeCodeLength);
            codeArraySize = literalLengthCodeCount + distanceCodeCount;
            loopCounter = 0; // reset loop count
            state = InflaterState.ReadingTreeCodesBefore;
            goto case InflaterState.ReadingTreeCodesBefore;

        case InflaterState.ReadingTreeCodesBefore:
        case InflaterState.ReadingTreeCodesAfter:
            while (loopCounter < codeArraySize) {
                // In the "After" state the symbol was already decoded on a previous
                // call and only its extra bits are still missing.
                if (state == InflaterState.ReadingTreeCodesBefore) {
                    if ((lengthCode = codeLengthTree.GetNextSymbol(input)) < 0) {
                        return false;
                    }
                }
                // The alphabet for code lengths is as follows:
                //  0 - 15: Represent code lengths of 0 - 15
                //  16: Copy the previous code length 3 - 6 times.
                //      The next 2 bits indicate repeat length
                //      (0 = 3, ... , 3 = 6)
                //      Example: Codes 8, 16 (+2 bits 11),
                //      16 (+2 bits 10) will expand to
                //      12 code lengths of 8 (1 + 6 + 5)
                //  17: Repeat a code length of 0 for 3 - 10 times.
                //      (3 bits of length)
                //  18: Repeat a code length of 0 for 11 - 138 times
                //      (7 bits of length)
                if (lengthCode <= 15) {
                    codeList[loopCounter++] = (byte)lengthCode;
                }
                else {
                    // Ask only for the bits this repeat code really carries. Demanding
                    // 7 bits for every repeat code can stall forever on a valid stream
                    // whose few remaining bits follow a 2- or 3-bit repeat code.
                    int repeatBitCount;
                    int repeatBase;
                    if (lengthCode == 16) {
                        repeatBitCount = 2;
                        repeatBase = 3;
                    }
                    else if (lengthCode == 17) {
                        repeatBitCount = 3;
                        repeatBase = 3;
                    }
                    else { // code == 18
                        repeatBitCount = 7;
                        repeatBase = 11;
                    }

                    if (!input.EnsureBitsAvailable(repeatBitCount)) {
                        state = InflaterState.ReadingTreeCodesAfter;
                        return false;
                    }

                    // Code 16 repeats the previous length; codes 17 and 18 repeat zero.
                    byte repeatedValue = 0;
                    if (lengthCode == 16) {
                        if (loopCounter == 0) { // can't have "prev code" on first code
                            throw new InvalidDataException();
                        }
                        repeatedValue = codeList[loopCounter - 1];
                    }

                    int repeatCount = input.GetBits(repeatBitCount) + repeatBase;
                    if (loopCounter + repeatCount > codeArraySize) {
                        throw new InvalidDataException();
                    }
                    for (int j = 0; j < repeatCount; j++) {
                        codeList[loopCounter++] = repeatedValue;
                    }
                }
                state = InflaterState.ReadingTreeCodesBefore; // we want to read the next code.
            }
            break;

        default:
            Debug.Assert(false, "check why we are here!");
            throw new InvalidDataException(SR.GetString(SR.UnknownState));
    }

    byte[] literalTreeCodeLength = new byte[HuffmanTree.MaxLiteralTreeElements];
    byte[] distanceTreeCodeLength = new byte[HuffmanTree.MaxDistTreeElements];

    // Create literal and distance tables: codeList holds the literal/length code
    // lengths first, immediately followed by the distance code lengths.
    Array.Copy(codeList, literalTreeCodeLength, literalLengthCodeCount);
    Array.Copy(codeList, literalLengthCodeCount, distanceTreeCodeLength, 0, distanceCodeCount);

    // Make sure there is an end-of-block code, otherwise how could we ever end?
    if (literalTreeCodeLength[HuffmanTree.EndOfBlockCode] == 0) {
        throw new InvalidDataException();
    }

    literalLengthTree = new HuffmanTree(literalTreeCodeLength);
    distanceTree = new HuffmanTree(distanceTreeCodeLength);
    state = InflaterState.DecodeTop;
    return true;
}
// No-op: the body is empty, so nothing is released here.
// NOTE(review): presumably present to satisfy IInflater - confirm against the interface.
public void Dispose() { }
}
}
|