// Copyright (C) 2003  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_COMPRESS_STREAM_KERNEl_2_
|
|
#define DLIB_COMPRESS_STREAM_KERNEl_2_
|
|
|
|
#include "../algs.h"
|
|
#include <iostream>
|
|
#include <streambuf>
|
|
#include "compress_stream_kernel_abstract.h"
|
|
|
|
namespace dlib
|
|
{
|
|
|
|
template <
|
|
typename fce,
|
|
typename fcd,
|
|
typename lz77_buffer,
|
|
typename sliding_buffer,
|
|
typename fce_length,
|
|
typename fcd_length,
|
|
typename fce_index,
|
|
typename fcd_index,
|
|
typename crc32
|
|
>
|
|
class compress_stream_kernel_2
|
|
{
|
|
/*!
|
|
REQUIREMENTS ON fce
|
|
is an implementation of entropy_encoder_model/entropy_encoder_model_kernel_abstract.h
|
|
the alphabet_size of fce must be 257.
|
|
fce and fcd share the same kernel number.
|
|
|
|
REQUIREMENTS ON fcd
|
|
is an implementation of entropy_decoder_model/entropy_decoder_model_kernel_abstract.h
|
|
the alphabet_size of fcd must be 257.
|
|
fce and fcd share the same kernel number.
|
|
|
|
REQUIREMENTS ON lz77_buffer
|
|
is an implementation of lz77_buffer/lz77_buffer_kernel_abstract.h
|
|
|
|
REQUIREMENTS ON sliding_buffer
|
|
is an implementation of sliding_buffer/sliding_buffer_kernel_abstract.h
|
|
is instantiated with T = unsigned char
|
|
|
|
REQUIREMENTS ON fce_length
|
|
is an implementation of entropy_encoder_model/entropy_encoder_model_kernel_abstract.h
|
|
the alphabet_size of fce must be 513. This will be used to encode the length of lz77 matches.
|
|
fce_length and fcd share the same kernel number.
|
|
|
|
REQUIREMENTS ON fcd_length
|
|
is an implementation of entropy_decoder_model/entropy_decoder_model_kernel_abstract.h
|
|
the alphabet_size of fcd must be 513. This will be used to decode the length of lz77 matches.
|
|
fce_length and fcd share the same kernel number.
|
|
|
|
REQUIREMENTS ON fce_index
|
|
is an implementation of entropy_encoder_model/entropy_encoder_model_kernel_abstract.h
|
|
the alphabet_size of fce must be 32257. This will be used to encode the index of lz77 matches.
|
|
fce_index and fcd share the same kernel number.
|
|
|
|
REQUIREMENTS ON fcd_index
|
|
is an implementation of entropy_decoder_model/entropy_decoder_model_kernel_abstract.h
|
|
the alphabet_size of fcd must be 32257. This will be used to decode the index of lz77 matches.
|
|
fce_index and fcd share the same kernel number.
|
|
|
|
REQUIREMENTS ON crc32
|
|
is an implementation of crc32/crc32_kernel_abstract.h
|
|
|
|
INITIAL VALUE
|
|
this object has no state
|
|
|
|
CONVENTION
|
|
this object has no state
|
|
!*/
|
|
|
|
const static unsigned long eof_symbol = 256;
|
|
|
|
public:
|
|
|
|
class decompression_error : public dlib::error
|
|
{
|
|
public:
|
|
decompression_error(
|
|
const char* i
|
|
) :
|
|
dlib::error(std::string(i))
|
|
{}
|
|
|
|
decompression_error(
|
|
const std::string& i
|
|
) :
|
|
dlib::error(i)
|
|
{}
|
|
};
|
|
|
|
|
|
compress_stream_kernel_2 (
|
|
)
|
|
{}
|
|
|
|
~compress_stream_kernel_2 (
|
|
)
|
|
{}
|
|
|
|
void compress (
|
|
std::istream& in,
|
|
std::ostream& out
|
|
) const;
|
|
|
|
void decompress (
|
|
std::istream& in,
|
|
std::ostream& out
|
|
) const;
|
|
|
|
private:
|
|
|
|
// restricted functions
|
|
compress_stream_kernel_2(compress_stream_kernel_2&); // copy constructor
|
|
compress_stream_kernel_2& operator=(compress_stream_kernel_2&); // assignment operator
|
|
|
|
};
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
// ----------------------------------------------------------------------------------------
|
|
// member function definitions
|
|
// ----------------------------------------------------------------------------------------
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename fce,
|
|
typename fcd,
|
|
typename lz77_buffer,
|
|
typename sliding_buffer,
|
|
typename fce_length,
|
|
typename fcd_length,
|
|
typename fce_index,
|
|
typename fcd_index,
|
|
typename crc32
|
|
>
|
|
void compress_stream_kernel_2<fce,fcd,lz77_buffer,sliding_buffer,fce_length,fcd_length,fce_index,fcd_index,crc32>::
|
|
compress (
|
|
std::istream& in_,
|
|
std::ostream& out_
|
|
) const
|
|
{
|
|
std::streambuf::int_type temp;
|
|
|
|
std::streambuf& in = *in_.rdbuf();
|
|
|
|
typename fce::entropy_encoder_type coder;
|
|
coder.set_stream(out_);
|
|
|
|
fce model(coder);
|
|
fce_length model_length(coder);
|
|
fce_index model_index(coder);
|
|
|
|
const unsigned long LOOKAHEAD_LIMIT = 512;
|
|
lz77_buffer buffer(15,LOOKAHEAD_LIMIT);
|
|
|
|
crc32 crc;
|
|
|
|
|
|
unsigned long count = 0;
|
|
|
|
unsigned long lz77_count = 1; // number of times we used lz77 to encode
|
|
unsigned long ppm_count = 1; // number of times we used ppm to encode
|
|
|
|
|
|
while (true)
|
|
{
|
|
// write out a known value every 20000 symbols
|
|
if (count == 20000)
|
|
{
|
|
count = 0;
|
|
coder.encode(150,151,400);
|
|
}
|
|
++count;
|
|
|
|
// try to fill the lookahead buffer
|
|
if (buffer.get_lookahead_buffer_size() < buffer.get_lookahead_buffer_limit())
|
|
{
|
|
temp = in.sbumpc();
|
|
while (temp != EOF)
|
|
{
|
|
crc.add(static_cast<unsigned char>(temp));
|
|
buffer.add(static_cast<unsigned char>(temp));
|
|
if (buffer.get_lookahead_buffer_size() == buffer.get_lookahead_buffer_limit())
|
|
break;
|
|
temp = in.sbumpc();
|
|
}
|
|
}
|
|
|
|
// compute the sum of ppm_count and lz77_count but make sure
|
|
// it is less than 65536
|
|
unsigned long sum = ppm_count + lz77_count;
|
|
if (sum >= 65536)
|
|
{
|
|
ppm_count >>= 1;
|
|
lz77_count >>= 1;
|
|
ppm_count |= 1;
|
|
lz77_count |= 1;
|
|
sum = ppm_count+lz77_count;
|
|
}
|
|
|
|
// if there are still more symbols in the lookahead buffer to encode
|
|
if (buffer.get_lookahead_buffer_size() > 0)
|
|
{
|
|
unsigned long match_index, match_length;
|
|
buffer.find_match(match_index,match_length,6);
|
|
if (match_length != 0)
|
|
{
|
|
|
|
// signal the decoder that we are using lz77
|
|
coder.encode(0,lz77_count,sum);
|
|
++lz77_count;
|
|
|
|
// encode the index and length pair
|
|
model_index.encode(match_index);
|
|
model_length.encode(match_length);
|
|
|
|
}
|
|
else
|
|
{
|
|
|
|
// signal the decoder that we are using ppm
|
|
coder.encode(lz77_count,sum,sum);
|
|
++ppm_count;
|
|
|
|
// encode the symbol using the ppm model
|
|
model.encode(buffer.lookahead_buffer(0));
|
|
buffer.shift_buffers(1);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// signal the decoder that we are using ppm
|
|
coder.encode(lz77_count,sum,sum);
|
|
|
|
|
|
model.encode(eof_symbol);
|
|
// now write the checksum
|
|
unsigned long checksum = crc.get_checksum();
|
|
unsigned char byte1 = static_cast<unsigned char>((checksum>>24)&0xFF);
|
|
unsigned char byte2 = static_cast<unsigned char>((checksum>>16)&0xFF);
|
|
unsigned char byte3 = static_cast<unsigned char>((checksum>>8)&0xFF);
|
|
unsigned char byte4 = static_cast<unsigned char>((checksum)&0xFF);
|
|
|
|
model.encode(byte1);
|
|
model.encode(byte2);
|
|
model.encode(byte3);
|
|
model.encode(byte4);
|
|
|
|
break;
|
|
}
|
|
} // while (true)
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename fce,
|
|
typename fcd,
|
|
typename lz77_buffer,
|
|
typename sliding_buffer,
|
|
typename fce_length,
|
|
typename fcd_length,
|
|
typename fce_index,
|
|
typename fcd_index,
|
|
typename crc32
|
|
>
|
|
void compress_stream_kernel_2<fce,fcd,lz77_buffer,sliding_buffer,fce_length,fcd_length,fce_index,fcd_index,crc32>::
|
|
decompress (
|
|
std::istream& in_,
|
|
std::ostream& out_
|
|
) const
|
|
{
|
|
|
|
std::streambuf& out = *out_.rdbuf();
|
|
|
|
typename fcd::entropy_decoder_type coder;
|
|
coder.set_stream(in_);
|
|
|
|
fcd model(coder);
|
|
fcd_length model_length(coder);
|
|
fcd_index model_index(coder);
|
|
|
|
unsigned long symbol;
|
|
unsigned long count = 0;
|
|
|
|
sliding_buffer buffer;
|
|
buffer.set_size(15);
|
|
|
|
// Initialize the buffer to all zeros. There is no algorithmic reason to
|
|
// do this. But doing so avoids a warning from valgrind so that is why
|
|
// I'm doing this.
|
|
for (unsigned long i = 0; i < buffer.size(); ++i)
|
|
buffer[i] = 0;
|
|
|
|
crc32 crc;
|
|
|
|
unsigned long lz77_count = 1; // number of times we used lz77 to encode
|
|
unsigned long ppm_count = 1; // number of times we used ppm to encode
|
|
bool next_block_lz77;
|
|
|
|
|
|
// decode until we hit the marker symbol
|
|
while (true)
|
|
{
|
|
// make sure this is the value we expect
|
|
if (count == 20000)
|
|
{
|
|
if (coder.get_target(400) != 150)
|
|
{
|
|
throw decompression_error("Error detected in compressed data stream.");
|
|
}
|
|
count = 0;
|
|
coder.decode(150,151);
|
|
}
|
|
++count;
|
|
|
|
|
|
// compute the sum of ppm_count and lz77_count but make sure
|
|
// it is less than 65536
|
|
unsigned long sum = ppm_count + lz77_count;
|
|
if (sum >= 65536)
|
|
{
|
|
ppm_count >>= 1;
|
|
lz77_count >>= 1;
|
|
ppm_count |= 1;
|
|
lz77_count |= 1;
|
|
sum = ppm_count+lz77_count;
|
|
}
|
|
|
|
// check if we are decoding a lz77 or ppm block
|
|
if (coder.get_target(sum) < lz77_count)
|
|
{
|
|
coder.decode(0,lz77_count);
|
|
next_block_lz77 = true;
|
|
++lz77_count;
|
|
}
|
|
else
|
|
{
|
|
coder.decode(lz77_count,sum);
|
|
next_block_lz77 = false;
|
|
++ppm_count;
|
|
}
|
|
|
|
|
|
if (next_block_lz77)
|
|
{
|
|
|
|
unsigned long match_length, match_index;
|
|
// decode the match index
|
|
model_index.decode(match_index);
|
|
|
|
// decode the match length
|
|
model_length.decode(match_length);
|
|
|
|
|
|
match_index += match_length;
|
|
buffer.rotate_left(match_length);
|
|
for (unsigned long i = 0; i < match_length; ++i)
|
|
{
|
|
unsigned char ch = buffer[match_index-i];
|
|
buffer[match_length-i-1] = ch;
|
|
|
|
crc.add(ch);
|
|
// write this ch to out
|
|
if (out.sputc(static_cast<char>(ch)) != static_cast<int>(ch))
|
|
{
|
|
throw std::ios::failure("error occurred in compress_stream_kernel_2::decompress");
|
|
}
|
|
}
|
|
|
|
}
|
|
else
|
|
{
|
|
|
|
// decode the next symbol
|
|
model.decode(symbol);
|
|
if (symbol != eof_symbol)
|
|
{
|
|
buffer.rotate_left(1);
|
|
buffer[0] = static_cast<unsigned char>(symbol);
|
|
|
|
|
|
crc.add(static_cast<unsigned char>(symbol));
|
|
// write this symbol to out
|
|
if (out.sputc(static_cast<char>(symbol)) != static_cast<int>(symbol))
|
|
{
|
|
throw std::ios::failure("error occurred in compress_stream_kernel_2::decompress");
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// this was the eof marker symbol so we are done. now check the checksum
|
|
|
|
// now get the checksum and make sure it matches
|
|
unsigned char byte1;
|
|
unsigned char byte2;
|
|
unsigned char byte3;
|
|
unsigned char byte4;
|
|
|
|
model.decode(symbol); byte1 = static_cast<unsigned char>(symbol);
|
|
model.decode(symbol); byte2 = static_cast<unsigned char>(symbol);
|
|
model.decode(symbol); byte3 = static_cast<unsigned char>(symbol);
|
|
model.decode(symbol); byte4 = static_cast<unsigned char>(symbol);
|
|
|
|
unsigned long checksum = byte1;
|
|
checksum <<= 8;
|
|
checksum |= byte2;
|
|
checksum <<= 8;
|
|
checksum |= byte3;
|
|
checksum <<= 8;
|
|
checksum |= byte4;
|
|
|
|
if (checksum != crc.get_checksum())
|
|
throw decompression_error("Error detected in compressed data stream.");
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
} // while (true)
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
}
|
|
|
|
#endif // DLIB_COMPRESS_STREAM_KERNEl_2_
|
|
|