505 lines
15 KiB
C++
505 lines
15 KiB
C++
// Copyright (C) 2004 Davis E. King (davis@dlib.net)
|
|
// License: Boost Software License See LICENSE.txt for the full license.
|
|
#ifndef DLIB_LZ77_BUFFER_KERNEl_2_
|
|
#define DLIB_LZ77_BUFFER_KERNEl_2_
|
|
|
|
#include "lz77_buffer_kernel_abstract.h"
|
|
#include "../algs.h"
|
|
|
|
|
|
|
|
namespace dlib
|
|
{
|
|
|
|
template <
|
|
typename sliding_buffer
|
|
>
|
|
class lz77_buffer_kernel_2
|
|
{
|
|
/*!
|
|
REQUIREMENTS ON sliding_buffer
|
|
sliding_buffer must be an implementation of sliding_buffer/sliding_buffer_kernel_abstract.h
|
|
and must be instantiated to contain unsigned char data
|
|
|
|
INITIAL VALUE
|
|
history_limit == defined by constructor arguments
|
|
lookahead_limit == defined by constructor arguments
|
|
history_size == 0
|
|
lookahead_size == 0
|
|
buffer.size() == history_limit + lookahead_limit
|
|
buffer[i] == 0 for all valid i
|
|
|
|
nodes == an array of history_limit-3 nodes
|
|
id_table == an array of buffer.size() pointers
|
|
hash_table == an array of buffer.size() pointers and all are set to 0
|
|
mask == buffer.size() - 1
|
|
next_free_node == 0
|
|
|
|
|
|
CONVENTION
|
|
history_limit == get_history_buffer_limit()
|
|
lookahead_limit == get_lookahead_buffer_limit()
|
|
history_size == get_history_buffer_size()
|
|
lookahead_limit == get_lookahead_buffer_size()
|
|
|
|
buffer.size() == history_limit + lookahead_limit
|
|
|
|
lookahead_buffer(i) == buffer[lookahead_limit-1-i]
|
|
history_buffer(i) == buffer[lookahead_limit+i]
|
|
|
|
|
|
hash_table[hash(a,b,c,d)] points to the head of a linked list.
|
|
Each node in this linked list tells the location in the buffer
|
|
of a string that begins with abcd or a string who's first four
|
|
letters have the same hash. The linked list is terminated by a
|
|
node with a null next pointer.
|
|
|
|
hash_table[i] == 0 if there is no linked list for this element of the hash
|
|
table.
|
|
|
|
each node in the hash table is allocated from the array nodes.
|
|
When adding a node to hash_table:
|
|
if (if all nodes aren't already in the hash_table) then
|
|
{
|
|
the next node to use is nodes[next_free_node].
|
|
}
|
|
else
|
|
{
|
|
recycle nodes from the hash_table itself. This works because
|
|
when we add new nodes we also have to remove nodes.
|
|
}
|
|
|
|
if (there is a node defined with an id of i) then
|
|
{
|
|
if (id_table[i] != 0) then
|
|
id_table[i]->next->id == i
|
|
else
|
|
hash_table[some_hash]->id == i
|
|
}
|
|
!*/
|
|
|
|
public:
|
|
|
|
lz77_buffer_kernel_2 (
|
|
unsigned long total_limit_,
|
|
unsigned long lookahead_limit_
|
|
);
|
|
|
|
virtual ~lz77_buffer_kernel_2 (
|
|
);
|
|
|
|
void clear(
|
|
);
|
|
|
|
void add (
|
|
unsigned char symbol
|
|
);
|
|
|
|
void find_match (
|
|
unsigned long& index,
|
|
unsigned long& length,
|
|
unsigned long min_match_length
|
|
);
|
|
|
|
inline unsigned long get_history_buffer_limit (
|
|
) const { return history_limit; }
|
|
|
|
inline unsigned long get_lookahead_buffer_limit (
|
|
) const { return lookahead_limit; }
|
|
|
|
inline unsigned long get_history_buffer_size (
|
|
) const { return history_size; }
|
|
|
|
inline unsigned long get_lookahead_buffer_size (
|
|
) const { return lookahead_size; }
|
|
|
|
inline unsigned char lookahead_buffer (
|
|
unsigned long index
|
|
) const { return buffer[lookahead_limit-1-index]; }
|
|
|
|
inline unsigned char history_buffer (
|
|
unsigned long index
|
|
) const { return buffer[lookahead_limit+index]; }
|
|
|
|
|
|
inline void shift_buffers (
|
|
unsigned long N
|
|
) { shift_buffer(N); }
|
|
|
|
private:
|
|
|
|
inline unsigned long hash (
|
|
unsigned char a,
|
|
unsigned char b,
|
|
unsigned char c,
|
|
unsigned char d
|
|
) const
|
|
/*!
|
|
ensures
|
|
- returns a hash of the 4 arguments and the hash is in the range
|
|
!*/
|
|
{
|
|
unsigned long B = b << 3;
|
|
unsigned long C = c << 6;
|
|
unsigned long D = d << 9;
|
|
|
|
unsigned long temp = a + B;
|
|
temp += C;
|
|
temp += D;
|
|
|
|
return (temp&mask); /**/
|
|
}
|
|
|
|
void shift_buffer (
|
|
unsigned long N
|
|
);
|
|
/*!
|
|
requires
|
|
- N <= lookahead_size
|
|
ensuers
|
|
- #lookahead_size == lookahead_size - N
|
|
- if (history_size+N < history_limit) then
|
|
- #history_size == history_size+N
|
|
- else
|
|
- #history_size == history_limit
|
|
- for all i where 0 <= i < N:
|
|
#history_buffer(N-1-i) == lookahead_buffer(i)
|
|
- for all i where 0 <= i < #history_size-N:
|
|
#history_buffer(N+i) == history_buffer(i)
|
|
- for all i where 0 <= i < #lookahead_size
|
|
#lookahead_buffer(i) == lookahead_buffer(N+i)
|
|
!*/
|
|
|
|
|
|
|
|
// member data
|
|
sliding_buffer buffer;
|
|
unsigned long lookahead_limit;
|
|
unsigned long history_limit;
|
|
|
|
struct node
|
|
{
|
|
unsigned long id;
|
|
node* next;
|
|
};
|
|
|
|
node** hash_table;
|
|
node* nodes;
|
|
node** id_table;
|
|
unsigned long next_free_node;
|
|
unsigned long mask;
|
|
|
|
unsigned long lookahead_size;
|
|
unsigned long history_size;
|
|
|
|
|
|
// restricted functions
|
|
lz77_buffer_kernel_2(lz77_buffer_kernel_2<sliding_buffer>&); // copy constructor
|
|
lz77_buffer_kernel_2<sliding_buffer>& operator=(lz77_buffer_kernel_2<sliding_buffer>&); // assignment operator
|
|
};
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
// ----------------------------------------------------------------------------------------
|
|
// member function definitions
|
|
// ----------------------------------------------------------------------------------------
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename sliding_buffer
|
|
>
|
|
lz77_buffer_kernel_2<sliding_buffer>::
|
|
lz77_buffer_kernel_2 (
|
|
unsigned long total_limit_,
|
|
unsigned long lookahead_limit_
|
|
) :
|
|
lookahead_size(0),
|
|
history_size(0)
|
|
{
|
|
buffer.set_size(total_limit_);
|
|
lookahead_limit = lookahead_limit_;
|
|
history_limit = buffer.size() - lookahead_limit_;
|
|
|
|
nodes = new node[history_limit-3];
|
|
|
|
try { id_table = new node*[buffer.size()]; }
|
|
catch (...) { delete [] nodes; throw; }
|
|
|
|
try { hash_table = new node*[buffer.size()]; }
|
|
catch (...) { delete [] id_table; delete [] nodes; throw; }
|
|
|
|
mask = buffer.size()-1;
|
|
next_free_node = 0;
|
|
|
|
|
|
node** start = hash_table;
|
|
node** end = hash_table + buffer.size();
|
|
while (start != end)
|
|
{
|
|
*start = 0;
|
|
++start;
|
|
}
|
|
|
|
for (unsigned long i = 0; i < buffer.size(); ++i)
|
|
buffer[i] = 0;
|
|
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename sliding_buffer
|
|
>
|
|
lz77_buffer_kernel_2<sliding_buffer>::
|
|
~lz77_buffer_kernel_2 (
|
|
)
|
|
{
|
|
delete [] nodes;
|
|
delete [] hash_table;
|
|
delete [] id_table;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename sliding_buffer
|
|
>
|
|
void lz77_buffer_kernel_2<sliding_buffer>::
|
|
clear(
|
|
)
|
|
{
|
|
lookahead_size = 0;
|
|
history_size = 0;
|
|
next_free_node = 0;
|
|
|
|
node** start = hash_table;
|
|
node** end = hash_table + buffer.size();
|
|
while (start != end)
|
|
{
|
|
*start = 0;
|
|
++start;
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename sliding_buffer
|
|
>
|
|
void lz77_buffer_kernel_2<sliding_buffer>::
|
|
shift_buffer (
|
|
unsigned long N
|
|
)
|
|
{
|
|
unsigned long old_history_size = history_size;
|
|
unsigned long temp = history_size+N;
|
|
unsigned long new_nodes; // the number of nodes to pull from the nodes array
|
|
unsigned long recycled_nodes; // the number of nodes to pull from hash_table
|
|
lookahead_size -= N;
|
|
if (temp <= history_limit)
|
|
{
|
|
if (history_size <= 3)
|
|
{
|
|
if ((3-history_size) >= N)
|
|
new_nodes = 0;
|
|
else
|
|
new_nodes = N - (3-history_size);
|
|
}
|
|
else
|
|
{
|
|
new_nodes = N;
|
|
}
|
|
|
|
recycled_nodes = 0;
|
|
history_size = temp;
|
|
}
|
|
else
|
|
{
|
|
if (history_size != history_limit)
|
|
{
|
|
new_nodes = history_limit - history_size;
|
|
recycled_nodes = temp - history_limit;
|
|
history_size = history_limit;
|
|
}
|
|
else
|
|
{
|
|
new_nodes = 0;
|
|
recycled_nodes = N;
|
|
}
|
|
}
|
|
|
|
unsigned long i = lookahead_limit + 2;
|
|
|
|
// if there are any "new" nodes to add to the hash table
|
|
if (new_nodes != 0)
|
|
{
|
|
unsigned long stop = i - new_nodes;
|
|
for (; i > stop; --i)
|
|
{
|
|
nodes[next_free_node].next = 0;
|
|
nodes[next_free_node].id = buffer.get_element_id(i);
|
|
id_table[nodes[next_free_node].id] = 0;
|
|
|
|
unsigned long new_hash = hash(buffer[i],buffer[i-1],buffer[i-2],buffer[i-3]);
|
|
|
|
if (hash_table[new_hash] != 0)
|
|
id_table[hash_table[new_hash]->id] = &nodes[next_free_node];
|
|
nodes[next_free_node].next = hash_table[new_hash];
|
|
hash_table[new_hash] = &nodes[next_free_node];
|
|
|
|
++next_free_node;
|
|
}
|
|
} // if (new_nodes != 0)
|
|
|
|
|
|
|
|
unsigned long stop = i - recycled_nodes;
|
|
unsigned long old = old_history_size-1+lookahead_limit;
|
|
for (; i > stop; --i)
|
|
{
|
|
// find the next node to recycle in hash_table
|
|
node* recycled_node;
|
|
|
|
|
|
unsigned long old_id = buffer.get_element_id(old);
|
|
|
|
// find the node with id old_id
|
|
if (id_table[old_id] == 0)
|
|
{
|
|
unsigned long old_hash = hash(buffer[old],buffer[old-1],buffer[old-2],buffer[old-3]);
|
|
recycled_node = hash_table[old_hash];
|
|
|
|
// fill the gap left by removing this node
|
|
hash_table[old_hash] = recycled_node->next;
|
|
}
|
|
else
|
|
{
|
|
recycled_node = id_table[old_id]->next;
|
|
|
|
// fill the gap left by removing this node
|
|
id_table[old_id]->next = recycled_node->next;
|
|
}
|
|
|
|
--old;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
recycled_node->next = 0;
|
|
recycled_node->id = buffer.get_element_id(i);
|
|
id_table[recycled_node->id] = 0;
|
|
|
|
unsigned long new_hash = hash(buffer[i],buffer[i-1],buffer[i-2],buffer[i-3]);
|
|
|
|
if (hash_table[new_hash] != 0)
|
|
id_table[hash_table[new_hash]->id] = recycled_node;
|
|
|
|
recycled_node->next = hash_table[new_hash];
|
|
hash_table[new_hash] = recycled_node;
|
|
|
|
} // for (; i > stop; --i)
|
|
|
|
|
|
|
|
|
|
buffer.rotate_left(N);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename sliding_buffer
|
|
>
|
|
void lz77_buffer_kernel_2<sliding_buffer>::
|
|
add (
|
|
unsigned char symbol
|
|
)
|
|
{
|
|
if (lookahead_size == lookahead_limit)
|
|
{
|
|
shift_buffer(1);
|
|
}
|
|
buffer[lookahead_limit-1-lookahead_size] = symbol;
|
|
++lookahead_size;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename sliding_buffer
|
|
>
|
|
void lz77_buffer_kernel_2<sliding_buffer>::
|
|
find_match (
|
|
unsigned long& index,
|
|
unsigned long& length,
|
|
unsigned long min_match_length
|
|
)
|
|
{
|
|
unsigned long match_length = 0; // the length of the longest match we find
|
|
unsigned long match_index = 0; // the index of the longest match we find
|
|
|
|
|
|
const unsigned long hash_value = hash(lookahead_buffer(0),
|
|
lookahead_buffer(1),
|
|
lookahead_buffer(2),
|
|
lookahead_buffer(3)
|
|
);
|
|
|
|
|
|
|
|
node* temp = hash_table[hash_value];
|
|
while (temp != 0)
|
|
{
|
|
// current position in the history buffer
|
|
unsigned long hpos = buffer.get_element_index(temp->id)-lookahead_limit;
|
|
// current position in the lookahead buffer
|
|
unsigned long lpos = 0;
|
|
|
|
// find length of this match
|
|
while (history_buffer(hpos) == lookahead_buffer(lpos))
|
|
{
|
|
++lpos;
|
|
if (hpos == 0)
|
|
break;
|
|
--hpos;
|
|
if (lpos == lookahead_size)
|
|
break;
|
|
}
|
|
|
|
if (lpos > match_length)
|
|
{
|
|
match_length = lpos;
|
|
match_index = buffer.get_element_index(temp->id)-lookahead_limit;
|
|
// if this is the longest possible match then stop looking
|
|
if (lpos == lookahead_limit)
|
|
break;
|
|
}
|
|
|
|
|
|
temp = temp->next;
|
|
} // while (temp != 0)
|
|
|
|
|
|
|
|
|
|
// if we found a match that was long enough then report it
|
|
if (match_length >= min_match_length)
|
|
{
|
|
shift_buffer(match_length);
|
|
index = match_index;
|
|
length = match_length;
|
|
}
|
|
else
|
|
{
|
|
length = 0;
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
}
|
|
|
|
#endif // DLIB_LZ77_BUFFER_KERNEl_2_
|
|
|