The Code
The source code for a complete twelve-bit version of LZW compression and decompression follows.
/************************* Start of LZW12.C **************************
*
* This is 12 bit LZW program, which is discussed in the first part
* of the chapter. It uses a fixed size code, and does not attempt
* to flush the dictionary after it fills up.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "errand.h"
#include "bitio.h"
/*
* Constants used throughout the program. BITS defines how many bits
* will be in a code. TABLE_SIZE defines the size of the dictionary
* table.
*/
#define BITS 12
#define MAX_CODE ( ( 1 << BITS ) - 1 )
#define TABLE_SIZE 5021
#define END_OF_STREAM 256
#define FIRST_CODE 257
#define UNUSED -1
/*
* Local prototypes.
*/
#ifdef __STDC__
unsigned int find_child_node( int parent_code, int child_character );
unsigned int decode_string( unsigned int offset, unsigned int code );
#else
unsigned int find_child_node ();
unsigned int decode_string ();
#endif
char *CompressionName = "LZW 12 Bit Encoder";
char *Usage = "in-file out-file\n\n";
/*
* This data structure defines the dictionary. Each entry in the
* dictionary has a code value. This is the code emitted by the
* compressor. Each code is actually made up of two piece: a
* parent_code, and a character. Code values of less than 256 are
* actually plain text codes.
*/
struct dictionary {
int code_value;
int parent_code;
char character;
} dict[TABLE_SIZE ];
char decode_stack[ TABLE_SIZE };
/*
* The compressor is short and simple. It reads in new symbols one
* at a time from the input file. It then checks to see if the
* combination of the current symbol and the current code are already
* defined in the dictionary. If they are not, they are added to the
* dictionary, and we start over with a new one symbol code. If they
* are, the code for the combination of the code and character becomes
* our new code.
*/
void CompressFile( input, output, argc, argv )
FILE *input;
BIT_FILE *output;
int argc;
char *argv[];
{
int next_code;
int character;
int string_code;
unsigned int index;
unsigned int i;
next_code = FIRST_CODE;
for ( i = 0 ; i < TABLE_SIZE ; i++ )
dict[ i ].code_value = UNUSED;
if ( ( string_code = getc( input ) )== EOF )
string_code = END_OF_STREAM;
while ( ( character = getc( input ) ) != EOF ) {
index = find_child_node( string_code, character );
if ( dict[ index ].code_value != - 1)
string_code = dict[ index ].code_value;
else {
if ( next_code <= MAX_CODE ) {
dict[ index ].code_value = next_code++;
dict[ index ].parent_code = string_code;
dict[ index ].character = (char) character;
}
OutputBits( output, (unsigned long) string_code, BITS );
string_code = character;
}
}
OutputBits( output, (unsigned long) string_code, BITS );
OutputBits( output, (unsigned long) END_OF_STREAM, BITS );
while ( argc-- > 0 )
printf( "Unknown argument: %s\n", *argv++ ); }
/*
* The file expander operates much like the encoder. It has to
* read in codes, then convert the codes to a string of characters.
* The only catch in the whole operation occurs when the encoder
* encounters a CHAR+STRING+CHAR+STRING+CHAR sequence. When this
* occurs, the encoder outputs a code that is not presently defined
* in the table. This is handled as an exception.
*/
void ExpandFile( input, output, argc, argv )
BIT_FILE *input;
FILE *output;
int argc;
char *argv[];
{
unsigned int next_code;
unsigned int new_code;
unsigned int old_code;
int character;
unsigned int count;
next_code = FIRST_CODE;
old_code = (unsigned int) InputBits( input, BITS );
if ( old_code == END_OF_STREAM )
return;
character = old_code;
putc( old_code, output );
while ( ( new_code = (unsigned int) InputBits( input, BITS ) )
!= END_OF_STREAM ) {
/*
** This code checks for the CHARACTER+STRING+CHARACTER+STRING+CHARACTER
** case which generates an undefined code. It handles it by decoding
** the last code, and adding a single character to the end of the
** decode string.
*/
if (new_code >= next_code ) {
decode_stack[ 0 ] = (char) character;
count = decode_string( 1, old_code );
}
else
count = decode_string( 0, new_code );
character = decode_string[ count - 1 ];
while ( count > 0 )
putc( decode_stack[ --count ], output );
if ( next_code <= MAX_CODE ) {
dict[ next_code ].parent_code = old_code;
dict[ next_code ].character = (char) character;
next_code++;
}
old_code = new_code;
}
while ( argc-- > 0 )
print( "Unknown argument: %s\n", *argv++ );
}
/*
* This hashing routine is responsible for finding the table location
* for a string/character combination. The table index is created
* by using an exclusive OR combination of the prefix and character.
* This code also has to check for collisions, and handles them by
* jumping around in the table.
*/
unsigned int find_child_node( parent_code, child_character )
int parent_code;
int child_character;
{
int index;
int offset;
index = ( child_character << ( BITS - 8 ) ) ^ parent_code;
if ( index == 0 )
offset = 1;
else
offset = TABLE_SIZE - index;
for ( ; ; ) {
if ( dict[ index ].code_value == UNUSED )
return( index );
if ( dict[ index ].parent_code == parent_code &&
dict[ index ].character == (char) child_character )
return( index );
index -= offset;
if ( index < 0 )
index += TABLE_SIZE;
}
}
/*
* This routine decodes a string from the dictionary, and stores it
* in the decode_stack data structure. It returns a count to the
* calling program of how many characters were placed in the stack.
*/
unsigned int decode_string( count, code )
unsigned int count;
unsigned int code;
{
while ( code > 255 ) {
decode_stack[ count++ ] = dict[ code ].character;
code = dict[ code ].parent_code;
}
decode_stack[ count++ ] = (char) code;
return( count );
}
/*************************** End of LZW12.C ****************************/
|