Wednesday, April 15, 2009
Blogger Import
Sunday, December 4, 2005
Wednesday, November 30, 2005
LZW Encoding Scheme C++
**
** Copyright (c) 1989 Mark R. Nelson
**
** LZW data compression/expansion demonstration program.
**
** April 13, 1989
**
*****************************************************************************/
#include
#include
#include
#include
#define BITS 14 /* Setting the number of bits to 12, 13*/
#define HASHING_SHIFT BITS-8 /* or 14 affects several constants. */
#define MAX_VALUE (1 << BITS) - 1 /* Note that MS-DOS machines need to */
#define MAX_CODE MAX_VALUE - 1 /* compile their code in large model if*/
/* 14 bits are selected. */
#if BITS == 14
#define TABLE_SIZE 18041 /* The string table size needs to be a */
#endif /* prime number that is somewhat larger*/
#if BITS == 13 /* than 2**BITS. */
#define TABLE_SIZE 9029
#endif
#if BITS <= 12
#define TABLE_SIZE 5021
#endif
int *code_value; /* This is the code value array */
unsigned int *prefix_code; /* This array holds the prefix codes */
unsigned char *append_character; /* This array holds the appended chars */
unsigned char decode_stack[4000]; /* This array holds the decoded string */
/********************************************************************
**
** This program gets a file name from the command line. It compresses the
** file, placing its output in a file named test.lzw. It then expands
** test.lzw into test.out. Test.out should then be an exact duplicate of
** the input file.
**
*************************************************************************/
void output_code(FILE *output,unsigned int code);
void compress(FILE *input,FILE *output);
int find_match(int hash_prefix,unsigned int hash_character);
void expand(FILE *input,FILE *output);
unsigned char *decode_string(unsigned char *buffer,unsigned int code);
unsigned int input_code(FILE *input);
int main(int argc, char *argv[])
{
FILE *input_file;
FILE *output_file;
FILE *lzw_file;
char input_file_name[81];
/*
** The three buffers are needed for the compression phase.
*/
code_value=( int*)malloc(TABLE_SIZE*sizeof(unsigned int));
prefix_code=(unsigned int*)malloc(TABLE_SIZE*sizeof(unsigned int));
append_character=(unsigned char*)malloc(TABLE_SIZE*sizeof(unsigned char));
if (code_value==NULL || prefix_code==NULL || append_character==NULL)
{
printf("Fatal error allocating table space!\n");
exit(1);
}
/*
** Get the file name, open it up, and open up the lzw output file.
*/
if (argc>1)
strcpy(input_file_name,argv[1]);
else
{
printf("Input file name? ");
scanf("%s",input_file_name);
}
input_file=fopen(input_file_name,"rb");
lzw_file=fopen("test.lzw","wb");
if (input_file==NULL || lzw_file==NULL)
{
printf("Fatal error opening files.\n");
exit(1);
};
/*
** Compress the file.
*/
compress(input_file,lzw_file);
fclose(input_file);
fclose(lzw_file);
free(code_value);
/*
** Now open the files for the expansion.
*/
lzw_file=fopen("test.lzw","rb");
output_file=fopen("test.out","wb");
if (lzw_file==NULL || output_file==NULL)
{
printf("Fatal error opening files.\n");
exit(1);
};
/*
** Expand the file.
*/
expand(lzw_file,output_file);
fclose(lzw_file);
fclose(output_file);
free(prefix_code);
free(append_character);
}
/*
** This is the compression routine. The code should be a fairly close
** match to the algorithm accompanying the article.
**
*/
void compress(FILE *input,FILE *output)
{
unsigned int next_code;
unsigned int character;
unsigned int string_code;
unsigned int index;
int i;
int t;
next_code=256; /* Next code is the next available string code*/
for (i=0;i
i=0;
printf("Compressing...\n");
ifstream file("ratios");
file >> t;
string_code=t;
/*
** This is the main loop where it all happens. This loop runs util all of
** the input has been exhausted. Note that it stops adding codes to the
** table after all of the possible codes have been defined.
*/
//732 * 1214
//while ((character=getc(input)) != (unsigned)EOF)
for(i=0;i<732*1214-1;i++)
{
file >> t; character =t;
if (++i==1000) /* Print a * every 1000 */
{ /* input characters. This */
i=0; /* is just a pacifier. */
printf("*");
}
index=find_match(string_code,character);/* See if the string is in */
if (code_value[index] != -1) /* the table. If it is, */
string_code=code_value[index]; /* get the code value. If */
else /* the string is not in the*/
{ /* table, try to add it. */
if (next_code <= MAX_CODE)
{
code_value[index]=next_code++;
prefix_code[index]=string_code;
append_character[index]=character;
}
output_code(output,string_code); /* When a string is found */
string_code=character; /* that is not in the table*/
} /* I output the last string*/
} /* after adding the new one*/
/*
** End of the main loop.
*/
output_code(output,string_code); /* Output the last code */
output_code(output,MAX_VALUE); /* Output the end of buffer code */
output_code(output,0); /* This code flushes the output buffer*/
printf("\n");
}
/*
** This is the hashing routine. It tries to find a match for the prefix+char
** string in the string table. If it finds it, the index is returned. If
** the string is not found, the first available index in the string table is
** returned instead.
*/
int find_match(int hash_prefix,unsigned int hash_character)
{
int index;
int offset;
index = (hash_character << HASHING_SHIFT) ^ hash_prefix;
if (index == 0)
offset = 1;
else
offset = TABLE_SIZE - index;
while (1)
{
if (code_value[index] == -1)
return(index);
if (prefix_code[index] == hash_prefix &&
append_character[index] == hash_character)
return(index);
index -= offset;
if (index < 0)
index += TABLE_SIZE;
}
}
/*
** This is the expansion routine. It takes an LZW format file, and expands
** it to an output file. The code here should be a fairly close match to
** the algorithm in the accompanying article.
*/
void expand(FILE *input,FILE *output)
{
unsigned int next_code;
unsigned int new_code;
unsigned int old_code;
int character;
int counter;
unsigned char *string;
int t;
ofstream file("output");
next_code=256; /* This is the next available code to define */
counter=0; /* Counter is used as a pacifier. */
printf("Expanding...\n");
old_code=input_code(input); /* Read in the first code, initialize the */
character=old_code; /* character variable, and send the first */
t=(unsigned char) old_code;
file << t;
putc(old_code,output); /* code to the output file */
/*
** This is the main expansion loop. It reads in characters from the LZW file
** until it sees the special code used to inidicate the end of the data.
*/
while ((new_code=input_code(input)) != (MAX_VALUE))
{
if (++counter==1000) /* This section of code prints out */
{ /* an asterisk every 1000 characters */
counter=0; /* It is just a pacifier. */
printf("*");
}
/*
** This code checks for the special STRING+CHARACTER+STRING+CHARACTER+STRING
** case which generates an undefined code. It handles it by decoding
** the last code, and adding a single character to the end of the decode string.
*/
if (new_code>=next_code)
{
*decode_stack=character;
string=(unsigned char*)decode_string(decode_stack+1,old_code);
}
/*
** Otherwise we do a straight decode of the new code.
*/
else
string=(unsigned char*)decode_string(decode_stack,new_code);
/*
** Now we output the decoded string in reverse order.
*/
character=*string;
while (string >= decode_stack)
{
t=(unsigned char) *string;
file << t;
putc(*string--,output);
}
/*
** Finally, if possible, add a new code to the string table.
*/
if (next_code <= MAX_CODE)
{
prefix_code[next_code]=old_code;
append_character[next_code]=character;
next_code++;
}
old_code=new_code;
}
printf("\n");
}
/*
** This routine simply decodes a string from the string table, storing
** it in a buffer. The buffer can then be output in reverse order by
** the expansion program.
*/
unsigned char *decode_string(unsigned char *buffer,unsigned int code)
{
int i;
i=0;
while (code > 255)
{
*buffer++ = append_character[code];
code=prefix_code[code];
if (i++>=4094)
{
printf("Fatal error during code expansion.\n");
exit(1);
}
}
*buffer=code;
return(buffer);
}
/*
** The following two routines are used to output variable length
** codes. They are written strictly for clarity, and are not
** particularyl efficient.
*/
unsigned int input_code(FILE *input)
{
unsigned int return_value;
static int input_bit_count=0;
static unsigned long input_bit_buffer=0L;
while (input_bit_count <= 24)
{
input_bit_buffer |=
(unsigned long) getc(input) << (24-input_bit_count);
input_bit_count += 8;
}
return_value=input_bit_buffer >> (32-BITS);
input_bit_buffer <<= BITS;
input_bit_count -= BITS;
return(return_value);
}
void output_code(FILE *output,unsigned int code)
{
static int output_bit_count=0;
static unsigned long output_bit_buffer=0L;
output_bit_buffer |= (unsigned long) code << (32-BITS-output_bit_count);
output_bit_count += BITS;
while (output_bit_count >= 8)
{
putc(output_bit_buffer >> 24,output);
output_bit_buffer <<= 8;
output_bit_count -= 8;
}
}
Good Huffman Encoding Code Simple
#include
#include
#include
#include
//
// The node class is used to represent both leaf
// and internal nodes. leaf nodes have 0s in the
// child pointers, and their value member corresponds
// to the character they encode. internal nodes
// don't have anything meaningful in their value
// member, but their child pointers point to
// other nodes.
//
using namespace std;
struct node {
int weight;
unsigned char value;
const node *child0;
const node *child1;
//
// Construct a new leaf node for character c
//
node( unsigned char c = 0, int i = -1 ) {
value = c;
weight = i;
child0 = 0;
child1 = 0;
}
//
// Construct a new internal node that has
// children c1 and c2.
//
node( const node* c0, const node *c1 ) {
value = 0;
weight = c0->weight + c1->weight;
child0 = c0;
child1 = c1;
}
//
// The comparison operator used to order the
// priority queue.
//
bool operator<( const node &a ) const {
return weight < a.weight;
}
void traverse( string code = "" ) const;
};
//
// The traverse member function is used to
// print out the code for a given node. It
// is designed to print the entire tree if
// called for the root node.
//
void node::traverse( string code ) const
{
if ( child0 ) {
child0->traverse( code + "0" );
child1->traverse( code + "1" );
} else {
cout << " " << value << " ";
//cout << setw( 2 ) << weight;
cout << " " << code << endl;
}
}
//
// This routine does a quick count of all the
// characters in the input file. I skip the
// whitespace.
//
void count_chars( int *counts )
{
for ( int i = 0 ; i < 256 ; i++ )
counts[ i ] = 0;
ifstream file( "input.dat" );
if ( !file ) {
cerr << "Couldn't open the input file!\n";
throw "abort";
}
file.setf( ios::skipws );
for ( ; ; ) {
unsigned char c;
file >> c;
if ( file )
counts[ c ]++;
else
break;
}
}
main()
{
int counts[ 256 ];
count_chars( counts );
priority_queue< vector< node >, vector
//
// First I push all the leaf nodes into the queue
//
for ( int i = 0 ; i < 256 ; i++ )
if ( counts[ i ] )
q.push( node( i, counts[ i ] ) );
//
// This loop removes the two smallest nodes from the
// queue. It creates a new internal node that has
// those two nodes as children. The new internal node
// is then inserted into the priority queue. When there
// is only one node in the priority queue, the tree
// is complete.
//
while ( q.size() > 1 ) {
node *child0 = new node( q.top() );
q.pop();
node *child1 = new node( q.top() );
q.pop();
q.push( node( child0, child1 ) );
}
//
// Now I dump the results
//
cout << "Char Symbol Code" << endl;
q.top().traverse();
return 1;
}
Debauchies Wavelets Good Sample Code
#include
/**
Daubechies D4 wavelet transform (D4 denotes four coefficients)
I have to confess up front that the comment here does not even come
close to describing wavelet algorithms and the Daubechies D4
algorithm in particular. I don't think that it can be described in
anything less than a journal article or perhaps a book. I even have
to apologize for the notation I use to describe the algorithm, which
is barely adequate. But explaining the correct notation would take
a fair amount of space as well. This comment really represents some
notes that I wrote up as I implemented the code. If you are
unfamiliar with wavelets I suggest that you look at the bearcave.com
web pages and at the wavelet literature. I have yet to see a really
good reference on wavelets for the software developer. The best
book I can recommend is Ripples in Mathematics by Jensen and
Cour-Harbo.
All wavelet algorithms have two components, a wavelet function and a
scaling function. These are sometime also referred to as high pass
and low pass filters respectively.
The wavelet function is passed two or more samples
and calculates a wavelet coefficient. In the case of
the Haar wavelet this is
coefi = oddi - eveni
or
coefi = 0.5 * (oddi - eveni)
depending on the version of the Haar algorithm used.
The scaling function produces a smoother version of the
original data. In the case of the Haar wavelet algorithm
this is an average of two adjacent elements.
The Daubechies D4 wavelet algorithm also has a wavelet
and a scaling function. The coefficients for the
scaling function are denoted as hi and the
wavelet coefficients are gi.
Mathematicians like to talk about wavelets in terms of
a wavelet algorithm applied to an infinite data set.
In this case one step of the forward transform can be expressed
as the infinite matrix of wavelet coefficients
represented below multiplied by the infinite signal
vector.
ai = ...h0,h1,h2,h3, 0, 0, 0, 0, 0, 0, 0, ... si
ci = ...g0,g1,g2,g3, 0, 0, 0, 0, 0, 0, 0, ... si+1
ai+1 = ...0, 0, h0,h1,h2,h3, 0, 0, 0, 0, 0, ... si+2
ci+1 = ...0, 0, g0,g1,g2,g3, 0, 0, 0, 0, 0, ... si+3
ai+2 = ...0, 0, 0, 0, h0,h1,h2,h3, 0, 0, 0, ... si+4
ci+2 = ...0, 0, 0, 0, g0,g1,g2,g3, 0, 0, 0, ... si+5
ai+3 = ...0, 0, 0, 0, 0, 0, h0,h1,h2,h3, 0, ... si+6
ci+3 = ...0, 0, 0, 0, 0, 0, g0,g1,g2,g3, 0, ... si+7
The dot product (inner product) of the infinite vector and
a row of the matrix produces either a smoother version of the
signal (ai) or a wavelet coefficient (ci).
In an ordered wavelet transform, the smoothed (ai) are
stored in the first half of an n element array region. The
wavelet coefficients (ci) are stored in the second half
the n element region. The algorithm is recursive. The
smoothed values become the input to the next step.
The transpose of the forward transform matrix above is used
to calculate an inverse transform step. Here the dot product is
formed from the result of the forward transform and an inverse
transform matrix row.
si = ...h2,g2,h0,g0, 0, 0, 0, 0, 0, 0, 0, ... ai
si+1 = ...h3,g3,h1,g1, 0, 0, 0, 0, 0, 0, 0, ... ci
si+2 = ...0, 0, h2,g2,h0,g0, 0, 0, 0, 0, 0, ... ai+1
si+3 = ...0, 0, h3,g3,h1,g1, 0, 0, 0, 0, 0, ... ci+1
si+4 = ...0, 0, 0, 0, h2,g2,h0,g0, 0, 0, 0, ... ai+2
si+5 = ...0, 0, 0, 0, h3,g3,h1,g1, 0, 0, 0, ... ci+2
si+6 = ...0, 0, 0, 0, 0, 0, h2,g2,h0,g0, 0, ... ai+3
si+7 = ...0, 0, 0, 0, 0, 0, h3,g3,h1,g1, 0, ... ci+3
Using a standard dot product is grossly inefficient since most
of the operands are zero. In practice the wavelet coefficient
values are moved along the signal vector and a four element
dot product is calculated. Expressed in terms of arrays, for
the forward transform this would be:
ai = s[i]*h0 + s[i+1]*h1 + s[i+2]*h2 + s[i+3]*h3
ci = s[i]*g0 + s[i+1]*g1 + s[i+2]*g2 + s[i+3]*g3
This works fine if we have an infinite data set, since we don't
have to worry about shifting the coefficients "off the end" of
the signal.
I sometimes joke that I left my infinite data set in my other bear
suit. The only problem with the algorithm described so far is that
we don't have an infinite signal. The signal is finite. In fact
not only must the signal be finite, but it must have a power of two
number of elements.
If i=N-1, the i+2 and i+3 elements will be beyond the end of
the array. There are a number of methods for handling the
wavelet edge problem. This version of the algorithm acts
like the data is periodic, where the data at the start of
the signal wraps around to the end.
This algorithm uses a temporary array. A Lifting Scheme version of
the Daubechies D4 algorithm does not require a temporary. The
matrix discussion above is based on material from Ripples in
Mathematics, by Jensen and Cour-Harbo. Any error are mine.
Author: Ian Kaplan
Use: You may use this software for any purpose as long
as I cannot be held liable for the result. Please credit me
with authorship if use use this source code.
This comment is formatted for the doxygen documentation generator
*/
class Daubechies {
private:
/** forward transform scaling coefficients */
double h0, h1, h2, h3;
/** forward transform wave coefficients */
double g0, g1, g2, g3;
double Ih0, Ih1, Ih2, Ih3;
double Ig0, Ig1, Ig2, Ig3;
/**
Forward Daubechies D4 transform
*/
public:
void transform( double* a, const int n )
{
if (n >= 4) {
int i, j;
const int half = n >> 1;
double* tmp = new double[n];
for (i = 0, j = 0; j < n-3; j += 2, i++) {
tmp[i] = a[j]*h0 + a[j+1]*h1 + a[j+2]*h2 + a[j+3]*h3;
tmp[i+half] = a[j]*g0 + a[j+1]*g1 + a[j+2]*g2 + a[j+3]*g3;
}
tmp[i] = a[n-2]*h0 + a[n-1]*h1 + a[0]*h2 + a[1]*h3;
tmp[i+half] = a[n-2]*g0 + a[n-1]*g1 + a[0]*g2 + a[1]*g3;
for (i = 0; i < n; i++) {
a[i] = tmp[i];
}
delete [] tmp;
}
}
/**
Inverse Daubechies D4 transform
*/
void invTransform( double* a, const int n )
{
if (n >= 4) {
int i, j;
const int half = n >> 1;
const int halfPls1 = half + 1;
double* tmp = new double[n];
// last smooth val last coef. first smooth first coef
tmp[0] = a[half-1]*Ih0 + a[n-1]*Ih1 + a[0]*Ih2 + a[half]*Ih3;
tmp[1] = a[half-1]*Ig0 + a[n-1]*Ig1 + a[0]*Ig2 + a[half]*Ig3;
for (i = 0, j = 2; i < half-1; i++) {
// smooth val coef. val smooth val coef. val
tmp[j++] = a[i]*Ih0 + a[i+half]*Ih1 + a[i+1]*Ih2 + a[i+halfPls1]*Ih3;
tmp[j++] = a[i]*Ig0 + a[i+half]*Ig1 + a[i+1]*Ig2 + a[i+halfPls1]*Ig3;
}
for (i = 0; i < n; i++) {
a[i] = tmp[i];
}
delete [] tmp;
}
}
Daubechies()
{
const double sqrt_3 = sqrt( 3 );
const double denom = 4 * sqrt( 2 );
//
// forward transform scaling (smoothing) coefficients
//
h0 = (1 + sqrt_3)/denom;
h1 = (3 + sqrt_3)/denom;
h2 = (3 - sqrt_3)/denom;
h3 = (1 - sqrt_3)/denom;
//
// forward transform wavelet coefficients
//
g0 = h3;
g1 = -h2;
g2 = h1;
g3 = -h0;
Ih0 = h2;
Ih1 = g2; // h1
Ih2 = h0;
Ih3 = g0; // h3
Ig0 = h3;
Ig1 = g3; // -h0
Ig2 = h1;
Ig3 = g1; // -h2
}
void daubTrans( double* ts, int N )
{
int n;
for (n = N; n >= 4; n >>= 1) {
transform( ts, n );
}
}
void invDaubTrans( double* coef, int N )
{
int n;
for (n = 4; n <= N; n <<= 1) {
invTransform( coef, n );
}
}
}; // Daubechies
int main()
{
double A[8] = {1, 1, 2, 5 , 6, 2, 4, 3};
int i;
for(i=0;i<8;i++)
cerr<< A[i]<<" ";
cerr<< endl;
Daubechies D;
D.daubTrans( (double*)A , 8 );
for(i=0;i<8;i++)
cerr<< A[i]<<" ";
cerr<< endl;
D.invDaubTrans( (double*)A , 8 );
for(i=0;i<8;i++)
cerr<< A[i] <<" ";
cerr<< endl;
return 1;
}
Monday, July 25, 2005
C String Manipulation
I am copying it below. Anyone and every1 who has ever parsed a file in C and found it difficult must read this.
char* strsep(char **stringp ,char *delim);
stringp is the initial string and delim is the delimiter character. The result is given as char* initial portion before the delimiter and rest is stored in stringp.
Eg. strcpy(string1,"shubham");
string2 = strsep(&string1,"u");
//Now string2 contains "sh" and string1 contains "bham"