Huffman Coding
A. First Edition
This is a small practice for data structure of famous "Huffman coding" which is a wish lingering in my mind long
before.
Huffman Encoding is a very basic compressing algorithm which is considered to be known to every
computer science student. So, I always want to write it myself, even with sample code in hand.
กก
There is little to mention except that Mr. Shaffer preferred to make two different classes of internal and leaf
nodes.
I overloaded the operator + and ++ of class Huffnode.
กก
E.Further improvement
กก
F.File listing
1. Huffman.h
2. Huffman.cpp
3. main.cpp (main)
file name: Huffman.h
#ifndef HUFFMAN_H #define HUFFMAN_H #include <stdio.h> template<class T> class Huffnode { private: T element; Huffnode<T>* l; Huffnode<T>* r; int freq; char* path; //unsigned int bit; int length; bool isInternal; public: int getLength(){return length;} //unsigned int getBit(){return bit;} Huffnode<T>* left(){return l;} Huffnode<T>* right(){return r;} int getFreq(){return freq;} T getElem(){return element;} void setPath(const char* str); char* getPath(){return path;} bool internal(){return isInternal;} void operator++(); Huffnode(int weight);//constructor for internal Huffnode(T elem, int frequency);//constructor for leaf Huffnode<T>* operator+(Huffnode<T>& other); //static Huffnode<T>* root; }; const int AlphaNumber=128; class Huffman { private: Huffnode<char>* nodes[AlphaNumber]; //Huffnode<char>* lists[AlphaNumber]; Huffnode<char>* root; void initialize(); void swapNode(Huffnode<char>* lists[], int i, int j); void insertList(Huffnode<char>* lists[], Huffnode<char>* ptr, int length); void assignPath(); void sortList(Huffnode<char>* lists[], int length); void traverse(Huffnode<char>* ptr, char* pathStr); void traverse(Huffnode<char>* ptr); public: Huffman(); ~Huffman(); void readFile(const char* fileName); void buildTree(); void compress(const char* sourceFile); void display(); }; /* template<class T> Huffnode<T>* Huffnode<T>::root=NULL; */ template<class T> void Huffnode<T>::operator ++() { freq++; } template<class T> void Huffnode<T>::setPath(const char* str) { //unsigned int temp; length=strlen(str); path=new char[length+1]; strcpy(path, str); /* bit=0; for (int index=length-1; index>=0; index--) { if (str[index]=='1') { temp=1; bit+=temp<<index; //bit+=temp; } } */ } template <class T> Huffnode<T>::Huffnode<T>(T elem, int frequency) { isInternal=false; element=elem; freq=frequency; l=r=NULL; path=NULL; } //internal node template<class T> Huffnode<T>::Huffnode<T>(int weight) { freq=weight; isInternal=true; l=r=NULL; path=NULL; } template<class T> Huffnode<T>* Huffnode<T>::operator +(Huffnode<T>& other) { Huffnode<T>* result=new Huffnode<T>(freq+other.freq); if (freq<other.freq) { result->l=this; result->r=&other; } else { result->l=&other; result->r=this; } return result; } #endif
file name: Huffman.cpp
//#include <stdio.h> #include <string.h> #include <stdlib.h> #include "huffman.h" Huffman::Huffman() { initialize(); } void Huffman::traverse(Huffnode<char>* ptr) { if (ptr==NULL) { return; } //only delete the internal if (ptr->internal()) { traverse(ptr->left()); traverse(ptr->right()); delete ptr; } } Huffman::~Huffman() { Huffnode<char>* ptr=root; traverse(ptr); for (int i=0; i<AlphaNumber; i++) { delete nodes[i]->getPath(); delete nodes[i]; nodes[i]=NULL; } } void Huffman::display() { for (int i=0; i<AlphaNumber; i++) { if (nodes[i]->getFreq()!=0) { //display char in form of int, otherwise invisible. printf("char %d is encoded:%s\t", nodes[i]->getElem(), nodes[i]->getPath()); printf("the length:%d\n", nodes[i]->getLength()); } } } void Huffman::initialize() { for (int i=0; i<AlphaNumber; i++) { nodes[i]=new Huffnode<char>(i, 0); //lists[i]=nodes[i];//initially all nodes are in lists } } void Huffman::readFile(const char* fileName) { FILE* stream; char ch; if ((stream=fopen(fileName, "r"))==NULL) { printf("error open file %s\n", fileName); exit(1); } ch=fgetc(stream); while (!feof(stream)) { nodes[ch]->operator++(); ch=fgetc(stream); } } void Huffman::buildTree() { Huffnode<char>* lists[AlphaNumber]; Huffnode<char>* ptr; int counter=0; for (int i=0; i<AlphaNumber; i++) { if (nodes[i]->getFreq()!=0) { lists[counter++]=nodes[i]; } } sortList(lists, counter);//must be descending while (counter>1) { ptr=lists[counter-2]->operator+(*lists[counter-1]); insertList(lists, ptr, --counter);//shrink size by 2, but add one } root=lists[0]; assignPath(); } void Huffman::swapNode(Huffnode<char>* lists[], int i, int j) { Huffnode<char>* hold; hold=lists[i]; lists[i]=lists[j]; lists[j]=hold; } //descending void Huffman::sortList(Huffnode<char>* lists[], int length) { Huffnode<char>* current; for (int i=0; i<length-1; i++) { current=lists[i]; for (int j=i+1; j<length; j++) { if (current->getFreq()<lists[j]->getFreq()) { swapNode(lists, i, j); } } } } //length includes ptr itself void Huffman::insertList(Huffnode<char>* lists[], Huffnode<char>* ptr, int length) { bool shifting=false; Huffnode<char>* hold, *temp; for (int i=0; i<length; i++) { if (!shifting) { if (ptr->getFreq()>lists[i]->getFreq()) { shifting=true; hold=ptr; //temp=lists[i]; } } if (shifting) { temp=lists[i]; lists[i]=hold; hold=temp; } } //if not find a suitable place, insert at last if (!shifting) { lists[length]=ptr; } } void Huffman::traverse(Huffnode<char>* ptr, char* pathStr) { int len; if (ptr==NULL) { return; } len=strlen(pathStr); //strcpy(pathStr, "ok"); if (ptr->internal()) { pathStr[len]='0'; pathStr[len+1]='\0';//grows traverse(ptr->left(), pathStr); pathStr[len]='1'; traverse(ptr->right(), pathStr); pathStr[len]='\0';//restores } else { //leaf ptr->setPath(pathStr); } } void Huffman::assignPath() { Huffnode<char>* ptr=root; char buffer[AlphaNumber]; buffer[0]='\0'; traverse(root, buffer); }
file name: main.cpp (main)
#include "huffman.h" int main() { //unsigned int i=0x80000000; //printf("i is %x\n", i); Huffman H; H.readFile("nicktest.txt"); H.buildTree(); H.display(); return 0; }
The result is like following:
char 10 is encoded:10010111 the length:8 char 32 is encoded:1101 the length:4 char 40 is encoded:001100010 the length:9 char 41 is encoded:1001001101 the length:10 char 44 is encoded:1011001 the length:7 char 45 is encoded:10010110 the length:8 char 46 is encoded:10100 the length:5 char 48 is encoded:1100 the length:4 char 50 is encoded:1001001110 the length:10 char 57 is encoded:001100011 the length:9 char 65 is encoded:00110100 the length:8 char 67 is encoded:00110101 the length:8 char 68 is encoded:00110011 the length:8 char 69 is encoded:1001001001 the length:10 char 70 is encoded:001100001 the length:9 char 71 is encoded:001100000 the length:9 char 77 is encoded:00110010 the length:8 char 78 is encoded:10010000 the length:8 char 79 is encoded:10010001 the length:8 char 80 is encoded:1001001100 the length:10 char 82 is encoded:1001001111 the length:10 char 83 is encoded:00110110 the length:8 char 84 is encoded:00110111 the length:8 char 86 is encoded:1001001000 the length:10 char 89 is encoded:1001001010 the length:10 char 92 is encoded:1011000 the length:7 char 97 is encoded:10101 the length:5 char 98 is encoded:101110 the length:6 char 99 is encoded:101111 the length:6 char 100 is encoded:0110 the length:4 char 101 is encoded:0111 the length:4 char 102 is encoded:001111 the length:6 char 103 is encoded:001110 the length:6 char 104 is encoded:0000 the length:4 char 105 is encoded:0001 the length:4 char 106 is encoded:1001001011 the length:10 char 108 is encoded:00101 the length:5 char 109 is encoded:00100 the length:5 char 110 is encoded:1111 the length:4 char 111 is encoded:1110 the length:4 char 112 is encoded:10000 the length:5 char 114 is encoded:10001 the length:5 char 115 is encoded:0100 the length:4 char 116 is encoded:0101 the length:4 char 117 is encoded:101101 the length:6 char 118 is encoded:100111 the length:6 char 119 is encoded:10010101 the length:8 char 120 is encoded:10010100 the length:8 char 121 is encoded:100110 the length:6 Press any key to continue