Dependency(set)

A. Second Edition
This is the second edition of my "dependency" and spend half day to implement a help class "Set" which will
be intensively used throughout this series.
B.The problem

"Function dependency" is a very important issue in database theory. And what's more, it is the essence of all.

Because I think it is the key of knowledge: the world is described by relations and relations of relations.

 

C.The idea of program
 
This is a rather old topic. There are some topics I like to write programs over and over, such as DFS,DFA and
 
Sets. Compared with the very, very first approach more than a year ago, I now understand more about set theory.
 
And I am using a rather simple way---bitset which is in the standard template library(STL).
 
D.The major functions
There are few points to mention except for the "forEachSubset" function. I think I do it in a similar way like
"strtok". Do you feel strange that I connect these two together? You first call "forEachSubset" once and then
repeatedly call "eachSub" function to get an output "Set" parameter which is a "proper subset" of calling object.
Please note that I ignore the calling set itself which is a trivial subset, but include the "empty set" at last.
(maybe I should remove the annoying empty set!?)
The implementation is simple: each existing bit has two choice: either in or not in. And I always do this kind of 
algorithms like ancient Chinese "calculus"---or addition calculator.
The second feature is that I have become a fanatic about operator-overloading. I have used up the common operators
for overloading my "set" class.
 
E.Further improvement
 
F.File listing
1. rules.h 
2. rules.cpp
3. set.h
4. set.cpp
5. main.cpp (main)
file name: set.h
#ifndef SET_H
#define SET_H

#include <iostream>
#include <bitset>

using namespace std;


const int MaxAttrNumber=20;

class Set
{
	//this is a long-pain for me, I have no other way to 
	//let compiler recognize this "friend" function outside declaration
	friend ostream& operator<<(ostream& out, const Set& dummy)	
	{
		for (int i=0; i<dummy.size; i++)
		{
			if (dummy.theSet.test(i))
			{
				out<<'1';
			}
			else
			{
				out<<'0';
			}
		}
		return out;
	}
private:
	bitset<MaxAttrNumber> theSet;
	int size;
	int current;
public:
	void setSize(const Set& dummy);
	int next(int current);
	int first();	
	int count();
	Set intersection(const Set& dummy);
	Set unionSet(const Set& dummy);
	Set difference(const Set& dummy);
	
	//I am crazy about operator overloading!!!:)
	Set operator-(const Set& dummy);
	Set operator+(const Set& dummy);
	Set operator*(const Set& dummy);
	void operator=(const Set& dummy);
	bool operator==(const Set& dummy);
	bool operator!=(const Set& dummy);
	bool operator>(const Set& dummy);
	bool operator>=(const Set& dummy);
	bool operator<(const Set& dummy);
	bool operator<=(const Set& dummy);
	void set(int pos);
	void forEachSubSet(Set& dummy);//must be called before "eachSub()"
	bool eachSub(Set& dummy);
	void set();
	void reset(int pos);
	void reset();
	bool test(int pos) const;
	bool isIn(const Set& dummy);
	void setSize(int theSize) {size=theSize;}
	Set(int theSize=10);
};

#endif
file name: set.cpp
#include "Set.h"

bool Set::isIn(const Set& dummy)
{
	for (int i=0; i<size; i++)
	{
		if (theSet.test(i))
		{
			if (!dummy.test(i))//here I use Set.test() instead of set.test()
			{
				return false;
			}
		}
	}
	return true;		
}

bool Set::test(int pos) const
{
	return (pos<size&&theSet.test(pos));
}

//current=-1;//initialize to -1 to prepare for being called
int Set::next(int current)
{
	for (int i=current+1; i<size; i++)//include situation current>=size
	{
		if (theSet.test(i))
		{
			return i;
		}
	}
	return -1;//not found
}

bool Set::operator !=(const Set& dummy)
{
	return !(this->operator ==(dummy));
}

bool Set::operator <(const Set& dummy)
{
	return (this->isIn(dummy)&&this->operator !=(dummy));
}

bool Set::operator <=(const Set& dummy)
{
	return isIn(dummy);
}

bool Set::operator >(const Set& dummy)
{
	return !(this->operator <=(dummy));
}

bool Set::operator >=(const Set& dummy)
{
	return !(this->operator <(dummy));
}

bool Set::operator ==(const Set& dummy)
{
	for (int i=0; i<(size>dummy.size?size:dummy.size); i++)
	{
		if (test(i)^dummy.test(i))
		{
			return false;
		}
	}
	return true;
}

void Set::setSize(const Set& dummy)
{
	size=dummy.size;
}

void Set::operator =(const Set& dummy)
{
	size=dummy.size;
	for (int i=0; i<size; i++)
	{
		if (dummy.test(i))
		{
			theSet.set(i);
		}
		else
		{
			theSet.reset(i);
		}
	}
}


Set::Set(int theSize)
{
	size=theSize;
	reset();
}

void Set::reset()
{
	for (int i=0; i<size; i++)
	{
		theSet.reset(i);
	}
}

void Set::reset(int pos)
{
	if (pos<size)
	{
		theSet.reset(pos);
	}
}

void Set::set()
{
	theSet.set();
}

void Set::set(int pos)
{
	theSet.set(pos);
}
	
void Set::forEachSubSet(Set& dummy)
{
	dummy=*this;
}


bool Set::eachSub(Set& dummy)
{
	int index=first();//starting from very first

	while (index!=-1)//not exceeding boundery
	{
		if (dummy.test(index))
		{
			dummy.reset(index);
			return true;
		}
		else
		{
			dummy.set(index);
			index=next(index);
		}
	}
	return false;
}

int Set::first()
{
	return next(-1);
}

int Set::count()
{
	return theSet.count();
}

Set Set::unionSet(const Set& dummy)
{
	Set result;
	result.size=size>dummy.size?size:dummy.size;
	for (int i=0; i<result.size; i++)
	{
		if (test(i)||dummy.test(i))
		{
			result.set(i);
		}
	}
	return result;//this is a temparory object;
}

Set Set::difference(const Set& dummy)
{
	Set result;
	result.size=size>dummy.size?size:dummy.size;
	for (int i=0; i<result.size; i++)
	{
		if (test(i)&&!dummy.test(i))
		{
			result.set(i);
		}
	}
	return result;
}

Set Set::operator +(const Set& dummy)
{
	return unionSet(dummy);
}

Set Set::operator -(const Set& dummy)
{
	return difference(dummy);
}

Set Set::intersection(const Set& dummy)
{
	Set result;
	result.size=size<dummy.size?size:dummy.size;
	for (int i=0; i<result.size; i++)
	{
		if (test(i)&&dummy.test(i))
		{
			result.set(i);
		}
	}
	return result;
}

Set Set::operator *(const Set& dummy)
{
	return intersection(dummy);
}


  
file name: rules.h 
#include <iostream>
#include <bitset>
#include "set.h"

using namespace std;

//make it simple, the first line must list all variables or attributes
//"relation"(A,B,C...)
const int MaxNameLength=5;
const int MaxRuleNumber=100;




class Rule
{
	friend class Rules;
private:
	Set lhs;
	Set rhs;	
public:	
	Rule();
	bool test(int pos, bool isLeft);
	void lhsSet(int pos);
	void rhsSet(int pos);
	void setSize(int theSize);
	void set(int pos, bool isLeft);
	void setRule(const Set& left, const Set& right);
	void operator=(const Rule& dummy);
};

class Rules
{
private:
	char attributes[MaxAttrNumber][MaxNameLength+1];
	char relationName[MaxNameLength+1];
	int attrCount;
	int attrFound[MaxAttrNumber];
	int numberFound;
	int searchByChar(char ch, int step);
	void ruleReading(FILE* stream);
	Rule rules[MaxRuleNumber];
	int ruleNumber;
	void displayRule(int index);
	void split();
	void addRule(const Set& left, const Set& right);
	int extractNames(FILE* stream, char* delimeters, char endChar='\n');
public:
	void addRule(const Rule& dummy);
	void readFile(const char* fileName);
	void display();
	Rules();
};
 
file name: rules.cpp 
#include "Rules.h"


void Rules::display()
{
	cout<<"\nnow display\n";
	cout<<relationName<<"(";
	for (int i=0; i<attrCount; i++)
	{
		cout<<attributes[i];
		if (i!=attrCount-1)
		{
			cout<<",";
		}
		else
		{
			cout<<");\n";
		}
	}
	for (i=0; i<ruleNumber; i++)
	{
		displayRule(i);
	}
}


bool Rule::test(int pos, bool isLeft)
{
	if (isLeft)
	{
		return lhs.test(pos);
	}
	else
	{
		return rhs.test(pos);
	}
}


void Rules::displayRule(int index)
{
	for (int i=0; i<attrCount; i++)
	{
		if (rules[index].test(i, true))
		{
			cout<<attributes[i];
		}
	}
	cout<<" -> ";
	for (i=0; i<attrCount; i++)
	{
		if (rules[index].test(i, false))
		{
			cout<<attributes[i];
		}
	}
	cout<<";\n";
}


void Rule::set(int pos, bool isLeft)
{
	if (isLeft)
	{
		lhsSet(pos);
	}
	else
	{
		rhsSet(pos);
	}
}

void Rule::rhsSet(int pos)
{
	rhs.set(pos);
}

void Rule::lhsSet(int pos)
{
	lhs.set(pos);
}

Rule::Rule()
{
	lhs.reset();
	rhs.reset();
	
}


void Rules::readFile(const char* fileName)
{
	FILE* stream;
	stream=fopen(fileName, "r");
	attrCount=extractNames(stream, "=,()", ';');//ignore the first relation name
	ruleReading(stream);

}

void Rules::ruleReading(FILE* stream)
{
	char ch;
	int step=0;
	ruleNumber=0;//initialize
	bool isLeft=true;
	rules[ruleNumber].setSize(attrCount);//do twice!!
	while (!feof(stream))
	{		
		ch=fgetc(stream);
		if (ch==';'||ch==',')
		{
			ruleNumber++;
			rules[ruleNumber].setSize(attrCount);//do twice!!
			isLeft=true;
		}
		else
		{
			if (ch=='-'||ch=='>')
			{
				isLeft=false;
			}
			else
			{
				if (isalpha(ch))
				{
					searchByChar(ch, step);
					if (numberFound!=1)
					{
						if (step<MaxNameLength)
						{
							step++;
						}
						else
						{
							cout<<"illegal attribute stated!\n";
							exit(1);
						}
					}
					else
					{
						step=0;
						
						rules[ruleNumber].set(attrFound[0], isLeft);
					}
				}
			}
		}
	}
}

void Rule::setSize(int theSize)
{
	lhs.setSize(theSize);
	rhs.setSize(theSize);
}
	

int Rules::searchByChar(char ch, int step)
{
	if (step==0)//this is first time, and all attributes are searched
	{
		numberFound=0;
		for (int i=0; i<attrCount; i++)//
		{
			if (ch==attributes[i][step])
			{
				attrFound[numberFound]=i;
				numberFound++;
			}
		}
	}
	else
	{
		int number=0;//new 'attrFound' re-counting
		for (int i=0; i<numberFound; i++)
		{
			if (ch==attributes[attrFound[i]][step])
			{
				attrFound[number]=i;
				number++;
			}
		}
		numberFound=number;
	}
	return numberFound;
}


int findChar(char ch, char* str)
{
	int index=0;
	while (str[index]!='\0')
	{
		if (str[index]==ch)
		{
			return index;
		}
		index++;
	}
	return -1;
}

//extract names from a line delimetered by various chars, like ',','(',')'....
int Rules::extractNames(FILE* stream, char* delimeters, char endChar)
{
	int findChar(char ch, char* str);
	char ch;
	int nameIndex=0;
	char buffer[MaxNameLength+1];
	char* ptr=buffer;

	while (!feof(stream))
	{
		ch=getc(stream);
		if (ch==endChar)
		{
			if (ptr!=buffer)
			{
				*ptr='\0';
				strcpy(attributes[nameIndex], buffer);
			}
			break;
		}

		if (findChar(ch, delimeters)>0)//deli reached
		{
			//skip the consecutive deli
			if (ptr!=buffer)
			{
				*ptr='\0';	
				if (ch=='(')//the very first one
				{
					strcpy(relationName, buffer);
				}
				else
				{
					strcpy(attributes[nameIndex], buffer);
					nameIndex++;
				}
				ptr=buffer;//restart
			}
		}
		else
		{
			*ptr=ch;
			ptr++;
		}
	}
	return nameIndex;
}		

Rules::Rules()
{
	numberFound=attrCount=0;
}
	
void Rule::operator =(const Rule& dummy)
{
	lhs=dummy.lhs;
	rhs=dummy.rhs;
}

void Rules::addRule(const Rule& dummy)
{
	rules[ruleNumber++]=dummy;
}

void Rules::addRule(const Set& left, const Set& right)
{
	rules[ruleNumber++].setRule(left, right);
}

void Rule::setRule(const Set& left, const Set& right)
{
	lhs=left;
	rhs=right;
}

void Rules::split()
{
	int index;
	for (int i=0; i<ruleNumber; i++)
	{
		if (rules[i].rhs.count()>1)
		{			
			index=rules[i].rhs.first();
			index=rules[i].rhs.next(index);//starting from second one
			while (index!=-1)
			{
				Set right;				
				right.setSize(rules[i].rhs);
				right.set(index);
				rules[i].rhs.reset(index);//remove from old rule
				addRule(rules[i].lhs, right);
				index=rules[i].rhs.next(index);
			}
		}
	}
}

 
file name: main.cpp (main)
 
#include "Rules.h"
#include "set.h"

int main()
{
	Rules R;
	R.readFile("d:\\rules.txt");
	R.display();

	return 0;
}
 




                                 back.gif (341 bytes)       up.gif (335 bytes)         next.gif (337 bytes)