CFG Reader
A. First Edition
This is first edition of my CFG Reader which stands for Context-Free-Grammar Reader.
A CFG is the basic rule for writing a parser, however, even the grammar itself should be read in automatically.
And what's more, the grammar should be accessable in all parsing procedure. So, this is the reason I want to write
such a seemingly stupid class. It not only turns input of grammar automatic, but in future whenever grammar is
supposed to change, everything should be able to update. Or do you understand the "YACC" is also doing like this
way? Though I never use it and have no idea how it looks like, in my mind, it should be something like this.
program -> stmt-sequence
stmt-sequence -> stmt-sequence ; statement | statement
statement -> if-stmt | repeat-stmt | assign-stmt | read-stmt | write-stmt
if-stmt -> if boolean-exp then stmt-sequence end
| if boolean-exp then stmt-sequence else stmt-sequence end
repeat-stmt -> repeat stmt-sequence until boolean-exp
assign-stmt -> identifier := integer-exp
read-stmt -> read identifier
write-stmt -> write integer-exp
boolean-exp -> integer-exp comparison-op integer-exp
comparison-op -> < | =
integer-exp -> integer-exp addop term | term
addop -> + | -
term -> term mulop factor | factor
mulop -> * | /
factor -> ( integer-exp ) | number | identifier
E.Further improvement
F.File listing
1. CFGReader.h
2. CFGReader.cpp
3. main.cpp (main)
file name: CFGReader.h
const int BufferLength=256; const int MaxTokenCount=50; const int MaxRHSCount=8; const int MaxRuleCount=50; const int MaxTokenLength=10; const int MaxProduction=10; struct Token { bool terminal; char* name; int production[MaxProduction];//pointer to the production it gives int count; }; class CFGReader { private: char buffer[BufferLength]; FILE* stream; Token* token[MaxTokenCount]; int tokenCount; int curIndex;//the index of current token int curPos;//the position at production rule int production[MaxRuleCount][MaxRHSCount]; int prodCount; void newRule(); void initialize(); void readRule(); int tokenIndex(const char* str); void addRule(const char* str); int addToken(const char* str, bool isTerminal=true); void printRule(int index); public: void readFromFile(const char* fileName); void print(); };
file name: CFGReader.cpp
#include <iostream> #include "CFGReader.h" using namespace std; const char* deliStr=" |\n"; void CFGReader::readFromFile(const char* fileName) { if ((stream=fopen(fileName, "r"))==NULL) { cout<<"cannot open file "<<fileName<<endl; } else { initialize(); readRule(); } } void CFGReader::readRule() { char* ptr=buffer; char* hold; int count=0; while (!feof(stream)) { count=0; fgets(buffer, BufferLength-1, stream); ptr=buffer; while ((ptr=strtok(ptr, " \n"))!=NULL) { if (strcmp(ptr, "|")!=0) { //test the first two token to see if old rule continues if (count==0) { hold=ptr; } if (count==1) { if (strcmp("->", ptr)==0) { curIndex=addToken(hold, false);//the index of cur token token[curIndex]->terminal=false; newRule(); } else { addRule(hold); addRule(ptr); } } if (count>=2)//always add { addRule(ptr); } count++; } else { //this is an alternative production rule newRule(); } ptr=NULL; } } } void CFGReader::newRule() { prodCount++; //add one pointer token[curIndex]->production[token[curIndex]->count++]=prodCount; curPos=0;//reset to restart; } int CFGReader::tokenIndex(const char* str) { for (int i=0; i<tokenCount; i++) { if (strcmp(str, token[i]->name)==0) { return i; } } return -1; } void CFGReader::addRule(const char* str) { int index; index=addToken(str); production[prodCount][curPos++]=index; production[prodCount][curPos]=-1;//set end } int CFGReader::addToken(const char* str, bool isTerminal) { int index; index=tokenIndex(str); if (index==-1) { index=tokenCount; } else { return index; } token[index]=new Token; token[index]->terminal=isTerminal; token[index]->name= new char[strlen(str)+1]; strcpy(token[index]->name, str); token[index]->count=0; tokenCount++; return index; } void CFGReader::initialize() { tokenCount=curIndex=curPos=0; prodCount=-1;//in order to ++ blindly } void CFGReader::printRule(int index) { int nodeIndex=0; while (production[index][nodeIndex]!=-1) { cout<<token[production[index][nodeIndex]]->name<<" "; nodeIndex++; } } void CFGReader::print() { for (int i=0; i<tokenCount; i++) { if (!token[i]->terminal) { cout<<token[i]->name<<" ==> "; for (int j=0; j<token[i]->count; j++) { printRule(token[i]->production[j]); } cout<<"\n"; } } }
file name: main.cpp (main)
#include <iostream> #include "CFGReader.h" using namespace std; int main() { CFGReader R; R.readFromFile("c:\\TinyCFG.txt"); R.print(); return 0; }
Here is the result: The input file is "c:\TinyCFG.txt".
program ==> stmt-sequence stmt-sequence ==> stmt-sequence ; statement statement statement ==> if-stmt repeat-stmt assign-stmt read-stmt write-stmt if-stmt ==> if boolean-exp then stmt-sequence end if boolean-exp then stmt-sequence else stmt-sequen ce end repeat-stmt ==> repeat stmt-sequence until boolean-exp assign-stmt ==> identifier := integer-exp read-stmt ==> read identifier write-stmt ==> write integer-exp boolean-exp ==> integer-exp comparison-op integer-exp integer-exp ==> integer-exp addop term term comparison-op ==> < = addop ==> + - term ==> term mulop factor factor mulop ==> * / factor ==> ( integer-exp ) number identifier Press any key to continue