1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
|
#include "lexer.h"
#include <iterator>
#include <string>
#include<cctype>
using std::list;
using std::string;
/**
 * Splits a string into a list of tokens.
 *
 * Every character is put into one of these classes:
 *   - number     : decimal digits
 *   - identifier : alphabetic characters and the quote characters " and '
 *                  (this is how literals end up in identifier tokens)
 *   - symbol     : any other character except a space
 *
 * Rules:
 *   - consecutive number characters form one token;
 *   - consecutive identifier characters form one token;
 *   - every symbol character is a token of its own;
 *   - a change of class ends the current token, so "ab12" gives "ab", "12";
 *   - a space (' ' only) ends the current token and is otherwise skipped.
 *     Other whitespace such as tabs or newlines is classified as a symbol.
 *
 * @param s text to tokenize
 * @return the tokens in order of appearance, each constructed as
 *         token(substring of s); empty when s is empty or all spaces
 */
list<token> tokenize(string s)
{
    // Class of the token currently being scanned; nil means "between tokens".
    enum State {nil, num, ifr, sym};

    list<token> lt; // If this return type needs to be changed, also modify the push_back calls below.
    State state = nil;
    string::size_type start = 0; // index of the first character of the current token
    string section;              // text of the token about to be emitted

    const string::size_type total = s.size();
    for (string::size_type pos = 0; pos < total; ++pos)
    {
        const char cur = s[pos];
        // The <cctype> functions require a value representable as
        // unsigned char (or EOF); passing a negative char is undefined.
        const unsigned char uc = static_cast<unsigned char>(cur);

        // Classify the current character; a space maps to nil.
        State next;
        if (cur == ' ')
            next = nil;
        else if (std::isdigit(uc))
            next = num;
        else if (std::isalpha(uc) || cur == '"' || cur == '\'')
            next = ifr;
        else
            next = sym;

        // A token in progress ends on a space, on a change of class, and
        // before every symbol (which keeps symbols single-character).
        const bool boundary = (state != nil) && (next != state || next == sym);
        if (boundary)
        {
            section = s.substr(start, pos - start);
            lt.push_back(token(section));
        }

        // Begin the next token (or go idle on a space) at this position.
        if (state == nil || boundary)
        {
            state = next;
            start = pos;
        }
    }

    // Flush the token that runs up to the end of the input, if any.
    if (state != nil)
    {
        section = s.substr(start);
        lt.push_back(token(section));
    }

    return lt;
}
|