1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
|
#include <algorithm>
#include <cctype>
#include <deque>
#include <iomanip>
#include <iostream>
#include <limits>
#include <sstream>
#include <string>
#include <utility>
// One lexical token: the token text plus a coarse classification.
// Kept as a plain aggregate so it can be brace-initialised as { text, type }.
struct token {

    // TO DO: handle literal strings, chars etc.
    enum type_t { KEYWORD, IDENTIFIER, INTEGER, FLOAT, OPERATOR, TERMINAL };

    std::string value ;         // text of the token
    type_t type = IDENTIFIER ;  // classification; TERMINAL marks "no more tokens"

    // A token tests true unless it is the TERMINAL marker.
    explicit operator bool () const { return type != TERMINAL ; }
    bool operator! () const { return type == TERMINAL ; }

    // TO DO: allow operators of more than one character eg <= or ++
    // ie. std::vector<std::string> operators ;
    static const std::string operators ;
    static const std::deque<std::string> keywords ;

    // Debug printer: writes the token text followed by its category in parentheses.
    friend std::ostream& operator<< ( std::ostream& stm, const token& t ) {

        // identifiers are shown quoted, operators in single quotes
        if( t.type == token::IDENTIFIER ) return stm << std::quoted(t.value) << " (IDENTIFIER)" ;
        if( t.type == token::OPERATOR ) return stm << "'" << t.value << "' (OPERATOR)" ;

        // keywords and numbers are shown verbatim
        const char* const suffix = t.type == token::KEYWORD ? " (KEYWORD)"
                                 : t.type == token::INTEGER ? " (INTEGER)"
                                 : t.type == token::FLOAT   ? " (FLOAT)"
                                 :                            nullptr ;
        if( suffix != nullptr ) return stm << t.value << suffix ;

        // anything else is reported as the end-of-input marker
        return stm << "(TERMINAL)" ;
    }
};

// The set of single characters that are classified as operators.
const std::string token::operators { "+-*/%()[]{},;.<>=!?:" } ;

// Words that are classified as KEYWORD rather than IDENTIFIER.
const std::deque<std::string> token::keywords {
    "const", "auto", "if", "for", "double", "int", "throw" // etc.
} ;
// Splits the characters of an input stream into tokens, one at a time,
// with support for pushing tokens back.
struct token_stream {

    // Binds to input_stm (held by reference, so it must outlive this object)
    // and switches whitespace skipping on for formatted reads from it.
    explicit token_stream( std::istream& input_stm = std::cin )
        : stm(input_stm) { stm >> std::skipws ; }

    // Returns the next token. Once input is exhausted (or has failed)
    // every call returns a TERMINAL token.
    token get()
    {
        if( tokens.empty() ) underflow() ;
        token t = std::move( tokens.front() ) ; // non-const local: moved out of the queue and out of the function
        tokens.pop_front() ;
        return t ;
    }

    // Pushes t to the front of the queue; the next get() returns it.
    void put_back( token t ) { tokens.push_front( std::move(t) ) ; }

    std::istream& stm ;        // source of characters
    std::deque<token> tokens ; // tokens read or put back, but not yet handed out

    // Reads one token from the stream and appends it to the queue
    // (a TERMINAL token if nothing more can be read).
    void underflow() {

        char c ;
        if( !( stm >> c ) ) { // input failed
            tokens.push_back( { {}, token::TERMINAL } ) ; // no more tokens
            return ;
        }

        // the <cctype> classifiers have undefined behaviour for negative values,
        // so they are always called with the character converted to unsigned char
        static const auto as_uchar = [] ( char ch ) { return static_cast<unsigned char>(ch) ; } ;
        static const auto isoper = [] ( char ch ) { return token::operators.find(ch) != std::string::npos ; } ;
        static const auto iskeyword = [] ( const std::string& str )
        { return std::find( token::keywords.begin(), token::keywords.end(), str ) != token::keywords.end() ; } ;

        if( std::isdigit( as_uchar(c) ) ) { // number (a simplistic implementation)

            stm.putback(c) ;
            long double n = 0 ;

            // a malformed number (eg. "2e") puts the stream into a failed state;
            // clear it so that the rest of the input can still be tokenised
            if( !( stm >> n ) ) stm.clear() ;

            // TO DO: refine this (use regular expressions?)
            // this is terribly crude; for instance 3.0 would be parsed as an integer.
            // it doesn't take care of things like 23.4f or 7LL (the 'f' and "LL" are not consumed)
            // also, with the current code, the number "-3" is parsed as two tokens: "-" and "3" (this may be acceptable)

            // 2^63: the first value too large for long long. Converting a floating point
            // value to an integer type that can't hold it is undefined behaviour, so the
            // range is checked before the conversion is attempted.
            static const long double too_big = -static_cast<long double>( std::numeric_limits<long long>::min() ) ;

            const bool integral = n < too_big && n == static_cast<long double>( static_cast<long long>(n) ) ;
            if( integral ) tokens.push_back( { std::to_string( static_cast<long long>(n) ), token::INTEGER } ) ;
            else tokens.push_back( { std::to_string(n), token::FLOAT } ) ;
        }

        else if( isoper(c) ) // operator
            tokens.push_back( { {c}, token::OPERATOR } ) ;

        else { // identifier or keyword

            std::string str = {c} ;
            // accumulate characters up to the next whitespace or operator character
            while( stm.get(c) && !std::isspace( as_uchar(c) ) && !isoper(c) ) str += c ;
            if(stm) stm.putback(c) ; // the delimiter belongs to the next token
            tokens.push_back( { str, iskeyword(str) ? token::KEYWORD : token::IDENTIFIER } ) ;
        }
    }
};
// Demo: echoes a sample code fragment, then prints its tokens, one per line,
// stopping at the first TERMINAL token.
int main() {

    const std::string input = " const auto closure = [] ( double value )\n { if( value > 7.3 ) array[4][5] = -1.234e+2 ; } ;" ;
    std::cout << input << "\n\n" ;

    std::istringstream input_stm(input) ;
    token_stream tok_stm(input_stm) ;

    // a token converts to false only when it is TERMINAL
    for( token t = tok_stm.get() ; t ; t = tok_stm.get() )
        std::cout << t << '\n' ;
}
|