Simple Interpreter

Pages: 123
Jun 13, 2009 at 5:37pm
Well at the moment I am commenting the big parts so that I understand it all.

Do you see how I could make the script parse a file though?
Jun 13, 2009 at 6:27pm
So I've just added ^ (power) to the code.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#include <string>
#include <cctype>
#include <iostream>
#include <map>
#include <strstream>

using namespace std;

// Pointer to input string
istream* input;

// No of errors = 0 by default
int no_of_errors;

// Function to output error
double error(const char* s) {
    no_of_errors++;
    cerr << "Error: " << s << '\n';
    return 1;
}

// Different tokens
enum Token_value {
	NAME,		NUMBER,		END,
	PLUS='+',	MINUS='-',	MUL='*',	DIV='/', POW='^',
	PRINT=';',	ASSIGN='=',	LP='(',		RP=')'
};

Token_value curr_tok = PRINT;
double number_value;
string string_value;

// For each function in the token_value enumeration...
Token_value get_token() {
	char ch;

	// Skip whitespace except \n
	do {
		if(!input->get(ch)) return curr_tok = END;
	} while (ch!='\n' && isspace(ch));

	// Switch each character
	switch (ch) {
        // Print
    	case ';':
    	case '\n':
    		return curr_tok=PRINT;
	// Operators
    	case '*':
    	case '/':
    	case '+':
    	case '-':
    	case '(':
    	case ')':
    	case '=':
        case '^':
    		return curr_tok=Token_value(ch);
	// Numbers AND Decimal point
    	case '0': case '1': case '2': case '3': case '4':
    	case '5': case '6': case '7': case '8': case '9':
    	case '.':
    		input->putback(ch);
    		*input >> number_value;
    		return curr_tok=NUMBER;
	// Variable declaration
    	default:			// NAME, NAME=, or error
    		if (isalpha(ch)) { // If ch is alphabetic
    			string_value = ch;
    			while (input->get(ch) && isalnum(ch))
    				string_value += ch;	// string_value.push_back(ch);
    							// to work around library bug
    			input->putback(ch);
    			return curr_tok=NAME;
    		}
    		error("Bad token");
    		return curr_tok=PRINT;
	}
}

// Create a map type consisting of string and double values
map <string, double> table;

// Expressions
double expr(bool);

// Handle Primaries
double prim(bool get) {
	if (get) get_token();

	switch (curr_tok) {
        // Floating point constants
    	case NUMBER:
    	{	double v = number_value;
    		get_token();
    		return v;
    	}
    	// Variables
    	case NAME:
    	{	double& v = table[string_value];
    		if (get_token() == ASSIGN) v = expr(true);
    		return v;
    	}
    	// Unary Minus
    	case MINUS:
    		return -prim(true);
	// Left Parenthesis
    	case LP:
    	{	double e = expr(true);
    		if (curr_tok != RP) return error(") expected");
    		get_token();		// eat ')'
    		return e;
    	}
    	default:
    		return error("Primary expected");
	}
}

// Calculate the powers
double Pow(double n, double p) {
	double result = 1.0; 
	for(int j=0; j<p; j++) {
		result *= n;
	}
	return result;       
}

// Multiply and divide
double term(bool get) {
	double left = prim(get);

	// Keep looping - that wont happen though as we HAVE to find a token
	for (;;) {
		switch (curr_tok) {
		// Multiply
    		case MUL:
    			left *= prim(true);
    			break;
   		// Divide
    		case DIV:
    			if (double d = prim(true)) {
    				left /= d;
    				break;
    			}
    			return error("Divide by 0");
   		// Power
		case POW:
			return Pow(left, prim(true));
			break;
    		default:
    			return left;
		}
    }
}

// Add and subtract
double expr(bool get) {
	double left = term(get);

	// Keep looping - that wont happen though as we HAVE to find a token
	for (;;) {
		switch (curr_tok) {
		// Plus
    		case PLUS:
    			left += term(true);
    			break;
		// Minus
    		case MINUS:
    			left -= term(true);
    			break;
    		default:
    			return left;
		}
    }
}

int main(int argc, char* argv[]) {
    cout << "Ceres Maths Parser V1" << endl;
    cout << "Written by James Brooks 2009\n" << endl;
	switch (argc) {
        // Direct input
    	case 1:
    		input = &cin;
    		break;
    	// Argument String
    	case 2:
    		input = new istrstream(argv[1]);
    		break;
	    // Otherwise we have a problem
    	default:
    		error("Too many arguments");
    		return 1;
	}

	// Insert predefined values
	table["pi"] = 3.1415926535897932385;
	table["e"] = 2.7182818284590452354;

	while (*input) {
		get_token();
		if (curr_tok == END) break;
		if (curr_tok == PRINT) continue;
		cout << expr(false) << '\n';
	}

	if (input != &cin) delete input;
	return no_of_errors;
}

I tried adding functions however I have no idea how to do that. I mean proper functions which allow me to do something like:
 
Tan(x)

Also, could someone explain to me what ";" does in the code?

James
Last edited on Jun 13, 2009 at 7:58pm
Jun 13, 2009 at 9:41pm
';' should act in the same way of the newline, it stops the current operation and makes the program showing the result:
1
2
3
case ';':
case '\n':
    return curr_tok=PRINT;

For a function as sin cos etc, handle it in 'prim'
eg:
1
2
3
4
5
6
7
8
9
10
11
case NAME:
{
    if ( string_value == "sin" )
        return sin ( prim() );
    else
    {
        double& v = table[string_value];
        if (get_token() == ASSIGN) v = expr(true);
        return v;
    }
}
Last edited on Jun 13, 2009 at 9:41pm
Jun 13, 2009 at 9:50pm
Bazzy, could you show me how this would be implemented please?
Jun 13, 2009 at 9:57pm
Didn't I show you?
Check my code example and compare it to what you have (line 98 on the code you posted)
Jun 13, 2009 at 10:05pm
Oh I see! Thanks Bazzy!

Do you know how I could wrap the "arguments" in brackets? Also, instead of command input, what about parsing a file?
Last edited on Jun 13, 2009 at 10:12pm
Jun 14, 2009 at 8:01am
To make your program accept only parentheses to contain arguments, check for the next token to be LP
eg:
1
2
3
4
5
6
7
8
9
10
if ( string_value == "sin" )
{
    if ( get_token() != LP ) return error ( "( expected" );
    
    // Now as in case LP
    double v = expr(true);
    if (curr_tok != RP) return error(") expected");
    get_token();  // eat ')'
    return  sin( v );
}


Your code can already parse a file:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
int main(int argc, char* argv[]) {
    cout << "Ceres Maths Parser V1" << endl;
    cout << "Written by James Brooks 2009\n" << endl;
	switch (argc) {
        // Direct input
    	case 1:
    		input = &cin;
    		break;
    	// Argument String
    	case 2:
    		input = new istrstream(argv[1]); // if you pass a filename as argument to your program it would be used as input
    		break;
	    // Otherwise we have a problem
    	default:
    		error("Too many arguments");
    		return 1;
	}

Jun 14, 2009 at 9:12am
I got parenthesis to work straight after posting, typical, but thanks anyway!

I tried parsing a file however it doesn't work. Surely the file would need to be opened for it to work?
Jun 14, 2009 at 9:17am
The file gets open when its name is passed to the file stream constructor.
How are you passing the filename to your program?
Jun 14, 2009 at 9:26am
So I would need to change:
 
input = new istrstream(argv[1])

To, ifstream?

I'm dragging the file on to the EXE.
Jun 14, 2009 at 9:50am
More or less I have the same problem. I don't know if I have to open another topic or not. I add my question here becouse usually, in forums, admins like to have same questions in same topics.

I need to program, like jbrooksuk, an INTERPRETER (not a parser) of my own script. I read (before enterning here) bison documentations but Bison cannot solve my problems.

Bison is fine if you have to add a syntax check of your script. But I don't need error checking (becouse scripts will be produced with a program, so errors are impossible to do).
The most great problem is one that Bison DON'T solve... is the runtime interpreting (even if you use Bison you have to write it by your own so it is useless to use Bison) and, most of all, Conditional Brantches (if you read documentations it says only "find your way. Is too much hard to explain").

for example if I have this script code

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
command 1
command 2
if (condition 1) {
  command 3
  command 4
  if (condition 2) {
    command 5
    loop(1) {
      command 6
      command 7
      if (condition 3) {
        endloop(1)
      }
    }
  }
}


This is only an example... it could be also in XML grammar. The problem is not the final structure of the script. Is to execute end interpreting it correctly in run-time.

for example command 3 will be executed only if condition 1 is true
for example if condition 1 is true ALL the code inside its { } will be execute (other conditional brantches will be evalued and if true executed, if false skipped).
Loop and endloop works as IDs loop(ID) is a while(1) that will be executed until an endloop(ID) encountered (so loop(1) ended when find endloop(1) and quit out of { } of loop(1) )

Any suggestion will be appreciated. It is fine also a solution that suggest to "build" a script if it is too hard to interpret it directly from source (like C++ do when first compile and after link). In this case I need suggestion how to "build" a binary that reproduces all conditions and loop that is easyer to be interpreted in runtime.
Jun 14, 2009 at 9:55am
That's actually what I was looking at for, an interpreter, I even mentioned it. I'm just working with the calculator link because it's kind of what I need.

I too, still need if statements etc.
Jun 14, 2009 at 11:35am
So I would need to change:
input = new istrstream(argv[1])
To, ifstream?

Yes ( I didn't realise that it was a istrstream )

I too, still need if statements etc.

Is not really hard to add an if statement, you can do this is a similar way of a function:
Pseudocode:
1
2
3
4
5
6
7
8
9
if ( string_value == "if" )
{
   // get parentheses as with 'sin'

   if ( expr() )
       Read_and_execute_the_next_block // You may add the braces token to enclose a block
   else
      Skip_the_next_block
}
Last edited on Jun 14, 2009 at 11:36am
Jun 14, 2009 at 11:56am
Bazzy, Changing istrstream to ifstream just caused errors. I even changed line 13 to ifstream too.
Jun 14, 2009 at 12:10pm
Oh, I've done it now. Thanks very much for the help Bazzy.

Just one last thing. How would you implement user-defined functions?

Code should look like this:
1
2
3
Func Add(x, y) {
      Return x+y
}
Jun 14, 2009 at 1:07pm
That is a bit more challenging and there should be many different ways to accomplish it.
Here is what your parser should do to store and use a function:
1
2
3
4
5
6
7
// function declaration
if You_find_the_keyword "Func"
    read_the_next_token
    if it_isnt_a_NAME    return error
    store_the_function_name_in_a_table // ( as the table you have for variables )
    read_the_parameters_and_save_them
    read_the_body_and_save_it

1
2
3
4
5
6
7
8
9
10
11
12
13
// function call
if Your_token_is_a_NAME
    if is_a_keyword
        do_whatever_you_need // ( as the part above  )
    if is_in_the_variable_table
        return variable_value
    if is_in_the_function_table
       check_for_parentheses
       while function_needs_argument
           get_the_next_value_until_you_find_a_comma
           set_the_parameter_to_the_argument_value
       check_for_RP
       execute_the_function_body


The function may be stored in a class
eg:
1
2
3
4
5
6
struct function
{
    string name;
    map<string,double> parameters;
    string body;
};
When executing the function body use an istringstream constructed with function::body as input stream (you should add the assignment of the argument values to the parameters to the function body) and parse it
Jun 14, 2009 at 1:21pm
Thanks bazzy, I'll have a go at this now!
Jun 14, 2009 at 1:29pm
Using the struct "function", how do I store the function name and parameters to the parameters map?
Jun 14, 2009 at 1:43pm
If you store the functions in a map with string as key type, you can avoid having the 'name' member in the function structure.
To assign parameters you can do something like this:
1
2
3
4
map<string,function> func_table;

func_table["Add"].parameters["x"] = 0;
func_table["Add"].parameters["y"] = 0;

When you need to assign arguments to the parameters, you can use iterators:
1
2
for ( map<string,function>::iterator i=func_table["Add"].parameters.begin() ; i != func_table["Add"].parameters.end(); i++ )
    i->second = expr(true); // Notice: you need to break the expression at a comma, not at a semicolon  


some useful links:
http://www.cplusplus.com/reference/stl/map/operator%5B%5D/
http://www.cplusplus.com/reference/stl/map/begin/
http://www.cplusplus.com/reference/stl/map/find/
Last edited on Jun 14, 2009 at 1:44pm
Jun 14, 2009 at 5:10pm
Sorry about this, I'm not sure where you got func_table from, do I create that as well?
Pages: 123