-
Notifications
You must be signed in to change notification settings - Fork 0
/
lexer.c
113 lines (99 loc) · 3.32 KB
/
lexer.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#include<stdio.h>
#include<stdlib.h>
#include <string.h>
#include <ctype.h>
#include "lexer.h"
/*
 * Lexer state shared by initScanner() and scan().
 */
struct {
    char *pos;  /* next unread character of the input buffer */
    int line;   /* current line number; initScanner() starts it at 1 */
} scanner;
struct token scan(){
// handle whitespaces, tabs and comments
for(;;){
switch(*scanner.pos){
case ' ': case '\t': case '\r':
scanner.pos++;
break;
case '`':
if(*(scanner.pos+1) == '`'){
while(*scanner.pos != '\n')
scanner.pos++;
break;
}
default:
goto done_checking_ws;
}
}
done_checking_ws:
switch(*scanner.pos){
case '\0':
return (struct token){.type= T_EOF, .value = "\0", .line= scanner.line};
case '(':
scanner.pos++;
return (struct token){.type = T_LPAR, .value = "(", .line= scanner.line};
case ')':
scanner.pos++;
return (struct token){.type= T_RPAR, .value = ")", .line= scanner.line};
case '+':
scanner.pos++;
return (struct token){.type = T_PLUS, .value= "+", .line= scanner.line};
case '-':
scanner.pos++;
return (struct token){.type = T_MINUS, .value= "-", .line= scanner.line};
case '=':
scanner.pos++;
return (struct token){.type = T_EQ, .value="=", .line= scanner.line};
case '\n':
scanner.pos++;
scanner.line++;
int l = scanner.line - 1;
while(*scanner.pos == '\n'){
scanner.pos++;
scanner.line++;
}
return (struct token){.type= T_SEP, .value= "\n", .line= l};
default:
if(isalpha(*scanner.pos) || *scanner.pos == '_'){
char * start = scanner.pos;
while(isalpha(*scanner.pos) || *scanner.pos == '_')
scanner.pos++;
size_t len = sizeof(char) * (scanner.pos - start);
char * value = (char*) malloc(len+1);
memcpy(value,start,len);
value[len] = '\0';
if(strcmp(value,"var") == 0)
return (struct token){.type= T_VAR, .value= value, .line= scanner.line};
if(strcmp(value,"while") == 0)
return (struct token){.type = T_WHILE, .value = value, .line= scanner.line};
if(strcmp(value,"do") == 0)
return (struct token){.type = T_DO, .value = value,
.line = scanner.line};
if(strcmp(value,"end") == 0)
return (struct token){.type = T_END, .value = value,
.line = scanner.line};
if(strcmp(value,"display") == 0)
return (struct token){.type = T_PRINT, .value = value, .line = scanner.line};
return (struct token){.type = T_ID, .value= value, .line = scanner.line};
}
if(isdigit(*scanner.pos)){
char *start = scanner.pos;
while(isdigit(*scanner.pos))
scanner.pos++;
size_t len = sizeof(char) * (scanner.pos - start);
char * value = (char*) malloc(len+1);
value[len] = '\0';
memcpy(value,start,len);
return (struct token){.type = T_NUM, .value = value, .line = scanner.line};
}
printf("Unknown token %c at line %d\n", *scanner.pos,scanner.line);
exit(1);
}
}
/*
 * Reset the lexer so that the next scan() call starts reading at f,
 * with the line counter back at 1.
 *
 * The pointer is stored as-is; no copy of the text is made here.
 */
void initScanner(char * f){
    scanner.line = 1;
    scanner.pos = f;
}
/*
 * Print a one-line description of t on stdout, in the form
 * "Token{ type=NAME, value=TEXT, line=N }".
 *
 * A type outside the name table is printed as "UNKNOWN" instead of reading
 * past the end of the table, and a NULL value is printed as "(null)".
 */
void printToken(struct token t){
    /* NOTE(review): assumes this order matches the token type constants
     * declared in lexer.h, starting at 0 - confirm against the header. */
    static const char *const names[] = {"VAR", "WHILE", "DO", "END", "PRINT", "ID", "NUM", "LPAR", "RPAR", "PLUS", "MINUS", "EQUALS", "NEWLINE", "EOF"};
    const size_t count = sizeof names / sizeof names[0];

    /* The cast to size_t also rejects negative values. */
    const char *name = "UNKNOWN";
    if((size_t)t.type < count)
        name = names[t.type];

    printf("Token{ type=%s, value=%s, line=%d }\n", name, t.value ? t.value : "(null)", t.line);
}