Mercurial > dnsbl
comparison src/tokenizer.cpp @ 71:dd21c8e13074
start coding on new config syntax
author | carl |
---|---|
date | Sat, 09 Jul 2005 19:24:41 -0700 |
parents | |
children | e6a2d0be7c5e |
comparison
equal
deleted
inserted
replaced
70:c812a06f87bf | 71:dd21c8e13074 |
---|---|
1 #include "context.h" | |
2 | |
// Version tag for this translation unit.  A string literal must not be
// modified, so it is held through a pointer-to-const; binding a literal to
// a plain char* is deprecated in C++03 and ill-formed since C++11.
static const char* tokenizer_version="$Id:";
4 | |
// States of the tokenizer's finite state machine.
//
// The enumerators that precede end_state are used as the column index of a
// PARSE row, so their order and values must stay as they are.  The
// enumerators after end_state only ever appear as table entries; TOKEN::next()
// maps each of them back to a column state right after the table lookup.
enum state {
    s_init = 0,     // start state, no token collected yet
    s_token,        // inside a token
    s_string,       // inside a quoted string
    s_ignore,       // whitespace
    s_eol,          // ignore to eol
    end_state,      // number of column states in a PARSE row

    s_term,         // token terminator
    s_single,       // single character token
    s_string1,      // first " of string
    s_string2,      // last " of string
    s_slash         // possible start of ignore to eol
};

// One row of the transition table: the entry for each column state.
typedef state PARSE[end_state];
20 | |
// State-transition table used by TOKEN::next().
// Rows are indexed by the input byte (0x00-0xff) and columns by the current
// state (s_init, s_token, s_string, s_ignore, s_eol).  Each entry is the
// state that next() switches on after reading that byte; entries above
// end_state (s_term, s_single, s_string1, s_string2, s_slash) are actions
// that next() turns back into one of the column states.
21 static PARSE parse_table[256] = { | |
22 // s_init s_token s_string s_ignore s_eol | |
23 { s_single, s_term, s_string, s_single, s_eol, }, // 0x00 | |
24 { s_single, s_term, s_string, s_single, s_eol, }, // 0x01 | |
25 { s_single, s_term, s_string, s_single, s_eol, }, // 0x02 | |
26 { s_single, s_term, s_string, s_single, s_eol, }, // 0x03 | |
27 { s_single, s_term, s_string, s_single, s_eol, }, // 0x04 | |
28 { s_single, s_term, s_string, s_single, s_eol, }, // 0x05 | |
29 { s_single, s_term, s_string, s_single, s_eol, }, // 0x06 | |
30 { s_single, s_term, s_string, s_single, s_eol, }, // 0x07 | |
31 { s_single, s_term, s_string, s_single, s_eol, }, // 0x08 | |
32 { s_ignore, s_term, s_string, s_ignore, s_eol, }, // 0x09 <tab> | |
33 { s_ignore, s_term, s_string2, s_ignore, s_ignore, }, // 0x0a <lf> | |
34 { s_single, s_term, s_string, s_single, s_eol, }, // 0x0b | |
35 { s_single, s_term, s_string, s_single, s_eol, }, // 0x0c | |
36 { s_ignore, s_term, s_string2, s_ignore, s_eol, }, // 0x0d <cr> | |
37 { s_single, s_term, s_string, s_single, s_eol, }, // 0x0e | |
38 { s_single, s_term, s_string, s_single, s_eol, }, // 0x0f | |
39 { s_single, s_term, s_string, s_single, s_eol, }, // 0x10 | |
40 { s_single, s_term, s_string, s_single, s_eol, }, // 0x11 xon char | |
41 { s_single, s_term, s_string, s_single, s_eol, }, // 0x12 | |
42 { s_single, s_term, s_string, s_single, s_eol, }, // 0x13 xoff char | |
43 { s_single, s_term, s_string, s_single, s_eol, }, // 0x14 | |
44 { s_single, s_term, s_string, s_single, s_eol, }, // 0x15 | |
45 { s_single, s_term, s_string, s_single, s_eol, }, // 0x16 | |
46 { s_single, s_term, s_string, s_single, s_eol, }, // 0x17 | |
47 { s_single, s_term, s_string, s_single, s_eol, }, // 0x18 | |
48 { s_single, s_term, s_string, s_single, s_eol, }, // 0x19 | |
49 { s_single, s_term, s_string, s_single, s_eol, }, // 0x1a | |
50 { s_single, s_term, s_string, s_single, s_eol, }, // 0x1b | |
51 { s_single, s_term, s_string, s_single, s_eol, }, // 0x1c | |
52 { s_single, s_term, s_string, s_single, s_eol, }, // 0x1d | |
53 { s_single, s_term, s_string, s_single, s_eol, }, // 0x1e | |
54 { s_single, s_term, s_string, s_single, s_eol, }, // 0x1f | |
55 { s_ignore, s_term, s_string, s_ignore, s_eol, }, // 0x20 space | |
56 { s_single, s_term, s_string, s_single, s_eol, }, // 0x21 ! | |
57 { s_string1, s_term, s_string2, s_string1, s_eol, }, // 0x22 " | |
58 { s_eol, s_term, s_string, s_eol, s_eol, }, // 0x23 # | |
59 { s_single, s_term, s_string, s_single, s_eol, }, // 0x24 $ | |
60 { s_single, s_term, s_string, s_single, s_eol, }, // 0x25 % | |
61 { s_single, s_term, s_string, s_single, s_eol, }, // 0x26 & | |
62 { s_single, s_term, s_string, s_single, s_eol, }, // 0x27 ' | |
63 { s_single, s_term, s_string, s_single, s_eol, }, // 0x28 ( | |
64 { s_single, s_term, s_string, s_single, s_eol, }, // 0x29 ) | |
65 { s_single, s_term, s_string, s_single, s_eol, }, // 0x2A * | |
66 { s_single, s_term, s_string, s_single, s_eol, }, // 0x2B + | |
67 { s_single, s_term, s_string, s_single, s_eol, }, // 0x2C , | |
68 { s_single, s_token, s_string, s_single, s_eol, }, // 0x2D - | |
69 { s_single, s_token, s_string, s_single, s_eol, }, // 0x2E . | |
70 { s_slash, s_term, s_string, s_slash, s_eol, }, // 0x2F / | |
71 { s_token, s_token, s_string, s_token, s_eol, }, // 0x30 0 | |
72 { s_token, s_token, s_string, s_token, s_eol, }, // 0x31 1 | |
73 { s_token, s_token, s_string, s_token, s_eol, }, // 0x32 2 | |
74 { s_token, s_token, s_string, s_token, s_eol, }, // 0x33 3 | |
75 { s_token, s_token, s_string, s_token, s_eol, }, // 0x34 4 | |
76 { s_token, s_token, s_string, s_token, s_eol, }, // 0x35 5 | |
77 { s_token, s_token, s_string, s_token, s_eol, }, // 0x36 6 | |
78 { s_token, s_token, s_string, s_token, s_eol, }, // 0x37 7 | |
79 { s_token, s_token, s_string, s_token, s_eol, }, // 0x38 8 | |
80 { s_token, s_token, s_string, s_token, s_eol, }, // 0x39 9 | |
81 { s_single, s_term, s_string, s_single, s_eol, }, // 0x3A : | |
82 { s_single, s_term, s_string, s_single, s_eol, }, // 0x3B ; | |
83 { s_single, s_term, s_string, s_single, s_eol, }, // 0x3C < | |
84 { s_single, s_term, s_string, s_single, s_eol, }, // 0x3D = | |
85 { s_single, s_term, s_string, s_single, s_eol, }, // 0x3E > | |
86 { s_single, s_term, s_string, s_single, s_eol, }, // 0x3F ? | |
87 { s_single, s_token, s_string, s_single, s_eol, }, // 0x40 @ | |
88 { s_token, s_token, s_string, s_token, s_eol, }, // 0x41 A | |
89 { s_token, s_token, s_string, s_token, s_eol, }, // 0x42 B | |
90 { s_token, s_token, s_string, s_token, s_eol, }, // 0x43 C | |
91 { s_token, s_token, s_string, s_token, s_eol, }, // 0x44 D | |
92 { s_token, s_token, s_string, s_token, s_eol, }, // 0x45 E | |
93 { s_token, s_token, s_string, s_token, s_eol, }, // 0x46 F | |
94 { s_token, s_token, s_string, s_token, s_eol, }, // 0x47 G | |
95 { s_token, s_token, s_string, s_token, s_eol, }, // 0x48 H | |
96 { s_token, s_token, s_string, s_token, s_eol, }, // 0x49 I | |
97 { s_token, s_token, s_string, s_token, s_eol, }, // 0x4A J | |
98 { s_token, s_token, s_string, s_token, s_eol, }, // 0x4B K | |
99 { s_token, s_token, s_string, s_token, s_eol, }, // 0x4C L | |
100 { s_token, s_token, s_string, s_token, s_eol, }, // 0x4D M | |
101 { s_token, s_token, s_string, s_token, s_eol, }, // 0x4E N | |
102 { s_token, s_token, s_string, s_token, s_eol, }, // 0x4F O | |
103 { s_token, s_token, s_string, s_token, s_eol, }, // 0x50 P | |
104 { s_token, s_token, s_string, s_token, s_eol, }, // 0x51 Q | |
105 { s_token, s_token, s_string, s_token, s_eol, }, // 0x52 R | |
106 { s_token, s_token, s_string, s_token, s_eol, }, // 0x53 S | |
107 { s_token, s_token, s_string, s_token, s_eol, }, // 0x54 T | |
108 { s_token, s_token, s_string, s_token, s_eol, }, // 0x55 U | |
109 { s_token, s_token, s_string, s_token, s_eol, }, // 0x56 V | |
110 { s_token, s_token, s_string, s_token, s_eol, }, // 0x57 W | |
111 { s_token, s_token, s_string, s_token, s_eol, }, // 0x58 X | |
112 { s_token, s_token, s_string, s_token, s_eol, }, // 0x59 Y | |
113 { s_token, s_token, s_string, s_token, s_eol, }, // 0x5A Z | |
114 { s_single, s_term, s_string, s_single, s_eol, }, // 0x5B [ | |
115 { s_single, s_term, s_string, s_single, s_eol, }, // 0x5C backslash | |
116 { s_single, s_term, s_string, s_single, s_eol, }, // 0x5D ] | |
117 { s_single, s_term, s_string, s_single, s_eol, }, // 0x5E ^ | |
118 { s_single, s_token, s_string, s_single, s_eol, }, // 0x5F _ | |
119 { s_single, s_term, s_string, s_single, s_eol, }, // 0x60 ` | |
120 { s_token, s_token, s_string, s_token, s_eol, }, // 0x61 a | |
121 { s_token, s_token, s_string, s_token, s_eol, }, // 0x62 b | |
122 { s_token, s_token, s_string, s_token, s_eol, }, // 0x63 c | |
123 { s_token, s_token, s_string, s_token, s_eol, }, // 0x64 d | |
124 { s_token, s_token, s_string, s_token, s_eol, }, // 0x65 e | |
125 { s_token, s_token, s_string, s_token, s_eol, }, // 0x66 f | |
126 { s_token, s_token, s_string, s_token, s_eol, }, // 0x67 g | |
127 { s_token, s_token, s_string, s_token, s_eol, }, // 0x68 h | |
128 { s_token, s_token, s_string, s_token, s_eol, }, // 0x69 i | |
129 { s_token, s_token, s_string, s_token, s_eol, }, // 0x6A j | |
130 { s_token, s_token, s_string, s_token, s_eol, }, // 0x6B k | |
131 { s_token, s_token, s_string, s_token, s_eol, }, // 0x6C l | |
132 { s_token, s_token, s_string, s_token, s_eol, }, // 0x6D m | |
133 { s_token, s_token, s_string, s_token, s_eol, }, // 0x6E n | |
134 { s_token, s_token, s_string, s_token, s_eol, }, // 0x6F o | |
135 { s_token, s_token, s_string, s_token, s_eol, }, // 0x70 p | |
136 { s_token, s_token, s_string, s_token, s_eol, }, // 0x71 q | |
137 { s_token, s_token, s_string, s_token, s_eol, }, // 0x72 r | |
138 { s_token, s_token, s_string, s_token, s_eol, }, // 0x73 s | |
139 { s_token, s_token, s_string, s_token, s_eol, }, // 0x74 t | |
140 { s_token, s_token, s_string, s_token, s_eol, }, // 0x75 u | |
141 { s_token, s_token, s_string, s_token, s_eol, }, // 0x76 v | |
142 { s_token, s_token, s_string, s_token, s_eol, }, // 0x77 w | |
143 { s_token, s_token, s_string, s_token, s_eol, }, // 0x78 x | |
144 { s_token, s_token, s_string, s_token, s_eol, }, // 0x79 y | |
145 { s_token, s_token, s_string, s_token, s_eol, }, // 0x7A z | |
146 { s_single, s_term, s_string, s_single, s_eol, }, // 0x7B { | |
147 { s_single, s_term, s_string, s_single, s_eol, }, // 0x7C | | |
148 { s_single, s_term, s_string, s_single, s_eol, }, // 0x7D } | |
149 { s_single, s_term, s_string, s_single, s_eol, }, // 0x7E ~ | |
150 { s_single, s_term, s_string, s_single, s_eol, }, // 0x7f | |
151 { s_single, s_term, s_string, s_single, s_eol, }, // 0x80 | |
152 { s_single, s_term, s_string, s_single, s_eol, }, // 0x81 | |
153 { s_single, s_term, s_string, s_single, s_eol, }, // 0x82 | |
154 { s_single, s_term, s_string, s_single, s_eol, }, // 0x83 | |
155 { s_single, s_term, s_string, s_single, s_eol, }, // 0x84 | |
156 { s_single, s_term, s_string, s_single, s_eol, }, // 0x85 | |
157 { s_single, s_term, s_string, s_single, s_eol, }, // 0x86 | |
158 { s_single, s_term, s_string, s_single, s_eol, }, // 0x87 | |
159 { s_single, s_term, s_string, s_single, s_eol, }, // 0x88 | |
160 { s_single, s_term, s_string, s_single, s_eol, }, // 0x89 | |
161 { s_single, s_term, s_string, s_single, s_eol, }, // 0x8a | |
162 { s_single, s_term, s_string, s_single, s_eol, }, // 0x8b | |
163 { s_single, s_term, s_string, s_single, s_eol, }, // 0x8c | |
164 { s_single, s_term, s_string, s_single, s_eol, }, // 0x8d | |
165 { s_single, s_term, s_string, s_single, s_eol, }, // 0x8e | |
166 { s_single, s_term, s_string, s_single, s_eol, }, // 0x8f | |
167 { s_single, s_term, s_string, s_single, s_eol, }, // 0x90 | |
168 { s_single, s_term, s_string, s_single, s_eol, }, // 0x91 | |
169 { s_single, s_term, s_string, s_single, s_eol, }, // 0x92 | |
170 { s_single, s_term, s_string, s_single, s_eol, }, // 0x93 | |
171 { s_single, s_term, s_string, s_single, s_eol, }, // 0x94 | |
172 { s_single, s_term, s_string, s_single, s_eol, }, // 0x95 | |
173 { s_single, s_term, s_string, s_single, s_eol, }, // 0x96 | |
174 { s_single, s_term, s_string, s_single, s_eol, }, // 0x97 | |
175 { s_single, s_term, s_string, s_single, s_eol, }, // 0x98 | |
176 { s_single, s_term, s_string, s_single, s_eol, }, // 0x99 | |
177 { s_single, s_term, s_string, s_single, s_eol, }, // 0x9a | |
178 { s_single, s_term, s_string, s_single, s_eol, }, // 0x9b | |
179 { s_single, s_term, s_string, s_single, s_eol, }, // 0x9c | |
180 { s_single, s_term, s_string, s_single, s_eol, }, // 0x9d | |
181 { s_single, s_term, s_string, s_single, s_eol, }, // 0x9e | |
182 { s_single, s_term, s_string, s_single, s_eol, }, // 0x9f | |
183 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa0 | |
184 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa1 | |
185 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa2 | |
186 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa3 | |
187 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa4 | |
188 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa5 | |
189 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa6 | |
190 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa7 | |
191 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa8 | |
192 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa9 | |
193 { s_single, s_term, s_string, s_single, s_eol, }, // 0xaa | |
194 { s_single, s_term, s_string, s_single, s_eol, }, // 0xab | |
195 { s_single, s_term, s_string, s_single, s_eol, }, // 0xac | |
196 { s_single, s_term, s_string, s_single, s_eol, }, // 0xad | |
197 { s_single, s_term, s_string, s_single, s_eol, }, // 0xae | |
198 { s_single, s_term, s_string, s_single, s_eol, }, // 0xaf | |
199 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb0 | |
200 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb1 | |
201 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb2 | |
202 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb3 | |
203 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb4 | |
204 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb5 | |
205 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb6 | |
206 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb7 | |
207 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb8 | |
208 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb9 | |
209 { s_single, s_term, s_string, s_single, s_eol, }, // 0xba | |
210 { s_single, s_term, s_string, s_single, s_eol, }, // 0xbb | |
211 { s_single, s_term, s_string, s_single, s_eol, }, // 0xbc | |
212 { s_single, s_term, s_string, s_single, s_eol, }, // 0xbd | |
213 { s_single, s_term, s_string, s_single, s_eol, }, // 0xbe | |
214 { s_single, s_term, s_string, s_single, s_eol, }, // 0xbf | |
215 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc0 | |
216 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc1 | |
217 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc2 | |
218 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc3 | |
219 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc4 | |
220 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc5 | |
221 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc6 | |
222 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc7 | |
223 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc8 | |
224 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc9 | |
225 { s_single, s_term, s_string, s_single, s_eol, }, // 0xca | |
226 { s_single, s_term, s_string, s_single, s_eol, }, // 0xcb | |
227 { s_single, s_term, s_string, s_single, s_eol, }, // 0xcc | |
228 { s_single, s_term, s_string, s_single, s_eol, }, // 0xcd | |
229 { s_single, s_term, s_string, s_single, s_eol, }, // 0xce | |
230 { s_single, s_term, s_string, s_single, s_eol, }, // 0xcf | |
231 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd0 | |
232 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd1 | |
233 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd2 | |
234 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd3 | |
235 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd4 | |
236 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd5 | |
237 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd6 | |
238 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd7 | |
239 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd8 | |
240 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd9 | |
241 { s_single, s_term, s_string, s_single, s_eol, }, // 0xda | |
242 { s_single, s_term, s_string, s_single, s_eol, }, // 0xdb | |
243 { s_single, s_term, s_string, s_single, s_eol, }, // 0xdc | |
244 { s_single, s_term, s_string, s_single, s_eol, }, // 0xdd | |
245 { s_single, s_term, s_string, s_single, s_eol, }, // 0xde | |
246 { s_single, s_term, s_string, s_single, s_eol, }, // 0xdf | |
247 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe0 | |
248 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe1 | |
249 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe2 | |
250 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe3 | |
251 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe4 | |
252 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe5 | |
253 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe6 | |
254 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe7 | |
255 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe8 | |
256 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe9 | |
257 { s_single, s_term, s_string, s_single, s_eol, }, // 0xea | |
258 { s_single, s_term, s_string, s_single, s_eol, }, // 0xeb | |
259 { s_single, s_term, s_string, s_single, s_eol, }, // 0xec | |
260 { s_single, s_term, s_string, s_single, s_eol, }, // 0xed | |
261 { s_single, s_term, s_string, s_single, s_eol, }, // 0xee | |
262 { s_single, s_term, s_string, s_single, s_eol, }, // 0xef | |
263 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf0 | |
264 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf1 | |
265 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf2 | |
266 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf3 | |
267 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf4 | |
268 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf5 | |
269 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf6 | |
270 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf7 | |
271 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf8 | |
272 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf9 | |
273 { s_single, s_term, s_string, s_single, s_eol, }, // 0xfa | |
274 { s_single, s_term, s_string, s_single, s_eol, }, // 0xfb | |
275 { s_single, s_term, s_string, s_single, s_eol, }, // 0xfc | |
276 { s_single, s_term, s_string, s_single, s_eol, }, // 0xfd | |
277 { s_single, s_term, s_string, s_single, s_eol, }, // 0xfe | |
278 { s_single, s_term, s_string, s_single, s_eol, }, // 0xff | |
279 }; | |
280 | |
281 | |
282 TOKEN::TOKEN(char *fn, string_set *includes) { | |
283 pushed = false; | |
284 include_files = includes; | |
285 include(fn); | |
286 } | |
287 | |
288 | |
289 TOKEN::~TOKEN() { | |
290 while (!streams.empty()) pop(); | |
291 } | |
292 | |
293 | |
294 void TOKEN::pop() { | |
295 ifstream *is = streams.front(); | |
296 char *fn = filenames.front(); | |
297 streams.pop_front(); | |
298 filenames.pop_front(); | |
299 filenamess.erase(fn); | |
300 linenumbers.pop_front(); | |
301 is->close(); | |
302 delete is; | |
303 } | |
304 | |
305 | |
306 void TOKEN::push_char(u_char c) { | |
307 pushed = true; | |
308 pushed_char = c; | |
309 } | |
310 | |
311 | |
312 bool TOKEN::next_char(u_char &uc) { | |
313 if (pushed) { | |
314 uc = pushed_char; | |
315 pushed = false; | |
316 return true; | |
317 } | |
318 while (!streams.empty() && streams.front()->eof()) { | |
319 pop(); | |
320 } | |
321 if (streams.empty()) return false; | |
322 ifstream *is = streams.front(); | |
323 uc = (u_char)is->get(); | |
324 if (is->eof()) return next_char(uc); | |
325 if (uc == (u_char)'\n') { | |
326 int &line = linenumbers.front(); | |
327 line++; | |
328 } | |
329 return true; | |
330 } | |
331 | |
332 | |
333 bool TOKEN::include(char *fn) { | |
334 string_set::iterator i = filenamess.find(fn); | |
335 if (i != filenamess.end()) { | |
336 my_syslog("redundant or recursive include file detected"); | |
337 return false; | |
338 } | |
339 ifstream *is = new ifstream; | |
340 is->open(fn); | |
341 if (is->fail()) { | |
342 char buf[1000]; | |
343 snprintf(buf, sizeof(buf), "include file %s not found", fn); | |
344 token_error(buf); | |
345 return false; | |
346 } | |
347 string_set &inc = *include_files; | |
348 inc.insert(fn); | |
349 streams.push_front(is); | |
350 filenames.push_front(fn); | |
351 filenamess.insert(fn); | |
352 linenumbers.push_front(1); | |
353 return true; | |
354 } | |
355 | |
356 | |
357 char *TOKEN::next() { | |
358 if (!pending_tokens.empty()) { | |
359 char *t = pending_tokens.front(); | |
360 pending_tokens.pop_front(); | |
361 return t; | |
362 } | |
363 if (streams.empty()) return NULL; | |
364 const int PENDING_LIMIT = 1000; | |
365 static u_char buffer[PENDING_LIMIT]; | |
366 int count = 0; | |
367 state st = s_init; | |
368 while (true) { | |
369 if (count == (PENDING_LIMIT-1)) { | |
370 token_error("token too long"); | |
371 break; | |
372 } | |
373 if (st >= end_state) { | |
374 token_error("finite state machine error"); | |
375 break; | |
376 } | |
377 u_char c; | |
378 if (!next_char(c)) break; | |
379 st = parse_table[c][st]; | |
380 switch (st) { | |
381 case s_string: | |
382 case s_token: { | |
383 buffer[count++] = c; | |
384 } break; | |
385 | |
386 case s_term: { | |
387 push_char(c); | |
388 st = s_init; | |
389 } break; | |
390 | |
391 case s_string1: { | |
392 st = s_string; | |
393 } break; | |
394 | |
395 case s_string2: { | |
396 st = s_init; | |
397 } break; | |
398 | |
399 case s_single: { | |
400 buffer[count++] = c; | |
401 st = s_init; | |
402 } break; | |
403 | |
404 case s_ignore: | |
405 case s_eol: { | |
406 } break; | |
407 | |
408 | |
409 case s_slash: { | |
410 buffer[count++] = c; | |
411 if (next_char(c)) { | |
412 if (c == (u_char)'/') { | |
413 // start of ignore to eol on // | |
414 count--; | |
415 st = s_eol; | |
416 } | |
417 else { | |
418 // not a // token, just return this single / | |
419 push_char(c); | |
420 st = s_init; | |
421 } | |
422 } | |
423 else { | |
424 // cannot get another char | |
425 st = s_init; | |
426 } | |
427 } break; | |
428 | |
429 default: { | |
430 token_error(); | |
431 token_error("unknown state %d %s \n", st, " "); | |
432 } break; | |
433 } | |
434 if (st == s_init) break; | |
435 } | |
436 | |
437 buffer[count] = '\0'; | |
438 if (count == 0) return NULL; | |
439 char *t = register_string((char*)buffer); | |
440 if (t == token_include) { | |
441 char *f = next(); // should be file name | |
442 char *s = next(); // should be semicolon | |
443 if (s == token_semi) { | |
444 include(f); | |
445 return next(); | |
446 } | |
447 else { | |
448 push(s); | |
449 push(f); | |
450 return t; | |
451 } | |
452 } | |
453 return t; | |
454 } | |
455 | |
456 | |
457 int TOKEN::nextint() { | |
458 char *t = next(); | |
459 char *e; | |
460 long i = strtol(t, &e, 10); | |
461 if (*e != '\0') { | |
462 token_error("integer", t); | |
463 return 0; | |
464 } | |
465 return (int)i; | |
466 } | |
467 | |
468 | |
469 void TOKEN::skipeol() { | |
470 while (true) { | |
471 u_char c; | |
472 if (!next_char(c)) break; | |
473 if (c == (u_char)'\n') break; | |
474 } | |
475 } | |
476 | |
477 | |
478 void TOKEN::token_error(const char *err) { | |
479 token_error(); | |
480 printf("%s \n", err); | |
481 } | |
482 | |
483 | |
484 void TOKEN::token_error(const char *fmt, int d, const char *s) { | |
485 printf(fmt, d, s); | |
486 } | |
487 | |
488 | |
489 void TOKEN::token_error(const char *fmt, const char *t, const char *h) { | |
490 if (!h) h = "null"; | |
491 printf(fmt, t, h); | |
492 } | |
493 | |
494 | |
495 void TOKEN::token_error(const char *token, const char *have) { | |
496 token_error(); | |
497 token_error("expecting %s, found %s \n", token, have); | |
498 } | |
499 | |
500 | |
501 void TOKEN::token_error() { | |
502 token_error("syntax error at line %d in file %s -- ", cur_line(), cur_fn()); | |
503 } | |
504 |