comparison src/tokenizer.cpp @ 0:48d06780cf77

initial version
author Carl Byington <carl@five-ten-sg.com>
date Tue, 13 May 2008 14:03:10 -0700
parents
children 180d26aa2a17
comparison
equal deleted inserted replaced
-1:000000000000 0:48d06780cf77
1 /*
2
3 Copyright (c) 2007 Carl Byington - 510 Software Group, released under
4 the GPL version 3 or any later version at your choice available at
5 http://www.gnu.org/licenses/gpl-3.0.txt
6
7 */
8
9 #include "includes.h"
10
// Size in bytes of the stack buffers that the snprintf() calls in this
// file format their messages into.
static const int maxlen = 1000;
12
// States of the tokenizer's finite state machine.
//
// The values below end_state are the persistent states; they are the
// column indexes of a PARSE row. The values above end_state only ever
// appear as table entries: TOKEN::next() acts on them and immediately
// replaces them with one of the persistent states.
enum state {
    s_init    = 0,
    s_token   = 1,
    s_string  = 2,
    s_ignore  = 3,   // whitespace
    s_eol     = 4,   // ignore to eol
    end_state = 5,   // number of persistent states

    s_term    = 6,   // token terminator
    s_single  = 7,   // character that is a complete token by itself
    s_string1 = 8,   // first " of string
    s_string2 = 9,   // last " of string
    s_slash   = 10   // possible start of ignore to eol
};

// One row of the transition table: the entry for every persistent state.
typedef state PARSE[end_state];
28
29 static PARSE parse_table[256] = {
30 // s_init s_token s_string s_ignore s_eol
31 { s_single, s_term, s_string, s_single, s_eol, }, // 0x00
32 { s_single, s_term, s_string, s_single, s_eol, }, // 0x01
33 { s_single, s_term, s_string, s_single, s_eol, }, // 0x02
34 { s_single, s_term, s_string, s_single, s_eol, }, // 0x03
35 { s_single, s_term, s_string, s_single, s_eol, }, // 0x04
36 { s_single, s_term, s_string, s_single, s_eol, }, // 0x05
37 { s_single, s_term, s_string, s_single, s_eol, }, // 0x06
38 { s_single, s_term, s_string, s_single, s_eol, }, // 0x07
39 { s_single, s_term, s_string, s_single, s_eol, }, // 0x08
40 { s_ignore, s_term, s_string, s_ignore, s_eol, }, // 0x09 <tab>
41 { s_ignore, s_term, s_string2, s_ignore, s_ignore, }, // 0x0a <lf>
42 { s_single, s_term, s_string, s_single, s_eol, }, // 0x0b
43 { s_single, s_term, s_string, s_single, s_eol, }, // 0x0c
44 { s_ignore, s_term, s_string2, s_ignore, s_eol, }, // 0x0d <cr>
45 { s_single, s_term, s_string, s_single, s_eol, }, // 0x0e
46 { s_single, s_term, s_string, s_single, s_eol, }, // 0x0f
47 { s_single, s_term, s_string, s_single, s_eol, }, // 0x10
48 { s_single, s_term, s_string, s_single, s_eol, }, // 0x11 xon char
49 { s_single, s_term, s_string, s_single, s_eol, }, // 0x12
50 { s_single, s_term, s_string, s_single, s_eol, }, // 0x13 xoff char
51 { s_single, s_term, s_string, s_single, s_eol, }, // 0x14
52 { s_single, s_term, s_string, s_single, s_eol, }, // 0x15
53 { s_single, s_term, s_string, s_single, s_eol, }, // 0x16
54 { s_single, s_term, s_string, s_single, s_eol, }, // 0x17
55 { s_single, s_term, s_string, s_single, s_eol, }, // 0x18
56 { s_single, s_term, s_string, s_single, s_eol, }, // 0x19
57 { s_single, s_term, s_string, s_single, s_eol, }, // 0x1a
58 { s_single, s_term, s_string, s_single, s_eol, }, // 0x1b
59 { s_single, s_term, s_string, s_single, s_eol, }, // 0x1c
60 { s_single, s_term, s_string, s_single, s_eol, }, // 0x1d
61 { s_single, s_term, s_string, s_single, s_eol, }, // 0x1e
62 { s_single, s_term, s_string, s_single, s_eol, }, // 0x1f
63 { s_ignore, s_term, s_string, s_ignore, s_eol, }, // 0x20 space
64 { s_single, s_term, s_string, s_single, s_eol, }, // 0x21 !
65 { s_string1, s_term, s_string2, s_string1, s_eol, }, // 0x22 "
66 { s_eol, s_term, s_string, s_eol, s_eol, }, // 0x23 #
67 { s_single, s_term, s_string, s_single, s_eol, }, // 0x24 $
68 { s_single, s_term, s_string, s_single, s_eol, }, // 0x25 %
69 { s_single, s_term, s_string, s_single, s_eol, }, // 0x26 &
70 { s_single, s_term, s_string, s_single, s_eol, }, // 0x27 '
71 { s_single, s_term, s_string, s_single, s_eol, }, // 0x28 (
72 { s_single, s_term, s_string, s_single, s_eol, }, // 0x29 )
73 { s_single, s_term, s_string, s_single, s_eol, }, // 0x2A *
74 { s_single, s_token, s_string, s_single, s_eol, }, // 0x2B +
75 { s_single, s_term, s_string, s_single, s_eol, }, // 0x2C ,
76 { s_single, s_token, s_string, s_single, s_eol, }, // 0x2D -
77 { s_single, s_token, s_string, s_single, s_eol, }, // 0x2E .
78 { s_slash, s_token, s_string, s_slash, s_eol, }, // 0x2F /
79 { s_token, s_token, s_string, s_token, s_eol, }, // 0x30 0
80 { s_token, s_token, s_string, s_token, s_eol, }, // 0x31 1
81 { s_token, s_token, s_string, s_token, s_eol, }, // 0x32 2
82 { s_token, s_token, s_string, s_token, s_eol, }, // 0x33 3
83 { s_token, s_token, s_string, s_token, s_eol, }, // 0x34 4
84 { s_token, s_token, s_string, s_token, s_eol, }, // 0x35 5
85 { s_token, s_token, s_string, s_token, s_eol, }, // 0x36 6
86 { s_token, s_token, s_string, s_token, s_eol, }, // 0x37 7
87 { s_token, s_token, s_string, s_token, s_eol, }, // 0x38 8
88 { s_token, s_token, s_string, s_token, s_eol, }, // 0x39 9
89 { s_single, s_term, s_string, s_single, s_eol, }, // 0x3A :
90 { s_single, s_term, s_string, s_single, s_eol, }, // 0x3B ;
91 { s_single, s_term, s_string, s_single, s_eol, }, // 0x3C <
92 { s_single, s_token, s_string, s_single, s_eol, }, // 0x3D =
93 { s_single, s_term, s_string, s_single, s_eol, }, // 0x3E >
94 { s_single, s_term, s_string, s_single, s_eol, }, // 0x3F ?
95 { s_single, s_token, s_string, s_single, s_eol, }, // 0x40 @
96 { s_token, s_token, s_string, s_token, s_eol, }, // 0x41 A
97 { s_token, s_token, s_string, s_token, s_eol, }, // 0x42 B
98 { s_token, s_token, s_string, s_token, s_eol, }, // 0x43 C
99 { s_token, s_token, s_string, s_token, s_eol, }, // 0x44 D
100 { s_token, s_token, s_string, s_token, s_eol, }, // 0x45 E
101 { s_token, s_token, s_string, s_token, s_eol, }, // 0x46 F
102 { s_token, s_token, s_string, s_token, s_eol, }, // 0x47 G
103 { s_token, s_token, s_string, s_token, s_eol, }, // 0x48 H
104 { s_token, s_token, s_string, s_token, s_eol, }, // 0x49 I
105 { s_token, s_token, s_string, s_token, s_eol, }, // 0x4A J
106 { s_token, s_token, s_string, s_token, s_eol, }, // 0x4B K
107 { s_token, s_token, s_string, s_token, s_eol, }, // 0x4C L
108 { s_token, s_token, s_string, s_token, s_eol, }, // 0x4D M
109 { s_token, s_token, s_string, s_token, s_eol, }, // 0x4E N
110 { s_token, s_token, s_string, s_token, s_eol, }, // 0x4F O
111 { s_token, s_token, s_string, s_token, s_eol, }, // 0x50 P
112 { s_token, s_token, s_string, s_token, s_eol, }, // 0x51 Q
113 { s_token, s_token, s_string, s_token, s_eol, }, // 0x52 R
114 { s_token, s_token, s_string, s_token, s_eol, }, // 0x53 S
115 { s_token, s_token, s_string, s_token, s_eol, }, // 0x54 T
116 { s_token, s_token, s_string, s_token, s_eol, }, // 0x55 U
117 { s_token, s_token, s_string, s_token, s_eol, }, // 0x56 V
118 { s_token, s_token, s_string, s_token, s_eol, }, // 0x57 W
119 { s_token, s_token, s_string, s_token, s_eol, }, // 0x58 X
120 { s_token, s_token, s_string, s_token, s_eol, }, // 0x59 Y
121 { s_token, s_token, s_string, s_token, s_eol, }, // 0x5A Z
122 { s_single, s_term, s_string, s_single, s_eol, }, // 0x5B [
123 { s_single, s_term, s_string, s_single, s_eol, }, // 0x5C backslash
124 { s_single, s_term, s_string, s_single, s_eol, }, // 0x5D ]
125 { s_single, s_term, s_string, s_single, s_eol, }, // 0x5E ^
126 { s_single, s_token, s_string, s_single, s_eol, }, // 0x5F _
127 { s_single, s_term, s_string, s_single, s_eol, }, // 0x60 `
128 { s_token, s_token, s_string, s_token, s_eol, }, // 0x61 a
129 { s_token, s_token, s_string, s_token, s_eol, }, // 0x62 b
130 { s_token, s_token, s_string, s_token, s_eol, }, // 0x63 c
131 { s_token, s_token, s_string, s_token, s_eol, }, // 0x64 d
132 { s_token, s_token, s_string, s_token, s_eol, }, // 0x65 e
133 { s_token, s_token, s_string, s_token, s_eol, }, // 0x66 f
134 { s_token, s_token, s_string, s_token, s_eol, }, // 0x67 g
135 { s_token, s_token, s_string, s_token, s_eol, }, // 0x68 h
136 { s_token, s_token, s_string, s_token, s_eol, }, // 0x69 i
137 { s_token, s_token, s_string, s_token, s_eol, }, // 0x6A j
138 { s_token, s_token, s_string, s_token, s_eol, }, // 0x6B k
139 { s_token, s_token, s_string, s_token, s_eol, }, // 0x6C l
140 { s_token, s_token, s_string, s_token, s_eol, }, // 0x6D m
141 { s_token, s_token, s_string, s_token, s_eol, }, // 0x6E n
142 { s_token, s_token, s_string, s_token, s_eol, }, // 0x6F o
143 { s_token, s_token, s_string, s_token, s_eol, }, // 0x70 p
144 { s_token, s_token, s_string, s_token, s_eol, }, // 0x71 q
145 { s_token, s_token, s_string, s_token, s_eol, }, // 0x72 r
146 { s_token, s_token, s_string, s_token, s_eol, }, // 0x73 s
147 { s_token, s_token, s_string, s_token, s_eol, }, // 0x74 t
148 { s_token, s_token, s_string, s_token, s_eol, }, // 0x75 u
149 { s_token, s_token, s_string, s_token, s_eol, }, // 0x76 v
150 { s_token, s_token, s_string, s_token, s_eol, }, // 0x77 w
151 { s_token, s_token, s_string, s_token, s_eol, }, // 0x78 x
152 { s_token, s_token, s_string, s_token, s_eol, }, // 0x79 y
153 { s_token, s_token, s_string, s_token, s_eol, }, // 0x7A z
154 { s_single, s_term, s_string, s_single, s_eol, }, // 0x7B {
155 { s_single, s_term, s_string, s_single, s_eol, }, // 0x7C |
156 { s_single, s_term, s_string, s_single, s_eol, }, // 0x7D }
157 { s_single, s_term, s_string, s_single, s_eol, }, // 0x7E ~
158 { s_single, s_term, s_string, s_single, s_eol, }, // 0x7f
159 { s_single, s_term, s_string, s_single, s_eol, }, // 0x80
160 { s_single, s_term, s_string, s_single, s_eol, }, // 0x81
161 { s_single, s_term, s_string, s_single, s_eol, }, // 0x82
162 { s_single, s_term, s_string, s_single, s_eol, }, // 0x83
163 { s_single, s_term, s_string, s_single, s_eol, }, // 0x84
164 { s_single, s_term, s_string, s_single, s_eol, }, // 0x85
165 { s_single, s_term, s_string, s_single, s_eol, }, // 0x86
166 { s_single, s_term, s_string, s_single, s_eol, }, // 0x87
167 { s_single, s_term, s_string, s_single, s_eol, }, // 0x88
168 { s_single, s_term, s_string, s_single, s_eol, }, // 0x89
169 { s_single, s_term, s_string, s_single, s_eol, }, // 0x8a
170 { s_single, s_term, s_string, s_single, s_eol, }, // 0x8b
171 { s_single, s_term, s_string, s_single, s_eol, }, // 0x8c
172 { s_single, s_term, s_string, s_single, s_eol, }, // 0x8d
173 { s_single, s_term, s_string, s_single, s_eol, }, // 0x8e
174 { s_single, s_term, s_string, s_single, s_eol, }, // 0x8f
175 { s_single, s_term, s_string, s_single, s_eol, }, // 0x90
176 { s_single, s_term, s_string, s_single, s_eol, }, // 0x91
177 { s_single, s_term, s_string, s_single, s_eol, }, // 0x92
178 { s_single, s_term, s_string, s_single, s_eol, }, // 0x93
179 { s_single, s_term, s_string, s_single, s_eol, }, // 0x94
180 { s_single, s_term, s_string, s_single, s_eol, }, // 0x95
181 { s_single, s_term, s_string, s_single, s_eol, }, // 0x96
182 { s_single, s_term, s_string, s_single, s_eol, }, // 0x97
183 { s_single, s_term, s_string, s_single, s_eol, }, // 0x98
184 { s_single, s_term, s_string, s_single, s_eol, }, // 0x99
185 { s_single, s_term, s_string, s_single, s_eol, }, // 0x9a
186 { s_single, s_term, s_string, s_single, s_eol, }, // 0x9b
187 { s_single, s_term, s_string, s_single, s_eol, }, // 0x9c
188 { s_single, s_term, s_string, s_single, s_eol, }, // 0x9d
189 { s_single, s_term, s_string, s_single, s_eol, }, // 0x9e
190 { s_single, s_term, s_string, s_single, s_eol, }, // 0x9f
191 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa0
192 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa1
193 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa2
194 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa3
195 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa4
196 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa5
197 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa6
198 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa7
199 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa8
200 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa9
201 { s_single, s_term, s_string, s_single, s_eol, }, // 0xaa
202 { s_single, s_term, s_string, s_single, s_eol, }, // 0xab
203 { s_single, s_term, s_string, s_single, s_eol, }, // 0xac
204 { s_single, s_term, s_string, s_single, s_eol, }, // 0xad
205 { s_single, s_term, s_string, s_single, s_eol, }, // 0xae
206 { s_single, s_term, s_string, s_single, s_eol, }, // 0xaf
207 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb0
208 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb1
209 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb2
210 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb3
211 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb4
212 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb5
213 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb6
214 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb7
215 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb8
216 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb9
217 { s_single, s_term, s_string, s_single, s_eol, }, // 0xba
218 { s_single, s_term, s_string, s_single, s_eol, }, // 0xbb
219 { s_single, s_term, s_string, s_single, s_eol, }, // 0xbc
220 { s_single, s_term, s_string, s_single, s_eol, }, // 0xbd
221 { s_single, s_term, s_string, s_single, s_eol, }, // 0xbe
222 { s_single, s_term, s_string, s_single, s_eol, }, // 0xbf
223 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc0
224 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc1
225 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc2
226 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc3
227 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc4
228 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc5
229 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc6
230 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc7
231 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc8
232 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc9
233 { s_single, s_term, s_string, s_single, s_eol, }, // 0xca
234 { s_single, s_term, s_string, s_single, s_eol, }, // 0xcb
235 { s_single, s_term, s_string, s_single, s_eol, }, // 0xcc
236 { s_single, s_term, s_string, s_single, s_eol, }, // 0xcd
237 { s_single, s_term, s_string, s_single, s_eol, }, // 0xce
238 { s_single, s_term, s_string, s_single, s_eol, }, // 0xcf
239 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd0
240 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd1
241 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd2
242 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd3
243 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd4
244 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd5
245 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd6
246 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd7
247 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd8
248 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd9
249 { s_single, s_term, s_string, s_single, s_eol, }, // 0xda
250 { s_single, s_term, s_string, s_single, s_eol, }, // 0xdb
251 { s_single, s_term, s_string, s_single, s_eol, }, // 0xdc
252 { s_single, s_term, s_string, s_single, s_eol, }, // 0xdd
253 { s_single, s_term, s_string, s_single, s_eol, }, // 0xde
254 { s_single, s_term, s_string, s_single, s_eol, }, // 0xdf
255 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe0
256 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe1
257 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe2
258 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe3
259 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe4
260 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe5
261 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe6
262 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe7
263 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe8
264 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe9
265 { s_single, s_term, s_string, s_single, s_eol, }, // 0xea
266 { s_single, s_term, s_string, s_single, s_eol, }, // 0xeb
267 { s_single, s_term, s_string, s_single, s_eol, }, // 0xec
268 { s_single, s_term, s_string, s_single, s_eol, }, // 0xed
269 { s_single, s_term, s_string, s_single, s_eol, }, // 0xee
270 { s_single, s_term, s_string, s_single, s_eol, }, // 0xef
271 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf0
272 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf1
273 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf2
274 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf3
275 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf4
276 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf5
277 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf6
278 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf7
279 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf8
280 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf9
281 { s_single, s_term, s_string, s_single, s_eol, }, // 0xfa
282 { s_single, s_term, s_string, s_single, s_eol, }, // 0xfb
283 { s_single, s_term, s_string, s_single, s_eol, }, // 0xfc
284 { s_single, s_term, s_string, s_single, s_eol, }, // 0xfd
285 { s_single, s_term, s_string, s_single, s_eol, }, // 0xfe
286 { s_single, s_term, s_string, s_single, s_eol, }, // 0xff
287 };
288
289
290 TOKEN::TOKEN(char *fn, string_set *includes) {
291 pushed = false;
292 include_files = includes;
293 include(fn);
294 }
295
296
297 TOKEN::~TOKEN() {
298 while (!streams.empty()) pop();
299 }
300
301
302 void TOKEN::pop() {
303 ifstream *is = streams.front();
304 char *fn = filenames.front();
305 streams.pop_front();
306 filenamess.erase(fn);
307 if (filenames.size() > 1) filenames.pop_front();
308 if (linenumbers.size() > 1) linenumbers.pop_front();
309 is->close();
310 delete is;
311 }
312
313
314 void TOKEN::push_char(u_char c) {
315 pushed = true;
316 pushed_char = c;
317 }
318
319
320 bool TOKEN::next_char(u_char &uc) {
321 if (pushed) {
322 uc = (u_char)tolower((char)pushed_char);
323 pushed = false;
324 return true;
325 }
326 while (!streams.empty() && streams.front()->eof()) {
327 pop();
328 }
329 if (streams.empty()) return false;
330 ifstream *is = streams.front();
331 uc = (u_char)is->get();
332 if (is->eof()) return next_char(uc);
333 if (uc == (u_char)'\n') {
334 int &line = linenumbers.front();
335 line++;
336 }
337 uc = (u_char)tolower((char)uc);
338 return true;
339 }
340
341
342 bool TOKEN::include(char *fn) {
343 string_set::iterator i = filenamess.find(fn);
344 if (i != filenamess.end()) {
345 token_error("redundant or recursive include file detected");
346 return false;
347 }
348 ifstream *is = new ifstream;
349 is->open(fn);
350 if (is->fail()) {
351 char buf[maxlen];
352 snprintf(buf, sizeof(buf), "include file %s not found", fn);
353 token_error(buf);
354 return false;
355 }
356 string_set &inc = *include_files;
357 inc.insert(fn);
358 streams.push_front(is);
359 filenames.push_front(fn);
360 filenamess.insert(fn);
361 linenumbers.push_front(1);
362 return true;
363 }
364
365
366 char *TOKEN::next() {
367 if (!pending_tokens.empty()) {
368 char *t = pending_tokens.front();
369 pending_tokens.pop_front();
370 return t;
371 }
372 if (streams.empty()) return NULL;
373 const int PENDING_LIMIT = 1000;
374 u_char buffer[PENDING_LIMIT];
375 int count = 0;
376 state st = s_init;
377 while (true) {
378 if (count == (PENDING_LIMIT-1)) {
379 token_error("token too long");
380 break;
381 }
382 if (st >= end_state) {
383 token_error("finite state machine error");
384 break;
385 }
386 u_char c;
387 if (!next_char(c)) break;
388 st = parse_table[c][st];
389 switch (st) {
390 case s_string:
391 case s_token: {
392 buffer[count++] = c;
393 } break;
394
395 case s_term: {
396 push_char(c);
397 st = s_init;
398 } break;
399
400 case s_string1: {
401 st = s_string;
402 } break;
403
404 case s_string2: {
405 st = s_init;
406 } break;
407
408 case s_single: {
409 buffer[count++] = c;
410 st = s_init;
411 } break;
412
413 case s_ignore:
414 case s_eol: {
415 } break;
416
417
418 case s_slash: {
419 buffer[count++] = c;
420 if (next_char(c)) {
421 if (c == (u_char)'/') {
422 // start of ignore to eol on //
423 count--;
424 st = s_eol;
425 }
426 else {
427 // not a // token, just return this single /
428 push_char(c);
429 st = s_init;
430 }
431 }
432 else {
433 // cannot get another char
434 st = s_init;
435 }
436 } break;
437
438 default: {
439 token_error();
440 token_error("unknown state %d %s", st, " ");
441 } break;
442 }
443 if (st == s_init) break;
444 }
445
446 buffer[count] = '\0';
447 if (count == 0) return NULL;
448 char *t = register_string((char*)buffer);
449 if (t == token_include) {
450 char *f = next(); // should be file name
451 char *s = next(); // should be semicolon
452 if (s == token_semi) {
453 include(f);
454 return next();
455 }
456 else {
457 push(s);
458 push(f);
459 return t;
460 }
461 }
462 return t;
463 }
464
465
466 int TOKEN::nextint() {
467 char *t = next();
468 char *e;
469 long i = strtol(t, &e, 10);
470 if (*e != '\0') {
471 token_error("integer", t);
472 return 0;
473 }
474 return (int)i;
475 }
476
477
478 void TOKEN::skipeol() {
479 while (true) {
480 u_char c;
481 if (!next_char(c)) break;
482 if (c == (u_char)'\n') break;
483 }
484 }
485
486
487 void TOKEN::token_error(const char *err) {
488 token_error();
489 char buf[maxlen];
490 snprintf(buf, sizeof(buf), "%s \n", err);
491 my_syslog(buf);
492 }
493
494
495 void TOKEN::token_error(const char *fmt, int d, const char *s) {
496 char buf[maxlen];
497 snprintf(buf, sizeof(buf), fmt, d, s);
498 my_syslog(buf);
499 }
500
501
502 void TOKEN::token_error(const char *fmt, const char *t, const char *h) {
503 if (!h) h = "null";
504 char buf[maxlen];
505 snprintf(buf, sizeof(buf), fmt, t, h);
506 my_syslog(buf);
507 }
508
509
510 void TOKEN::token_error(const char *want, const char *have) {
511 token_error();
512 token_error("expecting %s, found %s", want, have);
513 }
514
515
516 void TOKEN::token_error() {
517 token_error("syntax error at line %d in file %s -- ", cur_line(), cur_fn());
518 line_list::iterator j = linenumbers.begin();
519 string_list::iterator i = filenames.begin();
520 for (; i!=filenames.end(); i++,j++) {
521 if (i != filenames.begin()) {
522 char *fn = (*i);
523 int li = (*j);
524 token_error(" included from line %d in file %s -- ", li, fn);
525 }
526 }
527 }
528