comparison src/tokenizer.cpp @ 0:616666e2f34c

initial version
author carl
date Fri, 10 Mar 2006 10:30:08 -0800
parents
children 75e1a9bcbc2e
comparison
equal deleted inserted replaced
-1:000000000000 0:616666e2f34c
1 /*
2
3 Copyright (c) 2004 Carl Byington - 510 Software Group, released under
4 the GPL version 2 or any later version at your choice available at
5 http://www.fsf.org/licenses/gpl.txt
6
7 */
8
9 #include "includes.h"
10
// Version tag for this translation unit. "$Id$" looks like a
// version-control keyword placeholder -- presumably expanded at checkout.
// A string literal must be bound to a pointer-to-const: the implicit
// conversion to plain char* is not valid in C++11 and later.
static const char* tokenizer_version = "$Id$";

// Size of the stack buffers that the error reporting code formats into
// with snprintf.
const int maxlen = 1000;
14
// Tokenizer finite state machine.
//
// The enumerators before end_state are the persistent states; they index
// the columns of parse_table. The enumerators after end_state are
// transient results: the table may yield them, and TOKEN::next() turns
// each of them back into a persistent state before the next lookup.
enum state {
    s_init,         // start state, no token in progress
    s_token,        // inside a bare token
    s_string,       // inside a quoted string
    s_ignore,       // whitespace
    s_eol,          // ignore to eol
    end_state,      // number of persistent states (columns per table row)

    s_term,         // token terminator
    s_single,       // single character token
    s_string1,      // first " of string
    s_string2,      // last " of string
    s_slash         // possible start of ignore to eol
};

// One table row: the next state for each persistent state.
typedef state PARSE[end_state];

// Row shapes used by the table below. Columns, in order:
//                  s_init     s_token  s_string   s_ignore   s_eol
#define PT_O    { s_single,  s_term,  s_string,  s_single,  s_eol    }  // any other byte
#define PT_B    { s_ignore,  s_term,  s_string,  s_ignore,  s_eol    }  // tab, space
#define PT_LF   { s_ignore,  s_term,  s_string2, s_ignore,  s_ignore }  // line feed
#define PT_CR   { s_ignore,  s_term,  s_string2, s_ignore,  s_eol    }  // carriage return
#define PT_Q    { s_string1, s_term,  s_string2, s_string1, s_eol    }  // double quote
#define PT_H    { s_eol,     s_term,  s_string,  s_eol,     s_eol    }  // hash, starts a comment
#define PT_J    { s_single,  s_token, s_string,  s_single,  s_eol    }  // + - . = @ _
#define PT_S    { s_slash,   s_token, s_string,  s_slash,   s_eol    }  // slash
#define PT_W    { s_token,   s_token, s_string,  s_token,   s_eol    }  // digits and letters

// Transition table indexed by [input byte][current persistent state].
// It is only ever read, so it is const.
static const PARSE parse_table[256] = {
    // 0x00 - 0x07
    PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,
    // 0x08 - 0x0f: 0x09 <tab>, 0x0a <lf>, 0x0d <cr>
    PT_O,  PT_B,  PT_LF, PT_O,  PT_O,  PT_CR, PT_O,  PT_O,
    // 0x10 - 0x17: 0x11 xon char, 0x13 xoff char
    PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,
    // 0x18 - 0x1f
    PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,
    // 0x20 - 0x27: space ! " # $ % & '
    PT_B,  PT_O,  PT_Q,  PT_H,  PT_O,  PT_O,  PT_O,  PT_O,
    // 0x28 - 0x2f: ( ) * + , - . /
    PT_O,  PT_O,  PT_O,  PT_J,  PT_O,  PT_J,  PT_J,  PT_S,
    // 0x30 - 0x37: 0 1 2 3 4 5 6 7
    PT_W,  PT_W,  PT_W,  PT_W,  PT_W,  PT_W,  PT_W,  PT_W,
    // 0x38 - 0x3f: 8 9 : ; < = > ?
    PT_W,  PT_W,  PT_O,  PT_O,  PT_O,  PT_J,  PT_O,  PT_O,
    // 0x40 - 0x47: @ A B C D E F G
    PT_J,  PT_W,  PT_W,  PT_W,  PT_W,  PT_W,  PT_W,  PT_W,
    // 0x48 - 0x4f: H I J K L M N O
    PT_W,  PT_W,  PT_W,  PT_W,  PT_W,  PT_W,  PT_W,  PT_W,
    // 0x50 - 0x57: P Q R S T U V W
    PT_W,  PT_W,  PT_W,  PT_W,  PT_W,  PT_W,  PT_W,  PT_W,
    // 0x58 - 0x5f: X Y Z [ backslash ] ^ _
    PT_W,  PT_W,  PT_W,  PT_O,  PT_O,  PT_O,  PT_O,  PT_J,
    // 0x60 - 0x67: ` a b c d e f g
    PT_O,  PT_W,  PT_W,  PT_W,  PT_W,  PT_W,  PT_W,  PT_W,
    // 0x68 - 0x6f: h i j k l m n o
    PT_W,  PT_W,  PT_W,  PT_W,  PT_W,  PT_W,  PT_W,  PT_W,
    // 0x70 - 0x77: p q r s t u v w
    PT_W,  PT_W,  PT_W,  PT_W,  PT_W,  PT_W,  PT_W,  PT_W,
    // 0x78 - 0x7f: x y z { | } ~ 0x7f
    PT_W,  PT_W,  PT_W,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,
    // 0x80 - 0x87
    PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,
    // 0x88 - 0x8f
    PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,
    // 0x90 - 0x97
    PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,
    // 0x98 - 0x9f
    PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,
    // 0xa0 - 0xa7
    PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,
    // 0xa8 - 0xaf
    PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,
    // 0xb0 - 0xb7
    PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,
    // 0xb8 - 0xbf
    PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,
    // 0xc0 - 0xc7
    PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,
    // 0xc8 - 0xcf
    PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,
    // 0xd0 - 0xd7
    PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,
    // 0xd8 - 0xdf
    PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,
    // 0xe0 - 0xe7
    PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,
    // 0xe8 - 0xef
    PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,
    // 0xf0 - 0xf7
    PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,
    // 0xf8 - 0xff
    PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O,  PT_O
};

#undef PT_O
#undef PT_B
#undef PT_LF
#undef PT_CR
#undef PT_Q
#undef PT_H
#undef PT_J
#undef PT_S
#undef PT_W
290
291
292 TOKEN::TOKEN(char *fn, string_set *includes) {
293 pushed = false;
294 include_files = includes;
295 include(fn);
296 }
297
298
299 TOKEN::~TOKEN() {
300 while (!streams.empty()) pop();
301 }
302
303
304 void TOKEN::pop() {
305 ifstream *is = streams.front();
306 char *fn = filenames.front();
307 streams.pop_front();
308 filenames.pop_front();
309 filenamess.erase(fn);
310 linenumbers.pop_front();
311 is->close();
312 delete is;
313 }
314
315
316 void TOKEN::push_char(u_char c) {
317 pushed = true;
318 pushed_char = c;
319 }
320
321
322 bool TOKEN::next_char(u_char &uc) {
323 if (pushed) {
324 uc = (u_char)tolower((char)pushed_char);
325 pushed = false;
326 return true;
327 }
328 while (!streams.empty() && streams.front()->eof()) {
329 pop();
330 }
331 if (streams.empty()) return false;
332 ifstream *is = streams.front();
333 uc = (u_char)is->get();
334 if (is->eof()) return next_char(uc);
335 if (uc == (u_char)'\n') {
336 int &line = linenumbers.front();
337 line++;
338 }
339 uc = (u_char)tolower((char)uc);
340 return true;
341 }
342
343
344 bool TOKEN::include(char *fn) {
345 string_set::iterator i = filenamess.find(fn);
346 if (i != filenamess.end()) {
347 token_error("redundant or recursive include file detected");
348 return false;
349 }
350 ifstream *is = new ifstream;
351 is->open(fn);
352 if (is->fail()) {
353 char buf[maxlen];
354 snprintf(buf, sizeof(buf), "include file %s not found", fn);
355 token_error(buf);
356 return false;
357 }
358 string_set &inc = *include_files;
359 inc.insert(fn);
360 streams.push_front(is);
361 filenames.push_front(fn);
362 filenamess.insert(fn);
363 linenumbers.push_front(1);
364 return true;
365 }
366
367
368 char *TOKEN::next() {
369 if (!pending_tokens.empty()) {
370 char *t = pending_tokens.front();
371 pending_tokens.pop_front();
372 return t;
373 }
374 if (streams.empty()) return NULL;
375 const int PENDING_LIMIT = 1000;
376 static u_char buffer[PENDING_LIMIT];
377 int count = 0;
378 state st = s_init;
379 while (true) {
380 if (count == (PENDING_LIMIT-1)) {
381 token_error("token too long");
382 break;
383 }
384 if (st >= end_state) {
385 token_error("finite state machine error");
386 break;
387 }
388 u_char c;
389 if (!next_char(c)) break;
390 st = parse_table[c][st];
391 switch (st) {
392 case s_string:
393 case s_token: {
394 buffer[count++] = c;
395 } break;
396
397 case s_term: {
398 push_char(c);
399 st = s_init;
400 } break;
401
402 case s_string1: {
403 st = s_string;
404 } break;
405
406 case s_string2: {
407 st = s_init;
408 } break;
409
410 case s_single: {
411 buffer[count++] = c;
412 st = s_init;
413 } break;
414
415 case s_ignore:
416 case s_eol: {
417 } break;
418
419
420 case s_slash: {
421 buffer[count++] = c;
422 if (next_char(c)) {
423 if (c == (u_char)'/') {
424 // start of ignore to eol on //
425 count--;
426 st = s_eol;
427 }
428 else {
429 // not a // token, just return this single /
430 push_char(c);
431 st = s_init;
432 }
433 }
434 else {
435 // cannot get another char
436 st = s_init;
437 }
438 } break;
439
440 default: {
441 token_error();
442 token_error("unknown state %d %s \n", st, " ");
443 } break;
444 }
445 if (st == s_init) break;
446 }
447
448 buffer[count] = '\0';
449 if (count == 0) return NULL;
450 char *t = register_string((char*)buffer);
451 if (t == token_include) {
452 char *f = next(); // should be file name
453 char *s = next(); // should be semicolon
454 if (s == token_semi) {
455 include(f);
456 return next();
457 }
458 else {
459 push(s);
460 push(f);
461 return t;
462 }
463 }
464 return t;
465 }
466
467
468 int TOKEN::nextint() {
469 char *t = next();
470 char *e;
471 long i = strtol(t, &e, 10);
472 if (*e != '\0') {
473 token_error("integer", t);
474 return 0;
475 }
476 return (int)i;
477 }
478
479
480 void TOKEN::skipeol() {
481 while (true) {
482 u_char c;
483 if (!next_char(c)) break;
484 if (c == (u_char)'\n') break;
485 }
486 }
487
488
489 void TOKEN::token_error(const char *err) {
490 token_error();
491 char buf[maxlen];
492 snprintf(buf, sizeof(buf), "%s \n", err);
493 my_syslog(buf);
494 }
495
496
497 void TOKEN::token_error(const char *fmt, int d, const char *s) {
498 char buf[maxlen];
499 snprintf(buf, sizeof(buf), fmt, d, s);
500 my_syslog(buf);
501 }
502
503
504 void TOKEN::token_error(const char *fmt, const char *t, const char *h) {
505 if (!h) h = "null";
506 char buf[maxlen];
507 snprintf(buf, sizeof(buf), fmt, t, h);
508 my_syslog(buf);
509 }
510
511
512 void TOKEN::token_error(const char *want, const char *have) {
513 token_error();
514 token_error("expecting %s, found %s \n", want, have);
515 }
516
517
518 void TOKEN::token_error() {
519 token_error("syntax error at line %d in file %s -- ", cur_line(), cur_fn());
520 line_list::iterator j = linenumbers.begin();
521 string_list::iterator i = filenames.begin();
522 for (; i!=filenames.end(); i++,j++) {
523 if (i != filenames.begin()) {
524 char *fn = (*i);
525 int li = (*j);
526 token_error("\n included from line %d in file %s -- ", li, fn);
527 }
528 }
529 }
530