comparison src/tokenizer.cpp @ 1:551433a01cab

initial coding
author carl
date Wed, 23 Nov 2005 19:29:14 -0800
parents
children 8fe310e5cd44
comparison
equal deleted inserted replaced
0:a3b8d64b2ae5 1:551433a01cab
1 /***************************************************************************
2 * Copyright (C) 2005 by 510 Software Group *
3 * *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19 ***************************************************************************/
20
21 #include "includes.h"
22
// Revision identification string; "$Id$" looks like an RCS/CVS-style keyword
// placeholder. Declared const because a string literal must not be bound to a
// mutable char* (ill-formed since C++11).
static const char* tokenizer_version = "$Id$";

// Size in bytes of the stack buffers that the error-reporting code fills with
// snprintf.
const int maxlen = 1000;
26
// States of the tokenizer's finite state machine.
//
// Values below end_state are the states the machine can be in between two
// input characters; they are the column indices of a PARSE row. Values above
// end_state only ever come out of the transition table: they tell
// TOKEN::next() what to do with the current character, and next() then moves
// to one of the states below end_state.
enum state {
    s_init    = 0,   // nothing collected yet
    s_token   = 1,   // inside an ordinary token
    s_string  = 2,   // inside a quoted string
    s_ignore  = 3,   // whitespace
    s_eol     = 4,   // ignore to eol
    end_state = 5,   // number of table columns

    s_term    = 6,   // token terminator
    s_single  = 7,   // character that forms a token by itself
    s_string1 = 8,   // first " of string
    s_string2 = 9,   // last " of string
    s_slash   = 10   // possible start of ignore to eol
};

// One row of the transition table: the result for every state below end_state.
typedef state PARSE[end_state];
42
// Transition table of the tokenizer. TOKEN::next() looks it up as
// parse_table[c][st], where c is the input byte and st the current state.
// Every row holds one entry per state below end_state, in the column order
// listed on the next line. Entries above end_state (s_term, s_single,
// s_string1, s_string2, s_slash) are resolved by the switch in TOKEN::next().
43 static PARSE parse_table[256] = {
44 // s_init s_token s_string s_ignore s_eol
45 { s_single, s_term, s_string, s_single, s_eol, }, // 0x00
46 { s_single, s_term, s_string, s_single, s_eol, }, // 0x01
47 { s_single, s_term, s_string, s_single, s_eol, }, // 0x02
48 { s_single, s_term, s_string, s_single, s_eol, }, // 0x03
49 { s_single, s_term, s_string, s_single, s_eol, }, // 0x04
50 { s_single, s_term, s_string, s_single, s_eol, }, // 0x05
51 { s_single, s_term, s_string, s_single, s_eol, }, // 0x06
52 { s_single, s_term, s_string, s_single, s_eol, }, // 0x07
53 { s_single, s_term, s_string, s_single, s_eol, }, // 0x08
54 { s_ignore, s_term, s_string, s_ignore, s_eol, }, // 0x09 <tab>
55 { s_ignore, s_term, s_string2, s_ignore, s_ignore, }, // 0x0a <lf>
56 { s_single, s_term, s_string, s_single, s_eol, }, // 0x0b
57 { s_single, s_term, s_string, s_single, s_eol, }, // 0x0c
58 { s_ignore, s_term, s_string2, s_ignore, s_eol, }, // 0x0d <cr>
59 { s_single, s_term, s_string, s_single, s_eol, }, // 0x0e
60 { s_single, s_term, s_string, s_single, s_eol, }, // 0x0f
61 { s_single, s_term, s_string, s_single, s_eol, }, // 0x10
62 { s_single, s_term, s_string, s_single, s_eol, }, // 0x11 xon char
63 { s_single, s_term, s_string, s_single, s_eol, }, // 0x12
64 { s_single, s_term, s_string, s_single, s_eol, }, // 0x13 xoff char
65 { s_single, s_term, s_string, s_single, s_eol, }, // 0x14
66 { s_single, s_term, s_string, s_single, s_eol, }, // 0x15
67 { s_single, s_term, s_string, s_single, s_eol, }, // 0x16
68 { s_single, s_term, s_string, s_single, s_eol, }, // 0x17
69 { s_single, s_term, s_string, s_single, s_eol, }, // 0x18
70 { s_single, s_term, s_string, s_single, s_eol, }, // 0x19
71 { s_single, s_term, s_string, s_single, s_eol, }, // 0x1a
72 { s_single, s_term, s_string, s_single, s_eol, }, // 0x1b
73 { s_single, s_term, s_string, s_single, s_eol, }, // 0x1c
74 { s_single, s_term, s_string, s_single, s_eol, }, // 0x1d
75 { s_single, s_term, s_string, s_single, s_eol, }, // 0x1e
76 { s_single, s_term, s_string, s_single, s_eol, }, // 0x1f
77 { s_ignore, s_term, s_string, s_ignore, s_eol, }, // 0x20 space
78 { s_single, s_term, s_string, s_single, s_eol, }, // 0x21 !
79 { s_string1, s_term, s_string2, s_string1, s_eol, }, // 0x22 "
80 { s_eol, s_term, s_string, s_eol, s_eol, }, // 0x23 #
81 { s_single, s_term, s_string, s_single, s_eol, }, // 0x24 $
82 { s_single, s_term, s_string, s_single, s_eol, }, // 0x25 %
83 { s_single, s_term, s_string, s_single, s_eol, }, // 0x26 &
84 { s_single, s_term, s_string, s_single, s_eol, }, // 0x27 '
85 { s_single, s_term, s_string, s_single, s_eol, }, // 0x28 (
86 { s_single, s_term, s_string, s_single, s_eol, }, // 0x29 )
87 { s_single, s_term, s_string, s_single, s_eol, }, // 0x2A *
88 { s_single, s_token, s_string, s_single, s_eol, }, // 0x2B +
89 { s_single, s_term, s_string, s_single, s_eol, }, // 0x2C ,
90 { s_single, s_token, s_string, s_single, s_eol, }, // 0x2D -
91 { s_single, s_token, s_string, s_single, s_eol, }, // 0x2E .
92 { s_slash, s_token, s_string, s_slash, s_eol, }, // 0x2F /
93 { s_token, s_token, s_string, s_token, s_eol, }, // 0x30 0
94 { s_token, s_token, s_string, s_token, s_eol, }, // 0x31 1
95 { s_token, s_token, s_string, s_token, s_eol, }, // 0x32 2
96 { s_token, s_token, s_string, s_token, s_eol, }, // 0x33 3
97 { s_token, s_token, s_string, s_token, s_eol, }, // 0x34 4
98 { s_token, s_token, s_string, s_token, s_eol, }, // 0x35 5
99 { s_token, s_token, s_string, s_token, s_eol, }, // 0x36 6
100 { s_token, s_token, s_string, s_token, s_eol, }, // 0x37 7
101 { s_token, s_token, s_string, s_token, s_eol, }, // 0x38 8
102 { s_token, s_token, s_string, s_token, s_eol, }, // 0x39 9
103 { s_single, s_term, s_string, s_single, s_eol, }, // 0x3A :
104 { s_single, s_term, s_string, s_single, s_eol, }, // 0x3B ;
105 { s_single, s_term, s_string, s_single, s_eol, }, // 0x3C <
106 { s_single, s_token, s_string, s_single, s_eol, }, // 0x3D =
107 { s_single, s_term, s_string, s_single, s_eol, }, // 0x3E >
108 { s_single, s_term, s_string, s_single, s_eol, }, // 0x3F ?
109 { s_single, s_token, s_string, s_single, s_eol, }, // 0x40 @
110 { s_token, s_token, s_string, s_token, s_eol, }, // 0x41 A
111 { s_token, s_token, s_string, s_token, s_eol, }, // 0x42 B
112 { s_token, s_token, s_string, s_token, s_eol, }, // 0x43 C
113 { s_token, s_token, s_string, s_token, s_eol, }, // 0x44 D
114 { s_token, s_token, s_string, s_token, s_eol, }, // 0x45 E
115 { s_token, s_token, s_string, s_token, s_eol, }, // 0x46 F
116 { s_token, s_token, s_string, s_token, s_eol, }, // 0x47 G
117 { s_token, s_token, s_string, s_token, s_eol, }, // 0x48 H
118 { s_token, s_token, s_string, s_token, s_eol, }, // 0x49 I
119 { s_token, s_token, s_string, s_token, s_eol, }, // 0x4A J
120 { s_token, s_token, s_string, s_token, s_eol, }, // 0x4B K
121 { s_token, s_token, s_string, s_token, s_eol, }, // 0x4C L
122 { s_token, s_token, s_string, s_token, s_eol, }, // 0x4D M
123 { s_token, s_token, s_string, s_token, s_eol, }, // 0x4E N
124 { s_token, s_token, s_string, s_token, s_eol, }, // 0x4F O
125 { s_token, s_token, s_string, s_token, s_eol, }, // 0x50 P
126 { s_token, s_token, s_string, s_token, s_eol, }, // 0x51 Q
127 { s_token, s_token, s_string, s_token, s_eol, }, // 0x52 R
128 { s_token, s_token, s_string, s_token, s_eol, }, // 0x53 S
129 { s_token, s_token, s_string, s_token, s_eol, }, // 0x54 T
130 { s_token, s_token, s_string, s_token, s_eol, }, // 0x55 U
131 { s_token, s_token, s_string, s_token, s_eol, }, // 0x56 V
132 { s_token, s_token, s_string, s_token, s_eol, }, // 0x57 W
133 { s_token, s_token, s_string, s_token, s_eol, }, // 0x58 X
134 { s_token, s_token, s_string, s_token, s_eol, }, // 0x59 Y
135 { s_token, s_token, s_string, s_token, s_eol, }, // 0x5A Z
136 { s_single, s_term, s_string, s_single, s_eol, }, // 0x5B [
137 { s_single, s_term, s_string, s_single, s_eol, }, // 0x5C backslash
138 { s_single, s_term, s_string, s_single, s_eol, }, // 0x5D ]
139 { s_single, s_term, s_string, s_single, s_eol, }, // 0x5E ^
140 { s_single, s_token, s_string, s_single, s_eol, }, // 0x5F _
141 { s_single, s_term, s_string, s_single, s_eol, }, // 0x60 `
142 { s_token, s_token, s_string, s_token, s_eol, }, // 0x61 a
143 { s_token, s_token, s_string, s_token, s_eol, }, // 0x62 b
144 { s_token, s_token, s_string, s_token, s_eol, }, // 0x63 c
145 { s_token, s_token, s_string, s_token, s_eol, }, // 0x64 d
146 { s_token, s_token, s_string, s_token, s_eol, }, // 0x65 e
147 { s_token, s_token, s_string, s_token, s_eol, }, // 0x66 f
148 { s_token, s_token, s_string, s_token, s_eol, }, // 0x67 g
149 { s_token, s_token, s_string, s_token, s_eol, }, // 0x68 h
150 { s_token, s_token, s_string, s_token, s_eol, }, // 0x69 i
151 { s_token, s_token, s_string, s_token, s_eol, }, // 0x6A j
152 { s_token, s_token, s_string, s_token, s_eol, }, // 0x6B k
153 { s_token, s_token, s_string, s_token, s_eol, }, // 0x6C l
154 { s_token, s_token, s_string, s_token, s_eol, }, // 0x6D m
155 { s_token, s_token, s_string, s_token, s_eol, }, // 0x6E n
156 { s_token, s_token, s_string, s_token, s_eol, }, // 0x6F o
157 { s_token, s_token, s_string, s_token, s_eol, }, // 0x70 p
158 { s_token, s_token, s_string, s_token, s_eol, }, // 0x71 q
159 { s_token, s_token, s_string, s_token, s_eol, }, // 0x72 r
160 { s_token, s_token, s_string, s_token, s_eol, }, // 0x73 s
161 { s_token, s_token, s_string, s_token, s_eol, }, // 0x74 t
162 { s_token, s_token, s_string, s_token, s_eol, }, // 0x75 u
163 { s_token, s_token, s_string, s_token, s_eol, }, // 0x76 v
164 { s_token, s_token, s_string, s_token, s_eol, }, // 0x77 w
165 { s_token, s_token, s_string, s_token, s_eol, }, // 0x78 x
166 { s_token, s_token, s_string, s_token, s_eol, }, // 0x79 y
167 { s_token, s_token, s_string, s_token, s_eol, }, // 0x7A z
168 { s_single, s_term, s_string, s_single, s_eol, }, // 0x7B {
169 { s_single, s_term, s_string, s_single, s_eol, }, // 0x7C |
170 { s_single, s_term, s_string, s_single, s_eol, }, // 0x7D }
171 { s_single, s_term, s_string, s_single, s_eol, }, // 0x7E ~
172 { s_single, s_term, s_string, s_single, s_eol, }, // 0x7f
173 { s_single, s_term, s_string, s_single, s_eol, }, // 0x80
174 { s_single, s_term, s_string, s_single, s_eol, }, // 0x81
175 { s_single, s_term, s_string, s_single, s_eol, }, // 0x82
176 { s_single, s_term, s_string, s_single, s_eol, }, // 0x83
177 { s_single, s_term, s_string, s_single, s_eol, }, // 0x84
178 { s_single, s_term, s_string, s_single, s_eol, }, // 0x85
179 { s_single, s_term, s_string, s_single, s_eol, }, // 0x86
180 { s_single, s_term, s_string, s_single, s_eol, }, // 0x87
181 { s_single, s_term, s_string, s_single, s_eol, }, // 0x88
182 { s_single, s_term, s_string, s_single, s_eol, }, // 0x89
183 { s_single, s_term, s_string, s_single, s_eol, }, // 0x8a
184 { s_single, s_term, s_string, s_single, s_eol, }, // 0x8b
185 { s_single, s_term, s_string, s_single, s_eol, }, // 0x8c
186 { s_single, s_term, s_string, s_single, s_eol, }, // 0x8d
187 { s_single, s_term, s_string, s_single, s_eol, }, // 0x8e
188 { s_single, s_term, s_string, s_single, s_eol, }, // 0x8f
189 { s_single, s_term, s_string, s_single, s_eol, }, // 0x90
190 { s_single, s_term, s_string, s_single, s_eol, }, // 0x91
191 { s_single, s_term, s_string, s_single, s_eol, }, // 0x92
192 { s_single, s_term, s_string, s_single, s_eol, }, // 0x93
193 { s_single, s_term, s_string, s_single, s_eol, }, // 0x94
194 { s_single, s_term, s_string, s_single, s_eol, }, // 0x95
195 { s_single, s_term, s_string, s_single, s_eol, }, // 0x96
196 { s_single, s_term, s_string, s_single, s_eol, }, // 0x97
197 { s_single, s_term, s_string, s_single, s_eol, }, // 0x98
198 { s_single, s_term, s_string, s_single, s_eol, }, // 0x99
199 { s_single, s_term, s_string, s_single, s_eol, }, // 0x9a
200 { s_single, s_term, s_string, s_single, s_eol, }, // 0x9b
201 { s_single, s_term, s_string, s_single, s_eol, }, // 0x9c
202 { s_single, s_term, s_string, s_single, s_eol, }, // 0x9d
203 { s_single, s_term, s_string, s_single, s_eol, }, // 0x9e
204 { s_single, s_term, s_string, s_single, s_eol, }, // 0x9f
205 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa0
206 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa1
207 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa2
208 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa3
209 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa4
210 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa5
211 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa6
212 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa7
213 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa8
214 { s_single, s_term, s_string, s_single, s_eol, }, // 0xa9
215 { s_single, s_term, s_string, s_single, s_eol, }, // 0xaa
216 { s_single, s_term, s_string, s_single, s_eol, }, // 0xab
217 { s_single, s_term, s_string, s_single, s_eol, }, // 0xac
218 { s_single, s_term, s_string, s_single, s_eol, }, // 0xad
219 { s_single, s_term, s_string, s_single, s_eol, }, // 0xae
220 { s_single, s_term, s_string, s_single, s_eol, }, // 0xaf
221 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb0
222 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb1
223 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb2
224 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb3
225 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb4
226 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb5
227 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb6
228 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb7
229 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb8
230 { s_single, s_term, s_string, s_single, s_eol, }, // 0xb9
231 { s_single, s_term, s_string, s_single, s_eol, }, // 0xba
232 { s_single, s_term, s_string, s_single, s_eol, }, // 0xbb
233 { s_single, s_term, s_string, s_single, s_eol, }, // 0xbc
234 { s_single, s_term, s_string, s_single, s_eol, }, // 0xbd
235 { s_single, s_term, s_string, s_single, s_eol, }, // 0xbe
236 { s_single, s_term, s_string, s_single, s_eol, }, // 0xbf
237 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc0
238 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc1
239 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc2
240 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc3
241 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc4
242 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc5
243 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc6
244 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc7
245 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc8
246 { s_single, s_term, s_string, s_single, s_eol, }, // 0xc9
247 { s_single, s_term, s_string, s_single, s_eol, }, // 0xca
248 { s_single, s_term, s_string, s_single, s_eol, }, // 0xcb
249 { s_single, s_term, s_string, s_single, s_eol, }, // 0xcc
250 { s_single, s_term, s_string, s_single, s_eol, }, // 0xcd
251 { s_single, s_term, s_string, s_single, s_eol, }, // 0xce
252 { s_single, s_term, s_string, s_single, s_eol, }, // 0xcf
253 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd0
254 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd1
255 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd2
256 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd3
257 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd4
258 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd5
259 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd6
260 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd7
261 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd8
262 { s_single, s_term, s_string, s_single, s_eol, }, // 0xd9
263 { s_single, s_term, s_string, s_single, s_eol, }, // 0xda
264 { s_single, s_term, s_string, s_single, s_eol, }, // 0xdb
265 { s_single, s_term, s_string, s_single, s_eol, }, // 0xdc
266 { s_single, s_term, s_string, s_single, s_eol, }, // 0xdd
267 { s_single, s_term, s_string, s_single, s_eol, }, // 0xde
268 { s_single, s_term, s_string, s_single, s_eol, }, // 0xdf
269 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe0
270 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe1
271 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe2
272 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe3
273 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe4
274 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe5
275 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe6
276 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe7
277 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe8
278 { s_single, s_term, s_string, s_single, s_eol, }, // 0xe9
279 { s_single, s_term, s_string, s_single, s_eol, }, // 0xea
280 { s_single, s_term, s_string, s_single, s_eol, }, // 0xeb
281 { s_single, s_term, s_string, s_single, s_eol, }, // 0xec
282 { s_single, s_term, s_string, s_single, s_eol, }, // 0xed
283 { s_single, s_term, s_string, s_single, s_eol, }, // 0xee
284 { s_single, s_term, s_string, s_single, s_eol, }, // 0xef
285 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf0
286 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf1
287 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf2
288 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf3
289 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf4
290 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf5
291 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf6
292 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf7
293 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf8
294 { s_single, s_term, s_string, s_single, s_eol, }, // 0xf9
295 { s_single, s_term, s_string, s_single, s_eol, }, // 0xfa
296 { s_single, s_term, s_string, s_single, s_eol, }, // 0xfb
297 { s_single, s_term, s_string, s_single, s_eol, }, // 0xfc
298 { s_single, s_term, s_string, s_single, s_eol, }, // 0xfd
299 { s_single, s_term, s_string, s_single, s_eol, }, // 0xfe
300 { s_single, s_term, s_string, s_single, s_eol, }, // 0xff
301 };
302
303
304 TOKEN::TOKEN(char *fn, string_set *includes) {
305 pushed = false;
306 include_files = includes;
307 include(fn);
308 }
309
310
311 TOKEN::~TOKEN() {
312 while (!streams.empty()) pop();
313 }
314
315
316 void TOKEN::pop() {
317 ifstream *is = streams.front();
318 char *fn = filenames.front();
319 streams.pop_front();
320 filenamess.erase(fn);
321 if (filenames.size() > 1) filenames.pop_front();
322 if (linenumbers.size() > 1) linenumbers.pop_front();
323 is->close();
324 delete is;
325 }
326
327
328 void TOKEN::push_char(u_char c) {
329 pushed = true;
330 pushed_char = c;
331 }
332
333
334 bool TOKEN::next_char(u_char &uc) {
335 if (pushed) {
336 uc = (u_char)tolower((char)pushed_char);
337 pushed = false;
338 return true;
339 }
340 while (!streams.empty() && streams.front()->eof()) {
341 pop();
342 }
343 if (streams.empty()) return false;
344 ifstream *is = streams.front();
345 uc = (u_char)is->get();
346 if (is->eof()) return next_char(uc);
347 if (uc == (u_char)'\n') {
348 int &line = linenumbers.front();
349 line++;
350 }
351 uc = (u_char)tolower((char)uc);
352 return true;
353 }
354
355
356 bool TOKEN::include(char *fn) {
357 string_set::iterator i = filenamess.find(fn);
358 if (i != filenamess.end()) {
359 token_error("redundant or recursive include file detected");
360 return false;
361 }
362 ifstream *is = new ifstream;
363 is->open(fn);
364 if (is->fail()) {
365 char buf[maxlen];
366 snprintf(buf, sizeof(buf), "include file %s not found", fn);
367 token_error(buf);
368 return false;
369 }
370 string_set &inc = *include_files;
371 inc.insert(fn);
372 streams.push_front(is);
373 filenames.push_front(fn);
374 filenamess.insert(fn);
375 linenumbers.push_front(1);
376 return true;
377 }
378
379
380 char *TOKEN::next() {
381 if (!pending_tokens.empty()) {
382 char *t = pending_tokens.front();
383 pending_tokens.pop_front();
384 return t;
385 }
386 if (streams.empty()) return NULL;
387 const int PENDING_LIMIT = 1000;
388 static u_char buffer[PENDING_LIMIT];
389 int count = 0;
390 state st = s_init;
391 while (true) {
392 if (count == (PENDING_LIMIT-1)) {
393 token_error("token too long");
394 break;
395 }
396 if (st >= end_state) {
397 token_error("finite state machine error");
398 break;
399 }
400 u_char c;
401 if (!next_char(c)) break;
402 st = parse_table[c][st];
403 switch (st) {
404 case s_string:
405 case s_token: {
406 buffer[count++] = c;
407 } break;
408
409 case s_term: {
410 push_char(c);
411 st = s_init;
412 } break;
413
414 case s_string1: {
415 st = s_string;
416 } break;
417
418 case s_string2: {
419 st = s_init;
420 } break;
421
422 case s_single: {
423 buffer[count++] = c;
424 st = s_init;
425 } break;
426
427 case s_ignore:
428 case s_eol: {
429 } break;
430
431
432 case s_slash: {
433 buffer[count++] = c;
434 if (next_char(c)) {
435 if (c == (u_char)'/') {
436 // start of ignore to eol on //
437 count--;
438 st = s_eol;
439 }
440 else {
441 // not a // token, just return this single /
442 push_char(c);
443 st = s_init;
444 }
445 }
446 else {
447 // cannot get another char
448 st = s_init;
449 }
450 } break;
451
452 default: {
453 token_error();
454 token_error("unknown state %d %s \n", st, " ");
455 } break;
456 }
457 if (st == s_init) break;
458 }
459
460 buffer[count] = '\0';
461 if (count == 0) return NULL;
462 char *t = register_string((char*)buffer);
463 if (t == token_include) {
464 char *f = next(); // should be file name
465 char *s = next(); // should be semicolon
466 if (s == token_semi) {
467 include(f);
468 return next();
469 }
470 else {
471 push(s);
472 push(f);
473 return t;
474 }
475 }
476 return t;
477 }
478
479
480 int TOKEN::nextint() {
481 char *t = next();
482 char *e;
483 long i = strtol(t, &e, 10);
484 if (*e != '\0') {
485 token_error("integer", t);
486 return 0;
487 }
488 return (int)i;
489 }
490
491
492 void TOKEN::skipeol() {
493 while (true) {
494 u_char c;
495 if (!next_char(c)) break;
496 if (c == (u_char)'\n') break;
497 }
498 }
499
500
501 void TOKEN::token_error(const char *err) {
502 token_error();
503 char buf[maxlen];
504 snprintf(buf, sizeof(buf), "%s \n", err);
505 my_syslog(buf);
506 }
507
508
509 void TOKEN::token_error(const char *fmt, int d, const char *s) {
510 char buf[maxlen];
511 snprintf(buf, sizeof(buf), fmt, d, s);
512 my_syslog(buf);
513 }
514
515
516 void TOKEN::token_error(const char *fmt, const char *t, const char *h) {
517 if (!h) h = "null";
518 char buf[maxlen];
519 snprintf(buf, sizeof(buf), fmt, t, h);
520 my_syslog(buf);
521 }
522
523
524 void TOKEN::token_error(const char *want, const char *have) {
525 token_error();
526 token_error("expecting %s, found %s \n", want, have);
527 }
528
529
530 void TOKEN::token_error() {
531 token_error("syntax error at line %d in file %s -- ", cur_line(), cur_fn());
532 line_list::iterator j = linenumbers.begin();
533 string_list::iterator i = filenames.begin();
534 for (; i!=filenames.end(); i++,j++) {
535 if (i != filenames.begin()) {
536 char *fn = (*i);
537 int li = (*j);
538 token_error("\n included from line %d in file %s -- ", li, fn);
539 }
540 }
541 }
542