view src/tokenizer.cpp @ 3:8fe310e5cd44

initial coding
author carl
date Sun, 27 Nov 2005 21:12:01 -0800
parents 551433a01cab
children 28fec0c67646
line wrap: on
line source

/***************************************************************************
 *	 Copyright (C) 2005 by 510 Software Group							   *
 *																		   *
 *																		   *
 *	 This program is free software; you can redistribute it and/or modify  *
 *	 it under the terms of the GNU General Public License as published by  *
 *	 the Free Software Foundation; either version 2 of the License, or	   *
 *	 (at your option) any later version.								   *
 *																		   *
 *	 This program is distributed in the hope that it will be useful,	   *
 *	 but WITHOUT ANY WARRANTY; without even the implied warranty of 	   *
 *	 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the		   *
 *	 GNU General Public License for more details.						   *
 *																		   *
 *	 You should have received a copy of the GNU General Public License	   *
 *	 along with this program; if not, write to the						   *
 *	 Free Software Foundation, Inc.,									   *
 *	 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.			   *
 ***************************************************************************/

#include "includes.h"

// Revision-control identification string for this translation unit. The "$Id$"
// keyword is left unexpanded here. It points at a string literal, so it
// must be a pointer to const: binding a literal to a plain char* is
// deprecated in C++03 and ill-formed from C++11 on.
static const char* tokenizer_version="$Id$";

// size of the stack buffers used to format messages with snprintf
const int maxlen = 1000;	// used for snprintf buffers

// States of the tokenizer's finite state machine.
//
// The enumerators before end_state are the real states; each one is a column
// of parse_table. The enumerators after end_state only appear as table
// entries: TOKEN::next() acts on them and then replaces them with a real
// state before the next character is read.
enum state {
	// ---- real states (table columns) ----
	s_init = 0, 	// start state; returning to it ends the current token
	s_token,		// inside a token
	s_string,		// inside a quoted string
	s_ignore,		// whitespace
	s_eol,			// ignore to eol
	end_state,		// number of real states, not a state itself

	// ---- pseudo states (table entries only) ----
	s_term, 		// token terminator
	s_single,		// single character token
	s_string1,		// first " of string
	s_string2,		// last " of string
	s_slash 		// possible start of ignore to eol
};

// One row of the parse table: the entry for each real state.
typedef state PARSE[end_state];

// State transition table for the tokenizer, used by TOKEN::next() as
// parse_table[c][st]: the row is selected by the input byte (0x00..0xff) and
// the column by the current real state (s_init .. s_eol). Each entry is
// either the next real state, or one of the pseudo states that follow
// end_state in the enum (s_term, s_single, s_string1, s_string2, s_slash).
// TOKEN::next() performs the action for a pseudo state and then maps it back
// to a real state before the next lookup.
static PARSE parse_table[256] = {
	// s_init	  s_token	  s_string	  s_ignore	 s_eol
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x00
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x01
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x02
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x03
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x04
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x05
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x06
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x07
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x08
	{ s_ignore,   s_term,	  s_string,   s_ignore,  s_eol, 	 },  // 0x09 <tab>
	{ s_ignore,   s_term,	  s_string2,  s_ignore,  s_ignore,	 },  // 0x0a <lf>
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x0b
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x0c
	{ s_ignore,   s_term,	  s_string2,  s_ignore,  s_eol, 	 },  // 0x0d <cr>
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x0e
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x0f
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x10
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x11 xon char
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x12
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x13 xoff char
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x14
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x15
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x16
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x17
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x18
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x19
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x1a
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x1b
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x1c
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x1d
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x1e
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x1f
	{ s_ignore,   s_term,	  s_string,   s_ignore,  s_eol, 	 },  // 0x20 space
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x21 !
	{ s_string1,  s_term,	  s_string2,  s_string1, s_eol, 	 },  // 0x22 "
	{ s_eol,	  s_term,	  s_string,   s_eol,	 s_eol, 	 },  // 0x23 #
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x24 $
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x25 %
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x26 &
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x27 '
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x28 (
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x29 )
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x2A *
	{ s_single,   s_token,	  s_string,   s_single,  s_eol, 	 },  // 0x2B +
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x2C ,
	{ s_single,   s_token,	  s_string,   s_single,  s_eol, 	 },  // 0x2D -
	{ s_single,   s_token,	  s_string,   s_single,  s_eol, 	 },  // 0x2E .
	{ s_slash,	  s_term,	  s_string,   s_slash,	 s_eol, 	 },  // 0x2F /
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x30 0
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x31 1
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x32 2
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x33 3
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x34 4
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x35 5
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x36 6
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x37 7
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x38 8
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x39 9
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x3A :
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x3B ;
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x3C <
	{ s_single,   s_token,	  s_string,   s_single,  s_eol, 	 },  // 0x3D =
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x3E >
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x3F ?
	{ s_single,   s_token,	  s_string,   s_single,  s_eol, 	 },  // 0x40 @
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x41 A
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x42 B
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x43 C
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x44 D
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x45 E
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x46 F
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x47 G
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x48 H
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x49 I
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x4A J
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x4B K
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x4C L
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x4D M
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x4E N
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x4F O
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x50 P
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x51 Q
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x52 R
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x53 S
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x54 T
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x55 U
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x56 V
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x57 W
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x58 X
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x59 Y
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x5A Z
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x5B [
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x5C backslash
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x5D ]
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x5E ^
	{ s_single,   s_token,	  s_string,   s_single,  s_eol, 	 },  // 0x5F _
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x60 `
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x61 a
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x62 b
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x63 c
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x64 d
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x65 e
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x66 f
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x67 g
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x68 h
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x69 i
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x6A j
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x6B k
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x6C l
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x6D m
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x6E n
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x6F o
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x70 p
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x71 q
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x72 r
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x73 s
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x74 t
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x75 u
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x76 v
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x77 w
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x78 x
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x79 y
	{ s_token,	  s_token,	  s_string,   s_token,	 s_eol, 	 },  // 0x7A z
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x7B {
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x7C |
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x7D }
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x7E ~
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x7f
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x80
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x81
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x82
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x83
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x84
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x85
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x86
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x87
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x88
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x89
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x8a
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x8b
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x8c
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x8d
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x8e
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x8f
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x90
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x91
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x92
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x93
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x94
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x95
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x96
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x97
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x98
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x99
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x9a
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x9b
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x9c
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x9d
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x9e
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0x9f
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xa0
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xa1
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xa2
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xa3
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xa4
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xa5
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xa6
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xa7
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xa8
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xa9
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xaa
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xab
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xac
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xad
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xae
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xaf
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xb0
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xb1
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xb2
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xb3
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xb4
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xb5
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xb6
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xb7
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xb8
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xb9
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xba
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xbb
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xbc
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xbd
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xbe
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xbf
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xc0
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xc1
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xc2
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xc3
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xc4
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xc5
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xc6
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xc7
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xc8
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xc9
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xca
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xcb
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xcc
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xcd
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xce
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xcf
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xd0
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xd1
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xd2
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xd3
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xd4
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xd5
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xd6
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xd7
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xd8
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xd9
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xda
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xdb
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xdc
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xdd
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xde
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xdf
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xe0
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xe1
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xe2
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xe3
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xe4
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xe5
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xe6
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xe7
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xe8
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xe9
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xea
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xeb
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xec
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xed
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xee
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xef
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xf0
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xf1
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xf2
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xf3
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xf4
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xf5
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xf6
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xf7
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xf8
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xf9
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xfa
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xfb
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xfc
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xfd
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xfe
	{ s_single,   s_term,	  s_string,   s_single,  s_eol, 	 },  // 0xff
};


// Build a tokenizer that reads from file fn.
//	 fn 	   - name of the top level file to tokenize
//	 includes  - set that receives the name of every file that is
//				 successfully opened (see include())
// The result of the initial include() is not checked here.
TOKEN::TOKEN(char *fn, string_set *includes) {
	include_files = includes;
	pushed		  = false;		// no pushed back character yet
	include(fn);
}


// Close and free every input stream that is still open.
TOKEN::~TOKEN() {
	for (;;) {
		if (streams.empty()) break;
		pop();
	}
}


void TOKEN::pop() {
	ifstream *is = streams.front();
	char *fn = filenames.front();
	streams.pop_front();
	filenamess.erase(fn);
	if (filenames.size() > 1)	filenames.pop_front();
	if (linenumbers.size() > 1) linenumbers.pop_front();
	is->close();
	delete is;
}


// Push back one character; the next call to next_char() returns it before
// reading anything more from the streams. Only one character is held, so a
// second push before the next read replaces the first.
void TOKEN::push_char(u_char c) {
	pushed_char = c;
	pushed		= true;
}


// Fetch the next input character, folded to lower case.
//	 uc - receives the character
// Returns true if a character was delivered, false once every stream has been
// exhausted.
//
// A character saved by push_char() is returned first. Otherwise the front
// stream is read; when a read fails the stream is popped and reading continues
// with the stream beneath it. The line number of the front stream is advanced
// for each newline read.
bool TOKEN::next_char(u_char &uc) {
	if (pushed) {
		// tolower() requires a value representable as unsigned char; going
		// through plain char would pass a negative value for bytes >= 0x80
		// on platforms where char is signed.
		uc = (u_char)tolower((u_char)pushed_char);
		pushed = false;
		return true;
	}
	while (!streams.empty()) {
		ifstream *is = streams.front();
		int ch = is->get();
		if (is->fail()) {
			// End of file or a read error. Either way this stream cannot
			// supply more data, so drop it rather than handing the caller
			// the eof marker as if it were a 0xff data byte.
			pop();
			continue;
		}
		uc = (u_char)ch;
		if (uc == (u_char)'\n') {
			int &line = linenumbers.front();
			line++;
		}
		uc = (u_char)tolower(uc);
		return true;
	}
	return false;
}


bool TOKEN::include(char *fn) {
	string_set::iterator i = filenamess.find(fn);
	if (i != filenamess.end()) {
		token_error("redundant or recursive include file detected");
		return false;
	}
	ifstream *is = new ifstream;
	is->open(fn);
	if (is->fail()) {
		char buf[maxlen];
		snprintf(buf, sizeof(buf), "include file %s not found", fn);
		token_error(buf);
		return false;
	}
	string_set &inc = *include_files;
	inc.insert(fn);
	streams.push_front(is);
	filenames.push_front(fn);
	filenamess.insert(fn);
	linenumbers.push_front(1);
	return true;
}


char *TOKEN::next() {
	if (!pending_tokens.empty()) {
		char *t = pending_tokens.front();
		pending_tokens.pop_front();
		return t;
	}
	if (streams.empty()) return NULL;
	const int PENDING_LIMIT = 1000;
	static u_char buffer[PENDING_LIMIT];
	int count = 0;
	state st = s_init;
	while (true) {
		if (count == (PENDING_LIMIT-1)) {
			token_error("token too long");
			break;
		}
		if (st >= end_state) {
			token_error("finite state machine error");
			break;
		}
		u_char c;
		if (!next_char(c)) break;
		st = parse_table[c][st];
		switch (st) {
			case s_string:
			case s_token: {
				buffer[count++] = c;
			} break;

			case s_term: {
				push_char(c);
				st = s_init;
			} break;

			case s_string1: {
				st = s_string;
			} break;

			case s_string2: {
				st = s_init;
			} break;

			case s_single: {
				buffer[count++] = c;
				st = s_init;
			} break;

			case s_ignore:
			case s_eol: {
			} break;


			case s_slash: {
				buffer[count++] = c;
				if (next_char(c)) {
					if (c == (u_char)'/') {
						// start of ignore to eol on //
						count--;
						st = s_eol;
					}
					else {
						// not a // token, just return this single /
						push_char(c);
						st = s_init;
					}
				}
				else {
					// cannot get another char
					st = s_init;
				}
			} break;

			default: {
				token_error();
				token_error("unknown state %d %s \n", st, " ");
			} break;
		}
		if (st == s_init) break;
	}

	buffer[count] = '\0';
	if (count == 0) return NULL;
	char *t = register_string((char*)buffer);
	if (t == token_include) {
		char *f = next();	// should be file name
		char *s = next();	// should be semicolon
		if (s == token_semi) {
			include(f);
			return next();
		}
		else {
			push(s);
			push(f);
			return t;
		}
	}
	return t;
}


// Read the next token and convert it to a decimal integer.
// Returns the value, or 0 after logging an "expecting integer" error when
// there is no token, the token is not entirely a decimal number, or the
// value does not fit in an int.
int TOKEN::nextint() {
	char *t = next();
	if (!t) {
		// end of input; strtol() must not be handed a null pointer
		token_error("integer", t);
		return 0;
	}
	char *e;
	long i = strtol(t, &e, 10);
	bool no_digits = (e == t);				// nothing was converted
	bool trailing  = (*e != '\0');			// junk after the number
	bool too_big   = ((long)(int)i != i);	// would be truncated by the cast
	if (no_digits || trailing || too_big) {
		token_error("integer", t);
		return 0;
	}
	return (int)i;
}


void TOKEN::skipeol() {
	while (true) {
		u_char c;
		if (!next_char(c)) break;
		if (c == (u_char)'\n') break;
	}
}


void TOKEN::token_error(const char *err) {
	token_error();
	char buf[maxlen];
	snprintf(buf, sizeof(buf), "%s \n", err);
	my_syslog(buf);
}


// Format and log one message.
//	 fmt - printf style format taking an int followed by a string
//	 d	 - value for the integer conversion
//	 s	 - value for the string conversion; a null pointer is logged as "null",
//		   matching the overload that takes two strings
// The formatted text is truncated to maxlen-1 characters.
void TOKEN::token_error(const char *fmt, int d, const char *s) {
	if (!s) s = "null";
	char buf[maxlen];
	snprintf(buf, sizeof(buf), fmt, d, s);
	my_syslog(buf);
}


void TOKEN::token_error(const char *fmt, const char *t, const char *h) {
	if (!h) h = "null";
	char buf[maxlen];
	snprintf(buf, sizeof(buf), fmt, t, h);
	my_syslog(buf);
}


// Log the current location followed by an "expecting ..., found ..." message.
//	 want - description of what the parser was looking for
//	 have - the token actually seen
void TOKEN::token_error(const char *want, const char *have) {
	const char *layout = "expecting %s, found %s";
	token_error();
	token_error(layout, want, have);
}


void TOKEN::token_error() {
	token_error("syntax error at line %d in file %s -- ", cur_line(), cur_fn());
	line_list::iterator   j = linenumbers.begin();
	string_list::iterator i = filenames.begin();
	for (; i!=filenames.end(); i++,j++) {
		if (i != filenames.begin()) {
			char *fn = (*i);
			int   li = (*j);
			token_error("    included from line %d in file %s -- ", li, fn);
		}
	}
}