view src/tokenizer.cpp @ 356:90d433d1382e

when extracting header fields, need to preserve the header value since that will be used by subsequent (dcc,spamd,etc) milters
author Carl Byington <carl@five-ten-sg.com>
date Sun, 08 Jan 2017 21:13:01 -0800
parents 82886d4dd71f
children f5b394bec28c
line wrap: on
line source

/*

Copyright (c) 2007 Carl Byington - 510 Software Group, released under
the GPL version 3 or any later version at your choice available at
http://www.gnu.org/licenses/gpl-3.0.txt

*/

#include "includes.h"

const int maxlen = 1000;    // size in bytes of the stack buffers that snprintf formats messages into

// States of the tokenizer finite state machine.
//
// The enumerators before end_state are the persistent states: they are the
// column index into each parse_table row (PARSE has end_state entries).
// The enumerators after end_state only ever appear as values stored in
// parse_table; TOKEN::next() acts on them and immediately replaces them
// with one of the persistent states. Do not reorder: the table columns are
// positional.
enum state {s_init,         // start state, not inside any token
            s_token,        // accumulating an unquoted token
            s_string,       // accumulating the body of a quoted string
            s_ignore,       // whitespace
            s_eol,          // ignore to eol
            end_state,      // number of persistent states (size of a PARSE row)

            s_term,         // token terminator
            s_single,       // single character that is a complete token by itself
            s_string1,      // first " of string
            s_string2,      // last " of string
            s_slash         // possible start of ignore to eol
            };

// One row of the transition table: the next state/action for every persistent
// state (indexed by the current state, which must be < end_state).
typedef state PARSE[end_state];

// Transition table of the tokenizer, used as parse_table[input byte][current state].
// Each row corresponds to one input byte value (0x00..0xff, see the trailing
// comment on the row); each column corresponds to one persistent state in the
// order given by the header comment below. The stored value is either the next
// persistent state or one of the transient action states that TOKEN::next()
// resolves.
//
// Summary of what the rows encode:
//   - tab, lf, cr and space are ignored outside of tokens and strings
//   - digits and letters start and continue a token
//   - + - . / = @ _ continue a token that has already started, but are not
//     token starters themselves (outside a token, / goes to s_slash and the
//     others are single character tokens)
//   - " opens and closes a string; lf and cr also end a string
//   - # starts a comment that runs to the next lf
//   - every other byte is a single character token outside of strings/comments
static PARSE parse_table[256] = {
    // s_init     s_token     s_string    s_ignore   s_eol
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x00
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x01
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x02
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x03
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x04
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x05
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x06
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x07
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x08
    { s_ignore,   s_term,     s_string,   s_ignore,  s_eol,      },  // 0x09 <tab>
    { s_ignore,   s_term,     s_string2,  s_ignore,  s_ignore,   },  // 0x0a <lf>
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x0b
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x0c
    { s_ignore,   s_term,     s_string2,  s_ignore,  s_eol,      },  // 0x0d <cr>
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x0e
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x0f
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x10
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x11 xon char
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x12
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x13 xoff char
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x14
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x15
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x16
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x17
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x18
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x19
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x1a
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x1b
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x1c
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x1d
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x1e
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x1f
    { s_ignore,   s_term,     s_string,   s_ignore,  s_eol,      },  // 0x20 space
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x21 !
    { s_string1,  s_term,     s_string2,  s_string1, s_eol,      },  // 0x22 "
    { s_eol,      s_term,     s_string,   s_eol,     s_eol,      },  // 0x23 #
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x24 $
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x25 %
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x26 &
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x27 '
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x28 (
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x29 )
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x2A *
    { s_single,   s_token,    s_string,   s_single,  s_eol,      },  // 0x2B +
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x2C ,
    { s_single,   s_token,    s_string,   s_single,  s_eol,      },  // 0x2D -
    { s_single,   s_token,    s_string,   s_single,  s_eol,      },  // 0x2E .
    { s_slash,    s_token,    s_string,   s_slash,   s_eol,      },  // 0x2F /
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x30 0
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x31 1
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x32 2
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x33 3
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x34 4
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x35 5
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x36 6
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x37 7
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x38 8
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x39 9
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x3A :
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x3B ;
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x3C <
    { s_single,   s_token,    s_string,   s_single,  s_eol,      },  // 0x3D =
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x3E >
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x3F ?
    { s_single,   s_token,    s_string,   s_single,  s_eol,      },  // 0x40 @
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x41 A
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x42 B
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x43 C
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x44 D
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x45 E
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x46 F
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x47 G
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x48 H
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x49 I
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x4A J
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x4B K
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x4C L
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x4D M
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x4E N
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x4F O
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x50 P
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x51 Q
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x52 R
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x53 S
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x54 T
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x55 U
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x56 V
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x57 W
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x58 X
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x59 Y
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x5A Z
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x5B [
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x5C backslash
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x5D ]
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x5E ^
    { s_single,   s_token,    s_string,   s_single,  s_eol,      },  // 0x5F _
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x60 `
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x61 a
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x62 b
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x63 c
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x64 d
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x65 e
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x66 f
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x67 g
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x68 h
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x69 i
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x6A j
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x6B k
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x6C l
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x6D m
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x6E n
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x6F o
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x70 p
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x71 q
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x72 r
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x73 s
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x74 t
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x75 u
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x76 v
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x77 w
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x78 x
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x79 y
    { s_token,    s_token,    s_string,   s_token,   s_eol,      },  // 0x7A z
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x7B {
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x7C |
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x7D }
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x7E ~
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x7f
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x80
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x81
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x82
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x83
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x84
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x85
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x86
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x87
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x88
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x89
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x8a
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x8b
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x8c
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x8d
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x8e
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x8f
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x90
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x91
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x92
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x93
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x94
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x95
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x96
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x97
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x98
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x99
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x9a
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x9b
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x9c
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x9d
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x9e
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0x9f
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xa0
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xa1
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xa2
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xa3
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xa4
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xa5
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xa6
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xa7
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xa8
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xa9
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xaa
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xab
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xac
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xad
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xae
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xaf
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xb0
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xb1
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xb2
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xb3
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xb4
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xb5
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xb6
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xb7
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xb8
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xb9
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xba
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xbb
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xbc
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xbd
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xbe
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xbf
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xc0
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xc1
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xc2
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xc3
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xc4
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xc5
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xc6
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xc7
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xc8
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xc9
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xca
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xcb
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xcc
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xcd
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xce
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xcf
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xd0
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xd1
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xd2
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xd3
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xd4
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xd5
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xd6
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xd7
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xd8
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xd9
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xda
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xdb
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xdc
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xdd
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xde
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xdf
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xe0
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xe1
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xe2
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xe3
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xe4
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xe5
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xe6
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xe7
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xe8
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xe9
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xea
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xeb
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xec
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xed
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xee
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xef
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xf0
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xf1
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xf2
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xf3
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xf4
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xf5
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xf6
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xf7
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xf8
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xf9
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xfa
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xfb
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xfc
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xfd
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xfe
    { s_single,   s_term,     s_string,   s_single,  s_eol,      },  // 0xff
};


// Construct a tokenizer that starts reading from the file named fn.
//   fn        name of the first file to open
//   includes  caller supplied set; include() adds the name of every file it
//             manages to open to this set
// A failure to open fn is reported by include() through token_error(); the
// result of include() is not examined here.
TOKEN::TOKEN(const char *fn, string_set *includes) {
    include_files = includes;   // include() dereferences this, so set it first
    pushed        = false;      // nothing has been pushed back yet
    include(fn);
}


// Close and free every input stream that is still open.
TOKEN::~TOKEN() {
    for (;;) {
        if (streams.empty()) break;
        pop();  // closes and deletes the front stream
    }
}


void TOKEN::pop() {
    ifstream *is = streams.front();
    const char *fn = filenames.front();
    streams.pop_front();
    filenamess.erase(fn);
    if (filenames.size() > 1)   filenames.pop_front();
    if (linenumbers.size() > 1) linenumbers.pop_front();
    is->close();
    delete is;
}


// Push one character back so that the next call to next_char() returns it
// again. Only a single character is remembered; a second push before the
// first one is consumed overwrites it.
void TOKEN::push_char(u_char c) {
    pushed_char = c;
    pushed      = true;
}


// Fetch the next input character, converted to lower case.
//   uc       receives the character
//   returns  true if a character was delivered, false when every input
//            stream has been exhausted
//
// A character saved by push_char() is delivered first. Otherwise the front
// stream is read; when it is exhausted (or can no longer be read) it is
// popped and reading continues with the stream below it, i.e. the file that
// included it. The line counter of the current file is advanced on '\n'.
bool TOKEN::next_char(u_char &uc) {
    if (pushed) {
        // tolower() requires a value representable as unsigned char (or EOF);
        // passing a plain char is undefined for bytes >= 0x80 when char is signed
        uc = (u_char)tolower((u_char)pushed_char);
        pushed = false;
        return true;
    }
    while (!streams.empty()) {
        ifstream *is = streams.front();
        const int ch = is->get();
        if (is->fail()) {
            // end of file, or a read error (e.g. the name was a directory);
            // testing only eof() here would return 0xff forever on a bad stream
            pop();
            continue;
        }
        uc = (u_char)ch;
        if (uc == (u_char)'\n') {
            linenumbers.front()++;
        }
        uc = (u_char)tolower(uc);
        return true;
    }
    return false;
}


bool TOKEN::include(const char *fn) {
    string_set::iterator i = filenamess.find(fn);
    if (i != filenamess.end()) {
        token_error("redundant or recursive include file detected");
        return false;
    }
    ifstream *is = new ifstream;
    is->open(fn);
    if (is->fail()) {
        char buf[maxlen];
        snprintf(buf, sizeof(buf), "include file %s not found", fn);
        token_error(buf);
        return false;
    }
    string_set &inc = *include_files;
    inc.insert(fn);
    streams.push_front(is);
    filenames.push_front(fn);
    filenamess.insert(fn);
    linenumbers.push_front(1);
    return true;
}


// Return the next token, or NULL if none could be read.
//
// Tokens that were pushed back (pending_tokens) are returned first. Otherwise
// characters from next_char() are run through parse_table until the machine
// returns to s_init, and the collected text is passed to register_string();
// the pointer it returns is the token.
//
// If the token is token_include and is followed by a file name and
// token_semi, the named file is opened with include() and the first token
// read after that is returned instead.
//
// NOTE(review): an empty quoted string ("") collects zero characters and so
// returns NULL, which is indistinguishable from end of input -- confirm
// whether callers rely on this.
const char *TOKEN::next() {
    if (!pending_tokens.empty()) {
        const char *t = pending_tokens.front();
        pending_tokens.pop_front();
        return t;
    }
    if (streams.empty()) return NULL;
    const int PENDING_LIMIT = 1000;
    u_char buffer[PENDING_LIMIT];
    int count = 0;
    state st = s_init;
    while (true) {
        // each iteration stores at most one character, so stopping here
        // leaves room for the terminating '\0'
        if (count == (PENDING_LIMIT-1)) {
            token_error("token too long");
            break;
        }
        // st is used as a column index below and must be a persistent state;
        // this triggers after the default case of the switch left it alone
        if (st >= end_state) {
            token_error("finite state machine error");
            break;
        }
        u_char c;
        if (!next_char(c)) break;
        st = parse_table[c][st];
        switch (st) {
            case s_string:
            case s_token: {
                buffer[count++] = c;
            } break;

            case s_term: {
                // c belongs to whatever follows this token, give it back
                push_char(c);
                st = s_init;
            } break;

            case s_string1: {
                // opening quote is not part of the token
                st = s_string;
            } break;

            case s_string2: {
                // closing quote (or lf/cr) ends the string and is dropped
                st = s_init;
            } break;

            case s_single: {
                buffer[count++] = c;
                st = s_init;
            } break;

            case s_ignore:
            case s_eol: {
            } break;


            case s_slash: {
                // tentatively keep the / and look at the following character
                buffer[count++] = c;
                if (next_char(c)) {
                    if (c == (u_char)'/') {
                        // start of ignore to eol on //
                        count--;
                        st = s_eol;
                    }
                    else {
                        // not a // token, just return this single /
                        push_char(c);
                        st = s_init;
                    }
                }
                else {
                    // cannot get another char
                    st = s_init;
                }
            } break;

            default: {
                token_error();
                token_error("unknown state %d %s", st, " ");
            } break;
        }
        if (st == s_init) break;
    }

    buffer[count] = '\0';
    if (count == 0) return NULL;
    const char *t = register_string((char*)buffer);
    // pointer comparison: presumably register_string() returns one shared
    // pointer per distinct string -- verify against its definition
    if (t == token_include) {
        const char *f = next();   // should be file name
        const char *s = next();   // should be semicolon
        if (s == token_semi) {
            include(f);
            return next();
        }
        else {
            // not an include statement after all; hand both tokens to push()
            // (s first, then f) and return the include token itself
            push(s);
            push(f);
            return t;
        }
    }
    return t;
}


// Read the next token and convert it to a decimal integer.
//   returns  the value of the token, or 0 after logging an
//            "expecting integer" error when there is no further token, the
//            token is not entirely a decimal number, or the number does not
//            fit in an int
int TOKEN::nextint() {
    const char *t = next();
    if (!t) {
        // next() returns NULL at end of input; strtol() must not see NULL
        token_error("integer", "end of file");
        return 0;
    }
    char *e;
    long i = strtol(t, &e, 10);
    if (*e != '\0') {
        token_error("integer", t);
        return 0;
    }
    if ((long)(int)i != i) {
        // value does not survive the narrowing to int; report it rather than
        // silently returning a truncated number
        token_error("integer", t);
        return 0;
    }
    return (int)i;
}


void TOKEN::skipeol() {
    while (true) {
        u_char c;
        if (!next_char(c)) break;
        if (c == (u_char)'\n') break;
    }
}


void TOKEN::token_error(const char *err) {
    token_error();
    char buf[maxlen];
    snprintf(buf, sizeof(buf), "%s \n", err);
    my_syslog(buf);
}


void TOKEN::token_error(const char *fmt, int d, const char *s) {
    char buf[maxlen];
    snprintf(buf, sizeof(buf), fmt, d, s);
    my_syslog(buf);
}


void TOKEN::token_error(const char *fmt, const char *t, const char *h) {
    if (!h) h = "null";
    char buf[maxlen];
    snprintf(buf, sizeof(buf), fmt, t, h);
    my_syslog(buf);
}


// Log the current location followed by an "expecting <want>, found <have>"
// message. have may be NULL (the three string overload substitutes "null").
void TOKEN::token_error(const char *want, const char *have) {
    token_error();  // where the problem was seen
    const char *const format = "expecting %s, found %s";
    token_error(format, want, have);
}


void TOKEN::token_error() {
    token_error("syntax error at line %d in file %s -- ", cur_line(), cur_fn());
    line_list::iterator         j = linenumbers.begin();
    string_list::const_iterator i = filenames.begin();
    for (; i!=filenames.end(); i++,j++) {
        if (i != filenames.begin()) {
            const char *fn = (*i);
            int   li = (*j);
            token_error("    included from line %d in file %s -- ", li, fn);
        }
    }
}