Mercurial > libpst
comparison src/vbuf.c @ 116:ed2a260bbb98 stable-0-6-25
improve handling of content-type charset values in mime parts
author | Carl Byington <carl@five-ten-sg.com> |
---|---|
date | Fri, 16 Jan 2009 15:23:52 -0800 |
parents | cb14583c119a |
children | 6395ced2b8b2 |
comparison legend:
equal
deleted
inserted
replaced
115:7689c006b166 | 116:ed2a260bbb98 |
---|---|
38 | 38 |
39 nextr = memchr(vs->b, '\r', vs->dlen); | 39 nextr = memchr(vs->b, '\r', vs->dlen); |
40 nextn = memchr(vs->b, '\n', vs->dlen); | 40 nextn = memchr(vs->b, '\n', vs->dlen); |
41 | 41 |
42 //case 1: UNIX, we find \n first | 42 //case 1: UNIX, we find \n first |
43 if (nextn && (nextr == NULL || nextr > nextn)) { | 43 if (nextn && (!nextr || (nextr > nextn))) { |
44 return nextn - vs->b; | 44 return nextn - vs->b; |
45 } | 45 } |
46 //case 2: DOS, we find \r\n | 46 //case 2: DOS, we find \r\n |
47 if (NULL != nextr && NULL != nextn && 1 == (char *) nextn - (char *) nextr) { | 47 if (nextr && nextn && (nextn-nextr == 1)) { |
48 return nextr - vs->b; | 48 return nextr - vs->b; |
49 } | 49 } |
50 //case 3: we find nothing | 50 //case 3: we find nothing |
51 | 51 |
52 return -1; | 52 return -1; |
53 } | 53 } |
54 | 54 |
55 | 55 |
56 // UTF8 <-> UTF16 <-> ISO8859 Character set conversion functions and (ack) their globals | 56 // UTF8 <-> UTF16 <-> ISO8859 Character set conversion functions and (ack) their globals |
57 | 57 |
58 //TODO: the following should not be | |
59 char *wwbuf = NULL; | |
60 size_t nwwbuf = 0; | |
61 static int unicode_up = 0; | 58 static int unicode_up = 0; |
62 iconv_t i16to8, i8to16, i8859_1to8, i8toi8859_1; | 59 static iconv_t i16to8; |
60 static const char *target_charset = NULL; | |
61 static iconv_t i8totarget; | |
63 | 62 |
64 | 63 |
65 void unicode_init() | 64 void unicode_init() |
66 { | 65 { |
67 char *wipe = ""; | 66 if (unicode_up) unicode_close(); |
68 char dump[4]; | 67 i16to8 = iconv_open("UTF-8", "UTF-16LE"); |
69 | 68 if (i16to8 == (iconv_t)-1) { |
70 if (unicode_up) | 69 fprintf(stderr, "Couldn't open iconv descriptor for UTF-16LE to UTF-8.\n"); |
71 unicode_close(); | |
72 | |
73 if ((iconv_t) - 1 == (i16to8 = iconv_open("UTF-8", "UTF-16LE"))) { | |
74 fprintf(stderr, "doexport(): Couldn't open iconv descriptor for UTF-16LE to UTF-8.\n"); | |
75 exit(1); | 70 exit(1); |
76 } | 71 } |
77 | |
78 if ((iconv_t) - 1 == (i8to16 = iconv_open("UTF-16LE", "UTF-8"))) { | |
79 fprintf(stderr, "doexport(): Couldn't open iconv descriptor for UTF-8 to UTF-16LE.\n"); | |
80 exit(2); | |
81 } | |
82 //iconv will prefix output with an FF FE (utf-16 start seq), the following dumps that. | |
83 memset(dump, 'x', 4); | |
84 ASSERT(0 == utf8to16(wipe, 1, dump, 4), "unicode_init(): attempt to dump FF FE failed."); | |
85 | |
86 if ((iconv_t) - 1 == (i8859_1to8 = iconv_open("UTF-8", "ISO_8859-1"))) { | |
87 fprintf(stderr, "doexport(): Couldn't open iconv descriptor for ASCII to UTF-8.\n"); | |
88 exit(1); | |
89 } | |
90 | |
91 if ((iconv_t) - 1 == (i8toi8859_1 = iconv_open("ISO_8859-1", "UTF-8"))) { | |
92 fprintf(stderr, "doexport(): Couldn't open iconv descriptor for UTF-8 to ASCII.\n"); | |
93 exit(1); | |
94 } | |
95 | |
96 unicode_up = 1; | 72 unicode_up = 1; |
97 } | 73 } |
98 | 74 |
99 | 75 |
100 void unicode_close() | 76 void unicode_close() |
101 { | 77 { |
78 iconv_close(i16to8); | |
79 if (target_charset) { | |
80 iconv_close(i8totarget); | |
81 free((char *)target_charset); | |
82 target_charset = NULL; | |
83 } | |
102 unicode_up = 0; | 84 unicode_up = 0; |
103 iconv_close(i8to16); | 85 } |
104 iconv_close(i16to8); | 86 |
105 iconv_close(i8859_1to8); | 87 |
106 iconv_close(i8toi8859_1); | 88 int utf16_is_terminated(const char *str, int length) |
107 } | |
108 | |
109 | |
110 int utf16_is_terminated(char *str, int length) | |
111 { | 89 { |
112 VSTR_STATIC(errbuf, 100); | 90 VSTR_STATIC(errbuf, 100); |
113 int len = -1; | 91 int len = -1; |
114 int i; | 92 int i; |
115 for (i = 0; i < length; i += 2) { | 93 for (i = 0; i < length; i += 2) { |
125 | 103 |
126 return (-1 == len) ? 0 : 1; | 104 return (-1 == len) ? 0 : 1; |
127 } | 105 } |
128 | 106 |
129 | 107 |
130 int vb_utf16to8(vbuf * dest, char *buf, int len) | 108 size_t vb_utf16to8(vbuf *dest, const char *inbuf, int iblen) |
131 { | 109 { |
132 size_t inbytesleft = len; | 110 size_t inbytesleft = iblen; |
133 char *inbuf = buf; | 111 size_t icresult = (size_t)-1; |
134 size_t icresult = (size_t)-1; | |
135 VBUF_STATIC(dumpster, 100); | |
136 | |
137 size_t outbytesleft = 0; | 112 size_t outbytesleft = 0; |
138 char *outbuf = NULL; | 113 char *outbuf = NULL; |
139 | 114 |
140 ASSERT(unicode_up, "vb_utf16to8() called before unicode started."); | 115 ASSERT(unicode_up, "vb_utf16to8() called before unicode started."); |
141 | 116 |
142 if (2 > dest->blen) | 117 if (2 > dest->blen) vbresize(dest, 2); |
143 vbresize(dest, 2); | |
144 dest->dlen = 0; | 118 dest->dlen = 0; |
145 | 119 |
146 //Bad Things can happen if a non-zero-terminated utf16 string comes through here | 120 //Bad Things can happen if a non-zero-terminated utf16 string comes through here |
147 if (!utf16_is_terminated(buf, len)) | 121 if (!utf16_is_terminated(inbuf, iblen)) |
148 return -1; | 122 return (size_t)-1; |
149 | 123 |
150 do { | 124 do { |
151 outbytesleft = dest->blen - dest->dlen; | 125 outbytesleft = dest->blen - dest->dlen; |
152 outbuf = dest->b + dest->dlen; | 126 outbuf = dest->b + dest->dlen; |
153 icresult = iconv(i16to8, &inbuf, &inbytesleft, &outbuf, &outbytesleft); | 127 icresult = iconv(i16to8, (ICONV_CONST char**)&inbuf, &inbytesleft, &outbuf, &outbytesleft); |
154 dest->dlen = outbuf - dest->b; | 128 dest->dlen = outbuf - dest->b; |
155 vbgrow(dest, inbytesleft); | 129 vbgrow(dest, inbytesleft); |
156 } while ((size_t)-1 == icresult && E2BIG == errno); | 130 } while ((size_t)-1 == icresult && E2BIG == errno); |
157 | 131 |
158 if (0 != vb_utf8to16T(dumpster, dest->b, dest->dlen)) | |
159 DIE(("Reverse conversion failed.")); | |
160 | |
161 if (icresult == (size_t)-1) { | |
162 //TODO: error | |
163 //ERR_UNIX( errno, "vb_utf16to8():iconv failure: %s", strerror( errno ) ); | |
164 unicode_init(); | |
165 return -1; | |
166 /* | |
167 fprintf(stderr, " attempted to convert:\n"); | |
168 hexdump( (char*)cin, 0, inlen, 1 ); | |
169 fprintf(stderr, " result:\n"); | |
170 hexdump( (char*)bout->b, 0, bout->dlen, 1 ); | |
171 fprintf(stderr, " MyDirtyOut:\n"); | |
172 for( i=0; i<inlen; i++) { | |
173 if( inbuf[i] != '\0' ) fprintf(stderr, "%c", inbuf[i] ); | |
174 } | |
175 | |
176 fprintf( stderr, "\n" ); | |
177 raise( SIGSEGV ); | |
178 exit(1); | |
179 */ | |
180 } | |
181 | |
182 if (icresult) { | |
183 //ERR_UNIX( EILSEQ, "Uhhhh...vb_utf16to8() returning icresult == %d", icresult ); | |
184 return -1; | |
185 } | |
186 return icresult; | |
187 } | |
188 | |
189 | |
190 int utf8to16(char *inbuf_o, int iblen, char *outbuf_o, int oblen) // iblen, oblen: bytes including \0 | |
191 { | |
192 //TODO: this is *only* used to dump the utf16 preamble now... | |
193 //TODO: This (and 8to16) are the most horrible things I have ever seen... | |
194 size_t inbytesleft = 0; | |
195 size_t outbytesleft = oblen; | |
196 char *inbuf = inbuf_o; | |
197 char *outbuf = outbuf_o; | |
198 size_t icresult = (size_t)-1; | |
199 char *stend; | |
200 | |
201 stend = memchr(inbuf_o, '\0', iblen); | |
202 ASSERT(NULL != stend, "utf8to16(): in string not zero terminated."); | |
203 inbytesleft = (stend - inbuf_o + 1 < iblen) ? stend - inbuf_o + 1 : iblen; | |
204 icresult = iconv(i8to16, &inbuf, &inbytesleft, &outbuf, &outbytesleft); | |
205 | |
206 if (icresult == (size_t)-1) { | |
207 DIE(("iconv failure(%d): %s\n", errno, strerror(errno))); | |
208 } | |
209 if (icresult > (size_t)INT_MAX) { | |
210 return (-1); | |
211 } | |
212 return (int) icresult; | |
213 } | |
214 | |
215 | |
216 int vb_utf8to16T(vbuf * bout, char *cin, int inlen) | |
217 { | |
218 //TODO: This (and 8to16) are the most horrible things I have ever seen... | |
219 size_t inbytesleft = inlen; | |
220 char *inbuf = cin; | |
221 //int rlen = -1, tlen; | |
222 size_t icresult = (size_t)-1; | |
223 size_t outbytesleft = 0; | |
224 char *outbuf = NULL; | |
225 | |
226 if (2 > bout->blen) | |
227 vbresize(bout, 2); | |
228 bout->dlen = 0; | |
229 | |
230 do { | |
231 outbytesleft = bout->blen - bout->dlen; | |
232 outbuf = bout->b + bout->dlen; | |
233 icresult = iconv(i8to16, &inbuf, &inbytesleft, &outbuf, &outbytesleft); | |
234 bout->dlen = outbuf - bout->b; | |
235 vbgrow(bout, 20); | |
236 } while ((size_t)-1 == icresult && E2BIG == errno); | |
237 | |
238 if (icresult == (size_t)-1) { | 132 if (icresult == (size_t)-1) { |
239 WARN(("iconv failure: %s", strerror(errno))); | 133 WARN(("iconv failure: %s", strerror(errno))); |
240 unicode_init(); | 134 unicode_init(); |
241 return -1; | 135 return (size_t)-1; |
242 } | 136 } |
243 if (icresult > (size_t) INT_MAX) { | 137 return (icresult) ? (size_t)-1 : 0; |
244 return (-1); | 138 } |
245 } | 139 |
246 return icresult; | 140 |
247 } | 141 size_t vb_utf8to8bit(vbuf *dest, const char *inbuf, int iblen, const char* charset) |
248 | 142 { |
249 | 143 size_t inbytesleft = iblen; |
250 /* Quick and dirty UNICODE to std. ascii */ | 144 size_t icresult = (size_t)-1; |
251 void cheap_uni2ascii(char *src, char *dest, int l) | 145 size_t outbytesleft = 0; |
252 { | 146 char *outbuf = NULL; |
253 | 147 |
254 for (; l > 0; l -= 2) { | 148 if (!target_charset || (target_charset && strcasecmp(target_charset, charset))) { |
255 *dest = *src; | 149 if (target_charset) { |
256 dest++; | 150 iconv_close(i8totarget); |
257 src += 2; | 151 free((char *)target_charset); |
258 } | 152 } |
259 *dest = 0; | 153 target_charset = strdup(charset); |
260 } | 154 i8totarget = iconv_open(target_charset, "UTF-8"); |
261 | 155 if (i8totarget == (iconv_t)-1) { |
262 | 156 fprintf(stderr, "Couldn't open iconv descriptor for UTF-8 to %s.\n", target_charset); |
263 /* Quick and dirty ascii to unicode */ | 157 return (size_t)-1; |
264 void cheap_ascii2uni(char *src, char *dest, int l) | 158 } |
265 { | 159 } |
266 for (; l > 0; l--) { | 160 |
267 *dest++ = *src++; | 161 if (2 > dest->blen) vbresize(dest, 2); |
268 *dest++ = 0; | 162 dest->dlen = 0; |
269 | 163 |
270 } | 164 do { |
165 outbytesleft = dest->blen - dest->dlen; | |
166 outbuf = dest->b + dest->dlen; | |
167 icresult = iconv(i8totarget, (ICONV_CONST char**)&inbuf, &inbytesleft, &outbuf, &outbytesleft); | |
168 dest->dlen = outbuf - dest->b; | |
169 vbgrow(dest, 20); | |
170 } while ((size_t)-1 == icresult && E2BIG == errno); | |
171 | |
172 if (icresult == (size_t)-1) { | |
173 WARN(("iconv failure: %s", strerror(errno))); | |
174 unicode_init(); | |
175 return (size_t)-1; | |
176 } | |
177 return (icresult) ? (size_t)-1 : 0; | |
271 } | 178 } |
272 | 179 |
273 | 180 |
274 vbuf *vballoc(size_t len) | 181 vbuf *vballoc(size_t len) |
275 { | 182 { |
607 vbgrow((vbuf *) vs, size); | 514 vbgrow((vbuf *) vs, size); |
608 } | 515 } |
609 } | 516 } |
610 | 517 |
611 | 518 |
612 void vshexdump(vstr * vs, char *b, size_t start, size_t stop, int ascii) | 519 void vshexdump(vstr * vs, const char *b, size_t start, size_t stop, int ascii) |
613 { | 520 { |
614 char c; | 521 char c; |
615 int diff, i; | 522 int diff, i; |
616 | 523 |
617 while (start < stop) { | 524 while (start < stop) { |