Mercurial > libpst
changeset 116:ed2a260bbb98 stable-0-6-25
improve handling of content-type charset values in mime parts
author | Carl Byington <carl@five-ten-sg.com> |
---|---|
date | Fri, 16 Jan 2009 15:23:52 -0800 |
parents | 7689c006b166 |
children | 0a3d854b53f6 |
files | ChangeLog NEWS configure.in libpst.spec.in regression/regression-tests.bash src/libpst.c src/libpst.h src/readpst.c src/vbuf.c src/vbuf.h |
diffstat | 10 files changed, 177 insertions(+), 241 deletions(-) [+] |
line wrap: on
line diff
--- a/ChangeLog Thu Dec 11 12:06:03 2008 -0800 +++ b/ChangeLog Fri Jan 16 15:23:52 2009 -0800 @@ -1,3 +1,7 @@ +LibPST 0.6.25 (2009-01-16) +=============================== + * improve handling of content-type charset values in mime parts + LibPST 0.6.24 (2008-12-11) =============================== * patch from Chris Eagle to build on cygwin
--- a/NEWS Thu Dec 11 12:06:03 2008 -0800 +++ b/NEWS Fri Jan 16 15:23:52 2009 -0800 @@ -1,3 +1,4 @@ +0.6.25 2009-01-16 improve handling of content-type charset values in mime parts 0.6.24 2008-12-11 patch from Chris Eagle to build on cygwin 0.6.23 2008-12-04 bump version to avoid cvs tagging mistake in fedora 0.6.22 2008-11-28 process emails with type PST_TYPE_OTHER, fix malloc error and possible segfault
--- a/configure.in Thu Dec 11 12:06:03 2008 -0800 +++ b/configure.in Fri Jan 16 15:23:52 2009 -0800 @@ -1,5 +1,5 @@ AC_PREREQ(2.59) -AC_INIT(libpst,0.6.24,carl@five-ten-sg.com) +AC_INIT(libpst,0.6.25,carl@five-ten-sg.com) AC_CONFIG_SRCDIR([config.h.in]) AC_CONFIG_HEADER([config.h])
--- a/libpst.spec.in Thu Dec 11 12:06:03 2008 -0800 +++ b/libpst.spec.in Fri Jan 16 15:23:52 2009 -0800 @@ -47,6 +47,9 @@ %changelog +* Fri Jan 16 2009 Carl Byington <carl@five-ten-sg.com> - 0.6.25-1 +- improve handling of content-type charset values in mime parts + * Thu Dec 11 2008 Carl Byington <carl@five-ten-sg.com> - 0.6.24-1 - patch from Chris Eagle to build on cygwin
--- a/regression/regression-tests.bash Thu Dec 11 12:06:03 2008 -0800 +++ b/regression/regression-tests.bash Fri Jan 16 15:23:52 2009 -0800 @@ -19,7 +19,7 @@ fn="$2" rm -rf output$n mkdir output$n - $val ../src/readpst -cv -o output$n -d dumper $fn >$fn.pst.err 2>&1 + $val ../src/readpst -cv -o output$n -d dumper $fn >$fn.err 2>&1 ../src/readpstlog -f I dumper >$fn.log #$val ../src/pst2ldif -b 'o=ams-cc.com, c=US' -c 'newPerson' -o $fn >$fn.ldif.err 2>&1 #$val ../src/pst2ldif -b 'o=ams-cc.com, c=US' -c 'inetOrgPerson' $fn >$fn.ldif2.err 2>&1 @@ -40,19 +40,20 @@ dodii 2 sample_64.pst dodii 3 test.pst dodii 4 big_mail.pst -elif [ "$1" == "flow" ]; then - dopst 11 flow.pst else - dopst 1 ams.pst - #dopst 2 sample_64.pst - #dopst 3 test.pst - #dopst 4 big_mail.pst - #dopst 5 mbmg.archive.pst - #dopst 6 Single2003-read.pst - #dopst 7 Single2003-unread.pst - #dopst 8 ol2k3high.pst - #dopst 9 ol97high.pst - #dopst 10 returned_message.pst - #dopst 11 flow.pst + #dopst 1 ams.pst + #dopst 2 sample_64.pst + #dopst 3 test.pst + #dopst 4 big_mail.pst + #dopst 5 mbmg.archive.pst + #dopst 6 Single2003-read.pst + #dopst 7 Single2003-unread.pst + #dopst 8 ol2k3high.pst + #dopst 9 ol97high.pst + #dopst 10 returned_message.pst + #dopst 11 flow.pst + #dopst 12 test-html.pst + dopst 13 test-text.pst + #dopst 14 joe.romanowski.pst fi
--- a/src/libpst.c Thu Dec 11 12:06:03 2008 -0800 +++ b/src/libpst.c Fri Jan 16 15:23:52 2009 -0800 @@ -1606,6 +1606,7 @@ } if (table_rec.ref_type == (uint16_t)0x1f) { // there is more to do for the type 0x1f unicode strings + size_t rc; static vbuf *strbuf = NULL; static vbuf *unibuf = NULL; if (!strbuf) strbuf=vballoc((size_t)1024); @@ -1620,11 +1621,17 @@ vbappend(strbuf, "\0\0", (size_t)2); DEBUG_INDEX(("Iconv in:\n")); DEBUG_HEXDUMPC(strbuf->b, strbuf->dlen, 0x10); - (void)vb_utf16to8(unibuf, strbuf->b, strbuf->dlen); - free(na_ptr->items[x]->data); - na_ptr->items[x]->size = unibuf->dlen; - na_ptr->items[x]->data = xmalloc(unibuf->dlen); - memcpy(na_ptr->items[x]->data, unibuf->b, unibuf->dlen); + rc = vb_utf16to8(unibuf, strbuf->b, strbuf->dlen); + if (rc == (size_t)-1) { + free(unibuf->b); + DEBUG_EMAIL(("Failed to convert utf-16 to utf-8\n")); + } + else { + free(na_ptr->items[x]->data); + na_ptr->items[x]->size = unibuf->dlen; + na_ptr->items[x]->data = xmalloc(unibuf->dlen); + memcpy(na_ptr->items[x]->data, unibuf->b, unibuf->dlen); + } DEBUG_INDEX(("Iconv out:\n")); DEBUG_HEXDUMPC(na_ptr->items[x]->data, na_ptr->items[x]->size, 0x10); } @@ -1732,6 +1739,22 @@ ef->next = item->extra_fields; item->extra_fields = ef; DEBUG_EMAIL(("\"%s\" = \"%s\"\n", ef->field_name, ef->value)); + if (strcmp(ef->field_name, "content-type") == 0) { + char *p = strstr(ef->value, "charset=\""); + if (p) { + p += 9; // skip over charset=" + char *pp = strchr(p, '"'); + if (pp) { + *pp = '\0'; + char *set = strdup(p); + *pp = '"'; + MALLOC_EMAIL(item); + if (item->email->body_charset) free(item->email->body_charset); + item->email->body_charset = set; + DEBUG_EMAIL(("body charset %s from content-type extra field\n", set)); + } + } + } } else { DEBUG_EMAIL(("NULL extra field\n")); @@ -2209,8 +2232,7 @@ DEBUG_EMAIL(("Plain Text body - ")); MALLOC_EMAIL(item); LIST_COPY(item->email->body, (char*)); - //DEBUG_EMAIL("%s\n", item->email->body); - DEBUG_EMAIL(("NOT PRINTED\n")); + DEBUG_EMAIL(("%s\n", item->email->body)); break; case 0x1006: // PR_RTF_SYNC_BODY_CRC DEBUG_EMAIL(("RTF Sync Body CRC - ")); @@ -2261,8 +2283,7 @@ DEBUG_EMAIL(("HTML body - ")); MALLOC_EMAIL(item); LIST_COPY(item->email->htmlbody, (char*)); - // DEBUG_EMAIL(("%s\n", item->email->htmlbody)); - DEBUG_EMAIL(("NOT PRINTED\n")); + DEBUG_EMAIL(("%s\n", item->email->htmlbody)); break; case 0x1035: // Message ID DEBUG_EMAIL(("Message ID - ")); @@ -3699,6 +3720,7 @@ if (item->email) { SAFE_FREE(item->email->arrival_date); SAFE_FREE(item->email->body); + SAFE_FREE(item->email->body_charset); SAFE_FREE(item->email->cc_address); SAFE_FREE(item->email->bcc_address); SAFE_FREE(item->email->common_name);
--- a/src/libpst.h Thu Dec 11 12:06:03 2008 -0800 +++ b/src/libpst.h Fri Jan 16 15:23:52 2009 -0800 @@ -211,6 +211,7 @@ FILETIME *arrival_date; int autoforward; // 1 = true, 0 = not set, -1 = false char *body; + char *body_charset; // null if not specified char *cc_address; char *bcc_address; char *common_name;
--- a/src/readpst.c Thu Dec 11 12:06:03 2008 -0800 +++ b/src/readpst.c Fri Jan 16 15:23:52 2009 -0800 @@ -6,6 +6,7 @@ */ #include "define.h" #include "libstrfunc.h" +#include "vbuf.h" #include "libpst.h" #include "common.h" #include "timeconv.h" @@ -762,11 +763,11 @@ char *attach_filename; fprintf(f_output, "\n--%s\n", boundary); if (!current_attach->mimetype) { - fprintf(f_output, "Content-type: %s\n", MIME_TYPE_DEFAULT); + fprintf(f_output, "Content-Type: %s\n", MIME_TYPE_DEFAULT); } else { - fprintf(f_output, "Content-type: %s\n", current_attach->mimetype); + fprintf(f_output, "Content-Type: %s\n", current_attach->mimetype); } - fprintf(f_output, "Content-transfer-encoding: base64\n"); + fprintf(f_output, "Content-Transfer-Encoding: base64\n"); // If there is a long filename (filename2) use that, otherwise // use the 8.3 filename (filename1) if (current_attach->filename2) { @@ -822,7 +823,7 @@ // see if there is a boundary variable there // this search MUST be made case insensitive (DONE). // Also, we should check to find out if we are looking - // at the boundary associated with content-type, and that + // at the boundary associated with Content-Type, and that // the content type really is multipart removeCR(item->email->header); @@ -1024,12 +1025,10 @@ // in the headers above. if (item->attach) { // write the boundary stuff if we have attachments - fprintf(f_output, "Content-type: multipart/mixed;\n\tboundary=\"%s\"\n", boundary); - } else if (boundary) { - // else if we have multipart/alternative then tell it so - fprintf(f_output, "Content-type: multipart/alternative;\n\tboundary=\"%s\"\n", boundary); - } else if (item->email->htmlbody) { - fprintf(f_output, "Content-type: text/html\n"); + fprintf(f_output, "Content-Type: multipart/mixed;\n\tboundary=\"%s\"\n", boundary); + } else { + // else we have multipart/alternative then tell it so + fprintf(f_output, "Content-Type: multipart/alternative;\n\tboundary=\"%s\"\n", boundary); } } fprintf(f_output, "\n"); // start the body @@ -1037,12 +1036,57 @@ if (item->email->body) { if (boundary) { + // try to find the charset for this body part + const char *def = "utf-8"; + // it seems that if (item->email->body_charset) is set, then + // we actually have utf8 plain body text. If that is not set + // we have plain body text in an 8 bit charset specified in + // the headers. + char *c = my_stristr(item->email->header, "\nContent-Type:"); + if (c) { + c++; + char *n = my_stristr(c, "\n"); // termination on the content type + if (n) { + char *s = my_stristr(c, "; charset="); + if (s && (s < n)) { + char *e; + s += 10; // skip over charset= + if (*s == '"') { + s++; + e = my_stristr(s, "\""); + } + else { + e = my_stristr(s, ";"); + } + if (!e || (e > n)) e = n; // use the trailing lf as terminator if nothing better + *e = '\0'; // corrupt the header, but we have already printed it + def = s; + DEBUG_EMAIL(("body charset %s from headers\n", def)); + } + } + } fprintf(f_output, "\n--%s\n", boundary); - fprintf(f_output, "Content-type: text/plain\n"); + fprintf(f_output, "Content-Type: text/plain; charset=\"%s\"\n", def); if (base64_body) fprintf(f_output, "Content-Transfer-Encoding: base64\n"); fprintf(f_output, "\n"); } + else if (item->email->body_charset && (strcasecmp("utf-8",item->email->body_charset))) { + // try to convert to the specified charset since it is not utf-8 + size_t rc; + DEBUG_EMAIL(("Convert plain text utf-8 to %s\n", item->email->body_charset)); + vbuf *newer = vballoc(2); + rc = vb_utf8to8bit(newer, item->email->body, strlen(item->email->body) + 1, item->email->body_charset); + if (rc == (size_t)-1) { + free(newer->b); + DEBUG_EMAIL(("Failed to convert plain text utf-8 to %s\n", item->email->body_charset)); + } + else { + free(item->email->body); + item->email->body = newer->b; + } + free(newer); + } removeCR(item->email->body); if (base64_body) { char *enc = base64_encode(item->email->body, strlen(item->email->body)); @@ -1058,8 +1102,10 @@ if (item->email->htmlbody) { if (boundary) { + const char *def = "utf-8"; + if (item->email->body_charset) def = item->email->body_charset; fprintf(f_output, "\n--%s\n", boundary); - fprintf(f_output, "Content-type: text/html\n"); + fprintf(f_output, "Content-Type: text/html; charset=\"%s\"\n", def); if (base64_body) fprintf(f_output, "Content-Transfer-Encoding: base64\n"); fprintf(f_output, "\n"); }
--- a/src/vbuf.c Thu Dec 11 12:06:03 2008 -0800 +++ b/src/vbuf.c Fri Jan 16 15:23:52 2009 -0800 @@ -40,11 +40,11 @@ nextn = memchr(vs->b, '\n', vs->dlen); //case 1: UNIX, we find \n first - if (nextn && (nextr == NULL || nextr > nextn)) { + if (nextn && (!nextr || (nextr > nextn))) { return nextn - vs->b; } //case 2: DOS, we find \r\n - if (NULL != nextr && NULL != nextn && 1 == (char *) nextn - (char *) nextr) { + if (nextr && nextn && (nextn-nextr == 1)) { return nextr - vs->b; } //case 3: we find nothing @@ -55,59 +55,37 @@ // UTF8 <-> UTF16 <-> ISO8859 Character set conversion functions and (ack) their globals -//TODO: the following should not be -char *wwbuf = NULL; -size_t nwwbuf = 0; static int unicode_up = 0; -iconv_t i16to8, i8to16, i8859_1to8, i8toi8859_1; +static iconv_t i16to8; +static const char *target_charset = NULL; +static iconv_t i8totarget; void unicode_init() { - char *wipe = ""; - char dump[4]; - - if (unicode_up) - unicode_close(); - - if ((iconv_t) - 1 == (i16to8 = iconv_open("UTF-8", "UTF-16LE"))) { - fprintf(stderr, "doexport(): Couldn't open iconv descriptor for UTF-16LE to UTF-8.\n"); + if (unicode_up) unicode_close(); + i16to8 = iconv_open("UTF-8", "UTF-16LE"); + if (i16to8 == (iconv_t)-1) { + fprintf(stderr, "Couldn't open iconv descriptor for UTF-16LE to UTF-8.\n"); exit(1); } - - if ((iconv_t) - 1 == (i8to16 = iconv_open("UTF-16LE", "UTF-8"))) { - fprintf(stderr, "doexport(): Couldn't open iconv descriptor for UTF-8 to UTF-16LE.\n"); - exit(2); - } - //iconv will prefix output with an FF FE (utf-16 start seq), the following dumps that. - memset(dump, 'x', 4); - ASSERT(0 == utf8to16(wipe, 1, dump, 4), "unicode_init(): attempt to dump FF FE failed."); - - if ((iconv_t) - 1 == (i8859_1to8 = iconv_open("UTF-8", "ISO_8859-1"))) { - fprintf(stderr, "doexport(): Couldn't open iconv descriptor for ASCII to UTF-8.\n"); - exit(1); - } - - if ((iconv_t) - 1 == (i8toi8859_1 = iconv_open("ISO_8859-1", "UTF-8"))) { - fprintf(stderr, "doexport(): Couldn't open iconv descriptor for UTF-8 to ASCII.\n"); - exit(1); - } - unicode_up = 1; } void unicode_close() { + iconv_close(i16to8); + if (target_charset) { + iconv_close(i8totarget); + free((char *)target_charset); + target_charset = NULL; + } unicode_up = 0; - iconv_close(i8to16); - iconv_close(i16to8); - iconv_close(i8859_1to8); - iconv_close(i8toi8859_1); } -int utf16_is_terminated(char *str, int length) +int utf16_is_terminated(const char *str, int length) { VSTR_STATIC(errbuf, 100); int len = -1; @@ -127,147 +105,76 @@ } -int vb_utf16to8(vbuf * dest, char *buf, int len) +size_t vb_utf16to8(vbuf *dest, const char *inbuf, int iblen) { - size_t inbytesleft = len; - char *inbuf = buf; - size_t icresult = (size_t)-1; - VBUF_STATIC(dumpster, 100); - + size_t inbytesleft = iblen; + size_t icresult = (size_t)-1; size_t outbytesleft = 0; - char *outbuf = NULL; + char *outbuf = NULL; ASSERT(unicode_up, "vb_utf16to8() called before unicode started."); - if (2 > dest->blen) - vbresize(dest, 2); + if (2 > dest->blen) vbresize(dest, 2); dest->dlen = 0; //Bad Things can happen if a non-zero-terminated utf16 string comes through here - if (!utf16_is_terminated(buf, len)) - return -1; + if (!utf16_is_terminated(inbuf, iblen)) + return (size_t)-1; do { outbytesleft = dest->blen - dest->dlen; outbuf = dest->b + dest->dlen; - icresult = iconv(i16to8, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + icresult = iconv(i16to8, (ICONV_CONST char**)&inbuf, &inbytesleft, &outbuf, &outbytesleft); dest->dlen = outbuf - dest->b; vbgrow(dest, inbytesleft); } while ((size_t)-1 == icresult && E2BIG == errno); - if (0 != vb_utf8to16T(dumpster, dest->b, dest->dlen)) - DIE(("Reverse conversion failed.")); - - if (icresult == (size_t)-1) { - //TODO: error - //ERR_UNIX( errno, "vb_utf16to8():iconv failure: %s", strerror( errno ) ); - unicode_init(); - return -1; - /* - fprintf(stderr, " attempted to convert:\n"); - hexdump( (char*)cin, 0, inlen, 1 ); - fprintf(stderr, " result:\n"); - hexdump( (char*)bout->b, 0, bout->dlen, 1 ); - fprintf(stderr, " MyDirtyOut:\n"); - for( i=0; i<inlen; i++) { - if( inbuf[i] != '\0' ) fprintf(stderr, "%c", inbuf[i] ); - } - - fprintf( stderr, "\n" ); - raise( SIGSEGV ); - exit(1); - */ - } - - if (icresult) { - //ERR_UNIX( EILSEQ, "Uhhhh...vb_utf16to8() returning icresult == %d", icresult ); - return -1; - } - return icresult; -} - - -int utf8to16(char *inbuf_o, int iblen, char *outbuf_o, int oblen) // iblen, oblen: bytes including \0 -{ - //TODO: this is *only* used to dump the utf16 preamble now... - //TODO: This (and 8to16) are the most horrible things I have ever seen... - size_t inbytesleft = 0; - size_t outbytesleft = oblen; - char *inbuf = inbuf_o; - char *outbuf = outbuf_o; - size_t icresult = (size_t)-1; - char *stend; - - stend = memchr(inbuf_o, '\0', iblen); - ASSERT(NULL != stend, "utf8to16(): in string not zero terminated."); - inbytesleft = (stend - inbuf_o + 1 < iblen) ? stend - inbuf_o + 1 : iblen; - icresult = iconv(i8to16, &inbuf, &inbytesleft, &outbuf, &outbytesleft); - - if (icresult == (size_t)-1) { - DIE(("iconv failure(%d): %s\n", errno, strerror(errno))); - } - if (icresult > (size_t)INT_MAX) { - return (-1); - } - return (int) icresult; -} - - -int vb_utf8to16T(vbuf * bout, char *cin, int inlen) -{ - //TODO: This (and 8to16) are the most horrible things I have ever seen... - size_t inbytesleft = inlen; - char *inbuf = cin; - //int rlen = -1, tlen; - size_t icresult = (size_t)-1; - size_t outbytesleft = 0; - char *outbuf = NULL; - - if (2 > bout->blen) - vbresize(bout, 2); - bout->dlen = 0; - - do { - outbytesleft = bout->blen - bout->dlen; - outbuf = bout->b + bout->dlen; - icresult = iconv(i8to16, &inbuf, &inbytesleft, &outbuf, &outbytesleft); - bout->dlen = outbuf - bout->b; - vbgrow(bout, 20); - } while ((size_t)-1 == icresult && E2BIG == errno); - if (icresult == (size_t)-1) { WARN(("iconv failure: %s", strerror(errno))); unicode_init(); - return -1; + return (size_t)-1; } - if (icresult > (size_t) INT_MAX) { - return (-1); - } - return icresult; + return (icresult) ? (size_t)-1 : 0; } -/* Quick and dirty UNICODE to std. ascii */ -void cheap_uni2ascii(char *src, char *dest, int l) +size_t vb_utf8to8bit(vbuf *dest, const char *inbuf, int iblen, const char* charset) { + size_t inbytesleft = iblen; + size_t icresult = (size_t)-1; + size_t outbytesleft = 0; + char *outbuf = NULL; - for (; l > 0; l -= 2) { - *dest = *src; - dest++; - src += 2; + if (!target_charset || (target_charset && strcasecmp(target_charset, charset))) { + if (target_charset) { + iconv_close(i8totarget); + free((char *)target_charset); + } + target_charset = strdup(charset); + i8totarget = iconv_open(target_charset, "UTF-8"); + if (i8totarget == (iconv_t)-1) { + fprintf(stderr, "Couldn't open iconv descriptor for UTF-8 to %s.\n", target_charset); + return (size_t)-1; + } } - *dest = 0; -} + if (2 > dest->blen) vbresize(dest, 2); + dest->dlen = 0; -/* Quick and dirty ascii to unicode */ -void cheap_ascii2uni(char *src, char *dest, int l) -{ - for (; l > 0; l--) { - *dest++ = *src++; - *dest++ = 0; + do { + outbytesleft = dest->blen - dest->dlen; + outbuf = dest->b + dest->dlen; + icresult = iconv(i8totarget, (ICONV_CONST char**)&inbuf, &inbytesleft, &outbuf, &outbytesleft); + dest->dlen = outbuf - dest->b; + vbgrow(dest, 20); + } while ((size_t)-1 == icresult && E2BIG == errno); + if (icresult == (size_t)-1) { + WARN(("iconv failure: %s", strerror(errno))); + unicode_init(); + return (size_t)-1; } + return (icresult) ? (size_t)-1 : 0; } @@ -609,7 +516,7 @@ } -void vshexdump(vstr * vs, char *b, size_t start, size_t stop, int ascii) +void vshexdump(vstr * vs, const char *b, size_t start, size_t stop, int ascii) { char c; int diff, i;
--- a/src/vbuf.h Thu Dec 11 12:06:03 2008 -0800 +++ b/src/vbuf.h Fri Jan 16 15:23:52 2009 -0800 @@ -17,19 +17,6 @@ #include <stdarg.h> /***************************************************/ -// Tokenizer const TOK_EMPTY, TOK_ELEMENT, DELIM -#define DELIM '\\' - -#define TOK_EMPTY 0 -#define TOK_DELIM 1 -#define TOK_PARENT 2 -#define TOK_CURRENT 3 -#define TOK_ELEMENT 4 - -#define TOK_ERROR 10 -#define TOK_BUF_SMALL 11 - - // Variable-length buffers struct varbuf { @@ -55,6 +42,9 @@ #define VBUF_STATIC(x,y) static vbuf *x = NULL; if(!x) x = vballoc(y); #define VSTR_STATIC(x,y) static vstr *x = NULL; if(!x) x = vsalloc(y); +int skip_nl( char *s ); // returns the width of the newline at s[0] +int find_nl( vstr *vs ); // find newline of type type in b + // vbuf functions struct varbuf *vballoc( size_t len ); void vbfree( vbuf *vb ); @@ -86,57 +76,18 @@ void vsskipws( vstr *vs ); void vs_printf( vstr *vs, char *fmt, ... ); void vs_printfa( vstr *vs, char *fmt, ... ); -void vshexdump( vstr *vs, char *b, size_t start, size_t stop, int ascii ); +void vshexdump( vstr *vs, const char *b, size_t start, size_t stop, int ascii ); int vscatprintf( vstr *vs, char *fmt, ... ); void vsvprintf( vstr *vs, char *fmt, va_list ap ); void vstrunc( vstr *vs, size_t off ); // Drop chars [off..dlen] int vslast( vstr *vs ); // returns the last character stored in a vstr string void vscharcat( vstr *vs, int ch ); -int vsutf16( vstr *vs, vbuf *in ); //in: in=zero-terminated utf16; out: vs=utf8; returns: 0 on success, else on fail -int vs_parse_escaped_string( vstr *vs, char *str, size_t len ); - - -/* - * Windows unicode output trash - this stuff sucks - * TODO: most of this should not be here - */ void unicode_init(); void unicode_close(); -int utf16_write( FILE* stream, const void *buf, size_t count ); -int utf16_fprintf( FILE* stream, const char *fmt, ... ); -int utf16to8( char *inbuf_o, char *outbuf_o, int length ); -int utf8to16( char *inbuf_o, int iblen, char *outbuf_o, int oblen); -int vb_utf8to16T( vbuf *bout, char *cin, int inlen ); -int vb_utf16to8( vbuf *dest, char *buf, int len ); -int iso8859_1to8( char *inbuf_o, char *outbuf_o, int length ); -int utf8toascii( const char *inbuf_o, char *outbuf_o, int length ); - -/* dump ascii hex in windoze format */ -void winhex(FILE* stream, unsigned char *hbuf, int start, int stop, int loff); -void winhex8(FILE *stream, unsigned char *hbuf, int start, int stop, int loff ); - -void vbwinhex8(vbuf *vb, unsigned char *hbuf, int start, int stop, int loff ); - -/* general search routine, find something in something else */ -int find_in_buf(char *buf, char *what, int sz, int len, int start); +size_t vb_utf16to8(vbuf *dest, const char *inbuf, int iblen); +size_t vb_utf8to8bit(vbuf *dest, const char *inbuf, int iblen, const char* charset); -/* Get INTEGER from memory. This is probably low-endian specific? */ -int get_int( char *array ); - -int find_nl( vstr *vs ); // find newline of type type in b -int skip_nl( char *s ); // returns the width of the newline at s[0] -//int vb_readline( struct varbuf *vb, int *ctype, FILE *in ); // read *AT LEAST* one full line of data from in int vb_skipline( struct varbuf *vb ); // in: vb->b == "stuff\nmore_stuff"; out: vb->b == "more_stuff" -/* Get a string of HEX bytes (space separated), - * or if first char is ' get an ASCII string instead. */ -int gethexorstr(char **c, char *wb); -char *esc_index( char *s, int c ); // just like index(3), but works on strings with escape sequences -char *esc_rindex( char *s, int c ); // just like rindex(3), but works on strings with escape sequences - -char *tok_esc_char( char *s, int *is_esc, int *c ); -int vb_path_token( vbuf *tok, char **path ); // returns things like TOK_EMPTY, TOK_ERROR, complete list at top - -int gettoken( char *tok, int len, char **path, char delim ); // Path tokenizer: increments path, dumps token in tok #endif