diff src/vbuf.c @ 116:ed2a260bbb98 stable-0-6-25

improve handling of content-type charset values in mime parts
author Carl Byington <carl@five-ten-sg.com>
date Fri, 16 Jan 2009 15:23:52 -0800
parents cb14583c119a
children 6395ced2b8b2
line wrap: on
line diff
--- a/src/vbuf.c	Thu Dec 11 12:06:03 2008 -0800
+++ b/src/vbuf.c	Fri Jan 16 15:23:52 2009 -0800
@@ -40,11 +40,11 @@
     nextn = memchr(vs->b, '\n', vs->dlen);
 
     //case 1: UNIX, we find \n first
-    if (nextn && (nextr == NULL || nextr > nextn)) {
+    if (nextn && (!nextr || (nextr > nextn))) {
         return nextn - vs->b;
     }
     //case 2: DOS, we find \r\n
-    if (NULL != nextr && NULL != nextn && 1 == (char *) nextn - (char *) nextr) {
+    if (nextr && nextn && (nextn-nextr == 1)) {
         return nextr - vs->b;
     }
     //case 3: we find nothing
@@ -55,59 +55,37 @@
 
 //  UTF8 <-> UTF16 <-> ISO8859 Character set conversion functions and (ack) their globals
 
-//TODO: the following should not be
-char *wwbuf = NULL;
-size_t nwwbuf = 0;
 static int unicode_up = 0;
-iconv_t i16to8, i8to16, i8859_1to8, i8toi8859_1;
+static iconv_t i16to8;
+static const char *target_charset = NULL;
+static iconv_t    i8totarget;
 
 
 void unicode_init()
 {
-    char *wipe = "";
-    char dump[4];
-
-    if (unicode_up)
-        unicode_close();
-
-    if ((iconv_t) - 1 == (i16to8 = iconv_open("UTF-8", "UTF-16LE"))) {
-        fprintf(stderr, "doexport(): Couldn't open iconv descriptor for UTF-16LE to UTF-8.\n");
+    if (unicode_up) unicode_close();
+    i16to8 = iconv_open("UTF-8", "UTF-16LE");
+    if (i16to8 == (iconv_t)-1) {
+        fprintf(stderr, "Couldn't open iconv descriptor for UTF-16LE to UTF-8.\n");
         exit(1);
     }
-
-    if ((iconv_t) - 1 == (i8to16 = iconv_open("UTF-16LE", "UTF-8"))) {
-        fprintf(stderr, "doexport(): Couldn't open iconv descriptor for UTF-8 to UTF-16LE.\n");
-        exit(2);
-    }
-    //iconv will prefix output with an FF FE (utf-16 start seq), the following dumps that.
-    memset(dump, 'x', 4);
-    ASSERT(0 == utf8to16(wipe, 1, dump, 4), "unicode_init(): attempt to dump FF FE failed.");
-
-    if ((iconv_t) - 1 == (i8859_1to8 = iconv_open("UTF-8", "ISO_8859-1"))) {
-        fprintf(stderr, "doexport(): Couldn't open iconv descriptor for ASCII to UTF-8.\n");
-        exit(1);
-    }
-
-    if ((iconv_t) - 1 == (i8toi8859_1 = iconv_open("ISO_8859-1", "UTF-8"))) {
-        fprintf(stderr, "doexport(): Couldn't open iconv descriptor for UTF-8 to ASCII.\n");
-        exit(1);
-    }
-
     unicode_up = 1;
 }
 
 
 void unicode_close()
 {
+    iconv_close(i16to8);
+    if (target_charset) {
+        iconv_close(i8totarget);
+        free((char *)target_charset);
+        target_charset = NULL;
+    }
     unicode_up = 0;
-    iconv_close(i8to16);
-    iconv_close(i16to8);
-    iconv_close(i8859_1to8);
-    iconv_close(i8toi8859_1);
 }
 
 
-int utf16_is_terminated(char *str, int length)
+int utf16_is_terminated(const char *str, int length)
 {
     VSTR_STATIC(errbuf, 100);
     int len = -1;
@@ -127,147 +105,76 @@
 }
 
 
-int vb_utf16to8(vbuf * dest, char *buf, int len)
+size_t vb_utf16to8(vbuf *dest, const char *inbuf, int iblen)
 {
-    size_t inbytesleft = len;
-    char *inbuf = buf;
-    size_t icresult = (size_t)-1;
-    VBUF_STATIC(dumpster, 100);
-
+    size_t inbytesleft  = iblen;
+    size_t icresult     = (size_t)-1;
     size_t outbytesleft = 0;
-    char *outbuf = NULL;
+    char *outbuf        = NULL;
 
     ASSERT(unicode_up, "vb_utf16to8() called before unicode started.");
 
-    if (2 > dest->blen)
-        vbresize(dest, 2);
+    if (2 > dest->blen) vbresize(dest, 2);
     dest->dlen = 0;
 
     //Bad Things can happen if a non-zero-terminated utf16 string comes through here
-    if (!utf16_is_terminated(buf, len))
-        return -1;
+    if (!utf16_is_terminated(inbuf, iblen))
+        return (size_t)-1;
 
     do {
         outbytesleft = dest->blen - dest->dlen;
         outbuf = dest->b + dest->dlen;
-        icresult = iconv(i16to8, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+        icresult = iconv(i16to8, (ICONV_CONST char**)&inbuf, &inbytesleft, &outbuf, &outbytesleft);
         dest->dlen = outbuf - dest->b;
         vbgrow(dest, inbytesleft);
     } while ((size_t)-1 == icresult && E2BIG == errno);
 
-    if (0 != vb_utf8to16T(dumpster, dest->b, dest->dlen))
-        DIE(("Reverse conversion failed."));
-
-    if (icresult == (size_t)-1) {
-        //TODO: error
-        //ERR_UNIX( errno, "vb_utf16to8():iconv failure: %s", strerror( errno ) );
-        unicode_init();
-        return -1;
-        /*
-           fprintf(stderr, "  attempted to convert:\n");
-           hexdump( (char*)cin, 0, inlen, 1 );
-           fprintf(stderr, "  result:\n");
-           hexdump( (char*)bout->b, 0, bout->dlen, 1 );
-           fprintf(stderr, "  MyDirtyOut:\n");
-           for( i=0; i<inlen; i++) {
-           if( inbuf[i] != '\0' ) fprintf(stderr, "%c", inbuf[i] );
-           }
-
-           fprintf( stderr, "\n" );
-           raise( SIGSEGV );
-           exit(1);
-         */
-    }
-
-    if (icresult) {
-        //ERR_UNIX( EILSEQ, "Uhhhh...vb_utf16to8() returning icresult == %d", icresult );
-        return -1;
-    }
-    return icresult;
-}
-
-
-int utf8to16(char *inbuf_o, int iblen, char *outbuf_o, int oblen)       // iblen, oblen: bytes including \0
-{
-    //TODO: this is *only* used to dump the utf16 preamble now...
-    //TODO: This (and 8to16) are the most horrible things I have ever seen...
-    size_t inbytesleft = 0;
-    size_t outbytesleft = oblen;
-    char *inbuf = inbuf_o;
-    char *outbuf = outbuf_o;
-    size_t icresult = (size_t)-1;
-    char *stend;
-
-    stend = memchr(inbuf_o, '\0', iblen);
-    ASSERT(NULL != stend, "utf8to16(): in string not zero terminated.");
-    inbytesleft = (stend - inbuf_o + 1 < iblen) ? stend - inbuf_o + 1 : iblen;
-    icresult = iconv(i8to16, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
-
-    if (icresult == (size_t)-1) {
-        DIE(("iconv failure(%d): %s\n", errno, strerror(errno)));
-    }
-    if (icresult > (size_t)INT_MAX) {
-        return (-1);
-    }
-    return (int) icresult;
-}
-
-
-int vb_utf8to16T(vbuf * bout, char *cin, int inlen)
-{
-    //TODO: This (and 8to16) are the most horrible things I have ever seen...
-    size_t inbytesleft = inlen;
-    char *inbuf = cin;
-    //int rlen = -1, tlen;
-    size_t icresult = (size_t)-1;
-    size_t outbytesleft = 0;
-    char *outbuf = NULL;
-
-    if (2 > bout->blen)
-        vbresize(bout, 2);
-    bout->dlen = 0;
-
-    do {
-        outbytesleft = bout->blen - bout->dlen;
-        outbuf = bout->b + bout->dlen;
-        icresult = iconv(i8to16, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
-        bout->dlen = outbuf - bout->b;
-        vbgrow(bout, 20);
-    } while ((size_t)-1 == icresult && E2BIG == errno);
-
     if (icresult == (size_t)-1) {
         WARN(("iconv failure: %s", strerror(errno)));
         unicode_init();
-        return -1;
+        return (size_t)-1;
     }
-    if (icresult > (size_t) INT_MAX) {
-        return (-1);
-    }
-    return icresult;
+    return (icresult) ? (size_t)-1 : 0;
 }
 
 
-/* Quick and dirty UNICODE to std. ascii */
-void cheap_uni2ascii(char *src, char *dest, int l)
+size_t vb_utf8to8bit(vbuf *dest, const char *inbuf, int iblen, const char* charset)
 {
+    size_t inbytesleft  = iblen;
+    size_t icresult     = (size_t)-1;
+    size_t outbytesleft = 0;
+    char *outbuf        = NULL;
 
-    for (; l > 0; l -= 2) {
-        *dest = *src;
-        dest++;
-        src += 2;
+    if (!target_charset || (target_charset && strcasecmp(target_charset, charset))) {
+        if (target_charset) {
+            iconv_close(i8totarget);
+            free((char *)target_charset);
+        }
+        target_charset = strdup(charset);
+        i8totarget = iconv_open(target_charset, "UTF-8");
+        if (i8totarget == (iconv_t)-1) {
+            fprintf(stderr, "Couldn't open iconv descriptor for UTF-8 to %s.\n", target_charset);
+            return (size_t)-1;
+        }
     }
-    *dest = 0;
-}
 
+    if (2 > dest->blen) vbresize(dest, 2);
+    dest->dlen = 0;
 
-/* Quick and dirty ascii to unicode */
-void cheap_ascii2uni(char *src, char *dest, int l)
-{
-    for (; l > 0; l--) {
-        *dest++ = *src++;
-        *dest++ = 0;
+    do {
+        outbytesleft = dest->blen - dest->dlen;
+        outbuf = dest->b + dest->dlen;
+        icresult = iconv(i8totarget, (ICONV_CONST char**)&inbuf, &inbytesleft, &outbuf, &outbytesleft);
+        dest->dlen = outbuf - dest->b;
+        vbgrow(dest, 20);
+    } while ((size_t)-1 == icresult && E2BIG == errno);
 
+    if (icresult == (size_t)-1) {
+        WARN(("iconv failure: %s", strerror(errno)));
+        unicode_init();
+        return (size_t)-1;
     }
+    return (icresult) ? (size_t)-1 : 0;
 }
 
 
@@ -609,7 +516,7 @@
 }
 
 
-void vshexdump(vstr * vs, char *b, size_t start, size_t stop, int ascii)
+void vshexdump(vstr * vs, const char *b, size_t start, size_t stop, int ascii)
 {
     char c;
     int diff, i;