diff src/pst2ldif.cpp @ 104:39ba19372732

many fixes in pst2ldif by Robert Harris
author Carl Byington <carl@five-ten-sg.com>
date Thu, 09 Oct 2008 12:04:40 -0700
parents b7f456946c5b
children 4703d622e95b
line wrap: on
line diff
--- a/src/pst2ldif.cpp	Tue Oct 07 10:45:50 2008 -0700
+++ b/src/pst2ldif.cpp	Thu Oct 09 12:04:40 2008 -0700
@@ -12,6 +12,8 @@
 
 // needed for std c++ collections
 #include <set>
+#include <vector>
+#include <string>
 
 extern "C" {
     #include "define.h"
@@ -27,19 +29,22 @@
 int32_t     usage();
 int32_t     version();
 char       *check_filename(char *fname);
-char       *dn_escape(const char *str);
-void        print_ldif(const char *dn, const char *value);
-void        print_ldif_single(const char *dn, const char *value);
+void        print_ldif_single(const char *attr, const char *value);
+void        print_ldif_address(const char *attr, int nvalues, char *value, ...);
+void        print_ldif_dn(const char *attr, const char *value, const char *base);
 void        print_ldif_multi(const char *dn, const char *value);
-void        print_ldif_two(const char *dn, const char *value1, const char *value2);
+void        print_ldif_two(const char *attr, const char *value1, const char *value2);
+void        print_escaped_dn(const char *value);
 void        build_cn(char *cn, size_t len, int nvalues, char *value, ...);
 
 char *prog_name;
 pst_file pstfile;
-char *ldap_base  = NULL;    // 'o=some.domain.tld, c=US'
-char *ldap_class = NULL;    // 'newPerson'
-char *ldap_org   = NULL;    // 'some.domain.tld', computed from ldap_base
-iconv_t cd       = 0;       // Character set conversion descriptor
+bool    old_schema            = false;
+char    *ldap_base            = NULL;   // 'o=some.domain.tld,c=US'
+int     ldif_extra_line_count = 0;
+iconv_t cd                    = 0;      // Character set conversion descriptor
+vector<string> ldap_class;              // 'newPerson' or 'inetOrgPerson'
+vector<string> ldif_extra_line;         // 'o: myorg'
 
 
 ////////////////////////////////////////////////
@@ -123,6 +128,7 @@
                 } else if (item->contact && (item->type == PST_TYPE_CONTACT)) {
                     // deal with a contact
                     char cn[1000];
+
                     build_cn(cn, sizeof(cn), 4,
                         item->contact->display_name_prefix,
                         item->contact->first_name,
@@ -131,10 +137,8 @@
                     if (cn[0] != 0) {
                         // have a valid cn
                         const char *ucn = unique_string(cn);
-                        char dn[strlen(ucn) + strlen(ldap_base) + 6];
 
-                        sprintf(dn, "cn=%s, %s", ucn, ldap_base);
-                        print_ldif_single("dn", dn);
+                        print_ldif_dn("dn", ucn, ldap_base);
                         print_ldif_single("cn", ucn);
                         if (item->contact->first_name) {
                             print_ldif_two("givenName",
@@ -152,10 +156,19 @@
                         else
                             print_ldif_single("sn", ucn); // use cn as sn if we cannot find something better
 
-                        if (item->contact->job_title)
-                            print_ldif_single("personalTitle", item->contact->job_title);
-                        if (item->contact->company_name)
-                            print_ldif_single("company", item->contact->company_name);
+                        if (old_schema) {
+                            if (item->contact->job_title)
+                                print_ldif_single("personalTitle", item->contact->job_title);
+                            if (item->contact->company_name)
+                                print_ldif_single("company", item->contact->company_name);
+                        else {
+                            // new schema
+                            if (item->contact->job_title)
+                                print_ldif_single("title", item->contact->job_title);
+                            if (item->contact->company_name)
+                                print_ldif_single("o", item->contact->company_name);
+                            }
+                        }
                         if (item->contact->address1  && *item->contact->address1)
                             print_ldif_single("mail", item->contact->address1);
                         if (item->contact->address2  && *item->contact->address2)
@@ -168,42 +181,96 @@
                             print_ldif_single("mail", item->contact->address2a);
                         if (item->contact->address3a && *item->contact->address3a)
                             print_ldif_single("mail", item->contact->address3a);
-                        if (item->contact->business_address) {
-                            if (item->contact->business_po_box)
-                                print_ldif_single("postalAddress", item->contact->business_po_box);
-                            if (item->contact->business_street)
-                                print_ldif_multi("postalAddress", item->contact->business_street);
-                            if (item->contact->business_city)
-                                print_ldif_single("l", item->contact->business_city);
-                            if (item->contact->business_state)
-                                print_ldif_single("st", item->contact->business_state);
-                            if (item->contact->business_postal_code)
-                                print_ldif_single("postalCode", item->contact->business_postal_code);
+
+                        if (old_schema) {
+                            if (item->contact->business_address) {
+                                if (item->contact->business_po_box)
+                                    print_ldif_single("postalAddress", item->contact->business_po_box);
+                                if (item->contact->business_street)
+                                    print_ldif_multi("postalAddress", item->contact->business_street);
+                                if (item->contact->business_city)
+                                    print_ldif_single("l", item->contact->business_city);
+                                if (item->contact->business_state)
+                                    print_ldif_single("st", item->contact->business_state);
+                                if (item->contact->business_postal_code)
+                                    print_ldif_single("postalCode", item->contact->business_postal_code);
+                            }
+                            else if (item->contact->home_address) {
+                                if (item->contact->home_po_box)
+                                    print_ldif_single("postalAddress", item->contact->home_po_box);
+                                if (item->contact->home_street)
+                                    print_ldif_multi("postalAddress", item->contact->home_street);
+                                if (item->contact->home_city)
+                                    print_ldif_single("l", item->contact->home_city);
+                                if (item->contact->home_state)
+                                    print_ldif_single("st", item->contact->home_state);
+                                if (item->contact->home_postal_code)
+                                    print_ldif_single("postalCode", item->contact->home_postal_code);
+                            }
+                            else if (item->contact->other_address) {
+                                if (item->contact->other_po_box)
+                                    print_ldif_single("postalAddress", item->contact->other_po_box);
+                                if (item->contact->other_street)
+                                    print_ldif_multi("postalAddress", item->contact->other_street);
+                                if (item->contact->other_city)
+                                    print_ldif_single("l", item->contact->other_city);
+                                if (item->contact->other_state)
+                                    print_ldif_single("st", item->contact->other_state);
+                                if (item->contact->other_postal_code)
+                                    print_ldif_single("postalCode", item->contact->other_postal_code);
+                            }
                         }
-                        else if (item->contact->home_address) {
-                            if (item->contact->home_po_box)
-                                print_ldif_single("postalAddress", item->contact->home_po_box);
-                            if (item->contact->home_street)
-                                print_ldif_multi("postalAddress", item->contact->home_street);
-                            if (item->contact->home_city)
-                                print_ldif_single("l", item->contact->home_city);
-                            if (item->contact->home_state)
-                                print_ldif_single("st", item->contact->home_state);
-                            if (item->contact->home_postal_code)
-                                print_ldif_single("postalCode", item->contact->home_postal_code);
+                        else {
+                            // new schema, with proper RFC4517 postal addresses
+                            if (item->contact->business_address) {
+                                print_ldif_address("postalAddress", 6,
+                                    item->contact->business_po_box,
+                                    item->contact->business_street,
+                                    item->contact->business_city,
+                                    item->contact->business_state,
+                                    item->contact->business_postal_code,
+                                    item->contact->business_country);
+                                if (item->contact->business_city)
+                                    print_ldif_single("l", item->contact->business_city);
+                                if (item->contact->business_state)
+                                    print_ldif_single("st", item->contact->business_state);
+                                if (item->contact->business_postal_code)
+                                    print_ldif_single("postalCode", item->contact->business_postal_code);
+                            }
+                            else if (item->contact->home_address) {
+                                if (item->contact->home_city)
+                                    print_ldif_single("l", item->contact->home_city);
+                                if (item->contact->home_state)
+                                    print_ldif_single("st", item->contact->home_state);
+                                if (item->contact->home_postal_code)
+                                    print_ldif_single("postalCode", item->contact->home_postal_code);
+                            }
+                            else if (item->contact->other_address) {
+                                print_ldif_address("postalAddress", 6,
+                                    item->contact->other_po_box,
+                                    item->contact->other_street,
+                                    item->contact->other_city,
+                                    item->contact->other_state,
+                                    item->contact->other_postal_code,
+                                    item->contact->other_country);
+                                if (item->contact->other_city)
+                                    print_ldif_single("l", item->contact->other_city);
+                                if (item->contact->other_state)
+                                    print_ldif_single("st", item->contact->other_state);
+                                if (item->contact->other_postal_code)
+                                    print_ldif_single("postalCode", item->contact->other_postal_code);
+                            }
+                            if (item->contact->home_address) {
+                                print_ldif_address("homePostalAddress", 6,
+                                    item->contact->home_po_box,
+                                    item->contact->home_street,
+                                    item->contact->home_city,
+                                    item->contact->home_state,
+                                    item->contact->home_postal_code,
+                                    item->contact->home_country);
+                            }
                         }
-                        else if (item->contact->other_address) {
-                            if (item->contact->other_po_box)
-                                print_ldif_single("postalAddress", item->contact->other_po_box);
-                            if (item->contact->other_street)
-                                print_ldif_multi("postalAddress", item->contact->other_street);
-                            if (item->contact->other_city)
-                                print_ldif_single("l", item->contact->other_city);
-                            if (item->contact->other_state)
-                                print_ldif_single("st", item->contact->other_state);
-                            if (item->contact->other_postal_code)
-                                print_ldif_single("postalCode", item->contact->other_postal_code);
-                        }
+
                         if (item->contact->business_fax)
                             print_ldif_single("facsimileTelephoneNumber", item->contact->business_fax);
                         else if (item->contact->home_fax)
@@ -221,12 +288,19 @@
                         else if (item->contact->other_phone)
                             print_ldif_single("mobile", item->contact->other_phone);
 
+                        if (!old_schema) {
+                            if (item->contact->business_homepage)
+                                print_ldif_single("labeledURI", item->contact->business_homepage);
+                            if (item->contact->personal_homepage)
+                                print_ldif_single("labeledURI", item->contact->personal_homepage);
+                        }
 
                         if (item->comment)
                             print_ldif_single("description", item->comment);
 
-                        print_ldif("objectClass", ldap_class);
-                        putchar('\n');
+                        for (int i=0; i<ldap_class.size(); i++)
+                            print_ldif_single("objectClass", ldap_class[i].c_str());
+                        printf("\n");
                     }
                 }
                 else {
@@ -240,16 +314,10 @@
 }
 
 
-void print_ldif(const char *dn, const char *value)
-{
-    printf("%s: %s\n", dn, value);
-}
-
-
-// Prints a Distinguished Name together with its value.
+// Prints an attribute together with its value.
 // If the value isn't a "SAFE STRING" (as defined in RFC2849),
 // then it is output as a BASE-64 encoded value
-void print_ldif_single(const char *dn, const char *value)
+void print_ldif_single(const char *attr, const char *value)
 {
     size_t len;
     bool is_safe_string = true;
@@ -261,8 +329,8 @@
     len = strlen(value) + 1;
     char buffer[len];
     char *p = buffer;
+
     // See if "value" is a "SAFE STRING"
-
     // First check characters that are safe but not safe as initial characters
     if (*value == ':' || *value == '<')
         is_safe_string = false;
@@ -291,7 +359,7 @@
     }
     *p = 0;
     if (is_safe_string) {
-        printf("%s: %s\n", dn, buffer);
+        printf("%s: %s\n", attr, buffer);
         return;
     }
 
@@ -314,11 +382,82 @@
     }
     else
         p = base64_encode(buffer, strlen(buffer));
-    printf("%s:: %s\n", dn, p);
+    printf("%s:: %s\n", attr, p);
     free(p);
 }
 
 
+// Combines values representing address lines into an address,i
+// lines separated with "$" as per PostalAddress syntax in RFC4517
+void print_ldif_address(const char *attr, int nvalues, char *value, ...)
+{
+    bool space_flag = false;
+    bool newline_flag = false;
+    char *address = NULL;    // Buffer where address is built up
+    int len = 0;             // Length of buffer
+    int i = 0;               // Index of next character position in buffer
+    va_list ap;
+
+    va_start(ap, value);
+
+    while (!value) {
+       nvalues--;
+       if (nvalues == 0) {    // Nothing at all to do!
+           va_end(ap);
+           return;
+       }
+       value = va_arg(ap, char *);
+    }
+    for (;;) {
+        char ch = *value++;
+
+        if (ch == 0 || ch == '\n') {
+            do {
+                value = NULL;
+                nvalues--;
+                if (nvalues == 0) break;
+                value = va_arg(ap, char *);
+            } while (!value);
+            if (!value) break;
+            space_flag = true;
+            newline_flag = true;
+        }
+        else if (ch == '\r')
+            continue;
+        else if (ch == '\n') {
+            newline_flag = true;
+            continue;
+        }
+        else if (ch == ' ') {
+            space_flag = true;
+            continue;
+        }
+        else {
+            if (i > (len-5)) {
+                len += 256;
+                address = (char *)realloc(address, len);
+            }
+            if (newline_flag) {
+                address[i++] = '$';
+                newline_flag = false;
+                space_flag   = false;
+            }
+            else if (space_flag) {
+                address[i++] = ' ';
+                space_flag   = false;
+            }
+            if (ch == '$' || ch == '\\') address[i++] = '\\';
+            address[i++] = ch;
+        }
+    }
+    va_end(ap);
+    if (i == 0) return;   // Nothing to do
+    address[i] = 0;
+    print_ldif_single(attr, address);
+    free(address);
+}
+
+
 void print_ldif_multi(const char *dn, const char *value)
 {
     const char *n;
@@ -330,20 +469,20 @@
 }
 
 
-void print_ldif_two(const char *dn, const char *value1, const char *value2)
+void print_ldif_two(const char *attr, const char *value1, const char *value2)
 {
     size_t len1, len2;
     if (value1 && *value1)
         len1 = strlen(value1);
     else {
-        print_ldif_single(dn, value2);
+        print_ldif_single(attr, value2);
         return;
     }
 
     if (value2 && *value2)
         len2 = strlen(value2);
     else {
-        print_ldif_single(dn, value1);
+        print_ldif_single(attr, value1);
         return;
     }
 
@@ -351,7 +490,7 @@
     memcpy(value, value1, len1);
     value[len1] = ' ';
     memcpy(value + len1 + 1, value2, len2 + 1);
-    print_ldif_single(dn, value);
+    print_ldif_single(attr, value);
 }
 
 
@@ -366,6 +505,7 @@
     while (!value) {
        nvalues--;
        if (nvalues == 0) {
+           cn[0] = 0;   // Just a terminating NUL
            va_end(ap);
            return;
        }
@@ -410,25 +550,18 @@
 int main(int argc, char** argv) {
     pst_desc_ll *d_ptr;
     char *fname = NULL;
-    char *temp = NULL;        //temporary char pointer
     int c;
     char *d_log = NULL;
     prog_name = argv[0];
     pst_item *item = NULL;
 
-    while ((c = getopt(argc, argv, "b:c:C:d:Vh"))!= -1) {
+    while ((c = getopt(argc, argv, "b:c:C:d:l:oVh"))!= -1) {
         switch (c) {
         case 'b':
             ldap_base = optarg;
-            temp = strchr(ldap_base, ',');
-            if (temp) {
-                *temp = '\0';
-                ldap_org = strdup(ldap_base+2); // assume first 2 chars are o=
-                *temp = ',';
-            }
             break;
         case 'c':
-            ldap_class = optarg;
+            ldap_class.push_back(string(optarg));
             break;
         case 'C':
             cd = iconv_open("UTF-8", optarg);
@@ -445,6 +578,12 @@
             usage();
             exit(0);
             break;
+        case 'l':
+            ldif_extra_line.push_back(string(optarg));
+            break;
+        case 'o':
+            old_schema = true;
+            break;
         case 'V':
             version();
             exit(0);
@@ -456,7 +595,7 @@
         }
     }
 
-    if ((argc > optind) && (ldap_base) && (ldap_class) && (ldap_org)) {
+    if ((argc > optind) && (ldap_base)) {
         fname = argv[optind];
     } else {
         usage();
@@ -490,18 +629,29 @@
 
     pst_freeItem(item);
 
-    // write the ldap header
-    printf("dn: %s\n", ldap_base);
-    printf("o: %s\n", ldap_org);
-    printf("objectClass: organization\n\n");
-    printf("dn: cn=root, %s\n", ldap_base);
-    printf("cn: root\n");
-    printf("objectClass: %s\n\n", ldap_class);
+    if (old_schema && (strlen(ldap_base) > 2)) {
+        char *ldap_org = strdup(ldap_base+2); // assume first 2 chars are o=
+        char *temp = strchr(ldap_org, ',');
+        if (temp) {
+            *temp = '\0';
+            // write the ldap header
+            printf("dn: %s\n", ldap_base);
+            printf("o: %s\n", ldap_org);
+            printf("objectClass: organization\n\n");
+            printf("dn: cn=root, %s\n", ldap_base);
+            printf("cn: root\n");
+            for (int i=0; i<ldap_class.size(); i++)
+                print_ldif_single("objectClass", ldap_class[i].c_str());
+            printf("\n");
+        }
+    }
 
     process(d_ptr->child);  // do the children of TOPF
     pst_close(&pstfile);
     DEBUG_RET();
     free_strings(all_strings);
+    if (cd) iconv_close(cd);
+
     return 0;
 }
 
@@ -510,11 +660,14 @@
     version();
     printf("Usage: %s [OPTIONS] {PST FILENAME}\n", prog_name);
     printf("OPTIONS:\n");
-    printf("\t-h\t- Help. This screen\n");
     printf("\t-V\t- Version. Display program version\n");
+    printf("\t-C charset\t- assumed character set of non-ASCII characters\n");
     printf("\t-b ldapbase\t- set the LDAP base value\n");
-    printf("\t-c class   \t- set the class of the LDAP objects\n");
-    printf("\t-C charset \t- assumed character set of non-ASCII characters\n");
+    printf("\t-c class\t- set the class of the LDAP objects (may contain more than one)\n");
+    printf("\t-d <filename>\t- Debug to file. This is a binary log. Use readpstlog to print it\n");
+    printf("\t-h\t- Help. This screen\n");
+    printf("\t-l line\t- extra line to insert in the LDIF file for each contact\n");
+    printf("\t-o\t- use old schema, default is new schema\n");
     return 0;
 }
 
@@ -547,52 +700,78 @@
     return fname;
 }
 
-#if 0
+
 // This function escapes Distinguished Names (as per RFC4514)
-char *dn_escape(const char *str) {
-    static char* buf = NULL;
-    const char *a;
-    char *ret, *b;
-    if (str == NULL)
-        ret = NULL;
-    else {
-        // Calculate maximum space needed (if every character must be escaped)
-        int x = 2 * strlen(str) + 1;  // don't forget room for the NUL
-        buf = (char*) realloc(buf, x);
-        a = str;
-        b = buf;
+void print_ldif_dn(const char *attr, const char *value, const char *base)
+{
+    printf("dn: cn=");
+    // remove leading spaces (RFC says escape them)
+    while (*value == ' ')
+        value++;
+
+    print_escaped_dn(value);
+    if (base && base[0]) {
+        printf(",");
+        print_escaped_dn(base);
+    }
+    printf("\n");
+    return;
+}
+
 
-        // remove leading spaces (RFC says escape them)
-        while (*a == ' ')
-            a++;
+void print_escaped_dn(const char *value)
+{
+    char ch;
+    bool needs_code_conversion = false;
+    char *utf8_buffer = NULL;
 
-        // escape initial '#'
-        if (*a == '#')
-            *b++ = '\\';
+    // First do a quick scan to see if any code conversion is required
+    if (cd) {
+        const char *p = value;
+        while (*p) {
+            if (*p++ & 0x80) {
+                needs_code_conversion = true;
+                break;
+            }
+        }
+    }
 
-        while (*a != '\0') {
-            switch(*a) {
-                case '\\':
-                case '"' :
-                case '+' :
-                case ';' :
-                case '<' :
-                case '>' :
-                    *(b++)='\\';
-                    *b=*a;
-                break;
-                case '\r':  // skip cr
-                    b--;
-                    break;
-                default:
-                    *b=*a;
-            }
-            b++;
-            a++;
+    if (needs_code_conversion) {
+        size_t inlen = strlen(value);
+        size_t utf8_len = 2 * inlen + 1;
+        char *p = (char *)value;
+        char *utf8_p = utf8_buffer;
+
+        utf8_buffer = (char *)malloc(utf8_len);
+        utf8_p = utf8_buffer;
+        iconv(cd, NULL, NULL, NULL, NULL);
+        if (iconv(cd, &p, &inlen, &utf8_p, &utf8_len) >= 0) {
+            *utf8_p = 0;
+            value = utf8_buffer;
         }
-        *b = '\0'; // NUL-terminate the string (buf)
-        ret = buf;
     }
-    return ret;
+
+    // escape initial '#' and space
+    if (*value == '#' || *value == ' ')
+        putchar('\\');
+
+    while ((ch = *value++) != 0) {
+        if (((ch & 0x80) != 0) || (ch <= 0x1F))
+            // Print as escaped hex digits
+            printf("\\%2.2X", ch & 0xFF);
+        else switch (ch) {
+            case '\\':
+            case '"' :
+            case '+' :
+            case ';' :
+            case '<' :
+            case '>' :
+                putchar('\\');
+                // Fall through
+            default:
+                putchar(ch);
+        }
+    }
+    if (utf8_buffer) free((void *)utf8_buffer);
+    return;
 }
-#endif