# HG changeset patch # User Carl Byington # Date 1223579080 25200 # Node ID 39ba193727321b150e71bf40d0208054c3ac08f2 # Parent 0af0bbe166e16c31ff6eaf2b1aac751ac18f883d many fixes in pst2ldif by Robert Harris diff -r 0af0bbe166e1 -r 39ba19372732 ChangeLog --- a/ChangeLog Tue Oct 07 10:45:50 2008 -0700 +++ b/ChangeLog Thu Oct 09 12:04:40 2008 -0700 @@ -1,4 +1,4 @@ -LibPST 0.6.20 (2008-10-07) +LibPST 0.6.20 (2008-10-09) =============================== * add configure option --enable-dii=no to remove dependency on libgd. * many fixes in pst2ldif by Robert Harris. diff -r 0af0bbe166e1 -r 39ba19372732 NEWS --- a/NEWS Tue Oct 07 10:45:50 2008 -0700 +++ b/NEWS Thu Oct 09 12:04:40 2008 -0700 @@ -1,4 +1,4 @@ -0.6.20 2008-10-07 add configure option --enable-dii=no, fixes from Robert Harris for pst2ldif. +0.6.20 2008-10-09 add configure option --enable-dii=no, fixes from Robert Harris for pst2ldif. 0.6.19 2008-09-14 Initial work on a .so shared library from Bharath Acharya. 0.6.18 2008-08-28 Fixes for iconv on Mac from Justin Greer. 0.6.17 2008-08-05 More fixes for 32/64 bit portability on big endian ppc. diff -r 0af0bbe166e1 -r 39ba19372732 libpst.spec.in --- a/libpst.spec.in Tue Oct 07 10:45:50 2008 -0700 +++ b/libpst.spec.in Thu Oct 09 12:04:40 2008 -0700 @@ -47,7 +47,7 @@ %changelog -* Tue Oct 07 2008 Carl Byington - 0.6.20-1 +* Thu Oct 09 2008 Carl Byington - 0.6.20-1 - add configure option --enable-dii=no to remove dependency on libgd. - many fixes in pst2ldif by Robert Harris. - add -D option to include deleted items, from Justin Greer diff -r 0af0bbe166e1 -r 39ba19372732 regression/regression-tests.bash --- a/regression/regression-tests.bash Tue Oct 07 10:45:50 2008 -0700 +++ b/regression/regression-tests.bash Thu Oct 09 12:04:40 2008 -0700 @@ -21,7 +21,8 @@ mkdir output$n $val ../src/readpst -cv -o output$n -d dumper $fn >$fn.pst.err 2>&1 ../src/readpstlog -f I dumper >$fn.log - $val ../src/pst2ldif -b 'o=ams-cc.com, c=US' -c 'newPerson' $fn >$fn.ldif.err 2>&1 + $val ../src/pst2ldif -b 'o=ams-cc.com, c=US' -c 'newPerson' -o $fn >$fn.ldif.err 2>&1 + $val ../src/pst2ldif -b 'o=ams-cc.com, c=US' -c 'inetOrgPerson' $fn >$fn.ldif2.err 2>&1 rm -f dumper } diff -r 0af0bbe166e1 -r 39ba19372732 src/libpst.c --- a/src/libpst.c Tue Oct 07 10:45:50 2008 -0700 +++ b/src/libpst.c Thu Oct 09 12:04:40 2008 -0700 @@ -281,6 +281,7 @@ static void record_descriptor(pst_file *pf, pst_desc_ll *node); static void record_descriptor(pst_file *pf, pst_desc_ll *node) { + DEBUG_ENT("record_descriptor"); // finish node initialization node->parent = NULL; node->child = NULL; @@ -331,6 +332,7 @@ add_descriptor_to_list(node, &pf->d_head, &pf->d_tail); } } + DEBUG_RET(); } diff -r 0af0bbe166e1 -r 39ba19372732 src/pst2ldif.cpp --- a/src/pst2ldif.cpp Tue Oct 07 10:45:50 2008 -0700 +++ b/src/pst2ldif.cpp Thu Oct 09 12:04:40 2008 -0700 @@ -12,6 +12,8 @@ // needed for std c++ collections #include +#include +#include extern "C" { #include "define.h" @@ -27,19 +29,22 @@ int32_t usage(); int32_t version(); char *check_filename(char *fname); -char *dn_escape(const char *str); -void print_ldif(const char *dn, const char *value); -void print_ldif_single(const char *dn, const char *value); +void print_ldif_single(const char *attr, const char *value); +void print_ldif_address(const char *attr, int nvalues, char *value, ...); +void print_ldif_dn(const char *attr, const char *value, const char *base); void print_ldif_multi(const char *dn, const char *value); -void print_ldif_two(const char *dn, const char *value1, const char *value2); +void print_ldif_two(const char *attr, const char *value1, const char *value2); +void print_escaped_dn(const char *value); void build_cn(char *cn, size_t len, int nvalues, char *value, ...); char *prog_name; pst_file pstfile; -char *ldap_base = NULL; // 'o=some.domain.tld, c=US' -char *ldap_class = NULL; // 'newPerson' -char *ldap_org = NULL; // 'some.domain.tld', computed from ldap_base -iconv_t cd = 0; // Character set conversion descriptor +bool old_schema = false; +char *ldap_base = NULL; // 'o=some.domain.tld,c=US' +int ldif_extra_line_count = 0; +iconv_t cd = 0; // Character set conversion descriptor +vector ldap_class; // 'newPerson' or 'inetOrgPerson' +vector ldif_extra_line; // 'o: myorg' //////////////////////////////////////////////// @@ -123,6 +128,7 @@ } else if (item->contact && (item->type == PST_TYPE_CONTACT)) { // deal with a contact char cn[1000]; + build_cn(cn, sizeof(cn), 4, item->contact->display_name_prefix, item->contact->first_name, @@ -131,10 +137,8 @@ if (cn[0] != 0) { // have a valid cn const char *ucn = unique_string(cn); - char dn[strlen(ucn) + strlen(ldap_base) + 6]; - sprintf(dn, "cn=%s, %s", ucn, ldap_base); - print_ldif_single("dn", dn); + print_ldif_dn("dn", ucn, ldap_base); print_ldif_single("cn", ucn); if (item->contact->first_name) { print_ldif_two("givenName", @@ -152,10 +156,19 @@ else print_ldif_single("sn", ucn); // use cn as sn if we cannot find something better - if (item->contact->job_title) - print_ldif_single("personalTitle", item->contact->job_title); - if (item->contact->company_name) - print_ldif_single("company", item->contact->company_name); + if (old_schema) { + if (item->contact->job_title) + print_ldif_single("personalTitle", item->contact->job_title); + if (item->contact->company_name) + print_ldif_single("company", item->contact->company_name); + else { + // new schema + if (item->contact->job_title) + print_ldif_single("title", item->contact->job_title); + if (item->contact->company_name) + print_ldif_single("o", item->contact->company_name); + } + } if (item->contact->address1 && *item->contact->address1) print_ldif_single("mail", item->contact->address1); if (item->contact->address2 && *item->contact->address2) @@ -168,42 +181,96 @@ print_ldif_single("mail", item->contact->address2a); if (item->contact->address3a && *item->contact->address3a) print_ldif_single("mail", item->contact->address3a); - if (item->contact->business_address) { - if (item->contact->business_po_box) - print_ldif_single("postalAddress", item->contact->business_po_box); - if (item->contact->business_street) - print_ldif_multi("postalAddress", item->contact->business_street); - if (item->contact->business_city) - print_ldif_single("l", item->contact->business_city); - if (item->contact->business_state) - print_ldif_single("st", item->contact->business_state); - if (item->contact->business_postal_code) - print_ldif_single("postalCode", item->contact->business_postal_code); + + if (old_schema) { + if (item->contact->business_address) { + if (item->contact->business_po_box) + print_ldif_single("postalAddress", item->contact->business_po_box); + if (item->contact->business_street) + print_ldif_multi("postalAddress", item->contact->business_street); + if (item->contact->business_city) + print_ldif_single("l", item->contact->business_city); + if (item->contact->business_state) + print_ldif_single("st", item->contact->business_state); + if (item->contact->business_postal_code) + print_ldif_single("postalCode", item->contact->business_postal_code); + } + else if (item->contact->home_address) { + if (item->contact->home_po_box) + print_ldif_single("postalAddress", item->contact->home_po_box); + if (item->contact->home_street) + print_ldif_multi("postalAddress", item->contact->home_street); + if (item->contact->home_city) + print_ldif_single("l", item->contact->home_city); + if (item->contact->home_state) + print_ldif_single("st", item->contact->home_state); + if (item->contact->home_postal_code) + print_ldif_single("postalCode", item->contact->home_postal_code); + } + else if (item->contact->other_address) { + if (item->contact->other_po_box) + print_ldif_single("postalAddress", item->contact->other_po_box); + if (item->contact->other_street) + print_ldif_multi("postalAddress", item->contact->other_street); + if (item->contact->other_city) + print_ldif_single("l", item->contact->other_city); + if (item->contact->other_state) + print_ldif_single("st", item->contact->other_state); + if (item->contact->other_postal_code) + print_ldif_single("postalCode", item->contact->other_postal_code); + } } - else if (item->contact->home_address) { - if (item->contact->home_po_box) - print_ldif_single("postalAddress", item->contact->home_po_box); - if (item->contact->home_street) - print_ldif_multi("postalAddress", item->contact->home_street); - if (item->contact->home_city) - print_ldif_single("l", item->contact->home_city); - if (item->contact->home_state) - print_ldif_single("st", item->contact->home_state); - if (item->contact->home_postal_code) - print_ldif_single("postalCode", item->contact->home_postal_code); + else { + // new schema, with proper RFC4517 postal addresses + if (item->contact->business_address) { + print_ldif_address("postalAddress", 6, + item->contact->business_po_box, + item->contact->business_street, + item->contact->business_city, + item->contact->business_state, + item->contact->business_postal_code, + item->contact->business_country); + if (item->contact->business_city) + print_ldif_single("l", item->contact->business_city); + if (item->contact->business_state) + print_ldif_single("st", item->contact->business_state); + if (item->contact->business_postal_code) + print_ldif_single("postalCode", item->contact->business_postal_code); + } + else if (item->contact->home_address) { + if (item->contact->home_city) + print_ldif_single("l", item->contact->home_city); + if (item->contact->home_state) + print_ldif_single("st", item->contact->home_state); + if (item->contact->home_postal_code) + print_ldif_single("postalCode", item->contact->home_postal_code); + } + else if (item->contact->other_address) { + print_ldif_address("postalAddress", 6, + item->contact->other_po_box, + item->contact->other_street, + item->contact->other_city, + item->contact->other_state, + item->contact->other_postal_code, + item->contact->other_country); + if (item->contact->other_city) + print_ldif_single("l", item->contact->other_city); + if (item->contact->other_state) + print_ldif_single("st", item->contact->other_state); + if (item->contact->other_postal_code) + print_ldif_single("postalCode", item->contact->other_postal_code); + } + if (item->contact->home_address) { + print_ldif_address("homePostalAddress", 6, + item->contact->home_po_box, + item->contact->home_street, + item->contact->home_city, + item->contact->home_state, + item->contact->home_postal_code, + item->contact->home_country); + } } - else if (item->contact->other_address) { - if (item->contact->other_po_box) - print_ldif_single("postalAddress", item->contact->other_po_box); - if (item->contact->other_street) - print_ldif_multi("postalAddress", item->contact->other_street); - if (item->contact->other_city) - print_ldif_single("l", item->contact->other_city); - if (item->contact->other_state) - print_ldif_single("st", item->contact->other_state); - if (item->contact->other_postal_code) - print_ldif_single("postalCode", item->contact->other_postal_code); - } + if (item->contact->business_fax) print_ldif_single("facsimileTelephoneNumber", item->contact->business_fax); else if (item->contact->home_fax) @@ -221,12 +288,19 @@ else if (item->contact->other_phone) print_ldif_single("mobile", item->contact->other_phone); + if (!old_schema) { + if (item->contact->business_homepage) + print_ldif_single("labeledURI", item->contact->business_homepage); + if (item->contact->personal_homepage) + print_ldif_single("labeledURI", item->contact->personal_homepage); + } if (item->comment) print_ldif_single("description", item->comment); - print_ldif("objectClass", ldap_class); - putchar('\n'); + for (int i=0; i (len-5)) { + len += 256; + address = (char *)realloc(address, len); + } + if (newline_flag) { + address[i++] = '$'; + newline_flag = false; + space_flag = false; + } + else if (space_flag) { + address[i++] = ' '; + space_flag = false; + } + if (ch == '$' || ch == '\\') address[i++] = '\\'; + address[i++] = ch; + } + } + va_end(ap); + if (i == 0) return; // Nothing to do + address[i] = 0; + print_ldif_single(attr, address); + free(address); +} + + void print_ldif_multi(const char *dn, const char *value) { const char *n; @@ -330,20 +469,20 @@ } -void print_ldif_two(const char *dn, const char *value1, const char *value2) +void print_ldif_two(const char *attr, const char *value1, const char *value2) { size_t len1, len2; if (value1 && *value1) len1 = strlen(value1); else { - print_ldif_single(dn, value2); + print_ldif_single(attr, value2); return; } if (value2 && *value2) len2 = strlen(value2); else { - print_ldif_single(dn, value1); + print_ldif_single(attr, value1); return; } @@ -351,7 +490,7 @@ memcpy(value, value1, len1); value[len1] = ' '; memcpy(value + len1 + 1, value2, len2 + 1); - print_ldif_single(dn, value); + print_ldif_single(attr, value); } @@ -366,6 +505,7 @@ while (!value) { nvalues--; if (nvalues == 0) { + cn[0] = 0; // Just a terminating NUL va_end(ap); return; } @@ -410,25 +550,18 @@ int main(int argc, char** argv) { pst_desc_ll *d_ptr; char *fname = NULL; - char *temp = NULL; //temporary char pointer int c; char *d_log = NULL; prog_name = argv[0]; pst_item *item = NULL; - while ((c = getopt(argc, argv, "b:c:C:d:Vh"))!= -1) { + while ((c = getopt(argc, argv, "b:c:C:d:l:oVh"))!= -1) { switch (c) { case 'b': ldap_base = optarg; - temp = strchr(ldap_base, ','); - if (temp) { - *temp = '\0'; - ldap_org = strdup(ldap_base+2); // assume first 2 chars are o= - *temp = ','; - } break; case 'c': - ldap_class = optarg; + ldap_class.push_back(string(optarg)); break; case 'C': cd = iconv_open("UTF-8", optarg); @@ -445,6 +578,12 @@ usage(); exit(0); break; + case 'l': + ldif_extra_line.push_back(string(optarg)); + break; + case 'o': + old_schema = true; + break; case 'V': version(); exit(0); @@ -456,7 +595,7 @@ } } - if ((argc > optind) && (ldap_base) && (ldap_class) && (ldap_org)) { + if ((argc > optind) && (ldap_base)) { fname = argv[optind]; } else { usage(); @@ -490,18 +629,29 @@ pst_freeItem(item); - // write the ldap header - printf("dn: %s\n", ldap_base); - printf("o: %s\n", ldap_org); - printf("objectClass: organization\n\n"); - printf("dn: cn=root, %s\n", ldap_base); - printf("cn: root\n"); - printf("objectClass: %s\n\n", ldap_class); + if (old_schema && (strlen(ldap_base) > 2)) { + char *ldap_org = strdup(ldap_base+2); // assume first 2 chars are o= + char *temp = strchr(ldap_org, ','); + if (temp) { + *temp = '\0'; + // write the ldap header + printf("dn: %s\n", ldap_base); + printf("o: %s\n", ldap_org); + printf("objectClass: organization\n\n"); + printf("dn: cn=root, %s\n", ldap_base); + printf("cn: root\n"); + for (int i=0; ichild); // do the children of TOPF pst_close(&pstfile); DEBUG_RET(); free_strings(all_strings); + if (cd) iconv_close(cd); + return 0; } @@ -510,11 +660,14 @@ version(); printf("Usage: %s [OPTIONS] {PST FILENAME}\n", prog_name); printf("OPTIONS:\n"); - printf("\t-h\t- Help. This screen\n"); printf("\t-V\t- Version. Display program version\n"); + printf("\t-C charset\t- assumed character set of non-ASCII characters\n"); printf("\t-b ldapbase\t- set the LDAP base value\n"); - printf("\t-c class \t- set the class of the LDAP objects\n"); - printf("\t-C charset \t- assumed character set of non-ASCII characters\n"); + printf("\t-c class\t- set the class of the LDAP objects (may contain more than one)\n"); + printf("\t-d \t- Debug to file. This is a binary log. Use readpstlog to print it\n"); + printf("\t-h\t- Help. This screen\n"); + printf("\t-l line\t- extra line to insert in the LDIF file for each contact\n"); + printf("\t-o\t- use old schema, default is new schema\n"); return 0; } @@ -547,52 +700,78 @@ return fname; } -#if 0 + // This function escapes Distinguished Names (as per RFC4514) -char *dn_escape(const char *str) { - static char* buf = NULL; - const char *a; - char *ret, *b; - if (str == NULL) - ret = NULL; - else { - // Calculate maximum space needed (if every character must be escaped) - int x = 2 * strlen(str) + 1; // don't forget room for the NUL - buf = (char*) realloc(buf, x); - a = str; - b = buf; +void print_ldif_dn(const char *attr, const char *value, const char *base) +{ + printf("dn: cn="); + // remove leading spaces (RFC says escape them) + while (*value == ' ') + value++; + + print_escaped_dn(value); + if (base && base[0]) { + printf(","); + print_escaped_dn(base); + } + printf("\n"); + return; +} + - // remove leading spaces (RFC says escape them) - while (*a == ' ') - a++; +void print_escaped_dn(const char *value) +{ + char ch; + bool needs_code_conversion = false; + char *utf8_buffer = NULL; - // escape initial '#' - if (*a == '#') - *b++ = '\\'; + // First do a quick scan to see if any code conversion is required + if (cd) { + const char *p = value; + while (*p) { + if (*p++ & 0x80) { + needs_code_conversion = true; + break; + } + } + } - while (*a != '\0') { - switch(*a) { - case '\\': - case '"' : - case '+' : - case ';' : - case '<' : - case '>' : - *(b++)='\\'; - *b=*a; - break; - case '\r': // skip cr - b--; - break; - default: - *b=*a; - } - b++; - a++; + if (needs_code_conversion) { + size_t inlen = strlen(value); + size_t utf8_len = 2 * inlen + 1; + char *p = (char *)value; + char *utf8_p = utf8_buffer; + + utf8_buffer = (char *)malloc(utf8_len); + utf8_p = utf8_buffer; + iconv(cd, NULL, NULL, NULL, NULL); + if (iconv(cd, &p, &inlen, &utf8_p, &utf8_len) >= 0) { + *utf8_p = 0; + value = utf8_buffer; } - *b = '\0'; // NUL-terminate the string (buf) - ret = buf; } - return ret; + + // escape initial '#' and space + if (*value == '#' || *value == ' ') + putchar('\\'); + + while ((ch = *value++) != 0) { + if (((ch & 0x80) != 0) || (ch <= 0x1F)) + // Print as escaped hex digits + printf("\\%2.2X", ch & 0xFF); + else switch (ch) { + case '\\': + case '"' : + case '+' : + case ';' : + case '<' : + case '>' : + putchar('\\'); + // Fall through + default: + putchar(ch); + } + } + if (utf8_buffer) free((void *)utf8_buffer); + return; } -#endif diff -r 0af0bbe166e1 -r 39ba19372732 src/readpst.c --- a/src/readpst.c Tue Oct 07 10:45:50 2008 -0700 +++ b/src/readpst.c Thu Oct 09 12:04:40 2008 -0700 @@ -420,11 +420,11 @@ version(); printf("Usage: %s [OPTIONS] {PST FILENAME}\n", prog_name); printf("OPTIONS:\n"); + printf("\t-V\t- Version. Display program version\n"); printf("\t-C\t- Decrypt (compressible encryption) the entire file and output on stdout (not typically useful)\n"); printf("\t-D\t- Include deleted items in output\n"); printf("\t-M\t- MH. Write emails in the MH format\n"); printf("\t-S\t- Separate. Write emails in the separate format\n"); - printf("\t-V\t- Version. Display program version\n"); printf("\t-b\t- Don't save RTF-Body attachments\n"); printf("\t-c[v|l]\t- Set the Contact output mode. -cv = VCard, -cl = EMail list\n"); printf("\t-d \t- Debug to file. This is a binary log. Use readpstlog to print it\n"); diff -r 0af0bbe166e1 -r 39ba19372732 xml/libpst.in --- a/xml/libpst.in Tue Oct 07 10:45:50 2008 -0700 +++ b/xml/libpst.in Thu Oct 09 12:04:40 2008 -0700 @@ -33,7 +33,7 @@ - 2008-10-05 + 2008-10-09 @@ -233,7 +233,7 @@ - 2008-10-05 + 2008-10-09 @@ -336,7 +336,7 @@ - 2008-10-05 + 2008-10-09 @@ -520,7 +520,7 @@ - 2008-10-05 + 2008-10-09 @@ -543,6 +543,8 @@ + + pstfilename @@ -569,7 +571,8 @@ Sets the objectClass values for the contact items. This class needs to be defined in the schema used by your LDAP server, and at a minimum it must - contain the ldap attributes given below. + contain the ldap attributes given below. This option may be specified + multiple times to generate entries with multiple object classes. @@ -587,6 +590,26 @@ + -l extra-line + + Specify an extra line to be added to each ldap entry. This + option may be specified multiple times to add multiple lines + to each ldap entry. + + + + -o + + Use the old ldap schema, rather than the default new ldap schema. + The old schema generates multiple postalAddress attributes for + a single entry. The new schema generates a single postalAddress + (and homePostalAddress when available) attribute with $ delimiters + as specified in RFC4517. Using the old schema also generates two + extra leading entries, one for "dn:ldap base", and one for + "dn: cn=root, ldap base". + + + -h Show summary of options. Subsequent options are then ignored. @@ -600,7 +623,8 @@ pst2ldif reads the contact information from a MS Outlook .pst file and produces a .ldif file that may be used to import those contacts - into an LDAP database. The following ldap attributes are generated: + into an LDAP database. The following ldap attributes are generated + for the old ldap schema: cn givenName @@ -619,6 +643,27 @@ mobile description + The following attributes are generated for the new ldap schema: + + cn + givenName + sn + title + o + mail + postalAddress + homePostalAddress + l + st + postalCode + c + homePhone + telephoneNumber + facsimileTelephoneNumber + mobile + description + labeledURI + @@ -651,7 +696,7 @@ - 2008-10-05 + 2008-10-09 @@ -785,7 +830,7 @@ - 2008-10-05 + 2008-10-09