view src/pst2ldif.cpp @ 118:0f1492b7fe8b

patch from Fridrich Strba for building on mingw and general cleanup of autoconf files add processing for pst files of type 0x0f start adding support for properly building and installing libpst.so and the header files required to use it. remove version.h since the version number is now in config.h more const correctness issues regarding getopt()
author Carl Byington <carl@five-ten-sg.com>
date Sat, 31 Jan 2009 12:12:36 -0800
parents e213bfcf9aa7
children 6395ced2b8b2
line wrap: on
line source

/*

Copyright (c) 2004 Carl Byington - 510 Software Group, released under
the GPL version 2 or any later version at your choice available at
http://www.fsf.org/licenses/gpl.txt

Based on readpst.c by David Smith

*/

using namespace std;

// needed for std c++ collections
#include <set>
#include <vector>
#include <string>

extern "C" {
    #include "define.h"
    #include "libstrfunc.h"
    #include "libpst.h"
    #include "common.h"
    #include "timeconv.h"
    #include "lzfu.h"
    #include "stdarg.h"
    #include "iconv.h"
}

void       usage(void);
void       version(void);
char       *check_filename(char *fname);
void        print_ldif_single(const char *attr, const char *value);
void        print_ldif_address(const char *attr, int nvalues, char *value, ...);
void        print_ldif_dn(const char *attr, const char *value, const char *base);
void        print_ldif_multi(const char *dn, const char *value);
void        print_ldif_two(const char *attr, const char *value1, const char *value2);
void        print_escaped_dn(const char *value);
void        build_cn(char *cn, size_t len, int nvalues, char *value, ...);

char *prog_name;
pst_file pstfile;
bool    old_schema            = false;
char    *ldap_base            = NULL;   // 'o=some.domain.tld,c=US'
int     ldif_extra_line_count = 0;
iconv_t cd                    = 0;      // Character set conversion descriptor
vector<string> ldap_class;              // 'newPerson' or 'inetOrgPerson'
vector<string> ldif_extra_line;         // 'o: myorg'


////////////////////////////////////////////////
// define our ordering
struct ltstr {
    bool operator()(const char* s1, const char* s2) const {
        return strcasecmp(s1, s2) < 0;
    }
};
// define our set
typedef set<const char *, ltstr>    string_set;
// make a static set to hold the cn values
static string_set all_strings;


////////////////////////////////////////////////
// helper to free all the strings in a set
//
static void free_strings(string_set &s);
static void free_strings(string_set &s)
{
    for (string_set::iterator i=s.begin(); i!=s.end(); i++) {
        free((void*)*i);
    }
    s.clear();
}


////////////////////////////////////////////////
// helper to register a string in a string set
//
static const char* register_string(string_set &s, const char *name);
static const char* register_string(string_set &s, const char *name) {
    string_set::const_iterator i = s.find(name);
    if (i != s.end()) return *i;
    char *x = strdup(name);
    s.insert(x);
    return x;
}


////////////////////////////////////////////////
// register a global string
//
static const char* register_string(const char *name);
static const char* register_string(const char *name) {
    return register_string(all_strings, name);
}


////////////////////////////////////////////////
// make a unique string
//
static const char* unique_string(const char *name);
static const char* unique_string(const char *name) {
    int  unique = 2;
    string_set::iterator i = all_strings.find(name);
    if (i == all_strings.end()) return register_string(name);
    while (true) {
        char n[strlen(name)+10];
        snprintf(n, sizeof(n), "%s %d", name, unique++);
        string_set::iterator i = all_strings.find(n);
        if (i == all_strings.end()) return register_string(n);
    }
}


static void process(pst_desc_ll *d_ptr);
static void process(pst_desc_ll *d_ptr) {
    pst_item *item = NULL;
    while (d_ptr) {
        if (d_ptr->desc) {
            item = pst_parse_item(&pstfile, d_ptr);
            DEBUG_INFO(("item pointer is %p\n", item));
            if (item) {
                if (item->folder && d_ptr->child && strcasecmp(item->file_as, "Deleted Items")) {
                    //if this is a non-empty folder other than deleted items, we want to recurse into it
                    fprintf(stderr, "entering folder %s\n", item->file_as);
                    process(d_ptr->child);

                } else if (item->contact && (item->type == PST_TYPE_CONTACT)) {
                    // deal with a contact
                    char cn[1000];

                    build_cn(cn, sizeof(cn), 4,
                        item->contact->display_name_prefix,
                        item->contact->first_name,
                        item->contact->surname,
                        item->contact->suffix);
                    if (cn[0] != 0) {
                        // have a valid cn
                        const char *ucn = unique_string(cn);

                        print_ldif_dn("dn", ucn, ldap_base);
                        print_ldif_single("cn", ucn);
                        if (item->contact->first_name) {
                            print_ldif_two("givenName",
                                           item->contact->display_name_prefix,
                                           item->contact->first_name);
                        }
                        if (item->contact->surname) {
                            print_ldif_two("sn",
                                           item->contact->surname,
                                           item->contact->suffix);
                        }
                        else if (item->contact->company_name) {
                            print_ldif_single("sn", item->contact->company_name);
                        }
                        else
                            print_ldif_single("sn", ucn); // use cn as sn if we cannot find something better

                        if (old_schema) {
                            if (item->contact->job_title)
                                print_ldif_single("personalTitle", item->contact->job_title);
                            if (item->contact->company_name)
                                print_ldif_single("company", item->contact->company_name);
                        }
                        else {
                            // new schema
                            if (item->contact->job_title)
                                print_ldif_single("title", item->contact->job_title);
                            if (item->contact->company_name)
                                print_ldif_single("o", item->contact->company_name);
                        }
                        if (item->contact->address1  && *item->contact->address1)
                            print_ldif_single("mail", item->contact->address1);
                        if (item->contact->address2  && *item->contact->address2)
                            print_ldif_single("mail", item->contact->address2);
                        if (item->contact->address3  && *item->contact->address3)
                            print_ldif_single("mail", item->contact->address3);
                        if (item->contact->address1a && *item->contact->address1a)
                            print_ldif_single("mail", item->contact->address1a);
                        if (item->contact->address2a && *item->contact->address2a)
                            print_ldif_single("mail", item->contact->address2a);
                        if (item->contact->address3a && *item->contact->address3a)
                            print_ldif_single("mail", item->contact->address3a);

                        if (old_schema) {
                            if (item->contact->business_address) {
                                if (item->contact->business_po_box)
                                    print_ldif_single("postalAddress", item->contact->business_po_box);
                                if (item->contact->business_street)
                                    print_ldif_multi("postalAddress", item->contact->business_street);
                                if (item->contact->business_city)
                                    print_ldif_single("l", item->contact->business_city);
                                if (item->contact->business_state)
                                    print_ldif_single("st", item->contact->business_state);
                                if (item->contact->business_postal_code)
                                    print_ldif_single("postalCode", item->contact->business_postal_code);
                            }
                            else if (item->contact->home_address) {
                                if (item->contact->home_po_box)
                                    print_ldif_single("postalAddress", item->contact->home_po_box);
                                if (item->contact->home_street)
                                    print_ldif_multi("postalAddress", item->contact->home_street);
                                if (item->contact->home_city)
                                    print_ldif_single("l", item->contact->home_city);
                                if (item->contact->home_state)
                                    print_ldif_single("st", item->contact->home_state);
                                if (item->contact->home_postal_code)
                                    print_ldif_single("postalCode", item->contact->home_postal_code);
                            }
                            else if (item->contact->other_address) {
                                if (item->contact->other_po_box)
                                    print_ldif_single("postalAddress", item->contact->other_po_box);
                                if (item->contact->other_street)
                                    print_ldif_multi("postalAddress", item->contact->other_street);
                                if (item->contact->other_city)
                                    print_ldif_single("l", item->contact->other_city);
                                if (item->contact->other_state)
                                    print_ldif_single("st", item->contact->other_state);
                                if (item->contact->other_postal_code)
                                    print_ldif_single("postalCode", item->contact->other_postal_code);
                            }
                        }
                        else {
                            // new schema, with proper RFC4517 postal addresses
                            if (item->contact->business_address) {
                                print_ldif_address("postalAddress", 6,
                                    item->contact->business_po_box,
                                    item->contact->business_street,
                                    item->contact->business_city,
                                    item->contact->business_state,
                                    item->contact->business_postal_code,
                                    item->contact->business_country);
                                if (item->contact->business_city)
                                    print_ldif_single("l", item->contact->business_city);
                                if (item->contact->business_state)
                                    print_ldif_single("st", item->contact->business_state);
                                if (item->contact->business_postal_code)
                                    print_ldif_single("postalCode", item->contact->business_postal_code);
                            }
                            else if (item->contact->home_address) {
                                if (item->contact->home_city)
                                    print_ldif_single("l", item->contact->home_city);
                                if (item->contact->home_state)
                                    print_ldif_single("st", item->contact->home_state);
                                if (item->contact->home_postal_code)
                                    print_ldif_single("postalCode", item->contact->home_postal_code);
                            }
                            else if (item->contact->other_address) {
                                print_ldif_address("postalAddress", 6,
                                    item->contact->other_po_box,
                                    item->contact->other_street,
                                    item->contact->other_city,
                                    item->contact->other_state,
                                    item->contact->other_postal_code,
                                    item->contact->other_country);
                                if (item->contact->other_city)
                                    print_ldif_single("l", item->contact->other_city);
                                if (item->contact->other_state)
                                    print_ldif_single("st", item->contact->other_state);
                                if (item->contact->other_postal_code)
                                    print_ldif_single("postalCode", item->contact->other_postal_code);
                            }
                            if (item->contact->home_address) {
                                print_ldif_address("homePostalAddress", 6,
                                    item->contact->home_po_box,
                                    item->contact->home_street,
                                    item->contact->home_city,
                                    item->contact->home_state,
                                    item->contact->home_postal_code,
                                    item->contact->home_country);
                            }
                        }

                        if (item->contact->business_fax)
                            print_ldif_single("facsimileTelephoneNumber", item->contact->business_fax);
                        else if (item->contact->home_fax)
                            print_ldif_single("facsimileTelephoneNumber", item->contact->home_fax);

                        if (item->contact->business_phone)
                            print_ldif_single("telephoneNumber", item->contact->business_phone);
                        if (item->contact->home_phone)
                            print_ldif_single("homePhone", item->contact->home_phone);

                        if (item->contact->car_phone)
                            print_ldif_single("mobile", item->contact->car_phone);
                        else if (item->contact->mobile_phone)
                            print_ldif_single("mobile", item->contact->mobile_phone);
                        else if (item->contact->other_phone)
                            print_ldif_single("mobile", item->contact->other_phone);

                        if (!old_schema) {
                            if (item->contact->business_homepage)
                                print_ldif_single("labeledURI", item->contact->business_homepage);
                            if (item->contact->personal_homepage)
                                print_ldif_single("labeledURI", item->contact->personal_homepage);
                        }

                        if (item->comment)
                            print_ldif_single("description", item->comment);

                        for (vector<string>::size_type i=0; i<ldap_class.size(); i++)
                            print_ldif_single("objectClass", ldap_class[i].c_str());
                        printf("\n");
                    }
                }
                else {
                    DEBUG_INFO(("item is not a contact\n"));
                }
            }
            pst_freeItem(item);
        }
        d_ptr = d_ptr->next;
    }
}


// Prints an attribute together with its value.
// If the value isn't a "SAFE STRING" (as defined in RFC2849),
// then it is output as a BASE-64 encoded value
void print_ldif_single(const char *attr, const char *value)
{
    size_t len;
    bool is_safe_string = true;
    bool needs_code_conversion = false;
    bool space_flag = false;

    // Strip leading spaces
    while (*value == ' ') value++;
    len = strlen(value) + 1;
    char buffer[len];
    char *p = buffer;

    // See if "value" is a "SAFE STRING"
    // First check characters that are safe but not safe as initial characters
    if (*value == ':' || *value == '<')
        is_safe_string = false;
    for (;;) {
        char ch = *value++;

        if (ch == 0 || ch == '\n')
            break;
        else if (ch == '\r')
            continue;
        else if (ch == ' ') {
            space_flag = true;
            continue;
        }
        else {
            if ((ch & 0x80) == 0x80) {
                needs_code_conversion = true;
                is_safe_string = false;
            }
            if (space_flag) {
                *p++ = ' ';
                space_flag = false;
            }
            *p++ = ch;
        }
    }
    *p = 0;
    if (is_safe_string) {
        printf("%s: %s\n", attr, buffer);
        return;
    }

    if (needs_code_conversion && cd != 0) {
        size_t inlen = p - buffer;
        size_t utf8_len = 2 * inlen + 1;
        char utf8_buffer[utf8_len];
        char *utf8_p = utf8_buffer;

        iconv(cd, NULL, NULL, NULL, NULL);
        p = buffer;
        int ret = iconv(cd, (ICONV_CONST char**)&p, &inlen, &utf8_p, &utf8_len);

        if (ret >= 0) {
            *utf8_p = 0;
            p = base64_encode(utf8_buffer, utf8_p - utf8_buffer);
        }
        else
            p = base64_encode(buffer, strlen(buffer));
    }
    else
        p = base64_encode(buffer, strlen(buffer));
    printf("%s:: %s\n", attr, p);
    free(p);
}


// Combines values representing address lines into an address,i
// lines separated with "$" as per PostalAddress syntax in RFC4517
void print_ldif_address(const char *attr, int nvalues, char *value, ...)
{
    bool space_flag = false;
    bool newline_flag = false;
    char *address = NULL;    // Buffer where address is built up
    int len = 0;             // Length of buffer
    int i = 0;               // Index of next character position in buffer
    va_list ap;

    va_start(ap, value);

    while (!value) {
       nvalues--;
       if (nvalues == 0) {    // Nothing at all to do!
           va_end(ap);
           return;
       }
       value = va_arg(ap, char *);
    }
    for (;;) {
        char ch = *value++;

        if (ch == 0 || ch == '\n') {
            do {
                value = NULL;
                nvalues--;
                if (nvalues == 0) break;
                value = va_arg(ap, char *);
            } while (!value);
            if (!value) break;
            space_flag = true;
            newline_flag = true;
        }
        else if (ch == '\r')
            continue;
        else if (ch == '\n') {
            newline_flag = true;
            continue;
        }
        else if (ch == ' ') {
            space_flag = true;
            continue;
        }
        else {
            if (i > (len-5)) {
                len += 256;
                address = (char *)realloc(address, len);
            }
            if (newline_flag) {
                address[i++] = '$';
                newline_flag = false;
                space_flag   = false;
            }
            else if (space_flag) {
                address[i++] = ' ';
                space_flag   = false;
            }
            if (ch == '$' || ch == '\\') address[i++] = '\\';
            address[i++] = ch;
        }
    }
    va_end(ap);
    if (i == 0) return;   // Nothing to do
    address[i] = 0;
    print_ldif_single(attr, address);
    free(address);
}


void print_ldif_multi(const char *dn, const char *value)
{
    const char *n;
    while ((n = strchr(value, '\n'))) {
        print_ldif_single(dn, value);
        value = n + 1;
    }
    print_ldif_single(dn, value);
}


void print_ldif_two(const char *attr, const char *value1, const char *value2)
{
    size_t len1, len2;
    if (value1 && *value1)
        len1 = strlen(value1);
    else {
        print_ldif_single(attr, value2);
        return;
    }

    if (value2 && *value2)
        len2 = strlen(value2);
    else {
        print_ldif_single(attr, value1);
        return;
    }

    char value[len1 + len2 + 2];
    memcpy(value, value1, len1);
    value[len1] = ' ';
    memcpy(value + len1 + 1, value2, len2 + 1);
    print_ldif_single(attr, value);
}


void build_cn(char *cn, size_t len, int nvalues, char *value, ...)
{
    bool space_flag = false;
    size_t i = 0;
    va_list ap;

    va_start(ap, value);

    while (!value) {
       nvalues--;
       if (nvalues == 0) {
           cn[0] = 0;   // Just a terminating NUL
           va_end(ap);
           return;
       }
       value = va_arg(ap, char *);
    }
    for (;;) {
        char ch = *value++;

        if (ch == 0 || ch == '\n') {
            do {
                value = NULL;
                nvalues--;
                if (nvalues == 0) break;
                value = va_arg(ap, char *);
            } while (!value);
            if (!value) break;
            space_flag = true;
        }
        else if (ch == '\r')
            continue;
        else if (ch == ' ') {
            space_flag = true;
            continue;
        }
        else {
            if (space_flag) {
                if (i > 0) {
                    if (i < (len - 2)) cn[i++] = ' ';
                    else               break;
                }
                space_flag = false;
            }
            if (i < (len - 1)) cn[i++] = ch;
            else               break;
        }
    }
    cn[i] = 0;
    va_end(ap);
}


int main(int argc, char* const* argv) {
    pst_desc_ll *d_ptr;
    char *fname = NULL;
    int c;
    char *d_log = NULL;
    prog_name = argv[0];
    pst_item *item = NULL;

    while ((c = getopt(argc, argv, "b:c:C:d:l:oVh"))!= -1) {
        switch (c) {
        case 'b':
            ldap_base = optarg;
            break;
        case 'c':
            ldap_class.push_back(string(optarg));
            break;
        case 'C':
            cd = iconv_open("UTF-8", optarg);
            if (cd == (iconv_t)(-1)) {
                fprintf(stderr, "I don't know character set \"%s\"!\n\n", optarg);
                fprintf(stderr, "Type: \"iconv --list\" to get list of known character sets\n");
                return 1;
            }
            break;
        case 'd':
            d_log = optarg;
            break;
        case 'h':
            usage();
            exit(0);
            break;
        case 'l':
            ldif_extra_line.push_back(string(optarg));
            break;
        case 'o':
            old_schema = true;
            break;
        case 'V':
            version();
            exit(0);
            break;
        default:
            usage();
            exit(1);
            break;
        }
    }

    if ((argc > optind) && (ldap_base)) {
        fname = argv[optind];
    } else {
        usage();
        exit(2);
    }

    #ifdef DEBUG_ALL
        // force a log file
        if (!d_log) d_log = "pst2ldif.log";
    #endif
    DEBUG_INIT(d_log);
    DEBUG_REGISTER_CLOSE();
    DEBUG_ENT("main");
    RET_DERROR(pst_open(&pstfile, fname), 1, ("Error opening File\n"));
    RET_DERROR(pst_load_index(&pstfile), 2, ("Index Error\n"));

    pst_load_extended_attributes(&pstfile);

    d_ptr = pstfile.d_head; // first record is main record
    item  = (pst_item*)pst_parse_item(&pstfile, d_ptr);
    if (!item || !item->message_store) {
        DEBUG_RET();
        DIE(("main: Could not get root record\n"));
    }

    d_ptr = pst_getTopOfFolders(&pstfile, item);
    if (!d_ptr) {
        DEBUG_RET();
        DIE(("Top of folders record not found. Cannot continue\n"));
    }

    pst_freeItem(item);

    if (old_schema && (strlen(ldap_base) > 2)) {
        char *ldap_org = strdup(ldap_base+2); // assume first 2 chars are o=
        char *temp = strchr(ldap_org, ',');
        if (temp) {
            *temp = '\0';
            // write the ldap header
            printf("dn: %s\n", ldap_base);
            printf("o: %s\n", ldap_org);
            printf("objectClass: organization\n\n");
            printf("dn: cn=root, %s\n", ldap_base);
            printf("cn: root\n");
            printf("sn: root\n");
            for (vector<string>::size_type i=0; i<ldap_class.size(); i++)
                print_ldif_single("objectClass", ldap_class[i].c_str());
            printf("\n");
        }
    }

    process(d_ptr->child);  // do the children of TOPF
    pst_close(&pstfile);
    DEBUG_RET();
    free_strings(all_strings);
    if (cd) iconv_close(cd);

    return 0;
}


void usage(void) {
    version();
    printf("Usage: %s [OPTIONS] {PST FILENAME}\n", prog_name);
    printf("OPTIONS:\n");
    printf("\t-V\t- Version. Display program version\n");
    printf("\t-C charset\t- assumed character set of non-ASCII characters\n");
    printf("\t-b ldapbase\t- set the LDAP base value\n");
    printf("\t-c class\t- set the class of the LDAP objects (may contain more than one)\n");
    printf("\t-d <filename>\t- Debug to file. This is a binary log. Use readpstlog to print it\n");
    printf("\t-h\t- Help. This screen\n");
    printf("\t-l line\t- extra line to insert in the LDIF file for each contact\n");
    printf("\t-o\t- use old schema, default is new schema\n");
}


void version(void) {
    printf("pst2ldif v%s\n", VERSION);
#if BYTE_ORDER == BIG_ENDIAN
    printf("Big Endian implementation being used.\n");
#elif BYTE_ORDER == LITTLE_ENDIAN
    printf("Little Endian implementation being used.\n");
#else
#  error "Byte order not supported by this library"
#endif
#ifdef __GNUC__
    printf("GCC %d.%d : %s %s\n", __GNUC__, __GNUC_MINOR__, __DATE__, __TIME__);
#endif
}


char *check_filename(char *fname) {
    char *t = fname;
    if (t == NULL) {
        return fname;
    }
    while ((t = strpbrk(t, "/\\:"))) {
        // while there are characters in the second string that we don't want
        *t = '_'; //replace them with an underscore
    }
    return fname;
}


// This function escapes Distinguished Names (as per RFC4514)
void print_ldif_dn(const char *attr, const char *value, const char *base)
{
    printf("dn: cn=");
    // remove leading spaces (RFC says escape them)
    while (*value == ' ')
        value++;

    print_escaped_dn(value);
    if (base && base[0]) {
        printf(", %s", base);
    }
    printf("\n");
    return;
}


void print_escaped_dn(const char *value)
{
    char ch;
    bool needs_code_conversion = false;
    char *utf8_buffer = NULL;

    // First do a quick scan to see if any code conversion is required
    if (cd) {
        const char *p = value;
        while (*p) {
            if (*p++ & 0x80) {
                needs_code_conversion = true;
                break;
            }
        }
    }

    if (needs_code_conversion) {
        size_t inlen = strlen(value);
        size_t utf8_len = 2 * inlen + 1;
        char *p = (char *)value;
        char *utf8_p = utf8_buffer;

        utf8_buffer = (char *)malloc(utf8_len);
        utf8_p = utf8_buffer;
        iconv(cd, NULL, NULL, NULL, NULL);
        if (iconv(cd, (ICONV_CONST char**)&p, &inlen, &utf8_p, &utf8_len) >= 0) {
            *utf8_p = 0;
            value = utf8_buffer;
        }
    }

    // escape initial '#' and space
    if (*value == '#' || *value == ' ')
        putchar('\\');

    while ((ch = *value++) != 0) {
        if (((ch & 0x80) != 0) || (ch <= 0x1F))
            // Print as escaped hex digits
            printf("\\%2.2X", ch & 0xFF);
        else switch (ch) {
            case '\\':
            case '"' :
            case '+' :
            case ',' :
            case ';' :
            case '<' :
            case '>' :
                putchar('\\');
                // Fall through
            default:
                putchar(ch);
        }
    }
    if (utf8_buffer) free((void *)utf8_buffer);
    return;
}