annotate python/test.py @ 355:d1f930be4711

From Jeffrey Morlan: pst_build_id_ptr and pst_build_desc_ptr require that the first child of a BTree page have the same starting ID as the page itself. This is not required by the spec, and is not true in many real-world PSTs (presumably because the original first child of the page was deleted). Because of this, many emails are not being extracted from these PSTs. It also triggers an infinite loop in lspst (a separate bug, also fixed).
author Carl Byington <carl@five-ten-sg.com>
date Wed, 06 Jul 2016 10:12:22 -0700
parents 1ddc61fd6189
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
"""Smoke test for the _libpst Python bindings.

Usage: python test.py FILE.pst [FILE.pst ...]

For each PST file named on the command line, open it, print a fixed
FILETIME through both datetime formatters, then walk the descriptor chain
from pst_getTopOfFolders() via pst_getNextDptr(), printing a few fields of
every item whose type is EMAIL_ITEM_TYPE.

NOTE(review): this script was reconstructed from an annotate listing that
had lost its indentation; block nesting was inferred from data flow.
Confirm against the repository copy before relying on exact structure.
"""
import sys

import _libpst

# item.type value for which this script reads item.email.
EMAIL_ITEM_TYPE = 1

# Debug switches. Defaults reproduce what the script did before:
# the attachment walk was disabled outright, and of the three
# per-attachment actions only the in-memory read was enabled.
PRINT_BODY = False
WALK_ATTACHMENTS = False
ATTACH_TO_MEM = True
WRITE_BASE64 = False
WRITE_RAW = False


def _sample_filetime():
    """Build the fixed FILETIME used to exercise the datetime formatters."""
    sample = _libpst.FILETIME()
    sample.dwLowDateTime = 0
    sample.dwHighDateTime = 1
    return sample


def _write_attachment(pst, att, path, writer):
    """Write one attachment to `path` with `writer(att, f)` and report its size."""
    f = pst.ppst_open_file(path, 'w')
    if not f:
        return
    try:
        size = writer(att, f)
    finally:
        # Close the handle even if the write raises.
        pst.ppst_close_file(f)
    print("wrote %d bytes in %s" % (size, path))


def _report_attachments(pst, item):
    """Follow item.attach through .next, printing each attachment."""
    att = item.attach
    while att:
        short_name = att.filename1
        long_name = att.filename2
        print("attachment id %d file name short '%s' long '%s'"
              % (att.i_id, short_name.str, long_name.str))
        if ATTACH_TO_MEM:
            data = pst.pst_attach_to_mem(att)
            if data:
                print("data size %d" % (len(data)))
        if WRITE_BASE64:
            _write_attachment(pst, att, long_name.str,
                              pst.pst_attach_to_file_base64)
        if WRITE_RAW:
            _write_attachment(pst, att, long_name.str,
                              pst.pst_attach_to_file)
        att = att.next


def _report_email(pst, item):
    """Print charset, arrival time, message id, rtf size and subject of an email item."""
    em = item.email
    if em:
        print("default charset %s" % (pst.pst_default_charset(item)))
        # Use a local name here: rebinding the shared sample FILETIME would
        # change what the formatter checks print for every later file.
        arrived = em.arrival_date
        if arrived:
            print("message arrived at %s"
                  % (pst.pst_rfc2425_datetime_format(arrived)))
        if em.messageid.str:
            print("message id is <%s>" % (em.messageid.str))
        # Kept under the `em` guard so a missing email part cannot raise.
        rtf = em.rtf_compressed
        if rtf and len(rtf) > 0:
            print("rtf compressed size %d" % len(rtf))

    subj = item.subject
    if subj and subj.str:
        was_utf8 = subj.is_utf8
        pst.pst_convert_utf8(item, subj)
        if was_utf8 != subj.is_utf8:
            print("subject was converted to utf8")
        print("subject is %s" % (subj.str))

    if PRINT_BODY:
        body = item.body
        if body and body.str:
            print("message body is %s" % (body.str))

    if WALK_ATTACHMENTS:
        _report_attachments(pst, item)


def _dump_file(path, sample_ft):
    """Open one PST file and report on every item in its descriptor chain."""
    print("try file %s" % (path))
    pst = _libpst.pst(path, "")
    topf = pst.pst_getTopOfFolders()

    print(pst.pst_rfc2425_datetime_format(sample_ft))
    print(pst.pst_rfc2445_datetime_format(sample_ft))

    while topf:
        item = pst.pst_parse_item(topf, None)
        if item:
            if item.type == EMAIL_ITEM_TYPE:
                _report_email(pst, item)
            pst.pst_freeItem(item)
        topf = pst.pst_getNextDptr(topf)
    print("done")


def main(argv):
    """Run the dump over every path in argv[1:]."""
    sample_ft = _sample_filetime()
    for path in argv[1:]:
        _dump_file(path, sample_ft)


if __name__ == "__main__":
    main(sys.argv)