annotate logstash_index_cleaner.py @ 16:aa606d801e02

work on building from source, revert jruby and elastic search to versions bundled into logstash-monolith
author Carl Byington <carl@five-ten-sg.com>
date Thu, 11 Apr 2013 13:48:24 -0700
parents 796ac0b50dbf
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
1 #!/usr/bin/env python
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
2 #
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
3 # Deletes all indices with a datestamp older than "days-to-keep" for daily
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
4 # if you have hourly indices, it will delete all of those older than "hours-to-keep"
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
5 #
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
6 # This script presumes an index is named typically, e.g. logstash-YYYY.MM.DD
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
7 # It will work with any name-YYYY.MM.DD or name-YYYY.MM.DD.HH type sequence
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
8 #
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
9 # Requires python and the following dependencies (all pip/easy_installable):
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
10 #
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
11 # pyes (python elasticsearch bindings, which might need simplejson)
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
12 # argparse (built-in in python2.7 and higher, python 2.6 and lower will have to easy_install it)
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
13 #
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
14 # TODO: Proper logging instead of just print statements, being able to configure a decent logging level.
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
15 # Unit tests. The code is somewhat broken up into logical parts that may be tested separately.
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
16 # Better error reporting?
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
17 # Improve the get_index_epoch method to parse more date formats. Consider renaming (to "parse_date_to_timestamp"?)
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
18
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
19 import sys
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
20 import time
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
21 import argparse
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
22 from datetime import timedelta
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
23
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
24 import pyes
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
25
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
26
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
# Version of this script; shown by the -v/--version command line option.
__version__ = '0.1.2'
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
28
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
29
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
def make_parser():
    """ Creates an ArgumentParser to parse the command line options.

    The parser knows about the Elasticsearch connection (``--host``,
    ``--port``, ``--timeout``), the index naming scheme (``--prefix``,
    ``--separator``), the retention policies (``--hours-to-keep``,
    ``--days-to-keep``, ``--disk-space-to-keep``) and ``--dry-run``.
    The retention options have no default, so they are ``None`` unless given.

    :return: The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(description='Delete old logstash indices from Elasticsearch.')

    parser.add_argument('-v', '--version', action='version', version='%(prog)s '+__version__)

    # Where and how to reach Elasticsearch.
    parser.add_argument('--host', help='Elasticsearch host.', default='localhost')
    parser.add_argument('--port', help='Elasticsearch port', default=9200, type=int)
    parser.add_argument('-t', '--timeout', help='Elasticsearch timeout', default=30, type=int)

    # How the index names are laid out.
    parser.add_argument('-p', '--prefix', help='Prefix for the indices. Indices that do not have this prefix are skipped.', default='logstash-')
    parser.add_argument('-s', '--separator', help='Time unit separator', default='.')

    # Retention policies.
    parser.add_argument('-H', '--hours-to-keep', action='store', help='Number of hours to keep.', type=int)
    parser.add_argument('-d', '--days-to-keep', action='store', help='Number of days to keep.', type=int)
    parser.add_argument('-g', '--disk-space-to-keep', action='store', help='Disk space to keep (GB).', type=float)

    parser.add_argument('-n', '--dry-run', action='store_true', help='If true, does not perform any changes to the Elasticsearch indices.', default=False)

    return parser
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
50
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
51
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
def get_index_epoch(index_timestamp, separator='.'):
    """ Gets the epoch of the index.

    The fields are passed to ``time.mktime`` with the DST flag set to 0, so
    they are interpreted as local wall-clock time.

    :param index_timestamp: A string on the format YYYY.MM.DD[.HH], with the
        fields joined by ``separator``.
    :param separator: The string that separates the fields.
    :return The creation time (epoch, a float) of the index.
    :raises ValueError: If one of the fields is not an integer.
    """
    fields = [int(part) for part in index_timestamp.split(separator)]
    if len(fields) == 3:
        # Daily index: the name carries no hour, so hour 3 is used.
        # NOTE(review): the value 3 is inherited; its rationale is not
        # recorded in this file.
        fields.append(3)

    # Minute, second, weekday, day-of-year and the DST flag are all zero.
    # time.mktime insists on a tuple on Python 3; a list raises TypeError.
    return time.mktime(tuple(fields + [0, 0, 0, 0, 0]))
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
63
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
64
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
def find_expired_indices(connection, days_to_keep=None, hours_to_keep=None, separator='.', prefix='logstash-', out=sys.stdout, err=sys.stderr):
    """ Generator that yields expired indices.

    Daily indices (three timestamp fields) are checked against
    ``days_to_keep``; hourly indices (four fields) against ``hours_to_keep``.
    An index whose kind has no limit configured is skipped.

    :param connection: Object whose ``get_indices()`` returns a mapping keyed
        by index name.
    :param days_to_keep: Number of days to keep daily indices, or None.
    :param hours_to_keep: Number of hours to keep hourly indices, or None.
    :param separator: String separating the fields of the index timestamp.
    :param prefix: Only indices whose name starts with this are considered.
    :param out: File-like object that receives informational messages.
    :param err: File-like object that receives messages about malformed names.
    :return: Yields tuples on the format ``(index_name, expired_by)`` where index_name
        is the name of the expired index and expired_by is the number of seconds (a float value) that the
        index was expired by.
    """
    # get_index_epoch() runs the index timestamp through time.mktime with the
    # DST flag cleared, i.e. as local *standard* time. "Now" therefore has to
    # be shifted by the standard offset (time.timezone), not by the DST offset
    # (time.altzone), otherwise the comparison is an hour off in zones that
    # define DST. Presumably the index names carry UTC dates - confirm.
    utc_now_time = time.time() + time.timezone
    days_cutoff = None if days_to_keep is None else utc_now_time - days_to_keep * 24 * 60 * 60
    hours_cutoff = None if hours_to_keep is None else utc_now_time - hours_to_keep * 60 * 60

    for index_name in sorted(set(connection.get_indices().keys())):
        if not index_name.startswith(prefix):
            out.write('Skipping index due to missing prefix {0}: {1}\n'.format(prefix, index_name))
            continue

        unprefixed_index_name = index_name[len(prefix):]

        # find the timestamp parts (i.e ['2011', '01', '05'] from '2011.01.05') using the configured separator
        parts = unprefixed_index_name.split(separator)

        # perform some basic validation
        if len(parts) not in (3, 4) or not all(item.isdigit() for item in parts):
            err.write('Could not find a valid timestamp from the index: {0}\n'.format(index_name))
            continue

        # three parts means a daily index, four parts an hourly one; each
        # kind has its own cutoff
        cutoff = days_cutoff if len(parts) == 3 else hours_cutoff

        # the cutoff is None when no limit was given for this kind of index
        if cutoff is None:
            out.write('Skipping {0} because it is of a type (hourly or daily) that I\'m not asked to delete.\n'.format(index_name))
            continue

        # the separator must be forwarded: the timestamp was validated with
        # it above, and the default '.' would fail to split anything else
        index_epoch = get_index_epoch(unprefixed_index_name, separator)

        # if the index is older than the cutoff
        if index_epoch < cutoff:
            yield index_name, cutoff-index_epoch
        else:
            out.write('{0} is {1} above the cutoff.\n'.format(index_name, timedelta(seconds=index_epoch-cutoff)))
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
111
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
def find_overusage_indices(connection, disk_space_to_keep, separator='.', prefix='logstash-', out=sys.stdout, err=sys.stderr):
    """ Generator that yields over usage indices.

    Indices are visited in reverse name order and their primary sizes are
    summed; every index visited once the running total exceeds the limit is
    yielded. An index whose size cannot be read from the status response is
    reported on ``err`` and neither counted nor yielded.

    :param connection: Object providing ``get_indices()`` (a mapping keyed by
        index name) and ``status(index_name)``.
    :param disk_space_to_keep: Disk space limit in GB (multiples of 2**30 bytes).
    :param separator: Unused; kept so the signature parallels find_expired_indices.
    :param prefix: Only indices whose name starts with this are considered.
    :param out: File-like object that receives informational messages.
    :param err: File-like object that receives messages about unreadable sizes.
    :return: Yields tuples on the format ``(index_name, 0)`` where index_name
        is the name of the expired index. The second element is only here for
        compatibility reasons.
    """
    disk_usage = 0.0
    disk_limit = disk_space_to_keep * 2**30

    for index_name in reversed(sorted(set(connection.get_indices().keys()))):

        if not index_name.startswith(prefix):
            out.write('Skipping index due to missing prefix {0}: {1}\n'.format(prefix, index_name))
            continue

        status = connection.status(index_name)
        try:
            index_size = status.get('indices').get(index_name).get('index').get('primary_size_in_bytes')
        except AttributeError:
            # one of the intermediate levels was missing (None)
            index_size = None

        if index_size is None:
            err.write('Could not determine the size of index: {0}\n'.format(index_name))
            continue

        disk_usage += index_size

        if disk_usage > disk_limit:
            yield index_name, 0
        else:
            out.write('keeping {0}, disk usage is {1:.3f} GB and disk limit is {2:.3f} GB.\n'.format(index_name, disk_usage/2**30, disk_limit/2**30))
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
136
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
def main():
    """ Command line entry point: deletes (or, with --dry-run, lists) old indices.

    Parses the command line, connects to Elasticsearch and applies every
    retention policy that was requested: the time based one (days and/or
    hours to keep) first, then the disk space based one. Progress is written
    to stdout. If no policy was requested, a message and the usage help are
    printed and the function returns without connecting.
    """
    start = time.time()
    write = sys.stdout.write

    parser = make_parser()
    arguments = parser.parse_args()

    if not (arguments.hours_to_keep or arguments.days_to_keep or arguments.disk_space_to_keep):
        sys.stderr.write('Invalid arguments: You must specify either the number of hours, the number of days to keep or the maximum disk space to use\n')
        parser.print_help()
        return

    connection = pyes.ES('{0}:{1}'.format(arguments.host, arguments.port), timeout=arguments.timeout)

    # One generator per requested policy. Previously a disk space limit
    # silently replaced the time based selection even though the time based
    # deletion had just been announced; now every requested policy is applied.
    selections = []

    if arguments.days_to_keep:
        write('Deleting daily indices older than {0} days.\n'.format(arguments.days_to_keep))
    if arguments.hours_to_keep:
        write('Deleting hourly indices older than {0} hours.\n'.format(arguments.hours_to_keep))
    if arguments.days_to_keep or arguments.hours_to_keep:
        # a single pass handles both daily and hourly indices
        selections.append(find_expired_indices(connection, arguments.days_to_keep, arguments.hours_to_keep, arguments.separator, arguments.prefix))
    if arguments.disk_space_to_keep:
        write('Let\'s keep disk usage lower than {} GB.\n'.format(arguments.disk_space_to_keep))
        selections.append(find_overusage_indices(connection, arguments.disk_space_to_keep, arguments.separator, arguments.prefix))

    write('\n')

    # The generators are lazy, so the disk space pass only lists the indices
    # after the time based deletions have been carried out. In a dry run
    # nothing is deleted and both passes may select the same index; report
    # each index only once.
    handled = set()
    for selection in selections:
        for index_name, expired_by in selection:
            if index_name in handled:
                continue
            handled.add(index_name)

            expiration = timedelta(seconds=expired_by)

            if arguments.dry_run:
                write('Would have attempted deleting index {0} because it is {1} older than the calculated cutoff.\n'.format(index_name, expiration))
                continue

            write('Deleting index {0} because it was {1} older than cutoff.\n'.format(index_name, expiration))

            deletion = connection.delete_index_if_exists(index_name)
            # ES returns a dict on the format {u'acknowledged': True, u'ok': True} on success.
            # NOTE(review): delete_index_if_exists appears to return None when
            # the index is already gone - guard against it; confirm with pyes.
            if deletion and deletion.get('ok'):
                write('Successfully deleted index: {0}\n'.format(index_name))
            else:
                write('Error deleting index: {0}. ({1})\n'.format(index_name, deletion))

    write('\n')
    write('Done in {0}.\n'.format(timedelta(seconds=time.time()-start)))
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
180
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
181
796ac0b50dbf add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff changeset
# Run the cleaner only when executed as a script, not when imported.
if __name__ == '__main__':
    main()