Mercurial > logstash
annotate logstash_index_cleaner.py @ 3:796ac0b50dbf
add cron.daily index cleaning
author | Carl Byington <carl@five-ten-sg.com> |
---|---|
date | Thu, 07 Mar 2013 10:41:01 -0800 |
parents | |
children |
rev | line source |
---|---|
3
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
1 #!/usr/bin/env python |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
2 # |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
3 # Deletes all indices with a datestamp older than "days-to-keep" for daily |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
4 # if you have hourly indices, it will delete all of those older than "hours-to-keep" |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
5 # |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
6 # This script presumes an index is named typically, e.g. logstash-YYYY.MM.DD |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
7 # It will work with any name-YYYY.MM.DD or name-YYYY.MM.DD.HH type sequence |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
8 # |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
9 # Requires python and the following dependencies (all pip/easy_installable): |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
10 # |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
11 # pyes (python elasticsearch bindings, which might need simplejson) |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
12 # argparse (built-in in python2.7 and higher, python 2.6 and lower will have to easy_install it) |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
13 # |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
14 # TODO: Proper logging instead of just print statements, being able to configure a decent logging level. |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
15 # Unit tests. The code is somewhat broken up into logical parts that may be tested separately. |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
16 # Better error reporting? |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
17 # Improve the get_index_epoch method to parse more date formats. Consider renaming (to "parse_date_to_timestamp"?) |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
18 |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
19 import sys |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
20 import time |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
21 import argparse |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
22 from datetime import timedelta |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
23 |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
24 import pyes |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
25 |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
26 |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
27 __version__ = '0.1.2' |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
28 |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
29 |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
def make_parser():
    """ Builds the command line interface of the index cleaner.

    The options fall into four groups: program information (--version),
    the Elasticsearch connection (--host, --port, --timeout), index naming
    (--prefix, --separator) and retention (--hours-to-keep, --days-to-keep,
    --disk-space-to-keep), plus the --dry-run switch.

    :return: A configured ``argparse.ArgumentParser``.
    """
    # One entry per option, in the order they should show up in --help:
    # (flag names, keyword arguments handed to add_argument).
    option_table = (
        (('-v', '--version'), dict(action='version', version='%(prog)s ' + __version__)),

        (('--host',), dict(help='Elasticsearch host.', default='localhost')),
        (('--port',), dict(help='Elasticsearch port', default=9200, type=int)),
        (('-t', '--timeout'), dict(help='Elasticsearch timeout', default=30, type=int)),

        (('-p', '--prefix'), dict(help='Prefix for the indices. Indices that do not have this prefix are skipped.', default='logstash-')),
        (('-s', '--separator'), dict(help='Time unit separator', default='.')),

        (('-H', '--hours-to-keep'), dict(action='store', help='Number of hours to keep.', type=int)),
        (('-d', '--days-to-keep'), dict(action='store', help='Number of days to keep.', type=int)),
        (('-g', '--disk-space-to-keep'), dict(action='store', help='Disk space to keep (GB).', type=float)),

        (('-n', '--dry-run'), dict(action='store_true', help='If true, does not perform any changes to the Elasticsearch indices.', default=False)),
    )

    cli = argparse.ArgumentParser(description='Delete old logstash indices from Elasticsearch.')
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    return cli
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
50 |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
51 |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
def get_index_epoch(index_timestamp, separator='.'):
    """ Gets the epoch of the index.

    :param index_timestamp: A string on the format YYYY.MM.DD[.HH], where the
        fields are joined by ``separator``.
    :param separator: The string that separates the date fields (default '.').
    :return The creation time (epoch) of the index, as computed by
        ``time.mktime`` with minutes and seconds set to zero and the DST
        flag set to 0.
    :raises ValueError: If one of the fields is not an integer.
    """
    fields = [int(part) for part in index_timestamp.split(separator)]
    if len(fields) == 3:
        # Daily index: the name carries no hour, so hour 3 is used in its place.
        fields.append(3)

    # time.mktime() expects nine fields; the remaining five (minute, second,
    # weekday, day of year, DST flag) are zero. A tuple is passed because
    # Python 3 rejects a plain list here.
    return time.mktime(tuple(fields + [0, 0, 0, 0, 0]))
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
63 |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
64 |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
def find_expired_indices(connection, days_to_keep=None, hours_to_keep=None, separator='.', prefix='logstash-', out=sys.stdout, err=sys.stderr):
    """ Generator that yields expired indices.

    Index names made of ``prefix`` plus three date fields (year, month, day)
    are checked against ``days_to_keep``; names with a fourth field (hour) are
    checked against ``hours_to_keep``. Indices without the prefix, without a
    valid timestamp, or of a type whose limit is ``None`` are reported and
    skipped.

    :param connection: Object whose ``get_indices()`` returns a mapping keyed
        by index name.
    :param days_to_keep: Age limit in days for daily indices, or None to leave
        daily indices alone.
    :param hours_to_keep: Age limit in hours for hourly indices, or None to
        leave hourly indices alone.
    :param separator: The string that separates the date fields in index names.
    :param prefix: Only index names starting with this string are considered.
    :param out: File-like object that receives informational messages.
    :param err: File-like object that receives messages about invalid names.
    :return: Yields tuples on the format ``(index_name, expired_by)`` where index_name
        is the name of the expired index and expired_by is the number of seconds (a float value) that the
        index was expired by.
    """
    # NOTE(review): the current time is always shifted by time.altzone (the
    # DST offset), presumably to line it up with the local-time based value
    # get_index_epoch() produces for UTC-named indices -- confirm this is
    # correct while DST is not in effect.
    utc_now_time = time.time() + time.altzone
    days_cutoff = None if days_to_keep is None else utc_now_time - days_to_keep * 24 * 60 * 60
    hours_cutoff = None if hours_to_keep is None else utc_now_time - hours_to_keep * 60 * 60

    for index_name in sorted(connection.get_indices().keys()):
        if not index_name.startswith(prefix):
            out.write('Skipping index due to missing prefix {0}: {1}\n'.format(prefix, index_name))
            continue

        unprefixed_index_name = index_name[len(prefix):]

        # find the timestamp parts (i.e ['2011', '01', '05'] from '2011.01.05') using the configured separator
        parts = unprefixed_index_name.split(separator)

        # perform some basic validation: three or four purely numeric fields
        if len(parts) not in (3, 4) or not all(item.isdigit() for item in parts):
            err.write('Could not find a valid timestamp from the index: {0}\n'.format(index_name))
            continue

        # three fields mean a daily index (days_cutoff), four fields mean the
        # timestamp includes the hour (hours_cutoff)
        cutoff = days_cutoff if len(parts) == 3 else hours_cutoff

        # the cutoff is None when no limit was given for this type of index,
        # e.g. a daily index while only hourly indices are being removed
        if cutoff is None:
            out.write('Skipping {0} because it is of a type (hourly or daily) that I\'m not asked to delete.\n'.format(index_name))
            continue

        # The separator must be forwarded: the timestamp was validated with the
        # configured separator, so it has to be parsed with the same one.
        index_epoch = get_index_epoch(unprefixed_index_name, separator)

        # if the index is older than the cutoff
        if index_epoch < cutoff:
            yield index_name, cutoff - index_epoch
        else:
            out.write('{0} is {1} above the cutoff.\n'.format(index_name, timedelta(seconds=index_epoch - cutoff)))
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
111 |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
def find_overusage_indices(connection, disk_space_to_keep, separator='.', prefix='logstash-', out=sys.stdout, err=sys.stderr):
    """ Generator that yields over usage indices.

    Indices carrying ``prefix`` are visited in reverse name order (for
    date-stamped names that is newest first) while their primary sizes are
    added up. Once the running total exceeds the limit, the index that
    crossed it and every later (older) one is yielded.

    :param connection: Object providing ``get_indices()`` (a mapping keyed by
        index name) and ``status(index_name)`` (nested mapping holding
        ``indices -> <name> -> index -> primary_size_in_bytes``).
    :param disk_space_to_keep: Disk space limit in GB (multiples of 2**30 bytes).
    :param separator: Unused; accepted so the signature mirrors the one of
        find_expired_indices.
    :param prefix: Only index names starting with this string are considered.
    :param out: File-like object that receives informational messages.
    :param err: File-like object that receives a message for every index whose
        size cannot be read.
    :return: Yields tuples on the format ``(index_name, 0)`` where index_name
        is the name of the expired index. The second element is only here for
        compatibility reasons.
    """
    disk_usage = 0.0
    disk_limit = disk_space_to_keep * 2**30

    for index_name in reversed(sorted(connection.get_indices().keys())):

        if not index_name.startswith(prefix):
            out.write('Skipping index due to missing prefix {0}: {1}\n'.format(prefix, index_name))
            continue

        # Walk down the status response one level at a time, so that a missing
        # level results in None instead of an AttributeError.
        index_size = connection.status(index_name)
        for key in ('indices', index_name, 'index', 'primary_size_in_bytes'):
            if index_size is None:
                break
            index_size = index_size.get(key)

        if index_size is None:
            # Without a size the index cannot be accounted for; it is left
            # alone rather than aborting the whole run.
            err.write('Could not determine the size of index: {0}\n'.format(index_name))
            continue

        disk_usage += index_size

        if disk_usage > disk_limit:
            yield index_name, 0
        else:
            out.write('keeping {0}, disk usage is {1:.3f} GB and disk limit is {2:.3f} GB.\n'.format(index_name, disk_usage/2**30, disk_limit/2**30))
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
136 |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
def main():
    """ Command line entry point: selects indices and deletes them.

    Parses the command line, connects to Elasticsearch and deletes every
    index selected by the retention options (or only reports it when
    --dry-run is given). If --disk-space-to-keep is given, the disk usage
    based selection replaces the age based one. Progress is printed to
    stdout; when no retention option is given, an error plus the usage text
    is printed and nothing else happens.
    """
    start = time.time()

    parser = make_parser()
    arguments = parser.parse_args()

    if not arguments.hours_to_keep and not arguments.days_to_keep and not arguments.disk_space_to_keep:
        sys.stderr.write('Invalid arguments: You must specify either the number of hours, the number of days to keep or the maximum disk space to use\n')
        parser.print_help()
        return

    connection = pyes.ES('{0}:{1}'.format(arguments.host, arguments.port), timeout=arguments.timeout)

    if arguments.days_to_keep:
        print('Deleting daily indices older than {0} days.'.format(arguments.days_to_keep))
    if arguments.hours_to_keep:
        print('Deleting hourly indices older than {0} hours.'.format(arguments.hours_to_keep))
    if arguments.days_to_keep or arguments.hours_to_keep:
        # A single generator covers both daily and hourly indices.
        expired_indices = find_expired_indices(connection, arguments.days_to_keep, arguments.hours_to_keep, arguments.separator, arguments.prefix)
    if arguments.disk_space_to_keep:
        # Explicit field index ({0}) keeps this working on Python 2.6.
        print('Let\'s keep disk usage lower than {0} GB.'.format(arguments.disk_space_to_keep))
        # This replaces any age based selection made above.
        expired_indices = find_overusage_indices(connection, arguments.disk_space_to_keep, arguments.separator, arguments.prefix)

    print('')

    for index_name, expired_by in expired_indices:
        expiration = timedelta(seconds=expired_by)

        if arguments.dry_run:
            print('Would have attempted deleting index {0} because it is {1} older than the calculated cutoff.'.format(index_name, expiration))
            continue

        print('Deleting index {0} because it was {1} older than cutoff.'.format(index_name, expiration))

        deletion = connection.delete_index_if_exists(index_name)
        # ES returns a dict on the format {u'acknowledged': True, u'ok': True} on success.
        # NOTE(review): presumably nothing is returned when the index no longer
        # exists -- a falsy result is reported as an error instead of crashing.
        if deletion and deletion.get('ok'):
            print('Successfully deleted index: {0}'.format(index_name))
        else:
            print('Error deleting index: {0}. ({1})'.format(index_name, deletion))

    print('')
    print('Done in {0}.'.format(timedelta(seconds=time.time()-start)))
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
180 |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
181 |
796ac0b50dbf
add cron.daily index cleaning
Carl Byington <carl@five-ten-sg.com>
parents:
diff
changeset
|
# Run the cleaner only when this file is executed as a script.
if __name__ == '__main__':
    main()