-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmail-archiver
executable file
·203 lines (165 loc) · 6.8 KB
/
mail-archiver
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
#!/usr/bin/env python3
"""mail-archiver - put mail older than the first of the month in dated archive

Usage:
    mail-archiver [-hqd] [DATE]

Options:
    --help, -h     Show this menu
    --quiet, -q    Limit log output to warnings or worse
    --dry-run, -d  Describe changes only: no files will be moved
"""
import logging
import re
from datetime import datetime, timezone
from email import message_from_file
from email.utils import parsedate_to_datetime
from os import environ
from pathlib import Path
from sys import stdout
from docopt import docopt
def setup_logger(level):
    """
    Build the logger that is handed around the rest of the program.

    Arguments:
    level -- threshold applied to the logger itself (e.g. "INFO", "WARNING")

    1. Prepare a formatter and a stream handler that writes to stdout.
       The handler carries its own threshold of INFO.
    2. Attach the handler to the logger named "mail-archiver" and set the
       logger's level.
    3. Return the logger.
    """
    layout = logging.Formatter(
        fmt="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %I:%M:%S %p",
    )

    console = logging.StreamHandler(stream=stdout)
    console.setLevel(logging.INFO)
    console.setFormatter(layout)

    archiver_log = logging.getLogger("mail-archiver")
    archiver_log.setLevel(level)
    archiver_log.addHandler(console)
    return archiver_log
def make_paths(paths_needed, archive, dry_run, logger):
    """
    Lay out the maildir skeleton for every archive folder about to be used.

    Arguments:
    paths_needed -- a dict whose keys are the directory names to create
    archive -- pathlib path under which the archived mail lives
    dry_run -- when true, only log what would be created
    logger -- logging object for documenting actions in this function

    Each key gets the three subdirectories 'cur', 'new' and 'tmp'.
    A FileExistsError is logged as a warning and the loop carries on; a
    PermissionError is logged with its traceback and re-raised.
    Returns nothing: the directories on disk are the result.
    """
    wanted = (
        archive / folder / part
        for folder in paths_needed
        for part in ("cur", "new", "tmp")
    )
    for maildir_path in wanted:
        if dry_run:
            logger.info(f"would create {maildir_path}")
            continue
        try:
            maildir_path.mkdir(parents=True, exist_ok=True)
        except FileExistsError as fe_error:
            # TODO: with exist_ok=True this only fires when a regular file
            # sits where the directory should be. We only warn here, so the
            # matching move will fail later; decide whether to exit instead.
            logger.warning(fe_error)
        except PermissionError as perm_error:
            logger.exception(perm_error)
            raise perm_error
        else:
            logger.info(f"created {maildir_path}")
def clean_pathname(original_pathname):
    """
    Remove the mbsync UID marker (',U=<digits>') from a maildir file name.

    Arguments:
    original_pathname -- the file name (not the full path) of an email

    Only the ',U=<digits>' marker is removed. Anything that follows it,
    such as the maildir ':2,<flags>' suffix, is kept so the message keeps
    its seen/replied/flagged state after it is moved. Names without a
    marker are returned unchanged.
    """
    # count=1: a name carries at most one UID marker; never touch the rest.
    return re.sub(r",U=[0-9]+", "", original_pathname, count=1)
def move_mails(mails_and_dates, archive, dry_run, logger):
    """
    Move each selected email into its dated archive folder.

    Arguments:
    mails_and_dates -- a list of (email, date_string, subject) tuples.
        email is a pathlib object of the email to move.
        date_string is a string in the format YYYY.
        subject is the subject of the email
    archive -- where the archived mails will live
    dry_run -- when true, only log what would be moved
    logger -- logging object for documenting actions in this function

    1. Work out the destination: archive/date_string/cur/<cleaned name>.
    2. In a dry run, log the intended move; otherwise rename the file and
       log the move. A PermissionError is logged and re-raised.
    3. Return nothing: side effects are the point.

    NOTE: The file name is passed through clean_pathname() before the move
          so that the ',U=xxx' bit is not carried into the archive.
          Otherwise, we will be very sorry. See here for details.
          https://sourceforge.net/p/isync/mailman/message/35258847/
    """
    for original, folder, subject in mails_and_dates:
        destination = archive / folder / "cur" / clean_pathname(original.name)
        if dry_run:
            logger.info(f"{subject} would be moved to {destination}")
            continue
        try:
            original.rename(destination)
        except PermissionError as perm_error:
            logger.exception(perm_error)
            raise perm_error
        else:
            logger.info(f"{original} was moved to {destination}")
def scan_mails(source, terminus, logger):
    """
    Scan source for emails dated before terminus.

    Arguments:
    source -- a directory (pathlib path) filled with email
    terminus -- a datetime; mail dated on or after its calendar day is kept
    logger -- logging object for documenting actions in this function

    1. Collect every regular file directly inside source.
    2. Parse each file and read its Date and Subject headers.
    3. Skip, with a warning, mail whose Date header is missing or cannot
       be parsed, so one malformed message never aborts the whole scan.
    4. Compare calendar days as YYYYMMDD strings; this avoids comparing
       naive and timezone-aware datetimes directly.
    5. Return (dated_paths, mails_and_datestamps): a dict keyed by the
       four-digit years that need an archive folder, and a list of
       (email_path, year, subject) tuples describing the mail to move.
    """
    # iterdir() already yields paths that include source; joining them onto
    # source a second time doubles the prefix whenever source is relative.
    emails = [entry for entry in source.iterdir() if entry.is_file()]
    cutoff = terminus.strftime("%Y%m%d")
    dated_paths = {}
    mails_and_datestamps = []
    for email_path in emails:
        with open(email_path, "r", encoding="utf-8", errors="ignore") as email:
            msg = message_from_file(email)
        date_str = msg.get("date")
        subject = msg.get("subject")
        if date_str is None:
            logger.warning("Ignoring %s, which has no date", email_path)
            continue
        try:
            email_date = parsedate_to_datetime(date_str)
        except (TypeError, ValueError):
            # parsedate_to_datetime raises on a malformed header instead of
            # returning None (TypeError on older Pythons, ValueError later).
            email_date = None
        if email_date is None:
            logger.warning(
                "Ignoring %s, which has an unparsable date: %r",
                email_path,
                date_str,
            )
            continue
        if email_date.strftime("%Y%m%d") < cutoff:
            datestamp = email_date.strftime("%Y")
            dated_paths[datestamp] = True
            mails_and_datestamps.append((email_path, datestamp, subject))
        else:
            logger.info("%s is too recent to move", email_path)
    return dated_paths, mails_and_datestamps
def prep_args(user_args):
    """
    Process arguments from docopt into the settings the program runs on.

    Arguments:
    user_args -- the dict returned by docopt; "--quiet" and "--dry-run" are
                 required keys, the positional "DATE" is optional

    1. Derive the source (MAILDIR/recent/cur) and target (MAILDIR/archive)
       directories from the MAILDIR environment variable.
    2. Create the logger: WARNING level when --quiet is set, else INFO.
    3. Work out the terminus, the first date for mail to keep. Without a
       DATE it is the first day of the current month (UTC); otherwise DATE
       is parsed as an ISO 8601 date such as 2024-03-01 and treated as UTC
       when it carries no timezone.
    4. Return (prog_args, dry_run, logger), where prog_args holds the keys
       "source", "target" and "terminus".

    Raises:
    KeyError -- if MAILDIR is not set in the environment
    ValueError -- if DATE is given but is not a valid ISO 8601 date
    """
    maildir = Path(environ["MAILDIR"])
    prog_args = {
        "source": maildir / "recent/cur",
        "target": maildir / "archive",
    }
    logger = setup_logger("WARNING" if user_args["--quiet"] else "INFO")
    dry_run = bool(user_args["--dry-run"])

    # The usage text declares a positional DATE, which docopt stores under
    # the key "DATE"; "--date" is still honoured for callers that supply it.
    requested = user_args.get("DATE") or user_args.get("--date")
    if not requested:
        terminus = datetime.now(timezone.utc).replace(day=1)
    else:
        try:
            terminus = datetime.fromisoformat(requested)
        except ValueError:
            logger.warning("%s is not a valid date!", requested)
            raise
        if terminus.tzinfo is None:
            # Keep the terminus timezone-aware, like the default above.
            terminus = terminus.replace(tzinfo=timezone.utc)
    prog_args["terminus"] = terminus
    logger.info("date is %s", terminus)
    return prog_args, dry_run, logger
def main():
    """
    Run the archiver from the command line.

    1. Parse the command line with docopt and turn it into settings.
    2. Scan the source folder for mail older than the terminus.
    3. Create the archive folders that mail needs.
    4. Move the mail (or, in a dry run, only describe the moves).
    """
    settings, dry_run, logger = prep_args(docopt(__doc__))
    archive = settings["target"]
    folders, to_move = scan_mails(settings["source"], settings["terminus"], logger)
    make_paths(folders, archive, dry_run, logger)
    move_mails(to_move, archive, dry_run, logger)
# Run the archiver only when this file is executed as a script, so that
# importing it has no side effects.
if __name__ == "__main__":
    main()