partage public
This commit is contained in:
533
cleanup-maildir
Executable file
533
cleanup-maildir
Executable file
@@ -0,0 +1,533 @@
|
||||
#!/usr/bin/python -tt
|
||||
#
|
||||
# Copyright 2004-2006 Nathaniel W. Turner <nate@houseofnate.net>
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person
|
||||
# obtaining a copy of this software and associated documentation
|
||||
# files (the "Software"), to deal in the Software without
|
||||
# restriction, including without limitation the rights to use,
|
||||
# copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following
|
||||
# conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be
|
||||
# included in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
"""
|
||||
USAGE
|
||||
cleanup-maildir [OPTION].. COMMAND FOLDERNAME..
|
||||
|
||||
DESCRIPTION
|
||||
Cleans up old messages in FOLDERNAME; the exact action taken
|
||||
depends on COMMAND. (See next section.)
|
||||
Note that FOLDERNAME is a name such as 'Drafts', and the
|
||||
corresponding maildir path is determined using the values of
|
||||
maildir-root, folder-prefix, and folder-seperator.
|
||||
|
||||
COMMANDS
|
||||
archive - move old messages to subfolders based on message date
|
||||
trash - move old messages to trash folder
|
||||
delete - permanently delete old messages
|
||||
|
||||
OPTIONS
|
||||
-h, --help
|
||||
Show this help.
|
||||
-q, --quiet
|
||||
Suppress normal output.
|
||||
-v, --verbose
|
||||
Output extra information for testing.
|
||||
-n, --trial-run
|
||||
Do not actually touch any files; just say what would be done.
|
||||
-a, --age=N
|
||||
Only touch messages older than N days. Default is 14 days.
|
||||
-k, --keep-flagged-threads
|
||||
If any messages in a thread are flagged, do not touch them or
|
||||
any other messages in that thread.
|
||||
Note: the thread-detection mechanism is currently based purely on
|
||||
a message's subject. The In-Reply-To header is not currently used.
|
||||
-r, --keep-read
|
||||
If any messages are flagged as READ, do not touch them.
|
||||
-t, --trash-folder=F
|
||||
Use F as trash folder when COMMAND is 'trash'.
|
||||
Default is 'Trash'.
|
||||
--archive-folder=F
|
||||
Use F as the base for constructing archive folders. For example, if F is
|
||||
'Archive', messages from 2004 might be put in the folder 'Archive.2004'.
|
||||
-d, --archive-hierarchy-depth=N
|
||||
Specify number of subfolders in archive hierarchy; 1 is just
|
||||
the year, 2 is year/month (default), 3 is year/month/day.
|
||||
--maildir-root=F
|
||||
Specifies folder that contains mail folders.
|
||||
Default is "$HOME/Maildir".
|
||||
--folder-seperator=str
|
||||
Folder hierarchy seperator. Default is '.'
|
||||
--folder-prefix=str
|
||||
Folder prefix. Default is '.'
|
||||
|
||||
NOTES
|
||||
The following form is accepted for backwards compatibility, but is deprecated:
|
||||
cleanup-maildir --mode=COMMAND [OPTION].. FOLDERNAME..
|
||||
|
||||
EXAMPLES
|
||||
# Archive messages in 'Sent Items' folder over 30 days old
|
||||
cleanup-maildir --age=30 archive 'Sent Items'
|
||||
|
||||
# Trash messages over 2 weeks old in 'Lists.debian-devel' folder,
|
||||
# except messages that are part of a thread containing a flagged message.
|
||||
cleanup-maildir --keep-flagged-threads trash 'Lists.debian-devel'
|
||||
"""
|
||||
|
||||
__version__ = "0.3.0"
|
||||
# $Id$
|
||||
# $URL$
|
||||
|
||||
import mailbox
|
||||
import os.path
|
||||
import os
|
||||
import rfc822
|
||||
import string
|
||||
import socket
|
||||
import time
|
||||
import logging
|
||||
import sys
|
||||
import getopt
|
||||
|
||||
|
||||
def mkMaildir(path):
    """Make a Maildir structure rooted at 'path'.

    Creates 'path' itself and its 'tmp', 'new' and 'cur' subdirectories,
    each requested with mode 0700.

    Arguments:
    path -- directory to create; its parent must already exist

    Raises OSError if any of the directories cannot be created (for
    example because 'path' already exists).
    """
    # 0o700 is the octal spelling accepted by both Python 2.6+ and
    # Python 3; the bare 0700 form is a SyntaxError on Python 3.
    os.mkdir(path, 0o700)
    for sub in ('tmp', 'new', 'cur'):
        os.mkdir(os.path.join(path, sub), 0o700)
|
||||
|
||||
|
||||
class MaildirWriter(object):

    """Deliver messages into a Maildir"""

    # Default destination Maildir; set by __init__ when a path is given.
    path = None
    # Number of delivery attempts made so far; part of every generated
    # file name so that names produced within the same second differ.
    counter = 0

    def __init__(self, path=None):
        """Create a MaildirWriter that manages the Maildir at 'path'

        Arguments:
        path -- if specified, used as the default Maildir for this object

        Raises ValueError if 'path' is given but is not a directory.
        """
        if path is not None:
            if not os.path.isdir(path):
                raise ValueError('Path does not exist: %s' % path)
            self.path = path
        self.logger = logging.getLogger('MaildirWriter')

    def deliver(self, msg, path=None):
        """Deliver a message to a Maildir

        The message file is hard-linked (not copied) into tmp/, then into
        new/, and finally into cur/ with its original flags re-attached.
        The source file itself is left in place; removing it is up to the
        caller.

        Arguments:
        msg -- a message object providing getFilePath() and getFlags()
        path -- the path of the Maildir; if None, uses default from __init__.
                Passing a path affects this call only; the default is not
                changed.

        Raises ValueError if no usable Maildir path is available, and
        OSError if linking fails (after ten attempts for the tmp/new step).
        """
        if path is None:
            path = self.path
        if path is None or not os.path.isdir(path):
            raise ValueError('Path does not exist')

        srcFile = msg.getFilePath()
        dstName = tmpFile = newFile = None
        tryCount = 1
        while True:
            dstName = "%d.%d_%d.%s" % (int(time.time()), os.getpid(),
                                       self.counter, socket.gethostname())
            tmpFile = os.path.join(path, "tmp", dstName)
            newFile = os.path.join(path, "new", dstName)
            try:
                self.logger.debug("deliver: attempt copy %s to %s",
                                  srcFile, tmpFile)
                os.link(srcFile, tmpFile)  # Link into tmp
                try:
                    self.logger.debug("deliver: attempt link to %s", newFile)
                    os.link(tmpFile, newFile)  # Link into new
                except OSError:
                    # The next attempt uses a fresh name, so remove the
                    # tmp link now or it would be left behind forever.
                    os.unlink(tmpFile)
                    raise
            except OSError as err:
                self.logger.critical(
                    "deliver failed: %s (src=%s tmp=%s new=%s i=%d)",
                    err.strerror, srcFile, tmpFile, newFile, tryCount)
                tryCount += 1
                self.counter += 1
                if tryCount > 10:
                    raise OSError("too many failed delivery attempts")
                self.logger.info("sleeping")
                time.sleep(2)
            else:
                break

        # Successful delivery; increment deliver counter
        self.counter += 1

        # For the rest of this method we are acting as an MUA, not an MDA.

        # Move message to cur and restore any flags
        dstFile = os.path.join(path, "cur", dstName)
        flags = msg.getFlags()
        if flags is not None:
            dstFile += ':' + flags
        self.logger.debug("deliver: attempt link to %s", dstFile)
        try:
            os.link(newFile, dstFile)
            os.unlink(newFile)
        finally:
            # Cleanup tmp file, even when the move into cur failed
            os.unlink(tmpFile)
|
||||
|
||||
|
||||
class MessageDateError(TypeError):
    """Signal that a message's date is missing or could not be parsed."""
|
||||
|
||||
|
||||
class MaildirMessage(rfc822.Message):

    """An email message

    Has extra Maildir-specific attributes
    """

    def getFilePath(self):
        """Return the path of the file this message was read from."""
        if sys.hexversion >= 0x020500F0:
            # On Python 2.5 and later the file object is reached through
            # the wrapper's _file attribute.
            return self.fp._file.name
        return self.fp.name

    def _getInfo(self):
        """Return the part of the file name after ':', or None.

        Only the base name is examined, so a colon in a directory name
        cannot be mistaken for the start of the flags.
        """
        name = os.path.basename(self.getFilePath())
        if ':' not in name:
            return None
        return name.split(':', 1)[1]

    def isFlagged(self):
        """return true if the message is flagged as important"""
        info = self._getInfo()
        return info is not None and 'F' in info

    def getFlags(self):
        """return the flag part of the message's filename

        This is everything after the ':' (for example '2,S'), or None if
        the file name carries no such suffix.
        """
        return self._getInfo()

    def isNew(self):
        """return true if the message is marked as unread"""
        # XXX should really be called isUnread
        info = self._getInfo()
        return info is None or 'S' not in info

    def getSubject(self):
        """get the message's subject as a unicode string

        Returns None when the message has no Subject header. If the
        header names an unknown charset, the raw header value is returned
        undecoded.
        """
        import email.Header
        s = self.getheader("Subject")
        if s is None:
            # Without this guard the missing header would be decoded as
            # the literal text "None".
            return None
        try:
            return u"".join(
                [text.decode(charset or 'ASCII', 'replace')
                 for text, charset in email.Header.decode_header(s)])
        except LookupError:
            return s

    def getSubjectHash(self):
        """get the message's subject in a "normalized" form

        This currently means lowercasing, trimming surrounding whitespace
        and removing any leading reply or forward indicators ("Re:",
        "Fw:", "Fwd:"), including repeated ones such as "Re: Fwd: ...".
        A message without a subject yields '(no subject)'.
        """
        import re
        s = self.getSubject()
        if s is None:
            return '(no subject)'
        return re.sub(r'^((re|fwd?):\s*)+', '', s.lower().strip())

    def getDateSent(self):
        """Get the time of sending from the Date header

        Returns seconds since the epoch as computed by time.mktime, or
        None if the message has no Date header. Not very reliable, because
        the Date header can be missing or spoofed (and often is, by
        spammers).

        Throws a MessageDateError if the Date header is present but
        cannot be converted.
        """
        dh = self.getheader('Date')
        if dh is None:
            return None
        # NOTE(review): parsedate() drops the zone offset and mktime()
        # then reads the fields as local time -- confirm this is intended.
        try:
            return time.mktime(rfc822.parsedate(dh))
        except (ValueError, TypeError, OverflowError):
            # TypeError gets thrown by mktime if parsedate returns None
            raise MessageDateError("message has missing or bad Date")

    def getDateRecd(self):
        """Get the time the message was received

        This is the modification time of the message file, in seconds
        since the epoch.
        """
        # Index 8 of the stat result is st_mtime.
        return os.stat(self.getFilePath())[8]

    def getDateSentOrRecd(self):
        """Get the time the message was sent, fall back on time received"""
        try:
            sent = self.getDateSent()
        except MessageDateError:
            sent = None
        if sent is not None:
            return sent
        return self.getDateRecd()

    def getAge(self):
        """Get the number of days (as a float) since the message was received"""
        # Both values are seconds since the epoch. The previous
        # mktime(gmtime()) form skewed the age by the local UTC offset.
        ageSeconds = time.time() - self.getDateRecd()
        return ageSeconds / (60.0 * 60 * 24)
|
||||
|
||||
|
||||
class MaildirCleaner(object):

    """Clean a maildir by deleting or moving old messages"""

    __trashWriter = None
    __mdWriter = None
    # Class-level values are defaults only; __init__ gives every instance
    # its own 'stats' and 'keepSubjects' so instances never share counters.
    stats = {'total': 0, 'delete': 0, 'trash': 0, 'archive': 0}
    keepSubjects = {}
    archiveFolder = None
    archiveHierDepth = 2
    folderBase = None
    folderPrefix = "."
    folderSeperator = "."
    keepFlaggedThreads = False
    trashFolder = "Trash"
    isTrialRun = False
    keepRead = False

    def __init__(self, folderBase=None):
        """Initialize the MaildirCleaner

        Arguments:
        folderBase -- the directory in which the folders are found
        """
        self.folderBase = folderBase
        self.stats = dict.fromkeys(('total', 'delete', 'trash', 'archive'), 0)
        self.keepSubjects = {}
        self.__mdWriter = MaildirWriter()
        self.logger = logging.getLogger('MaildirCleaner')
        self.logger.setLevel(logging.DEBUG)

    def __getTrashWriter(self):
        """Return the writer for the trash folder, creating it on first use."""
        if self.__trashWriter is None:
            path = os.path.join(self.folderBase,
                                self.folderPrefix + self.trashFolder)
            self.__trashWriter = MaildirWriter(path)
        return self.__trashWriter

    trashWriter = property(__getTrashWriter)

    def _folderPath(self, folderName):
        """Map a folder name to its maildir path; 'INBOX' is folderBase itself."""
        if folderName == 'INBOX':
            return self.folderBase
        return os.path.join(self.folderBase, self.folderPrefix + folderName)

    def _archiveSubFolder(self, msg, archiveFolder):
        """Build the archive folder name for 'msg' from its date.

        The name is archiveFolder followed by year, month and day parts
        (as many as archiveHierDepth asks for), joined by folderSeperator.
        """
        mdate = time.gmtime(msg.getDateSentOrRecd())
        parts = [archiveFolder, str(mdate[0])]
        if self.archiveHierDepth > 1:
            parts.append(time.strftime("%m", mdate))
        if self.archiveHierDepth > 2:
            parts.append(time.strftime("%d", mdate))
        # NOTE(review): an empty archiveFolder (INBOX without
        # --archive-folder) gives a name that starts with the separator;
        # kept as-is so existing archive folders keep being used.
        return self.folderSeperator.join(parts)

    def scanSubjects(self, folderName):
        """Scans for flagged subjects

        Rebuilds keepSubjects with the normalized subject of every flagged
        message found in 'folderName'.
        """
        self.logger.info("Scanning for flagged subjects...")
        maildir = mailbox.Maildir(self._folderPath(folderName), MaildirMessage)
        self.keepSubjects = {}
        for i, msg in enumerate(maildir):
            if msg.isFlagged():
                subjectHash = msg.getSubjectHash()
                self.keepSubjects[subjectHash] = 1
                self.logger.debug("Flagged (%d): %s", i, subjectHash)
        self.logger.info("Done scanning.")

    def clean(self, mode, folderName, minAge):
        """Trashes or archives messages older than minAge days

        Arguments:
        mode -- the cleaning mode.  Valid modes are:
            trash -- moves the messages to a trash folder
            archive -- moves the messages to folders based on their date
            delete -- deletes the messages
        folderName -- the name of the folder on which to operate
            This is a name like "Stuff", not a filename
        minAge -- messages younger than minAge days are left alone

        Updates self.stats: 'total' counts every message looked at and
        the entry named after 'mode' counts every message acted upon
        (trial runs included).

        Raises ValueError if 'mode' is not one of the valid modes.
        """
        if mode not in ('trash', 'archive', 'delete'):
            raise ValueError("invalid cleaning mode: %r" % (mode,))

        if self.keepFlaggedThreads:
            self.scanSubjects(folderName)

        archiveFolder = self.archiveFolder
        if archiveFolder is None:
            if folderName == 'INBOX':
                archiveFolder = ""
            else:
                archiveFolder = folderName

        maildir = mailbox.Maildir(self._folderPath(folderName), MaildirMessage)

        fakeMsg = ""
        if self.isTrialRun:
            fakeMsg = "(Not really) "

        # Move old messages
        for i, msg in enumerate(maildir):
            if self.keepFlaggedThreads \
                    and msg.getSubjectHash() in self.keepSubjects:
                self.log(logging.DEBUG, "Keeping #%d (topic flagged)" % i, msg)
            elif msg.getAge() < minAge \
                    or (self.keepRead and not msg.isNew()):
                # Too young, or already read while keepRead is on.
                self.log(logging.DEBUG, "Keeping #%d (fresh)" % i, msg)
            else:
                if mode == 'trash':
                    self.log(logging.INFO, "%sTrashing #%d (old)" %
                             (fakeMsg, i), msg)
                    if not self.isTrialRun:
                        self.trashWriter.deliver(msg)
                        os.unlink(msg.getFilePath())
                elif mode == 'delete':
                    self.log(logging.INFO, "%sDeleting #%d (old)" %
                             (fakeMsg, i), msg)
                    if not self.isTrialRun:
                        os.unlink(msg.getFilePath())
                else:  # mode == 'archive'
                    subFolder = self._archiveSubFolder(msg, archiveFolder)
                    sfPath = os.path.join(self.folderBase,
                                          self.folderPrefix + subFolder)
                    self.log(logging.INFO, "%sArchiving #%d to %s" %
                             (fakeMsg, i, subFolder), msg)
                    if not self.isTrialRun:
                        # Create the subfolder if needed
                        if not os.path.exists(sfPath):
                            mkMaildir(sfPath)
                        # Deliver
                        self.__mdWriter.deliver(msg, sfPath)
                        os.unlink(msg.getFilePath())
                self.stats[mode] += 1
            self.stats['total'] += 1

    def log(self, lvl, text, msgObj):
        """Log some text with the subject of a message

        Arguments:
        lvl -- logging level for the record
        text -- text to put in front of the subject
        msgObj -- message whose subject is appended ('(no subject)' if
                  it has none)
        """
        subj = msgObj.getSubject()
        if subj is None:
            subj = "(no subject)"
        # Leave the interpolation to the logging module so it is only
        # done for records that are actually emitted.
        self.logger.log(lvl, "%s: %s", text, subj)
|
||||
|
||||
|
||||
# Command-line driver: parse options, then run the requested command on
# every folder named on the command line and report the totals.

# Defaults
minAge = 14
mode = None

logging.basicConfig()
logging.getLogger().setLevel(logging.DEBUG)
# Hide DEBUG records unless --verbose lowers the threshold again below.
logging.disable(logging.INFO - 1)
logger = logging.getLogger('cleanup-maildir')
cleaner = MaildirCleaner()

# Read command-line arguments
try:
    opts, args = getopt.getopt(sys.argv[1:],
            "hqvnrm:t:a:kd:",
            ["help", "quiet", "verbose", "version", "mode=", "trash-folder=",
             "age=", "keep-flagged-threads", "keep-read", "folder-seperator=",
             "folder-prefix=", "maildir-root=", "archive-folder=",
             "archive-hierarchy-depth=", "trial-run"])
except getopt.GetoptError as err:
    logger.error("%s\n\n%s" % (err.msg, __doc__))
    sys.exit(2)
output = None
for o, a in opts:
    if o in ("-h", "--help"):
        print(__doc__)
        sys.exit()
    elif o in ("-q", "--quiet"):
        logging.disable(logging.WARNING - 1)
    elif o in ("-v", "--verbose"):
        logging.disable(logging.DEBUG - 1)
    elif o == "--version":
        print(__version__)
        sys.exit()
    elif o in ("-n", "--trial-run"):
        cleaner.isTrialRun = True
    elif o in ("-m", "--mode"):
        logger.warning("the --mode flag is deprecated (see --help)")
        if a in ('trash', 'archive', 'delete'):
            mode = a
        else:
            logger.error("%s is not a valid command" % a)
            sys.exit(2)
    elif o in ("-t", "--trash-folder"):
        cleaner.trashFolder = a
    elif o == "--archive-folder":
        cleaner.archiveFolder = a
    elif o in ("-a", "--age"):
        # Report a non-numeric age as a usage error, not a traceback.
        try:
            minAge = int(a)
        except ValueError:
            logger.error("age must be a whole number of days, got '%s'" % a)
            sys.exit(2)
    elif o in ("-k", "--keep-flagged-threads"):
        cleaner.keepFlaggedThreads = True
    elif o in ("-r", "--keep-read"):
        cleaner.keepRead = True
    elif o == "--folder-seperator":
        cleaner.folderSeperator = a
    elif o == "--folder-prefix":
        cleaner.folderPrefix = a
    elif o == "--maildir-root":
        cleaner.folderBase = a
    elif o in ("-d", "--archive-hierarchy-depth"):
        try:
            archiveHierDepth = int(a)
        except ValueError:
            # Not a number: falls into the range error just below.
            archiveHierDepth = 0
        if archiveHierDepth < 1 or archiveHierDepth > 3:
            sys.stderr.write("Error: archive hierarchy depth must be 1, " +
                             "2, or 3.\n")
            sys.exit(2)
        cleaner.archiveHierDepth = archiveHierDepth

if not cleaner.folderBase:
    # expanduser() honours $HOME when it is set and otherwise falls back
    # to the platform's own lookup, so an unset HOME no longer raises.
    cleaner.folderBase = os.path.join(os.path.expanduser("~"), "Maildir")
if mode is None:
    # No (deprecated) --mode option given: the first positional argument
    # is the command.
    if len(args) < 1:
        logger.error("No command specified")
        sys.stderr.write(__doc__)
        sys.exit(2)
    mode = args.pop(0)
    if mode not in ('trash', 'archive', 'delete'):
        logger.error("%s is not a valid command" % mode)
        sys.exit(2)

if len(args) == 0:
    logger.error("No folder(s) specified")
    sys.stderr.write(__doc__)
    sys.exit(2)

logger.debug("Mode is " + mode)

# Clean each folder
for folder in args:
    logger.debug("Cleaning up %s..." % folder)
    cleaner.clean(mode, folder, minAge)

logger.info('Total messages:     %5d' % cleaner.stats['total'])
logger.info('Affected messages:  %5d' % cleaner.stats[mode])
logger.info('Untouched messages: %5d' %
            (cleaner.stats['total'] - cleaner.stats[mode]))
|
||||
Reference in New Issue
Block a user