public sharing
svndumpfilter2 (Executable file, 439 lines)
@@ -0,0 +1,439 @@
#!/usr/bin/env python

# Utility to filter a dump file of a Subversion repository to
# produce a dump file describing only specified subdirectories of
# the tree contained in the original one. This is similar in
# concept to the official tool `svndumpfilter', but it's able to
# cope with revisions which copy files into the area of interest
# from outside it (in which situation a Node-copyfrom won't be
# valid in the output dump file). However, in order to support
# this, svndumpfilter2 requires access via `svnlook' to the
# original repository from which the input dump file was produced.
#
# Usage:
#
#   svndumpfilter2 [options] source-repository regexp [regexp...]
#
# This command expects to receive a Subversion dump file on
# standard input, which must correspond to the Subversion
# repository pointed to by the first argument. It outputs a
# filtered dump file on standard output.
#
# `source-repository': The first argument must be a pathname to a
# _local_ Subversion repository. That is, it isn't a Subversion URL
# (beginning with http:// or svn:// or anything else like that);
# it's a simple local pathname (absolute or relative). A simple
# test to see if it's a valid pathname is to pass it as an argument
# to `svnlook tree'. If that succeeds, it's also a valid first
# argument to svndumpfilter2.
#
# `regexp': The remaining arguments are used to select directory
# names from the top level of the repository's internal directory
# tree. Any directory matching any of the regexps will be
# considered `interesting' and copied into the output dump file;
# any directory not matching will not. Matching is performed at the
# top level only: it is not currently possible to selectively
# include a subset of second-level directories with a common
# parent.
#
# Options include:
#
#   `--drop-empty-revs': Exclude empty revisions from the output.
#
#   `--renumber-revs': Generate sequential revision numbers in the
#   filtered output. This may help work around issues with certain
#   versions of 'svnadmin load'.
#
# For example, this command...
#
#   svndumpfilter2 /home/svnadmin/myrepos foo bar baz quu+x
#
# ... will read a dump file on standard input, and output one on
# standard output which contains only the subdirectories `foo',
# `bar', `baz', `quux', `quuux', `quuuux', etc.
#
# You will probably usually want to use svndumpfilter2 in
# conjunction with the production of the dump file in the first
# place, like this:
#
#   svnadmin dump /home/svnadmin/myrepos | \
#     svndumpfilter2 /home/svnadmin/myrepos foo bar baz quu+x > msv.dump
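
# (Illustration, not part of the original notes above: the filtered dump
# produced by a pipeline like the one shown would typically be loaded
# into a fresh repository with something along the lines of
#
#   svnadmin create /home/svnadmin/newrepos
#   svnadmin load /home/svnadmin/newrepos < msv.dump
#
# where the repository path is only an example.)
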
import sys
import os
import re
import string
import types
import md5
from optparse import OptionParser

# Quoting function which should render any string impervious to
# POSIX shell metacharacter expansion.
def quote(word):
    return "'" + string.replace(word, "'", "'\\''") + "'"
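
# (For example, quote("don't") yields 'don'\''t', which can be embedded
# safely in the svnlook command lines constructed further down.)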

# First, the sensible way to deal with a pathname is to split it
# into pieces at the slashes and thereafter treat it as a list.
def splitpath(s):
    list = string.split(s, "/")
    # Simplest way to remove all empty elements!
    try:
        while 1:
            list.remove("")
    except ValueError:
        pass
    return list

def joinpath(list, prefix=""):
    return prefix + string.join(list, "/")

def cleanpath(s):
    return joinpath(splitpath(s))

def catpath(path1, path2, prefix=""):
    return joinpath(splitpath(path1) + splitpath(path2), prefix)
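
# (For example, splitpath("/trunk//foo/") gives ["trunk", "foo"],
# cleanpath("/trunk//foo/") gives "trunk/foo", and
# catpath("trunk", "foo/bar") gives "trunk/foo/bar".)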

# Decide whether a pathname is interesting or not.
class InterestingPaths:
    def __init__(self, args):
        self.res = []
        for a in args:
            self.res.append(re.compile(a))
    def interesting(self, path):
        path = cleanpath(path)
        if path == '':
            # It's possible that the path may have no elements at
            # all, in which case we can't match on its first
            # element. This generally occurs when svn properties
            # are being changed on the root of the repository; we
            # consider those to be always interesting and never
            # filter them out.
            return 1
        for r in self.res:
            if r.match(path):
                return 1
        return 0
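
# Note that interesting() applies re.match(), which anchors only at the
# start of the cleaned path: a pattern such as "foo" therefore also
# selects "foobar" and everything beneath either directory. A pattern
# like "foo(/|$)" can be used when an exact top-level name is wanted.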

# A class and some functions to handle a single lump of
# RFC822-ish-headers-plus-data read from an SVN dump file.
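#
# Roughly, each lump is a block of "Header: value" lines, a blank line,
# then an optional property section and/or text body whose sizes are
# given by the Prop-content-length and Text-content-length headers.
# The property section is a sequence of records of the form
#
#   K <length>\n<name>\n
#   V <length>\n<value>\n
#
# (or "D <length>\n<name>\n" for a deletion), terminated by PROPS-END,
# which is what propparse() below picks apart.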

class Lump:
    def __init__(self):
        self.hdrlist = []
        self.hdrdict = {}
        self.prop = ""
        self.text = None
        self.extant = 1
        self.props = [[], {}]
    def sethdr(self, key, val):
        if not self.hdrdict.has_key(key):
            self.hdrlist.append(key)
        self.hdrdict[key] = val
    def delhdr(self, key):
        if self.hdrdict.has_key(key):
            del self.hdrdict[key]
            self.hdrlist.remove(key)
    def propparse(self):
        index = 0
        while 1:
            if self.prop[index:index+2] == "K ":
                wantval = 1
            elif self.prop[index:index+2] == "D ":
                wantval = 0
            elif self.prop[index:index+9] == "PROPS-END":
                break
            else:
                raise ValueError("Unrecognised record in props section")
            nlpos = string.find(self.prop, "\n", index)
            assert nlpos > 0
            namelen = string.atoi(self.prop[index+2:nlpos])
            assert self.prop[nlpos+1+namelen] == "\n"
            name = self.prop[nlpos+1:nlpos+1+namelen]
            index = nlpos+2+namelen
            if wantval:
                assert self.prop[index:index+2] == "V "
                nlpos = string.find(self.prop, "\n", index)
                assert nlpos > 0
                proplen = string.atoi(self.prop[index+2:nlpos])
                assert self.prop[nlpos+1+proplen] == "\n"
                prop = self.prop[nlpos+1:nlpos+1+proplen]
                index = nlpos+2+proplen
            else:
                prop = None
            self.props[0].append(name)
            self.props[1][name] = prop
    def setprop(self, key, val):
        if not self.props[1].has_key(key):
            self.props[0].append(key)
        self.props[1][key] = val
    def delprop(self, key):
        if self.props[1].has_key(key):
            del self.props[1][key]
            self.props[0].remove(key)
    def correct_headers(self, revmap):
        # First reconstitute the properties block.
        self.prop = ""
        if (not (self.props is None)) and len(self.props[0]) > 0:
            for key in self.props[0]:
                val = self.props[1][key]
                if val == None:
                    self.prop = self.prop + "D %d" % len(key) + "\n" + key + "\n"
                else:
                    self.prop = self.prop + "K %d" % len(key) + "\n" + key + "\n"
                    self.prop = self.prop + "V %d" % len(val) + "\n" + val + "\n"
            self.prop = self.prop + "PROPS-END\n"
        # Now fix up the content length headers.
        if len(self.prop) > 0:
            self.sethdr("Prop-content-length", str(len(self.prop)))
        else:
            self.delhdr("Prop-content-length")
        # Only fiddle with the md5 if we're not doing a delta.
        if self.hdrdict.get("Text-delta", "false") != "true":
            if self.text != None:
                self.sethdr("Text-content-length", str(len(self.text)))
                m = md5.new()
                m.update(self.text)
                self.sethdr("Text-content-md5", m.hexdigest())
            else:
                self.delhdr("Text-content-length")
                self.delhdr("Text-content-md5")
        if len(self.prop) > 0 or self.text != None:
            if self.text == None:
                textlen = 0
            else:
                textlen = len(self.text)
            self.sethdr("Content-length", str(len(self.prop)+textlen))
        else:
            self.delhdr("Content-length")
        # Adjust the revision numbers as needed.
        for header in ["Revision-number", "Node-copyfrom-rev"]:
            if self.hdrdict.has_key(header):
                old_val = int(self.hdrdict[header])
                if revmap != None:
                    new_val = revmap[old_val]
                else:
                    new_val = old_val
                self.sethdr(header, str(new_val))

def read_rfc822_headers(f):
    ret = Lump()
    while 1:
        s = f.readline()
        if s == "":
            return None # end of file
        if s == "\n":
            if len(ret.hdrlist) > 0:
                break # newline after headers ends them
            else:
                continue # newline before headers is simply ignored
        if s[-1:] == "\n": s = s[:-1]
        colon = string.find(s, ":")
        assert colon > 0
        assert s[colon:colon+2] == ": "
        key = s[:colon]
        val = s[colon+2:]
        ret.sethdr(key, val)
    return ret

def read_lump(f):
    lump = read_rfc822_headers(f)
    if lump == None:
        return None
    pcl = string.atoi(lump.hdrdict.get("Prop-content-length", "0"))
    if pcl > 0:
        lump.prop = f.read(pcl)
        lump.propparse()
    if lump.hdrdict.has_key("Text-content-length"):
        tcl = string.atoi(lump.hdrdict["Text-content-length"])
        lump.text = f.read(tcl)
    return lump

def write_lump(f, lump, revmap):
    if not lump.extant:
        return
    lump.correct_headers(revmap)
    for key in lump.hdrlist:
        val = lump.hdrdict[key]
        f.write(key + ": " + val + "\n")
    f.write("\n")
    f.write(lump.prop)
    if lump.text != None:
        f.write(lump.text)
    if lump.hdrdict.has_key("Prop-content-length") or \
       lump.hdrdict.has_key("Text-content-length") or \
       lump.hdrdict.has_key("Content-length"):
        f.write("\n")
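
# (For reference, a node lump as read and written by the functions above
# looks roughly like this in the dump stream, with illustrative values:
#
#   Node-path: trunk/foo.c
#   Node-kind: file
#   Node-action: add
#   Prop-content-length: 10
#   Text-content-length: 27
#   Content-length: 37
#
#   PROPS-END
#   <27 bytes of file text>
#
# Prop-content-length here covers just "PROPS-END\n", and Content-length
# is the sum of the property and text lengths.)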

# Higher-level class that makes use of the above to filter dump
# file fragments a whole revision at a time.

class Filter:
    def __init__(self, paths):
        self.revisions = {}
        self.paths = paths

    def tweak(self, revhdr, contents):
        contents2 = []
        for lump in contents:
            action = lump.hdrdict["Node-action"]
            path = lump.hdrdict["Node-path"]

            if not self.paths.interesting(path):
                continue # boooring

            need = 1 # we need to do something about this lump

            if action == "add":
                if lump.hdrdict.has_key("Node-copyfrom-path"):
                    srcrev = string.atoi(lump.hdrdict["Node-copyfrom-rev"])
                    srcpath = lump.hdrdict["Node-copyfrom-path"]
                    if not self.paths.interesting(srcpath):
                        # Copy from a boring path to an interesting
                        # one, meaning we must use svnlook to
                        # extract the subtree and convert it into
                        # lumps.
                        treecmd = "svnlook tree -r%d %s %s" % \
                                  (srcrev, quote(repos), quote(srcpath))
                        tree = os.popen(treecmd, "r")
                        pathcomponents = []
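                        # `svnlook tree' prints an indented listing, one
                        # name per line, using one leading space per
                        # level of depth and a trailing "/" on
                        # directories, e.g.
                        #
                        #   foo/
                        #    bar/
                        #     baz.txt
                        #    qux.c
                        #
                        # The loop below rebuilds each entry's full path
                        # from that indentation depth.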
                        while 1:
                            treeline = tree.readline()
                            if treeline == "": break
                            if treeline[-1:] == "\n": treeline = treeline[:-1]
                            subdir = 0
                            while treeline[-1:] == "/":
                                subdir = 1
                                treeline = treeline[:-1]
                            depth = 0
                            while treeline[:1] == " ":
                                depth = depth + 1
                                treeline = treeline[1:]
                            pathcomponents[depth:] = [treeline]
                            thissrcpath = string.join([srcpath] + pathcomponents[1:], "/")
                            thisdstpath = string.join([path] + pathcomponents[1:], "/")
                            newlump = Lump()
                            newlump.sethdr("Node-path", thisdstpath)
                            newlump.sethdr("Node-action", "add")
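                            # Carry the node's properties across:
                            # `svnlook pl' (proplist) lists the property
                            # names set on the source path, and
                            # `svnlook pg' (propget) fetches each value.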
                            props = os.popen("svnlook pl -r%d %s %s" % \
                                (srcrev, quote(repos), quote(thissrcpath)), "r")
                            while 1:
                                propname = props.readline()
                                if propname == "": break
                                if propname[-1:] == "\n": propname = propname[:-1]
                                while propname[:1] == " ": propname = propname[1:]
                                propf = os.popen("svnlook pg -r%d %s %s %s" % \
                                    (srcrev, quote(repos), quote(propname), quote(thissrcpath)), "r")
                                proptext = propf.read()
                                propf.close()
                                newlump.setprop(propname, proptext)
                            props.close()
                            if subdir:
                                newlump.sethdr("Node-kind", "dir")
                            else:
                                newlump.sethdr("Node-kind", "file")
                                f = os.popen("svnlook cat -r%d %s %s" % \
                                    (srcrev, quote(repos), quote(thissrcpath)), "r")
                                newlump.text = f.read()
                                f.close()
                            contents2.append(newlump)
                        tree.close()
                        if lump.text != None:
                            # This was a copyfrom _plus_ some sort of
                            # delta or new contents, which means that
                            # having done the copy we now also need a
                            # change record providing the new contents.
                            lump.sethdr("Node-action", "change")
                            lump.delhdr("Node-copyfrom-rev")
                            lump.delhdr("Node-copyfrom-path")
                        else:
                            need = 0 # we have now done something
            if need:
                contents2.append(lump)

        # Change the contents array.
        contents[:] = contents2

        # If we've just removed everything in this revision, leave
        # out some revision properties as well.
        if (len(contents) == 0):
            revhdr.delprop("svn:log")
            revhdr.delprop("svn:author")
            revhdr.delprop("svn:date")

fr = sys.stdin
fw = sys.stdout

# Parse our command-line arguments.
parser = OptionParser(usage="Usage: %prog [options] src-repo regexp...")
parser.add_option("--drop-empty-revs", action="store_true",
                  dest="drop_empty_revs", default=False,
                  help="filter empty revisions from the dump")
parser.add_option("--renumber-revs", action="store_true",
                  dest="renumber_revs", default=False,
                  help="renumber remaining revisions")
(options, args) = parser.parse_args()
if len(args) < 2:
    print >>sys.stderr, sys.argv[0] + ": Too few arguments."
    print >>sys.stderr, parser.usage
    sys.exit(2)

repos = args[0]
paths = InterestingPaths(args[1:])

# We use this table to map input revisions to output revisions.
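# (When --renumber-revs is off, revmap stays None and revision numbers
# pass through unchanged. When it is on and --drop-empty-revs discards a
# revision, that input revision maps to the most recently written output
# revision, so rewritten Node-copyfrom-rev headers always refer to a
# revision that exists in the output.)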
if options.renumber_revs:
    revmap = {}
else:
    revmap = None

# Pass the dump-file header through unchanged.
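# (The header is typically an SVN-fs-dump-format-version record followed
# by a UUID record; neither carries a Revision-number header, which is
# what the loop below tests for.)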
lump = read_lump(fr)
while not lump.hdrdict.has_key("Revision-number"):
    write_lump(fw, lump, revmap)
    lump = read_lump(fr)

revhdr = lump

filt = Filter(paths)

current_output_rev = 0
while revhdr != None:
    # Read revision header.
    assert revhdr.hdrdict.has_key("Revision-number")
    contents = []
    # Read revision contents.
    while 1:
        lump = read_lump(fr)
        if lump == None or lump.hdrdict.has_key("Revision-number"):
            newrevhdr = lump
            break
        contents.append(lump)

    # Alter the contents of the revision.
    filt.tweak(revhdr, contents)

    # Determine whether we should output this revision. We only
    # update the current_output_rev if we're actually going to write
    # something.
    should_write = (len(contents) > 0 or not options.drop_empty_revs)
    if should_write:
        current_output_rev += 1

    # Update our revmap with information about this revision. Note that
    # if this revision won't be written, current_output_rev still points
    # to the last version we dumped.
    input_rev = int(revhdr.hdrdict["Revision-number"])
    if revmap != None:
        revmap[input_rev] = current_output_rev

    # Write out this revision, if that's what we've decided to do.
    if should_write:
        write_lump(fw, revhdr, revmap)
        for lump in contents:
            write_lump(fw, lump, revmap)

    # And loop round again.
    revhdr = newrevhdr

fr.close()
fw.close()