bin-utils/svndumpfilter2

#!/usr/bin/env python

# Utility to filter a dump file of a Subversion repository to
# produce a dump file describing only specified subdirectories of
# the tree contained in the original one. This is similar in
# concept to the official tool `svndumpfilter', but it's able to
# cope with revisions which copy files into the area of interest
# from outside it (in which situation a Node-copyfrom won't be
# valid in the output dump file). However, in order to support
# this, svndumpfilter2 requires access via `svnlook' to the
# original repository from which the input dump file was produced.
#
# Usage:
#
#     svndumpfilter2 [options] source-repository regexp [regexp...]
#
# This command expects to receive a Subversion dump file on
# standard input, which must correspond to the Subversion
# repository pointed to by the first argument. It outputs a
# filtered dump file on standard output.
#
# `source-repository': The first argument must be a pathname to a
# _local_ Subversion repository. That is, it isn't a Subversion URL
# (beginning with http:// or svn:// or anything else like that);
# it's a simple local pathname (absolute or relative). A simple
# test to see if it's a valid pathname is to pass it as an argument
# to `svnlook tree'. If that succeeds, it's also a valid first
# argument to svndumpfilter2.
#
# `regexp': The remaining arguments are used to select directory
# names from the top level of the repository's internal directory
# tree. Any directory matching any of the regexps will be
# considered `interesting' and copied into the output dump file;
# any directory not matching will not. Matching is performed at the
# top level only: it is not currently possible to selectively
# include a subset of second-level directories with a common
# parent.
#
# Options include:
#
# `--drop-empty-revs': Exclude empty revisions from the output.
#
# `--renumber-revs': Generate sequential revision numbers in the
# filtered output.  This may help work around issues with certain
# versions of 'svnadmin load'.
#
# For example, this command...
#
#     svndumpfilter2 /home/svnadmin/myrepos foo bar baz quu+x
#
# ... will read a dump file on standard input, and output one on
# standard output which contains only the subdirectories `foo',
# `bar', `baz', `quux', `quuux', `quuuux', etc.
#
# You will probably usually want to use svndumpfilter2 in
# conjunction with the production of the dump file in the first
# place, like this:
#
#     svnadmin dump /home/svnadmin/myrepos | \
#         svndumpfilter2 /home/svnadmin/myrepos foo bar baz quu+x > msv.dump

import hashlib
import md5
import os
import re
import string
import sys
import types
from optparse import OptionParser

# Quoting function which should render any string impervious to
# POSIX shell metacharacter expansion.
def quote(word):
    """Return `word` quoted for safe inclusion in a POSIX shell command.

    The word is wrapped in single quotes; each embedded single quote is
    rewritten as: close quote, backslash-escaped quote, reopen quote.
    """
    return "'" + word.replace("'", "'\\''") + "'"

# First, the sensible way to deal with a pathname is to split it
# into pieces at the slashes and thereafter treat it as a list.
def splitpath(s):
    """Split a pathname at slashes and return its non-empty components.

    Leading, trailing and repeated slashes produce no components, so
    "/a//b/" yields ["a", "b"] and "" or "/" yield [].
    """
    return [component for component in s.split("/") if component != ""]

def joinpath(list, prefix=""):
    """Join path components with slashes, prepending `prefix` verbatim.

    `list` is a sequence of component strings (the parameter name is
    kept for callers that pass it by keyword).  No slash is inserted
    between `prefix` and the first component.
    """
    return prefix + "/".join(list)

def cleanpath(s):
    """Return `s` rebuilt from its non-empty components.

    The path is split with splitpath() and re-joined with joinpath(),
    which drops leading, trailing and repeated slashes.
    """
    components = splitpath(s)
    return joinpath(components)

def catpath(path1, path2, prefix=""):
    """Concatenate two pathnames into one cleaned path.

    Both paths are split into components, the components of `path2`
    are appended to those of `path1`, and the result is joined with
    slashes after `prefix`.
    """
    components = splitpath(path1)
    components.extend(splitpath(path2))
    return joinpath(components, prefix)

# Decide whether a pathname is interesting or not.
class InterestingPaths:
    """Decide whether a repository path is selected by the user's regexps.

    NOTE(review): each regexp is applied with re.match() to the whole
    cleaned path, so it is anchored at the start of the path only; a
    pattern such as `foo' also accepts `foobar/x'.  The usage comment at
    the top of the file describes matching on top-level directory names
    only -- confirm which behaviour is intended.
    """

    def __init__(self, args):
        """Compile every regexp string in `args`."""
        self.res = [re.compile(pattern) for pattern in args]

    def interesting(self, path):
        """Return 1 if `path` is interesting, 0 otherwise."""
        cleaned = cleanpath(path)
        if cleaned == '':
            # A path with no components at all generally means svn
            # properties are being changed on the root of the
            # repository.  There is nothing to match against, so such
            # records are always kept.
            return 1
        for pattern in self.res:
            if pattern.match(cleaned) is not None:
                return 1
        return 0

# A class and some functions to handle a single lump of
# RFC822-ish-headers-plus-data read from an SVN dump file.

class Lump:
    """A single record of an SVN dump file: RFC822-ish headers plus data.

    Headers are held both as an ordered list of names (`hdrlist`) and a
    name -> value dictionary (`hdrdict`), so they can be written back in
    the order in which they were first set.  Properties are held the
    same way in `props`, a two-element list [ordered names, name ->
    value]; a value of None stands for a property deletion ("D" record).
    """

    def __init__(self):
        self.hdrlist = []      # header names, in first-seen order
        self.hdrdict = {}      # header name -> header value
        self.prop = ""         # raw text of the property block
        self.text = None       # raw text content, or None when absent
        self.extant = 1        # write_lump() writes nothing when this is false
        self.props = [[], {}]  # [ordered property names, name -> value]

    def sethdr(self, key, val):
        """Set header `key` to `val`, recording its position if it is new."""
        if key not in self.hdrdict:
            self.hdrlist.append(key)
        self.hdrdict[key] = val

    def delhdr(self, key):
        """Remove header `key`; do nothing if it is not present."""
        if key in self.hdrdict:
            del self.hdrdict[key]
            self.hdrlist.remove(key)

    def _counted_field(self, index):
        """Read one length-prefixed field of self.prop starting at `index`.

        A field is a two-character tag, a decimal length, a newline,
        exactly that many characters of data, and a closing newline.
        Returns (data, index of the character after the closing newline).
        """
        nlpos = self.prop.find("\n", index)
        assert nlpos > 0
        length = int(self.prop[index+2:nlpos])
        start = nlpos + 1
        end = start + length
        assert self.prop[end] == "\n"
        return self.prop[start:end], end + 1

    def propparse(self):
        """Parse the raw property block in self.prop into self.props.

        "K"/"V" pairs are stored with their value and "D" records with
        None.  Parsing stops at "PROPS-END".

        Raises ValueError on a record that is none of K, D or PROPS-END.
        """
        index = 0
        while 1:
            tag = self.prop[index:index+2]
            if tag == "K ":
                wantval = 1
            elif tag == "D ":
                wantval = 0
            elif self.prop[index:index+9] == "PROPS-END":
                break
            else:
                # String exceptions (raise "...") are rejected by
                # Python 2.6 and later, so raise a real exception.
                raise ValueError("Unrecognised record in props section")
            name, index = self._counted_field(index)
            if wantval:
                assert self.prop[index:index+2] == "V "
                value, index = self._counted_field(index)
            else:
                value = None
            self.props[0].append(name)
            self.props[1][name] = value

    def setprop(self, key, val):
        """Set property `key` to `val`, recording its position if it is new."""
        if key not in self.props[1]:
            self.props[0].append(key)
        self.props[1][key] = val

    def delprop(self, key):
        """Forget property `key` entirely; do nothing if it is not present."""
        if key in self.props[1]:
            del self.props[1][key]
            self.props[0].remove(key)

    def correct_headers(self, revmap):
        """Rebuild self.prop and bring the dependent headers up to date.

        `revmap` is either None (revision numbers are left alone) or a
        dictionary mapping input revision numbers to output revision
        numbers; a revision missing from the dictionary raises KeyError.
        """
        # First reconstitute the properties block.  A lump with no
        # properties gets no block at all, so an input block holding
        # nothing but PROPS-END is not reproduced on output.
        pieces = []
        if self.props is not None and len(self.props[0]) > 0:
            for key in self.props[0]:
                val = self.props[1][key]
                if val is None:
                    pieces.append("D %d" % len(key) + "\n" + key + "\n")
                else:
                    pieces.append("K %d" % len(key) + "\n" + key + "\n")
                    pieces.append("V %d" % len(val) + "\n" + val + "\n")
            pieces.append("PROPS-END\n")
        self.prop = "".join(pieces)

        # Now fix up the content length headers.
        if len(self.prop) > 0:
            self.sethdr("Prop-content-length", str(len(self.prop)))
        else:
            self.delhdr("Prop-content-length")

        # Only fiddle with the md5 if we're not doing a delta.
        if self.hdrdict.get("Text-delta", "false") != "true":
            if self.text is not None:
                self.sethdr("Text-content-length", str(len(self.text)))
                self.sethdr("Text-content-md5",
                            hashlib.md5(self.text).hexdigest())
            else:
                self.delhdr("Text-content-length")
                self.delhdr("Text-content-md5")

        if len(self.prop) > 0 or self.text is not None:
            if self.text is None:
                textlen = 0
            else:
                textlen = len(self.text)
            self.sethdr("Content-length", str(len(self.prop) + textlen))
        else:
            self.delhdr("Content-length")

        # Adjust the revision numbers as needed.  The test must be
        # "is not None": an empty dictionary still means renumbering.
        for header in ("Revision-number", "Node-copyfrom-rev"):
            if header in self.hdrdict:
                old_val = int(self.hdrdict[header])
                if revmap is not None:
                    new_val = revmap[old_val]
                else:
                    new_val = old_val
                self.sethdr(header, str(new_val))

def read_rfc822_headers(f):
    """Read one block of "Key: value" header lines from `f` into a new Lump.

    Blank lines before the first header are skipped; the first blank
    line after at least one header ends the block.  Returns the Lump,
    or None when end of file is reached first -- including when it is
    reached after some headers but before their terminating blank line.
    """
    ret = Lump()
    while 1:
        s = f.readline()
        if s == "":
            return None  # end of file
        if s == "\n":
            if len(ret.hdrlist) > 0:
                break  # newline after headers ends them
            continue  # newline before headers is simply ignored
        if s.endswith("\n"):
            s = s[:-1]
        colon = s.find(":")
        assert colon > 0
        assert s[colon:colon+2] == ": "
        ret.sethdr(s[:colon], s[colon+2:])
    return ret

def read_lump(f):
    """Read one complete record (headers, properties, text) from `f`.

    Returns a Lump, or None when read_rfc822_headers() reports end of
    file.  The property block is read and parsed only when
    Prop-content-length is greater than zero; the text is read only
    when a Text-content-length header is present.
    """
    lump = read_rfc822_headers(f)
    if lump is None:
        return None
    pcl = int(lump.hdrdict.get("Prop-content-length", "0"))
    if pcl > 0:
        lump.prop = f.read(pcl)
        lump.propparse()
    if "Text-content-length" in lump.hdrdict:
        tcl = int(lump.hdrdict["Text-content-length"])
        lump.text = f.read(tcl)
    return lump

def write_lump(f, lump, revmap):
    """Write `lump` to `f` in dump-file format.

    Nothing is written when lump.extant is false.  Otherwise the
    lump's headers are first brought up to date with
    lump.correct_headers(revmap), then the headers, a blank line, the
    property block and the text (if any) are written.  A final newline
    follows whenever the lump carries any content-length header.
    """
    if not lump.extant:
        return
    lump.correct_headers(revmap)
    for key in lump.hdrlist:
        f.write(key + ": " + lump.hdrdict[key] + "\n")
    f.write("\n")
    f.write(lump.prop)
    if lump.text is not None:
        f.write(lump.text)
    for header in ("Prop-content-length", "Text-content-length",
                   "Content-length"):
        if header in lump.hdrdict:
            f.write("\n")
            break

# Higher-level class that makes use of the above to filter dump
# file fragments a whole revision at a time.

class Filter:
    """Filter the node records of a dump file one whole revision at a time.

    The svnlook helpers below read the module-level name `repos` (the
    source repository path) and use the module-level quote() and Lump.
    """

    def __init__(self, paths):
        """`paths` must offer interesting(path), as InterestingPaths does."""
        self.revisions = {}  # never read or written by this class
        self.paths = paths

    def tweak(self, revhdr, contents):
        """Reduce `contents` (a list of node lumps) to the interesting ones.

        `contents` is modified in place.  Lumps on boring paths are
        dropped.  An "add" copied from a boring path cannot keep its
        copyfrom reference, so it is replaced by plain "add" lumps
        rebuilt with svnlook; if the copy also carried text, the
        original lump follows them as a "change" with the copyfrom
        headers removed.  When nothing is left, svn:log, svn:author and
        svn:date are deleted from `revhdr`.
        """
        kept = []
        for lump in contents:
            action = lump.hdrdict["Node-action"]
            path = lump.hdrdict["Node-path"]

            if not self.paths.interesting(path):
                continue  # boooring

            if action == "add" and "Node-copyfrom-path" in lump.hdrdict:
                srcrev = int(lump.hdrdict["Node-copyfrom-rev"])
                srcpath = lump.hdrdict["Node-copyfrom-path"]
                if not self.paths.interesting(srcpath):
                    # Copy from a boring path to an interesting one:
                    # extract the subtree and convert it into lumps.
                    kept.extend(self._expand_copy(srcrev, srcpath, path))
                    if lump.text is None:
                        continue  # the generated lumps say it all
                    # This was a copyfrom _plus_ new contents, so
                    # having done the copy we also need a change
                    # record providing the new contents.
                    lump.sethdr("Node-action", "change")
                    lump.delhdr("Node-copyfrom-rev")
                    lump.delhdr("Node-copyfrom-path")
            kept.append(lump)

        # Change the contents array.
        contents[:] = kept

        # If we've just removed everything in this revision, leave
        # out some revision properties as well.
        if len(contents) == 0:
            revhdr.delprop("svn:log")
            revhdr.delprop("svn:author")
            revhdr.delprop("svn:date")

    def _svnlook(self, subcommand, srcrev, *words):
        """Open a read pipe on `svnlook SUBCOMMAND -rREV REPOS WORDS...`."""
        quoted = " ".join([quote(word) for word in (repos,) + words])
        command = "svnlook %s -r%d %s" % (subcommand, srcrev, quoted)
        return os.popen(command, "r")

    def _svnlook_output(self, subcommand, srcrev, *words):
        """Run an svnlook command and return everything it printed."""
        pipe = self._svnlook(subcommand, srcrev, *words)
        try:
            return pipe.read()
        finally:
            pipe.close()

    def _copy_props(self, newlump, srcrev, srcpath):
        """Set on `newlump` every property svnlook lists for srcpath@srcrev."""
        props = self._svnlook("pl", srcrev, srcpath)
        try:
            for propname in props:
                if propname[-1:] == "\n":
                    propname = propname[:-1]
                propname = propname.lstrip(" ")
                proptext = self._svnlook_output("pg", srcrev,
                                                propname, srcpath)
                newlump.setprop(propname, proptext)
        finally:
            props.close()

    def _expand_copy(self, srcrev, srcpath, dstpath):
        """Return "add" lumps recreating srcpath@srcrev underneath dstpath.

        One lump is produced per line of `svnlook tree` output, in the
        order svnlook prints them.
        """
        lumps = []
        pathcomponents = []
        tree = self._svnlook("tree", srcrev, srcpath)
        try:
            for treeline in tree:
                if treeline[-1:] == "\n":
                    treeline = treeline[:-1]
                # Trailing slashes mark a directory ...
                name = treeline.rstrip("/")
                subdir = name != treeline
                # ... and the number of leading spaces gives the depth.
                stripped = name.lstrip(" ")
                depth = len(name) - len(stripped)
                pathcomponents[depth:] = [stripped]
                # Component 0 is the copied node itself, which is
                # already named by srcpath and dstpath.
                relative = pathcomponents[1:]
                thissrcpath = "/".join([srcpath] + relative)
                thisdstpath = "/".join([dstpath] + relative)

                newlump = Lump()
                newlump.sethdr("Node-path", thisdstpath)
                newlump.sethdr("Node-action", "add")
                self._copy_props(newlump, srcrev, thissrcpath)
                if subdir:
                    newlump.sethdr("Node-kind", "dir")
                else:
                    newlump.sethdr("Node-kind", "file")
                    newlump.text = self._svnlook_output("cat", srcrev,
                                                        thissrcpath)
                lumps.append(newlump)
        finally:
            tree.close()
        return lumps

# Main program: read a dump file on standard input, filter it one
# revision at a time, and write the result on standard output.
fr = sys.stdin
fw = sys.stdout

# Parse our command-line arguments.
parser = OptionParser(usage="Usage: %prog [options] src-repo regexp...")
parser.add_option("--drop-empty-revs", action="store_true",
                  dest="drop_empty_revs", default=False,
                  help="filter empty revisions from the dump")
parser.add_option("--renumber-revs", action="store_true",
                  dest="renumber_revs", default=False,
                  help="renumber remaining revisions")
(options, args) = parser.parse_args()
if len(args) < 2:
    sys.stderr.write(sys.argv[0] + ": Too few arguments.\n")
    # get_usage() expands %prog and ends with a newline; the raw
    # parser.usage attribute still contains the literal "%prog".
    sys.stderr.write(parser.get_usage())
    sys.exit(2)

# Path of the source repository; Filter.tweak reads this global when
# it has to run svnlook.
repos = args[0]
paths = InterestingPaths(args[1:])

# We use this table to map input revisions to output revisions.
# None (as opposed to an empty table) means "do not renumber".
if options.renumber_revs:
    revmap = {}
else:
    revmap = None

# Pass the dump-file header through unchanged.  Stop at end of file
# too, so that input without any revision record is simply copied
# instead of failing on None.
lump = read_lump(fr)
while lump is not None and "Revision-number" not in lump.hdrdict:
    write_lump(fw, lump, revmap)
    lump = read_lump(fr)

revhdr = lump

filt = Filter(paths)

# NOTE(review): the counter is incremented before it is recorded, so
# with --renumber-revs the first revision written is numbered 1 even
# when its input number is 0 -- confirm this is intended.
current_output_rev = 0
while revhdr is not None:
    # Read revision header.
    assert "Revision-number" in revhdr.hdrdict
    contents = []
    # Read revision contents: every lump up to the next revision
    # header or end of file.
    while 1:
        lump = read_lump(fr)
        if lump is None or "Revision-number" in lump.hdrdict:
            newrevhdr = lump
            break
        contents.append(lump)

    # Alter the contents of the revision.
    filt.tweak(revhdr, contents)

    # Determine whether we should output this revision.  We only
    # update the current_output_rev if we're actually going to write
    # something.
    should_write = (len(contents) > 0 or not options.drop_empty_revs)
    if should_write:
        current_output_rev += 1

    # Update our revmap with information about this revision.  Note that
    # if this revision won't be written, current_output_rev still points
    # to the last version we dumped.
    input_rev = int(revhdr.hdrdict["Revision-number"])
    if revmap is not None:
        revmap[input_rev] = current_output_rev

    # Write out this revision, if that's what we've decided to do.
    if should_write:
        write_lump(fw, revhdr, revmap)
        for lump in contents:
            write_lump(fw, lump, revmap)

    # And loop round again.
    revhdr = newrevhdr

fr.close()
fw.close()
partage public 2015-05-30 18:42:42 +02:00			`#!/usr/bin/env python`

			`# Utility to filter a dump file of a Subversion repository to`
			`# produce a dump file describing only specified subdirectories of`
			`# the tree contained in the original one. This is similar in`
			# concept to the official tool `svndumpfilter', but it's able to
			`# cope with revisions which copy files into the area of interest`
			`# from outside it (in which situation a Node-copyfrom won't be`
			`# valid in the output dump file). However, in order to support`
			# this, svndumpfilter2 requires access via `svnlook' to the
			`# original repository from which the input dump file was produced.`
			`#`
			`# Usage:`
			`#`
			`# svndumpfilter2 [options] source-repository regexp [regexp...]`
			`#`
			`# This command expects to receive a Subversion dump file on`
			`# standard input, which must correspond to the Subversion`
			`# repository pointed to by the first argument. It outputs a`
			`# filtered dump file on standard output.`
			`#`
			# `source-repository': The first argument must be a pathname to a
			`# _local_ Subversion repository. That is, it isn't a Subversion URL`
			`# (beginning with http:// or svn:// or anything else like that);`
			`# it's a simple local pathname (absolute or relative). A simple`
			`# test to see if it's a valid pathname is to pass it as an argument`
			# to `svnlook tree'. If that succeeds, it's also a valid first
			`# argument to svndumpfilter2.`
			`#`
			# `regexp': The remaining arguments are used to select directory
			`# names from the top level of the repository's internal directory`
			`# tree. Any directory matching any of the regexps will be`
			# considered `interesting' and copied into the output dump file;
			`# any directory not matching will not. Matching is performed at the`
			`# top level only: it is not currently possible to selectively`
			`# include a subset of second-level directories with a common`
			`# parent.`
			`#`
			`# Options include:`
			`#`
			# `--drop-empty-revs': Exclude empty revisions from the output.
			`#`
			# `--renumber-revs': Generate sequential revision numbers in the
			`# filtered output. This may help work around issues with certain`
			`# versions of 'svnadmin load'.`
			`#`
			`# For example, this command...`
			`#`
			`# svndumpfilter2 /home/svnadmin/myrepos foo bar baz quu+x`
			`#`
			`# ... will read a dump file on standard input, and output one on`
			# standard output which contains only the subdirectories `foo',
			# `bar', `baz', `quux', `quuux', `quuuux', etc.
			`#`
			`# You will probably usually want to use svndumpfilter2 in`
			`# conjunction with the production of the dump file in the first`
			`# place, like this:`
			`#`
			`# svnadmin dump /home/svnadmin/myrepos \| \`
			`# svndumpfilter2 /home/svnadmin/myrepos foo bar baz quu+x > msv.dump`

			`import sys`
			`import os`
			`import re`
			`import string`
			`import types`
			`import md5`
			`from optparse import OptionParser`

			`# Quoting function which should render any string impervious to`
			`# POSIX shell metacharacter expansion.`
			`def quote(word):`
			`return "'" + string.replace(word, "'", "'\\''") + "'"`

			`# First, the sensible way to deal with a pathname is to split it`
			`# into pieces at the slashes and thereafter treat it as a list.`
			`def splitpath(s):`
			`list = string.split(s, "/")`
			`# Simplest way to remove all empty elements!`
			`try:`
			`while 1:`
			`list.remove("")`
			`except ValueError:`
			`pass`
			`return list`

			`def joinpath(list, prefix=""):`
			`return prefix + string.join(list, "/")`

			`def cleanpath(s):`
			`return joinpath(splitpath(s))`

			`def catpath(path1, path2, prefix=""):`
			`return joinpath(splitpath(path1) + splitpath(path2), prefix)`

			`# Decide whether a pathname is interesting or not.`
			`class InterestingPaths:`
			`def __init__(self, args):`
			`self.res = []`
			`for a in args:`
			`self.res.append(re.compile(a))`
			`def interesting(self, path):`
			`path = cleanpath(path)`
			`if path == '':`
			`# It's possible that the path may have no elements at`
			`# all, in which case we can't match on its first`
			`# element. This generally occurs when svn properties`
			`# are being changed on the root of the repository; we`
			`# consider those to be always interesting and never`
			`# filter them out.`
			`return 1`
			`for r in self.res:`
			`if r.match(path):`
			`return 1`
			`return 0`

			`# A class and some functions to handle a single lump of`
			`# RFC822-ish-headers-plus-data read from an SVN dump file.`

			`class Lump:`
			`def __init__(self):`
			`self.hdrlist = []`
			`self.hdrdict = {}`
			`self.prop = ""`
			`self.text = None`
			`self.extant = 1`
			`self.props = [[], {}]`
			`def sethdr(self, key, val):`
			`if not self.hdrdict.has_key(key):`
			`self.hdrlist.append(key)`
			`self.hdrdict[key] = val`
			`def delhdr(self, key):`
			`if self.hdrdict.has_key(key):`
			`del self.hdrdict[key]`
			`self.hdrlist.remove(key)`
			`def propparse(self):`
			`index = 0`
			`while 1:`
			`if self.prop[index:index+2] == "K ":`
			`wantval = 1`
			`elif self.prop[index:index+2] == "D ":`
			`wantval = 0`
			`elif self.prop[index:index+9] == "PROPS-END":`
			`break`
			`else:`
			`raise "Unrecognised record in props section"`
			`nlpos = string.find(self.prop, "\n", index)`
			`assert nlpos > 0`
			`namelen = string.atoi(self.prop[index+2:nlpos])`
			`assert self.prop[nlpos+1+namelen] == "\n"`
			`name = self.prop[nlpos+1:nlpos+1+namelen]`
			`index = nlpos+2+namelen`
			`if wantval:`
			`assert self.prop[index:index+2] == "V "`
			`nlpos = string.find(self.prop, "\n", index)`
			`assert nlpos > 0`
			`proplen = string.atoi(self.prop[index+2:nlpos])`
			`assert self.prop[nlpos+1+proplen] == "\n"`
			`prop = self.prop[nlpos+1:nlpos+1+proplen]`
			`index = nlpos+2+proplen`
			`else:`
			`prop = None`
			`self.props[0].append(name)`
			`self.props[1][name] = prop`
			`def setprop(self, key, val):`
			`if not self.props[1].has_key(key):`
			`self.props[0].append(key)`
			`self.props[1][key] = val`
			`def delprop(self, key):`
			`if self.props[1].has_key(key):`
			`del self.props[1][key]`
			`self.props[0].remove(key)`
			`def correct_headers(self, revmap):`
			`# First reconstitute the properties block.`
			`self.prop = ""`
			`if (not (self.props is None)) and len(self.props[0]) > 0:`
			`for key in self.props[0]:`
			`val = self.props[1][key]`
			`if val == None:`
			`self.prop = self.prop + "D %d" % len(key) + "\n" + key + "\n"`
			`else:`
			`self.prop = self.prop + "K %d" % len(key) + "\n" + key + "\n"`
			`self.prop = self.prop + "V %d" % len(val) + "\n" + val + "\n"`
			`self.prop = self.prop + "PROPS-END\n"`
			`# Now fix up the content length headers.`
			`if len(self.prop) > 0:`
			`self.sethdr("Prop-content-length", str(len(self.prop)))`
			`else:`
			`self.delhdr("Prop-content-length")`
			`# Only fiddle with the md5 if we're not doing a delta.`
			`if self.hdrdict.get("Text-delta", "false") != "true":`
			`if self.text != None:`
			`self.sethdr("Text-content-length", str(len(self.text)))`
			`m = md5.new()`
			`m.update(self.text)`
			`self.sethdr("Text-content-md5", m.hexdigest())`
			`else:`
			`self.delhdr("Text-content-length")`
			`self.delhdr("Text-content-md5")`
			`if len(self.prop) > 0 or self.text != None:`
			`if self.text == None:`
			`textlen = 0`
			`else:`
			`textlen = len(self.text)`
			`self.sethdr("Content-length", str(len(self.prop)+textlen))`
			`else:`
			`self.delhdr("Content-length")`
			`# Adjust the revision numbers as needed.`
			`for header in ["Revision-number", "Node-copyfrom-rev"]:`
			`if self.hdrdict.has_key(header):`
			`old_val = int(self.hdrdict[header])`
			`if revmap != None:`
			`new_val = revmap[old_val]`
			`else:`
			`new_val = old_val`
			`self.sethdr(header, str(new_val))`

			`def read_rfc822_headers(f):`
			`ret = Lump()`
			`while 1:`
			`s = f.readline()`
			`if s == "":`
			`return None # end of file`
			`if s == "\n":`
			`if len(ret.hdrlist) > 0:`
			`break # newline after headers ends them`
			`else:`
			`continue # newline before headers is simply ignored`
			`if s[-1:] == "\n": s = s[:-1]`
			`colon = string.find(s, ":")`
			`assert colon > 0`
			`assert s[colon:colon+2] == ": "`
			`key = s[:colon]`
			`val = s[colon+2:]`
			`ret.sethdr(key, val)`
			`return ret`

			`def read_lump(f):`
			`lump = read_rfc822_headers(f)`
			`if lump == None:`
			`return None`
			`pcl = string.atoi(lump.hdrdict.get("Prop-content-length", "0"))`
			`if pcl > 0:`
			`lump.prop = f.read(pcl)`
			`lump.propparse()`
			`if lump.hdrdict.has_key("Text-content-length"):`
			`tcl = string.atoi(lump.hdrdict["Text-content-length"])`
			`lump.text = f.read(tcl)`
			`return lump`

			`def write_lump(f, lump, revmap):`
			`if not lump.extant:`
			`return`
			`lump.correct_headers(revmap)`
			`for key in lump.hdrlist:`
			`val = lump.hdrdict[key]`
			`f.write(key + ": " + val + "\n")`
			`f.write("\n")`
			`f.write(lump.prop)`
			`if lump.text != None:`
			`f.write(lump.text)`
			`if lump.hdrdict.has_key("Prop-content-length") or \`
			`lump.hdrdict.has_key("Text-content-length") or \`
			`lump.hdrdict.has_key("Content-length"):`
			`f.write("\n")`

			`# Higher-level class that makes use of the above to filter dump`
			`# file fragments a whole revision at a time.`

			`class Filter:`
			`def __init__(self, paths):`
			`self.revisions = {}`
			`self.paths = paths`

			`def tweak(self, revhdr, contents):`
			`contents2 = []`
			`for lump in contents:`
			`action = lump.hdrdict["Node-action"]`
			`path = lump.hdrdict["Node-path"]`

			`if not self.paths.interesting(path):`
			`continue # boooring`

			`need = 1 # we need to do something about this lump`

			`if action == "add":`
			`if lump.hdrdict.has_key("Node-copyfrom-path"):`
			`srcrev = string.atoi(lump.hdrdict["Node-copyfrom-rev"])`
			`srcpath = lump.hdrdict["Node-copyfrom-path"]`
			`if not self.paths.interesting(srcpath):`
			`# Copy from a boring path to an interesting`
			`# one, meaning we must use svnlook to`
			`# extract the subtree and convert it into`
			`# lumps.`
			`treecmd = "svnlook tree -r%d %s %s" % \`
			`(srcrev, quote(repos), quote(srcpath))`
			`tree = os.popen(treecmd, "r")`
			`pathcomponents = []`
			`while 1:`
			`treeline = tree.readline()`
			`if treeline == "": break`
			`if treeline[-1:] == "\n": treeline = treeline[:-1]`
			`subdir = 0`
			`while treeline[-1:] == "/":`
			`subdir = 1`
			`treeline = treeline[:-1]`
			`depth = 0`
			`while treeline[:1] == " ":`
			`depth = depth + 1`
			`treeline = treeline[1:]`
			`pathcomponents[depth:] = [treeline]`
			`thissrcpath = string.join([srcpath] + pathcomponents[1:], "/")`
			`thisdstpath = string.join([path] + pathcomponents[1:], "/")`
			`newlump = Lump()`
			`newlump.sethdr("Node-path", thisdstpath)`
			`newlump.sethdr("Node-action", "add")`
			`props = os.popen("svnlook pl -r%d %s %s" % \`
			`(srcrev, quote(repos), quote(thissrcpath)), "r")`
			`while 1:`
			`propname = props.readline()`
			`if propname == "": break`
			`if propname[-1:] == "\n": propname = propname[:-1]`
			`while propname[:1] == " ": propname = propname[1:]`
			`propf = os.popen("svnlook pg -r%d %s %s %s" % \`
			`(srcrev, quote(repos), quote(propname), quote(thissrcpath)), "r")`
			`proptext = propf.read()`
			`propf.close()`
			`newlump.setprop(propname, proptext)`
			`props.close()`
			`if subdir:`
			`newlump.sethdr("Node-kind", "dir")`
			`else:`
			`newlump.sethdr("Node-kind", "file")`
			`f = os.popen("svnlook cat -r%d %s %s" % \`
			`(srcrev, quote(repos), quote(thissrcpath)), "r")`
			`newlump.text = f.read()`
			`f.close()`
			`contents2.append(newlump)`
			`tree.close()`
			`if lump.text != None:`
			`# This was a copyfrom _plus_ some sort of`
			`# delta or new contents, which means that`
			`# having done the copy we now also need a`
			`# change record providing the new contents.`
			`lump.sethdr("Node-action", "change")`
			`lump.delhdr("Node-copyfrom-rev")`
			`lump.delhdr("Node-copyfrom-path")`
			`else:`
			`need = 0 # we have now done something`
			`if need:`
			`contents2.append(lump)`

			`# Change the contents array.`
			`contents[:] = contents2`

			`# If we've just removed everything in this revision, leave`
			`# out some revision properties as well.`
			`if (len(contents) == 0):`
			`revhdr.delprop("svn:log")`
			`revhdr.delprop("svn:author")`
			`revhdr.delprop("svn:date")`

			`fr = sys.stdin`
			`fw = sys.stdout`

			`# Parse our command-line arguments.`
			`parser = OptionParser(usage="Usage: %prog [options] src-repo regexp...")`
			`parser.add_option("--drop-empty-revs", action="store_true",`
			`dest="drop_empty_revs", default=False,`
			`help="filter empty revisions from the dump")`
			`parser.add_option("--renumber-revs", action="store_true",`
			`dest="renumber_revs", default=False,`
			`help="renumber remaining revisions")`
			`(options, args) = parser.parse_args()`
			`if len(args) < 2:`
			`print >>sys.stderr, sys.argv[0] + ": Too few arguments."`
			`print >>sys.stderr, parser.usage`
			`sys.exit(2)`

			`repos = args[0]`
			`paths = InterestingPaths(args[1:])`

			`# We use this table to map input revisions to output revisions.`
			`if options.renumber_revs:`
			`revmap = {}`
			`else:`
			`revmap = None`

			`# Pass the dump-file header through unchanged.`
			`lump = read_lump(fr)`
			`while not lump.hdrdict.has_key("Revision-number"):`
			`write_lump(fw, lump, revmap)`
			`lump = read_lump(fr)`

			`revhdr = lump`

			`filt = Filter(paths)`

			`current_output_rev = 0`
			`while revhdr != None:`
			`# Read revision header.`
			`assert revhdr.hdrdict.has_key("Revision-number")`
			`contents = []`
			`# Read revision contents.`
			`while 1:`
			`lump = read_lump(fr)`
			`if lump == None or lump.hdrdict.has_key("Revision-number"):`
			`newrevhdr = lump`
			`break`
			`contents.append(lump)`

			`# Alter the contents of the revision.`
			`filt.tweak(revhdr, contents)`

			`# Determine whether we should output this revision. We only`
			`# update the current_output_rev if we're actually going to write`
			`# something.`
			`should_write = (len(contents) > 0 or not options.drop_empty_revs)`
			`if should_write:`
			`current_output_rev += 1`

			`# Update our revmap with information about this revision. Note that`
			`# if this revision won't be written, current_output_rev still points`
			`# to the last version we dumped.`
			`input_rev = int(revhdr.hdrdict["Revision-number"])`
			`if revmap != None:`
			`revmap[input_rev] = current_output_rev`

			`# Write out this revision, if that's what we've decided to do.`
			`if should_write:`
			`write_lump(fw, revhdr, revmap)`
			`for lump in contents:`
			`write_lump(fw, lump, revmap)`

			`# And loop round again.`
			`revhdr = newrevhdr`

			`fr.close()`
			`fw.close()`