aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrian Harring <ferringb@google.com>2012-10-22 19:30:33 -0700
committerBrian Harring <ferringb@google.com>2012-10-22 19:30:33 -0700
commit7e0f9e20c7b1e356a11502d3be88983388348873 (patch)
tree224bf7c5e8598965d5bf4eb2a9139e284aaa6fec
parentDrop the prune; it's unnecessary (diff)
downloadgit-conversion-tools-7e0f9e20c7b1e356a11502d3be88983388348873.tar.gz
git-conversion-tools-7e0f9e20c7b1e356a11502d3be88983388348873.tar.bz2
git-conversion-tools-7e0f9e20c7b1e356a11502d3be88983388348873.zip
Rework the blob rewriting of $Header.
Specifically, fix the replacement so it actually matches and replaces; additionally, fix the oversight where the code wasn't rewriting the data statement to reflect the new length.
-rwxr-xr-xprocess_directory.sh25
-rwxr-xr-xrewrite-git-blob.py75
2 files changed, 87 insertions, 13 deletions
diff --git a/process_directory.sh b/process_directory.sh
index fa9a0ee..6d59677 100755
--- a/process_directory.sh
+++ b/process_directory.sh
@@ -1,17 +1,15 @@
#!/bin/bash
-command='
- sed -re "s/^\(paludis (0.1.*)\)$/Package-manager: Paludis \1/" \
- -e "s/^\([Pp]ortage version: (.*)\)$/Package-manager: Portage \1/"'
+
f() {
set -x
mkdir -p "${output}"/{git,cvs-repo/gentoo-x86/Attic}
ln -s "${cvsroot}" "${output}/cvs-repo/CVSROOT"
ln -s "${root}/gentoo-x86/$1" "${output}/cvs-repo/gentoo-x86/$1"
#ln -s "${root}/gentoo-x86/Attic" "${output}/cvs-repo/gentoo-x86/Attic"
- ln -s "$(pwd)/config" "${output}/config"
- ln -s "$(pwd)/gentoo_mailmap.py" "${output}/gentoo_mailmap.py"
+ ln -s "${base}/config" "${output}/config"
+ ln -s "${base}/gentoo_mailmap.py" "${output}/gentoo_mailmap.py"
# Note- this must be canonical path, else it screws up our $Header rewriting.
- cd "$(readlink -f "${output}" )"
+ pushd "$(readlink -f "${output}" )"
export PYTHONPATH="${output}${PYTHONPATH:+:${PYTHONPATH}}"
time cvs2git --options config -v
cd git
@@ -19,24 +17,25 @@ f() {
# Note we're only pulling in blob data here; this is intentional- we need to
# interlace the commit objects together, and these git object pools will
# be used as alternates for the final repo combination.
- sed -re \
- 's|\$Header: '"$(readlink -f "$(pwd)")"'/*output/.*/cvs-repo/|$Header: /var/cvsroot/|g' \
- ../cvs2svn-tmp/git-blob.dat | \
+ "${base}/rewrite-git-blob.py" \
+ ../cvs2svn-tmp/git-blob.dat "${output}/cvs-repo" | \
+ tee ../cvs2svn-tmp/rewritten-blob.dat | \
git fast-import --export-marks=../cvs2svn-tmp/git-blob.idx
+ popd
rm -rf "${final}"
- cd "$root"
mv "$output" "${final}"
set +x
}
[ $# -lt 1 ] && { echo "need an argument..."; exit 1; }
+cd "$(readlink -f "$(pwd)")"
base="$(pwd)"
-root="$(pwd)/cvs-repo"
+root="${base}/cvs-repo"
cvsroot="${root}/CVSROOT"
repo="${root}/gentoo-x86"
-output="$(pwd)/output/${1%,v}"
-final="$(pwd)/final/$1"
+output="${base}/output/${1%,v}"
+final="${base}/final/$1"
mkdir -p "$(dirname "${final}")"
rm -rf "${output}"
diff --git a/rewrite-git-blob.py b/rewrite-git-blob.py
new file mode 100755
index 0000000..430e415
--- /dev/null
+++ b/rewrite-git-blob.py
@@ -0,0 +1,75 @@
+#!/usr/bin/python
+import contextlib
+import collections
+import functools
+import itertools
+import mmap
+import multiprocessing
+import operator
+import os
+import re
+import subprocess
+import sys
+
+@contextlib.contextmanager
+def mmap_open(path):
+    """Context manager yielding a read-only shared memory map of *path*.
+
+    The file descriptor is closed as soon as the mapping has been created,
+    and the mapping itself is closed when the ``with`` block exits (normally
+    or via an exception).  Errors from ``os.open``/``mmap.mmap`` propagate
+    to the caller.
+    """
+    handle = fd = None
+    try:
+        fd = os.open(path, os.O_RDONLY)
+        handle = mmap.mmap(fd, os.fstat(fd).st_size, mmap.MAP_SHARED, mmap.PROT_READ)
+        os.close(fd)
+        # Mark the descriptor as already released so ``finally`` skips it.
+        fd = None
+        yield handle
+    finally:
+        # Compare against None explicitly: descriptor 0 is valid but falsy,
+        # so a plain truthiness test would leak it on an error path.
+        if fd is not None:
+            os.close(fd)
+        if handle is not None:
+            handle.close()
+
+def readline_iterate(handle):
+    """Generate successive lines from *handle* via ``readline()``.
+
+    Iteration ends the first time ``readline()`` returns a falsy value
+    (an empty string at end of input).
+    """
+    while True:
+        current = handle.readline()
+        if not current:
+            return
+        yield current
+
+# Text rewriters: each entry is a compiled pattern's ``sub`` method pre-bound
+# to its replacement string, so it is invoked as ``entry(text)`` and returns
+# the rewritten text.
+mangler = []
+# "(paludis 0.1...)" on a line of its own -> "Package-Manager: paludis-0.1...".
+# The replacement must not end in "/": that was the sed delimiter in the shell
+# expression this was ported from, not part of the output.
+mangler.append(functools.partial(
+    re.compile(r"^\(paludis (0.1.*)\)$", re.M|re.I).sub,
+    r"Package-Manager: paludis-\1"))
+# Special case not covered by the main portage mangler.
+# The raw-string prefix has to sit outside the quotes; with it inside, the
+# pattern demanded a literal "r" before the line start and never matched.
+mangler.append(functools.partial(
+    re.compile(r'^\(Portage (2\.1\.2[^\)]+)\)$', re.M|re.I).sub,
+    r'Package-Manager: portage-\1'))
+# Drop trailing "(manifest recommit)" / "([un]signed manifest commit)" notes,
+# together with any spaces around them.
+mangler.append(functools.partial(
+    re.compile(r' *\((?:manifest +recommit|(?:un)?signed +manifest +commit)\) *$', re.M|re.I).sub,
+    r''))
+
+def process_stream(source, output_dir, output):
+    """Copy a fast-import style stream, rewriting $Header paths in payloads.
+
+    Every line of *source* is copied to *output* unchanged, except for
+    ``data <size>`` commands: their payload is read, each occurrence of
+    ``$Header: <output_dir>`` is replaced with ``$Header: /var/cvsroot``,
+    and the command is re-emitted with the payload's new length.
+
+    source: object providing ``readline()`` and ``read(n)``.
+    output_dir: directory prefix that appears after ``$Header: `` in the
+        payloads; it is made absolute and normalized before matching.
+    output: object providing ``write()``.
+
+    Raises ValueError if a payload is shorter than its declared size.
+    """
+    # Match against the normalized absolute path (no trailing slash, no
+    # "." / ".." components); the raw argument may not appear verbatim.
+    header = "$Header: %s" % os.path.normpath(os.path.abspath(output_dir))
+    line = source.readline()
+    while line:
+        chunks = line.split()
+        # Slice rather than index so blank lines compare as [] instead of
+        # raising IndexError.
+        if chunks[0:1] == ['data']:
+            # Process the blob payload announced by this data command.
+            size = int(chunks[1])
+            data = source.read(size)
+            if len(data) != size:
+                raise ValueError(
+                    "truncated data block: expected %d bytes, got %d (%r)"
+                    % (size, len(data), line))
+            data = data.replace(header, "$Header: /var/cvsroot")
+            # The replacement changes the payload size, so the data command
+            # has to announce the new length.
+            line = 'data %i\n%s' % (len(data), data)
+        output.write(line)
+        line = source.readline()
+
+def main(blob_file, output_dir, output):
+    """Rewrite the $Header paths in *blob_file* and write the result to *output*.
+
+    blob_file: path of the blob stream file to read.
+    output_dir: directory prefix to strip from $Header lines; passed through
+        to ``process_stream``.
+    output: writable object that receives the rewritten stream.
+
+    Returns None, which ``sys.exit`` treats as a successful exit status.
+    """
+    # Map the file instead of reading it in one go; process_stream then
+    # walks it line by line and payload by payload.
+    with mmap_open(blob_file) as data:
+        # Honour the caller's destination instead of hard-coding sys.stdout.
+        process_stream(data, output_dir, output)
+
+if __name__ == '__main__':
+    # Usage: rewrite-git-blob.py <blob-file> <output-dir>
+    # The rewritten stream goes to standard output.
+    exit_status = main(sys.argv[1], sys.argv[2], sys.stdout)
+    sys.exit(exit_status)