bk2cg script

Vincent Danjean Vincent.Danjean at ens-lyon.org
Thu Jun 30 07:16:14 UTC 2005


On Thu, Jun 30, 2005 at 07:56:05AM +0200, krypsilon at gmail.com wrote:
> Hi!
> 
> > Pretty much all the revision history information in the BK repository
> > has successfully been imported into mercurial. Various of us have been
> > playing around with mercurial for a while now, and although it's a step
> > backwards from bitkeeper, its currently quite usable, and improving
> > fast. The mercurial developers have been very responsive in fixing minor
> > bugs and adding features we've requested, so we think this is the best
> > way forward. Rest assured we've spent a great deal of time investigating
> > the alternatives...
> 
> It would be great if the tools used for the BK->Mercurial conversion
> could be made available to the public. There are a lot of projects
> interested in switching and a conversion tool for existing history
> eases the pain..

Here is the script I use to convert to Cogito/Git. It should be easily
adapted for mercurial.
Note : I use Cogito/Git because I have symlinks in my project. I will
convert the git repo to mercurial as soon as symlinks are supported.

Note2 : my script should be independent of the user that launches it. This
means that several users on the same project can use it independently in
their private work directories. Afterwards, they will be able to merge
their Cogito/Git repos together.

Note3 : to use it, go to the base of a BK repo and launch it. It will
create a 'git' subdirectory with the git repo in it. It can be launched
several times (if you commit something in BK and want to add it to
the git repo).
If you have several bk branches (repos of the same project), you can use
only one git repo (with multiple heads) :
$ ls
bk_repo_1/
bk_repo_2/
$ mkdir projet-git
$ ln -s ../projet-git bk_repo_1/git
$ ln -s ../projet-git bk_repo_2/git
$ (cd bk_repo_1 && bk2cg)
$ (cd bk_repo_2 && bk2cg)

  Vincent
-------------- next part --------------
#!/usr/bin/perl

use strict;
use diagnostics;
use warnings;
use Getopt::Long qw(:config permute);
use Pod::Usage;

# for the O_CREAT... constants
use Fcntl;
# so that END blocks are run when the program is terminated by a signal
use sigtrap qw(die normal-signals);

use Data::Dumper;

# Program version, printed by --version.
our $VERSION;
#$VERSION = sprintf "%d.%03d", q$Revision: 1.7 $ =~ /(\d+)/g;
# Formatted from the two numbers found in the Revision keyword string.
$VERSION = sprintf "%d.%03d", q$Revision: 0.9 $ =~ /(\d+)/g;

# Command-line options with their built-in defaults; get_options() fills
# in the rest.
my %options=( "git-dir" => "git",
#	      "committer-name" => "bk2cogito",
#	      "committer-email" => 'Foo.Bar at domain.org',
	      );

# Parse the command line into %options.
#
# Defaults:
#   git-dir        preset to "git" where %options is declared
#   git-head       last path component of the current directory
#   rev-to-commit  revision printed by `bk changes -e -r+ -d:REV:`
#
# The last two defaults need external commands, so they are computed only
# after the command line has been parsed and only when the user gave no
# value.  This way --version, --short-help, --help and --man work without
# bk and outside of a BK repository.
#
# A non-option argument makes the program die; an invalid option ends it
# through pod2usage(2).  The informational options print their text and
# exit with status 0.
sub get_options() {
    # Callback for the "<>" pseudo-option: refuse any non-option argument.
    my $handle_args = sub {
        my $name=shift;
        die "What about '$name' ?\n";
    };

    $options{"<>"} = $handle_args;

    GetOptions (\%options,
                "version",
                "short-help|h",
                "help",
                "man",
                "git-dir=s",
                "git-head=s",
#                "committer-name=s",
#                "committer-email=s",
                "rev-to-commit=s",
                "debug-log=s",
                "<>",
                ) or pod2usage(2);
    if (defined($options{"version"})) {
        print $0, " version ", $VERSION, "\n";
        exit 0;
    }
    if (defined($options{"short-help"})) {
        pod2usage(-exitstatus => 0, -verbose => 0);
    }
    if (defined($options{"help"})) {
        pod2usage(-exitstatus => 0, -verbose => 1);
    }
    if (defined($options{"man"})) {
        pod2usage(-exitstatus => 0, -verbose => 2);
    }

    # Default head name: the directory name of the BK repository.
    if (not defined($options{"git-head"})) {
        my $head = `pwd`;
        chomp($head);
        $head =~ s,.*/,,;
        $options{"git-head"} = $head;
    }

    # Default revision: the one reported by bk for "-r+".
    if (not defined($options{"rev-to-commit"})) {
        my $rev = `bk changes -e -r+ -d:REV:`;
        chomp($rev);
        $options{"rev-to-commit"} = $rev;
    }
}

# Directory holding the git working tree (--git-dir); set by check_env().
my $gitdir;
# Name of the git head being filled (--git-head); set by check_env().
my $githead;

# Temporary file that receives the commit message; created by check_env().
my $logfile;
# Temporary shell script installed as $EDITOR; created by check_env().
my $editor;
# Path of the lock file, set once lock_dir() has created it.
my $lockfile;

# Die hook helper: while an eval is being executed ($^S is true) the
# exception is re-thrown untouched; otherwise the temporary files and the
# lock are removed through end_handler().
sub die_handler {
    if ($^S) {
        die @_;
    }
    end_handler();
}


# Remove everything the program left on disk: the temporary log file, the
# temporary editor script and, when the lock was taken, the lock file
# (after closing its handle).  Failures to unlink are only reported on
# STDERR.
sub end_handler {
    foreach my $tmpfile ($logfile, $editor) {
        next unless ($tmpfile && -f $tmpfile);
        unlink($tmpfile) or print STDERR "Couldn't unlink $tmpfile: $!\n";
    }
    if ($lockfile) {
        close(FH);
        unlink($lockfile) or print STDERR "Couldn't unlink $lockfile: $!\n";
    }
}

# Clean up temporary files and the lock whenever the program terminates.
END { end_handler(); }

# Take the exclusive lock "$gitdir/.git/bk2cogito.lock".
#
# The lock file is created with O_CREAT|O_EXCL on the global handle FH.
# While another process holds the lock the call waits, retrying every
# $time seconds and printing one dot per second.  On success the lock
# path is stored in $lockfile so that end_handler() removes it.
#
# Calling the sub again while the lock is already held is a no-op, and
# any error other than "file exists" is fatal instead of being retried
# forever.
sub lock_dir() {
    # We already own the lock: waiting for it again would never end.
    return if defined($lockfile);

    my $lock=$gitdir."/.git/bk2cogito.lock";
    my $time=10;
    while (not sysopen(FH, $lock, O_RDWR|O_CREAT|O_EXCL)) {
        # Only EEXIST means that somebody else holds the lock; a missing
        # directory or a permission problem will not go away by waiting.
        if (not $!{EEXIST}) {
            die "Cannot create lockfile '$lock': $!";
        }
        print STDERR "lockfile '$lock' already exists.\n",
            "Sleeping ${time}s before retrying";
        for (my $count=0; $count<$time; $count++) {
            print STDERR ".";
            sleep(1);
        }
        print STDERR "\n";
    }
    $lockfile=$lock;
}

# Prepare the environment for the conversion.
#
# - Copies --git-dir and --git-head into $gitdir and $githead.
# - Creates $gitdir when it does not exist.
# - When $gitdir already contains a .git directory, takes the lock and
#   points .git/HEAD at refs/heads/$githead.
# - Creates two temporary files with mktemp: $logfile and $editor.
#   $editor is a small shell script, installed as $EDITOR, that appends
#   the lines starting with "CG:" of the file it is given to $logfile and
#   then copies $logfile over that file.
# - Exports GIT_COMMITTER_NAME / GIT_COMMITTER_EMAIL when the
#   corresponding entries exist in %options.
#
# Dies when the directory, the temporary files or the editor script
# cannot be created.
sub check_env() {
    $gitdir=$options{"git-dir"};
    $githead=$options{"git-head"};

    if (not -d $gitdir."/.") {
        mkdir $gitdir or die "Unable to create directory '$gitdir': $!";
    }
    if (-d $gitdir."/.git") {
        lock_dir();
        # List form: no shell involved, so the names need no quoting.
        do_cmd("ln", "-sf", "refs/heads/$githead", "$gitdir/.git/HEAD");
    }

    # Create one temporary file and return its path; die if mktemp fails
    # so that an empty path is never used later on.
    my $make_temp = sub {
        my $template=shift;
        my $path=`mktemp -t $template`;
        if ($? != 0 or not defined($path) or $path !~ /\S/) {
            die "Unable to create a temporary file from '$template'";
        }
        chomp $path;
        return $path;
    };
    $editor=$make_temp->("bk2cogito-editor.XXXXXX");
    $logfile=$make_temp->("bk2cogito-log.XXXXXX");

    open(EDITOR, ">", $editor) or die "Cannot open $editor ($!)";
    print EDITOR "#!/bin/sh\n";
    print EDITOR "grep '^CG:' \"\$1\" >> '$logfile'\n";
    print EDITOR "cp '$logfile' \"\$1\"\n";
    # Buffered write errors only show up when the handle is closed.
    close(EDITOR) or die "Cannot write $editor ($!)";
    do_cmd("chmod", "+x", $editor);

    $ENV{"EDITOR"}=$editor;
    if (defined($options{"committer-name"})) {
        $ENV{"GIT_COMMITTER_NAME"}=$options{"committer-name"};
    }
    if (defined($options{"committer-email"})) {
        $ENV{"GIT_COMMITTER_EMAIL"}=$options{"committer-email"};
    }
}

# All known BK changesets, indexed three ways by load_ChangeSets() and
# add_rev(): {"REVS"} by BK revision, {"HREVS"} by :MD5KEY: and {"CGIDS"}
# by cogito/git commit id.  Each value is a reference to the same
# per-changeset hash.
my %ChangeSets;

# Fill %ChangeSets from the BK repository and from the tags of an existing
# git repository.
#
# 1. `bk changes` is run with a '|'-separated format.  Every line becomes
#    a hash with the keys "rev", "parent", "mparent", "hrev", "user" and
#    "date" (empty fields are left out), stored under {"REVS"}{rev} and
#    {"HREVS"}{hrev}.
# 2. `bk changes -f -e -v` is run.  A line "ChangeSet" announces a
#    changeset, the following line carries its revision, and every later
#    line up to the next "ChangeSet" is appended to that changeset's
#    "log" entry.
# 3. When $gitdir/.git/refs/tags exists, every tag named
#    "bk2cg-<30 characters>" whose suffix is a known hrev links that
#    changeset to the commit id stored in the tag file ("cg-id" entry and
#    {"CGIDS"} index).
#
# Dies when bk cannot be run or fails, when its output cannot be parsed,
# or when a matching tag file cannot be read.
sub load_ChangeSets() {
    print "Reading ChangeSets\n";
    open(my $csets,
         'bk changes -f -e -d":REV:|:PARENT:|:MPARENT:|:MD5KEY:|:USER:@:HOST:|:D: :T::TZ:\n" |')
        or die "Cannot run 'bk changes' ($!)";
    my @fields_name=("rev", "parent", "mparent", "hrev", "user", "date");
    while (my $line=<$csets>) {
        chomp $line;
        my @fields=split('[|]', $line);
        if (scalar(@fields) != scalar(@fields_name)) {
            die "Cannot parse 'bk changes' line '$line': expected "
                .scalar(@fields_name)." fields, got ".scalar(@fields);
        }
        # A fresh hash per line: references to it are stored below.
        my %cset;
        foreach my $field (@fields_name) {
            my $val=shift @fields;
            if ($val ne '') {
                $cset{$field}=$val;
            }
        }
        $ChangeSets{"REVS"}->{$cset{"rev"}}=\%cset;
        $ChangeSets{"HREVS"}->{$cset{"hrev"}}=\%cset;
    }
    # For a pipe, close() reports the exit status of the command; an
    # incomplete changeset list must not go unnoticed.
    close($csets)
        or die "'bk changes' failed (".($! ? $! : "exit status ".($? >> 8)).")";

    print "Reading Logs\n";
    open(my $logs, 'bk changes -f -e -v |')
        or die "Cannot run 'bk changes' ($!)";
    my $prev="none";
    my $cset=undef;
    while (my $line=<$logs>) {
        chomp $line;
        if ($prev eq "ChangeSet") {
            # The line right after "ChangeSet" starts with the revision.
            if ($line !~ /^  ([0-9.]+) .*$/) {
                die "Cannot parse log line after ChangeSet '$line'";
            }
            my $rev=$1;
            if (not defined($ChangeSets{"REVS"}->{$rev})) {
                die "Unknown ChangeSet $rev in log line '$line'";
            }
            $cset=$ChangeSets{"REVS"}->{$rev};
            $prev="";
            next;
        }

        if ($line =~ /^ChangeSet$/) {
            $prev="ChangeSet";
            next;
        }

        if (not defined($cset)) {
            die "Cannot parse log line '$line'";
        }

        $cset->{"log"}.=$line."\n";
    }
    close($logs)
        or die "'bk changes -v' failed (".($! ? $! : "exit status ".($? >> 8)).")";

    my $tagdir=$gitdir."/.git/refs/tags";
    if (-d $tagdir) {
        print "Reading links between cogito and BK ChangeSets\n";
        opendir(my $dir, $tagdir)
            or die "Cannot opendir $gitdir/.git/refs/tags ($!)";
        my @tags = grep { /^bk2cg-[a-zA-Z0-9_-]{30}$/ } readdir($dir);
        closedir $dir;
        foreach my $tag (@tags) {
            my $hrev=$tag;
            $hrev =~ s/^[^-]*-//;
            # Tags of changesets that this BK repository does not know.
            if (not defined($ChangeSets{"HREVS"}->{$hrev})) {
                next;
            }
            # Read the tag file directly: no shell, no quoting problem.
            open(my $tagfh, "<", "$tagdir/$tag")
                or die "Cannot read $tagdir/$tag ($!)";
            my $cg_id=do { local $/; <$tagfh> };
            close($tagfh);
            $cg_id="" unless defined($cg_id);
            chomp $cg_id;
            $ChangeSets{"HREVS"}->{$hrev}->{"cg-id"}=$cg_id;
            $ChangeSets{"CGIDS"}->{$cg_id}=$ChangeSets{"HREVS"}->{$hrev};
        }
    }
}

# Record that a BK changeset now exists as a commit in the git repository.
#
#   $cset    reference to the changeset hash (keys "rev", "hrev", ...)
#   $cg_id   id of the commit that holds this changeset
#   $prefix  text printed in front of the progress message
#
# Runs cg-tag in $gitdir to create the tag "bk2cg-<hrev>", stores the id
# in the changeset ("cg-id") and in the {"CGIDS"} index, and prints a
# progress line.  The directory is quoted for the shell, as in the other
# commands of this file.
sub add_rev($$$) {
    my ($cset, $cg_id, $prefix)=@_;

    do_cmd("cd '$gitdir' && cg-tag bk2cg-".$cset->{"hrev"});
    $cset->{"cg-id"}=$cg_id;
    $ChangeSets{"CGIDS"}->{$cg_id}=$cset;
    print $prefix."Rev ".$cset->{"rev"}."/".$cset->{"hrev"}." -> $cg_id added\n";
}

# Run an external command, echoing it first, and die unless it succeeds.
#
# The arguments are handed unchanged to system(): a single string goes
# through the shell, a list is executed directly without a shell.
# Dies when the command cannot be started, is killed by a signal or exits
# with a non-zero status; the message says which of the three happened.
#
# Declared without a prototype: the sub is called earlier in the file
# than it is defined, where a prototype cannot be checked and only
# produces a warning.
sub do_cmd {
    my @cmd=@_;
    my $cmd="'".join("' '", @cmd)."'";
    print "Running: $cmd\n";
    my $ret=system(@cmd);
    if ($ret == -1) {
        # system() itself failed: $! holds the reason.
        die "system $cmd could not be started: $!";
    }
    if ($ret & 127) {
        die "system $cmd was killed by signal ".($ret & 127);
    }
    if ($ret != 0) {
        # The exit code is in the high byte of the wait status.
        die "system $cmd failed with exit status ".($ret >> 8);
    }
    return;
}

# Empty the git working directory: every top-level entry of $gitdir
# except ".git" is removed recursively with find/rm.  Nothing is done
# when the directory does not exist.
sub clean_git() {
    my $top="$gitdir/.";
    if (-d $top) {
        my @prune=qw(-maxdepth 1 -mindepth 1 ! -name .git -exec rm -rf {} ;);
        do_cmd("find", $top, @prune);
    }
}

# Replace the contents of the git working directory with BK revision $rev.
#
# The directory is emptied first (everything but .git), then
# `bk export -tplain` writes the files of the revision into $gitdir.
# The command is run in list form, so neither the revision nor the
# directory name is interpreted by a shell.
sub bk_export($) {
    my $rev=shift;

    clean_git();
    do_cmd("bk", "export", "-tplain", "-r$rev", $gitdir);
}

# Bring the cogito index in line with the files present in $gitdir.
#
# Reads the output of cg-status and registers new files with cg-add and
# vanished files with cg-rm, 50 names per command.  A file reported both
# as added and as deleted is left alone.
#
# Returns 1 when cg-status reported at least one change, 0 otherwise.
# Dies on a status line that is not understood.
sub cg_changes() {
    # Quote one word for /bin/sh, including embedded single quotes.
    my $sh_quote=sub {
        my $word=shift;
        $word =~ s/'/'\\''/g;
        return "'".$word."'";
    };
    # "&&" so that nothing runs in the wrong directory if cd fails.
    my $in_gitdir="cd ".$sh_quote->($gitdir)." && ";

    my $changes=0;
    open(CG, $in_gitdir."cg-status |")
        or die "Unable to run cg-status ($!)";
    my %add;
    my %del;
    while (my $line=<CG>) {
        chomp($line);
        # ?N : new
        # !D : deleted
        # M : modified (in place)
        # T : changed file type
        # U : unmerged (???)
        if ($line =~ /^[?N] (.*)$/) {
            $add{$1}=1;
            $changes=1;
        } elsif ($line =~ /^[!D] (.*)$/) {
            $del{$1}=1;
            $changes=1;
        } elsif ($line =~ /^[MT] (.*)$/) {
            $changes=1;
        } elsif ($line =~ /^U (.*)$/) {
            my $name=$1;
            $changes=1;
            # cg-status was run inside $gitdir, so the name has to be
            # looked up there and not in the current directory.  -l also
            # catches symbolic links whose target does not exist.
            my $path="$gitdir/$name";
            if (-f $path or -l $path) {
                $add{$name}=1;
            } else {
                $del{$name}=1;
            }
        } else {
            die "Unknown status line '$line'";
        }
    }
    close(CG);

    # A name in both sets needs neither cg-add nor cg-rm.
    foreach my $f (keys %add) {
        if (defined($del{$f})) {
            delete($del{$f});
            delete($add{$f});
        }
    }

    my @del=sort keys %del;
    while (my @part=splice(@del, 0, 50)) {
        do_cmd($in_gitdir."cg-rm ".join(" ", map { $sh_quote->($_) } @part));
    }
    my @add=sort keys %add;
    while (my @part=splice(@add, 0, 50)) {
        do_cmd($in_gitdir."cg-add ".join(" ", map { $sh_quote->($_) } @part));
    }
    return $changes;
}

# Build the author, date and log message of the commit for one changeset.
#
#   $cset  reference to the changeset hash; the keys "user", "date" and
#          "log" are read
#   $show  true: only print the result and return (used by --debug-log)
#          false: print it, write the message to $logfile and export the
#          GIT_AUTHOR_* / GIT_COMMITTER_* environment variables
#
# The "log" text is made of blocks separated by empty lines.  The first
# block is the ChangeSet comment, indented by two spaces.  Every later
# block starts with a file name line (two spaces), followed by a file
# information line and by the file comment (four spaces each).
# Dies on any line that does not fit this layout.
sub cg_prepare_commit($$) {
    my $cset=shift;
    my $show=shift;

    # Author name: the part of "user@host" before the '@', dots replaced
    # by spaces.  The e-mail is the full "user@host" string.
    my $name=$cset->{"user"};
    $name =~ s/[@].*$//;
    $name =~ tr/./ /;
    my $email=$cset->{"user"};
    my $date=$cset->{"date"};
    my $log;

    # Parser state.  $file is the block being read, $cur_log its comment
    # so far, $indent the indentation expected on comment lines and $prev
    # what the previous line was ("first line", "new_block", "new_file"
    # or "").
    my $file="ChangeSet";
    my @lst_files;
    my %log_per_file;
    my $prev="first line";
    my $cur_log="";
    my $indent="  ";
    # Set when the ChangeSet comment starts with a "commit at ... by ..."
    # line; dates are then taken from the comment itself.
    my $bk2cvs=0;
    # The extra "" at the end of the list flushes the last block.
    foreach my $logline (split("\n", $cset->{"log"}), "") {
	if ($prev =~ /new_block/) {
	    # First line of a file block: the file name.
	    $prev="new_file";
	    if ($logline =~ /^  (.*)$/) {
		$file=$1;
		#print "new file $file\n";
	    } else {
		die "Cannot parse file name in log line '$logline'";
	    }
	    next;
	} elsif ($prev =~ /new_file/) {
	    # Second line of a file block: revision, date, time, user and
	    # the +added/-deleted counters.  Only checked, never used.
	    $prev="";
	    if ($logline !~ m,^    [0-9.]+ [0-9][0-9]/[0-1][0-9]/[0-3][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9] [^ ]* [+][0-9]+ [-][0-9]+$,) {
#	    if ($logline !~ m,^    [0-9.],) {
		die "Cannot parse file infos in log line '$logline'";
	    }
	    next;
	}
	if ($logline eq "") {	    
	    # End of a block: store its comment and get ready for the next
	    # file block.
	    #if ($file =~ m,^BitKeeper/,) {
		#print "Ignoring BK file $file with log\n$cur_log";
	    #} else {
	    # Remember each file once, in order of appearance.
	    if (not defined($log_per_file{$file})
		&& ($file ne "ChangeSet")) {
		push @lst_files, $file;
	    }
	    if ($cur_log=~ m/^BitKeeper file .*\n$/m) {
		# delete "new file Bitkeeper message"
		$cur_log="";
	    }
	    $log_per_file{$file}.=$cur_log;
		#print "Adding file $file log\n$cur_log";
	    #}

	    $file="";
	    $cur_log="";
	    $indent="    ";
	    $prev="new_block";
	    next;
	} elsif ($logline =~ /^$indent(.*)$/) {
	    # A comment line; keep the text after the indentation.
	    $logline=$1;
	    if ($prev eq "first line") {
		$prev="";
		# "commit at <date> [<time>] GMT by <user>": take the commit
		# date from this line (midnight when no time is given) and
		# drop the line from the message.
		if ($logline =~ m,^commit at (([12][0-9][0-9][0-9]/[0-9][0-9]/[0-9][0-9]) ([0-2][0-9]:[0-5][0-9]:[0-5][0-9])? GMT) by [a-zA-Z0-9_.-]+$,) {
		    $bk2cvs=1;
		    if ($3) {
			$date=$1;
		    } else {
		        $date=$2." 00:00:00 GMT";
		    }
		    next;
		}
	    }
	    if ($bk2cvs) {
		# "rev <n> at <date> <time> GMT" lines replace the date and
		# are dropped as well.
		if ($logline =~ m,^rev [0-9.]+ at ([12][0-9][0-9][0-9]/[0-9][0-9]/[0-9][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9] GMT)$,) {
		    $date=$1;
		    next;
		}
		# Remove one leading space and skip the placeholder message.
		$logline =~ s/^ //;
		if ($logline eq "*** empty log message ***") {
		    next;
		}
	    }
	    $cur_log.=$logline."\n";
	} else {
	    die "Cannot parse file log in log line '$logline'";
	}
    }
    my %files_per_log;
    my %written_log;
    # Keep only real project files: entries under BitKeeper/ and the
    # ChangeSet file itself are left out of the per-file section.
    @lst_files=grep {
	if ( m|^BitKeeper/| ) {
	    $show && print "Ignoring BK file $_ with log\n",$log_per_file{$_} ;
	    0;
	} elsif ( /^ChangeSet$/) {
	    0;
	} else {
	    1;
	}
    } @lst_files;
    # Group the files by comment so that a shared comment is written once.
    foreach my $file (@lst_files) {
	push @{$files_per_log{$log_per_file{$file}}}, $file;
    }
    # The message starts with the ChangeSet comment.  A file comment equal
    # to it, or empty, is marked as already written and never repeated.
    $log=$log_per_file{"ChangeSet"};
    $written_log{$log_per_file{"ChangeSet"}}="ChangeSet";
    $written_log{""}="[Empty log skipped]";
    foreach my $file (@lst_files) {
	my $flog=$log_per_file{$file};
	if (defined $written_log{$flog}) {
	    $show && print "Log for file $file already wrote with ",
	    $written_log{$flog}, "\n";
	    next;
	}
	# One line per file sharing this comment, then the comment itself
	# indented by two spaces.
	foreach my $f (@{$files_per_log{$flog}}) {
	    $log .= "\n".$f;
	}
	$log .= "\n  ".join("\n  ", split("\n", $flog))."\n"; 
	$written_log{$flog}=1;
    }
    
    
 


    # Debug mode: show the result and change nothing.
    if ($show) {
	print "AUTHOR $name <$email>\n",
	"DATE $date\n",
	"LOG\n$log\n";
	#print $cset->{"log"};
	return;
    }
    print "AUTHOR $name <$email>\n",
	"DATE $date\n";
    print "| ", join("\n| ", split("\n", $log)), "\n";

    # The message goes to $logfile and the identity and date to the
    # environment; the committer is set to the same values as the author.
    open(EDITOR, ">", $logfile) or die "Cannot open $logfile ($!)";
    print EDITOR $log;
    close(EDITOR);
    $ENV{"GIT_AUTHOR_NAME"}=$name;
    $ENV{"GIT_AUTHOR_EMAIL"}=$email;
    $ENV{"GIT_AUTHOR_DATE"}=$date;
    $ENV{"GIT_COMMITTER_NAME"}=$name;
    $ENV{"GIT_COMMITTER_EMAIL"}=$email;
    $ENV{"GIT_COMMITTER_DATE"}=$date;
}

# Commit the current state of the git working directory for one changeset.
#
#   $cset  reference to the changeset hash; only "rev" is read
#
# cg-commit is run inside $gitdir with the message "rev<rev>".  The
# command is chained with "&&" so that it is not run from the wrong
# directory when the cd fails.
sub cg_commit($) {
    my $cset=shift;
    my $rev=$cset->{"rev"};

    do_cmd("cd '$gitdir' && cg-commit -E -m'rev$rev'");
}

# Make the git working directory and head correspond to BK revision $rev.
#
#   $rev     BK revision, a key of $ChangeSets{"REVS"}
#   $prefix  text printed in front of every progress message; it grows
#            with the recursion depth
#
# When the changeset has no "cg-id" yet, the commit is built: its parents
# are handled first by recursive calls, the revision is exported from BK,
# committed, and then tagged through add_rev().  When it already has one,
# the working directory is reset to that commit.
# Dies on an unknown revision or when a command fails.
#
# Forward declaration, needed because the sub calls itself with a
# prototype.
sub put_in_bk_head($$);
sub put_in_bk_head($$) {
    my $rev=shift;
    my $prefix=shift;
    print $prefix."Trying to get rev $rev in head '$githead'\n";
    
    if (not defined $ChangeSets{"REVS"}->{$rev}) {
	die "Unknown bk rev $rev";
    }
    my $cset=$ChangeSets{"REVS"}->{$rev};
    if (not defined($cset->{"cg-id"})) {
	print $prefix."Needing to build the cogito Revision\n";
	my $cg_id;
	if (not defined $cset->{"parent"}) {
	    # Changeset without parent: export it, create the repository
	    # with cg-init, take the lock and make HEAD a link to the head
	    # named $githead.  The log of rev 1.1 is used as its message.
	    bk_export($rev);
	    $cset->{"log"}=$ChangeSets{"REVS"}->{"1.1"}->{"log"};
	    cg_prepare_commit($cset, 0);
	    do_cmd("cd $gitdir; cg-init");
	    lock_dir();
	    do_cmd("cp '$gitdir/.git/HEAD' '$gitdir/.git/refs/heads/$githead'");
	    do_cmd("ln -sf 'refs/heads/$githead' '$gitdir/.git/HEAD'");
	} else {
	    # Prefix for the recursive calls: two more spaces, with every
	    # run of 50 spaces collapsed into one '*'.
	    my $new_prefix=$prefix."  ";
	    my $some=" "x50;
	    $new_prefix =~ s/$some/*/;
	    #$new_prefix =~ tr/[0-9]//cd;
	    #$new_prefix++;
	    #put_in_bk_head($cset->{"parent"}, $new_prefix);
	    my $mrev;
	    my $headfile;
	    my $mhead;
	    if (defined $cset->{"mparent"}) {
		# Merge changeset: build the merged-in parent first and
		# keep the resulting HEAD in a temporary head file.
		$mrev=$cset->{"mparent"};
		$mhead="$githead-rev-$mrev";
		$headfile="$gitdir/.git/refs/heads/$mhead";
		put_in_bk_head($cset->{"mparent"}, $new_prefix);
		do_cmd("cp '$gitdir/.git/HEAD' '$headfile'");
	    }

	    # Then put the main parent in the head.
	    put_in_bk_head($cset->{"parent"}, $new_prefix);
	    
	    if (defined $cset->{"mparent"}) {
		# We ignore problems of merge as we already know the result
		# and never commit the result here.
		do_cmd("touch '$gitdir/.git/branches/$mhead'");
		do_cmd("cd $gitdir; cg-merge -c $mhead || true");
		unlink("$headfile") 
		    or die "Cannot unlink $headfile ($!)";
	    }

	    print $prefix."Getting changes for rev $rev\n";
	    
	    # Replace the working directory with the files of this
	    # revision and commit only if cg-status sees a difference.
	    bk_export($rev);
	    if (cg_changes()) {
		cg_prepare_commit($cset, 0);
		cg_commit($cset);
	    }
	    if (defined $cset->{"mparent"}) {
		unlink("$gitdir/.git/branches/$mhead")
		    or die "Cannot unlink $gitdir/.git/branches/$mhead ($!)";
	    }
	}
	# Ask cogito for the current commit id; the "On commit " text is
	# stripped from the output of cg-seek.
	$cg_id=`cd $gitdir; cg-seek`;
	chomp $cg_id;
	$cg_id=~ s/On commit //;

	if ($cg_id eq "") {
	    die "Unable to get current cg-id";
	}
	# NOTE(review): the message says "skipping", but add_rev() below is
	# still called and overwrites the {"CGIDS"} entry - confirm intent.
	if (defined($ChangeSets{"CGIDS"}->{$cg_id})) {
	    print STDERR "WARNING: skipping rev $rev that is the same as rev ", 
	    $ChangeSets{"CGIDS"}->{$cg_id}->{"rev"}, "\n";
	}
	add_rev($cset, $cg_id, $prefix);
    } else {
	# The commit already exists: empty the working directory, check
	# the commit out again and write its id into .git/HEAD.
	print $prefix."Cogito revision already exists. Trying to recover\n";
	clean_git();
	do_cmd("cd $gitdir; git-read-tree ".$cset->{"cg-id"});
	do_cmd("cd $gitdir; git-checkout-cache -f -a");
	do_cmd('echo "'.$cset->{"cg-id"}.'" > '.$gitdir."/.git/HEAD");
    }
    print $prefix."Rev $rev is in head '$githead'\n";
}

# Program entry point: parse the options, prepare the environment, load
# the BK changesets, then either show the log message of one revision
# (--debug-log) or convert up to --rev-to-commit.  The temporary files are
# removed at the end.
sub main() {
    get_options();
    check_env();
    load_ChangeSets();

    #put_in_bk_head($options{"rev-to-commit"}, "0000: ");
    if (not defined($options{"debug-log"})) {
        put_in_bk_head($options{"rev-to-commit"}, "* ");
    } else {
        cg_prepare_commit($ChangeSets{"REVS"}->{$options{"debug-log"}}, 1);
    }
    unlink($editor, $logfile);
}

# Run the converter.
main;

__END__

=head1 NAME

bk2cogito - BK to Cogito converter

=head1 SYNOPSIS

bk2cogito [options]

 Options:
    --version        program version
    --short-help|h   brief help message
    --help           help message with options descriptions
    --man            full documentation
    --git-dir        directory where git repository is created (default: git)
    --git-head       git head tracking the bk repository 
                     (default: last bk repository path name)
    --rev-to-commit rev
                     bk rev to put on git head
    --debug-log rev  show git log message for bk rev 'rev'

=head1 OPTIONS

=over 8

=item B<--version>

Prints the program version and exits.

=item B<--short-help>

Prints a brief help message and exits.

=item B<--help>

Prints a help message with options descriptions and exits.

=item B<--man>

Prints the manual page and exits.

=back

=head1 DESCRIPTION

B<This program> will convert a bk repository into a git one.

=cut




More information about the Mercurial mailing list