#!/usr/bin/perl -w
#
#
=pod

=head1 NAME backup.pl

A script for building clusters.


=head1 SYNOPSIS

  # Prepare host A for clustering
  cluster.pl --init=A

  # Refresh /usr/lib/backup
  cluster.pl --refresh=A

  # Make a backup of host A to host B
  cluster.pl --backup --source=A --destination=B

  # Create a local backup on /backup
  cluster.pl --backup --destination=/backup

  # In case: Copy /usr/lib/backup/A to /
  cluster.pl --install A


=head1 DESCRIPTION

This script keeps track of a Linux cluster. The general idea is as
follows: There are certain files, which must not be mirrored, for
example /etc/fstab (hardware dependent) or /etc/sysconfig/network-
scripts/ifcfg-eth0 (fixes the current IP address on eth0) or
/etc/rc.d/rc2.d/S64dhcpd (only one running DHCP server instance at
the same time).

To assure this, we create the following directory tree below
F</usr/lib/backup>:

  A -- etc -- fstab.ignore
  |        +- sysconfig -- network-scripts -- ifcfg-eth0
  |        +- rc.d      -- rc2.d           -- S65dhcpd
  |
  B -- etc -- fstab.ignore
           +- sysconfig -- network-scripts -- ifcfg-eth0
           +- rc.d      -- rc2.d           -- S65dhcpd

If host A is dropping service, we start host B by copying the
files from F</usr/lib/backup/A> to F</> (the exception being
files called F<*.ignore>) In theory host B should run as host
A after an immediate reboot ... by ignoring trivial problems
like boot sectors and all these kind of ugly things.

Now for a more formal description (something completely
different :-). We assume the following:

=over 8

=item -

There's a source machine A which ought to be left untouched.

=item -

There's a backup machine B, an almost identical copy of A, the
exception being the IP address etc. The idea is that both machines
ought to stay online at the same time.

=item -

If a file must not be mirrored, then we create a copy below
F</usr/lib/backup>. For example, a copy of F</etc/hosts> will be
stored in F</usr/lib/backup/E<lt>machineE<gt>/etc/hosts>. If the
copy is called F</usr/lib/backup/E<lt>maschineE<gt>/etc/fstab.ignore>
rather than F</usr/lib/backup/E<lt>maschineE<gt>/etc/fstab>, then
the file will never be copied back.

The script is responsible for refreshing these copies and synchronizing
the machines. (It requires root permissions via ssh or ssh2 on both
machines.)

=item -

Doing a backup means mirroring all files, the exception being those
from F</usr/lib/backup/*>. Installing a backup means copying the
files from F</usr/lib/backup/*> to F</>.

=item -

Whenever the backup machine shall replace the original, then we
install the files from F</usr/lib/backup/A> to F</> and do a reboot.

Host A needs to become powered off or we install the files from
F</usr/lib/backup/B> to F</> and do a reboot, so that A becomes
the backup server B.

=cut

use strict;

use Getopt::Long ();
use Socket ();
use File::Basename ();
use File::Path ();
use File::Find ();
use IO::File ();
use IO::AtomicFile ();
use File::Spec ();


############################################################################
#
#   Globale Variablen
#
############################################################################

use vars qw($debug $verbose $BASEDIR $MY_NAME $SSH @INSTALLFILES $VERSION);

$VERSION = "0.01";
$debug = 0;
$verbose = 0;
$BASEDIR = "/usr/lib/backup";
$MY_NAME = "mail";
#  {
#      my $hostname = `hostname`;
#      if (defined($hostname)  &&  $hostname =~ /sun2/) {
#  	$MY_NAME = "sun2.zvw.de";
#      }
#  }
$SSH = "/usr/bin/ssh";

@INSTALLFILES =
#
#   H-Net.com, RH Linux 5.2
#
#      qw(/etc/hosts /etc/sysconfig/network /etc/HOSTNAME
#         /etc/sysconfig/network-scripts/ifcfg-eth0
#         /etc/resolv.conf /etc/conf.modules /etc/fstab
#         /root/.ssh/identity /root/.ssh/identity.pub
#         /etc/ssh/ssh_host_key /etc/ssh/ssh_host_key.pub
#         /etc/lilo.conf /etc/mtab /etc/rc.d/rc2.d/S65dhcpd
#         /etc/rc.d/rc3.d/S65dhcpd /etc/rc.d/rc4.d/S65dhcpd
#         /etc/rc.d/rc5.d/S65dhcpd
#         /etc/rc.d/rc3.d/S55named /etc/rc.d/rc4.d/S55named
#         /etc/rc.d/rc5.d/S55named);
#
#   ZVW, Sparc Solaris 5.6
#
#      qw(/etc/inet/hosts /etc/net/ticlts/hosts
#         /etc/net/ticots/hosts /etc/net/ticotsord/hosts
#         /etc/hostname.le0 /etc/nodename /kernel/genunix
#         /root/.ssh/identity
#         /root/.ssh/identity.pub /root/.ssh/known_hosts
#         /root/.ssh/.ssh/authorized_keys /etc/ssh/ssh_host_key
#         /etc/ssh/ssh_host_key.pub /etc/vfstab);
#
#   bgag.de, RH Linux 6.0, just a local backup
#
    qw(/etc/fstab /etc/lilo.conf /etc/mtab); 



=head2 Preparing a machine for integration into the cluster.

To integrate a machine into the cluster, we create the directory
F</usr/lib/backup/E<lt>hostE<gt>/>. This happens by executing

  cluster.pl --init=<host>

The list of files being created below F</usr/lib/backup/E<lt>hostE<gt>/>
is read from the global variable @INSTALLFILES. You need to adjust
this variable before creating the cluster to your local settings.

=cut

sub Clone {
    my($source, $dest) = @_;
    my $dir = File::Basename::dirname($dest);
    unless (-d $dir) {
	print "Creating directory $dir.\n" if $verbose;
        File::Path::mkpath([$dir], 0, 0700) unless $debug;
    }
    print "Cloning $source to $dest.\n" if $verbose;
    return if $debug;
    my($dev, $ino, $mode, $nlink, $uid, $gid) = stat $source
	or die "Cannot stat $source: $!";
    my $fhin = IO::File->new($source, "r")
	or die "Failed to open $source for input: $!";
    my $fhout;
    $fhout = IO::AtomicFile->open($dest, "w")
	or die "Failed to open $dest for output: $!";
    while (1) {
	my $line;
	my $result = $fhin->read($line, 4096);
	die "Failed to read $source: $!" unless defined($line);
	last unless $result;
	$fhout->print($line) or die "Failed to write to $dest: $!";
    }
    die "Failed to close $dest: $!" unless $fhout->close();
    chmod $mode, $dest
	or warn "Failed to change mode of $dest to $mode: $!";
    chown $uid, $gid, $dest
	or warn "Failed to change ownership of $dest to $uid, $gid: $!";
}


sub Init {
    my $o = shift;
    my $host = $o->{'init'}  or  die "Init: Missing host name";
    die "Init: Cannot resolve host name $host" unless Socket::inet_aton($host);
    my $dir = File::Spec->catdir($BASEDIR, $host);
    die "Cannot init host $host: $dir already exists" if -e $dir;
    print "Creating directory $dir, mode 0700\n" if $verbose;
    File::Path::mkpath([$dir], 0, 0700) unless $debug;
    foreach my $file (@INSTALLFILES) {
	Clone($file, File::Spec->catfile($dir, $file)) if -f $file;
    }
}


=pod

=head2 Refreshing the contents of /usr/lib/backup

From time to time (I do it once a day) you need to refresh the
contents of F</usr/lib/backup> This is happening by running

    cluster.pl --refresh=E<lt>hostE<gt>

If E<lt>hostE<gt> is an external machine, then the refresh is done
via C<ssh> or C<ssh2> and C<rsaync>. You can fix the ssh path in
the global variable $SSH.

=cut

sub Refresh {
    my $o = shift;
    my $host = $o->{'refresh'}  or  die "Init: Missing host name";
    if ($host ne $MY_NAME) {
	my @command = ($SSH, $host, "perl", "$BASEDIR/cluster.pl",
		       "--refresh=$host");
	print "Refreshing remote directory: ", join(" ", @command), "\n"
	    if $verbose;
	system @command unless $debug;
	my $v = $verbose ? "v" : "";
	@command = ("rsync", "-az$v", "-e", $SSH,
		    "$host:$BASEDIR/", "$BASEDIR/");
	print "Refreshing local directory: ", join(" ", @command), "\n"
	    if $verbose;
	system @command unless $debug;
    } else {
	my $dir = File::Spec->catdir($BASEDIR, $host);
	die "No such host in $dir: $host" unless -d $dir;
	my $wanted = sub {
	    return if -d $_;
	    my $t = $File::Find::name;
	    my $f = $t;
	    $f =~ s/\.ignore$//;
	    if ($f =~ /^$BASEDIR\/.*?(\/.*)/) {
		$f = $1;
	    } else {
		die "Cannot parse file name: $f";
	    }
	    my $v = $verbose ? "v" : "";
	    my @command = ("rsync", "-a$v", $f, $t);
	    print "Refreshing $t from $f: ", join(" ", @command), "\n"
		if $verbose;
	    system @command unless $debug;
	};
        File::Find::find($wanted, $dir);
    }
}


=pod

=head2 Creating a backup

A backup is created by running

  cluster.pl --backup --source=A --destination=B

This synchronizes the contents of source host A with the destination
host B.

The cluster script distinguishes between a local backup (for example,
if a machine has two disk drives, one for backup purposes) with

  cluster.pl --backup --source=A --destination=A/backup

and a remote backup, where the host name differs in source and
destination.

In both cases the external command C<rsync> is used for creating
the backup and a list of exception files is created by looking
into F</usr/lib/backup/A>.

You are strongly encouraged to unmount a local backup partition by
default. That means that the backup partition is using the I<noauto>
flag in F</etc/fstab>, for example as follows:

  /dev/hdb2  /backup  ext2  noauto  0 0

In other words: We don't mount the partition at boot time. Instead
the partition is mounted and unmounted manually, for example by
calling the following script from within the crontab:

  #!/bin/sh
  mount /backup
  /usr/lib/backup/cluster.pl --backup --source=A --destination=A/backup
  umount /backup

=cut

sub Backup {
    my $o = shift;
    my $source = $o->{'source'} or die "Missing source host";
    die "Cannot resolv host name: $source"
	unless Socket::inet_aton($source);
    my $sdir = File::Spec->catdir($BASEDIR, $source);
    die "No such host in $BASEDIR: $source" unless -d $sdir;
    my $dest = $o->{'destination'} or die "Missing destination host";
    my $destdir = "/";
    if ($dest =~ /(.*?)(\/.*)/) {
	$destdir = $2;
	$dest = $1;
	if ($dest ne $source) {
	    die "Cannot perform a local backup between different hosts.\n";
	}
    }
    die "Cannot resolv host name: $dest"
	unless Socket::inet_aton($dest);
    my $ddir = File::Spec->catdir($BASEDIR, $dest);
    die "No such host in $BASEDIR: $dest" unless -d $ddir;

    # Create a list of exception files.
    my %files;
    foreach my $d (qw(/proc /mnt/cdrom /mnt/floppy /backup)) {
	if (-d $d) {
	    $files{"$d/*"} = 1;
	}
    }
    if ($source ne $MY_NAME  ||  $dest ne $MY_NAME) {
	$files{"$BASEDIR/*"} = 1;
    }
    $files{"$destdir/*"} = 1 if $destdir ne "/";
    my $wanted = sub {
	return if $_ =~ /\.ignore$/ || $_ =~ /^\.\.?$/  ||  -d $_;
	my $f = $File::Find::name;
	if ($f =~ /^$BASEDIR\/.*?(\/.*)/) {
	    $files{$1} = 1;
	} else {
	    die "Cannot parse file name: $f";
	}
    };

    File::Find::find($wanted, ($sdir eq $ddir) ? ($sdir) : ($sdir, $ddir));

    my $v = $verbose ? "v" : "";
    my @opts = ("rsync", "--delete", "-e", $SSH,
		($source ne $MY_NAME  ||  $dest ne $MY_NAME)
		    ? "-az$v" : "-a$v",
		($source ne $MY_NAME) ? "$source:/" : "/",
		($dest ne $MY_NAME) ? "$dest:$destdir" : $destdir,
		map { ("--exclude", $_) } keys %files);
    print "Backup command: ", join(" ", @opts), "\n" if $verbose;
    system @opts unless $debug;
    if ($destdir eq "/") {
	print "Updating boot blocks: /sbin/lilo\n" if $verbose;
	system "/sbin/lilo" unless $debug;
    }
}


=pod

=head2 Restoring a backup

This is the worst case. Hope it never happens, and we have a good
chance it never happens, now that we have a backup ... ;-)

The backup is restored by running

    cluster.pl --install=E<lt>hostE<gt>

followed by a reboot.

Even by having a backup, success isn't guaranteed. Rebooting can fail
for a number of reasons, for example the former F</dev/sdb> can become
F</dev/sda> (because the first drive doesn't become detected) or
booting fails. I strongly advise you to have boot disks for both
cases or one disk with an appropriate lilo boot block.

If all else fails, a rescue disk can be of great help. With Red Hat
Linux, you create a rescue disk like this:

    mount /mnt/cdrom
    cd /mnt/cdrom/images
    dd if=boot.img of=/dev/fd0
    dd if=rescue.img of=/dev/fd0

Of course you can create a rescue disk from within DOS or Windows
by using the F<rawrite> utility.

=cut

sub Install {
    my $o = shift;
    my $host = $o->{'install'};
    my $dir = File::Spec->catdir($BASEDIR, $host);
    die "Failed to install $host: No such directory in $BASEDIR"
	unless -d $dir;
    die "Failed to install $host: This is me" if $host eq $MY_NAME;
    my $odir = File::Spec->catdir($BASEDIR, $MY_NAME);

    my %files;

    my $wanted = sub {
	return if $_ =~ /\.ignore$/ || $_ =~ /^\.\.?$/  ||  -d $_;
	my $f = $File::Find::name;
	return if -f "$f.ignore";
	if ($f =~ /^$BASEDIR\/.*?(\/.*)/) {
	    my $t = $1;
	    $files{$f} = $t;
	} else {
	    die "Cannot parse file name: $f";
	}
    };
    File::Find::find($wanted, $dir);

    print "The following files will be replaced, if we continue:\n\n";
    while(my($var, $val) = each %files) {
	print "  $val\n";
    }

    print "\nDo you really want to continue? [n] \n";
    my $reply = <STDIN>;
    exit 1 unless $reply =~ /y/i;

    print "\nSure? [n] \n";
    $reply = <STDIN>;
    exit 1 unless $reply =~ /y/i;

    while(my($var, $val) = each %files) {
	Clone($var, $val);
    }
}


############################################################################
#
#   Name:    Usage
#
#   Purpose: Print Usage message and exit
#
############################################################################

sub Usage {
    print <<"EOF";
Usage: $0 <action> [options]

Possible actions are:

  --backup		Create a backup from one machine to another
			machine. The options --source and --destinations
			have to be used for setting source and destination
			hosts.
  --init=<host>		Initialize the directory /usr/lib/backup/<host>
			for using <host> in the cluster.
  --refresh=<host>      Refresh the contents of /usr/lib/backup/<host>.
  --install=<host>      Install the original files from /usr/lib/backup/<host>
                        into the current machine.

Possible options are:

  --destination=<host>	Use <host> as the destination machine in a backup.
  --source=<host>	Use <host> as the source machine in a backup.
  --ssh=<file>          Use <file> as ssh replacement (Default: /usr/bin/ssh1)

See "perldoc $0" for details

cluster.pl $VERSION, (C) 1999 by Jochen Wiedmann

This script can be distributed and used under either the
Artistic License or the GPL (GNU General Public License), as
specified in the Perl README. In particular the author
refuses any warranty. You are using this software at your
own risk!
EOF
    exit 1;
}


############################################################################
#
#   This is main()
#
############################################################################

{
    my %o;
    Getopt::Long::GetOptions(\%o,
			     'backup',  'debug', 'destination=s', 'init=s',
			     'install=s', 'help',
			     'refresh=s', 'source=s', 'ssh=s', 'verbose');
    $verbose = 1 if $o{'verbose'} or $o{'debug'};
    $debug = 1 if $o{'debug'};
    $SSH = $o{'ssh'} if $o{'ssh'};

    if ($< || $>) {
	die "Must be running as root";
    }

    if ($o{'backup'}) {
	Backup(\%o);
    } elsif ($o{'init'}) {
	Init(\%o);
    } elsif ($o{'install'}) {
	Install(\%o);
    } elsif ($o{'refresh'}) {
	Refresh(\%o);
    } else {
	Usage();
    }
}


1;

=pod

=head1 EXAMPLE

We run the following script daily at 4 am on host A:

	#!/bin/sh
        cd /usr/lib/backup
	./cluster.pl --refresh=A
	./cluster.pl --refresh=B
        ./cluster.pl --backup --source=A --destination=B
	mount /backup
	./cluster.pl --backup --source=A --destination=A/backup
	umount /backup


=head1 CPAN

This script is available from any CPAN mirror, in particular

  ftp://ftp.funet.fi/pub/languages/perl/CPAN/authors/id/JWIED

The following sections are merely for CPAN's automatic registration
scheme:


=head2 SCRIPT CATEGORIES

Unix:System_administration


=head2 PREREQUISITES

The ssh (or ssh2) and rsync utilities. Additionally you need the
Perl module B<IO::AtomicFile>.

=head2 README

The cluster.pl script is a small script for maintaining a Linux
cluster, to be precise, a cluster of machines with one original
host and multiple backup hosts.


=head1 AUTHOR

    Jochen Wiedmann
    Am Eisteich 9
    72555 Metzingen
    Germany

    Email: joe@ispsoft.de

This script can be distributed and used under either the
Artistic License or the GPL (GNU General Public License), as
specified in the Perl README. In particular the author
refuses any warranty. You are using this software at your
own risk!


=head1 SEE ALSO

L<ssh(1)>, L<rsync(1)>


=cut