#!/usr/bin/perl

# this software is licensed for use under the Free Software Foundation's GPL v3.0 license, as retrieved
# from http://www.gnu.org/licenses/gpl-3.0.html on 2014-11-17.  A copy should also be available in this
# project's Git repository at https://github.com/shatteredsilicon/backoid/blob/master/LICENSE.

$::VERSION = '1.0';

use strict;
use warnings;
use Config::IniFiles; # read samba-style conf file
use Getopt::Long qw(:config auto_version auto_help);
use Pod::Usage;
use Time::Local;
use Capture::Tiny ':all';
use File::Spec::Functions 'catfile';
use File::Basename;
use File::Path qw(make_path);
use List::Util qw(reduce first);
use Number::Bytes::Human qw(parse_bytes);
use POSIX qw(ceil);
use Parallel::ForkManager;
use IO::Handle;
use IPC::Open3;
use Symbol 'gensym';

my $ps = 'ps';
my $pv = 'pv';
my $zfs = 'zfs';
my $tar = 'tar';
my $rclone = 'rclone';
my $mbuffer = 'mbuffer';

my $DEFAULT_COMPRESSION = 'zstd';
my $DEFAULT_RETENTION = '7d';

my %args = (
    "configdir=s" => \(my $configdir = "/etc/backoid"),
	"run-dir=s" => \(my $run_dir = "/var/run/backoid"),
	"split-size=s" => \my $split_size,
	"parallel:i" => \my $parallel,
	"debug" => \my $debug,
	"verbose" => \my $verbose
);
GetOptions(%args) or pod2usage(-verbose => 1);

my $conf_file = catfile($configdir, "backoid.conf");
my $default_conf_file = catfile($configdir, "backoid.defaults.conf");

make_path($run_dir);

if (scalar(@ARGV) != 0) {
	pod2usage(2);
	exit 127;
}

my %config;
my %remotes;
if (checklock('backoid')) {
	writelock('backoid');

	%config = parse_config($conf_file, $default_conf_file);
	%remotes = get_remotes();

	removelock('backoid');
} else {
	if ($verbose) { print "INFO: exiting as lock held by other backoid process.\n"; }
	exit 0;
}

my $parent_id = $$;
upload_snapshots();

if (checklock('backoid')) {
	writelock('backoid');

	purge_snapshots();

	removelock('backoid');
} else {
	if ($verbose) { print "INFO: deferring snapshot purging - valid purging lock held by other backoid process.\n"; }
	exit 0;
}

sub upload_snapshots {
	my $log_pipe_exit = pipe(my $log_read_fh, my $log_write_fh);
	if (!$log_pipe_exit) {
		warn "CRITICAL ERROR: failed to create pipe for progress";
		return;
	}

	my $pid = fork();
	if (!defined $pid) {
		warn "CRITICAL ERROR: failed to fork";
		return;
	} elsif ($pid == 0) {
		close($log_write_fh);
		STDOUT->autoflush(1);

		my @snapshots = ();

		my $stdout_line;
		while (my $log_line = <$log_read_fh>) {
			my $prev_progress_lines = reduce {$a + 3 + $b->{'lines'}} 0, @snapshots;

			my $stdout_output = '';
			my $stderr_output;
			if ($log_line =~ /^progress /) {
				$log_line =~ s/^progress //;
				chomp $log_line;
				if ($log_line eq '') { next; }

				my ($snapshot_name, $content) = split /\s+/, $log_line, 2;
				if (!defined $snapshot_name || !defined $content || $content eq '') { next; }

				my $snapshot = first { $_->{'name'} eq $snapshot_name } @snapshots;
				if (!defined $snapshot) {
					$snapshot = {
						'name' => $snapshot_name,
						'content' => $content,
						'lines' => $log_line =~ tr/\n//
					};
					push @snapshots, $snapshot;
				} else {
					$snapshot->{'content'} = $content;
				}
			} elsif ($log_line =~ /^stderr /) {
				$log_line =~ s/^stderr //;
				$stderr_output = $log_line;
			} elsif ($log_line =~ /^stdout /) {
				$log_line =~ s/^stdout //;
				$stdout_output = $log_line;
			} else {
				$stdout_output = $log_line;
			}

			for (my $i = 0; $i < scalar(@snapshots); $i++) {
				$stdout_output .= ($snapshots[$i]->{'name'} . ":\n");
				$stdout_output .= ($snapshots[$i]->{'content'} . "\n");
				if ($i < scalar(@snapshots)-1) {
					$stdout_output .= "\n";
				}
			}
			$prev_progress_lines > 0 && print "\033[" . ($prev_progress_lines - 1) . "F";
			print "\033[0J";
			if (defined $stderr_output) {
				print $stderr_output;
			}
			print $stdout_output;
		}

		close($log_read_fh);
		exit 0;
	} else {
		close($log_read_fh);
		$log_write_fh->autoflush(1);

		my $parallel_num = defined $parallel
			? $parallel > 0
				? $parallel
				: ((reduce { $a + scalar(@{$b->{'snapshots'}}) } 0, values %{$config{'datasets'}}) || 1)
			: 1
			;
		my $pm = Parallel::ForkManager->new($parallel_num);

		foreach my $dataset (keys %{$config{'datasets'}}) {
			my $compressor = get_compressor($config{'datasets'}{$dataset}->{'compression'}, $config{'datasets'}{$dataset}->{'compression_level'});
			my @snapshots = get_snapshots($dataset, $config{'datasets'}{$dataset}, $log_write_fh);
			for (my $i = 0; $i < (scalar(@snapshots)+1); $i++) { # run one more time to see if there are new snapshots created
				if (scalar($pm->running_procs) >= $parallel_num) {
					$pm->wait_for_available_procs(1);
					my @new_snapshots = get_snapshots($dataset, $config{'datasets'}{$dataset}, $log_write_fh);
					my $snapshot_list_changed = 0;
					if (scalar(@snapshots) != scalar(@new_snapshots)) {
						$snapshot_list_changed = 1;
					} else {
						for (my $si = 0; $si < scalar(@new_snapshots); $si++) {
							if ($snapshots[$si]->{'timestamp'} != $new_snapshots[$si]->{'timestamp'} || $snapshots[$si]->{'name'} ne $new_snapshots[$si]->{'name'}) {
								$snapshot_list_changed = 1;
								last;
							}
						}
					}
					if ($snapshot_list_changed) {
						# if snapshot list changed, just use the new snapshot list
						@snapshots = @new_snapshots;
						$i = 0;
					}
				}
				if ($i == scalar(@snapshots)) { last; }
				my $snapshot = $snapshots[$i];
				$pm->start and next;

				my $lockfile = $snapshot->{'name'};
				$lockfile =~ s/\//-/g;
				if (checklock($lockfile)) {
					writelock($lockfile);

					my $snapshot_name = get_snapshot_name($snapshot);
					my $remote_path = get_remote_path($config{'datasets'}{$dataset}->{'target'}, $snapshot_name, $compressor);
					my $remote_path_basename = basename($remote_path);
					my $rclone_output;
					if (defined $config{'datasets'}{$dataset}->{'method'} && $config{'datasets'}{$dataset}->{'method'} eq 'files') {
						$rclone_output = `$rclone ls $remote_path`;
					} else {
						$rclone_output = `$rclone ls $config{'datasets'}{$dataset}->{'target'} --include "$remote_path_basename" --include "$remote_path_basename.000"`;
					}
					my $rclone_exit = $? >> 8;
					if ($rclone_exit != 0) {
						send_log($log_write_fh, "stderr", "CRITICAL ERROR: failed to check if $snapshot->{'name'} is already uploaded");
					}

					defined $rclone_output && chomp($rclone_output);
					if (defined $rclone_output && $rclone_output ne '' && $verbose) {
						# same snapshot is uploaded, ignore
						send_log($log_write_fh, "stdout", "INFO: deferring snapshot uploading - snapshot $snapshot->{'name'} already uploaded.\n");
					}

					if ($rclone_exit == 0 && (!defined $rclone_output || $rclone_output eq '')) {
						upload_snapshot($config{'datasets'}{$dataset}, $compressor, $snapshot, \%remotes, $log_write_fh);
					}

					removelock($lockfile);
				} else {
					# per-snapshot lock is held, ignore
					if ($verbose) { send_log($log_write_fh, "stdout", "INFO: deferring snapshot uploading - valid per-snapshot lock of $snapshot->{'name'} held by other backoid process.\n"); }
				}

				$pm->finish;
			}
		}

		$pm->wait_all_children;
		close $log_write_fh;
		waitpid($pid, 0);
	}
}

sub purge_snapshots {
	my %uploaded_snapshots = get_uploaded_snapshots();
	foreach my $dataset (keys %{$config{'datasets'}}) {
		my ($retention_number, $retention_suffix) = ($config{'datasets'}{$dataset}{'retention'} =~ /^(\d+)([hdwmy]?)$/);
		if (! length($retention_number) && ! length($retention_suffix)) {
			warn "CRITICAL ERROR: auto purge for dataset '$dataset' is cancelled, backoid doesn't know how to deal with retention '$config{'datasets'}{$dataset}{'retention'}'";
			next;
		}

		my @purging_objects = ();
		if ($retention_suffix eq '') {
			foreach my $snapshot (keys %uploaded_snapshots) {
				if (! ($snapshot =~ /^$dataset@/)) {
					# not the snapshot of current dataset
					next;
				}
				my %snapshot_obj = %{$uploaded_snapshots{$snapshot}};
				my $i = 0;
				foreach my $purging_object (@purging_objects) {
					if ($purging_object->{'datetime'} gt $snapshot_obj{'datetime'}) {
						last;
					}

					$i = $i + 1;
				}
				splice(@purging_objects, $i, 0, {
					'datetime' => $snapshot_obj{'datetime'},
					'objects' => $snapshot_obj{'objects'}
				});
			}
			splice(@purging_objects, scalar(@purging_objects) < $retention_number ? 0 : scalar(@purging_objects) - $retention_number, $retention_number, undef);
		} else {
			my $seconds = retention_to_seconds($retention_number, $retention_suffix);
			foreach my $snapshot (keys %uploaded_snapshots) {
				if (! ($snapshot =~ /^$dataset@/)) {
					# not the snapshot of current dataset
					next;
				}
				my %snapshot_obj = %{$uploaded_snapshots{$snapshot}};
				if (time() - timelocal($snapshot_obj{'second'},$snapshot_obj{'minute'},$snapshot_obj{'hour'},$snapshot_obj{'day'},$snapshot_obj{'month'}-1,$snapshot_obj{'year'}) < $seconds) {
					next;
				}
				push(@purging_objects, {
					'objects' => $snapshot_obj{'objects'}
				});
			}
		}

		foreach my $purging_object (@purging_objects) {
			foreach my $object (@{$purging_object->{'objects'}}) {
				my $object_path = catfile($config{'datasets'}{$dataset}{'target'}, $object);
				if ($debug) { print "DEBUG: purging object $object_path\n"; }
				system("$rclone deletefile $object_path") == 0
					or warn "CRITICAL ERROR: purge object $object_path failed, exit code: $?";
			}
		}
	}
}

sub parse_config {
	my ($conf_file, $default_conf_file) = @_;
	my %config = (
		'datasets' => {}
	);

	unless (-e $default_conf_file ) { die "FATAL: cannot load $default_conf_file - please restore a clean copy, this is not a user-editable file!"; }
	unless (-e $conf_file ) { die "FATAL: cannot load $conf_file - please create a valid local config file before running backoid!"; }

	tie my %defaults, 'Config::IniFiles', ( -file => $default_conf_file ) or die "FATAL: cannot load $default_conf_file - please restore a clean copy, this is not a user-editable file!";
	tie my %ini, 'Config::IniFiles', ( -file => $conf_file ) or die "FATAL: cannot load $conf_file - please create a valid local config file before running backoid!";

	foreach my $section (keys %ini) {
		# first up - die with honor if unknown parameters are set in any modules or templates by the user.
		foreach my $key (keys %{$ini{$section}}) {
			if (! defined ($defaults{'template_default'}{$key})) {
				die "FATAL ERROR: I don't understand the setting $key you've set in \[$section\] in $conf_file.\n";
			}

			# in case of duplicate lines we will end up with an array of all values
			my $value = $ini{$section}{$key};
			if (ref($value) eq 'ARRAY') {
				warn "duplicate key '$key' in section '$section', using the value from the first occurence and ignoring the others.\n";
				$ini{$section}{$key} = $value->[0];
			}
		}

		if ($section =~ /^template_/) { next; } # don't process templates directly

		# set default values from %defaults, which can then be overridden by template
        # and/or local settings within the module.
		foreach my $key (keys %{$defaults{'template_default'}}) {
			$config{'datasets'}{$section}{$key} = $defaults{'template_default'}{$key};
		}

		# override with values from user-defined default template, if any
		foreach my $key (keys %{$ini{'template_default'}}) {
			$config{'datasets'}{$section}{$key} = $ini{'template_default'}{$key};
		}

		# override with values from user-defined templates applied to this module,
		# in the order they were specified (ie use_template = default,production,mytemplate)
		if (defined $ini{$section}{'use_template'}) {
			my @templates = split (' *, *',$ini{$section}{'use_template'});
			foreach my $rawtemplate (@templates) {
				# strip trailing whitespace
				$rawtemplate =~ s/\s+$//g;

				my $template = 'template_'.$rawtemplate;
				foreach my $key (keys %{$ini{$template}}) {
					$config{'datasets'}{$section}{$key} = $ini{$template}{$key};
				}
			}
		}

		# override with any locally set values in the module itself
		foreach my $key (keys %{$ini{$section}}) {
			$config{'datasets'}{$section}{$key} = $ini{$section}{$key};
		}

		# target not defined, which means no upload
		if (!defined ($config{'datasets'}{$section}{'target'}) || !length ($config{'datasets'}{$section}{'target'})) {
			delete $config{'datasets'}{$section};
			next;
		}

		if ($config{'datasets'}{$section}{'retention'} eq '') {
			$config{'datasets'}{$section}{'retention'} = $DEFAULT_RETENTION;
		}

		my @zfs_datasets = get_zfs_datasets($section, 0);
		my $zfs_dataset = shift @zfs_datasets;
		$config{'datasets'}{$section}{'mountpoint'} = $zfs_dataset->{'mountpoint'};
	}

	foreach my $dataset (keys %{$config{'datasets'}}) {
		$config{'datasets'}{$dataset}{'snapshots'} = [];
		my @snapshots = get_snapshots($dataset, $config{'datasets'}{$dataset});
		push(@{$config{'datasets'}{$dataset}{'snapshots'}}, @snapshots);
	}

	return %config;
}

sub get_zfs_datasets {
	my ($path, $recursive) = @_;
	my $recursive_arg = '';
	if ($recursive) {
		$recursive_arg = 'r';
	}

	my $cmd = "$zfs list -o name,mountpoint -t filesystem,volume -H$recursive_arg $path";
	if ($debug) { print "DEBUG: getting list of datasets of $path using '$cmd' ...\n"; }
	my @lines = `$cmd`;
	my $exit_code = $? >> 8;

	if ($exit_code != 0) {
		warn "CRITICAL ERROR: command '$cmd' failed with exit code: $exit_code";
		return ();
	}

	my @datasets = ();
	foreach my $line (@lines) {
		my ($name, $mountpoint) = $line =~ /^([^\t]+)\t([^\t]+)/;
		chomp $name;
		chomp $mountpoint;

		if (grep $mountpoint eq $_, ('-', 'legacy', 'none')) {
			$mountpoint = '';
		}

		my %dataset = (
			'name' => "$name",
			'mountpoint' => "$mountpoint"
		);
		push(@datasets, \%dataset);
	}

	return @datasets;
}

sub get_snapshots {
	my ($dataset, $dataset_hash, $log_fh) = @_;
	if (!defined $log_fh) { $log_fh = *STDOUT; }
	my ($retention_number, $retention_suffix) = ($dataset_hash->{'retention'} =~ /^(\d+)([hdwmy]?)$/);
	my @snapshots = ();

	my $cmd = "$zfs list -o name,creation -t snapshot -S creation -H -p $dataset 2>&1";
	if ($debug) { send_log($log_fh, "stdout", "DEBUG: getting list of snapshots of $dataset using '$cmd' ...\n"); }
	my @lines = `$cmd`;
	my $exit_code = $? >> 8;

	if ($exit_code != 0) {
		send_log($log_fh, "stderr", "CRITICAL ERROR: command '$cmd' failed with exit code: $exit_code");
		return ();
	}

	my $snapshot_count = 0;
	my $out_of_retention = 0;
	foreach my $line (@lines) {
		chomp $line;

		my ($snapshot, $timestamp) = ($line =~ /^\s*(\S+)\s+(\d+)\s*$/);
		if (! length($snapshot)) {
			send_log($log_fh, "stderr", "CRITICAL ERROR: get unexpected line from zfs list output -> '$line'");
			next;
		}

		my $trim_snapshot = $snapshot;
		$trim_snapshot =~ s/^$dataset@//;
		if (! ($trim_snapshot =~ /$dataset_hash->{'pattern'}/)) {
			next;
		}

		if ($retention_suffix ne '') {
			my $seconds = retention_to_seconds($retention_number, $retention_suffix);
			if (time() - $timestamp >= $seconds) {
				$out_of_retention = 1;
			}
		} elsif ($snapshot_count >= $retention_number) {
			$out_of_retention = 1;
		}

		if ($out_of_retention) {
			last;
		}

		my $snapshot_path = '';
		if ($dataset_hash->{'mountpoint'} ne "") {
			$snapshot_path = catfile($dataset_hash->{'mountpoint'}, ".zfs", "snapshot", "$trim_snapshot");
		}

		if ($debug) { send_log($log_fh, "stdout", "DEBUG: found snapshot $snapshot\n"); }
		my %snapshot_obj = (
			'name' => $snapshot,
			'path' => $snapshot_path,
			'timestamp' => $timestamp,
		);

		push(@snapshots, \%snapshot_obj);
		$snapshot_count += 1;
	}

	return @snapshots;
}

sub upload_snapshot {
	my ($dataset, $compressor, $snapshot, $remotes, $log_fh) = @_;

	my $cmd;
	my $method = -e "$snapshot->{'path'}" ? 'tar' : 'zfs-send';
	if (defined $dataset->{'method'} && $dataset->{'method'} ne '' && $dataset->{'method'} ne $method) {
		if (! grep { $_ eq $dataset->{'method'}} ('tar', 'files', 'zfs-send')) {
			send_log($log_fh, "stderr", "CRITICAL ERROR: method $dataset->{'method'} is not acceptable");
			return
		}
		if (!-e $snapshot->{'path'} && $dataset->{'method'} ne 'zfs-send') {
			send_log($log_fh, "stderr", "CRITICAL ERROR: 'zfs-send' is the only method acceptable for mountpoint=legacy snapshots");
			return;
		}
		$method = $dataset->{'method'};
	}

	my $snapshot_name = get_snapshot_name($snapshot);
	my $remote_path = get_remote_path($dataset->{'target'}, $snapshot_name, $compressor);

	my $rclone_options = $dataset->{'rclone_options'};
	if (!defined $rclone_options) {
		$rclone_options = '';
	}
	my $remote_name = (split ':', $dataset->{'target'})[0] . ':';
	if (is_s3_remote($remotes, $remote_name) && $rclone_options !~ /--s3-no-check-bucket/) {
		$rclone_options .= ' --s3-no-check-bucket';
	}

	my $cleanup_cmd;
	if ($method eq 'files') {
		if (is_s3_remote($remotes, $remote_name) && $rclone_options !~ /--size-only/) {
			$rclone_options .= ' --size-only';
		}
		if ($verbose && $rclone_options !~ /\s--progress\s/) {
			$rclone_options .= ' --progress';
		}
		$cmd = "$rclone sync $rclone_options $snapshot->{'path'} $remote_path 1>&2";
	} else {
		my $snapshot_size;
		my $snapshot_cmd;
		my $mbuffer_size = '128K';
		if ($method eq 'tar') {
			my $snapshot_dir = dirname("$snapshot->{'path'}");

			my $du_cmd = "du -s --apparent-size $snapshot->{'path'}";
			my $du_output = `$du_cmd`;
			if ($du_output =~ /^\s*(\d+)\s+/) {
				$snapshot_size = $1 * 1024; # du returns size in KB
			} else {
				send_log($log_fh, "stderr", "CRITICAL ERROR: got an unexpected return from '$du_cmd': $du_output");
				return;
			}

			$snapshot_cmd = "$tar cf - -C $snapshot_dir -b 128 $snapshot_name";
			$mbuffer_size = '64K';
			# When taring up snapshots in the invisible .zfs path finishes,
			# zfs leaves the path mounted. Therefore we make a cleanup
			# command to unmount it.
			$cleanup_cmd = "umount $snapshot->{'path'}";
		} else {
			my $cmd = "$zfs send -nP $snapshot->{'name'}";
			if ($debug) { send_log($log_fh, "stdout", "DEBUG: getting size of snapshot '$snapshot->{'name'}' using '$cmd' ...\n"); }
			my @lines = `$cmd`;
			my $exit_code = $? >> 8;

			if ($exit_code != 0) {
				send_log($log_fh, "stderr", "CRITICAL ERROR: command '$cmd' failed with exit code: $exit_code");
				return;
			}

			my @size_fields = split /\s+/, $lines[-1];
			$snapshot_size = $size_fields[-1];
			if (!defined $snapshot_size || $snapshot_size =~ /\D/) {
				send_log($log_fh, "stderr", "CRITICAL ERROR: got an unexpected return from '$zfs send -cp -nP $snapshot->{'name'}': $snapshot_size");
				return;
			}

			$snapshot_cmd = "$zfs send $dataset->{'zfs_send_options'} $snapshot->{'name'}";
		}

		my $pv_cmd = '';
		if ($verbose) {
			if ($rclone_options !~ /\s-v\s/ && $rclone_options !~ /\s--verbosse\s/ && $rclone_options !~ /\s-q\s/ && $rclone_options !~ /\s--quiet\s/) {
				$rclone_options .= ' -v';
			}
			$pv_cmd = "| $pv -f -F '%b %t %r %p %e\n' -s $snapshot_size";
		}

		my $compressor_cmd = defined $compressor ? '| ' . gen_compressor_cmd($compressor, $snapshot_size, $dataset->{'compressor_options'}) : '';
		
		if (is_s3_remote($remotes, $remote_name) && $rclone_options !~ /--s3-chunk-size/) {
			my $s3_chunk_size = ceil($snapshot_size / (8 * 1024 * 1024 * 1024)) + (defined $compressor ? 0 : 1) * 2;
			if ($s3_chunk_size < 5) {
				$s3_chunk_size = 5; # min s3 chunk size for multipart uploads
			}
			if ($s3_chunk_size > (5 * 1024)) {
				send_log($log_fh, "stderr", "WARN: using maximum --s3-chunk-size (5GB) to upload $snapshot->{'name'} due to its large size");
				$s3_chunk_size = 5 * 1024;
			}
			$rclone_options .= " --s3-chunk-size ${s3_chunk_size}M";
		}

		my $rclone_touch = '';
		if (defined $dataset->{'rclone_touch'} && (grep $dataset->{'rclone_touch'} eq $_, (1,"true","True","TRUE","yes","Yes","YES","on","On","ON"))) {
			$rclone_touch = "&& $rclone touch --no-create ${remote_path}";
		}

		my $rclone_command = "$rclone $rclone_options rcat ${remote_path} 1>&2 $rclone_touch";
		my $dataset_split_size = $dataset->{'split_size'};
		if (defined $split_size && "$split_size" ne '') {
			$dataset_split_size = $split_size;
		}
		if (defined $dataset_split_size && "$dataset_split_size" ne '') {
			my $split_bytes = parse_bytes("$dataset_split_size");
			if ($snapshot_size > $split_bytes) {
				if ($rclone_touch ne '') {
					$rclone_touch .= "\\\$FILE"
				}
				$rclone_command = "split -b $split_bytes -d -a 3 --filter=\"$rclone $rclone_options rcat ${remote_path}\\\$FILE 1>&2 $rclone_touch\" - .";
			}
		}

		$cmd = "$snapshot_cmd | $mbuffer -q -Q -s $mbuffer_size -m 16M $pv_cmd $compressor_cmd | $rclone_command";
	}

	if ($debug) { send_log($log_fh, "stdout", "DEBUG: uploading snapshot $snapshot->{'name'} with command '$cmd' ...\n"); }
	my $child_pid = open3(undef, undef, my $std_err = gensym, $cmd);
	$std_err->autoflush(1);

	my $pid = fork();
	if ($pid == 0) {
		while (my $line = <$std_err>) {
			if ($line =~ /^\s*\S+\s+\d+\:\d+\:\d+\s+\[[^\]]+\]\s+/) { # pv progress
				$line =~ s/^\r//;
				send_log($log_fh, 'progress', "$snapshot->{'name'} $line");
			} elsif ($line =~ /^Transferred\:.*\sETA\s+/) { # rclone progress
				send_log($log_fh, 'progress', "$snapshot->{'name'} $line");
			} elsif ($line =~ /^Transferred\:/ || $line =~ /^Elapsed time\:/ || $line =~ /^Transferring\:/) { # rclone progress
				next;
			} elsif ($line =~ /^\s+\*\s+/) { # rclone progress
				my $i = index($line, 'Transferred:');
				if ($i != -1) {
					$line = substr($line, $i, length($line));
					send_log($log_fh, 'progress', "$snapshot->{'name'} $line");
				}
			} else {
				send_log($log_fh, 'stderr', $line);
			}
		}
		exit 0;
	}

	my $reaped_child_pid = waitpid($child_pid, 0);
	if ($reaped_child_pid == $child_pid) {
		my $child_exit_status = $? >> 8;
		if ($child_exit_status != 0) {
			send_log($log_fh, "stderr", "CRITICAL ERROR: uploading snapshot $snapshot->{'name'} failed\n");
		};
	}
	waitpid($pid, 0);
	if (defined $cleanup_cmd) {
		system("$cleanup_cmd");
	}
}

sub get_snapshot_name {
	my ($snapshot) = @_;

	if (-e "$snapshot->{'path'}") {
		return basename("$snapshot->{'path'}");
	} else {
		my $snapshot_name = $snapshot->{'name'};
		$snapshot_name =~ s#@#/#;
		return basename($snapshot_name);
	}
}

sub get_remote_path {
	my ($target, $snapshot_name, $compressor) = @_;

	return catfile($target, $snapshot_name) . (defined $compressor ? ".tar.$compressor->{'extension'}" : '');
}

sub get_compressor {
	my ($compression, $level) = @_;
	if (length $level) {
		$level =~ s/^\s+|\s+$//g;
	} else {
		$level = '';
	}

	my %COMPRESS_ARGS = (
		'bzip2' => {
			bin 		=> 'bzip2',
			level		=> '',
			extension	=> 'bz2',
		},
		'gzip' => {
			bin			=> 'gzip',
			level		=> '',
			extension	=> 'gz',
		},
		'lz4' => {
			bin			=> 'lz4',
			level		=> '',
			extension	=> 'lz4',
		},
		'pbzip2' => {
			bin			=> 'pbzip2',
			level		=> '',
			extension	=> 'bz2',
		},
		'pigz' => {
			bin			=> 'pigz',
			level		=> '',
			extension	=> 'gz',
		},
		'zstd' => {
			bin			=> 'zstd',
			level		=> '',
			extension	=> 'zst',
		},
		'pzstd' => {
			bin			=> 'pzstd',
			level		=> '',
			extension	=> 'zst',
		},
		'pxz' => {
			bin			=> 'pxz',
			level		=> '',
			extension	=> 'xz',
		},
		'xz' => {
			bin			=> 'xz',
			level		=> '',
			extension	=> 'xz',
		},
		'none' => undef
	);

	if ($compression eq '') {
		$compression = $DEFAULT_COMPRESSION;
	} elsif (!(grep $compression eq $_, ((keys %COMPRESS_ARGS), 'default'))) {
		die "Unrecognised compression type '$compression'";
	}

	my $compressor = $COMPRESS_ARGS{$compression};
	if (defined $compressor && $level ne '') {
		$compressor->{'level'} = "-$level";
	}

	return $compressor;
}

sub gen_compressor_cmd {
	my ($compressor, $snapshot_size, $compressor_options) = @_;
	my $cmd = "$compressor->{'bin'} $compressor->{'level'} $compressor_options";

	if ($compressor->{'bin'} eq 'zstd') {
		my $size_hint = $snapshot_size < 2**32 ? $snapshot_size : (int($snapshot_size / (2**20) + 0.5) . 'MiB');
		$cmd = "$cmd --size-hint=$size_hint";
	}

	return "$cmd -";
}

sub checklock {
	# take argument $lockname.
	#
	# read $run_dir/$lockname.lock for a pid on first line and a mutex on second line.
	#
	# check process list to see if the pid from $run_dir/$lockname.lock is still active with
	# the original mutex found in $run_dir/$lockname.lock.
	#
	# return:
	#    0 if lock is present and valid for another process
	#    1 if no lock is present
	#    2 if lock is present, but we own the lock
	#
	# shorthand - any true return indicates we are clear to lock; a false return indicates
	#             that somebody else already has the lock and therefore we cannot.
	#

	my $lockname = shift;
	my $lockfile = "$run_dir/$lockname.lock";

	if (! -e $lockfile) {
		# no lockfile
		return 1;
	}
	# make sure lockfile contains something
	if ( -z $lockfile) {
	        # zero size lockfile, something is wrong
	        warn "WARN: deleting invalid/empty $lockfile\n";
	        unlink $lockfile;
	        return 1
	}

	# lockfile exists. read pid and mutex from it. see if it's our pid.  if not, see if
	# there's still a process running with that pid and with the same mutex.

	open FH, "< $lockfile" or die "ERROR: unable to open $lockfile";
	my @lock = <FH>;
	close FH;
	# if we didn't get exactly 2 items from the lock file there is a problem
	if (scalar(@lock) != 2) {
	    warn "WARN: deleting invalid $lockfile\n";
	    unlink $lockfile;
	    return 1
	}

	my $lockmutex = pop(@lock);
	my $lockpid = pop(@lock);

	chomp $lockmutex;
	chomp $lockpid;

	if ($lockpid == $$) {
		# we own the lockfile. no need to check any further.
		return 2;
	}
	open PL, "$ps -p $lockpid -o ppid= |";
	my @processlist = <PL>;
	close PL;

	my $ppid = pop(@processlist);
	defined $ppid && chomp $ppid;

	if (defined $ppid && $ppid == $parent_id) {
		# parent own the lockfile, no need to check any further.
		return 3;
	}

	if (defined $ppid) {
		# lock exists, is valid, is not owned by us - return false
		return 0;
	} else {
		# lock is present but not valid - remove and return true
		unlink $lockfile;
		return 1;
	}
}

sub removelock {
	# take argument $lockname.
	#
	# make sure $run_dir/$lockname.lock actually belongs to me (contains my pid and mutex)
	# and remove it if it does, die if it doesn't.

	my $lockname = shift;
	my $lockfile = "$run_dir/$lockname.lock";

	my $checklock_exit = checklock($lockname);
	if ($checklock_exit == 2 || $checklock_exit == 3) {
		unlink $lockfile;
		return;
	} elsif ($checklock_exit == 1) {
		die "ERROR: No valid lockfile found - Did a rogue process or user update or delete it?\n";
	} else {
		die "ERROR: A valid lockfile exists but does not belong to me! I refuse to remove it.\n";
	}
}

sub writelock {
	# take argument $lockname.
	#
	# write a lockfile to $run_dir/$lockname.lock with first line
	# being my pid and second line being my mutex.

	my $lockname = shift;
	my $lockfile = "$run_dir/$lockname.lock";

	# die honorably rather than overwriting a valid, existing lock
	if (! checklock($lockname)) {
		die "ERROR: Valid lock already exists - I refuse to overwrite it. Committing seppuku now.\n";
	}

	my $pid = $$;

	open PL, "$ps -p $$ -o args= |";
	my @processlist = <PL>;
	close PL;

	my $mutex = pop(@processlist);
	chomp $mutex;

	open FH, "> $lockfile" or die "CRITICAL ERROR: failed to create lock file $lockfile.\n";
	print FH "$pid\n";
	print FH "$mutex\n";
	close FH;
}

sub get_uploaded_snapshots {
	my %snapshots = ();

	foreach my $dataset (keys %{$config{'datasets'}}) {
		my @lines = `$rclone lsl --max-depth 1 $config{'datasets'}{$dataset}{'target'}`;
		foreach my $line (@lines) {
			my ($size, $year, $month, $day, $hour, $minute, $second, $object) = $line =~ /^\s*(\d+)\s+(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})\.\d+\s+([^\s]+)/;
			if (! length($object)) {
				warn "CRITICAL ERROR: get unexpected line from rclone lsl output -> '$line'";
				next;
			}

			my $trim_object = $object;
			$trim_object =~ s/(\.tar\.[\.0-9a-zA-Z]+|\.\d{3})$//;
			if (! ($trim_object =~ /$config{'datasets'}{$dataset}{'pattern'}/)) {
				next;
			}

			my $datetime = "$year-$month-$day $hour:$minute:$second";
			my $name = "$dataset\@$trim_object";
			if (defined $snapshots{$name}) {
				push(@{$snapshots{$name}->{'objects'}}, $object);
				if ($datetime lt $snapshots{$name}->{'datetime'}) {
					$snapshots{$name}->{'datetime'} = $datetime;
				}
			} else {
				$snapshots{$name} = {
					'year' => $year,
					'month'=> $month,
					'day' => $day,
					'hour' => $hour,
					'minute' => $minute,
					'second' => $second,
					'datetime' => $datetime,
					'objects' => [$object]
				};
			}
		}
	}

	return %snapshots;
}

sub get_remotes {
	my %remotes = ();

	my @lines = `$rclone listremotes --long`;
	foreach my $line (@lines) {
		my ($name, $type) = $line =~ /^\s*(\S+)\s+(\S+)/;
		if (!defined $name) { next; }
		$remotes{$name} = {
			'type' => $type
		}
	}

	return %remotes;
}

sub is_s3_remote {
	my ($remotes, $remote_name) = @_;
	return exists $remotes->{$remote_name} && $remotes->{$remote_name}->{'type'} eq 's3';
}

sub retention_to_seconds {
	my ($number, $suffix) = @_;

	my $seconds = 60 * 60;
	if ($suffix eq 'h') {
		$seconds = $seconds * $number;
	} elsif ($suffix eq 'd') {
		$seconds = $seconds * 24 * $number;
	} elsif ($suffix eq 'w') {
		$seconds = $seconds * 24 * 7 * $number;
	} elsif ($suffix eq 'm') {
		$seconds = $seconds * 24 * 30 * $number;
	} else { # year
		$seconds = $seconds * 24 * 365 * $number;
	}

	return $seconds;
}

sub send_log {
	my ($fh, $type, $content) = @_;
	if (fileno($fh) == fileno(STDOUT)) {
		print $content;
	} else {
		print $fh "$type $content";
	}
}

__END__

=head1 NAME

backoid - a sanoid/syncoid-like utility for object storage backup targets

=head1 SYNOPSIS

backoid [options]

Options:

  --configdir=DIR       Specify a directory to find config file backoid.conf
  --run-dir=DIR         Specify a directory for temporary files such as lock files

  --split-size          Specify a size used to split up the backup if its size exceeds this value
  --parallel[=NUMBER]   Backup snapshots in parallel

  --verbose             Prints out additional information during a backoid run
  --debug               Prints out a lot of additional information during a backoid run

  --help                Prints this helptext
  --version             Prints the version number
