#! /usr/bin/perl -w
# Display and edit the 'dev' field in tar's snapshots
# Copyright 2007-2023 Free Software Foundation, Inc.
# This file is part of GNU tar.
# GNU tar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
# GNU tar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
# tar-snapshot-edit
#
# This script is capable of replacing values in the 'dev' field of an
# incremental backup 'snapshot' file. This is useful when the device
# used to store files in a tar archive changes, without the files
# themselves changing. This may happen when, for example, a device
# driver changes major or minor numbers.
#
# It can also run a check on all the field values found in the
# snapshot file, printing out a detailed message when it finds values
# that would cause an "Unexpected field value in snapshot file",
# "Numerical result out of range", or "Invalid argument" error
# if tar were run using that snapshot file as input. (See the
# comments included in the definition of the check_field_values
# routine for more detailed information regarding these checks.)
#
#
#
# Author: Dustin J. Mitchell
#
# Modified Aug 25, 2011 by Nathan Stratton Treadway :
# * update Perl syntax to work correctly with more recent versions of
# Perl. (The original code worked with in the v5.8 timeframe but
# not with Perl v5.10.1 and later.)
# * added a "-c" option to check the snapshot file for invalid field values.
# * handle NFS indicator character ("+") in version 0 and 1 files
# * preserve the original header/version line when editing version 1
# or 2 files.
# * tweak output formatting
#
# Modified March 13, 2013 by Nathan Stratton Treadway :
# * configure field ranges used for -c option based on the system
# architecture (in response to the December 2012 update to GNU tar
# enabling support for systems with signed dev_t values).
# * when printing the list of device ids found in the snapshot file
# (when run in the default mode), print the raw device id values
# instead of the hex-string version in those cases where they
# can't be converted successfully.
use Getopt::Std;
use Config;
my %snapshot_field_ranges; # used in check_field_values function
## reading
sub read_incr_db ($) {
my $filename = shift;
open(my $file, "<$filename") || die "Could not open '$filename' for reading";
my $header_str = <$file>;
my $file_version;
if ($header_str =~ /^GNU tar-[^-]*-([0-9]+)\n$/) {
$file_version = $1+0;
} else {
$file_version = 0;
}
print "\nFile: $filename\n";
print " Detected snapshot file version: $file_version\n\n";
if ($file_version == 0) {
return read_incr_db_0($file, $header_str);
} elsif ($file_version == 1) {
return read_incr_db_1($file, $header_str);
} elsif ($file_version == 2) {
return read_incr_db_2($file, $header_str);
} else {
die "Unrecognized snapshot version in header '$header_str'";
}
}
sub read_incr_db_0 ($$) {
my $file = shift;
my $header_str = shift;
my $hdr_timestamp_sec = $header_str;
chop $hdr_timestamp_sec;
my $hdr_timestamp_nsec = ''; # not present in file format 0
my $nfs;
my @dirs;
while (<$file>) {
/^(\+?)([0-9]*) ([0-9]*) (.*)\n$/ || die("Bad snapshot line $_");
if ( $1 eq "+" ) {
$nfs="1";
} else {
$nfs="0";
}
push @dirs, { nfs=>$nfs,
dev=>$2,
ino=>$3,
name=>$4 };
}
close($file);
# file version, timestamp, timestamp, dir list, file header line
return [ 0, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs, ""];
}
sub read_incr_db_1 ($$) {
my $file = shift;
my $header_str = shift;
my $timestamp = <$file>; # "sec nsec"
my ($hdr_timestamp_sec, $hdr_timestamp_nsec) = ($timestamp =~ /([0-9]*) ([0-9]*)/);
my $nfs;
my @dirs;
while (<$file>) {
/^(\+?)([0-9]*) ([0-9]*) ([0-9]*) ([0-9]*) (.*)\n$/ || die("Bad snapshot line $_");
if ( $1 eq "+" ) {
$nfs="1";
} else {
$nfs="0";
}
push @dirs, { nfs=>$nfs,
timestamp_sec=>$2,
timestamp_nsec=>$3,
dev=>$4,
ino=>$5,
name=>$6 };
}
close($file);
# file version, timestamp, timestamp, dir list, file header line
return [ 1, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs, $header_str ];
}
sub read_incr_db_2 ($$) {
my $file = shift;
my $header_str = shift;
$/="\0"; # $INPUT_RECORD_SEPARATOR
my $hdr_timestamp_sec = <$file>;
chop $hdr_timestamp_sec;
my $hdr_timestamp_nsec = <$file>;
chop $hdr_timestamp_nsec;
my @dirs;
while (1) {
last if eof($file);
my $nfs = <$file>;
my $timestamp_sec = <$file>;
my $timestamp_nsec = <$file>;
my $dev = <$file>;
my $ino = <$file>;
my $name = <$file>;
# get rid of trailing NULs
chop $nfs;
chop $timestamp_sec;
chop $timestamp_nsec;
chop $dev;
chop $ino;
chop $name;
my @dirents;
while (my $dirent = <$file>) {
chop $dirent;
push @dirents, $dirent;
last if ($dirent eq "");
}
die "missing terminator" unless (<$file> eq "\0");
push @dirs, { nfs=>$nfs,
timestamp_sec=>$timestamp_sec,
timestamp_nsec=>$timestamp_nsec,
dev=>$dev,
ino=>$ino,
name=>$name,
dirents=>\@dirents };
}
close($file);
$/ = "\n"; # reset to normal
# file version, timestamp, timestamp, dir list, file header line
return [ 2, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs, $header_str];
}
## display
sub show_device_counts ($) {
my $info = shift;
my %devices;
foreach my $dir (@{$info->[3]}) {
my $dev = $dir->{'dev'};
$devices{$dev}++;
}
my $devstr;
foreach $dev (sort {$a <=> $b} keys %devices) {
$devstr = sprintf ("0x%04x", $dev);
if ( $dev > 0xffffffff or $dev < 0 or hex($devstr) != $dev ) {
# sprintf "%x" will not return a useful value for device ids
# that are negative or which overflow the integer size on this
# instance of Perl, so we convert the hex string back to a
# number, and if it doesn't (numerically) equal the original
# device id value, we know the hex conversion hasn't worked.
#
# Unfortunately, since we're running in "-w" mode, Perl will
# also print a warning message if the hex() routine is called
# on anything larger than "0xffffffff", even in 64-bit Perl
# where such values are actually supported... so we have to
# avoid calling hex() at all if the device id is too large or
# negative. (If it's negative, the conversion to an unsigned
# integer for the "%x" specifier will mean the result will
# always trigger hex()'s warning on a 64-bit machine.)
#
# These situations don't seem to occur very often, so for now
# when they do occur, we simply print the original text value
# that was read from the snapshot file; it will look a bit
# funny next to the values that do print in hex, but that's
# preferable to printing values that aren't actually correct.
$devstr = $dev;
}
printf " Device %s occurs $devices{$dev} times.\n", $devstr;
}
}
## check field values
# initializes the global %snapshot_field_ranges hash, based on the "-a"
# command-line option if given, otherwise based on the "archname" of
# the current system.
#
# Each value in the hash is a two-element array containing the minimum
# and maximum allowed values, respectively, for that field in the snapshot
# file. GNU tar's allowed values for each architecture are determined
# in the incremen.c source file, where the TYPE_MIN and TYPE_MAX
# pre-processor expressions are used to determine the range that can be
# expressed by the C data type used for each field; the values in the
# array defined below should match those calculations. (For tar v1.27
# and later, the valid ranges for a particular tar binary can easily
# be determined using the "tar --show-snapshot-field-ranges" command.)
sub choose_architecture ($) {
my $opt_a = shift;
my $arch = $opt_a ? $opt_a : $Config{'archname'};
# These ranges apply to Linux 2.4/2.6 on iX86 systems, but are used
# by default on unrecognized/unsupported systems, too.
%iX86_linux_field_ranges = (
timestamp_sec => [ -2147483648, 2147483647 ], # min/max of time_t
timestamp_nsec => [ 0, 999999999 ], # 0 to BILLION-1
nfs => [ 0, 1 ],
dev => [ 0, 18446744073709551615 ], # min/max of dev_t
ino => [ 0, 4294967295 ], # min/max of ino_t
);
if ( $arch =~ m/^i[\dxX]86-linux/i ) {
%snapshot_field_ranges = %iX86_linux_field_ranges;
print "Checking snapshot field values using \"iX86-linux\" ranges.\n\n";
} elsif ( $arch =~ m/^x86_64-linux/i ) {
%snapshot_field_ranges = (
timestamp_sec => [ -9223372036854775808, 9223372036854775807 ],
timestamp_nsec => [ 0, 999999999 ],
nfs => [ 0, 1 ],
dev => [ 0, 18446744073709551615 ],
ino => [ 0, 18446744073709551615 ],
);
print "Checking snapshot field values using \"x86_64-linux\" ranges.\n\n";
} elsif ( $arch =~ m/^IA64.ARCHREV_0/i ) {
# HP/UX running on Itanium/ia64 architecture
%snapshot_field_ranges = (
timestamp_sec => [ -2147483648, 2147483647 ],
timestamp_nsec => [ 0, 999999999 ],
nfs => [ 0, 1 ],
dev => [ -2147483648, 2147483647 ],
ino => [ 0, 4294967295 ],
);
print "Checking snapshot field values using \"IA64.ARCHREV_0\" (HP/UX) ranges.\n\n";
} else {
%snapshot_field_ranges = %iX86_linux_field_ranges;
print "Unrecognized architecture \"$arch\"; defaulting to \"iX86-linux\".\n";
print "(Use -a option to override.)\n" unless $opt_a;
print "\n";
}
if ( ref(1) ne "" ) {
print "(\"bignum\" mode is in effect; skipping 64-bit-integer check.)\n\n"
} else {
# find the largest max value in the current set of ranges
my $maxmax = 0;
for $v (values %snapshot_field_ranges ) {
$maxmax = $v->[1] if ($v->[1] > $maxmax);
}
# "~0" translates into a platform-native integer with all bits turned
# on -- that is, the largest value that can be represented as
# an integer. We print a warning if our $maxmax value is greater
# than that largest integer, since in that case Perl will switch
# to using floats for those large max values. The wording of
# the message assumes that the only way this situation can exist
# is that the platform uses 32-bit integers but some of the
# snapshot-file fields have 64-bit values.
if ( ~0 < $maxmax ) {
print < $max ) {
$msg = " $field_name value too high: \"$field_value\" > $max \n";
}
}
return $msg;
}
# This routine loops through each directory entry in the $info data
# structure and prints a warning message if tar would abort with an
# "Unexpected field value in snapshot file", "Numerical result out of
# range", or "Invalid argument" error upon reading this snapshot file.
#
# (Note that the "Unexpected field value in snapshot file" error message
# was introduced along with the change to snapshot file format "2",
# starting with tar v1.16 [or, more precisely, v1.15.91], while the
# other two were introduced in v1.27.)
#
# The checks here are intended to match those found in the incremen.c
# source file. See the choose_architecture() function (above) for more
# information on how to configure the range of values considered valid
# by this script.
#
# (Note: the checks here are taken from the code that processes
# version 2 snapshot files, but to keep things simple we apply those
# same checks to files having earlier versions -- but only for
# the fields that actually exist in those input files.)
sub check_field_values ($) {
my $info = shift;
my $msg;
my $error_found = 0;
print " Checking field values in snapshot file...\n";
$snapver = $info->[0];
$msg = "";
$msg .= validate_integer_field($info->[1], 'timestamp_sec');
if ($snapver >= 1) {
$msg .= validate_integer_field($info->[2], 'timestamp_nsec');
}
if ( $msg ne "" ) {
$error_found = 1;
print "\n snapshot file header:\n";
print $msg;
}
foreach my $dir (@{$info->[3]}) {
$msg = "";
$msg .= validate_integer_field($dir->{'nfs'}, 'nfs');
if ($snapver >= 1) {
$msg .= validate_integer_field($dir->{'timestamp_sec'}, 'timestamp_sec');
$msg .= validate_integer_field($dir->{'timestamp_nsec'}, 'timestamp_nsec');
}
$msg .= validate_integer_field($dir->{'dev'}, 'dev');
$msg .= validate_integer_field($dir->{'ino'}, 'ino');
if ( $msg ne "" ) {
$error_found = 1;
print "\n directory: $dir->{'name'}\n";
print $msg;
}
}
print "\n Snapshot field value check complete" ,
$error_found ? "" : ", no errors found" ,
".\n";
}
## editing
sub replace_device_number ($@) {
my $info = shift(@_);
my @repl = @_;
my $count = 0;
foreach my $dir (@{$info->[3]}) {
foreach $x (@repl) {
if ($dir->{'dev'} eq $$x[0]) {
$dir->{'dev'} = $$x[1];
$count++;
last;
}
}
}
print " Updated $count records.\n"
}
## writing
sub write_incr_db ($$) {
my $info = shift;
my $filename = shift;
my $file_version = $$info[0];
open($file, ">$filename") || die "Could not open '$filename' for writing";
if ($file_version == 0) {
write_incr_db_0($info, $file);
} elsif ($file_version == 1) {
write_incr_db_1($info, $file);
} elsif ($file_version == 2) {
write_incr_db_2($info, $file);
} else {
die "Unknown file version $file_version.";
}
close($file);
}
sub write_incr_db_0 ($$) {
my $info = shift;
my $file = shift;
my $timestamp_sec = $info->[1];
print $file "$timestamp_sec\n";
foreach my $dir (@{$info->[3]}) {
if ($dir->{'nfs'}) {
print $file '+'
}
print $file "$dir->{'dev'} ";
print $file "$dir->{'ino'} ";
print $file "$dir->{'name'}\n";
}
}
sub write_incr_db_1 ($$) {
my $info = shift;
my $file = shift;
print $file $info->[4];
my $timestamp_sec = $info->[1];
my $timestamp_nsec = $info->[2];
print $file "$timestamp_sec $timestamp_nsec\n";
foreach my $dir (@{$info->[3]}) {
if ($dir->{'nfs'}) {
print $file '+'
}
print $file "$dir->{'timestamp_sec'} ";
print $file "$dir->{'timestamp_nsec'} ";
print $file "$dir->{'dev'} ";
print $file "$dir->{'ino'} ";
print $file "$dir->{'name'}\n";
}
}
sub write_incr_db_2 ($$) {
my $info = shift;
my $file = shift;
print $file $info->[4];
my $timestamp_sec = $info->[1];
my $timestamp_nsec = $info->[2];
print $file $timestamp_sec . "\0";
print $file $timestamp_nsec . "\0";
foreach my $dir (@{$info->[3]}) {
print $file $dir->{'nfs'} . "\0";
print $file $dir->{'timestamp_sec'} . "\0";
print $file $dir->{'timestamp_nsec'} . "\0";
print $file $dir->{'dev'} . "\0";
print $file $dir->{'ino'} . "\0";
print $file $dir->{'name'} . "\0";
foreach my $dirent (@{$dir->{'dirents'}}) {
print $file $dirent . "\0";
}
print $file "\0";
}
}
## main
sub main {
our ($opt_b, $opt_r, $opt_h, $opt_c, $opt_a);
getopts('br:hca:');
HELP_MESSAGE() if ($opt_h || $#ARGV == -1 || ($opt_b && !$opt_r) ||
($opt_a && !$opt_c) || ($opt_r && $opt_c) );
my @repl;
if ($opt_r) {
foreach my $spec (split(/,/, $opt_r)) {
($spec =~ /^([^-]+)-([^-]+)/) || die "Invalid replacement specification '$opt_r'";
push @repl, [interpret_dev($1), interpret_dev($2)];
}
}
choose_architecture($opt_a) if ($opt_c);
foreach my $snapfile (@ARGV) {
my $info = read_incr_db($snapfile);
if ($opt_r) {
if ($opt_b) {
rename($snapfile, $snapfile . "~") || die "Could not rename '$snapfile' to backup";
}
replace_device_number($info, @repl);
write_incr_db($info, $snapfile);
} elsif ($opt_c) {
check_field_values($info);
} else {
show_device_counts($info);
}
}
}
sub HELP_MESSAGE {
print <