#!/usr/bin/perl
# dirs
# Jody DeRidder 6/30/09
# a rewrite of dirs, written in 2003
# modified for UA holdings
#
# Verifies that checksum files have not changed since last processing.
# Then rewrites checksums of checksums when done, since files may have been
# added; these checksums of the checksum files are kept in dcheck.
# If one has changed, then there's reason to believe error messages after
# that, from that directory, are in error....
#
# Finds all subdirectories and locates all files for processing:
# a test, to incorporate into sumthem, the md5sum software for
# verifying quality of preservation data before each full backup.
#
# Order of work:
#   1. verify the checksums of the checksum files (dcheck -> dchked)
#   2. walk the archive and record an md5sum for every file not yet listed
#   3. verify every recorded checksum (chksums/* -> checked)
#   4. record the run in the checkscripts database
#   5. back up dcheck, rebuild it, and (if all was well) archive the
#      checksum files as a tarball
#   6. delete backups made in the same month of the previous year
#
## Copyright (c) 2010, The University of Alabama Libraries.
## Contributed by Jody DeRidder, 7/30/10.
## All rights reserved.
## Redistribution and use in source and binary forms, with or without
## modification, are permitted provided that the following conditions are met:
##   * Redistributions of source code must retain the above copyright notice,
##     this list of conditions and the following disclaimer.
##   * Redistributions in binary form must reproduce the above copyright
##     notice, this list of conditions and the following disclaimer in
##     the documentation and/or other materials provided with the distribution.
##   * Neither the name of The University of Alabama Libraries nor the names
##     of its contributors may be used to endorse or promote products
##     derived from this software without specific prior written permission.
## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
## AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
## THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
## PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
## EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
## PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
## OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
## LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
## NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
## EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

use strict;
use warnings;

use DBI;
use File::Copy;
use File::Path qw(rmtree);

# ---------------------------------------------------------------------------
# Configuration and shared state
# ---------------------------------------------------------------------------

my $now = `date`;       # start time, reported in the notification mails
chomp $now;
my $after = '';         # end-of-verification time; set after step 3
my $message;            # accumulated problem report; sendmessage() falls
                        # back to it when called without an argument

my $scriptbase = "/srv/scripts/md5/cya/";
my $email      = "jlderidder\@ua.edu";
my $email2     = "uadigitalservices\@gmail.com";
my $okay       = "yes"; # becomes "no" as soon as any check fails

# checksums for the files containing checksums of all files in the directory
# by this file's name.  Ex: file named _area2_COC_tiffs contains checksums of
# all files in /area2/COC/tiffs.  So dcheck contains this line:
#58a5434d16adb51185f55d32417a6bb0  /home/firestar/md5stuff/chksums/_area2_COC_tiffs
my $dchk = $scriptbase."dcheck";

# dchked is the output of checking the md5sums in dcheck;
# so if the file _area2_COC_tiffs still has the same md5sum
# as is listed in dcheck, dchked will have a line that says:
#/home/firestar/md5stuff/chksums/_area2_COC_tiffs: OK
# else it will end in 'FAILED'
my $dchked = $scriptbase."dchked";

# contains copies of all the files in chksums,
# which is to say, all the files listed in dcheck
my $backup = $scriptbase."backups/";

# checked is the output of checking the md5sums that are in each
# of the files in chksums.  This is the file level verification.
# If it reads:
#/area2/ETD/bib/etd583: OK
# then that file's checksum did not change.
my $checked = $scriptbase."checked";

# chksums is the directory containing the files that hold all the checksums,
# all of which were listed in dcheck
my $store = $scriptbase."chksums/";

# the base directories of what we are checking
my @bases = ("/srv/archive");    # , "/d1/area3");

# marker file for the (currently disabled) mount check in step 2
my $exists = "/existenceCheck";

# the body of the most recent notification is always kept in this file
my $tellwhat = $scriptbase."message";

# Nothing in this script writes DirsErrors; it is cleared here and mailed
# in step 3 if something else has put content in it by then.
my $errors = $scriptbase."DirsErrors";
unlink($errors);

# File names that are regenerated/versioned elsewhere; we pick up the
# versioned text, xml and png files, not these.
my @skip_patterns = (
    qr/Manifest\.html/,
    qr/\d{4}\.txt/,
    qr/\d{4}\.xml/,
    qr/\.\d{1,2}\./,
    qr/\.icon\.png/,
    qr/mods\.xml/,
    qr/ead\.xml/,
    qr/Match2?\.txt/,
    qr/OAI_ids\.txt/,
    qr/mets\.xml/,
);

# ---------------------------------------------------------------------------
# 1. Verify the checksums of the checksum files.
#    This just lets me know if the files containing the checksums have
#    changed since last time; if they have, then I should perhaps disregard
#    errors from the rest of the script (maybe restore a backup of these
#    files, and try again).
# ---------------------------------------------------------------------------

unlink($dchked);    # remove old results
run_append($dchked, 'md5sum', '-c', $dchk);

undef $message;
for my $line (read_lines($dchked)) {
    # It's okay for Metadata/Documentation directory checksums to fail.
    # md5sum -c prints "<file>: OK" / "<file>: FAILED", so the directory
    # name is followed by the status, not by the end of the line.
    next if $line =~ /(?:Metadata|Documentation)(?::\s.*)?$/;
    $message .= $line."\n" if $line =~ /FAILED/;
}

if ($message) {     # notifies just me if there's a problem
    $okay = "no";   # don't copy this set of checksums to backup
    write_message_file($message);
    send_mail("MD5SUM CHECKSUMS CHANGED, libcontent1 archive", $message, $email);
}

# ---------------------------------------------------------------------------
# 2. Look to see if new files have been added, and checksum them.
# ---------------------------------------------------------------------------

for my $base (@bases) {
    # if (! -e $base.$exists){
    #     #send error message and do NOT run
    #     sendmessage("$base$exists does NOT exist on libcontent1.  ERROR: cannot run md5 checks!!\n");
    #     exit;
    # }
    record_new_files($base);
}

# ---------------------------------------------------------------------------
# 3. Verify every recorded checksum.
# ---------------------------------------------------------------------------

unlink($checked);    # remove old results
opendir(my $store_dh, $store) or die "can't open $store: $!\n";
while (defined(my $sums_file = readdir $store_dh)) {
    next if $sums_file =~ /^\./;
    run_append($checked, 'md5sum', '-c', $store.$sums_file);
}
closedir($store_dh);

undef $message;
for my $line (read_lines($checked)) {
    if ($line =~ /FAILED/) {
        $okay = "no";
        $message .= $line."\n";
    }
}

# Only mail when this stage actually found failures; a failure in step 1
# has already been reported and would otherwise produce an empty mail here.
if ($message) {
    write_message_file($message);
    send_mail("MD5SUM FAILURE LIBCONTENT1", $message, $email);
}

# if the errors file exists and is larger than zero size
if (-s $errors) {
    send_mail("MD5 errors on Libcontent1", join('', read_lines($errors)), $email);
}

$after = `date`;
chomp $after;

# ---------------------------------------------------------------------------
# 4. Notify the checkscripts database that this script ran.
#    (Historically the script number depended on the weekday: 15 on Sunday,
#    1 on Thursday.  Only 1 is in use now.)
# ---------------------------------------------------------------------------

my $username = "user";
my $password = "password";
my $mynum    = 1;

# RaiseError stays off so that failures are reported by mail below instead
# of killing the script before the backup steps run.
my $dbh = DBI->connect("dbi:mysql:checkscripts:content.lib.ua.edu:3306",
    $username, $password, { PrintError => 1, RaiseError => 0 });

if (!$dbh) {
    my $why = DBI->errstr;
    $why = 'unknown error' if !defined $why;
    sendmessage("Can't connect to checkscripts on content.lib.ua.edu: ".$why."\n");
}
else {
    my $sth = $dbh->prepare("insert into ran values(NULL,?,NULL,NULL)");
    if (!$sth || !$sth->execute($mynum)) {
        my $why = $dbh->errstr();
        $why = 'unknown error' if !defined $why;
        sendmessage("dirs on libcontent1 can't insert into checkscripts: ".$why."\n");
    }
    $sth->finish if $sth;
    $dbh->disconnect();
}

# ---------------------------------------------------------------------------
# 5. Rewrite the checksums for the checksum files, for next time.
#    First, keep a copy, in case there was a problem with them having changed.
# ---------------------------------------------------------------------------

my ($sec, $min, $hour, $mday, $mon, $year) = localtime;
$year += 1900;
$mon = sprintf("%02d", $mon + 1);    # also used for the pruning prefix below
my $timestamp = sprintf("%04d%s%02d%02d%02d%02d",
    $year, $mon, $mday, $hour, $min, $sec);

my $newname = $backup."filesOfSums/".$timestamp;
if (copy($dchk, $newname)) {
    unlink($dchk);    # gets rid of old file where these sums are stored
    opendir(my $dh, $store)
        or die "can't open $store to create new doublechecks: $!\n";
    while (defined(my $sums_file = readdir $dh)) {
        next if $sums_file =~ /^\./;
        run_append($dchk, 'md5sum', $store.$sums_file);
    }
    closedir($dh);
}
else {
    $okay = "no";
    $message .= " unable to copy $dchk to $newname; did NOT do md5sum of the files in $store and add to $dchk\n";
}

if ($okay eq "yes") {
    # checksums of checksums did not change, so copy all the
    # checksum files elsewhere for backup.
    my @problems = backup_sums_files($timestamp);
    if (@problems) {
        $okay = "no";
        sendmessage(join('', @problems));
    }
    else {
        sendmessage("All is well. Md5 scripts ran on libcontent1 from $now to $after.\n");
    }
}
else {
    $message .= " dirs just finished getting checking checksums on libcontent1, with errors (some checksums changed). Did not copy for backup.";
    $message .= "\n It ran from $now to $after\n";
    sendmessage($message);
}

# ---------------------------------------------------------------------------
# 6. Delete old backups: everything stamped with this month of last year.
# ---------------------------------------------------------------------------

my $lastyear = $year - 1;
my $delete   = $lastyear.$mon;
prune_old_backups($backup,                $delete);
prune_old_backups($backup."filesOfSums/", $delete);

# ===========================================================================
# Subroutines
# ===========================================================================

# sendmessage([$text])
# Mails $text to both notification addresses and keeps a copy in the message
# file.  Without an argument it sends the accumulated $message, or a stock
# note if there is none.  The caller's $message is left untouched.
sub sendmessage {
    my ($text) = @_;
    $text = $message if !$text;
    if (!$text) {
        $text = "dirs just finished getting checking checksums on libcontent1 -- all is well there. Ran from $now to $after. However, it can't get to checkscripts database on encompass!";
    }
    write_message_file($text);
    send_mail("LIBCONTENT1 Message!", $text, $email, $email2);
    return;
}

# send_mail($subject, $text, @recipients)
# Pipes $text to mail(1) once per recipient.  The command is run without a
# shell, so the subject and addresses need no quoting.  Failures only warn:
# a broken mailer must not stop the verification itself.
sub send_mail {
    my ($subject, $text, @recipients) = @_;
    $text = '' if !defined $text;
    for my $to (@recipients) {
        if (open(my $mail, '|-', 'mail', '-s', $subject, $to)) {
            print {$mail} $text;
            close($mail) or warn "mail to $to did not exit cleanly ($?)\n";
        }
        else {
            warn "can't run mail for $to: $!\n";
        }
    }
    return;
}

# write_message_file($text)
# Overwrites the message file with the text of the latest notification.
sub write_message_file {
    my ($text) = @_;
    my $fh;
    if (!open($fh, '>', $tellwhat)) {
        warn "can't write $tellwhat: $!\n";
        return;
    }
    print {$fh} (defined $text ? $text : '');
    close($fh) or warn "can't finish writing $tellwhat: $!\n";
    return;
}

# read_lines($path)
# Returns every line of $path (newlines kept); dies if it can't be opened.
sub read_lines {
    my ($path) = @_;
    open(my $fh, '<', $path) or die "can't open $path: $!\n";
    my @lines = <$fh>;
    close($fh);
    return @lines;
}

# run_append($outfile, @command)
# Runs @command without a shell (file names from the archive may contain
# quotes, spaces or other shell metacharacters) and appends its standard
# output to $outfile.  $outfile is created even if the command prints
# nothing.  The command's exit status is ignored on purpose: "md5sum -c"
# exits non-zero whenever a checksum fails, which is reported from its
# output instead.
sub run_append {
    my ($outfile, @command) = @_;
    open(my $out, '>>', $outfile) or die "can't append to $outfile: $!\n";
    if (open(my $pipe, '-|', @command)) {
        while (my $line = <$pipe>) {
            print {$out} $line;
        }
        close($pipe);
    }
    else {
        warn "can't run $command[0]: $!\n";
    }
    close($out) or die "can't finish writing $outfile: $!\n";
    return;
}

# listed_paths($sums_file)
# Returns a hash (path => 1) of every file already recorded in $sums_file,
# or an empty list if the sums file does not exist yet.  Lines are parsed in
# md5sum's own output format: 32 hex digits, a space, a space or '*', then
# the file name; a leading backslash marks a name with escaped backslashes
# or newlines.
sub listed_paths {
    my ($sums_file) = @_;
    my %seen;
    return %seen if !-e $sums_file;
    open(my $fh, '<', $sums_file) or die "can't check $sums_file: $!\n";
    while (my $line = <$fh>) {
        chomp $line;
        my ($escaped, $path) = $line =~ /^(\\?)[0-9a-fA-F]{32} [ *](.*)$/
            or next;
        if ($escaped) {
            $path =~ s/\\([\\n])/$1 eq 'n' ? "\n" : "\\"/ge;
        }
        $seen{$path} = 1;
    }
    close($fh);
    return %seen;
}

# record_new_files($base)
# Walks $base breadth-first.  For every directory it keeps one sums file in
# $store, named after the directory with '/' replaced by '~' (we have
# underscores in file names, so '~' is used).  Any file not yet listed in
# that sums file gets an md5sum appended to it.  Metadata and Documentation
# directories, dot files, Thumbs.db, htaccess files and the derived files in
# @skip_patterns are ignored.
sub record_new_files {
    my ($base) = @_;
    my @queue = ($base);    # directories still to visit

    DIRECTORY:
    while (defined(my $dir = shift @queue)) {
        # we don't need checksums for Metadata directories
        next DIRECTORY if $dir =~ /Metadata$/ || $dir =~ /Documentation$/;

        opendir(my $dh, $dir) or die "can't open $dir: $!\n";
        (my $mydir = $dir) =~ tr{/}{~};
        my $where = $store.$mydir;

        my %listed;         # loaded lazily: only directories holding files
        my $loaded = 0;     # need their sums file read

        FILE:
        while (defined(my $file = readdir $dh)) {
            # ignore .files, Thumbs.db and htaccess files
            next FILE if $file =~ /^\./;
            next FILE if $file eq "Thumbs.db";
            next FILE if $file =~ /htaccess/;
            next FILE if grep { $file =~ $_ } @skip_patterns;

            my $name = "$dir/$file";
            if (-d $name) {
                push @queue, $name;
                next FILE;
            }

            if (!$loaded) {
                %listed = listed_paths($where);
                $loaded = 1;
            }
            next FILE if $listed{$name};

            # this is supposedly a new file; adding a checksum for it
            # to the sums file
            print "no match for ".$name."; trying to get an md5sum for it.\n";
            run_append($where, 'md5sum', $name);
            $listed{$name} = 1;
        }
        closedir($dh);
    }
    return;
}

# backup_sums_files($timestamp)
# Copies every checksum file from $store into $backup/$timestamp, packs the
# copies into $timestamp.tar.gz inside that directory (these take up a lot
# of space) and removes the uncompressed copies once the tarball exists.
# Returns a list of problem descriptions; empty when everything worked.
sub backup_sums_files {
    my ($stamp) = @_;
    my @problems;
    my $newdir = $backup.$stamp;

    if (!mkdir($newdir)) {
        return ("Cannot create $newdir: $!\n");
    }

    my $get;
    if (!opendir($get, $store)) {
        return ("Cannot open $store to copy files to $newdir and zip them up\n");
    }
    while (defined(my $file = readdir $get)) {
        next if $file =~ /^\./;
        copy($store.$file, "$newdir/$file")
            or push @problems,
                "Cannot copy $store$file to $newdir/$file to zip them up: $!\n";
    }
    closedir($get);

    # Both paths are built from fixed strings and digits only, so handing
    # them to the shell is safe.  The tarball's name contains no '~', so
    # find never feeds the archive to itself.
    my $tarball = "$newdir/$stamp.tar.gz";
    system("find $newdir -name '*~*' -print | tar -czf $tarball --files-from -");
    if ($? != 0 || !-s $tarball) {
        push @problems,
            "unable to create $tarball, so kept the uncompressed copies in $newdir\n";
        return @problems;
    }

    # Remove the copies from Perl rather than with "rm $newdir/*~*", which
    # fails once the expanded argument list gets too long.
    my $nd;
    if (!opendir($nd, $newdir)) {
        push @problems, "Cannot open $newdir to remove the uncompressed copies: $!\n";
        return @problems;
    }
    my @copies = grep { /~/ } readdir $nd;
    closedir($nd);
    for my $copy_name (@copies) {
        unlink("$newdir/$copy_name")
            or push @problems, "unable to remove $newdir/$copy_name: $!\n";
    }
    return @problems;
}

# prune_old_backups($dir, $prefix)
# Removes every entry of $dir whose name starts with $prefix (year and
# two-digit month).  Backups of the checksum files are directories, so those
# are removed with their contents; plain files are unlinked.
sub prune_old_backups {
    my ($dir, $prefix) = @_;
    opendir(my $dh, $dir) or die "can't look in $dir: $!\n";
    my @old = grep { /^\Q$prefix\E/ } readdir $dh;
    closedir($dh);
    for my $entry (@old) {
        my $remove = $dir.$entry;
        if (-d $remove && !-l $remove) {
            rmtree($remove);
        }
        else {
            unlink($remove) or warn "unable to remove $remove: $!\n";
        }
    }
    return;
}