#!/usr/bin/perl
use strict;
use warnings;
use Time::Local;

# testIncoming
# locate all the bad filenames
#    things in the wrong directory
#    things missing
#    bad sequences

##Copyright (c) 2009, The University of Alabama Libraries.
## Contributed by Jody DeRidder, 8/25/09.
##All rights reserved.
##Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
## * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
## * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the
## distribution.
## * Neither the name of The University of Alabama Libraries nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
##THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
##THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
##CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
##PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
##LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
##EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# jody DeRidder, 8/25/09
#
# Main script.  For every directory directly under $base the operator is
# asked to confirm a collection identifier; the tree below that directory is
# then walked breadth-first and four kinds of problems are collected:
#   @missing  - no <coll>.xml in an Admin dir / no <coll>.txt in a Metadata dir
#   @wrongdir - entries whose name does not reflect the directory they are in
#   @badform  - names that do not follow the identifier pattern
#   @badcount - files whose trailing sequence number is not the expected one
# The findings are written to ./output/IncomingTest_<timestamp>.rtf.

our $timestamp;    # filled in by timestamp(); kept as a package variable
timestamp();
print $timestamp."\n";

my $output = "./output/IncomingTest_".$timestamp.".rtf";
open(my $out, '>', $output) or die "can't open $output\n";

my $base = "/srv/deposits/content/";

# Accepted name shapes for object directories and for files.
# Note: the three-letter extension rule rejects jp2 and mp3 (known limitation).
my $dir_name_re  = qr/\A[a-z]\d{4}_\d{7}_\d{7}(?:_\d{4}(?:_\d{3})?)?\z/;
my $file_name_re = qr/\A[a-z]\d{4}_\d{7}_\d{7}(?:_\d{4}(?:_\d{3})?)?\.[a-z]{3}\z/;

# Top-level directories, kept with a trailing slash as the report prints them.
my @dirs = map { $_."/" }
           grep { -d $base.$_ }
           _list_entries($base, "can't look through $base\n");

foreach my $topdir (@dirs) {
    my $thisdir = $base.$topdir;

    # Queue of directories still to inspect.  First-level entries carry a
    # trailing slash, which makes $parent (below) come out empty for them;
    # the checks further down rely on that to recognise the first level.
    my @pending = map { $thisdir.$_."/" }
                  grep { -d $thisdir.$_ }
                  _list_entries($thisdir, "can't open $thisdir\n");

    # Asked afresh for every top directory so an identifier confirmed for a
    # previous directory is never silently reused.
    my $coll = _choose_collection_id($thisdir);

    print "\nWe will test the files against collection identifier $coll.\n\n";
    print "Results will be in the $output file....\n when you see a \"Good bye!\" and this screen closes.\n";
    print "Don't open it yet! :-) \n\n";
    print ". . . working . . . (be patient) . . . \n\n";

    my (@badform, @badcount, @wrongdir, @missing);

    while (@pending) {
        my $dir   = shift @pending;
        my $count = 1;    # expected sequence number of the next file in $dir

        # $short: path relative to $base; $parent: what follows its last slash.
        (my $short  = $dir)   =~ s{\A\Q$base\E}{};
        (my $parent = $short) =~ s{.*/}{};
        my $prefix = $dir =~ m{/\z} ? $dir : $dir."/";
        my $where  = $parent ne '' ? $parent : $short;   # label for @missing

        # Entries are sorted: readdir gives no ordering guarantee, and the
        # sequence check below needs the files in name order.
        ENTRY:
        foreach my $adir (_list_entries($dir, "can't read files in $dir\n")) {
            next ENTRY if $adir =~ /Thumbs\.db/i;

            my $thispath = $prefix.$adir;

            if ($adir =~ /Scans/i || $adir =~ /Transcripts/i) {
                push @pending, $thispath;
            }
            elsif ($adir =~ /Admin/) {
                push @missing, "$coll.xml missing from $where\n"
                    unless _has_collection_file($thispath, $coll, 'xml');
            }
            elsif ($adir =~ /Metadata/) {
                push @missing, "$coll.txt missing from $where\n"
                    unless _has_collection_file($thispath, $coll, 'txt');
            }
            elsif (index($adir, $coll) >= 0) {
                # Name contains the collection identifier.
                if (-d $thispath) {
                    push @badform, $adir unless $adir =~ $dir_name_re;
                    push @pending, $thispath;   # inspect its contents later
                }
                else {
                    # A file: its name should contain its parent's name
                    # (files directly inside a Scans directory are exempt;
                    # an empty $parent matches everything).
                    if ($parent !~ /Scans/i && index($adir, $parent) < 0) {
                        push @wrongdir, $adir." ".$short;
                    }

                    if ($adir !~ $file_name_re) {
                        push @badform, $adir;
                    }
                    elsif ($parent =~ /Scans/i || $parent eq '') {
                        # Non-compound objects are not sequence-checked.
                        next ENTRY;
                    }
                    elsif ($adir =~ /.*_(\d{3,7})\./ && $adir !~ /\.txt/) {
                        # Last run of digits before the extension, as a number.
                        my $mynum = $1 + 0;
                        if ($mynum != $count) {
                            push @badcount, $adir." ".$count;
                        }
                        $count++;
                    }
                }
            }
            else {
                push @wrongdir, $adir." ".$short;
            }
        }
    }

    if (@badform || @badcount || @wrongdir || @missing) {
        print {$out} "\nTROUBLE: $topdir \n";
        print {$out} "-------------------------------------------\n";
    }
    else {
        print {$out} "All is GREAT in $topdir! :-) \n";
    }

    _print_section($out,
        "\nThe following files are missing or badly named \n",
        @missing);
    _print_section($out,
        "\nThe following files or directories do NOT reflect the name of their parent directory\n"
        ."Are they in the right place? Please check:\n",
        @wrongdir);
    _print_section($out,
        "\nThe following filenames or directories are not in the correct format:\n",
        @badform);
    _print_section($out,
        "\nThe following filenames were expected to be the sequence number\n"
        ."that follows them:\n",
        @badcount);
}    # do the next top directory

close($out) or die "can't close $output: $!\n";
print "Good bye!\n";
exit;

# Return the sorted names in $dir, skipping everything that starts with a dot.
# Dies with $error (a complete message) when the directory cannot be opened.
sub _list_entries {
    my ($dir, $error) = @_;
    opendir(my $dh, $dir) or die $error;
    my @names = sort grep { !/^\./ } readdir($dh);
    closedir($dh);
    return @names;
}

# Read one line from STDIN with its line ending removed.
# Dies when STDIN is exhausted, since the script cannot proceed unattended.
sub _read_reply {
    my $reply = <STDIN>;
    die "no more input on STDIN\n" unless defined $reply;
    $reply =~ s/\r?\n\z//;
    return $reply;
}

# Work out the collection identifier for $dir_path: propose the one embedded
# in the path if there is one, otherwise ask; repeat until the operator
# confirms a non-empty identifier.  Any answer containing "n" means "no".
sub _choose_collection_id {
    my ($dir_path) = @_;

    my $coll;
    if ($dir_path =~ /([a-z]\d{4}_\d{7}(?:_\d{7})?)/) {
        $coll = $1;
        print "Looking at $dir_path: Is this the collection identifier? -->$coll\n";
    }

    while (1) {
        if (!defined $coll) {
            print "Please enter the collection identifier\n";
            $coll = _read_reply();
            print "Is this the collection identifier? -->$coll\n";
        }
        print "Y or N, and press enter\n";
        my $answer = _read_reply();
        return $coll if $answer !~ /n/i && $coll ne '';
        undef $coll;
    }
}

# True when $dir holds "<coll>.<ext>" or "<coll>.<1-2 digits>.<ext>".
sub _has_collection_file {
    my ($dir, $coll, $ext) = @_;
    my $wanted = qr/\A\Q$coll\E(?:\.\d{1,2})?\.\Q$ext\E\z/;
    foreach my $name (_list_entries($dir, "can't look through $dir\n")) {
        return 1 if $name =~ $wanted;
    }
    return 0;
}

# Print $intro followed by one line per item; prints nothing for an empty list.
sub _print_section {
    my ($fh, $intro, @items) = @_;
    return unless @items;
    print {$fh} $intro;
    print {$fh} " $_\n" foreach @items;
    return;
}

# Set $timestamp to the current UTC time as YYYY-MM-DDTHH:MM:SSZ and return it.
sub timestamp {
    #following for Windows
    #print "hit enter twice please\n";
    #$date = `date`;
    #$time = `time`;
    #print $date."\n";
    #if ($date =~ /.*? (\d*)\/(\d*)\/(\d*)/){
    #  $date = $3.$1.$2;
    #  }
    #print $date."\n";
    #if ($time =~ /.*? (\d*)\:(\d*)\:(\d*)\./){
    #  $time = $1.$2.$3;
    #  }
    #print $time."\n";
    #$timestamp = $date."T".$time;
    #print $timestamp."\n";

    # following for unix
    my ($sec, $min, $hour, $mday, $mon, $year) = gmtime();
    # gmtime() months are 0-based and years are counted from 1900.
    $timestamp = sprintf('%04d-%02d-%02dT%02d:%02d:%02dZ',
                         $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
    return $timestamp;
}