#!/usr/bin/perl
use strict;
use warnings;
use Time::Local;

# testIncoming
#   locate all the bad filenames
#   things in the wrong directory
#   things missing
#   bad sequences
# jody DeRidder, 8/25/09
#
# For every top-level directory under the deposit base, the operator confirms
# (or types) a collection identifier.  The tree below that directory is then
# walked breadth-first and four kinds of problems are collected:
#   @missing  - Admin/Metadata directories lacking <coll>.xml / <coll>.txt
#   @wrongdir - entries whose name does not reflect their parent directory
#   @badform  - names that do not follow the identifier format
#   @badcount - files whose trailing number breaks the 1,2,3... sequence
# The findings are written to ./output/IncomingTest_<timestamp>.rtf.

# Set by timestamp(); kept as a package global so that existing callers that
# read $timestamp after calling the sub keep working.
our $timestamp;

# Item identifier: letter + 4 digits, two 7-digit groups, and optional
# 4-digit and 3-digit suffix groups.
my $ITEM_ID_RE = qr/[a-z]{1}\d{4}\_\d{7}\_\d{7}(?:\_\d{4}(?:\_\d{3})?)?/;

# Collection identifier as it may appear in a directory path.
my $COLL_ID_RE = qr/([a-z]{1}\d{4}\_\d{7}(?:\_\d{7})?)/;

main();
exit;

# Entry point: prompt per top-level directory, scan it, write the report.
sub main {
    timestamp();
    print $timestamp."\n";

    my $output = "./output/IncomingTest_".$timestamp.".rtf";
    open(my $out, '>', $output) or die "can't open $output: $!\n";

    my $base = "/srv/deposits/content/";

    # Top-level directory names keep a trailing slash because the report
    # prints them that way.
    my @topdirs = map { $_."/" }
                  grep { -d $base.$_ }
                  list_entries($base, "can't look through");

    for my $topdir (@topdirs) {
        my $thisdir = $base.$topdir;

        # Work queue of [ path, parent-name ] pairs.  Directories directly
        # below the top directory get an empty parent name: the checks below
        # treat an empty parent as "not a compound object" (no parent-name
        # match, no sequence check).
        my @queue = map  { [ $thisdir.$_, '' ] }
                    grep { -d $thisdir.$_ }
                    list_entries($thisdir, "can't open");

        # The identifier is decided afresh for every top directory so that a
        # value confirmed for an earlier directory is never silently reused.
        my $coll;
        if ($thisdir =~ $COLL_ID_RE) {
            $coll = $1;
            print "Looking at $thisdir: Is this the collection identifier? -->$coll\n";
            print "Y or N, and press enter\n";
            my $answer = <STDIN>;
            undef $coll if defined $answer && $answer =~ /n/i;
        }
        $coll = ask_collection_id() unless defined $coll;

        print "\nWe will test the files against collection identifier $coll.\n\n";
        print "Results will be in the $output file....\n when you see a \"Good bye!\" and this screen closes.\n";
        print "Don't open it yet! :-) \n\n";
        print ". . . working . . . (be patient) . . . \n\n";

        my (@badform, @badcount, @wrongdir, @missing);

        # Breadth-first walk.  A shift/push queue is used instead of pushing
        # onto an array that a foreach is currently iterating.
        while (my $job = shift @queue) {
            my ($dir, $parent) = @$job;

            # Path relative to the deposit base, used in the report.
            (my $short = $dir) =~ s/^\Q$base\E//;

            my $count = 1;    # expected sequence number of the next file

            # list_entries() returns sorted names, so the sequence check does
            # not depend on the order in which readdir happens to list files.
            for my $entry (list_entries($dir, "can't read files in")) {
                next if $entry =~ /Thumbs\.db/i;

                my $thispath = $dir."/".$entry;

                if (($entry =~ /Scans/i || $entry =~ /Transcripts/i) && -d $thispath) {
                    push @queue, [ $thispath, $entry ];
                }
                elsif ($entry =~ /Admin/) {
                    push @missing, "$coll.xml missing from $parent\n"
                        unless has_collection_file($thispath, $coll, 'xml');
                }
                elsif ($entry =~ /Metadata/) {
                    push @missing, "$coll.txt missing from $parent\n"
                        unless has_collection_file($thispath, $coll, 'txt');
                }
                elsif ($entry =~ /\Q$coll\E/) {
                    # passes first test, it matches the collection name
                    if (-d $thispath) {
                        push @badform, $entry unless $entry =~ /^$ITEM_ID_RE$/;
                        # collect subdirectories for further investigation
                        push @queue, [ $thispath, $entry ];
                    }
                    else {
                        # Not a directory; must be a file.  Does it match its
                        # parent directory?  index() is used rather than a
                        # regex so that an empty parent simply matches (an
                        # empty regex would re-run the last successful
                        # pattern) and metacharacters are taken literally.
                        if ($parent !~ /Scans/i && index($entry, $parent) < 0) {
                            push @wrongdir, $entry." ".$short;
                        }

                        # Extension is three lowercase letters or digits, so
                        # that jp2 and mp3 are accepted.
                        if ($entry !~ /^$ITEM_ID_RE\.[a-z0-9]{3}$/) {
                            push @badform, $entry;
                        }
                        elsif ($parent =~ /Scans/i || $parent eq '') {
                            # don't check non-compound objects for sequence
                            next;
                        }
                        elsif ($entry =~ /.*\_(\d{3,7})\./ && $entry !~ /\.txt/) {
                            # check for bad numbering ONLY on files named
                            # correctly; "+ 0" drops the leading zeros
                            my $mynum = $1 + 0;
                            if ($mynum != $count) {
                                push @badcount, $entry." ".$count;
                            }
                            $count++;
                        }
                    }
                }
                else {
                    push @wrongdir, $entry." ".$short;
                }
            }
        }

        write_report($out, $topdir, \@missing, \@wrongdir, \@badform, \@badcount);
    }    # do the next top directory

    close($out) or die "can't close $output: $!\n";
    print "Good bye!\n";
    return;
}

# Return the sorted names in $dir, without dot files.
# $complaint is the prefix of the message used when the directory cannot be
# opened (defaults to "can't look through").
sub list_entries {
    my ($dir, $complaint) = @_;
    $complaint = "can't look through" unless defined $complaint;

    opendir(my $dh, $dir) or die "$complaint $dir: $!\n";
    my @names = grep { !/^\./ } readdir($dh);
    closedir($dh);

    my @sorted = sort @names;
    return @sorted;
}

# True when $dir holds "<coll>.<ext>" or "<coll>.<1-2 digits>.<ext>".
sub has_collection_file {
    my ($dir, $coll, $ext) = @_;

    for my $name (list_entries($dir)) {
        return 1 if $name =~ /^\Q$coll\E(?:\.\d{1,2})?\.\Q$ext\E$/;
    }
    return 0;
}

# Ask on STDIN for a collection identifier until the operator confirms one.
# Returns the identifier without its line ending; dies if input ends first.
sub ask_collection_id {
    while (1) {
        print "Please enter the collection identifier\n";
        my $coll = <STDIN>;
        die "no collection identifier given (end of input)\n" unless defined $coll;
        chomp $coll;
        next if $coll eq '';

        print "Is this the collection identifier? -->$coll\n";
        print "Y or N, and press enter\n";
        my $answer = <STDIN>;
        return $coll unless defined $answer && $answer =~ /n/i;
    }
}

# Write the findings for one top-level directory to the report handle.
# The four list arguments are array references.
sub write_report {
    my ($out, $topdir, $missing, $wrongdir, $badform, $badcount) = @_;

    if (@$badform || @$badcount || @$wrongdir || @$missing) {
        print {$out} "\nTROUBLE: $topdir \n";
        print {$out} "-------------------------------------------\n";
    }
    else {
        print {$out} "All is GREAT in $topdir! :-) \n";
    }

    if (@$missing) {
        print {$out} "\nThe following files are missing or badly named \n";
        print {$out} " $_\n" for @$missing;
    }
    if (@$wrongdir) {
        print {$out} "\nThe following files or directories do NOT reflect the name of their parent directory\n";
        print {$out} "Are they in the right place? Please check:\n";
        print {$out} " $_\n" for @$wrongdir;
    }
    if (@$badform) {
        print {$out} "\nThe following filenames or directories are not in the correct format:\n";
        print {$out} " $_\n" for @$badform;
    }
    if (@$badcount) {
        print {$out} "\nThe following filenames were expected to be the sequence number\n";
        print {$out} "that follows them:\n";
        print {$out} " $_\n" for @$badcount;
    }
    return;
}

# Set (and return) $timestamp as the current UTC time, "YYYY-MM-DDTHH:MM:SSZ".
sub timestamp {
    # following for Windows
    #print "hit enter twice please\n";
    #$date = `date`;
    #$time = `time`;
    #print $date."\n";
    #if ($date =~ /.*? (\d*)\/(\d*)\/(\d*)/){
    #    $date = $3.$1.$2;
    #    }
    #print $date."\n";
    #if ($time =~ /.*? (\d*)\:(\d*)\:(\d*)\./){
    #    $time = $1.$2.$3;
    #    }
    #print $time."\n";
    #$timestamp = $date."T".$time;
    #print $timestamp."\n";

    # following for unix
    my ($sec, $min, $hour, $mday, $mon, $year) = gmtime();

    # gmtime() months are 0-based and years are offset from 1900; sprintf
    # supplies the zero padding.
    $timestamp = sprintf('%04d-%02d-%02dT%02d:%02d:%02dZ',
        $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
    return $timestamp;
}